5 #ifndef MAPARRAY_KERNELS_H
6 #define MAPARRAY_KERNELS_H
32 "__kernel void MapArrayKernel_KERNELNAME(__global TYPE* input1, __global TYPE* input2, __global TYPE* output, size_t n, CONST_TYPE const1)\n"
34 " size_t i = get_global_id(0);\n"
35 " size_t gridSize = get_local_size(0)*get_num_groups(0);\n"
38 " output[i] = FUNCTIONNAME(&input1[0], input2[i], const1);\n"
54 "__kernel void MapArrayKernel_Matrix_Blockwise_KERNELNAME(__global TYPE* input1, __global TYPE* input2, __global TYPE* output, size_t outSize, size_t p2BlockSize, CONST_TYPE const1)\n"
56 " size_t i = get_global_id(0);\n"
57 " size_t gridSize = get_local_size(0)*get_num_groups(0);\n"
60 " output[i] = FUNCTIONNAME(&input1[0], &input2[i*p2BlockSize], const1);\n"
73 "__kernel void MapArrayKernel_Sparse_Matrix_Blockwise_KERNELNAME(__global TYPE* input1, __global TYPE* in2_values, __global size_t *in2_row_offsets, __global size_t *in2_col_indices, __global TYPE* output, size_t outSize, size_t indexOffset, CONST_TYPE const1)\n"
75 " size_t i = get_global_id(0);\n"
76 " size_t gridSize = get_local_size(0)*get_num_groups(0);\n"
79 " size_t rowId = in2_row_offsets[i] - indexOffset;\n"
80 " size_t row2Id = in2_row_offsets[i+1] - indexOffset;\n"
81 " output[i] = FUNCTIONNAME(&input1[0], &in2_values[rowId], (row2Id-rowId), &in2_col_indices[rowId], const1);\n"
94 "__kernel void MapArrayKernel_Matrix_KERNELNAME(__global TYPE* input1, __global TYPE* input2, __global TYPE* output, size_t n, size_t xsize, size_t ysize, size_t yoffset, CONST_TYPE const1)\n"
96 " size_t xindex = get_global_id(0);\n"
97 " size_t yindex = get_global_id(1);\n"
98 " size_t i = yindex*xsize + xindex; \n"
99 " if(i < n && xindex<xsize && yindex <ysize)\n"
101 " output[i] = FUNCTIONNAME(&input1[0], input2[i], xindex, yindex+yoffset, const1);\n"
// CUDA kernel template: applies mapArrayFunc.CU to elements of `input`, together with
// the MultiVector `P`, and stores the results in `output`.
// NOTE(review): this listing omits the braces and original lines 137-139, so the
// statement that bounds/advances `i` against `n` is not visible here -- confirm
// against the full file before relying on the exact iteration pattern.
132 template <
typename in,
typename out,
typename ArrayFunc>
133 __global__
void MapArrayKernel_VAR(ArrayFunc mapArrayFunc, in* input,MultiVector P, out* output,
size_t n)
// Global thread index along x.
135 size_t i = blockIdx.x * blockDim.x + threadIdx.x;
// Total number of threads launched along x (threads per block * number of blocks).
136 size_t gridSize = blockDim.x*gridDim.x;
// One output element per visited index: the functor receives the element and P by value.
140 output[i] = mapArrayFunc.CU(input[i], P);
// CUDA kernel template: for each visited index i, calls mapArrayFunc.CU with a pointer
// to the start of `input1` and the single element input2[i], storing the result in output[i].
// NOTE(review): this listing omits the braces and original lines 155-157, so the
// statement that bounds/advances `i` against `n` is not visible here -- confirm
// against the full file.
150 template <
typename T,
typename ArrayFunc>
151 __global__
void MapArrayKernel_CU(ArrayFunc mapArrayFunc, T* input1, T* input2, T* output,
size_t n)
// Global thread index along x.
153 size_t i = blockIdx.x * blockDim.x + threadIdx.x;
// Total number of threads launched along x (threads per block * number of blocks).
154 size_t gridSize = blockDim.x*gridDim.x;
// input1 is passed as a whole (pointer to its first element); input2 is passed element-wise.
158 output[i] = mapArrayFunc.CU(&input1[0], input2[i]);
171 template <
typename T,
typename ArrayFunc>
174 size_t i = blockIdx.x * blockDim.x + threadIdx.x;
175 size_t gridSize = blockDim.x*gridDim.x;
179 output[i] = mapArrayFunc.CU(&input1[0], &input2[i*p2BlockSize]);
190 template <
typename T,
typename ArrayFunc>
193 size_t i = blockIdx.x * blockDim.x + threadIdx.x;
194 size_t gridSize = blockDim.x*gridDim.x;
198 size_t rowId = in2_row_offsets[i] - indexOffset;
199 size_t row2Id = in2_row_offsets[i+1] - indexOffset;
200 output[i] = mapArrayFunc.CU(&input1[0], &in2_values[rowId], (row2Id-rowId), &in2_col_indices[rowId]);
// CUDA kernel template using a 2D thread layout: each thread derives an (xindex, yindex)
// coordinate, linearises it as yindex*xsize + xindex, and, when in range, stores
// mapArrayFunc.CU(&input1[0], input2[addr], xindex, yindex+yoffset) in output[addr].
// Parameters (as used below):
//   n       - upper bound (exclusive) on the linear address
//   xsize   - upper bound on xindex; also the row stride used for linearisation
//   ysize   - upper bound on yindex
//   yoffset - added to yindex before it is handed to the functor
// NOTE(review): the brace lines of the original file are absent from this listing.
212 template <
typename T,
typename ArrayFunc>
213 __global__
void MapArrayKernel_CU_Matrix(ArrayFunc mapArrayFunc, T* input1, T* input2, T* output,
size_t n,
size_t xsize,
size_t ysize,
size_t yoffset)
// Global thread coordinates along x and y.
215 size_t xindex = blockIdx.x * blockDim.x + threadIdx.x;
216 size_t yindex = blockIdx.y * blockDim.y + threadIdx.y;
// Linear address of this thread's element, with xsize elements per row.
217 size_t outaddr = yindex*xsize + xindex;
// Threads whose coordinate or linear address falls outside the bounds do nothing.
219 if(outaddr < n && xindex<xsize && yindex <ysize)
// The functor sees the y coordinate shifted by yoffset; the store itself uses the unshifted address.
221 output[outaddr] = mapArrayFunc.CU(&input1[0], input2[outaddr], xindex, yindex+yoffset);
// OpenCL C source text for the 1D MapArray kernel, held in a std::string.
// KERNELNAME, TYPE, CONST_TYPE and FUNCTIONNAME look like textual placeholders that are
// substituted before the source is built -- TODO confirm against the code that consumes this string.
// Kernel text: each work-item starts at get_global_id(0) and, while i < n, writes
// output[i] = FUNCTIONNAME(&input1[0], input2[i], const1) and advances i by
// get_local_size(0)*get_num_groups(0).
// NOTE(review): the statement terminator is not present on this line in this listing.
static std::string MapArrayKernel_CL("__kernel void MapArrayKernel_KERNELNAME(__global TYPE* input1, __global TYPE* input2, __global TYPE* output, size_t n, CONST_TYPE const1)\n""{\n"" size_t i = get_global_id(0);\n"" size_t gridSize = get_local_size(0)*get_num_groups(0);\n"" while(i < n)\n"" {\n"" output[i] = FUNCTIONNAME(&input1[0], input2[i], const1);\n"" i += gridSize;\n"" }\n""}\n")
__global__ void MapArrayKernel_CU_Matrix_Blockwise(ArrayFunc mapArrayFunc, T *input1, T *input2, T *output, size_t outSize, size_t p2BlockSize)
Definition: maparray_kernels.h:172
// OpenCL C source text for the 2D (matrix) MapArray kernel, held in a std::string.
// KERNELNAME, TYPE, CONST_TYPE and FUNCTIONNAME look like textual placeholders that are
// substituted before the source is built -- TODO confirm against the code that consumes this string.
// Kernel text: a work-item takes xindex/yindex from get_global_id(0)/get_global_id(1),
// linearises them as i = yindex*xsize + xindex, and only when
// i < n && xindex < xsize && yindex < ysize writes
// output[i] = FUNCTIONNAME(&input1[0], input2[i], xindex, yindex+yoffset, const1).
// NOTE(review): the statement terminator is not present on this line in this listing.
static std::string MapArrayKernel_CL_Matrix("__kernel void MapArrayKernel_Matrix_KERNELNAME(__global TYPE* input1, __global TYPE* input2, __global TYPE* output, size_t n, size_t xsize, size_t ysize, size_t yoffset, CONST_TYPE const1)\n""{\n"" size_t xindex = get_global_id(0);\n"" size_t yindex = get_global_id(1);\n"" size_t i = yindex*xsize + xindex; \n"" if(i < n && xindex<xsize && yindex <ysize)\n"" {\n"" output[i] = FUNCTIONNAME(&input1[0], input2[i], xindex, yindex+yoffset, const1);\n"" }\n""}\n")
/**
 * OpenCL C source text for the blockwise matrix MapArray kernel, held in a std::string.
 *
 * KERNELNAME, TYPE, CONST_TYPE and FUNCTIONNAME look like textual placeholders that are
 * substituted before the source is built -- TODO confirm against the consumer of this string.
 *
 * Kernel text: for every visited index i < outSize the work-item writes
 *   output[i] = FUNCTIONNAME(&input1[0], &input2[i*p2BlockSize], const1)
 * i.e. the user function receives all of input1 and a pointer to the i-th block of
 * p2BlockSize elements inside input2.
 *
 * Fix: the guard used to be `if(i < outSize)`, which made the trailing `i += gridSize;`
 * a dead store, so each work-item handled at most one element and any index at or above
 * the number of launched work-items was never written. It is now a `while`, matching
 * MapArrayKernel_CL. When the launch already covers outSize, behaviour is unchanged
 * (every work-item still runs the body at most once).
 */
static std::string MapArrayKernel_CL_Matrix_Blockwise(
    "__kernel void MapArrayKernel_Matrix_Blockwise_KERNELNAME(__global TYPE* input1, __global TYPE* input2, __global TYPE* output, size_t outSize, size_t p2BlockSize, CONST_TYPE const1)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " size_t gridSize = get_local_size(0)*get_num_groups(0);\n"
    " while(i < outSize)\n"
    " {\n"
    " output[i] = FUNCTIONNAME(&input1[0], &input2[i*p2BlockSize], const1);\n"
    " i += gridSize;\n"
    " }\n"
    "}\n"
);
__global__ void MapArrayKernel_CU_Sparse_Matrix_Blockwise(ArrayFunc mapArrayFunc, T *input1, T *in2_values, size_t *in2_row_offsets, size_t *in2_col_indices, T *output, size_t outSize, size_t indexOffset)
Definition: maparray_kernels.h:191
/**
 * OpenCL C source text for the blockwise sparse-matrix MapArray kernel, held in a std::string.
 *
 * KERNELNAME, TYPE, CONST_TYPE and FUNCTIONNAME look like textual placeholders that are
 * substituted before the source is built -- TODO confirm against the consumer of this string.
 *
 * Kernel text: for every visited index i < outSize the work-item reads
 *   rowId  = in2_row_offsets[i]   - indexOffset
 *   row2Id = in2_row_offsets[i+1] - indexOffset
 * and writes
 *   output[i] = FUNCTIONNAME(&input1[0], &in2_values[rowId], (row2Id-rowId),
 *                            &in2_col_indices[rowId], const1)
 * so the user function receives all of input1 plus the value/column-index slices of
 * length (row2Id-rowId) starting at rowId. Because in2_row_offsets[i+1] is read for
 * every i < outSize, that array must hold at least outSize+1 entries.
 *
 * Fix: the guard used to be `if(i < outSize)`, which made the trailing `i += gridSize;`
 * a dead store, so each work-item handled at most one element and any index at or above
 * the number of launched work-items was never written. It is now a `while`, matching
 * MapArrayKernel_CL. When the launch already covers outSize, behaviour is unchanged.
 */
static std::string MapArrayKernel_CL_Sparse_Matrix_Blockwise(
    "__kernel void MapArrayKernel_Sparse_Matrix_Blockwise_KERNELNAME(__global TYPE* input1, __global TYPE* in2_values, __global size_t *in2_row_offsets, __global size_t *in2_col_indices, __global TYPE* output, size_t outSize, size_t indexOffset, CONST_TYPE const1)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " size_t gridSize = get_local_size(0)*get_num_groups(0);\n"
    " while(i < outSize)\n"
    " {\n"
    " size_t rowId = in2_row_offsets[i] - indexOffset;\n"
    " size_t row2Id = in2_row_offsets[i+1] - indexOffset;\n"
    " output[i] = FUNCTIONNAME(&input1[0], &in2_values[rowId], (row2Id-rowId), &in2_col_indices[rowId], const1);\n"
    " i += gridSize;\n"
    " }\n"
    "}\n"
);
__global__ void MapArrayKernel_CU(ArrayFunc mapArrayFunc, T *input1, T *input2, T *output, size_t n)
Definition: maparray_kernels.h:151
__global__ void MapArrayKernel_CU_Matrix(ArrayFunc mapArrayFunc, T *input1, T *input2, T *output, size_t n, size_t xsize, size_t ysize, size_t yoffset)
Definition: maparray_kernels.h:213