5 #ifndef GENERATE_KERNELS_H
6 #define GENERATE_KERNELS_H
/*!
 *  OpenCL C source text for the one-dimensional Generate kernel, kept as a
 *  string.
 *
 *  The kernel starts at get_global_id(0) and advances by
 *  get_local_size(0)*get_num_groups(0) until it reaches numElements, storing
 *  FUNCTIONNAME(i+indexOffset, const1) into output[i] on every step.
 *
 *  KERNELNAME, TYPE, CONST_TYPE and FUNCTIONNAME look like placeholders that
 *  are textually replaced before the program is built -- TODO confirm against
 *  the code that consumes this string.
 *
 *  NOTE(review): the OpenCL C specification does not allow size_t as a kernel
 *  argument type. Changing it here would also require changing the host-side
 *  argument setup, so the signature is left as it is.
 */
static std::string GenerateKernel_CL(
"__kernel void GenerateKernel_KERNELNAME(__global TYPE* output, size_t numElements, size_t indexOffset, CONST_TYPE const1)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" size_t gridSize = get_local_size(0)*get_num_groups(0);\n"
" while(i < numElements)\n"
" {\n"
" output[i] = FUNCTIONNAME(i+indexOffset, const1);\n"
" i += gridSize;\n"
" }\n"
"}\n"
);
/*!
 *  OpenCL C source text for the two-dimensional (matrix) Generate kernel, kept
 *  as a string.
 *
 *  Each work-item takes its column from get_global_id(0) and its row from
 *  get_global_id(1), forms the linear index i = yindex*xsize + xindex and, if
 *  i < numElements and both coordinates are inside xsize/ysize, stores
 *  FUNCTIONNAME(xindex, yindex+yoffset, const1) into output[i]. There is no
 *  loop: a work-item writes at most one element.
 *
 *  KERNELNAME, TYPE, CONST_TYPE and FUNCTIONNAME look like placeholders that
 *  are textually replaced before the program is built -- TODO confirm against
 *  the code that consumes this string.
 *
 *  NOTE(review): the OpenCL C specification does not allow size_t as a kernel
 *  argument type. Changing it here would also require changing the host-side
 *  argument setup, so the signature is left as it is.
 */
static std::string GenerateKernel_CL_Matrix(
"__kernel void GenerateKernel_Matrix_KERNELNAME(__global TYPE* output, size_t numElements, size_t xsize, size_t ysize, size_t yoffset, CONST_TYPE const1)\n"
"{\n"
" size_t xindex = get_global_id(0);\n"
" size_t yindex = get_global_id(1);\n"
" size_t i = yindex*xsize + xindex; \n"
" if(i < numElements && xindex<xsize && yindex <ysize)\n"
" {\n"
" output[i] = FUNCTIONNAME(xindex, yindex+yoffset, const1);\n"
" }\n"
"}\n"
);
/*!
 *  CUDA kernel that fills a linear buffer with generated values:
 *  output[i] = generateFunc.CU(i + indexOffset) for every i in
 *  [0, numElements).
 *
 *  Uses only the x dimension of the launch configuration. Written as a
 *  grid-stride loop, so any 1D grid/block size produces the full result; a
 *  launch with fewer threads than elements simply makes each thread handle
 *  several elements.
 *
 *  \tparam T            Element type stored in \p output.
 *  \tparam GenerateFunc Functor type providing a device-callable member
 *                       CU(index) whose result is assignable to T.
 *  \param generateFunc  Functor instance, passed by value to the kernel.
 *  \param output        Destination buffer; dereferenced in device code, so it
 *                       must be device-accessible and hold at least
 *                       \p numElements elements.
 *  \param numElements   Number of elements to generate.
 *  \param indexOffset   Value added to the local index before it is handed to
 *                       the functor.
 */
template <typename T, typename GenerateFunc>
__global__ void GenerateKernel_CU(GenerateFunc generateFunc, T* output, size_t numElements, size_t indexOffset)
{
	// blockIdx/blockDim/gridDim components are 32-bit unsigned; widen one
	// operand first so the products cannot wrap before reaching size_t.
	const size_t stride = static_cast<size_t>(blockDim.x) * gridDim.x;
	const size_t first  = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;

	for (size_t i = first; i < numElements; i += stride)
	{
		output[i] = generateFunc.CU(i + indexOffset);
	}
}
/*!
 *  CUDA kernel that fills a matrix-shaped buffer with generated values:
 *  output[y*xsize + x] = generateFunc.CU(x, y + yoffset) for every
 *  x in [0, xsize) and y in [0, ysize) whose linear address is below
 *  \p numElements.
 *
 *  Uses the x and y dimensions of the launch configuration. Each thread
 *  strides independently along x (by blockDim.x*gridDim.x) and along y (by
 *  blockDim.y*gridDim.y), and the output address is recomputed from the
 *  current coordinates. Stepping both coordinates in lock-step would only
 *  visit a diagonal and leave cells unwritten whenever the grid is smaller
 *  than the matrix. When the launch already covers the whole matrix every
 *  thread writes at most one element.
 *
 *  \tparam T            Element type stored in \p output.
 *  \tparam GenerateFunc Functor type providing a device-callable member
 *                       CU(column, row) whose result is assignable to T.
 *  \param generateFunc  Functor instance, passed by value to the kernel.
 *  \param output        Destination buffer; dereferenced in device code, so it
 *                       must be device-accessible. Rows are addressed
 *                       contiguously with a row length of \p xsize.
 *  \param numElements   Upper bound (exclusive) on the linear addresses that
 *                       may be written.
 *  \param xsize         Number of columns.
 *  \param ysize         Number of rows handled by this launch.
 *  \param yoffset       Value added to the local row index before it is handed
 *                       to the functor.
 */
template <typename T, typename GenerateFunc>
__global__ void GenerateKernel_CU_Matrix(GenerateFunc generateFunc, T* output, size_t numElements, size_t xsize, size_t ysize, size_t yoffset)
{
	// blockIdx/blockDim/gridDim components are 32-bit unsigned; widen one
	// operand first so the products cannot wrap before reaching size_t.
	const size_t xStride = static_cast<size_t>(blockDim.x) * gridDim.x;
	const size_t yStride = static_cast<size_t>(blockDim.y) * gridDim.y;
	const size_t xFirst  = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
	const size_t yFirst  = static_cast<size_t>(blockIdx.y) * blockDim.y + threadIdx.y;

	for (size_t yindex = yFirst; yindex < ysize; yindex += yStride)
	{
		const size_t rowStart = yindex * xsize;

		for (size_t xindex = xFirst; xindex < xsize; xindex += xStride)
		{
			// Always derive the address from the current (x, y) pair so the
			// stored value matches the coordinates given to the functor.
			const size_t outaddr = rowStart + xindex;

			if (outaddr < numElements)
			{
				output[outaddr] = generateFunc.CU(xindex, yindex + yoffset);
			}
		}
	}
}
__global__ void GenerateKernel_CU(GenerateFunc generateFunc, T *output, size_t numElements, size_t indexOffset)
Definition: generate_kernels.h:90
__global__ void GenerateKernel_CU_Matrix(GenerateFunc generateFunc, T *output, size_t numElements, size_t xsize, size_t ysize, size_t yoffset)
Definition: generate_kernels.h:109
static std::string GenerateKernel_CL("__kernel void GenerateKernel_KERNELNAME(__global TYPE* output, size_t numElements, size_t indexOffset, CONST_TYPE const1)\n""{\n"" size_t i = get_global_id(0);\n"" size_t gridSize = get_local_size(0)*get_num_groups(0);\n"" while(i < numElements)\n"" {\n"" output[i] = FUNCTIONNAME(i+indexOffset, const1);\n"" i += gridSize;\n"" }\n""}\n")
static std::string GenerateKernel_CL_Matrix("__kernel void GenerateKernel_Matrix_KERNELNAME(__global TYPE* output, size_t numElements, size_t xsize, size_t ysize, size_t yoffset, CONST_TYPE const1)\n""{\n"" size_t xindex = get_global_id(0);\n"" size_t yindex = get_global_id(1);\n"" size_t i = yindex*xsize + xindex; \n"" if(i < numElements && xindex<xsize && yindex <ysize)\n"" {\n"" output[i] = FUNCTIONNAME(xindex, yindex+yoffset, const1);\n"" }\n""}\n")