5 #ifndef GENERATE_KERNELS_H
6 #define GENERATE_KERNELS_H
/**
 * OpenCL C source text for the one-dimensional generate kernel.
 *
 * The text contains the placeholder tokens KERNELNAME, TYPE, CONST_TYPE and
 * FUNCTIONNAME; presumably they are substituted by the host code before the
 * program is built -- confirm against the code that consumes this string.
 *
 * What the kernel text does:
 *  - advances \p output by \p offset (through a __global void* cast),
 *  - starts each work-item at get_global_id(0) and steps it by
 *    get_local_size(0)*get_num_groups(0) until \p numElements is reached,
 *  - stores FUNCTIONNAME(i+offset, const1) into output[i].
 *
 * The literal is runtime data: do not reformat or "tidy" the pieces below.
 */
static std::string GenerateKernel_CL(
"__kernel void GenerateKernel_KERNELNAME(__global TYPE* output, unsigned int numElements, unsigned int offset, CONST_TYPE const1)\n"
"{\n"
" output = (__global void *)output + offset; /* partitioning is special with opencl */ \n"
" unsigned int i = get_global_id(0);\n"
" unsigned int gridSize = get_local_size(0)*get_num_groups(0);\n"
" while(i < numElements)\n"
" {\n"
" output[i] = FUNCTIONNAME(i+offset, const1);\n"
" i += gridSize;\n"
" }\n"
"}\n"
);
/**
 * OpenCL C source text for the two-dimensional (matrix) generate kernel.
 *
 * The text contains the placeholder tokens KERNELNAME, TYPE, CONST_TYPE and
 * FUNCTIONNAME; presumably they are substituted by the host code before the
 * program is built -- confirm against the code that consumes this string.
 *
 * What the kernel text does:
 *  - advances \p output by \p offset (through a __global void* cast),
 *  - takes the column from get_global_id(0) and the row from get_global_id(1),
 *  - for work-items inside xsize x ysize whose linear index
 *    yindex*xsize + xindex is below \p numElements, stores
 *    FUNCTIONNAME(xindex, yindex+offset, const1) into output[i].
 *
 * Unlike the one-dimensional kernel text, there is no loop: each work-item
 * writes at most one element.
 *
 * The literal is runtime data: do not reformat or "tidy" the pieces below.
 */
static std::string GenerateKernel_CL_Matrix(
"__kernel void GenerateKernel_Matrix_KERNELNAME(__global TYPE* output, unsigned int numElements, unsigned int xsize, unsigned int ysize, unsigned int offset, CONST_TYPE const1)\n"
"{\n"
" output = (__global void *)output + offset; /* partitioning is special with opencl */ \n"
" int xindex = get_global_id(0);\n"
" int yindex = get_global_id(1);\n"
" int i = yindex*xsize + xindex; \n"
" if(i < numElements && xindex<xsize && yindex <ysize)\n"
" {\n"
" output[i] = FUNCTIONNAME(xindex, yindex+offset, const1);\n"
" }\n"
"}\n"
);
/**
 * CUDA kernel that fills a linear output range with generated values.
 *
 * Every thread starts at its global x index and steps by the total number of
 * threads in the x dimension of the launch, so any 1-D launch configuration
 * covers all \p numElements entries.
 *
 * \param generateFunc Functor whose CU(index) member yields the value for an index.
 * \param output       Destination array; entries [0, numElements) are written.
 * \param numElements  Number of elements to generate.
 * \param indexOffset  Added to the element index before it is handed to
 *                     generateFunc.CU (output itself is indexed without it).
 *
 * Only blockIdx.x / blockDim.x / gridDim.x / threadIdx.x are consulted.
 */
template <typename T, typename GenerateFunc>
__global__
void GenerateKernel_CU(GenerateFunc generateFunc, T* output,
                       unsigned int numElements,
                       unsigned int indexOffset)
{
   // 64-bit index and stride: a 32-bit counter can wrap around when
   // numElements is close to 2^32, which would re-write early elements or
   // keep the loop from terminating.
   const unsigned long long stride =
      static_cast<unsigned long long>(blockDim.x) * gridDim.x;

   for(unsigned long long i =
          static_cast<unsigned long long>(blockIdx.x) * blockDim.x + threadIdx.x;
       i < numElements;
       i += stride)
   {
      // i < numElements here, so narrowing back to unsigned int is lossless;
      // the offset addition stays in unsigned int exactly as before.
      output[i] = generateFunc.CU(static_cast<unsigned int>(i) + indexOffset);
   }
}
/**
 * CUDA kernel that fills a row-major xsize x ysize matrix with generated values.
 *
 * Each thread starts at its global (x, y) coordinate and visits further
 * coordinates by stepping x and y independently with the launch-wide thread
 * counts of the respective dimension. Stepping both coordinates at once (as
 * the previous loop did) walks a diagonal and leaves elements unwritten
 * whenever the grid is smaller than the matrix; it also let the output address
 * drift away from yindex*xsize + xindex. For launches whose grid already
 * covers the whole matrix the behaviour is unchanged: one write per thread.
 *
 * \param generateFunc Functor whose CU(x, y) member yields the value for a coordinate.
 * \param output       Destination array, addressed as output[y*xsize + x].
 * \param numElements  Upper bound (exclusive) on the linear addresses written.
 * \param xsize        Number of columns.
 * \param ysize        Number of rows.
 * \param yoffset      Added to the row index before it is handed to
 *                     generateFunc.CU (output itself is addressed without it).
 */
template <typename T, typename GenerateFunc>
__global__
void GenerateKernel_CU_Matrix(GenerateFunc generateFunc, T* output,
                              unsigned int numElements,
                              unsigned int xsize,
                              unsigned int ysize,
                              unsigned int yoffset)
{
   // 64-bit arithmetic throughout so neither the strides nor y*xsize can wrap.
   const unsigned long long xstride =
      static_cast<unsigned long long>(blockDim.x) * gridDim.x;
   const unsigned long long ystride =
      static_cast<unsigned long long>(blockDim.y) * gridDim.y;

   const unsigned long long xstart =
      static_cast<unsigned long long>(blockIdx.x) * blockDim.x + threadIdx.x;
   const unsigned long long ystart =
      static_cast<unsigned long long>(blockIdx.y) * blockDim.y + threadIdx.y;

   for(unsigned long long yindex = ystart; yindex < ysize; yindex += ystride)
   {
      for(unsigned long long xindex = xstart; xindex < xsize; xindex += xstride)
      {
         const unsigned long long outaddr = yindex * xsize + xindex;

         // Addresses only grow as this thread advances (x within a row, then
         // to a later row), so once the limit is hit nothing is left to do.
         if(outaddr >= numElements)
            return;

         // Both coordinates are below xsize / ysize, hence fit unsigned int;
         // the row offset is added in unsigned int exactly as before.
         output[outaddr] = generateFunc.CU(static_cast<unsigned int>(xindex),
                                           static_cast<unsigned int>(yindex) + yoffset);
      }
   }
}
__global__ void GenerateKernel_CU_Matrix(GenerateFunc generateFunc, T *output, unsigned int numElements, unsigned int xsize, unsigned int ysize, unsigned int yoffset)
Definition: generate_kernels.h:111
__global__ void GenerateKernel_CU(GenerateFunc generateFunc, T *output, unsigned int numElements, unsigned int indexOffset)
Definition: generate_kernels.h:92
static std::string GenerateKernel_CL("__kernel void GenerateKernel_KERNELNAME(__global TYPE* output, unsigned int numElements, unsigned int offset, CONST_TYPE const1)\n""{\n"" output = (__global void *)output + offset; /* partitioning is special with opencl */ \n"" unsigned int i = get_global_id(0);\n"" unsigned int gridSize = get_local_size(0)*get_num_groups(0);\n"" while(i < numElements)\n"" {\n"" output[i] = FUNCTIONNAME(i+offset, const1);\n"" i += gridSize;\n"" }\n""}\n")
static std::string GenerateKernel_CL_Matrix("__kernel void GenerateKernel_Matrix_KERNELNAME(__global TYPE* output, unsigned int numElements, unsigned int xsize, unsigned int ysize, unsigned int offset, CONST_TYPE const1)\n""{\n"" output = (__global void *)output + offset; /* partitioning is special with opencl */ \n"" int xindex = get_global_id(0);\n"" int yindex = get_global_id(1);\n"" int i = yindex*xsize + xindex; \n"" if(i < numElements && xindex<xsize && yindex <ysize)\n"" {\n"" output[i] = FUNCTIONNAME(xindex, yindex+offset, const1);\n"" }\n""}\n")