SkePU  1.2
 All Classes Namespaces Files Functions Variables Enumerations Friends Macros Groups Pages
generate_kernels.h
Go to the documentation of this file.
1 
5 #ifndef GENERATE_KERNELS_H
6 #define GENERATE_KERNELS_H
7 
8 #ifdef SKEPU_OPENCL
9 
10 #include <string>
11 
12 namespace skepu
13 {
14 
/*!
 * OpenCL source text for the one-dimensional Generate kernel.
 *
 * The text contains the tokens KERNELNAME, TYPE, CONST_TYPE and FUNCTIONNAME,
 * which are not valid OpenCL on their own; presumably the host code replaces
 * them with concrete names before the program is built (verify against the
 * code that consumes this string).
 *
 * The kernel text makes each work-item start at its global id and advance by
 * get_local_size(0)*get_num_groups(0) until numElements is reached, storing
 * FUNCTIONNAME(i+indexOffset, const1) into output[i].
 */
static std::string GenerateKernel_CL =
    // Kernel signature.
    "__kernel void GenerateKernel_KERNELNAME(__global TYPE* output, size_t numElements, size_t indexOffset, CONST_TYPE const1)\n"
    "{\n"
    // Starting index and per-iteration step of this work-item.
    " size_t i = get_global_id(0);\n"
    " size_t gridSize = get_local_size(0)*get_num_groups(0);\n"
    // Strided loop over the output elements.
    " while(i < numElements)\n"
    " {\n"
    " output[i] = FUNCTIONNAME(i+indexOffset, const1);\n"
    " i += gridSize;\n"
    " }\n"
    "}\n";
42 
43 
/*!
 * OpenCL source text for the two-dimensional (matrix) Generate kernel.
 *
 * The text contains the tokens KERNELNAME, TYPE, CONST_TYPE and FUNCTIONNAME,
 * which are not valid OpenCL on their own; presumably the host code replaces
 * them with concrete names before the program is built (verify against the
 * code that consumes this string).
 *
 * In the kernel text each work-item takes its column from get_global_id(0)
 * and its row from get_global_id(1), computes the linear index
 * yindex*xsize + xindex and, when that index and both coordinates are in
 * range, stores FUNCTIONNAME(xindex, yindex+yoffset, const1) at that index.
 * There is no loop: a work-item writes at most one element.
 */
static std::string GenerateKernel_CL_Matrix =
    // Kernel signature.
    "__kernel void GenerateKernel_Matrix_KERNELNAME(__global TYPE* output, size_t numElements, size_t xsize, size_t ysize, size_t yoffset, CONST_TYPE const1)\n"
    "{\n"
    // Coordinates of this work-item and the matching linear index.
    " size_t xindex = get_global_id(0);\n"
    " size_t yindex = get_global_id(1);\n"
    " size_t i = yindex*xsize + xindex; \n"
    // Guarded single write.
    " if(i < numElements && xindex<xsize && yindex <ysize)\n"
    " {\n"
    " output[i] = FUNCTIONNAME(xindex, yindex+yoffset, const1);\n"
    " }\n"
    "}\n";
60 
65 }
66 
67 #endif
68 
69 #ifdef SKEPU_CUDA
70 
71 namespace skepu
72 {
73 
89 template <typename T, typename GenerateFunc>
90 __global__ void GenerateKernel_CU(GenerateFunc generateFunc, T* output, size_t numElements, size_t indexOffset)
91 {
92  size_t i = blockIdx.x * blockDim.x + threadIdx.x;
93  size_t gridSize = blockDim.x*gridDim.x;
94 
95  while(i < numElements)
96  {
97  output[i] = generateFunc.CU(i+indexOffset);
98  i += gridSize;
99  }
100 }
101 
102 
103 
108 template <typename T, typename GenerateFunc>
109 __global__ void GenerateKernel_CU_Matrix(GenerateFunc generateFunc, T* output, size_t numElements, size_t xsize, size_t ysize, size_t yoffset)
110 {
111  size_t xindex = blockIdx.x * blockDim.x + threadIdx.x;
112  size_t yindex = blockIdx.y * blockDim.y + threadIdx.y;
113  size_t outaddr = yindex*xsize + xindex;
114 
115  size_t gridSize = blockDim.x*blockDim.y*gridDim.x*gridDim.y;
116  while(outaddr < numElements && xindex<xsize && yindex <ysize)
117  {
118  output[outaddr] = generateFunc.CU(xindex, yindex+yoffset);
119  outaddr += gridSize;
120  xindex += blockDim.x*gridDim.x;
121  yindex += blockDim.y*gridDim.y;
122  }
123 // if(outaddr < numElements && xindex<xsize && yindex <ysize)
124 // {
125 // output[outaddr] = generateFunc.CU(xindex, yindex+yoffset);
126 // }
127 
128 
129 
130 }
131 
136 }
137 
138 #endif
139 
140 #endif
141 
142 
__global__ void GenerateKernel_CU(GenerateFunc generateFunc, T *output, size_t numElements, size_t indexOffset)
Definition: generate_kernels.h:90
__global__ void GenerateKernel_CU_Matrix(GenerateFunc generateFunc, T *output, size_t numElements, size_t xsize, size_t ysize, size_t yoffset)
Definition: generate_kernels.h:109
static std::string GenerateKernel_CL("__kernel void GenerateKernel_KERNELNAME(__global TYPE* output, size_t numElements, size_t indexOffset, CONST_TYPE const1)\n""{\n"" size_t i = get_global_id(0);\n"" size_t gridSize = get_local_size(0)*get_num_groups(0);\n"" while(i < numElements)\n"" {\n"" output[i] = FUNCTIONNAME(i+indexOffset, const1);\n"" i += gridSize;\n"" }\n""}\n")
static std::string GenerateKernel_CL_Matrix("__kernel void GenerateKernel_Matrix_KERNELNAME(__global TYPE* output, size_t numElements, size_t xsize, size_t ysize, size_t yoffset, CONST_TYPE const1)\n""{\n"" size_t xindex = get_global_id(0);\n"" size_t yindex = get_global_id(1);\n"" size_t i = yindex*xsize + xindex; \n"" if(i < numElements && xindex<xsize && yindex <ysize)\n"" {\n"" output[i] = FUNCTIONNAME(xindex, yindex+yoffset, const1);\n"" }\n""}\n")