SkePU (integrated with StarPU) 0.8.1
 All Classes Namespaces Files Functions Enumerations Friends Macros Groups Pages
generate_kernels.h
Go to the documentation of this file.
1 
5 #ifndef GENERATE_KERNELS_H
6 #define GENERATE_KERNELS_H
7 
8 #ifdef SKEPU_OPENCL
9 
10 #include <string>
11 
12 namespace skepu
13 {
14 
30 static std::string GenerateKernel_CL(
31 "__kernel void GenerateKernel_KERNELNAME(__global TYPE* output, unsigned int numElements, unsigned int offset, CONST_TYPE const1)\n"
32 "{\n"
33 " output = (__global void *)output + offset; /* partitioning is special with opencl */ \n"
34 " unsigned int i = get_global_id(0);\n"
35 " unsigned int gridSize = get_local_size(0)*get_num_groups(0);\n"
36 " while(i < numElements)\n"
37 " {\n"
38 " output[i] = FUNCTIONNAME(i+offset, const1);\n"
39 " i += gridSize;\n"
40 " }\n"
41 "}\n"
42 );
43 
44 
49 static std::string GenerateKernel_CL_Matrix(
50 "__kernel void GenerateKernel_Matrix_KERNELNAME(__global TYPE* output, unsigned int numElements, unsigned int xsize, unsigned int ysize, unsigned int offset, CONST_TYPE const1)\n"
51 "{\n"
52 " output = (__global void *)output + offset; /* partitioning is special with opencl */ \n"
53 " int xindex = get_global_id(0);\n"
54 " int yindex = get_global_id(1);\n"
55 " int i = yindex*xsize + xindex; \n"
56 " if(i < numElements && xindex<xsize && yindex <ysize)\n"
57 " {\n"
58 " output[i] = FUNCTIONNAME(xindex, yindex+offset, const1);\n"
59 " }\n"
60 "}\n"
61 );
62 
67 }
68 
69 #endif
70 
71 #ifdef SKEPU_CUDA
72 
73 namespace skepu
74 {
75 
91 template <typename T, typename GenerateFunc>
92 __global__ void GenerateKernel_CU(GenerateFunc generateFunc, T* output, unsigned int numElements, unsigned int indexOffset)
93 {
94  unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
95  unsigned int gridSize = blockDim.x*gridDim.x;
96 
97  while(i < numElements)
98  {
99  output[i] = generateFunc.CU(i+indexOffset);
100  i += gridSize;
101  }
102 }
103 
104 
105 
110 template <typename T, typename GenerateFunc>
111 __global__ void GenerateKernel_CU_Matrix(GenerateFunc generateFunc, T* output, unsigned int numElements, unsigned int xsize, unsigned int ysize, unsigned int yoffset)
112 {
113  unsigned int xindex = blockIdx.x * blockDim.x + threadIdx.x;
114  unsigned int yindex = blockIdx.y * blockDim.y + threadIdx.y;
115  unsigned int outaddr = yindex*xsize + xindex;
116 
117  unsigned int gridSize = blockDim.x*blockDim.y*gridDim.x*gridDim.y;
118  while(outaddr < numElements && xindex<xsize && yindex <ysize)
119  {
120  output[outaddr] = generateFunc.CU(xindex, yindex+yoffset);
121  outaddr += gridSize;
122  xindex += blockDim.x*gridDim.x;
123  yindex += blockDim.y*gridDim.y;
124  }
125 // if(outaddr < numElements && xindex<xsize && yindex <ysize)
126 // {
127 // output[outaddr] = generateFunc.CU(xindex, yindex+yoffset);
128 // }
129 
130 
131 
132 }
133 
138 }
139 
140 #endif
141 
142 #endif
__global__ void GenerateKernel_CU_Matrix(GenerateFunc generateFunc, T *output, unsigned int numElements, unsigned int xsize, unsigned int ysize, unsigned int yoffset)
Definition: generate_kernels.h:111
__global__ void GenerateKernel_CU(GenerateFunc generateFunc, T *output, unsigned int numElements, unsigned int indexOffset)
Definition: generate_kernels.h:92
static std::string GenerateKernel_CL("__kernel void GenerateKernel_KERNELNAME(__global TYPE* output, unsigned int numElements, unsigned int offset, CONST_TYPE const1)\n""{\n"" output = (__global void *)output + offset; /* partitioning is special with opencl */ \n"" unsigned int i = get_global_id(0);\n"" unsigned int gridSize = get_local_size(0)*get_num_groups(0);\n"" while(i < numElements)\n"" {\n"" output[i] = FUNCTIONNAME(i+offset, const1);\n"" i += gridSize;\n"" }\n""}\n")
static std::string GenerateKernel_CL_Matrix("__kernel void GenerateKernel_Matrix_KERNELNAME(__global TYPE* output, unsigned int numElements, unsigned int xsize, unsigned int ysize, unsigned int offset, CONST_TYPE const1)\n""{\n"" output = (__global void *)output + offset; /* partitioning is special with opencl */ \n"" int xindex = get_global_id(0);\n"" int yindex = get_global_id(1);\n"" int i = yindex*xsize + xindex; \n"" if(i < numElements && xindex<xsize && yindex <ysize)\n"" {\n"" output[i] = FUNCTIONNAME(xindex, yindex+offset, const1);\n"" }\n""}\n")