Go to the documentation of this file.00001
00005 #ifndef MAPOVERLAP_KERNELS_H
00006 #define MAPOVERLAP_KERNELS_H
00007
00008 #ifdef SKEPU_OPENCL
00009
00010 #include <string>
00011
00012 namespace skepu
00013 {
00014
/*!
 *  OpenCL C source of the 1D MapOverlap kernel, stored as a string.
 *
 *  The tokens KERNELNAME, TYPE and FUNCTIONNAME are not valid OpenCL on their own;
 *  presumably the host substitutes them before the program is built (confirm against
 *  the code that consumes this string).
 *
 *  What the kernel text does:
 *   - Each work-item copies element \p i of \p input into local memory at
 *     sdata[overlap+tid]; the first and last \p overlap work-items of a group
 *     additionally fill the left and right halo. The highest index written is
 *     get_local_size(0)-1 + 2*overlap, so \p sdata must hold at least
 *     get_local_size(0) + 2*overlap elements.
 *   - \p poly selects how positions outside [0, n) are filled:
 *     0 = the constant \p pad, 1 = values taken from \p wrap, 2 = input[0] / input[n-1].
 *   - For poly == 1, wrap[0 .. overlap-1] supplies the left halo of group 0 and
 *     wrap[overlap .. 2*overlap-1] supplies positions n .. n+overlap-1.
 *   - After a local-memory barrier, work-items with
 *     out_offset <= i < out_offset+out_numelements write
 *     output[i-out_offset] = FUNCTIONNAME(&sdata[tid+overlap]).
 *
 *  The poly == 1 right-halo load only reads \p wrap for positions inside
 *  [n, n+overlap); anything further out is filled with \p pad, exactly like the
 *  centre load of the same branch. Without that bound the last work-group would
 *  index past the end of \p wrap whenever n is not a multiple of the group size.
 */
static std::string MapOverlapKernel_CL(
"__kernel void MapOverlapKernel_KERNELNAME(__global TYPE* input, __global TYPE* output, __global TYPE* wrap, int n, int overlap, int out_offset, int out_numelements, int poly, TYPE pad, __local TYPE* sdata)\n"
"{\n"
" int tid = get_local_id(0);\n"
" int i = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"
// poly == 0: everything outside the input is the constant pad value.
" if(poly == 0)\n"
" {\n"
" sdata[overlap+tid] = (i < n) ? input[i] : pad;\n"
" if(tid < overlap)\n"
" {\n"
" sdata[tid] = (get_group_id(0) == 0) ? pad : input[i-overlap];\n"
" }\n"
" if(tid >= (get_local_size(0)-overlap))\n"
" {\n"
" sdata[tid+2*overlap] = (get_group_id(0) != get_num_groups(0)-1 && i+overlap < n) ? input[i+overlap] : pad;\n"
" }\n"
" }\n"
// poly == 1: positions outside the input come from the wrap buffer.
" else if(poly == 1)\n"
" {\n"
" if(i < n)\n"
" {\n"
" sdata[overlap+tid] = input[i];\n"
" }\n"
" else if(i-n < overlap)\n"
" {\n"
" sdata[overlap+tid] = wrap[overlap+(i-n)];\n"
" }\n"
" else\n"
" {\n"
" sdata[overlap+tid] = pad;\n"
" }\n"
" if(tid < overlap)\n"
" {\n"
" sdata[tid] = (get_group_id(0) == 0) ? wrap[tid] : input[i-overlap];\n"
" }\n"
" if(tid >= (get_local_size(0)-overlap))\n"
" {\n"
// Right halo: bound the wrap index the same way the centre load above does.
" int j = i+overlap;\n"
" if(get_group_id(0) != get_num_groups(0)-1 && j < n)\n"
" {\n"
" sdata[tid+2*overlap] = input[j];\n"
" }\n"
" else if(j >= n && j-n < overlap)\n"
" {\n"
" sdata[tid+2*overlap] = wrap[overlap+(j-n)];\n"
" }\n"
" else\n"
" {\n"
" sdata[tid+2*overlap] = pad;\n"
" }\n"
" }\n"
" }\n"
// poly == 2: positions outside the input repeat the first / last input element.
" else if(poly == 2)\n"
" {\n"
" sdata[overlap+tid] = (i < n) ? input[i] : input[n-1];\n"
" if(tid < overlap)\n"
" {\n"
" sdata[tid] = (get_group_id(0) == 0) ? input[0] : input[i-overlap];\n"
" }\n"
" if(tid >= (get_local_size(0)-overlap))\n"
" {\n"
" sdata[tid+2*overlap] = (get_group_id(0) != get_num_groups(0)-1 && i+overlap < n) ? input[i+overlap] : input[n-1];\n"
" }\n"
" }\n"
// All local-memory writes must be visible before any work-item reads its window.
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" if( (i >= out_offset) && (i < out_offset+out_numelements) )\n"
" {\n"
" output[i-out_offset] = FUNCTIONNAME(&(sdata[tid+overlap]));\n"
" }\n"
"}\n"
);
00093
00098 }
00099
00100 #endif
00101
00102 #ifdef SKEPU_CUDA
00103
00104 namespace skepu
00105 {
00106
/*!
 *  CUDA kernel for 1D MapOverlap: every thread stages one input element (plus halo
 *  elements at the block edges) in shared memory and then applies the user function
 *  to the window centred on its own element.
 *
 *  Launch requirements visible from the indexing below:
 *   - 1D grid and 1D blocks; each thread handles exactly one index
 *     i = blockIdx.x * blockDim.x + threadIdx.x (there is no grid-stride loop).
 *   - Dynamic shared memory of at least (blockDim.x + 2*overlap) * sizeof(T) bytes,
 *     since the highest index written is blockDim.x-1 + 2*overlap.
 *   - overlap is compared against unsigned thread indices, so it is assumed to be
 *     non-negative and not larger than blockDim.x.
 *   - For poly == 2, input[n-1] is read, so n is assumed to be at least 1.
 *
 *  \tparam poly Edge policy, resolved at compile time: 0 = fill with \p pad,
 *               1 = take values from \p wrap, 2 = repeat input[0] / input[n-1].
 *  \tparam T Element type.
 *  \tparam OverlapFunc User function type; must expose an \p overlap member and a
 *               \p CU method taking a pointer to the centre of the window.
 *  \param mapOverlapFunc The user function object.
 *  \param input Device pointer to the input elements.
 *  \param output Device pointer to the output; element i is stored at output[i-out_offset].
 *  \param wrap Only read when poly == 1. wrap[0 .. overlap-1] fills the left halo of
 *               block 0, wrap[overlap .. 2*overlap-1] fills positions n .. n+overlap-1.
 *  \param n Number of valid elements in \p input.
 *  \param out_offset First index i for which a result is written.
 *  \param out_numelements Number of results written, starting at \p out_offset.
 *  \param pad Constant used for out-of-range positions when poly == 0, and for
 *               positions beyond the wrap range when poly == 1.
 */
template <int poly, typename T, typename OverlapFunc>
__global__ void MapOverlapKernel_CU(OverlapFunc mapOverlapFunc, T* input, T* output, T* wrap, unsigned int n, unsigned int out_offset, unsigned int out_numelements, T pad)
{
    // Dynamic shared memory is declared as raw bytes and viewed as T so the same
    // declaration works for every instantiation of the template.
    extern __shared__ char _sdata[];
    T* sdata = reinterpret_cast<T*>(_sdata);

    unsigned int tid = threadIdx.x;
    unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
    int overlap = mapOverlapFunc.overlap;

    // Layout of sdata: [0, overlap) left halo, [overlap, overlap+blockDim.x) the
    // block's own elements, [overlap+blockDim.x, 2*overlap+blockDim.x) right halo.
    if(poly == 0)
    {
        sdata[overlap+tid] = (i < n) ? input[i] : pad;

        // The first `overlap` threads also load the left halo.
        if(tid < overlap)
        {
            sdata[tid] = (blockIdx.x == 0) ? pad : input[i-overlap];
        }

        // The last `overlap` threads also load the right halo.
        if(tid >= (blockDim.x-overlap))
        {
            sdata[tid+2*overlap] = (blockIdx.x != gridDim.x-1 && i+overlap < n) ? input[i+overlap] : pad;
        }
    }
    else if(poly == 1)
    {
        if(i < n)
        {
            sdata[overlap+tid] = input[i];
        }
        else if(i-n < overlap)
        {
            sdata[overlap+tid] = wrap[overlap+(i-n)];
        }
        else
        {
            sdata[overlap+tid] = pad;
        }

        if(tid < overlap)
        {
            sdata[tid] = (blockIdx.x == 0) ? wrap[tid] : input[i-overlap];
        }

        if(tid >= (blockDim.x-overlap))
        {
            // Right halo. The wrap buffer is only read for positions inside
            // [n, n+overlap); the index is unsigned, so an unguarded j-n would
            // address far outside wrap when n is not a multiple of blockDim.x.
            // Positions beyond that range get pad, like the centre load above.
            unsigned int j = i+overlap;
            if(blockIdx.x != gridDim.x-1 && j < n)
            {
                sdata[tid+2*overlap] = input[j];
            }
            else if(j >= n && j-n < overlap)
            {
                sdata[tid+2*overlap] = wrap[overlap+(j-n)];
            }
            else
            {
                sdata[tid+2*overlap] = pad;
            }
        }
    }
    else if(poly == 2)
    {
        sdata[overlap+tid] = (i < n) ? input[i] : input[n-1];

        if(tid < overlap)
        {
            sdata[tid] = (blockIdx.x == 0) ? input[0] : input[i-overlap];
        }

        if(tid >= (blockDim.x-overlap))
        {
            sdata[tid+2*overlap] = (blockIdx.x != gridDim.x-1 && i+overlap < n) ? input[i+overlap] : input[n-1];
        }
    }

    // Reached by every thread of the block: all staging writes must be complete
    // before any thread reads its neighbours' elements.
    __syncthreads();

    // Only indices inside the requested output range produce a result.
    if( (i >= out_offset) && (i < out_offset+out_numelements) )
    {
        output[i-out_offset] = mapOverlapFunc.CU(&(sdata[tid+overlap]));
    }
}
00197
00202 }
00203
00204 #endif
00205
00206 #endif