SkePU(integratedwithStarPU)
0.8.1
|
![]() |
Functions | |
static std::string | skepu::MapOverlapKernel_CL ("__kernel void MapOverlapKernel_KERNELNAME(__global TYPE* input, __global TYPE* output, __global TYPE* wrap, int n, int overlap, int out_offset, int out_numelements, int poly, TYPE pad, __local TYPE* sdata)\n""{\n"" int tid = get_local_id(0);\n"" int i = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"" if(poly == 0)\n"" {\n"" sdata[overlap+tid] = (i < n) ? input[i] : pad;\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (get_group_id(0) == 0) ? pad : input[i-overlap];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2*overlap] = (get_group_id(0) != get_num_groups(0)-1 && i+overlap < n) ? input[i+overlap] : pad;\n"" }\n"" }\n"" else if(poly == 1)\n"" {\n"" if(i < n)\n"" {\n"" sdata[overlap+tid] = input[i];\n"" }\n"" else if(i-n < overlap)\n"" {\n"" sdata[overlap+tid] = wrap[overlap+(i-n)];\n"" }\n"" else\n"" {\n"" sdata[overlap+tid] = pad;\n"" }\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (get_group_id(0) == 0) ? wrap[tid] : input[i-overlap];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2*overlap] = (get_group_id(0) != get_num_groups(0)-1 && i+overlap < n) ? input[i+overlap] : wrap[overlap+(i+overlap-n)];\n"" }\n"" }\n"" else if(poly == 2)\n"" {\n"" sdata[overlap+tid] = (i < n) ? input[i] : input[n-1];\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (get_group_id(0) == 0) ? input[0] : input[i-overlap];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2*overlap] = (get_group_id(0) != get_num_groups(0)-1 && i+overlap < n) ? input[i+overlap] : input[n-1];\n"" }\n"" }\n"" barrier(CLK_LOCAL_MEM_FENCE);\n"" if( (i >= out_offset) && (i < out_offset+out_numelements) )\n"" {\n"" output[i-out_offset] = FUNCTIONNAME(&(sdata[tid+overlap]));\n"" }\n""}\n") |
template<int poly, typename T , typename OverlapFunc > | |
__global__ void | skepu::MapOverlapKernel_CU (OverlapFunc mapOverlapFunc, T *input, T *output, T *wrap, unsigned int n, unsigned int out_offset, unsigned int out_numelements, T pad) |
template<int poly, typename T , typename OverlapFunc > | |
__global__ void | skepu::MapOverlapKernel_CU_Matrix_Row (OverlapFunc mapOverlapFunc, T *input, T *output, T *wrap, unsigned int n, unsigned int out_offset, unsigned int out_numelements, T pad, unsigned int blocksPerRow, unsigned int rowWidth) |
template<int poly, typename T , typename OverlapFunc > | |
__global__ void | skepu::MapOverlapKernel_CU_Matrix_Col (OverlapFunc mapOverlapFunc, T *input, T *output, T *wrap, unsigned int n, unsigned int out_offset, unsigned int out_numelements, T pad, unsigned int blocksPerCol, unsigned int rowWidth, unsigned int colWidth) |
Definitions of CUDA convolution kernel that uses shared memory.
Definitions of CUDA convolution kernel that uses shared memory and tiling.
Definitions of CUDA and OpenCL kernels for the Map skeleton.
|
static |
OpenCL MapOverlap kernel. It uses one device thread per element so maximum number of device threads limits the maximum number of elements this kernel can handle. Also the amount of shared memory and the maximum blocksize limits the maximum overlap that is possible to use, typically limits the overlap to < 256.
__global__ void skepu::MapOverlapKernel_CU | ( | OverlapFunc | mapOverlapFunc, |
T * | input, | ||
T * | output, | ||
T * | wrap, | ||
unsigned int | n, | ||
unsigned int | out_offset, | ||
unsigned int | out_numelements, | ||
T | pad | ||
) |
CUDA MapOverlap kernel. It uses one device thread per element so maximum number of device threads limits the maximum number of elements this kernel can handle. Also the amount of shared memory and the maximum blocksize limits the maximum overlap that is possible to use, typically limits the overlap to < 256.
__global__ void skepu::MapOverlapKernel_CU_Matrix_Col | ( | OverlapFunc | mapOverlapFunc, |
T * | input, | ||
T * | output, | ||
T * | wrap, | ||
unsigned int | n, | ||
unsigned int | out_offset, | ||
unsigned int | out_numelements, | ||
T | pad, | ||
unsigned int | blocksPerCol, | ||
unsigned int | rowWidth, | ||
unsigned int | colWidth | ||
) |
OpenCL MapOverlap kernel for applying column-wise overlap to matrix elements.
__global__ void skepu::MapOverlapKernel_CU_Matrix_Row | ( | OverlapFunc | mapOverlapFunc, |
T * | input, | ||
T * | output, | ||
T * | wrap, | ||
unsigned int | n, | ||
unsigned int | out_offset, | ||
unsigned int | out_numelements, | ||
T | pad, | ||
unsigned int | blocksPerRow, | ||
unsigned int | rowWidth | ||
) |
OpenCL MapOverlap kernel for applying row-wise overlap to matrix elements.