The main nemaspace for SkePU library. More...

Classes
class	Generate
	A class representing the Generate skeleton. More...

class	Map
	A class representing the Map skeleton. More...

class	MapArray
	A class representing the MapArray skeleton. More...

class	MapOverlap
	A class representing the MapOverlap skeleton. More...

class	MapReduce
	A class representing the MapReduce skeleton. More...

class	Matrix
	A matrix container class (2D matrix), internally uses 1D container (std::vector). More...

class	Reduce
	A class representing the Reduce skeleton. More...

class	Scan
	A class representing the Scan skeleton. More...

class	SparseMatrix
	A sparse matrix container class that mainly stores its data in CSR format. More...

class	DataCollector2D
	A class that can be used to collect 2D data. More...

class	Device_CL
	A class representing an OpenCL device. More...

class	Device_CU
	A class representing a CUDA device. More...

class	DeviceMemPointer_CL
	A class representing an OpenCL device memory allocation. More...

class	DeviceMemPointer_CU
	A class representing a CUDA device memory allocation. More...

class	DeviceMemPointer_Matrix_CL
	A class representing an OpenCL device memory allocation for Matrix. More...

struct	openclGenProp

struct	openclDeviceProp

class	EnvironmentDestroyer

class	Environment
	A class representing a execution environment. More...

class	ExecPlan
	A class that describes an execution plan, not used very much in this case as decision is mostly left to StarPU. More...

class	Threads

class	ThreadPool
	to enable thread pooling while using multiple CUDA devices. ThreadPool class manages all the ThreadPool related activities. This includes keeping track of idle threads and ynchronizations between all threads. More...

class	TimerLinux_GTOD
	A class that can be used measure time on Linux systems. More...

class	Task
	A class representing a Task for the farm skeleton. More...

class	Vector
	A vector container class, implemented as a wrapper for std::vector. It is configured to use StarPU DSM as its backend, which means that it does not do lazy memory copying in this translation. More...

Enumerations
enum	OverlapPolicy

enum	EdgePolicy

enum	AccessType
	Can be used to specify whether the access is row-wise or column-wise. More...

enum	ScanType

enum	SparseFileFormat
	Can be used to specify the input format for a sparse matrix that is supplied in constructor. More...

Functions
void	join_farm_all ()

void	join_farm ()

template<typename Function1 , typename Function2 >
void	farm (Function1 f1, Function2 f2)

static std::string	GenerateKernel_CL ("__kernel void GenerateKernel_KERNELNAME(__global TYPE* output, unsigned int numElements, unsigned int offset, CONST_TYPE const1)\n""{\n"" output = (__global void )output + offset; / partitioning is special with opencl / \n"" unsigned int i = get_global_id(0);\n"" unsigned int gridSize = get_local_size(0)get_num_groups(0);\n"" while(i < numElements)\n"" {\n"" output[i] = FUNCTIONNAME(i+offset, const1);\n"" i += gridSize;\n"" }\n""}\n")

static std::string	GenerateKernel_CL_Matrix ("__kernel void GenerateKernel_Matrix_KERNELNAME(__global TYPE* output, unsigned int numElements, unsigned int xsize, unsigned int ysize, unsigned int offset, CONST_TYPE const1)\n""{\n"" output = (__global void )output + offset; / partitioning is special with opencl / \n"" int xindex = get_global_id(0);\n"" int yindex = get_global_id(1);\n"" int i = yindexxsize + xindex; \n"" if(i < numElements && xindex<xsize && yindex <ysize)\n"" {\n"" output[i] = FUNCTIONNAME(xindex, yindex+offset, const1);\n"" }\n""}\n")

template<typename T , typename GenerateFunc >
__global__ void	GenerateKernel_CU (GenerateFunc generateFunc, T *output, unsigned int numElements, unsigned int indexOffset)

template<typename T , typename GenerateFunc >
__global__ void	GenerateKernel_CU_Matrix (GenerateFunc generateFunc, T *output, unsigned int numElements, unsigned int xsize, unsigned int ysize, unsigned int yoffset)

const std::string	trimSpaces (const std::string &pString, const std::string &pWhitespace=" \t")

template<typename T >
T	get_random_number (T min, T max)

std::string	read_file_into_string (const std::string &filename)

void	toLowerCase (std::string &str)

void	toUpperCase (std::string &str)

bool	startsWith (const std::string &main, const std::string &prefix)

template<typename T >
T *	allocateHostMemory (unsigned int n)

template<typename T >
void	deallocateHostMemory (T *p)

static std::string	UnaryMapKernel_CL ("__kernel void UnaryMapKernel_KERNELNAME(__global TYPE* input, __global TYPE* output, unsigned int numElements, unsigned int offset1, unsigned int offset2, CONST_TYPE const1)\n""{\n"" input = (__global void )input + offset1; / partitioning is special with opencl / \n"" output = (__global void )output + offset2; /* partitioning is special with opencl / \n"" int i = get_global_id(0);\n"" unsigned int gridSize = get_local_size(0)get_num_groups(0);\n"" while(i < numElements)\n"" {\n"" output[i] = FUNCTIONNAME(input[i], const1);\n"" i += gridSize;\n"" }\n""}\n")

static std::string	BinaryMapKernel_CL ("__kernel void BinaryMapKernel_KERNELNAME(__global TYPE* input1, __global TYPE* input2, __global TYPE* output, unsigned int n, unsigned int offset1, unsigned int offset2, unsigned int offset3, CONST_TYPE const1)\n""{\n"" input1 = (__global void )input1 + offset1; / partitioning is special with opencl / \n"" input2 = (__global void )input2 + offset2; /* partitioning is special with opencl / \n"" output = (__global void )output + offset3; /* partitioning is special with opencl / \n"" int i = get_global_id(0);\n"" unsigned int gridSize = get_local_size(0)get_num_groups(0);\n"" while(i < n)\n"" {\n"" output[i] = FUNCTIONNAME(input1[i], input2[i], const1);\n"" i += gridSize;\n"" }\n""}\n")

static std::string	TrinaryMapKernel_CL ("__kernel void TrinaryMapKernel_KERNELNAME(__global TYPE* input1, __global TYPE* input2, __global TYPE* input3, __global TYPE* output, unsigned int n, unsigned int offset1, unsigned int offset2, unsigned int offset3, unsigned int offset4, CONST_TYPE const1)\n""{\n"" input1 = (__global void )input1 + offset1; / partitioning is special with opencl / \n"" input2 = (__global void )input2 + offset2; /* partitioning is special with opencl / \n"" input3 = (__global void )input3 + offset3; /* partitioning is special with opencl / \n"" output = (__global void )output + offset4; /* partitioning is special with opencl / \n"" int i = get_global_id(0);\n"" unsigned int gridSize = get_local_size(0)get_num_groups(0);\n"" while(i < n)\n"" {\n"" output[i] = FUNCTIONNAME(input1[i], input2[i], input3[i], const1);\n"" i += gridSize;\n"" }\n""}\n")

template<typename T , typename U , typename UnaryFunc >
__global__ void	MapKernelUnary_CU (UnaryFunc mapFunc, T input, T output, unsigned int n, U const1)

template<typename T , typename U , typename BinaryFunc >
__global__ void	MapKernelBinary_CU (BinaryFunc mapFunc, T input1, T input2, T *output, unsigned int n, U const1)

template<typename T , typename U , typename TrinaryFunc >
__global__ void	MapKernelTrinary_CU (TrinaryFunc mapFunc, T input1, T input2, T input3, T output, unsigned int n, U const1)

static std::string	MapArrayKernel_CL ("__kernel void MapArrayKernel_KERNELNAME(__global TYPE* input1, __global TYPE* input2, __global TYPE* output, unsigned int n)\n""{\n"" int i = get_global_id(0);\n"" unsigned int gridSize = get_local_size(0)*get_num_groups(0);\n"" while(i < n)\n"" {\n"" output[i] = FUNCTIONNAME(&input1[0], input2[i]);\n"" i += gridSize;\n"" }\n""}\n")

static std::string	MapArrayKernel_CL_Matrix ("__kernel void MapArrayKernel_Matrix_KERNELNAME(__global TYPE* input1, __global TYPE* input2, __global TYPE* output, unsigned int n, unsigned int xsize, unsigned int ysize, int yoffset, CONST_TYPE const1)\n""{\n"" int xindex = get_global_id(0);\n"" int yindex = get_global_id(1);\n"" int i = yindex*xsize + xindex; \n"" if(i < n && xindex<xsize && yindex <ysize)\n"" {\n"" output[i] = FUNCTIONNAME(&input1[0], input2[i], xindex, yindex+yoffset, const1);\n"" }\n""}\n")

static std::string	MapArrayKernel_CL_Matrix_Blockwise ("__kernel void MapArrayKernel_Matrix_Blockwise_KERNELNAME(__global TYPE* input1, __global TYPE* input2, __global TYPE* output, unsigned int outSize, int p2BlockSize, CONST_TYPE const1)\n""{\n"" int i = get_global_id(0);\n"" unsigned int gridSize = get_local_size(0)get_num_groups(0);\n"" if(i < outSize)\n"" {\n"" output[i] = FUNCTIONNAME(&input1[0], &input2[ip2BlockSize], const1);\n"" i += gridSize;\n"" }\n""}\n")

static std::string	MapArrayKernel_CL_Sparse_Matrix_Blockwise ("__kernel void MapArrayKernel_Sparse_Matrix_Blockwise_KERNELNAME(__global TYPE* input1, __global TYPE* in2_values, __global unsigned int in2_row_offsets, __global unsigned int in2_col_indices, __global TYPE* output, unsigned int outSize, int indexOffset, CONST_TYPE const1)\n""{\n"" int i = get_global_id(0);\n"" unsigned int gridSize = get_local_size(0)*get_num_groups(0);\n"" if(i < outSize)\n"" {\n"" int rowId = in2_row_offsets[i] - indexOffset;\n"" int row2Id = in2_row_offsets[i+1] - indexOffset;\n"" output[i] = FUNCTIONNAME(&input1[0], &in2_values[rowId], (row2Id-rowId), &in2_col_indices[rowId], const1);\n"" i += gridSize;\n"" }\n""}\n")

template<typename T , typename ArrayFunc >
__global__ void	MapArrayKernel_CU (ArrayFunc mapArrayFunc, T input1, T input2, T *output, unsigned int n)

template<typename T , typename ArrayFunc >
__global__ void	MapArrayKernel_CU_Matrix (ArrayFunc mapArrayFunc, T input1, T input2, T *output, unsigned int n, unsigned int xsize, unsigned int ysize, unsigned int yoffset)

template<typename T , typename ArrayFunc >
__global__ void	MapArrayKernel_CU_Matrix_Blockwise (ArrayFunc mapArrayFunc, T input1, T input2, T *output, unsigned int outSize, int p2BlockSize)

template<typename T , typename ArrayFunc >
__global__ void	MapArrayKernel_CU_Sparse_Matrix_Blockwise (ArrayFunc mapArrayFunc, T input1, T in2_values, unsigned int in2_row_offsets, unsigned int in2_col_indices, T *output, unsigned int outSize, int indexOffset)

static std::string	MapOverlapKernel_CL ("__kernel void MapOverlapKernel_KERNELNAME(__global TYPE* input, __global TYPE* output, __global TYPE* wrap, int n, int overlap, int out_offset, int out_numelements, int poly, TYPE pad, __local TYPE* sdata)\n""{\n"" int tid = get_local_id(0);\n"" int i = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"" if(poly == 0)\n"" {\n"" sdata[overlap+tid] = (i < n) ? input[i] : pad;\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (get_group_id(0) == 0) ? pad : input[i-overlap];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2overlap] = (get_group_id(0) != get_num_groups(0)-1 && i+overlap < n) ? input[i+overlap] : pad;\n"" }\n"" }\n"" else if(poly == 1)\n"" {\n"" if(i < n)\n"" {\n"" sdata[overlap+tid] = input[i];\n"" }\n"" else if(i-n < overlap)\n"" {\n"" sdata[overlap+tid] = wrap[overlap+(i-n)];\n"" }\n"" else\n"" {\n"" sdata[overlap+tid] = pad;\n"" }\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (get_group_id(0) == 0) ? wrap[tid] : input[i-overlap];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2overlap] = (get_group_id(0) != get_num_groups(0)-1 && i+overlap < n) ? input[i+overlap] : wrap[overlap+(i+overlap-n)];\n"" }\n"" }\n"" else if(poly == 2)\n"" {\n"" sdata[overlap+tid] = (i < n) ? input[i] : input[n-1];\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (get_group_id(0) == 0) ? input[0] : input[i-overlap];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2*overlap] = (get_group_id(0) != get_num_groups(0)-1 && i+overlap < n) ? input[i+overlap] : input[n-1];\n"" }\n"" }\n"" barrier(CLK_LOCAL_MEM_FENCE);\n"" if( (i >= out_offset) && (i < out_offset+out_numelements) )\n"" {\n"" output[i-out_offset] = FUNCTIONNAME(&(sdata[tid+overlap]));\n"" }\n""}\n")

static std::string	MapOverlapKernel_CL_Matrix_Row ("__kernel void MapOverlapKernel_MatRowWise_KERNELNAME(__global TYPE* input, __global TYPE* output, __global TYPE* wrap, int n, int overlap, int out_offset, int out_numelements, int poly, TYPE pad, int blocksPerRow, int rowWidth, __local TYPE* sdata)\n""{\n"" int tid = get_local_id(0);\n"" int i = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"" int wrapIndex= 2 * overlap * (int)(get_group_id(0)/blocksPerRow);\n"" int tmp= (get_group_id(0) % blocksPerRow);\n"" int tmp2= (get_group_id(0) / blocksPerRow);\n"" if(poly == 0)\n"" {\n"" sdata[overlap+tid] = (i < n) ? input[i] : pad;\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (tmp==0) ? pad : input[i-overlap];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2overlap] = (get_group_id(0) != (get_num_groups(0)-1) && (i+overlap < n) && tmp!=(blocksPerRow-1)) ? input[i+overlap] : pad;\n"" }\n"" }\n"" else if(poly == 1)\n"" {\n"" if(i < n)\n"" {\n"" sdata[overlap+tid] = input[i];\n"" }\n"" else if(i-n < overlap)\n"" {\n"" sdata[overlap+tid] = wrap[(overlap+(i-n))+ wrapIndex];\n"" }\n"" else\n"" {\n"" sdata[overlap+tid] = pad;\n"" }\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (tmp==0) ? wrap[tid+wrapIndex] : input[i-overlap];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2overlap] = (get_group_id(0) != (get_num_groups(0)-1) && i+overlap < n && tmp!=(blocksPerRow-1)) ? input[i+overlap] : wrap[overlap+wrapIndex+(tid+overlap-get_local_size(0))];\n"" }\n"" }\n"" else if(poly == 2)\n"" {\n"" sdata[overlap+tid] = (i < n) ? input[i] : input[n-1];\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (tmp==0) ? input[tmp2rowWidth] : input[i-overlap];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2overlap] = (get_group_id(0) != (get_num_groups(0)-1) && (i+overlap < n) && (tmp!=(blocksPerRow-1))) ? input[i+overlap] : input[(tmp2+1)*rowWidth-1];\n"" }\n"" }\n"" barrier(CLK_LOCAL_MEM_FENCE);\n"" if( (i >= out_offset) && (i < out_offset+out_numelements) )\n"" {\n"" output[i-out_offset] = FUNCTIONNAME(&(sdata[tid+overlap]));\n"" }\n""}\n")

static std::string	MapOverlapKernel_CL_Matrix_Col ("__kernel void MapOverlapKernel_MatColWise_KERNELNAME(__global TYPE* input, __global TYPE* output, __global TYPE* wrap, int n, int overlap, int out_offset, int out_numelements, int poly, TYPE pad, int blocksPerCol, int rowWidth, int colWidth, __local TYPE* sdata)\n""{\n"" int tid = get_local_id(0);\n"" int i = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"" int wrapIndex= 2 * overlap * (int)(get_group_id(0)/blocksPerCol);\n"" int tmp= (get_group_id(0) % blocksPerCol);\n"" int tmp2= (get_group_id(0) / blocksPerCol);\n"" int arrInd = (tid + tmpget_local_size(0))rowWidth + tmp2;\n"" if(poly == 0)\n"" {\n"" sdata[overlap+tid] = (i < n) ? input[arrInd] : pad;\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (tmp==0) ? pad : input[(arrInd-(overlaprowWidth))];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2overlap] = (get_group_id(0) != (get_num_groups(0)-1) && (arrInd+(overlaprowWidth)) < n && (tmp!=(blocksPerCol-1))) ? input[(arrInd+(overlaprowWidth))] : pad;\n"" }\n"" }\n"" else if(poly == 1)\n"" {\n"" if(i < n)\n"" {\n"" sdata[overlap+tid] = input[arrInd];\n"" }\n"" else if(i-n < overlap)\n"" {\n"" sdata[overlap+tid] = wrap[(overlap+(i-n))+ wrapIndex];\n"" }\n"" else\n"" {\n"" sdata[overlap+tid] = pad;\n"" }\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (tmp==0) ? wrap[tid+wrapIndex] : input[(arrInd-(overlaprowWidth))];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2overlap] = (get_group_id(0) != (get_num_groups(0)-1) && (arrInd+(overlaprowWidth)) < n && (tmp!=(blocksPerCol-1))) ? input[(arrInd+(overlaprowWidth))] : wrap[overlap+wrapIndex+(tid+overlap-get_local_size(0))];\n"" }\n"" }\n"" else if(poly == 2)\n"" {\n"" sdata[overlap+tid] = (i < n) ? input[arrInd] : input[n-1];\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (tmp==0) ? input[tmp2] : input[(arrInd-(overlaprowWidth))];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2overlap] = (get_group_id(0) != (get_num_groups(0)-1) && (arrInd+(overlaprowWidth)) < n && (tmp!=(blocksPerCol-1))) ? input[(arrInd+(overlaprowWidth))] : input[tmp2+(colWidth-1)*rowWidth];\n"" }\n"" }\n"" barrier(CLK_LOCAL_MEM_FENCE);\n"" if( (i >= out_offset) && (i < out_offset+out_numelements) )\n"" {\n"" output[arrInd-out_offset] = FUNCTIONNAME(&(sdata[tid+overlap]));\n"" }\n""}\n")

template<int poly, typename T , typename OverlapFunc >
__global__ void	MapOverlapKernel_CU (OverlapFunc mapOverlapFunc, T input, T output, T *wrap, unsigned int n, unsigned int out_offset, unsigned int out_numelements, T pad)

template<int poly, typename T , typename OverlapFunc >
__global__ void	MapOverlapKernel_CU_Matrix_Row (OverlapFunc mapOverlapFunc, T input, T output, T *wrap, unsigned int n, unsigned int out_offset, unsigned int out_numelements, T pad, unsigned int blocksPerRow, unsigned int rowWidth)

template<int poly, typename T , typename OverlapFunc >
__global__ void	MapOverlapKernel_CU_Matrix_Col (OverlapFunc mapOverlapFunc, T input, T output, T *wrap, unsigned int n, unsigned int out_offset, unsigned int out_numelements, T pad, unsigned int blocksPerCol, unsigned int rowWidth, unsigned int colWidth)

static std::string	UnaryMapReduceKernel_CL ("__kernel void UnaryMapReduceKernel_KERNELNAME(__global TYPE* input, __global TYPE* output, unsigned int n, __local TYPE* sdata, TYPE const1)\n""{\n"" unsigned int blockSize = get_local_size(0);\n"" unsigned int tid = get_local_id(0);\n"" unsigned int i = get_group_id(0)blockSize + get_local_id(0);\n"" unsigned int gridSize = blockSizeget_num_groups(0);\n"" TYPE result = 0;\n"" if(i < n)\n"" {\n"" result = FUNCTIONNAME_MAP(input[i], const1);\n"" i += gridSize;\n"" }\n"" while(i < n)\n"" {\n"" TYPE tempMap;\n"" tempMap = FUNCTIONNAME_MAP(input[i], const1);\n"" result = FUNCTIONNAME_REDUCE(result, tempMap, (TYPE)0);\n"" i += gridSize;\n"" }\n"" sdata[tid] = result;\n"" barrier(CLK_LOCAL_MEM_FENCE);\n"" if(blockSize >= 512) { if (tid < 256 && tid + 256 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 256], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 256) { if (tid < 128 && tid + 128 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 128], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 128) { if (tid < 64 && tid + 64 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 64], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 64) { if (tid < 32 && tid + 32 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 32], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 32) { if (tid < 16 && tid + 16 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 16], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 16) { if (tid < 8 && tid + 8 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 8], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 8) { if (tid < 4 && tid + 4 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 4], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 4) { if (tid < 2 && tid + 2 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 2], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 2) { if (tid < 1 && tid + 1 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 1], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(tid == 0)\n"" {\n"" output[get_group_id(0)] = sdata[tid];\n"" }\n""}\n")

static std::string	BinaryMapReduceKernel_CL ("__kernel void BinaryMapReduceKernel_KERNELNAME(__global TYPE* input1, __global TYPE* input2, __global TYPE* output, unsigned int n, __local TYPE* sdata, TYPE const1)\n""{\n"" unsigned int blockSize = get_local_size(0);\n"" unsigned int tid = get_local_id(0);\n"" unsigned int i = get_group_id(0)blockSize + get_local_id(0);\n"" unsigned int gridSize = blockSizeget_num_groups(0);\n"" TYPE result = 0;\n"" if(i < n)\n"" {\n"" result = FUNCTIONNAME_MAP(input1[i], input2[i], const1);\n"" i += gridSize;\n"" }\n"" while(i < n)\n"" {\n"" TYPE tempMap;\n"" tempMap = FUNCTIONNAME_MAP(input1[i], input2[i], const1);\n"" result = FUNCTIONNAME_REDUCE(result, tempMap, (TYPE)0);\n"" i += gridSize;\n"" }\n"" sdata[tid] = result;\n"" barrier(CLK_LOCAL_MEM_FENCE);\n"" if(blockSize >= 512) { if (tid < 256 && tid + 256 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 256], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 256) { if (tid < 128 && tid + 128 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 128], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 128) { if (tid < 64 && tid + 64 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 64], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 64) { if (tid < 32 && tid + 32 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 32], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 32) { if (tid < 16 && tid + 16 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 16], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 16) { if (tid < 8 && tid + 8 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 8], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 8) { if (tid < 4 && tid + 4 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 4], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 4) { if (tid < 2 && tid + 2 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 2], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 2) { if (tid < 1 && tid + 1 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 1], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(tid == 0)\n"" {\n"" output[get_group_id(0)] = sdata[tid];\n"" }\n""}\n")

static std::string	TrinaryMapReduceKernel_CL ("__kernel void TrinaryMapReduceKernel_KERNELNAME(__global TYPE* input1, __global TYPE* input2, __global TYPE* input3, __global TYPE* output, unsigned int n, __local TYPE* sdata, TYPE const1)\n""{\n"" unsigned int blockSize = get_local_size(0);\n"" unsigned int tid = get_local_id(0);\n"" unsigned int i = get_group_id(0)blockSize + get_local_id(0);\n"" unsigned int gridSize = blockSizeget_num_groups(0);\n"" TYPE result = 0;\n"" if(i < n)\n"" {\n"" result = FUNCTIONNAME_MAP(input1[i], input2[i], input3[i], const1);\n"" i += gridSize;\n"" }\n"" while(i < n)\n"" {\n"" TYPE tempMap;\n"" tempMap = FUNCTIONNAME_MAP(input1[i], input2[i], input3[i], const1);\n"" result = FUNCTIONNAME_REDUCE(result, tempMap, (TYPE)0);\n"" i += gridSize;\n"" }\n"" sdata[tid] = result;\n"" barrier(CLK_LOCAL_MEM_FENCE);\n"" if(blockSize >= 512) { if (tid < 256 && tid + 256 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 256], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 256) { if (tid < 128 && tid + 128 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 128], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 128) { if (tid < 64 && tid + 64 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 64], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 64) { if (tid < 32 && tid + 32 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 32], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 32) { if (tid < 16 && tid + 16 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 16], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 16) { if (tid < 8 && tid + 8 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 8], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 8) { if (tid < 4 && tid + 4 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 4], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 4) { if (tid < 2 && tid + 2 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 2], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 2) { if (tid < 1 && tid + 1 < n) { sdata[tid] = FUNCTIONNAME_REDUCE(sdata[tid], sdata[tid + 1], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(tid == 0)\n"" {\n"" output[get_group_id(0)] = sdata[tid];\n"" }\n""}\n")

template<typename T , typename UnaryFunc , typename BinaryFunc >
__global__ void	MapReduceKernel1_CU (UnaryFunc mapFunc, BinaryFunc reduceFunc, T input, T output, unsigned int n)

template<typename T , typename BinaryFunc1 , typename BinaryFunc2 >
__global__ void	MapReduceKernel2_CU (BinaryFunc1 mapFunc, BinaryFunc2 reduceFunc, T input1, T input2, T *output, unsigned int n)

template<typename T , typename TrinaryFunc , typename BinaryFunc >
__global__ void	MapReduceKernel3_CU (TrinaryFunc mapFunc, BinaryFunc reduceFunc, T input1, T input2, T input3, T output, unsigned int n)

static std::string	ReduceKernel_CL ("__kernel void ReduceKernel_KERNELNAME(__global TYPE* input, __global TYPE* output, unsigned int n, __local TYPE* sdata)\n""{\n"" unsigned int blockSize = get_local_size(0);\n"" unsigned int tid = get_local_id(0);\n"" unsigned int i = get_group_id(0)blockSize + get_local_id(0);\n"" unsigned int gridSize = blockSizeget_num_groups(0);\n"" TYPE result = 0;\n"" if(i < n)\n"" {\n"" result = input[i];\n"" i += gridSize;\n"" }\n"" while(i < n)\n"" {\n"" result = FUNCTIONNAME(result, input[i], (TYPE)0);\n"" i += gridSize;\n"" }\n"" sdata[tid] = result;\n"" barrier(CLK_LOCAL_MEM_FENCE);\n"" if(blockSize >= 512) { if (tid < 256 && tid + 256 < n) { sdata[tid] = FUNCTIONNAME(sdata[tid], sdata[tid + 256], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 256) { if (tid < 128 && tid + 128 < n) { sdata[tid] = FUNCTIONNAME(sdata[tid], sdata[tid + 128], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 128) { if (tid < 64 && tid + 64 < n) { sdata[tid] = FUNCTIONNAME(sdata[tid], sdata[tid + 64], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 64) { if (tid < 32 && tid + 32 < n) { sdata[tid] = FUNCTIONNAME(sdata[tid], sdata[tid + 32], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 32) { if (tid < 16 && tid + 16 < n) { sdata[tid] = FUNCTIONNAME(sdata[tid], sdata[tid + 16], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 16) { if (tid < 8 && tid + 8 < n) { sdata[tid] = FUNCTIONNAME(sdata[tid], sdata[tid + 8], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 8) { if (tid < 4 && tid + 4 < n) { sdata[tid] = FUNCTIONNAME(sdata[tid], sdata[tid + 4], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 4) { if (tid < 2 && tid + 2 < n) { sdata[tid] = FUNCTIONNAME(sdata[tid], sdata[tid + 2], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(blockSize >= 2) { if (tid < 1 && tid + 1 < n) { sdata[tid] = FUNCTIONNAME(sdata[tid], sdata[tid + 1], (TYPE)0); } barrier(CLK_LOCAL_MEM_FENCE); }\n"" if(tid == 0)\n"" {\n"" output[get_group_id(0)] = sdata[tid];\n"" }\n""}\n")

template<typename T , typename BinaryFunc >
__global__ void	ReduceKernel_CU (BinaryFunc reduceFunc, T input, T output, unsigned int n)

static std::string	ScanKernel_CL ("__kernel void ScanKernel_KERNELNAME(__global TYPE* input, __global TYPE* output, __global TYPE* blockSums, unsigned int n, unsigned int numElements, __local TYPE* sdata)\n""{\n"" unsigned int threadIdx = get_local_id(0);\n"" unsigned int blockDim = get_local_size(0);\n"" unsigned int blockIdx = get_group_id(0);\n"" unsigned int gridDim = get_num_groups(0);\n"" int thid = threadIdx;\n"" int pout = 0;\n"" int pin = 1;\n"" int mem = get_global_id(0);\n"" int blockNr = blockIdx;\n"" unsigned int gridSize = blockDimgridDim;\n"" unsigned int numBlocks = numElements/(blockDim) + (numElements%(blockDim) == 0 ? 0:1);\n"" int offset;\n"" while(blockNr < numBlocks)\n"" {\n"" sdata[poutn+thid] = (mem < numElements) ? input[mem] : 0;\n"" barrier(CLK_LOCAL_MEM_FENCE);\n"" for(offset = 1; offset < n; offset =2)\n"" {\n"" pout = 1-pout;\n"" pin = 1-pout;\n"" if(thid >= offset)\n"" sdata[poutn+thid] = FUNCTIONNAME(sdata[pinn+thid], sdata[pinn+thid-offset], (TYPE)0);\n"" else\n"" sdata[poutn+thid] = sdata[pinn+thid];\n"" barrier(CLK_LOCAL_MEM_FENCE);\n"" }\n"" if(thid == blockDim - 1)\n"" blockSums[blockNr] = sdata[poutn+blockDim-1];\n"" if(mem < numElements)\n"" output[mem] = sdata[poutn+thid];\n"" mem += gridSize;\n"" blockNr += gridDim;\n"" barrier(CLK_LOCAL_MEM_FENCE);\n"" }\n""}\n")

static std::string	ScanUpdate_CL ("__kernel void ScanUpdate_KERNELNAME(__global TYPE* data, __global TYPE* sums, int isInclusive, TYPE init, int n, __global TYPE* ret, __local TYPE* sdata)\n""{\n"" __local TYPE offset;\n"" __local TYPE inc_offset;\n"" unsigned int threadIdx = get_local_id(0);\n"" unsigned int blockDim = get_local_size(0);\n"" unsigned int blockIdx = get_group_id(0);\n"" unsigned int gridDim = get_num_groups(0);\n"" int thid = threadIdx;\n"" int blockNr = blockIdx;\n"" unsigned int gridSize = blockDimgridDim;\n"" int mem = get_global_id(0);\n"" unsigned int numBlocks = n/(blockDim) + (n%(blockDim) == 0 ? 0:1);\n"" while(blockNr < numBlocks)\n"" {\n"" if(thid == 0)\n"" {\n"" if(isInclusive == 0)\n"" {\n"" offset = init;\n"" if(blockNr > 0)\n"" {\n"" offset = FUNCTIONNAME(offset, sums[blockNr-1], (TYPE)0);\n"" inc_offset = sums[blockNr-1];\n"" }\n"" }\n"" else\n"" {\n"" if(blockNr > 0)\n"" offset = sums[blockNr-1];\n"" }\n"" }\n"" barrier(CLK_LOCAL_MEM_FENCE);\n"" if(isInclusive == 1)\n"" {\n"" if(blockNr > 0)\n"" sdata[thid] = (mem < n) ? FUNCTIONNAME(offset, data[mem], (TYPE)0) : 0;\n"" else\n"" sdata[thid] = (mem < n) ? data[mem] : 0;\n"" if(mem == n-1)\n"" ret = sdata[thid];\n"" }\n"" else\n"" {\n"" if(mem == n-1)\n"" *ret = FUNCTIONNAME(inc_offset, data[mem], (TYPE)0);\n"" if(thid == 0)\n"" sdata[thid] = offset;\n"" else\n"" sdata[thid] = (mem-1 < n) ? FUNCTIONNAME(offset, data[mem-1], (TYPE)0) : 0;\n"" }\n"" barrier(CLK_LOCAL_MEM_FENCE);\n"" if(mem < n)\n"" data[mem] = sdata[thid];\n"" mem += gridSize;\n"" blockNr += gridDim;\n"" barrier(CLK_LOCAL_MEM_FENCE);\n"" }\n""}\n")

static std::string	ScanAdd_CL ("__kernel void ScanAdd_KERNELNAME(__global TYPE* data, TYPE sum, int n)\n""{\n"" int i = get_global_id(0);\n"" unsigned int gridSize = get_local_size(0)*get_num_groups(0);\n"" while(i < n)\n"" {\n"" data[i] = FUNCTIONNAME(data[i], sum, (TYPE)0);\n"" i += gridSize;\n"" }\n""}\n")

template<typename T , typename BinaryFunc >
__global__ void	ScanKernel_CU (BinaryFunc scanFunc, T input, T output, T *blockSums, unsigned int n, unsigned int numElements)

template<typename T , typename BinaryFunc >
__global__ void	ScanUpdate_CU (BinaryFunc scanFunc, T data, T sums, int isInclusive, T init, int n, T *ret)

template<typename T , typename BinaryFunc >
__global__ void	ScanAdd_CU (BinaryFunc scanFunc, T *data, T sum, int n)

template<typename T >
void	ExecuteReduceOnADevice (unsigned int n, const size_t &numThreads, const size_t &numBlocks, _cl_mem &in_p, _cl_mem &out_p, cl_kernel &kernel, Device_CL *device)

void	replaceTextInString (std::string &text, std::string find, std::string replace)

Detailed Description

The main nemaspace for SkePU library.

All classes and functions in the SkePU library are in this namespace.

Enumeration Type Documentation

enum skepu::AccessType

Can be used to specify whether the access is row-wise or column-wise.

Used in some cases to mention type of access required in a certain operation.

enum skepu::EdgePolicy

Enumeration of the different edge policies (what happens when a read outside the vector is performed) that the map overlap skeletons support.

enum skepu::OverlapPolicy

Enumeration of the different overlap patterns for Mapoverlap for Matrix container.

enum skepu::ScanType

Enumeration of the two types of Scan that can be performed: Inclusive and Exclusive.

enum skepu::SparseFileFormat

Can be used to specify the input format for a sparse matrix that is supplied in constructor.

Used to load sparse matrix from existing files.

Function Documentation

template<typename T >

T* skepu::allocateHostMemory ( unsigned int n )

Method to allocate host memory of a given size. Can do pinned memory allocation if enabled.

template<typename T >

void skepu::deallocateHostMemory ( T * p )

Method to deallocate host memory.

template<typename T >

void skepu::ExecuteReduceOnADevice	(	unsigned int	n,
		const size_t &	numThreads,
		const size_t &	numBlocks,
		_cl_mem *&	in_p,
		_cl_mem *&	out_p,
		cl_kernel &	kernel,
		Device_CL *	device
	)

A helper function that is used to call the actual kernel for reduction. Used by other functions to call the actual kernel Internally, it just calls 2 kernels by setting their arguments. No synchronization is enforced.

Parameters

n	size of the input array to be reduced.
numThreads	Number of threads to be used for kernel execution.
numBlocks	Number of blocks to be used for kernel execution.
in_p	OpenCL memory pointer to input array.
out_p	OpenCL memory pointer to output array.
kernel	OpenCL kernel handle.
device	OpenCL device handle.

References skepu::Device_CL::getQueue().

Here is the call graph for this function:

template<typename Function1 , typename Function2 >

void skepu::farm	(	Function1 *	f1,
		Function2 *	f2
	)

binary compose, run functions asynchronously

template<typename T >

T skepu::get_random_number	(	T	min,
		T	max
	)

inline

Method to get a random number between a given range.

void skepu::join_farm ( )

if you set useTagID to true before calling farm, then tasks will not be automaitcally joined at then end of farm. In this case, you need to explicitly call this method which will just ensure completion of spawned tasks by all previous farm calls.

void skepu::join_farm_all ( )

if you set useTagID to true before calling farm, then tasks will not be automaitcally joined at then end of farm. In this case, you need to explicitly call this method which will just ensure completion of all previpously created tasks.

template<typename T , typename U , typename BinaryFunc >

__global__ void skepu::MapKernelBinary_CU	(	BinaryFunc	mapFunc,
		T *	input1,
		T *	input2,
		T *	output,
		unsigned int	n,
		U	const1
	)

CUDA Map kernel for binary user functions.

template<typename T , typename U , typename TrinaryFunc >

__global__ void skepu::MapKernelTrinary_CU	(	TrinaryFunc	mapFunc,
		T *	input1,
		T *	input2,
		T *	input3,
		T *	output,
		unsigned int	n,
		U	const1
	)

CUDA Map kernel for ternary user functions.

static std::string skepu::MapOverlapKernel_CL_Matrix_Col ( "__kernel void MapOverlapKernel_MatColWise_KERNELNAME(__global TYPE* input, __global TYPE* output, __global TYPE* wrap, int n, int overlap, int out_offset, int out_numelements, int poly, TYPE pad, int blocksPerCol, int rowWidth, int colWidth, __local TYPE* sdata)\n""{\n"" int tid = get_local_id(0);\n"" int i = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"" int wrapIndex= 2 * overlap * (int)(get_group_id(0)/blocksPerCol);\n"" int tmp= (get_group_id(0) % blocksPerCol);\n"" int tmp2= (get_group_id(0) / blocksPerCol);\n"" int arrInd = (tid + tmp*get_local_size(0))*rowWidth + tmp2;\n"" if(poly == 0)\n"" {\n"" sdata[overlap+tid] = (i < n) ? input[arrInd] : pad;\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (tmp==0) ? pad : input[(arrInd-(overlap*rowWidth))];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2*overlap] = (get_group_id(0) != (get_num_groups(0)-1) && (arrInd+(overlap*rowWidth)) < n && (tmp!=(blocksPerCol-1))) ? input[(arrInd+(overlap*rowWidth))] : pad;\n"" }\n"" }\n"" else if(poly == 1)\n"" {\n"" if(i < n)\n"" {\n"" sdata[overlap+tid] = input[arrInd];\n"" }\n"" else if(i-n < overlap)\n"" {\n"" sdata[overlap+tid] = wrap[(overlap+(i-n))+ wrapIndex];\n"" }\n"" else\n"" {\n"" sdata[overlap+tid] = pad;\n"" }\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (tmp==0) ? wrap[tid+wrapIndex] : input[(arrInd-(overlap*rowWidth))];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2*overlap] = (get_group_id(0) != (get_num_groups(0)-1) && (arrInd+(overlap*rowWidth)) < n && (tmp!=(blocksPerCol-1))) ? input[(arrInd+(overlap*rowWidth))] : wrap[overlap+wrapIndex+(tid+overlap-get_local_size(0))];\n"" }\n"" }\n"" else if(poly == 2)\n"" {\n"" sdata[overlap+tid] = (i < n) ? input[arrInd] : input[n-1];\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (tmp==0) ? input[tmp2] : input[(arrInd-(overlap*rowWidth))];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2*overlap] = (get_group_id(0) != (get_num_groups(0)-1) && (arrInd+(overlap*rowWidth)) < n && (tmp!=(blocksPerCol-1))) ? input[(arrInd+(overlap*rowWidth))] : input[tmp2+(colWidth-1)*rowWidth];\n"" }\n"" }\n"" barrier(CLK_LOCAL_MEM_FENCE);\n"" if( (i >= out_offset) && (i < out_offset+out_numelements) )\n"" {\n"" output[arrInd-out_offset] = FUNCTIONNAME(&(sdata[tid+overlap]));\n"" }\n""}\n" )

static

OpenCL MapOverlap kernel for applying column-wise overlap to matrix elements.

static std::string skepu::MapOverlapKernel_CL_Matrix_Row ( "__kernel void MapOverlapKernel_MatRowWise_KERNELNAME(__global TYPE* input, __global TYPE* output, __global TYPE* wrap, int n, int overlap, int out_offset, int out_numelements, int poly, TYPE pad, int blocksPerRow, int rowWidth, __local TYPE* sdata)\n""{\n"" int tid = get_local_id(0);\n"" int i = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"" int wrapIndex= 2 * overlap * (int)(get_group_id(0)/blocksPerRow);\n"" int tmp= (get_group_id(0) % blocksPerRow);\n"" int tmp2= (get_group_id(0) / blocksPerRow);\n"" if(poly == 0)\n"" {\n"" sdata[overlap+tid] = (i < n) ? input[i] : pad;\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (tmp==0) ? pad : input[i-overlap];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2*overlap] = (get_group_id(0) != (get_num_groups(0)-1) && (i+overlap < n) && tmp!=(blocksPerRow-1)) ? input[i+overlap] : pad;\n"" }\n"" }\n"" else if(poly == 1)\n"" {\n"" if(i < n)\n"" {\n"" sdata[overlap+tid] = input[i];\n"" }\n"" else if(i-n < overlap)\n"" {\n"" sdata[overlap+tid] = wrap[(overlap+(i-n))+ wrapIndex];\n"" }\n"" else\n"" {\n"" sdata[overlap+tid] = pad;\n"" }\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (tmp==0) ? wrap[tid+wrapIndex] : input[i-overlap];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2*overlap] = (get_group_id(0) != (get_num_groups(0)-1) && i+overlap < n && tmp!=(blocksPerRow-1)) ? input[i+overlap] : wrap[overlap+wrapIndex+(tid+overlap-get_local_size(0))];\n"" }\n"" }\n"" else if(poly == 2)\n"" {\n"" sdata[overlap+tid] = (i < n) ? input[i] : input[n-1];\n"" if(tid < overlap)\n"" {\n"" sdata[tid] = (tmp==0) ? input[tmp2*rowWidth] : input[i-overlap];\n"" }\n"" if(tid >= (get_local_size(0)-overlap))\n"" {\n"" sdata[tid+2*overlap] = (get_group_id(0) != (get_num_groups(0)-1) && (i+overlap < n) && (tmp!=(blocksPerRow-1))) ? input[i+overlap] : input[(tmp2+1)*rowWidth-1];\n"" }\n"" }\n"" barrier(CLK_LOCAL_MEM_FENCE);\n"" if( (i >= out_offset) && (i < out_offset+out_numelements) )\n"" {\n"" output[i-out_offset] = FUNCTIONNAME(&(sdata[tid+overlap]));\n"" }\n""}\n" )

static

OpenCL MapOverlap kernel for applying row-wise overlap to matrix elements.

std::string skepu::read_file_into_string ( const std::string & filename )

Method to read text file data into a string.

void skepu::replaceTextInString	(	std::string &	text,
		std::string	find,
		std::string	replace
	)

A helper function used by createOpenCLProgram(). It finds all instances of a string in another string and replaces it with a third string.

Parameters

text	A `std::string` which is searched.
find	The `std::string` which is searched for and replaced.
replace	The relpacement `std::string`.

bool skepu::startsWith	(	const std::string &	main,
		const std::string &	prefix
	)

Method to check whether a string starts with a given pattern.

void skepu::toLowerCase ( std::string & str )

Method to convert to lower case.

void skepu::toUpperCase ( std::string & str )

Method to convert to upper case.

const std::string skepu::trimSpaces	(	const std::string &	pString,
		const std::string &	pWhitespace = `" \t"`
	)

Method to remove leading and trailing spaces from a string.

Classes

Enumerations

Functions

Detailed Description

Enumeration Type Documentation

Function Documentation