5 #ifndef SKEPU_CUDA_HELPER_H
6 #define SKEPU_CUDA_HELPER_H
// Evaluates to the smaller of a and b.
// Every use of an argument is parenthesized so that operands containing
// low-precedence operators (e.g. MIN(x & 1, 2)) are compared as a whole.
// Each argument may still be evaluated twice, so avoid side effects in them.
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
// Evaluates to the larger of a and b.
// Every use of an argument is parenthesized so that operands containing
// low-precedence operators (e.g. MAX(x & 6, 3)) are compared as a whole.
// Each argument may still be evaluated twice, so avoid side effects in them.
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
31 # ifdef _DEBUG // Do this only in debug mode...
// Debug-build print helper: formats `fmt` with its variadic arguments into a
// heap-allocated buffer and hands the resulting text to OutputDebugStringA.
// NOTE(review): this listing omits several original lines (the leading source
// line numbers jump 32 -> 34 and 35 -> 38 -> 42 -> 44). The va_list setup, the
// change to fmt2_sz between retries, any free() of the buffer and any use of
// `file` are not visible here -- confirm against the full header.
32 inline void VSPrintf(FILE *file, LPCSTR fmt, ...)
// Initial size, in bytes, of the formatting buffer.
34 size_t fmt2_sz = 2048;
35 char *fmt2 = (
char*)malloc(fmt2_sz);
// Keep retrying for as long as _vsnprintf reports failure (negative return).
38 while((_vsnprintf(fmt2, fmt2_sz, fmt, vlist)) < 0)
// Allocate a fresh buffer of fmt2_sz bytes for the next attempt
// (presumably fmt2_sz was enlarged on an omitted line -- TODO confirm).
42 fmt2 = (
char*)malloc(fmt2_sz);
// Send the formatted text to the debugger output.
44 OutputDebugStringA(fmt2);
// FPRINTF takes ONE parenthesized argument list, e.g.
// FPRINTF((stderr, "message\n")), and expands to either VSPrintf or fprintf
// applied to that list.
// NOTE(review): the preprocessor conditionals that choose between the
// definitions below are not visible in this listing; only one of them is
// expected to be active in any given build -- confirm against the full header.
48 # define FPRINTF(a) VSPrintf a
50 # define FPRINTF(a) fprintf a
55 # define FPRINTF(a) fprintf a
58 # define FPRINTF(a) fprintf a
// Copies numElements elements of type T from the OpenCL buffer devPtr into
// host memory at hostPtr.
//   hostPtr     - destination in host memory
//   devPtr      - source OpenCL buffer object
//   numElements - number of T elements to transfer
//   device      - supplies the command queue via device->getQueue()
//   offset      - forwarded unchanged as the buffer offset of
//                 clEnqueueReadBuffer, which OpenCL defines in BYTES
//                 (NOTE(review): verify callers do not pass an element index)
// Returns nothing; a failed transfer is only reported on stderr.
// NOTE(review): `err` and `sizeVec` are declared on lines this listing omits.
63 void copyDeviceToHost(T *hostPtr, cl_mem devPtr,
int numElements, Device_CL* device,
int offset)
// Only attempt the transfer when both pointers are non-NULL.
65 if(devPtr != NULL && hostPtr != NULL)
67 DEBUG_TEXT_LEVEL2(
"** DEVICE_TO_HOST OpenCL: "<< numElements <<
"!!!\n")
// Transfer size in bytes.
73 sizeVec = numElements*sizeof(T);
// CL_TRUE requests a blocking read; no event wait list and no completion event are used.
75 err = clEnqueueReadBuffer(device->getQueue(), devPtr, CL_TRUE, offset, sizeVec, (
void*)hostPtr, 0, NULL, NULL);
// The OpenCL error code itself is not included in the message.
77 if(err != CL_SUCCESS){FPRINTF((stderr,
"Error copying data from device\n"));}
// Copies numElements elements of type T from host memory at hostPtr into the
// OpenCL buffer devPtr.
//   hostPtr     - source in host memory
//   devPtr      - destination OpenCL buffer object
//   numElements - number of T elements to transfer
//   device      - supplies the command queue via device->getQueue()
//   offset      - forwarded unchanged as the buffer offset of
//                 clEnqueueWriteBuffer, which OpenCL defines in BYTES
//                 (NOTE(review): verify callers do not pass an element index)
// Returns nothing; a failed transfer is only reported on stderr.
// NOTE(review): `err` and `sizeVec` are declared on lines this listing omits.
84 void copyHostToDevice(T *hostPtr, cl_mem devPtr,
int numElements, Device_CL* device,
int offset)
// Only attempt the transfer when both pointers are non-NULL.
86 if(hostPtr != NULL && devPtr != NULL)
88 DEBUG_TEXT_LEVEL2(
"** HOST_TO_DEVICE OpenCL: "<< numElements <<
"!!!\n")
// Transfer size in bytes.
94 sizeVec = numElements*sizeof(T);
// CL_TRUE requests a blocking write; no event wait list and no completion event are used.
96 err = clEnqueueWriteBuffer(device->getQueue(), devPtr, CL_TRUE, offset, sizeVec, (
void*)hostPtr, 0, NULL, NULL);
// The OpenCL error code itself is not included in the message.
98 if(err != CL_SUCCESS){FPRINTF((stderr,
"Error copying data to device\n"));}
// Creates a read/write OpenCL buffer large enough for `size` elements of
// type T in the context returned by device->getContext().
//   size   - number of T elements (not bytes)
//   device - supplies the OpenCL context
// Returns the cl_mem handle produced by clCreateBuffer. On failure a message
// is written to stderr and that handle is still returned as-is, so callers
// cannot tell from this function alone that allocation failed.
// NOTE(review): `err` is declared on a line this listing omits.
103 template <
typename T>
104 inline cl_mem allocateOpenCLMemory(
unsigned int size, Device_CL* device)
106 DEBUG_TEXT_LEVEL2(
"** ALLOC OpenCL: "<< size <<
"!!!\n")
109 cl_mem devicePointer;
// Buffer size in bytes.
111 size_t sizeVec = size*sizeof(T);
// No host pointer is supplied (NULL); the status is returned through err.
113 devicePointer = clCreateBuffer(device->getContext(), CL_MEM_READ_WRITE, sizeVec, NULL, &err);
114 if(err != CL_SUCCESS){FPRINTF((stderr,
"Error allocating memory on device\n"));}
116 return devicePointer;
// Releases the OpenCL memory object d_pointer via clReleaseMemObject and
// reports a failure on stderr. Returns nothing.
// The template parameter T is not used by any line visible in this listing.
// NOTE(review): original lines 124-126 are omitted from this listing; whether
// they contain code (e.g. a NULL guard) cannot be determined from here.
120 template <
typename T>
121 inline void freeOpenCLMemory(cl_mem d_pointer)
123 DEBUG_TEXT_LEVEL2(
"** DE-ALLOC OpenCL !!!\n")
127 if(clReleaseMemObject(d_pointer) != CL_SUCCESS)
128 FPRINTF((stderr,
"Error releasing memory on device\n"));
// Launches `kernel` twice on device->getQueue():
//   1st launch: reads in_p, writes out_p, using numBlocks work-groups of
//               numThreads work-items each;
//   2nd launch: uses out_p as both input and output with a single work-group
//               of numThreads work-items.
// NOTE(review): the function signature line, the declaration of `err`, and
// original lines 168-170 (between the two launches) are omitted from this
// listing, so whether `n` is changed before the second launch cannot be seen
// here -- confirm against the full header.
146 template <
typename T>
151 size_t globalWorkSize[1];
152 size_t localWorkSize[1];
// Size in bytes requested for kernel argument 3; doubled when numThreads <= 32.
154 size_t sharedMemSize = (numThreads <= 32) ? 2 * numThreads *
sizeof(T) : numThreads *
sizeof(T);
// Arguments for the first launch. The return values of clSetKernelArg are not checked.
157 clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void*)&in_p);
158 clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void*)&out_p);
159 clSetKernelArg(kernel, 2,
sizeof(
unsigned int), (
void*)&n);
// A NULL value with a non-zero size is how OpenCL sizes a __local kernel argument.
160 clSetKernelArg(kernel, 3, sharedMemSize, NULL);
// One-dimensional range: numBlocks groups of numThreads work-items.
162 globalWorkSize[0] = numBlocks * numThreads;
163 localWorkSize[0] = numThreads;
166 err = clEnqueueNDRangeKernel(device->
getQueue(), kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
// Launch failures are reported on std::cerr; execution continues regardless.
167 if(err != CL_SUCCESS){std::cerr<<
"Error launching kernel RowWise!! 1st\n";}
// Arguments for the second launch: out_p is bound to both argument 0 and argument 1.
171 clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void*)&out_p);
172 clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void*)&out_p);
173 clSetKernelArg(kernel, 2,
sizeof(
unsigned int), (
void*)&n);
174 clSetKernelArg(kernel, 3, sharedMemSize, NULL);
// Single work-group of numThreads work-items.
176 globalWorkSize[0] = 1 * numThreads;
177 localWorkSize[0] = numThreads;
180 err = clEnqueueNDRangeKernel(device->
getQueue(), kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
181 if(err != CL_SUCCESS){std::cerr<<
"Error launching kernel RowWise!! 2nd\n";}
/*!
 * Replaces every occurrence of \p find in \p text with \p replace, in place.
 * Matching is non-overlapping and proceeds left to right; text inserted by a
 * replacement is never re-scanned, so \p replace may itself contain \p find.
 *
 * \param text    String that is modified in place.
 * \param find    Substring to search for. If empty, \p text is left unchanged.
 * \param replace Text substituted for each occurrence of \p find.
 */
void replaceTextInString(std::string &text, std::string find, std::string replace)
{
   // An empty pattern matches at every position, so the search below would
   // never reach npos (and would keep growing text when replace is non-empty).
   if(find.empty())
      return;

   std::string::size_type pos = 0;
   while((pos = text.find(find, pos)) != std::string::npos)
   {
      text.replace(pos, find.length(), replace);
      // Resume searching after the inserted text so it is not matched again.
      pos += replace.length();
   }
}
void ExecuteReduceOnADevice(unsigned int n, const size_t &numThreads, const size_t &numBlocks, _cl_mem *&in_p, _cl_mem *&out_p, cl_kernel &kernel, Device_CL *device)
Definition: skepu_opencl_helpers.h:147
const cl_command_queue & getQueue() const
Definition: device_cl.h:164
Contains a class declaration for the object that represents an OpenCL device.
void replaceTextInString(std::string &text, std::string find, std::string replace)
Definition: skepu_opencl_helpers.h:195
A class representing an OpenCL device.
Definition: device_cl.h:37