5 #ifndef SKEPU_CUDA_HELPER_H
6 #define SKEPU_CUDA_HELPER_H
20 #define MIN(a,b) ((a < b) ? a : b)
24 #define MAX(a,b) ((a > b) ? a : b)
31 # ifdef _DEBUG // Do this only in debug mode...
32 inline void VSPrintf(FILE *file, LPCSTR fmt, ...)
34 size_t fmt2_sz = 2048;
35 char *fmt2 = (
char*)malloc(fmt2_sz);
38 while((_vsnprintf(fmt2, fmt2_sz, fmt, vlist)) < 0)
42 fmt2 = (
char*)malloc(fmt2_sz);
44 OutputDebugStringA(fmt2);
48 # define FPRINTF(a) VSPrintf a
50 # define FPRINTF(a) fprintf a
55 # define FPRINTF(a) fprintf a
58 # define FPRINTF(a) fprintf a
63 void copyDeviceToHost(T *hostPtr, cl_mem devPtr,
size_t numElements, Device_CL* device,
size_t offset)
65 if(devPtr != NULL && hostPtr != NULL)
67 DEBUG_TEXT_LEVEL2(
"** DEVICE_TO_HOST OpenCL: "<< numElements <<
"!!!\n")
73 sizeVec = numElements*sizeof(T);
75 err = clEnqueueReadBuffer(device->getQueue(), devPtr, CL_TRUE, offset, sizeVec, (
void*)hostPtr, 0, NULL, NULL);
79 FPRINTF((stderr,
"Error copying data from device\n"));
87 void copyHostToDevice(T *hostPtr, cl_mem devPtr,
size_t numElements, Device_CL* device,
size_t offset)
89 if(hostPtr != NULL && devPtr != NULL)
91 DEBUG_TEXT_LEVEL2(
"** HOST_TO_DEVICE OpenCL: "<< numElements <<
"!!!\n")
97 sizeVec = numElements*sizeof(T);
99 err = clEnqueueWriteBuffer(device->getQueue(), devPtr, CL_TRUE, offset, sizeVec, (
void*)hostPtr, 0, NULL, NULL);
101 if(err != CL_SUCCESS)
103 FPRINTF((stderr,
"Error copying data to device\n"));
109 template <
typename T>
110 inline cl_mem allocateOpenCLMemory(
size_t size, Device_CL* device)
112 DEBUG_TEXT_LEVEL2(
"** ALLOC OpenCL: "<< size <<
"!!!\n")
115 cl_mem devicePointer;
117 size_t sizeVec = size*sizeof(T);
119 devicePointer = clCreateBuffer(device->getContext(), CL_MEM_READ_WRITE, sizeVec, NULL, &err);
120 if(err != CL_SUCCESS)
122 FPRINTF((stderr,
"Error allocating memory on device\n"));
125 return devicePointer;
129 template <
typename T>
130 inline void freeOpenCLMemory(cl_mem d_pointer)
132 DEBUG_TEXT_LEVEL2(
"** DE-ALLOC OpenCL !!!\n")
136 if(clReleaseMemObject(d_pointer) != CL_SUCCESS)
137 FPRINTF((stderr,
"Error releasing memory on device\n"));
155 template <
typename T>
160 size_t globalWorkSize[1];
161 size_t localWorkSize[1];
163 size_t sharedMemSize = (numThreads <= 32) ? 2 * numThreads *
sizeof(T) : numThreads *
sizeof(T);
166 clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void*)&in_p);
167 clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void*)&out_p);
168 clSetKernelArg(kernel, 2,
sizeof(
size_t), (
void*)&n);
169 clSetKernelArg(kernel, 3, sharedMemSize, NULL);
171 globalWorkSize[0] = numBlocks * numThreads;
172 localWorkSize[0] = numThreads;
175 err = clEnqueueNDRangeKernel(device->
getQueue(), kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
176 if(err != CL_SUCCESS)
178 std::cerr<<
"Error launching kernel RowWise!! 1st\n";
183 clSetKernelArg(kernel, 0,
sizeof(cl_mem), (
void*)&out_p);
184 clSetKernelArg(kernel, 1,
sizeof(cl_mem), (
void*)&out_p);
185 clSetKernelArg(kernel, 2,
sizeof(
size_t), (
void*)&n);
186 clSetKernelArg(kernel, 3, sharedMemSize, NULL);
188 globalWorkSize[0] = 1 * numThreads;
189 localWorkSize[0] = numThreads;
192 err = clEnqueueNDRangeKernel(device->
getQueue(), kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
193 if(err != CL_SUCCESS)
195 std::cerr<<
"Error launching kernel RowWise!! 2nd\n";
212 std::string::size_type pos=0;
213 while((pos = text.find(find, pos)) != std::string::npos)
215 text.erase(pos, find.length());
216 text.insert(pos, replace);
217 pos+=replace.length();
230 if (Err != CL_SUCCESS)
234 case CL_DEVICE_NOT_FOUND:
235 msg =
"Device not found";
237 case CL_DEVICE_NOT_AVAILABLE:
238 msg =
"Device not available";
240 case CL_COMPILER_NOT_AVAILABLE:
241 msg =
"Compiler not available";
243 case CL_MEM_OBJECT_ALLOCATION_FAILURE:
244 msg =
"Memory object allocation failure";
246 case CL_OUT_OF_RESOURCES:
247 msg =
"Out of resources";
249 case CL_OUT_OF_HOST_MEMORY:
250 msg =
"Out of host memory";
252 case CL_PROFILING_INFO_NOT_AVAILABLE:
253 msg =
"Profiling info not available";
255 case CL_MEM_COPY_OVERLAP:
256 msg =
"Memory copy overlap";
258 case CL_IMAGE_FORMAT_MISMATCH:
259 msg =
"Image format mismatch";
261 case CL_IMAGE_FORMAT_NOT_SUPPORTED:
262 msg =
"Image format not supported";
264 case CL_BUILD_PROGRAM_FAILURE:
265 msg =
"Build program failure";
270 case CL_MISALIGNED_SUB_BUFFER_OFFSET:
271 msg =
"Misaligned sub buffer offset";
273 case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
274 msg =
"Exec status error for events in wait list";
276 case CL_INVALID_VALUE:
277 msg =
"Invalid value";
279 case CL_INVALID_DEVICE_TYPE:
280 msg =
"Invalid device type";
282 case CL_INVALID_PLATFORM:
283 msg =
"Invalid platform";
285 case CL_INVALID_DEVICE:
286 msg =
"Invalid device";
288 case CL_INVALID_CONTEXT:
289 msg =
"Invalid context";
291 case CL_INVALID_QUEUE_PROPERTIES:
292 msg =
"Invalid queue properties";
294 case CL_INVALID_COMMAND_QUEUE:
295 msg =
"Invalid command queue";
297 case CL_INVALID_HOST_PTR:
298 msg =
"Invalid host pointer";
300 case CL_INVALID_MEM_OBJECT:
301 msg =
"Invalid memory object";
303 case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
304 msg =
"Invalid image format descriptor";
306 case CL_INVALID_IMAGE_SIZE:
307 msg =
"Invalid image size";
309 case CL_INVALID_SAMPLER:
310 msg =
"Invalid sampler";
312 case CL_INVALID_BINARY:
313 msg =
"Invalid binary";
315 case CL_INVALID_BUILD_OPTIONS:
316 msg =
"Invalid build options";
318 case CL_INVALID_PROGRAM:
319 msg =
"Invalid program";
321 case CL_INVALID_PROGRAM_EXECUTABLE:
322 msg =
"Invalid program executable";
324 case CL_INVALID_KERNEL_NAME:
325 msg =
"Invalid kernel name";
327 case CL_INVALID_KERNEL_DEFINITION:
328 msg =
"Invalid kernel definition";
330 case CL_INVALID_KERNEL:
331 msg =
"Invalid kernel";
333 case CL_INVALID_ARG_INDEX:
334 msg =
"Invalid argument index";
336 case CL_INVALID_ARG_VALUE:
337 msg =
"Invalid argument value";
339 case CL_INVALID_ARG_SIZE:
340 msg =
"Invalid argument size";
342 case CL_INVALID_KERNEL_ARGS:
343 msg =
"Invalid kernel arguments";
345 case CL_INVALID_WORK_DIMENSION:
346 msg =
"Invalid work dimension";
348 case CL_INVALID_WORK_GROUP_SIZE:
349 msg =
"Invalid work group size";
351 case CL_INVALID_WORK_ITEM_SIZE:
352 msg =
"Invalid work item size";
354 case CL_INVALID_GLOBAL_OFFSET:
355 msg =
"Invalid global offset";
357 case CL_INVALID_EVENT_WAIT_LIST:
358 msg =
"Invalid event wait list";
360 case CL_INVALID_EVENT:
361 msg =
"Invalid event";
363 case CL_INVALID_OPERATION:
364 msg =
"Invalid operation";
366 case CL_INVALID_GL_OBJECT:
367 msg =
"Invalid GL object";
369 case CL_INVALID_BUFFER_SIZE:
370 msg =
"Invalid buffer size";
372 case CL_INVALID_MIP_LEVEL:
373 msg =
"Invalid MIP level";
375 case CL_INVALID_GLOBAL_WORK_SIZE:
376 msg =
"Invalid global work size";
378 case CL_INVALID_PROPERTY:
379 msg =
"Invalid property";
382 msg =
"Unknown error";
385 SKEPU_ERROR(s<<
" OpenCL error code "<<Err<<
" "<<msg<<
" \n");
const cl_command_queue & getQueue() const
Definition: device_cl.h:178
Contains a class declaration for the object that represents an OpenCL device.
void replaceTextInString(std::string &text, std::string find, std::string replace)
Definition: skepu_opencl_helpers.h:210
void printCLError(cl_int Err, std::string s=std::string())
Definition: skepu_opencl_helpers.h:227
A class representing an OpenCL device.
Definition: device_cl.h:36
void ExecuteReduceOnADevice(ReduceFunc *reduceFunc, size_t n, size_t numThreads, size_t numBlocks, size_t maxThreads, size_t maxBlocks, T *d_idata, T *d_odata, unsigned int deviceID, bool enableIsPow2=true)
Definition: reduce_kernels.h:695