SkePU  1.2
 All Classes Namespaces Files Functions Variables Enumerations Friends Macros Groups Pages
skepu_opencl_helpers.h
Go to the documentation of this file.
1 
5 #ifndef SKEPU_CUDA_HELPER_H
6 #define SKEPU_CUDA_HELPER_H
7 
8 #include <stdio.h>
9 #include <string.h>
10 #include <stdlib.h>
11 
12 #include "device_cl.h"
13 
14 
15 namespace skepu
16 {
17 
18 
19 #ifndef MIN
20 #define MIN(a,b) ((a < b) ? a : b)
21 #endif
22 
23 #ifndef MAX
24 #define MAX(a,b) ((a > b) ? a : b)
25 #endif
26 
27 
28 // Give a little more for Windows : the console window often disapears before we can read the message
29 #ifdef _WIN32
30 # if 1//ndef UNICODE
31 # ifdef _DEBUG // Do this only in debug mode...
32 inline void VSPrintf(FILE *file, LPCSTR fmt, ...)
33 {
34  size_t fmt2_sz = 2048;
35  char *fmt2 = (char*)malloc(fmt2_sz);
36  va_list vlist;
37  va_start(vlist, fmt);
38  while((_vsnprintf(fmt2, fmt2_sz, fmt, vlist)) < 0) // means there wasn't anough room
39  {
40  fmt2_sz *= 2;
41  if(fmt2) free(fmt2);
42  fmt2 = (char*)malloc(fmt2_sz);
43  }
44  OutputDebugStringA(fmt2);
45  fprintf(file, fmt2);
46  free(fmt2);
47 }
48 # define FPRINTF(a) VSPrintf a
49 # else //debug
50 # define FPRINTF(a) fprintf a
51 // For other than Win32
52 # endif //debug
53 # else //unicode
54 // Unicode case... let's give-up for now and keep basic printf
55 # define FPRINTF(a) fprintf a
56 # endif //unicode
57 #else //win32
58 # define FPRINTF(a) fprintf a
59 #endif //win32
60 
61 
62 template <typename T>
63 void copyDeviceToHost(T *hostPtr, cl_mem devPtr, size_t numElements, Device_CL* device, size_t offset)
64 {
65  if(devPtr != NULL && hostPtr != NULL)
66  {
67  DEBUG_TEXT_LEVEL2("** DEVICE_TO_HOST OpenCL: "<< numElements <<"!!!\n")
68 
69  cl_int err;
70 
71  size_t sizeVec;
72 
73  sizeVec = numElements*sizeof(T);
74 
75  err = clEnqueueReadBuffer(device->getQueue(), devPtr, CL_TRUE, offset, sizeVec, (void*)hostPtr, 0, NULL, NULL);
76 
77  if(err != CL_SUCCESS)
78  {
79  FPRINTF((stderr, "Error copying data from device\n"));
80  }
81  }
82 }
83 
84 
85 
86 template <typename T>
87 void copyHostToDevice(T *hostPtr, cl_mem devPtr, size_t numElements, Device_CL* device, size_t offset)
88 {
89  if(hostPtr != NULL && devPtr != NULL)
90  {
91  DEBUG_TEXT_LEVEL2("** HOST_TO_DEVICE OpenCL: "<< numElements <<"!!!\n")
92 
93  cl_int err;
94 
95  size_t sizeVec;
96 
97  sizeVec = numElements*sizeof(T);
98 
99  err = clEnqueueWriteBuffer(device->getQueue(), devPtr, CL_TRUE, offset, sizeVec, (void*)hostPtr, 0, NULL, NULL);
100 
101  if(err != CL_SUCCESS)
102  {
103  FPRINTF((stderr, "Error copying data to device\n"));
104  }
105  }
106 }
107 
108 
109 template <typename T>
110 inline cl_mem allocateOpenCLMemory(size_t size, Device_CL* device)
111 {
112  DEBUG_TEXT_LEVEL2("** ALLOC OpenCL: "<< size <<"!!!\n")
113 
114  cl_int err;
115  cl_mem devicePointer;
116 
117  size_t sizeVec = size*sizeof(T);
118 
119  devicePointer = clCreateBuffer(device->getContext(), CL_MEM_READ_WRITE, sizeVec, NULL, &err);
120  if(err != CL_SUCCESS)
121  {
122  FPRINTF((stderr, "Error allocating memory on device\n"));
123  }
124 
125  return devicePointer;
126 }
127 
128 
129 template <typename T>
130 inline void freeOpenCLMemory(cl_mem d_pointer)
131 {
132  DEBUG_TEXT_LEVEL2("** DE-ALLOC OpenCL !!!\n")
133 
134 // if(d_pointer!=NULL)
135  {
136  if(clReleaseMemObject(d_pointer) != CL_SUCCESS)
137  FPRINTF((stderr, "Error releasing memory on device\n"));
138  }
139 }
140 
141 
142 
155 template <typename T>
156 void ExecuteReduceOnADevice(size_t n, const size_t &numThreads, const size_t &numBlocks, _cl_mem*& in_p, _cl_mem*& out_p, cl_kernel &kernel, Device_CL *device)
157 {
158  cl_int err;
159 
160  size_t globalWorkSize[1];
161  size_t localWorkSize[1];
162 
163  size_t sharedMemSize = (numThreads <= 32) ? 2 * numThreads * sizeof(T) : numThreads * sizeof(T);
164 
165  // Sets the kernel arguments for first reduction
166  clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&in_p);
167  clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&out_p);
168  clSetKernelArg(kernel, 2, sizeof(size_t), (void*)&n);
169  clSetKernelArg(kernel, 3, sharedMemSize, NULL);
170 
171  globalWorkSize[0] = numBlocks * numThreads;
172  localWorkSize[0] = numThreads;
173 
174  // First reduce all elements blockwise so that each block produces one element.
175  err = clEnqueueNDRangeKernel(device->getQueue(), kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
176  if(err != CL_SUCCESS)
177  {
178  std::cerr<<"Error launching kernel RowWise!! 1st\n";
179  }
180 
181  // Sets the kernel arguments for second reduction
182  n = numBlocks;
183  clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&out_p);
184  clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&out_p);
185  clSetKernelArg(kernel, 2, sizeof(size_t), (void*)&n);
186  clSetKernelArg(kernel, 3, sharedMemSize, NULL);
187 
188  globalWorkSize[0] = 1 * numThreads;
189  localWorkSize[0] = numThreads;
190 
191  // Reduces the elements from the previous reduction in a single block to produce the scalar result.
192  err = clEnqueueNDRangeKernel(device->getQueue(), kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
193  if(err != CL_SUCCESS)
194  {
195  std::cerr<<"Error launching kernel RowWise!! 2nd\n";
196  }
197 }
198 
199 
200 
201 
210 void replaceTextInString(std::string& text, std::string find, std::string replace)
211 {
212  std::string::size_type pos=0;
213  while((pos = text.find(find, pos)) != std::string::npos)
214  {
215  text.erase(pos, find.length());
216  text.insert(pos, replace);
217  pos+=replace.length();
218  }
219 }
220 
227 void printCLError(cl_int Err, std::string s = std::string())
228 {
229  std::string msg;
230  if (Err != CL_SUCCESS)
231  {
232  switch(Err)
233  {
234  case CL_DEVICE_NOT_FOUND:
235  msg = "Device not found";
236  break;
237  case CL_DEVICE_NOT_AVAILABLE:
238  msg = "Device not available";
239  break;
240  case CL_COMPILER_NOT_AVAILABLE:
241  msg = "Compiler not available";
242  break;
243  case CL_MEM_OBJECT_ALLOCATION_FAILURE:
244  msg = "Memory object allocation failure";
245  break;
246  case CL_OUT_OF_RESOURCES:
247  msg = "Out of resources";
248  break;
249  case CL_OUT_OF_HOST_MEMORY:
250  msg = "Out of host memory";
251  break;
252  case CL_PROFILING_INFO_NOT_AVAILABLE:
253  msg = "Profiling info not available";
254  break;
255  case CL_MEM_COPY_OVERLAP:
256  msg = "Memory copy overlap";
257  break;
258  case CL_IMAGE_FORMAT_MISMATCH:
259  msg = "Image format mismatch";
260  break;
261  case CL_IMAGE_FORMAT_NOT_SUPPORTED:
262  msg = "Image format not supported";
263  break;
264  case CL_BUILD_PROGRAM_FAILURE:
265  msg = "Build program failure";
266  break;
267  case CL_MAP_FAILURE:
268  msg = "Map failure";
269  break;
270  case CL_MISALIGNED_SUB_BUFFER_OFFSET:
271  msg = "Misaligned sub buffer offset";
272  break;
273  case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
274  msg = "Exec status error for events in wait list";
275  break;
276  case CL_INVALID_VALUE:
277  msg = "Invalid value";
278  break;
279  case CL_INVALID_DEVICE_TYPE:
280  msg = "Invalid device type";
281  break;
282  case CL_INVALID_PLATFORM:
283  msg = "Invalid platform";
284  break;
285  case CL_INVALID_DEVICE:
286  msg = "Invalid device";
287  break;
288  case CL_INVALID_CONTEXT:
289  msg = "Invalid context";
290  break;
291  case CL_INVALID_QUEUE_PROPERTIES:
292  msg = "Invalid queue properties";
293  break;
294  case CL_INVALID_COMMAND_QUEUE:
295  msg = "Invalid command queue";
296  break;
297  case CL_INVALID_HOST_PTR:
298  msg = "Invalid host pointer";
299  break;
300  case CL_INVALID_MEM_OBJECT:
301  msg = "Invalid memory object";
302  break;
303  case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
304  msg = "Invalid image format descriptor";
305  break;
306  case CL_INVALID_IMAGE_SIZE:
307  msg = "Invalid image size";
308  break;
309  case CL_INVALID_SAMPLER:
310  msg = "Invalid sampler";
311  break;
312  case CL_INVALID_BINARY:
313  msg = "Invalid binary";
314  break;
315  case CL_INVALID_BUILD_OPTIONS:
316  msg = "Invalid build options";
317  break;
318  case CL_INVALID_PROGRAM:
319  msg = "Invalid program";
320  break;
321  case CL_INVALID_PROGRAM_EXECUTABLE:
322  msg = "Invalid program executable";
323  break;
324  case CL_INVALID_KERNEL_NAME:
325  msg = "Invalid kernel name";
326  break;
327  case CL_INVALID_KERNEL_DEFINITION:
328  msg = "Invalid kernel definition";
329  break;
330  case CL_INVALID_KERNEL:
331  msg = "Invalid kernel";
332  break;
333  case CL_INVALID_ARG_INDEX:
334  msg = "Invalid argument index";
335  break;
336  case CL_INVALID_ARG_VALUE:
337  msg = "Invalid argument value";
338  break;
339  case CL_INVALID_ARG_SIZE:
340  msg = "Invalid argument size";
341  break;
342  case CL_INVALID_KERNEL_ARGS:
343  msg = "Invalid kernel arguments";
344  break;
345  case CL_INVALID_WORK_DIMENSION:
346  msg = "Invalid work dimension";
347  break;
348  case CL_INVALID_WORK_GROUP_SIZE:
349  msg = "Invalid work group size";
350  break;
351  case CL_INVALID_WORK_ITEM_SIZE:
352  msg = "Invalid work item size";
353  break;
354  case CL_INVALID_GLOBAL_OFFSET:
355  msg = "Invalid global offset";
356  break;
357  case CL_INVALID_EVENT_WAIT_LIST:
358  msg = "Invalid event wait list";
359  break;
360  case CL_INVALID_EVENT:
361  msg = "Invalid event";
362  break;
363  case CL_INVALID_OPERATION:
364  msg = "Invalid operation";
365  break;
366  case CL_INVALID_GL_OBJECT:
367  msg = "Invalid GL object";
368  break;
369  case CL_INVALID_BUFFER_SIZE:
370  msg = "Invalid buffer size";
371  break;
372  case CL_INVALID_MIP_LEVEL:
373  msg = "Invalid MIP level";
374  break;
375  case CL_INVALID_GLOBAL_WORK_SIZE:
376  msg = "Invalid global work size";
377  break;
378  case CL_INVALID_PROPERTY:
379  msg = "Invalid property";
380  break;
381  default:
382  msg = "Unknown error";
383  break;
384  }
385  SKEPU_ERROR(s<<" OpenCL error code "<<Err<<" "<<msg<<" \n");
386  }
387 }
388 
389 
390 }
391 
392 #endif
393 
const cl_command_queue & getQueue() const
Definition: device_cl.h:178
Contains a class declaration for the object that represents an OpenCL device.
void replaceTextInString(std::string &text, std::string find, std::string replace)
Definition: skepu_opencl_helpers.h:210
void printCLError(cl_int Err, std::string s=std::string())
Definition: skepu_opencl_helpers.h:227
A class representing an OpenCL device.
Definition: device_cl.h:36
void ExecuteReduceOnADevice(ReduceFunc *reduceFunc, size_t n, size_t numThreads, size_t numBlocks, size_t maxThreads, size_t maxBlocks, T *d_idata, T *d_odata, unsigned int deviceID, bool enableIsPow2=true)
Definition: reduce_kernels.h:695