SkePU (integrated with StarPU) 0.8.1
 All Classes Namespaces Files Functions Enumerations Friends Macros Groups Pages
skepu_opencl_helpers.h
Go to the documentation of this file.
1 
5 #ifndef SKEPU_CUDA_HELPER_H
6 #define SKEPU_CUDA_HELPER_H
7 
8 #include <stdio.h>
9 #include <string.h>
10 #include <stdlib.h>
11 
12 #include "device_cl.h"
13 
14 
15 namespace skepu
16 {
17 
18 
// Smaller / larger of two values.
// Every argument and the whole expansion are parenthesized so that operands
// containing lower-precedence operators (e.g. MIN(x | 1, y)) expand correctly.
// NOTE: these are function-like macros; the selected argument is evaluated
// twice, so do not pass expressions with side effects.
#ifndef MIN
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#endif

#ifndef MAX
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#endif
26 
27 
// FPRINTF((stream, fmt, ...)) prints a formatted message. Note the double
// parentheses: the macro receives the whole argument list as one token group.
//
// On Windows the console window often disappears before the message can be
// read, so debug builds additionally send the text to the debugger output
// via OutputDebugStringA. Everywhere else FPRINTF is plain fprintf.
#ifdef _WIN32
# if 1 //ndef UNICODE
#  ifdef _DEBUG // Do this only in debug mode...
/*!
 *  Formats a message into a heap buffer (grown by doubling until it fits),
 *  then writes it both to the debugger output and to \p file.
 *
 *  \param file Stream that receives the formatted message.
 *  \param fmt printf-style format string.
 */
inline void VSPrintf(FILE *file, LPCSTR fmt, ...)
{
    size_t bufSize = 2048;
    char *buf = NULL;

    for(;;)
    {
        buf = (char*)malloc(bufSize);

        // The argument list is restarted for every attempt: a va_list must
        // not be reused after it has been consumed by a v*printf call.
        va_list args;
        va_start(args, fmt);

        if(buf == NULL)
        {
            // Out of memory: still deliver the message to the stream.
            vfprintf(file, fmt, args);
            va_end(args);
            return;
        }

        // Reserve one byte so the terminator can always be written;
        // _vsnprintf does not terminate when the output fills the buffer.
        const int written = _vsnprintf(buf, bufSize - 1, fmt, args);
        va_end(args);

        if(written >= 0)
        {
            buf[written] = '\0';
            break;
        }

        // Negative result means there was not enough room: retry with more.
        free(buf);
        bufSize *= 2;
    }

    OutputDebugStringA(buf);
    // The message is already formatted; it must not be interpreted as a
    // format string again (it may legitimately contain '%').
    fputs(buf, file);
    free(buf);
}
#   define FPRINTF(a) VSPrintf a
#  else //debug
#   define FPRINTF(a) fprintf a
#  endif //debug
# else //unicode
// Unicode case... let's give up for now and keep basic printf
#  define FPRINTF(a) fprintf a
# endif //unicode
#else //win32
// For other than Win32
# define FPRINTF(a) fprintf a
#endif //win32
60 
61 
62 template <typename T>
63 void copyDeviceToHost(T *hostPtr, cl_mem devPtr, int numElements, Device_CL* device, int offset)
64 {
65  if(devPtr != NULL && hostPtr != NULL)
66  {
67  DEBUG_TEXT_LEVEL2("** DEVICE_TO_HOST OpenCL: "<< numElements <<"!!!\n")
68 
69  cl_int err;
70 
71  size_t sizeVec;
72 
73  sizeVec = numElements*sizeof(T);
74 
75  err = clEnqueueReadBuffer(device->getQueue(), devPtr, CL_TRUE, offset, sizeVec, (void*)hostPtr, 0, NULL, NULL);
76 
77  if(err != CL_SUCCESS){FPRINTF((stderr, "Error copying data from device\n"));}
78  }
79 }
80 
81 
82 
83 template <typename T>
84 void copyHostToDevice(T *hostPtr, cl_mem devPtr, int numElements, Device_CL* device, int offset)
85 {
86  if(hostPtr != NULL && devPtr != NULL)
87  {
88  DEBUG_TEXT_LEVEL2("** HOST_TO_DEVICE OpenCL: "<< numElements <<"!!!\n")
89 
90  cl_int err;
91 
92  size_t sizeVec;
93 
94  sizeVec = numElements*sizeof(T);
95 
96  err = clEnqueueWriteBuffer(device->getQueue(), devPtr, CL_TRUE, offset, sizeVec, (void*)hostPtr, 0, NULL, NULL);
97 
98  if(err != CL_SUCCESS){FPRINTF((stderr, "Error copying data to device\n"));}
99  }
100 }
101 
102 
103 template <typename T>
104 inline cl_mem allocateOpenCLMemory(unsigned int size, Device_CL* device)
105 {
106  DEBUG_TEXT_LEVEL2("** ALLOC OpenCL: "<< size <<"!!!\n")
107 
108  cl_int err;
109  cl_mem devicePointer;
110 
111  size_t sizeVec = size*sizeof(T);
112 
113  devicePointer = clCreateBuffer(device->getContext(), CL_MEM_READ_WRITE, sizeVec, NULL, &err);
114  if(err != CL_SUCCESS){FPRINTF((stderr, "Error allocating memory on device\n"));}
115 
116  return devicePointer;
117 }
118 
119 
120 template <typename T>
121 inline void freeOpenCLMemory(cl_mem d_pointer)
122 {
123  DEBUG_TEXT_LEVEL2("** DE-ALLOC OpenCL !!!\n")
124 
125 // if(d_pointer!=NULL)
126  {
127  if(clReleaseMemObject(d_pointer) != CL_SUCCESS)
128  FPRINTF((stderr, "Error releasing memory on device\n"));
129  }
130 }
131 
132 
133 
146 template <typename T>
147 void ExecuteReduceOnADevice(unsigned int n, const size_t &numThreads, const size_t &numBlocks, _cl_mem*& in_p, _cl_mem*& out_p, cl_kernel &kernel, Device_CL *device)
148 {
149  cl_int err;
150 
151  size_t globalWorkSize[1];
152  size_t localWorkSize[1];
153 
154  size_t sharedMemSize = (numThreads <= 32) ? 2 * numThreads * sizeof(T) : numThreads * sizeof(T);
155 
156  // Sets the kernel arguments for first reduction
157  clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&in_p);
158  clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&out_p);
159  clSetKernelArg(kernel, 2, sizeof(unsigned int), (void*)&n);
160  clSetKernelArg(kernel, 3, sharedMemSize, NULL);
161 
162  globalWorkSize[0] = numBlocks * numThreads;
163  localWorkSize[0] = numThreads;
164 
165  // First reduce all elements blockwise so that each block produces one element.
166  err = clEnqueueNDRangeKernel(device->getQueue(), kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
167  if(err != CL_SUCCESS){std::cerr<<"Error launching kernel RowWise!! 1st\n";}
168 
169  // Sets the kernel arguments for second reduction
170  n = numBlocks;
171  clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&out_p);
172  clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&out_p);
173  clSetKernelArg(kernel, 2, sizeof(unsigned int), (void*)&n);
174  clSetKernelArg(kernel, 3, sharedMemSize, NULL);
175 
176  globalWorkSize[0] = 1 * numThreads;
177  localWorkSize[0] = numThreads;
178 
179  // Reduces the elements from the previous reduction in a single block to produce the scalar result.
180  err = clEnqueueNDRangeKernel(device->getQueue(), kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
181  if(err != CL_SUCCESS){std::cerr<<"Error launching kernel RowWise!! 2nd\n";}
182 }
183 
184 
185 
186 
/*!
 *  Replaces every occurrence of \p find in \p text with \p replace, in place.
 *  The search resumes after each inserted replacement, so text introduced by
 *  \p replace is never matched again. An empty \p find leaves \p text
 *  untouched (it would otherwise match at every position and never finish).
 *
 *  Declared inline because it is defined in a header; without it, including
 *  this header from several translation units yields multiple definitions.
 *
 *  \param text The string that is modified.
 *  \param find The substring to search for.
 *  \param replace The text substituted for each occurrence.
 */
inline void replaceTextInString(std::string& text, std::string find, std::string replace)
{
    if(find.empty())
        return;

    std::string::size_type pos = 0;
    while((pos = text.find(find, pos)) != std::string::npos)
    {
        text.replace(pos, find.length(), replace);
        pos += replace.length();
    }
}
205 
206 
207 }
208 
209 #endif
void ExecuteReduceOnADevice(unsigned int n, const size_t &numThreads, const size_t &numBlocks, _cl_mem *&in_p, _cl_mem *&out_p, cl_kernel &kernel, Device_CL *device)
Definition: skepu_opencl_helpers.h:147
const cl_command_queue & getQueue() const
Definition: device_cl.h:164
Contains a class declaration for the object that represents an OpenCL device.
void replaceTextInString(std::string &text, std::string find, std::string replace)
Definition: skepu_opencl_helpers.h:195
A class representing an OpenCL device.
Definition: device_cl.h:37