SkePU  1.2
 All Classes Namespaces Files Functions Variables Enumerations Friends Macros Groups Pages
mapoverlap.h
Go to the documentation of this file.
1 
5 #ifndef MAPOVERLAP_H
6 #define MAPOVERLAP_H
7 
8 #ifdef SKEPU_OPENCL
9 #include <string>
10 #include <vector>
11 #ifdef USE_MAC_OPENCL
12 #include <OpenCL/opencl.h>
13 #else
14 #include <CL/cl.h>
15 #endif
16 #include "src/device_cl.h"
17 #endif
18 
19 #include "src/environment.h"
20 
21 #include "skepu/vector.h"
22 #include "skepu/matrix.h"
23 
24 #include "src/operator_macros.h"
25 #include "src/exec_plan.h"
26 
27 namespace skepu
28 {
29 
36 enum OverlapPolicy
37 {
38  OVERLAP_ROW_WISE,
39  OVERLAP_COL_WISE,
40  OVERLAP_ROW_COL_WISE // , OVERLAP_NEIGHBOUR_WISE
41 };
42 
// Edge-handling mode, passed as the `poly` argument of every MapOverlap call
// together with a `pad` value.
// NOTE(review): the enumerator semantics are not defined in this header.
// Presumably CONSTANT substitutes `pad` for out-of-range neighbours, CYCLIC
// wraps around to the other end, and DUPLICATE repeats the border element --
// confirm against the backend .inl files.
43 enum EdgePolicy
44 {
45  CONSTANT,
46  CYCLIC,
47  DUPLICATE
48 };
49 
74 template <typename MapOverlapFunc>
75 class MapOverlap
76 {
77 
78 public:
79 
// Builds the skeleton around a user function object of type MapOverlapFunc.
// Only declared here; the definition is pulled in via "src/mapoverlap.inl".
// NOTE(review): whether the skeleton takes ownership of `mapOverlapFunc` is not
// visible in this header -- check the destructor before deleting it yourself.
80  MapOverlap(MapOverlapFunc* mapOverlapFunc);
81 
// Destructor; only declared here, defined in "src/mapoverlap.inl".
82  ~MapOverlap();
83 
// Forwards to finishAll() on the stored Environment pointer.
// No null check is made, so m_environment must already be set.
// NOTE(review): presumably this waits for outstanding device work to
// complete -- confirm in the Environment implementation.
84  void finishAll()
85  {
86  m_environment->finishAll();
87  }
88 
// Registers an execution plan by storing the ADDRESS of `plan` in m_execPlan.
// The plan is not copied, so the caller must keep `plan` alive for as long as
// this skeleton may use it.
89  void setExecPlan(ExecPlan& plan)
90  {
91  m_execPlan = &plan;
92  }
// Pointer overload: stores `plan` in m_execPlanMulti. Note this is a different
// member from the one written by the reference overload (m_execPlan).
// NOTE(review): presumably `plan` points to an array of per-device plans for
// multi-device execution -- confirm against the callers and .inl files.
93  void setExecPlan(ExecPlan *plan)
94  {
95  m_execPlanMulti = plan;
96  }
97 
98 private:
// Environment used by finishAll(). How it is obtained is not visible in this
// header.
99  Environment<int>* m_environment;
// The user function object handed to the constructor (presumably stored
// as-is -- confirm in mapoverlap.inl).
100  MapOverlapFunc* m_mapOverlapFunc;
101 
// Written by setExecPlan(ExecPlan*). Presumably a multi-device plan array --
// TODO confirm.
103  ExecPlan *m_execPlanMulti;
104 
// Written by setExecPlan(ExecPlan&). Points at a caller-owned plan.
106  ExecPlan *m_execPlan;
107 
// Plan object held by value. Presumably the default that m_execPlan refers to
// when the user supplies none -- TODO confirm in the constructor.
109  ExecPlan m_defPlan;
110 
// Call operators of the skeleton. Only declared here; definitions come from
// "src/mapoverlap.inl". Overloads taking a single container/range have no
// separate output argument; overloads taking `output`/`outputBegin` write there.
// Matrix and OverlapPolicy overloads additionally choose the overlap direction.
// NOTE(review): presumably these dispatch to a backend (CPU/OMP/CU/CL) based on
// the execution plan -- confirm in mapoverlap.inl.
111 public:
// Vector, single-container form.
112  template <typename T>
113  void operator()(Vector<T>& input, EdgePolicy poly = CONSTANT, T pad = T());
114 
// Iterator-range, single-range form; `poly` has no default here.
115  template <typename InputIterator>
116  void operator()(InputIterator inputBegin, InputIterator inputEnd, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type());
117 
// Matrix, single-container form with explicit overlap direction.
118  template <typename T>
119  void operator()(Matrix<T>& input, OverlapPolicy overlapPolicy, EdgePolicy poly = CONSTANT, T pad = T());
120 
// Iterator-range, single-range form with explicit overlap direction.
121  template <typename InputIterator>
122  void operator()(InputIterator inputBegin, InputIterator inputEnd, OverlapPolicy overlapPolicy, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type());
123 
// Vector, separate input and output.
124  template <typename T>
125  void operator()(Vector<T>& input, Vector<T>& output, EdgePolicy poly = CONSTANT, T pad = T());
126 
// Iterator-range, separate output iterator.
127  template <typename InputIterator, typename OutputIterator>
128  void operator()(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type());
129 
// Matrix, separate input and output with explicit overlap direction.
130  template <typename T>
131  void operator()(Matrix<T>& input, Matrix<T>& output, OverlapPolicy overlapPolicy, EdgePolicy poly = CONSTANT, T pad = T());
132 
// Iterator-range, separate output iterator with explicit overlap direction.
// NOTE(review): this is the only iterator overload that defaults `poly`; the
// matching CPU/OMP/CU/CL declarations do not -- confirm this is intentional.
133  template <typename InputIterator, typename OutputIterator>
134  void operator()(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, OverlapPolicy overlapPolicy, EdgePolicy poly = CONSTANT, typename InputIterator::value_type pad = typename InputIterator::value_type());
135 
// CPU backend entry points. The overload set mirrors operator(): Vector,
// Matrix and iterator-range forms, each with and without a separate output.
// Only declared here; definitions come from "src/mapoverlap_cpu.inl".
136 public:
137  template <typename T>
138  void CPU(Vector<T>& input, EdgePolicy poly = CONSTANT, T pad = T());
139 
140  template <typename InputIterator>
141  void CPU(InputIterator inputBegin, InputIterator inputEnd, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type());
142 
143  template <typename T>
144  void CPU(Matrix<T>& input, OverlapPolicy overlapPolicy, EdgePolicy poly = CONSTANT, T pad = T());
145 
146  template <typename InputIterator>
147  void CPU(InputIterator inputBegin, InputIterator inputEnd, OverlapPolicy overlapPolicy, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type());
148 
149  template <typename T>
150  void CPU(Vector<T>& input, Vector<T>& output, EdgePolicy poly = CONSTANT, T pad = T());
151 
152  template <typename InputIterator, typename OutputIterator>
153  void CPU(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type());
154 
155  template <typename T>
156  void CPU(Matrix<T>& input, Matrix<T>& output, OverlapPolicy overlapPolicy, EdgePolicy poly = CONSTANT, T pad = T());
157 
158  template <typename InputIterator, typename OutputIterator>
159  void CPU(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, OverlapPolicy overlapPolicy, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type());
160 
// Private per-direction helpers.
// NOTE(review): presumably selected by the OverlapPolicy passed to the public
// CPU() overloads -- confirm in mapoverlap_cpu.inl.
161 private:
162  template <typename InputIterator, typename OutputIterator>
163  void CPU_ROWWISE(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly = CONSTANT, typename InputIterator::value_type pad = typename InputIterator::value_type());
164 
165  template <typename InputIterator, typename OutputIterator>
166  void CPU_COLWISE(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly = CONSTANT, typename InputIterator::value_type pad = typename InputIterator::value_type());
167 
// OpenMP backend entry points. Declared only when SKEPU_OPENMP is defined;
// definitions come from "src/mapoverlap_omp.inl", which is included under the
// same macro. The signatures parallel the CPU() overload set.
168 #ifdef SKEPU_OPENMP
169 public:
170  template <typename T>
171  void OMP(Vector<T>& input, EdgePolicy poly = CONSTANT, T pad = T());
172 
173  template <typename InputIterator>
174  void OMP(InputIterator inputBegin, InputIterator inputEnd, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type());
175 
176  template <typename T>
177  void OMP(Matrix<T>& input, OverlapPolicy overlapPolicy, EdgePolicy poly = CONSTANT, T pad = T());
178 
179  template <typename InputIterator>
180  void OMP(InputIterator inputBegin, InputIterator inputEnd, OverlapPolicy overlapPolicy, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type());
181 
182  template <typename T>
183  void OMP(Vector<T>& input, Vector<T>& output, EdgePolicy poly = CONSTANT, T pad = T());
184 
185  template <typename InputIterator, typename OutputIterator>
186  void OMP(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type());
187 
188  template <typename T>
189  void OMP(Matrix<T>& input, Matrix<T>& output, OverlapPolicy overlapPolicy, EdgePolicy poly = CONSTANT, T pad = T());
190 
191  template <typename InputIterator, typename OutputIterator>
192  void OMP(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, OverlapPolicy overlapPolicy, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type());
193 
// Private per-direction helpers (row-wise / column-wise), same shape as the
// CPU_ROWWISE / CPU_COLWISE pair.
194 private:
195  template <typename InputIterator, typename OutputIterator>
196  void OMP_ROWWISE(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly = CONSTANT, typename InputIterator::value_type pad = typename InputIterator::value_type());
197 
198  template <typename InputIterator, typename OutputIterator>
199  void OMP_COLWISE(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly = CONSTANT, typename InputIterator::value_type pad = typename InputIterator::value_type());
200 
201 #endif
202 
// CUDA backend entry points. Declared only when SKEPU_CUDA is defined;
// definitions come from "src/mapoverlap_cu.inl", which is included under the
// same macro. Compared with CPU()/OMP(), every public overload takes an extra
// trailing `useNumGPU` argument defaulting to 1.
// NOTE(review): presumably `useNumGPU` is the number of GPUs to spread the
// work over -- confirm in mapoverlap_cu.inl.
203 #ifdef SKEPU_CUDA
204 public:
205  template <typename T>
206  void CU(Vector<T>& input, EdgePolicy poly = CONSTANT, T pad = T(), int useNumGPU = 1);
207 
208  template <typename InputIterator>
209  void CU(InputIterator inputBegin, InputIterator inputEnd, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type(), int useNumGPU = 1);
210 
211  template <typename T>
212  void CU(Matrix<T>& input, OverlapPolicy overlapPolicy, EdgePolicy poly = CONSTANT, T pad = T(), int useNumGPU = 1);
213 
214  template <typename InputIterator>
215  void CU(InputIterator inputBegin, InputIterator inputEnd, OverlapPolicy overlapPolicy, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type(), int useNumGPU = 1);
216 
217  template <typename T>
218  void CU(Vector<T>& input, Vector<T>& output, EdgePolicy poly = CONSTANT, T pad = T(), int useNumGPU = 1);
219 
220  template <typename InputIterator, typename OutputIterator>
221  void CU(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type(), int useNumGPU = 1);
222 
223  template <typename T>
224  void CU(Matrix<T>& input, Matrix<T>& output, OverlapPolicy overlapPolicy, EdgePolicy poly = CONSTANT, T pad = T(), int useNumGPU = 1);
225 
226  template <typename InputIterator, typename OutputIterator>
227  void CU(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, OverlapPolicy overlapPolicy, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type(), int useNumGPU = 1);
228 
229 private:
// NOTE(review): presumably the CUDA device this instance targets; where it is
// initialised is not visible in this header.
230  unsigned int cudaDeviceID;
231 
// Private CUDA helpers. Going by their names and parameters, the
// "SingleThread" variants take one `deviceID` and the "MultiThread" variants
// take a `numDevices` count; the `_Row` / `_Col` suffixes parallel the
// row-wise / column-wise split of the CPU helpers -- confirm in
// mapoverlap_cu.inl.
232  template <typename InputIterator, typename OutputIterator>
233  void mapOverlapSingleThread_CU(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad, unsigned int deviceID);
234 
235  template <typename InputIterator, typename OutputIterator>
236  void mapOverlapSingleThread_CU(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad, unsigned int deviceID, OverlapPolicy overlapPolicy);
237 
238  template <typename InputIterator, typename OutputIterator>
239  void mapOverlapSingleThread_CU_Row(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad, unsigned int deviceID);
240 
241  template <typename InputIterator, typename OutputIterator>
242  void mapOverlapMultiThread_CU_Row(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad, size_t numDevices);
243 
244  template <typename InputIterator, typename OutputIterator>
245  void mapOverlapSingleThread_CU_Col(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad, unsigned int deviceID);
246 
247  template <typename InputIterator, typename OutputIterator>
248  void mapOverlapMultiThread_CU_Col(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad, size_t numDevices);
249 
// `numThreads` is taken by non-const reference, so the callee may modify it.
// NOTE(review): presumably computes a launch configuration for `width`
// elements on `deviceID` -- confirm the meaning of the returned size_t.
250  template <typename T>
251  size_t getThreadNumber_CU(size_t width, size_t &numThreads, unsigned int deviceID);
252 
// NOTE(review): presumably reports whether the device has enough shared
// memory for `numThreads` threads of element type T -- confirm.
253  template <typename T>
254  bool sharedMemAvailable_CU(size_t &numThreads, unsigned int deviceID);
255 
256 #endif
257 
// OpenCL backend entry points. Declared only when SKEPU_OPENCL is defined;
// definitions come from "src/mapoverlap_cl.inl", which is included under the
// same macro. As with CU(), every public overload takes a trailing
// `useNumGPU` argument defaulting to 1.
258 #ifdef SKEPU_OPENCL
259 public:
260  template <typename T>
261  void CL(Vector<T>& input, EdgePolicy poly = CONSTANT, T pad = T(), int useNumGPU = 1);
262 
263  template <typename InputIterator>
264  void CL(InputIterator inputBegin, InputIterator inputEnd, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type(), int useNumGPU = 1);
265 
266  template <typename T>
267  void CL(Matrix<T>& input, OverlapPolicy overlapPolicy, EdgePolicy poly = CONSTANT, T pad = T(), int useNumGPU = 1);
268 
269  template <typename InputIterator>
270  void CL(InputIterator inputBegin, InputIterator inputEnd, OverlapPolicy overlapPolicy, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type(), int useNumGPU = 1);
271 
272  template <typename T>
273  void CL(Vector<T>& input, Vector<T>& output, EdgePolicy poly = CONSTANT, T pad = T(), int useNumGPU = 1);
274 
275  template <typename InputIterator, typename OutputIterator>
276  void CL(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type(), int useNumGPU = 1);
277 
278  template <typename T>
279  void CL(Matrix<T>& input, Matrix<T>& output, OverlapPolicy overlapPolicy, EdgePolicy poly = CONSTANT, T pad = T(), int useNumGPU = 1);
280 
281  template <typename InputIterator, typename OutputIterator>
282  void CL(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, OverlapPolicy overlapPolicy, EdgePolicy poly, typename InputIterator::value_type pad = typename InputIterator::value_type(), int useNumGPU = 1);
283 
// Private OpenCL helpers. Going by their names and parameters, the "Single"
// variants take one `deviceID` and the "NumDevices" / "...Multi" variants take
// a `numDevices` count; `_Row` / `_Col` parallel the row-wise / column-wise
// split of the CPU helpers -- confirm in mapoverlap_cl.inl.
284 private:
285  template <typename InputIterator, typename OutputIterator>
286  void mapOverlapSingle_CL(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad, unsigned int deviceID);
287 
288  template <typename InputIterator, typename OutputIterator>
289  void mapOverlapNumDevices_CL(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad, size_t numDevices);
290 
291  template <typename InputIterator, typename OutputIterator>
292  void mapOverlapSingle_CL_Row(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad, unsigned int deviceID);
293 
294  template <typename InputIterator, typename OutputIterator>
295  void mapOverlapSingle_CL_RowMulti(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad, size_t numDevices);
296 
297  template <typename InputIterator, typename OutputIterator>
298  void mapOverlapSingle_CL_Col(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad, unsigned int deviceID);
299 
300  template <typename InputIterator, typename OutputIterator>
301  void mapOverlapSingle_CL_ColMulti(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad, size_t numDevices);
302 
303  template <typename InputIterator, typename OutputIterator>
304  void mapOverlapSingle_CL(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad, unsigned int deviceID, OverlapPolicy overlapPolicy);
305 
306  template <typename InputIterator, typename OutputIterator>
307  void mapOverlapNumDevices_CL(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly, typename InputIterator::value_type pad, size_t numDevices, OverlapPolicy overlapPolicy);
308 
// OpenCL counterparts of getThreadNumber_CU / sharedMemAvailable_CU;
// `numThreads` is taken by non-const reference, so the callee may modify it.
309  template <typename T>
310  size_t getThreadNumber_CL(size_t width, size_t &numThreads, unsigned int deviceID);
311 
312  template <typename T>
313  bool sharedMemAvailable_CL(size_t &numThreads, unsigned int deviceID);
314 
// Kernel tables: each entry pairs a cl_kernel handle with a Device_CL pointer.
// NOTE(review): presumably one entry per OpenCL device, populated by
// createOpenCLProgram() -- confirm in mapoverlap_cl.inl. Ownership of the
// Device_CL pointers is not visible in this header.
315 private:
316  std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_CL;
317  std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_Mat_Row_CL;
318  std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_Mat_Col_CL;
319  std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_Mat_ColMulti_CL;
320  std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_Mat_ConvolFilter_CL;
321  std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_Mat_Convol_CL;
322 
323  std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_Mat_Transpose_CL;
324 
// NOTE(review): presumably builds the OpenCL program(s) and fills the kernel
// tables above -- confirm in mapoverlap_cl.inl.
325  void createOpenCLProgram();
326 #endif
327 
328 };
329 
330 
331 
332 
333 
334 }
335 
336 #include "src/mapoverlap.inl"
337 
338 #include "src/mapoverlap_cpu.inl"
339 
340 #ifdef SKEPU_OPENMP
341 #include "src/mapoverlap_omp.inl"
342 #endif
343 
344 #ifdef SKEPU_OPENCL
345 #include "src/mapoverlap_cl.inl"
346 #endif
347 
348 #ifdef SKEPU_CUDA
349 #include "src/mapoverlap_cu.inl"
350 #endif
351 
352 
353 
354 
355 //---------------------------------------------------------------------------------------------
356 //------------------------------------------------------- Adding MapOverlap2D type definitions
357 //---------------------------------------------------------------------------------------------
358 
359 #include "src/2dmapoverlap.h"
360 
361 
362 
363 #endif
364 
365 
void CU(Vector< T > &input, EdgePolicy poly=CONSTANT, T pad=T(), int useNumGPU=1)
Definition: mapoverlap_cu.inl:33
Contains a class declaration for the MapOverlap skeleton.
Contains the definitions of OpenCL specific member functions for the MapOverlap skeleton.
Includes the macro files needed for the defined backends.
void operator()(Vector< T > &input, EdgePolicy poly=CONSTANT, T pad=T())
Definition: mapoverlap.inl:111
void finishAll()
Definition: environment.inl:575
void CPU(Vector< T > &input, EdgePolicy poly=CONSTANT, T pad=T())
Definition: mapoverlap_cpu.inl:23
void OMP(Vector< T > &input, EdgePolicy poly=CONSTANT, T pad=T())
Definition: mapoverlap_omp.inl:26
Contains the definitions of CUDA specific member functions for the MapOverlap skeleton.
Contains the definitions of non-backend specific member functions for the MapOverlap skeleton...
void CL(Vector< T > &input, EdgePolicy poly=CONSTANT, T pad=T(), int useNumGPU=1)
Definition: mapoverlap_cl.inl:469
Contains a class declaration for the object that represents an OpenCL device.
MapOverlap(MapOverlapFunc *mapOverlapFunc)
Definition: mapoverlap.inl:25
Contains the definitions of CPU specific member functions for the MapOverlap skeleton.
Contains a class declaration for the Matrix container.
A vector container class, implemented as a wrapper for std::vector.
Definition: vector.h:61
A class that describes an execution plan.
Definition: exec_plan.h:47
Contains a class declaration for the Vector container.
Contains the definitions of OpenMP specific member functions for the MapOverlap skeleton.
Contains a class declaration for Environment class.
A class representing the MapOverlap skeleton.
Definition: mapoverlap.h:75
A class representing the column-wise iterator for the Matrix data-type.
~MapOverlap()
Definition: mapoverlap.inl:92
OverlapPolicy
Definition: mapoverlap.h:36
Contains a class that stores information about which back ends to use when executing.