SkePU  1.2
 All Classes Namespaces Files Functions Variables Enumerations Friends Macros Groups Pages
reduce.h
Go to the documentation of this file.
1 
5 #ifndef REDUCE_H
6 #define REDUCE_H
7 
8 #ifdef SKEPU_OPENCL
9 #include <string>
10 #include <vector>
11 #ifdef USE_MAC_OPENCL
12 #include <OpenCL/opencl.h>
13 #else
14 #include <CL/cl.h>
15 #endif
16 #include "src/device_cl.h"
17 #endif
18 
19 #include "src/environment.h"
20 
21 #include "skepu/vector.h"
22 #include "skepu/matrix.h"
23 #include "skepu/sparse_matrix.h"
24 
25 #include "src/operator_macros.h"
26 #include "src/exec_plan.h"
27 
28 
29 
30 
31 
32 
33 namespace skepu
34 {
35 
36 
42 enum ReducePolicy
43 {
44  REDUCE_ROW_WISE_ONLY,
45  REDUCE_COL_WISE_ONLY
46 };
47 
// Reduce skeleton, general form: parameterised on a row-wise and a column-wise
// reduce function type (the column-wise type defaults to the row-wise type).
// Every call operator and backend entry point declared in this class takes a
// Matrix<T> or a SparseMatrix<T> and returns a single T.
// Only declarations (plus three small inline setters/forwarders) live here; the
// member definitions come from the .inl files included at the end of this header.
76 template <typename ReduceFuncRowWise, typename ReduceFuncColWise = ReduceFuncRowWise>
77 class Reduce
78 {
79 
80 public:
81 
    // Takes pointers to the row-wise and the column-wise reduce function objects.
    // NOTE(review): ownership of the two pointers is not visible in this header;
    // confirm against the constructor/destructor definitions.
82  Reduce(ReduceFuncRowWise* reduceFuncRowWise, ReduceFuncColWise* reduceFuncColWise);
83 
84  ~Reduce();
85 
    // Forwards to finishAll() on the Environment held in m_environment.
86  void finishAll()
87  {
88  m_environment->finishAll();
89  }
90 
    // Stores the address of 'plan' in m_execPlan; no copy of the plan is made here.
91  void setExecPlan(ExecPlan& plan)
92  {
93  m_execPlan = &plan;
94  }
    // Pointer overload: stores 'plan' in m_execPlanMulti (a different member than
    // the reference overload writes to).
    // NOTE(review): presumably an array of plans for multi-device runs; confirm.
95  void setExecPlan(ExecPlan *plan)
96  {
97  m_execPlanMulti = plan;
98  }
99 
100 private:
101  Environment<int>* m_environment;
102  ReduceFuncRowWise* m_reduceFuncRowWise;
103  ReduceFuncColWise* m_reduceFuncColWise;
104 
    // Set by setExecPlan(ExecPlan*).
106  ExecPlan *m_execPlanMulti;
107 
    // Set by setExecPlan(ExecPlan&).
109  ExecPlan *m_execPlan;
110 
    // ExecPlan held by value. Presumably the fallback plan when none was set; TODO confirm.
112  ExecPlan m_defPlan;
113 
    // Generic call operators; one overload per supported 2D container.
114 public:
115  template <typename T>
116  T operator()(Matrix<T>& input);
117 
118  template <typename T>
119  T operator()(SparseMatrix<T>& input);
120 
    // CPU entry points; always declared, regardless of backend macros.
121 public:
122  template <typename T>
123  T CPU(Matrix<T>& input);
124 
125  template <typename T>
126  T CPU(SparseMatrix<T>& input);
127 
    // OpenMP entry points; only declared when SKEPU_OPENMP is defined.
128 #ifdef SKEPU_OPENMP
129 public:
130  template <typename T>
131  T OMP(Matrix<T>& input);
132 
133  template <typename T>
134  T OMP(SparseMatrix<T>& input);
135 
136 private:
    // Private helper that takes a std::vector<T> and a thread count.
137  template <typename T>
138  T ompVectorReduce(std::vector<T> &input, const size_t &numThreads);
139 #endif
140 
    // CUDA entry points; only declared when SKEPU_CUDA is defined.
    // useNumGPU defaults to 1.
141 #ifdef SKEPU_CUDA
142 public:
143  template <typename T>
144  T CU(Matrix<T>& input, int useNumGPU = 1);
145 
146  template <typename T>
147  T CU(SparseMatrix<T>& input, int useNumGPU = 1);
148 
149 private:
    // NOTE(review): unsigned int here, while the Reduce<ReduceFunc, ReduceFunc>
    // specialization further down in this file declares cudaDeviceID as int.
150  unsigned int cudaDeviceID;
151 
    // Private CUDA helpers: a per-device variant (deviceID) and a multi-device
    // variant (numDevices) for each of the two container types.
152  template <typename T>
153  T reduceSingleThread_CU(Matrix<T>& input, unsigned int deviceID);
154 
155  template <typename T>
156  T reduceMultipleThreads_CU(Matrix<T>& input, size_t numDevices);
157 
158  template <typename T>
159  T reduceSingleThread_CU(SparseMatrix<T>& input, unsigned int deviceID);
160 
161  template <typename T>
162  T reduceMultipleThreads_CU(SparseMatrix<T>& input, size_t numDevices);
163 
164 #endif
165 
    // OpenCL entry points; only declared when SKEPU_OPENCL is defined.
    // useNumGPU defaults to 1.
166 #ifdef SKEPU_OPENCL
167 public:
168  template <typename T>
169  T CL(Matrix<T>& input, int useNumGPU = 1);
170 
171  template <typename T>
172  T CL(SparseMatrix<T>& input, int useNumGPU = 1);
173 
174 private:
    // Private OpenCL helpers: a per-device variant (deviceID) and a multi-device
    // variant (numDevices) for each of the two container types.
175  template <typename T>
176  T reduceSingle_CL(Matrix<T> &input, unsigned int deviceID);
177 
178  template <typename T>
179  T reduceNumDevices_CL(Matrix<T> &input, size_t numDevices);
180 
181  template <typename T>
182  T reduceSingle_CL(SparseMatrix<T> &input, unsigned int deviceID);
183 
184  template <typename T>
185  T reduceNumDevices_CL(SparseMatrix<T> &input, size_t numDevices);
186 
187 private:
    // Separate (cl_kernel, Device_CL*) pair lists for the row-wise and the
    // column-wise reduce function.
188  std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_CL_RowWise;
189  std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_CL_ColWise;
190 
191  void createOpenCLProgram();
192 #endif
193 
194 };
195 
196 
197 
198 
199 
// Partial specialization of Reduce selected when the row-wise and column-wise
// function types are the same, i.e. a single reduce function is used.
// Besides the Matrix<T>/SparseMatrix<T> overloads returning one T, this
// specialization also declares overloads for Vector<T>, for an iterator range,
// and ReducePolicy overloads on the 2D containers that return a Vector<T>.
// Member definitions come from the .inl files included at the end of this header.
207 template <typename ReduceFunc>
208 class Reduce<ReduceFunc, ReduceFunc>
209 {
210 public:
    // Takes a pointer to the single reduce function object.
    // NOTE(review): the constructor is not marked explicit, so a ReduceFunc*
    // converts implicitly to this type; ownership of the pointer is not visible
    // in this header.
211  Reduce(ReduceFunc* reduceFunc);
212 
213  ~Reduce();
214 
    // Forwards to finishAll() on the Environment held in m_environment.
215  void finishAll()
216  {
217  m_environment->finishAll();
218  }
219 
    // Stores the address of 'plan' in m_execPlan; no copy of the plan is made here.
220  void setExecPlan(ExecPlan& plan)
221  {
222  m_execPlan = &plan;
223  }
    // Pointer overload: stores 'plan' in m_execPlanMulti (a different member than
    // the reference overload writes to).
    // NOTE(review): presumably an array of plans for multi-device runs; confirm.
224  void setExecPlan(ExecPlan *plan)
225  {
226  m_execPlanMulti = plan;
227  }
228 
229 private:
230  Environment<int>* m_environment;
231  ReduceFunc* m_reduceFunc;
232 
    // Set by setExecPlan(ExecPlan*).
234  ExecPlan *m_execPlanMulti;
235 
    // Set by setExecPlan(ExecPlan&).
237  ExecPlan *m_execPlan;
238 
    // ExecPlan held by value. Presumably the fallback plan when none was set; TODO confirm.
240  ExecPlan m_defPlan;
241 
    // Generic call operators. The same six-overload set is repeated below for
    // each backend (CPU, OMP, CU, CL).
242 public:
243  template <typename T>
244  T operator()(Vector<T>& input);
245 
246  template <typename T>
247  T operator()(Matrix<T>& input);
248 
249  template <typename T>
250  T operator()(SparseMatrix<T>& input);
251 
    // ReducePolicy overloads return a Vector<T> instead of a single T.
252  template <typename T>
253  Vector<T> operator()(Matrix<T>& input, ReducePolicy reducePolicy);
254 
255  template <typename T>
256  Vector<T> operator()(SparseMatrix<T>& input, ReducePolicy reducePolicy);
257 
    // The return type is InputIterator::value_type, so the iterator type must
    // provide a nested value_type (a raw pointer type does not).
258  template <typename InputIterator>
259  typename InputIterator::value_type operator()(InputIterator inputBegin, InputIterator inputEnd);
260 
    // CPU entry points; always declared, regardless of backend macros.
261 public:
262  template <typename T>
263  T CPU(Vector<T>& input);
264 
265  template <typename T>
266  T CPU(Matrix<T>& input);
267 
268  template <typename T>
269  T CPU(SparseMatrix<T>& input);
270 
271  template <typename T>
272  Vector<T> CPU(Matrix<T>& input, ReducePolicy reducePolicy);
273 
274  template <typename T>
275  Vector<T> CPU(SparseMatrix<T>& input, ReducePolicy reducePolicy);
276 
277  template <typename InputIterator>
278  typename InputIterator::value_type CPU(InputIterator inputBegin, InputIterator inputEnd);
279 
    // OpenMP entry points; only declared when SKEPU_OPENMP is defined.
280 #ifdef SKEPU_OPENMP
281 public:
282  template <typename T>
283  T OMP(Vector<T>& input);
284 
285  template <typename T>
286  T OMP(Matrix<T>& input);
287 
288  template <typename T>
289  T OMP(SparseMatrix<T>& input);
290 
291  template <typename T>
292  Vector<T> OMP(Matrix<T>& input, ReducePolicy reducePolicy);
293 
294  template <typename T>
295  Vector<T> OMP(SparseMatrix<T>& input, ReducePolicy reducePolicy);
296 
297  template <typename InputIterator>
298  typename InputIterator::value_type OMP(InputIterator inputBegin, InputIterator inputEnd);
299 #endif
300 
    // CUDA entry points; only declared when SKEPU_CUDA is defined.
    // useNumGPU defaults to 1.
301 #ifdef SKEPU_CUDA
302 public:
303  template <typename T>
304  T CU(Vector<T>& input, int useNumGPU = 1);
305 
306  template <typename T>
307  T CU(Matrix<T>& input, int useNumGPU = 1);
308 
309  template <typename T>
310  T CU(SparseMatrix<T>& input, int useNumGPU = 1);
311 
312  template <typename T>
313  Vector<T> CU(Matrix<T>& input, ReducePolicy reducePolicy, int useNumGPU = 1);
314 
315  template <typename T>
316  Vector<T> CU(SparseMatrix<T>& input, ReducePolicy reducePolicy, int useNumGPU = 1);
317 
318  template <typename InputIterator>
319  typename InputIterator::value_type CU(InputIterator inputBegin, InputIterator inputEnd, int useNumGPU = 1);
320 
321 private:
    // NOTE(review): int here, while the primary Reduce template earlier in this
    // file declares cudaDeviceID as unsigned int, and the helpers below take an
    // unsigned int deviceID.
322  int cudaDeviceID;
323 
    // Private CUDA helpers. The *OneDim* variants return nothing and take the
    // output Vector<T> by reference as 'result'.
324  template <typename InputIterator>
325  typename InputIterator::value_type reduceSingleThread_CU(InputIterator inputBegin, InputIterator inputEnd, unsigned int deviceID);
326 
327  template <typename T>
328  T reduceSingleThread_CU(SparseMatrix<T>& input, unsigned int deviceID);
329 
330  template <typename T>
331  void reduceSingleThreadOneDim_CU(Matrix<T>& input, unsigned int deviceID, Vector<T> &result);
332 
333  template <typename T>
334  void reduceSingleThreadOneDim_CU(SparseMatrix<T>& input, unsigned int deviceID, Vector<T> &result);
335 #endif
336 
    // OpenCL entry points; only declared when SKEPU_OPENCL is defined.
    // useNumGPU defaults to 1.
337 #ifdef SKEPU_OPENCL
338 public:
339  template <typename T>
340  T CL(Vector<T>& input, int useNumGPU = 1);
341 
342  template <typename T>
343  T CL(Matrix<T>& input, int useNumGPU = 1);
344 
345  template <typename T>
346  T CL(SparseMatrix<T>& input, int useNumGPU = 1);
347 
348  template <typename T>
349  Vector<T> CL(Matrix<T>& input, ReducePolicy reducePolicy, int useNumGPU = 1);
350 
351  template <typename T>
352  Vector<T> CL(SparseMatrix<T>& input, ReducePolicy reducePolicy, int useNumGPU = 1);
353 
354  template <typename InputIterator>
355  typename InputIterator::value_type CL(InputIterator inputBegin, InputIterator inputEnd, int useNumGPU = 1);
356 
357 private:
    // Private OpenCL helpers. The *OneDim* variants return nothing and take the
    // output Vector<T> by reference as 'result'.
358  template <typename InputIterator>
359  typename InputIterator::value_type reduceSingle_CL(InputIterator inputBegin, InputIterator inputEnd, unsigned int deviceID);
360 
361  template <typename T>
362  T reduceSingle_CL(SparseMatrix<T>& input, unsigned int deviceID);
363 
364  template <typename T>
365  void reduceSingleThreadOneDim_CL(Matrix<T>& input, unsigned int deviceID, Vector<T> &result);
366 
367  template <typename T>
368  void reduceSingleThreadOneDim_CL(SparseMatrix<T>& input, unsigned int deviceID, Vector<T> &result);
369 
370 private:
    // A single list of (cl_kernel, Device_CL*) pairs, since this specialization
    // has only one reduce function.
371  std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_CL;
372 
373  void createOpenCLProgram();
374 #endif
375 
376 };
377 
378 }
379 
380 
381 #include "src/reduce_common.h"
382 
383 #include "src/reduce.inl"
384 #include "src/reduce_2d.inl"
385 
386 #include "src/reduce_cpu.inl"
387 #include "src/reduce_cpu_2d.inl"
388 
389 #ifdef SKEPU_OPENMP
390 #include "src/reduce_omp.inl"
391 #include "src/reduce_omp_2d.inl"
392 #endif
393 
394 #ifdef SKEPU_OPENCL
395 #include "src/reduce_cl.inl"
396 #include "src/reduce_cl_2d.inl"
397 #endif
398 
399 #ifdef SKEPU_CUDA
400 #include "src/reduce_cu.inl"
401 #include "src/reduce_cu_2d.inl"
402 #endif
403 
404 
405 
406 
407 
408 #endif
409 
410 
T OMP(Matrix< T > &input)
Definition: reduce_omp_2d.inl:27
Contains the definitions of CUDA specific member functions for the 2DReduce skeleton.
Contains the definitions of OpenCL specific member functions for the 2DReduce skeleton.
Contains a class declaration for the SparseMatrix container.
Includes the macro files needed for the defined backends.
Contains the definitions of OpenMP specific member functions for the Reduce skeleton.
A sparse matrix container class that mainly stores its data in CSR format.
Definition: sparse_matrix.h:73
Contains the definitions of non-backend specific member functions for the 2DReduce skeleton...
Contains the definitions of OpenMP specific member functions for the 2DReduce skeleton.
void finishAll()
Definition: environment.inl:575
Contains the definitions of CPU specific member functions for the Reduce skeleton.
T CL(Matrix< T > &input, int useNumGPU=1)
Definition: reduce_cl_2d.inl:426
Contains a class declaration for the object that represents an OpenCL device.
Contains a class declaration for the Matrix container.
Contains the definitions of non-backend specific member functions for the Reduce skeleton.
Contains the definitions of CUDA specific member functions for the Reduce skeleton.
ReducePolicy
Can be used to specify the direction of reduce for 2D containers.
Definition: reduce.h:42
A vector container class, implemented as a wrapper for std::vector.
Definition: vector.h:61
Contains the definitions of common member functions for the Reduce skeleton that is used for both 1D ...
A class that describes an execution plan.
Definition: exec_plan.h:47
Contains a class declaration for the Vector container.
A class representing the Reduce skeleton both for 1D and 2D reduce operation for 1D Vector...
Definition: reduce.h:77
Contains a class declaration for Environment class.
Reduce(ReduceFuncRowWise *reduceFuncRowWise, ReduceFuncColWise *reduceFuncColWise)
Definition: reduce_2d.inl:24
A class representing the column-wise iterator for the Matrix data-type.
Contains the definitions of CPU specific member functions for the 2DReduce skeleton.
T CPU(Matrix< T > &input)
Definition: reduce_cpu_2d.inl:26
T CU(Matrix< T > &input, int useNumGPU=1)
Definition: reduce_cu_2d.inl:311
~Reduce()
Definition: reduce_2d.inl:92
Contains a class that stores information about which back ends to use when executing.
Contains the definitions of OpenCL specific member functions for the Reduce skeleton.
T operator()(Matrix< T > &input)
Definition: reduce_2d.inl:113