12 #include <OpenCL/opencl.h>
76 template <
typename ReduceFuncRowWise,
typename ReduceFuncColWise = ReduceFuncRowWise>
// Constructor declaration for the two-function (row-wise / column-wise) Reduce skeleton.
// Takes one pointer per reduce function object; the parameter types are the class
// template parameters. The definition lives outside this listing (reduce_2d.inl).
// NOTE(review): ownership of the two pointers is not visible here — confirm in the definition.
82 Reduce(ReduceFuncRowWise* reduceFuncRowWise, ReduceFuncColWise* reduceFuncColWise);
97 m_execPlanMulti = plan;
// Pointers to the row-wise and column-wise reduce function objects; their types match
// the two constructor parameters. Presumably set by the constructor — TODO confirm.
102 ReduceFuncRowWise* m_reduceFuncRowWise;
103 ReduceFuncColWise* m_reduceFuncColWise;
115 template <
typename T>
118 template <
typename T>
122 template <
typename T>
125 template <
typename T>
130 template <
typename T>
133 template <
typename T>
// Helper declaration: takes a std::vector<T> by non-const reference plus a thread count
// (by const reference) and returns a single T.
// NOTE(review): judging by the name this is the OpenMP reduction over a plain vector;
// whether `input` is modified is not visible in this listing — confirm in the definition.
137 template <
typename T>
138 T ompVectorReduce(std::vector<T> &input,
const size_t &numThreads);
143 template <
typename T>
146 template <
typename T>
// NOTE(review): presumably the CUDA device used by the *_CU members; it is neither
// initialized nor read anywhere in the visible listing — confirm against the .inl files.
150 unsigned int cudaDeviceID;
// Declaration of the dense-matrix overload: takes a Matrix<T> by reference and a device
// index, returns a single T.
// NOTE(review): presumably runs the reduction on the CUDA device `deviceID` — confirm.
152 template <
typename T>
153 T reduceSingleThread_CU(
Matrix<T>& input,
unsigned int deviceID);
// Declaration: takes a Matrix<T> by reference and a device count, returns a single T.
// NOTE(review): presumably the multi-device CUDA path; how work is divided among
// `numDevices` devices is not visible here — confirm in the definition.
155 template <
typename T>
156 T reduceMultipleThreads_CU(
Matrix<T>& input,
size_t numDevices);
// Declaration of the sparse-matrix overload: same shape as the Matrix<T> overload but
// takes a SparseMatrix<T> by reference; returns a single T.
// NOTE(review): presumably reduces on the CUDA device `deviceID` — confirm.
158 template <
typename T>
159 T reduceSingleThread_CU(
SparseMatrix<T>& input,
unsigned int deviceID);
161 template <
typename T>
168 template <
typename T>
171 template <
typename T>
// Declaration: takes a Matrix<T> by reference and a device index, returns a single T.
// NOTE(review): presumably the single-device OpenCL path; `deviceID` likely indexes the
// available OpenCL devices — confirm in the definition.
175 template <
typename T>
176 T reduceSingle_CL(
Matrix<T> &input,
unsigned int deviceID);
// Declaration: takes a Matrix<T> by reference and a device count, returns a single T.
// NOTE(review): presumably the multi-device OpenCL path — confirm in the definition.
178 template <
typename T>
179 T reduceNumDevices_CL(
Matrix<T> &input,
size_t numDevices);
181 template <
typename T>
184 template <
typename T>
// Two lists of (OpenCL kernel handle, device pointer) pairs, one for the row-wise reduce
// function and one for the column-wise one.
// NOTE(review): presumably filled by createOpenCLProgram() with one entry per device;
// who releases the cl_kernel handles is not visible in this listing — confirm.
188 std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_CL_RowWise;
189 std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_CL_ColWise;
// Declaration only: no parameters, no return value.
// NOTE(review): presumably builds the OpenCL program(s) and populates the kernel lists
// of this class — confirm in the OpenCL .inl definition.
191 void createOpenCLProgram();
207 template <
typename ReduceFunc>
// Constructor declaration for the single-function Reduce skeleton: takes a pointer to the
// reduce function object (type is the class template parameter ReduceFunc).
// NOTE(review): not marked `explicit`, so a ReduceFunc* converts implicitly; ownership of
// the pointer is not visible here — confirm in the definition.
211 Reduce(ReduceFunc* reduceFunc);
226 m_execPlanMulti = plan;
// Pointer to the reduce function object; same type as the constructor's parameter.
// Presumably set by the constructor — TODO confirm.
231 ReduceFunc* m_reduceFunc;
243 template <
typename T>
246 template <
typename T>
249 template <
typename T>
252 template <
typename T>
255 template <
typename T>
// Iterator-range overload of the call operator: takes a [inputBegin, inputEnd) pair by
// value and returns one value of the iterator's value_type.
// NOTE(review): which backend this dispatches to is not visible here — confirm in the
// definition.
258 template <
typename InputIterator>
259 typename InputIterator::value_type
operator()(InputIterator inputBegin, InputIterator inputEnd);
262 template <
typename T>
265 template <
typename T>
268 template <
typename T>
271 template <
typename T>
274 template <
typename T>
// Iterator-range overload: takes a begin/end iterator pair by value and returns one value
// of the iterator's value_type.
// NOTE(review): presumably the sequential CPU backend — confirm in the CPU .inl definition.
277 template <
typename InputIterator>
278 typename InputIterator::value_type
CPU(InputIterator inputBegin, InputIterator inputEnd);
282 template <
typename T>
285 template <
typename T>
288 template <
typename T>
291 template <
typename T>
294 template <
typename T>
// Iterator-range overload: takes a begin/end iterator pair by value and returns one value
// of the iterator's value_type.
// NOTE(review): presumably the OpenMP backend — confirm in the OpenMP .inl definition.
297 template <
typename InputIterator>
298 typename InputIterator::value_type
OMP(InputIterator inputBegin, InputIterator inputEnd);
303 template <
typename T>
306 template <
typename T>
309 template <
typename T>
312 template <
typename T>
315 template <
typename T>
// Iterator-range overload: takes a begin/end iterator pair by value plus an optional GPU
// count (defaults to 1) and returns one value of the iterator's value_type.
// NOTE(review): presumably the CUDA backend; how values of useNumGPU other than 1 are
// handled is not visible in this listing — confirm in the definition.
318 template <
typename InputIterator>
319 typename InputIterator::value_type
CU(InputIterator inputBegin, InputIterator inputEnd,
int useNumGPU = 1);
// Iterator-range overload: takes a begin/end iterator pair and a device index, returns one
// value of the iterator's value_type.
// NOTE(review): presumably reduces the range on the CUDA device `deviceID` — confirm.
324 template <
typename InputIterator>
325 typename InputIterator::value_type reduceSingleThread_CU(InputIterator inputBegin, InputIterator inputEnd,
unsigned int deviceID);
// Sparse-matrix overload: takes a SparseMatrix<T> by reference and a device index, returns
// a single T.
// NOTE(review): presumably reduces on the CUDA device `deviceID` — confirm.
327 template <
typename T>
328 T reduceSingleThread_CU(
SparseMatrix<T>& input,
unsigned int deviceID);
// Declaration: takes a Matrix<T> and a device index and writes into the Vector<T> passed by
// reference as `result` (no return value).
// NOTE(review): presumably reduces along one dimension, yielding one element per row or
// column; the required size of `result` is not visible here — confirm in the definition.
330 template <
typename T>
331 void reduceSingleThreadOneDim_CU(
Matrix<T>& input,
unsigned int deviceID,
Vector<T> &result);
333 template <
typename T>
339 template <
typename T>
342 template <
typename T>
345 template <
typename T>
348 template <
typename T>
351 template <
typename T>
354 template <
typename InputIterator>
355 typename InputIterator::value_type
CL(InputIterator inputBegin, InputIterator inputEnd,
int useNumGPU = 1);
// Iterator-range overload: takes a begin/end iterator pair and a device index, returns one
// value of the iterator's value_type.
// NOTE(review): presumably reduces the range on the OpenCL device `deviceID` — confirm.
358 template <
typename InputIterator>
359 typename InputIterator::value_type reduceSingle_CL(InputIterator inputBegin, InputIterator inputEnd,
unsigned int deviceID);
361 template <
typename T>
// Declaration: takes a Matrix<T> and a device index and writes into the Vector<T> passed by
// reference as `result` (no return value).
// NOTE(review): presumably the OpenCL counterpart of reduceSingleThreadOneDim_CU; the
// required size of `result` is not visible here — confirm in the definition.
364 template <
typename T>
365 void reduceSingleThreadOneDim_CL(
Matrix<T>& input,
unsigned int deviceID,
Vector<T> &result);
367 template <
typename T>
// List of (OpenCL kernel handle, device pointer) pairs.
// NOTE(review): presumably filled by createOpenCLProgram() with one entry per device;
// who releases the cl_kernel handles is not visible in this listing — confirm.
371 std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_CL;
// Declaration only: no parameters, no return value.
// NOTE(review): presumably builds the OpenCL program and populates m_kernels_CL —
// confirm in the OpenCL .inl definition.
373 void createOpenCLProgram();
T OMP(Matrix< T > &input)
Definition: reduce_omp_2d.inl:27
Contains the definitions of CUDA specific member functions for the 2DReduce skeleton.
Contains the definitions of OpenCL specific member functions for the 2DReduce skeleton.
Contains a class declaration for the SparseMatrix container.
Includes the macro files needed for the defined backends.
Contains the definitions of OpenMP specific member functions for the Reduce skeleton.
A sparse matrix container class that mainly stores its data in CSR format.
Definition: sparse_matrix.h:73
Contains the definitions of non-backend specific member functions for the 2DReduce skeleton...
Contains the definitions of OpenMP specific member functions for the 2DReduce skeleton.
void finishAll()
Definition: environment.inl:575
Contains the definitions of CPU specific member functions for the Reduce skeleton.
T CL(Matrix< T > &input, int useNumGPU=1)
Definition: reduce_cl_2d.inl:426
Contains a class declaration for the object that represents an OpenCL device.
Contains a class declaration for the Matrix container.
Contains the definitions of non-backend specific member functions for the Reduce skeleton.
Contains the definitions of CUDA specific member functions for the Reduce skeleton.
ReducePolicy
Can be used to specify the direction of reduce for 2D containers.
Definition: reduce.h:42
A vector container class, implemented as a wrapper for std::vector.
Definition: vector.h:61
Contains the definitions of common member functions for the Reduce skeleton that is used for both 1D and 2D reduce operations.
A class that describes an execution plan.
Definition: exec_plan.h:47
Contains a class declaration for the Vector container.
A class representing the Reduce skeleton, for both 1D and 2D reduce operations, for 1D Vector...
Definition: reduce.h:77
Contains a class declaration for Environment class.
Reduce(ReduceFuncRowWise *reduceFuncRowWise, ReduceFuncColWise *reduceFuncColWise)
Definition: reduce_2d.inl:24
A class representing the column-wise iterator for the Matrix data-type.
Contains the definitions of CPU specific member functions for the 2DReduce skeleton.
T CPU(Matrix< T > &input)
Definition: reduce_cpu_2d.inl:26
T CU(Matrix< T > &input, int useNumGPU=1)
Definition: reduce_cu_2d.inl:311
~Reduce()
Definition: reduce_2d.inl:92
Contains a class that stores information about which back ends to use when executing.
Contains the definitions of OpenCL specific member functions for the Reduce skeleton.
T operator()(Matrix< T > &input)
Definition: reduce_2d.inl:113