12 #include <OpenCL/opencl.h>
74 template <
typename MapOverlapFunc>
95 m_execPlanMulti = plan;
// Raw pointer to the MapOverlapFunc instance (the class template parameter).
// NOTE(review): presumably set from the constructor's MapOverlapFunc* argument;
// ownership and lifetime are not visible here — confirm.
100 MapOverlapFunc* m_mapOverlapFunc;
112 template <
typename T>
// Call-operator overload taking an iterator pair and no separate output argument.
// poly has no default; pad defaults to a value-initialized
// InputIterator::value_type.
// NOTE(review): presumably the result is written back into the input range — confirm.
115 template <
typename InputIterator>
116 void operator()(InputIterator inputBegin, InputIterator inputEnd, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type());
118 template <
typename T>
// Call-operator overload taking an iterator pair plus an explicit OverlapPolicy,
// with no separate output argument.
// overlapPolicy and poly have no defaults; pad defaults to a value-initialized
// InputIterator::value_type.
121 template <
typename InputIterator>
122 void operator()(InputIterator inputBegin, InputIterator inputEnd,
OverlapPolicy overlapPolicy, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type());
124 template <
typename T>
// Call-operator overload taking an input iterator pair and an output iterator.
// poly has no default; pad defaults to a value-initialized
// InputIterator::value_type.
// NOTE(review): presumably results are written starting at outputBegin — confirm.
127 template <
typename InputIterator,
typename OutputIterator>
128 void operator()(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type());
130 template <
typename T>
// Call-operator overload taking an input iterator pair, an output iterator and
// an explicit OverlapPolicy.
// Unlike the other iterator overloads of operator() visible here, poly defaults
// to CONSTANT; pad defaults to a value-initialized InputIterator::value_type.
133 template <
typename InputIterator,
typename OutputIterator>
134 void operator()(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin,
OverlapPolicy overlapPolicy, EdgePolicy poly = CONSTANT,
typename InputIterator::value_type pad =
typename InputIterator::value_type());
// CPU overload taking a whole Vector<T> by non-const reference.
// poly defaults to CONSTANT and pad to T().
// NOTE(review): there is no output argument, so presumably the input is
// updated in place — confirm against the definition in mapoverlap_cpu.inl.
137 template <
typename T>
138 void CPU(
Vector<T>& input, EdgePolicy poly = CONSTANT, T pad = T());
// CPU overload taking an iterator pair and no separate output argument.
// poly has no default; pad defaults to a value-initialized
// InputIterator::value_type.
140 template <
typename InputIterator>
141 void CPU(InputIterator inputBegin, InputIterator inputEnd, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type());
143 template <
typename T>
// CPU overload taking an iterator pair plus an explicit OverlapPolicy, with no
// separate output argument.
// overlapPolicy and poly have no defaults; pad defaults to a value-initialized
// InputIterator::value_type.
146 template <
typename InputIterator>
147 void CPU(InputIterator inputBegin, InputIterator inputEnd,
OverlapPolicy overlapPolicy, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type());
149 template <
typename T>
// CPU overload taking an input iterator pair and an output iterator.
// poly has no default; pad defaults to a value-initialized
// InputIterator::value_type.
152 template <
typename InputIterator,
typename OutputIterator>
153 void CPU(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type());
155 template <
typename T>
// CPU overload taking an input iterator pair, an output iterator and an
// explicit OverlapPolicy.
// overlapPolicy and poly have no defaults; pad defaults to a value-initialized
// InputIterator::value_type.
158 template <
typename InputIterator,
typename OutputIterator>
159 void CPU(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin,
OverlapPolicy overlapPolicy, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type());
// CPU helper taking an input iterator pair and an output iterator.
// poly defaults to CONSTANT; pad defaults to a value-initialized
// InputIterator::value_type.
// NOTE(review): the name suggests the overlap is applied along matrix rows —
// confirm against the definition.
162 template <
typename InputIterator,
typename OutputIterator>
163 void CPU_ROWWISE(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly = CONSTANT,
typename InputIterator::value_type pad =
typename InputIterator::value_type());
// CPU helper taking an input iterator pair and an output iterator.
// poly defaults to CONSTANT; pad defaults to a value-initialized
// InputIterator::value_type.
// NOTE(review): the name suggests the overlap is applied along matrix columns —
// confirm against the definition.
165 template <
typename InputIterator,
typename OutputIterator>
166 void CPU_COLWISE(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly = CONSTANT,
typename InputIterator::value_type pad =
typename InputIterator::value_type());
// OpenMP overload taking a whole Vector<T> by non-const reference.
// poly defaults to CONSTANT and pad to T().
// NOTE(review): there is no output argument, so presumably the input is
// updated in place — confirm against the definition in mapoverlap_omp.inl.
170 template <
typename T>
171 void OMP(
Vector<T>& input, EdgePolicy poly = CONSTANT, T pad = T());
// OpenMP overload taking an iterator pair and no separate output argument.
// poly has no default; pad defaults to a value-initialized
// InputIterator::value_type.
173 template <
typename InputIterator>
174 void OMP(InputIterator inputBegin, InputIterator inputEnd, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type());
176 template <
typename T>
// OpenMP overload taking an iterator pair plus an explicit OverlapPolicy, with
// no separate output argument.
// overlapPolicy and poly have no defaults; pad defaults to a value-initialized
// InputIterator::value_type.
179 template <
typename InputIterator>
180 void OMP(InputIterator inputBegin, InputIterator inputEnd,
OverlapPolicy overlapPolicy, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type());
182 template <
typename T>
// OpenMP overload taking an input iterator pair and an output iterator.
// poly has no default; pad defaults to a value-initialized
// InputIterator::value_type.
185 template <
typename InputIterator,
typename OutputIterator>
186 void OMP(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type());
188 template <
typename T>
// OpenMP overload taking an input iterator pair, an output iterator and an
// explicit OverlapPolicy.
// overlapPolicy and poly have no defaults; pad defaults to a value-initialized
// InputIterator::value_type.
191 template <
typename InputIterator,
typename OutputIterator>
192 void OMP(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin,
OverlapPolicy overlapPolicy, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type());
// OpenMP helper taking an input iterator pair and an output iterator.
// poly defaults to CONSTANT; pad defaults to a value-initialized
// InputIterator::value_type.
// NOTE(review): the name suggests the overlap is applied along matrix rows —
// confirm against the definition.
195 template <
typename InputIterator,
typename OutputIterator>
196 void OMP_ROWWISE(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly = CONSTANT,
typename InputIterator::value_type pad =
typename InputIterator::value_type());
// OpenMP helper taking an input iterator pair and an output iterator.
// poly defaults to CONSTANT; pad defaults to a value-initialized
// InputIterator::value_type.
// NOTE(review): the name suggests the overlap is applied along matrix columns —
// confirm against the definition.
198 template <
typename InputIterator,
typename OutputIterator>
199 void OMP_COLWISE(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly = CONSTANT,
typename InputIterator::value_type pad =
typename InputIterator::value_type());
// CUDA overload taking a whole Vector<T> by non-const reference and no
// separate output argument.
// poly defaults to CONSTANT, pad to T() and useNumGPU to 1.
// NOTE(review): useNumGPU presumably selects how many GPUs take part — confirm
// against the definition in mapoverlap_cu.inl.
205 template <
typename T>
206 void CU(
Vector<T>& input, EdgePolicy poly = CONSTANT, T pad = T(),
int useNumGPU = 1);
// CUDA overload taking an iterator pair and no separate output argument.
// poly has no default; pad defaults to a value-initialized
// InputIterator::value_type and useNumGPU to 1.
208 template <
typename InputIterator>
209 void CU(InputIterator inputBegin, InputIterator inputEnd, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type(),
int useNumGPU = 1);
211 template <
typename T>
// CUDA overload taking an iterator pair plus an explicit OverlapPolicy, with no
// separate output argument.
// overlapPolicy and poly have no defaults; pad defaults to a value-initialized
// InputIterator::value_type and useNumGPU to 1.
214 template <
typename InputIterator>
215 void CU(InputIterator inputBegin, InputIterator inputEnd,
OverlapPolicy overlapPolicy, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type(),
int useNumGPU = 1);
// CUDA overload taking separate input and output Vector<T> objects, both by
// non-const reference.
// poly defaults to CONSTANT, pad to T() and useNumGPU to 1.
217 template <
typename T>
218 void CU(
Vector<T>& input,
Vector<T>& output, EdgePolicy poly = CONSTANT, T pad = T(),
int useNumGPU = 1);
// CUDA overload taking an input iterator pair and an output iterator.
// poly has no default; pad defaults to a value-initialized
// InputIterator::value_type and useNumGPU to 1.
220 template <
typename InputIterator,
typename OutputIterator>
221 void CU(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type(),
int useNumGPU = 1);
223 template <
typename T>
// CUDA overload taking an input iterator pair, an output iterator and an
// explicit OverlapPolicy.
// overlapPolicy and poly have no defaults; pad defaults to a value-initialized
// InputIterator::value_type and useNumGPU to 1.
226 template <
typename InputIterator,
typename OutputIterator>
227 void CU(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin,
OverlapPolicy overlapPolicy, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type(),
int useNumGPU = 1);
// Unsigned CUDA device identifier stored on the skeleton object.
// NOTE(review): where this is assigned and read is not visible here — confirm.
230 unsigned int cudaDeviceID;
// CUDA helper taking an input iterator pair, an output iterator, an edge
// policy, a pad value and a device ID; none of the parameters has a default.
// NOTE(review): presumably runs the computation on the single CUDA device
// identified by deviceID — confirm.
232 template <
typename InputIterator,
typename OutputIterator>
233 void mapOverlapSingleThread_CU(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad,
unsigned int deviceID);
// CUDA helper overload that additionally takes an OverlapPolicy as its last
// parameter; none of the parameters has a default.
// NOTE(review): presumably the OverlapPolicy-aware counterpart of the
// deviceID-only overload — confirm.
235 template <
typename InputIterator,
typename OutputIterator>
236 void mapOverlapSingleThread_CU(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad,
unsigned int deviceID,
OverlapPolicy overlapPolicy);
// CUDA helper taking an input iterator pair, an output iterator, an edge
// policy, a pad value and a device ID; none of the parameters has a default.
// NOTE(review): the name suggests a row-wise variant on one device — confirm.
238 template <
typename InputIterator,
typename OutputIterator>
239 void mapOverlapSingleThread_CU_Row(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad,
unsigned int deviceID);
// CUDA helper taking an input iterator pair, an output iterator, an edge
// policy, a pad value and a device count; none of the parameters has a default.
// NOTE(review): the name suggests a row-wise variant spread over numDevices
// devices — confirm.
241 template <
typename InputIterator,
typename OutputIterator>
242 void mapOverlapMultiThread_CU_Row(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad,
size_t numDevices);
// CUDA helper taking an input iterator pair, an output iterator, an edge
// policy, a pad value and a device ID; none of the parameters has a default.
// NOTE(review): the name suggests a column-wise variant on one device — confirm.
244 template <
typename InputIterator,
typename OutputIterator>
245 void mapOverlapSingleThread_CU_Col(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad,
unsigned int deviceID);
// CUDA helper taking an input iterator pair, an output iterator, an edge
// policy, a pad value and a device count; none of the parameters has a default.
// NOTE(review): the name suggests a column-wise variant spread over numDevices
// devices — confirm.
247 template <
typename InputIterator,
typename OutputIterator>
248 void mapOverlapMultiThread_CU_Col(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad,
size_t numDevices);
// CUDA helper templated on element type T; returns a size_t.
// numThreads is passed by non-const reference, so the callee is able to
// modify the caller's value.
// NOTE(review): the meaning of the return value and of width is not visible
// here — confirm against the definition.
250 template <
typename T>
251 size_t getThreadNumber_CU(
size_t width,
size_t &numThreads,
unsigned int deviceID);
// CUDA helper templated on element type T; returns a bool.
// numThreads is passed by non-const reference, so the callee is able to
// modify the caller's value.
// NOTE(review): presumably reports whether enough shared memory exists on
// device deviceID for numThreads threads — confirm.
253 template <
typename T>
254 bool sharedMemAvailable_CU(
size_t &numThreads,
unsigned int deviceID);
// OpenCL overload taking a whole Vector<T> by non-const reference and no
// separate output argument.
// poly defaults to CONSTANT, pad to T() and useNumGPU to 1.
// NOTE(review): useNumGPU presumably selects how many GPUs take part — confirm
// against the definition in mapoverlap_cl.inl.
260 template <
typename T>
261 void CL(
Vector<T>& input, EdgePolicy poly = CONSTANT, T pad = T(),
int useNumGPU = 1);
// OpenCL overload taking an iterator pair and no separate output argument.
// poly has no default; pad defaults to a value-initialized
// InputIterator::value_type and useNumGPU to 1.
263 template <
typename InputIterator>
264 void CL(InputIterator inputBegin, InputIterator inputEnd, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type(),
int useNumGPU = 1);
266 template <
typename T>
// OpenCL overload taking an iterator pair plus an explicit OverlapPolicy, with
// no separate output argument.
// overlapPolicy and poly have no defaults; pad defaults to a value-initialized
// InputIterator::value_type and useNumGPU to 1.
269 template <
typename InputIterator>
270 void CL(InputIterator inputBegin, InputIterator inputEnd,
OverlapPolicy overlapPolicy, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type(),
int useNumGPU = 1);
// OpenCL overload taking separate input and output Vector<T> objects, both by
// non-const reference.
// poly defaults to CONSTANT, pad to T() and useNumGPU to 1.
272 template <
typename T>
273 void CL(
Vector<T>& input,
Vector<T>& output, EdgePolicy poly = CONSTANT, T pad = T(),
int useNumGPU = 1);
// OpenCL overload taking an input iterator pair and an output iterator.
// poly has no default; pad defaults to a value-initialized
// InputIterator::value_type and useNumGPU to 1.
275 template <
typename InputIterator,
typename OutputIterator>
276 void CL(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type(),
int useNumGPU = 1);
278 template <
typename T>
// OpenCL overload taking an input iterator pair, an output iterator and an
// explicit OverlapPolicy.
// overlapPolicy and poly have no defaults; pad defaults to a value-initialized
// InputIterator::value_type and useNumGPU to 1.
281 template <
typename InputIterator,
typename OutputIterator>
282 void CL(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin,
OverlapPolicy overlapPolicy, EdgePolicy poly,
typename InputIterator::value_type pad =
typename InputIterator::value_type(),
int useNumGPU = 1);
// OpenCL helper taking an input iterator pair, an output iterator, an edge
// policy, a pad value and a device ID; none of the parameters has a default.
// NOTE(review): presumably runs the computation on the single OpenCL device
// identified by deviceID — confirm.
285 template <
typename InputIterator,
typename OutputIterator>
286 void mapOverlapSingle_CL(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad,
unsigned int deviceID);
// OpenCL helper taking an input iterator pair, an output iterator, an edge
// policy, a pad value and a device count; none of the parameters has a default.
// NOTE(review): presumably spreads the computation over numDevices OpenCL
// devices — confirm.
288 template <
typename InputIterator,
typename OutputIterator>
289 void mapOverlapNumDevices_CL(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad,
size_t numDevices);
// OpenCL helper taking an input iterator pair, an output iterator, an edge
// policy, a pad value and a device ID; none of the parameters has a default.
// NOTE(review): the name suggests a row-wise variant on one device — confirm.
291 template <
typename InputIterator,
typename OutputIterator>
292 void mapOverlapSingle_CL_Row(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad,
unsigned int deviceID);
// OpenCL helper taking an input iterator pair, an output iterator, an edge
// policy, a pad value and a device count; none of the parameters has a default.
// NOTE(review): the name suggests a row-wise variant spread over numDevices
// devices — confirm.
294 template <
typename InputIterator,
typename OutputIterator>
295 void mapOverlapSingle_CL_RowMulti(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad,
size_t numDevices);
// OpenCL helper taking an input iterator pair, an output iterator, an edge
// policy, a pad value and a device ID; none of the parameters has a default.
// NOTE(review): the name suggests a column-wise variant on one device — confirm.
297 template <
typename InputIterator,
typename OutputIterator>
298 void mapOverlapSingle_CL_Col(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad,
unsigned int deviceID);
// OpenCL helper taking an input iterator pair, an output iterator, an edge
// policy, a pad value and a device count; none of the parameters has a default.
// NOTE(review): the name suggests a column-wise variant spread over numDevices
// devices — confirm.
300 template <
typename InputIterator,
typename OutputIterator>
301 void mapOverlapSingle_CL_ColMulti(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad,
size_t numDevices);
// OpenCL helper overload that additionally takes an OverlapPolicy as its last
// parameter, after deviceID; none of the parameters has a default.
// NOTE(review): presumably the OverlapPolicy-aware counterpart of the
// deviceID-only overload — confirm.
303 template <
typename InputIterator,
typename OutputIterator>
304 void mapOverlapSingle_CL(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad,
unsigned int deviceID,
OverlapPolicy overlapPolicy);
// OpenCL helper overload that additionally takes an OverlapPolicy as its last
// parameter, after numDevices; none of the parameters has a default.
// NOTE(review): presumably the OverlapPolicy-aware counterpart of the
// numDevices-only overload — confirm.
306 template <
typename InputIterator,
typename OutputIterator>
307 void mapOverlapNumDevices_CL(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, EdgePolicy poly,
typename InputIterator::value_type pad,
size_t numDevices,
OverlapPolicy overlapPolicy);
// OpenCL helper templated on element type T; returns a size_t.
// numThreads is passed by non-const reference, so the callee is able to
// modify the caller's value.
// NOTE(review): the meaning of the return value and of width is not visible
// here — confirm against the definition.
309 template <
typename T>
310 size_t getThreadNumber_CL(
size_t width,
size_t &numThreads,
unsigned int deviceID);
// OpenCL helper templated on element type T; returns a bool.
// numThreads is passed by non-const reference, so the callee is able to
// modify the caller's value.
// NOTE(review): presumably reports whether enough local memory exists on
// device deviceID for numThreads work-items — confirm.
312 template <
typename T>
313 bool sharedMemAvailable_CL(
size_t &numThreads,
unsigned int deviceID);
// OpenCL kernel tables. Each element pairs a cl_kernel handle with a pointer
// to a Device_CL; there is one container per kernel variant, as named by the
// member.
// NOTE(review): presumably one entry per OpenCL device; who releases the
// kernels and owns the Device_CL objects is not visible here — confirm.
316 std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_CL;
317 std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_Mat_Row_CL;
318 std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_Mat_Col_CL;
319 std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_Mat_ColMulti_CL;
320 std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_Mat_ConvolFilter_CL;
321 std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_Mat_Convol_CL;
323 std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_Mat_Transpose_CL;
// Takes no arguments and returns nothing.
// NOTE(review): presumably builds the OpenCL program and fills the m_kernels_*
// containers — confirm against the definition.
325 void createOpenCLProgram();
void CU(Vector< T > &input, EdgePolicy poly=CONSTANT, T pad=T(), int useNumGPU=1)
Definition: mapoverlap_cu.inl:33
Contains a class declaration for the MapOverlap skeleton.
Contains the definitions of OpenCL specific member functions for the MapOverlap skeleton.
Includes the macro files needed for the defined backends.
void operator()(Vector< T > &input, EdgePolicy poly=CONSTANT, T pad=T())
Definition: mapoverlap.inl:111
void finishAll()
Definition: environment.inl:575
void CPU(Vector< T > &input, EdgePolicy poly=CONSTANT, T pad=T())
Definition: mapoverlap_cpu.inl:23
void OMP(Vector< T > &input, EdgePolicy poly=CONSTANT, T pad=T())
Definition: mapoverlap_omp.inl:26
Contains the definitions of CUDA specific member functions for the MapOverlap skeleton.
Contains the definitions of non-backend specific member functions for the MapOverlap skeleton.
void CL(Vector< T > &input, EdgePolicy poly=CONSTANT, T pad=T(), int useNumGPU=1)
Definition: mapoverlap_cl.inl:469
Contains a class declaration for the object that represents an OpenCL device.
MapOverlap(MapOverlapFunc *mapOverlapFunc)
Definition: mapoverlap.inl:25
Contains the definitions of CPU specific member functions for the MapOverlap skeleton.
Contains a class declaration for the Matrix container.
A vector container class, implemented as a wrapper for std::vector.
Definition: vector.h:61
A class that describes an execution plan.
Definition: exec_plan.h:47
Contains a class declaration for the Vector container.
Contains the definitions of OpenMP specific member functions for the MapOverlap skeleton.
Contains a class declaration for Environment class.
A class representing the MapOverlap skeleton.
Definition: mapoverlap.h:75
A class representing the column-wise iterator for the Matrix data-type.
~MapOverlap()
Definition: mapoverlap.inl:92
OverlapPolicy
Definition: mapoverlap.h:36
Contains a class that stores information about which back ends to use when executing.