12 #include <OpenCL/opencl.h>
52 template <
typename GenerateFunc>
// Constructor: takes a pointer to the user-supplied GenerateFunc object.
// Declaration only; the listing places the definition in generate.inl.
// NOTE(review): ownership of generateFunc is not visible in this excerpt — confirm.
58 Generate(GenerateFunc* generateFunc);
68 m_generateFunc->setConstant(constant1);
// Stores the given execution-plan pointer in m_execPlanMulti.
// No copy of the plan is made here; only the pointer is assigned.
82 void setExecPlan(ExecPlan *plan)
84 m_execPlanMulti = plan;
// Pointer to an Environment<int> instance.
// NOTE(review): where it is initialized is not visible in this excerpt.
88 Environment<int>* m_environment;
// Pointer to the user function object; setConstant(constant1) is called through it.
89 GenerateFunc* m_generateFunc;
// Execution-plan pointer; assigned by setExecPlan.
92 ExecPlan *m_execPlanMulti;
// Call operator, Vector overload: takes an element count and an output
// Vector<T> by non-const reference. Declaration only (defined in generate.inl).
// NOTE(review): presumably selects a backend and fills `output` — confirm in generate.inl.
102 template <
typename T>
103 void operator()(
size_t numElements, Vector<T>& output);
105 template <
typename T>
// Call operator, iterator overload: takes an element count and the start
// of the output range. Declaration only.
// NOTE(review): assumes outputBegin can be advanced numElements times — confirm with the definition.
108 template <
typename OutputIterator>
109 void operator()(
size_t numElements, OutputIterator outputBegin);
// CPU-backend entry point, Vector overload: element count plus an output
// Vector<T> passed by non-const reference. Defined in generate_cpu.inl.
112 template <
typename T>
113 void CPU(
size_t numElements, Vector<T>& output);
// CPU-backend entry point, Matrix overload: row count, column count and an
// output Matrix<T> passed by non-const reference. Declaration only.
// NOTE(review): presumably numRows * numCols elements are produced — confirm in generate_cpu.inl.
115 template <
typename T>
116 void CPU(
size_t numRows,
size_t numCols,
Matrix<T>& output);
// CPU-backend entry point, iterator overload: element count plus the start
// of the output range. Declaration only.
118 template <
typename OutputIterator>
119 void CPU(
size_t numElements, OutputIterator outputBegin);
// OpenMP-backend entry point, Vector overload: element count plus an output
// Vector<T> passed by non-const reference. Defined in generate_omp.inl.
124 template <
typename T>
125 void OMP(
size_t numElements, Vector<T>& output);
// OpenMP-backend entry point, Matrix overload: row count, column count and
// an output Matrix<T> passed by non-const reference. Declaration only.
127 template <
typename T>
128 void OMP(
size_t numRows,
size_t numCols,
Matrix<T>& output);
// OpenMP-backend entry point, iterator overload: element count plus the
// start of the output range. Declaration only.
130 template <
typename OutputIterator>
131 void OMP(
size_t numElements, OutputIterator outputBegin);
// CUDA-backend entry point, Vector overload: element count, an output
// Vector<T> by non-const reference, and useNumGPU (defaults to 1).
// Defined in generate_cu.inl.
// NOTE(review): exact meaning of useNumGPU (e.g. how 0 is treated) is not visible here — confirm.
137 template <
typename T>
138 void CU(
size_t numElements, Vector<T>& output,
int useNumGPU = 1);
// CUDA-backend entry point, Matrix overload: row count, column count, an
// output Matrix<T> by non-const reference, and useNumGPU (defaults to 1).
// Declaration only.
140 template <
typename T>
141 void CU(
size_t numRows,
size_t numCols,
Matrix<T>& output,
int useNumGPU = 1);
// CUDA-backend entry point, iterator overload: element count, start of the
// output range, and useNumGPU (defaults to 1). Declaration only.
143 template <
typename OutputIterator>
144 void CU(
size_t numElements, OutputIterator outputBegin,
int useNumGPU = 1);
// Unsigned CUDA device identifier held by the skeleton.
// NOTE(review): where it is set and which device it refers to is not visible here — confirm.
147 unsigned int cudaDeviceID;
// CUDA helper taking an element count, the start of the output range and an
// explicit device ID. Declaration only.
// NOTE(review): presumably runs the generation on the single device `deviceID` — confirm in generate_cu.inl.
149 template <
typename OutputIterator>
150 void generateSingleThread_CU(
size_t numElements, OutputIterator outputBegin,
unsigned int deviceID);
// OpenCL-backend entry point, Vector overload: element count, an output
// Vector<T> by non-const reference, and useNumGPU (defaults to 1).
// Defined in generate_cl.inl.
156 template <
typename T>
157 void CL(
size_t numElements, Vector<T>& output,
int useNumGPU = 1);
// OpenCL-backend entry point, Matrix overload: row count, column count, an
// output Matrix<T> by non-const reference, and useNumGPU (defaults to 1).
// Declaration only.
159 template <
typename T>
160 void CL(
size_t numRows,
size_t numCols,
Matrix<T>& output,
int useNumGPU = 1);
// OpenCL-backend entry point, iterator overload: element count, start of the
// output range, and useNumGPU (defaults to 1). Declaration only.
162 template <
typename OutputIterator>
163 void CL(
size_t numElements, OutputIterator outputBegin,
int useNumGPU = 1);
// OpenCL helper taking an element count, the start of the output range and a
// device count. Declaration only.
// NOTE(review): presumably splits the work across `numDevices` devices — confirm in generate_cl.inl.
166 template <
typename OutputIterator>
167 void generateNumDevices_CL(
size_t numElements, OutputIterator outputBegin,
size_t numDevices);
// List of (OpenCL kernel handle, Device_CL pointer) pairs.
// NOTE(review): presumably one entry per device in use — confirm where it is populated.
170 std::vector<std::pair<cl_kernel, Device_CL*> > m_kernels_CL;
// Declaration only; takes no arguments and returns nothing.
// NOTE(review): presumably builds the OpenCL program/kernels stored in m_kernels_CL — confirm in generate_cl.inl.
172 void createOpenCLProgram();
Contains the definitions of CUDA specific member functions for the Generate skeleton.
void operator()(size_t numElements, Vector< T > &output)
Definition: generate.inl:102
Generate(GenerateFunc *generateFunc)
Definition: generate.inl:21
Includes the macro files needed for the defined backends.
Contains the definitions of CPU specific member functions for the Generate skeleton.
void finishAll()
Definition: environment.inl:575
A class representing the Generate skeleton.
Definition: generate.h:53
void CL(size_t numElements, Vector< T > &output, int useNumGPU=1)
Definition: generate_cl.inl:217
Contains a class declaration for the object that represents an OpenCL device.
void setConstant(T constant1)
Definition: generate.h:66
void CU(size_t numElements, Vector< T > &output, int useNumGPU=1)
Definition: generate_cu.inl:70
void OMP(size_t numElements, Vector< T > &output)
Definition: generate_omp.inl:26
Contains the definitions of non-backend-specific member functions for the Generate skeleton.
void finishAll()
Definition: generate.h:74
Contains a class declaration for the Matrix container.
~Generate()
Definition: generate.inl:83
A class that describes an execution plan.
Definition: exec_plan.h:47
void CPU(size_t numElements, Vector< T > &output)
Definition: generate_cpu.inl:23
Contains a class declaration for the Vector container.
Contains a class declaration for the Environment class.
A class representing the column-wise iterator for the Matrix data-type.
Contains a class that stores information about which back ends to use when executing.
Contains the definitions of OpenCL specific member functions for the Generate skeleton.
Contains the definitions of OpenMP specific member functions for the Generate skeleton.