00001
00005 #ifndef SCAN_H
00006 #define SCAN_H
00007
00008 #ifdef SKEPU_OPENCL
00009 #include <string>
00010 #include <vector>
00011 #include <CL/cl.h>
00012 #include "src/device_cl.h"
00013 #endif
00014
00015 #include "src/environment.h"
00016 #include "skepu/vector.h"
00017 #include "src/operator_macros.h"
00018 #include "src/exec_plan.h"
00019
00020 namespace skepu
00021 {
00022
/*!
 * \enum ScanType
 *
 * Selector passed to every Scan entry point (operator(), CPU, OMP, CU, CL)
 * to choose which variant of the scan is computed.
 *
 * NOTE(review): by the usual prefix-scan convention an inclusive scan folds
 * element i into result i, while an exclusive scan does not and starts from
 * the supplied init value -- confirm against the backend implementations.
 */
enum ScanType
{
    INCLUSIVE = 0, /*!< Inclusive variant; the default of the backend-specific calls. */
    EXCLUSIVE = 1  /*!< Exclusive variant. */
};
00031
00060 template <typename ScanFunc>
00061 class Scan
00062 {
00063
00064 public:
00065
00066 Scan(ScanFunc* scanFunc);
00067
00068 ~Scan();
00069
00070 void finishAll() {m_environment->finishAll();}
00071 void setExecPlan(ExecPlan& plan) {m_execPlan = plan;}
00072
00073 private:
00074 Environment<int>* m_environment;
00075 ScanFunc* m_scanFunc;
00076 ExecPlan m_execPlan;
00077
00078 public:
00079 template <typename T>
00080 void operator()(Vector<T>& input, ScanType type, T init = T());
00081
00082 template <typename InputIterator>
00083 void operator()(InputIterator inputBegin, InputIterator inputEnd, ScanType type, typename InputIterator::value_type init = typename InputIterator::value_type());
00084
00085 template <typename T>
00086 void operator()(Vector<T>& input, Vector<T>& output, ScanType type, T init = T());
00087
00088 template <typename InputIterator, typename OutputIterator>
00089 void operator()(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, ScanType type, typename InputIterator::value_type init = typename InputIterator::value_type());
00090
00091 public:
00092 template <typename T>
00093 void CPU(Vector<T>& input, ScanType type = INCLUSIVE, T init = T());
00094
00095 template <typename InputIterator>
00096 void CPU(InputIterator inputBegin, InputIterator inputEnd, ScanType type = INCLUSIVE, typename InputIterator::value_type init = typename InputIterator::value_type());
00097
00098 template <typename T>
00099 void CPU(Vector<T>& input, Vector<T>& output, ScanType type = INCLUSIVE, T init = T());
00100
00101 template <typename InputIterator, typename OutputIterator>
00102 void CPU(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, ScanType type = INCLUSIVE, typename InputIterator::value_type init = typename InputIterator::value_type());
00103
00104 #ifdef SKEPU_OPENMP
00105 public:
00106 template <typename T>
00107 void OMP(Vector<T>& input, ScanType type = INCLUSIVE, T init = T());
00108
00109 template <typename InputIterator>
00110 void OMP(InputIterator inputBegin, InputIterator inputEnd, ScanType type = INCLUSIVE, typename InputIterator::value_type init = typename InputIterator::value_type());
00111
00112 template <typename T>
00113 void OMP(Vector<T>& input, Vector<T>& output, ScanType type = INCLUSIVE, T init = T());
00114
00115 template <typename InputIterator, typename OutputIterator>
00116 void OMP(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, ScanType type = INCLUSIVE, typename InputIterator::value_type init = typename InputIterator::value_type());
00117 #endif
00118
00119 #ifdef SKEPU_CUDA
00120 public:
00121 template <typename T>
00122 void CU(Vector<T>& input, ScanType type = INCLUSIVE, T init = T(), int useNumGPU = 1);
00123
00124 template <typename InputIterator>
00125 void CU(InputIterator inputBegin, InputIterator inputEnd, ScanType type = INCLUSIVE, typename InputIterator::value_type init = typename InputIterator::value_type(), int useNumGPU = 1);
00126
00127 template <typename T>
00128 void CU(Vector<T>& input, Vector<T>& output, ScanType type = INCLUSIVE, T init = T(), int useNumGPU = 1);
00129
00130 template <typename InputIterator, typename OutputIterator>
00131 void CU(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, ScanType type = INCLUSIVE, typename InputIterator::value_type init = typename InputIterator::value_type(), int useNumGPU = 1);
00132
00133 private:
00134 template <typename InputIterator, typename OutputIterator>
00135 void scanSingleThread_CU(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, ScanType type, typename InputIterator::value_type init, int deviceID);
00136
00137 template <typename T>
00138 T scanLargeVectorRecursively_CU(DeviceMemPointer_CU<T>* input, DeviceMemPointer_CU<T>* output, std::vector<DeviceMemPointer_CU<T>*>& blockSums, unsigned int numElements, int level, ScanType type, T init, int deviceID);
00139
00140 #endif
00141
00142 #ifdef SKEPU_OPENCL
00143 public:
00144 template <typename T>
00145 void CL(Vector<T>& input, ScanType type = INCLUSIVE, T init = T(), int useNumGPU = 1);
00146
00147 template <typename InputIterator>
00148 void CL(InputIterator inputBegin, InputIterator inputEnd, ScanType type = INCLUSIVE, typename InputIterator::value_type init = typename InputIterator::value_type(), int useNumGPU = 1);
00149
00150 template <typename T>
00151 void CL(Vector<T>& input, Vector<T>& output, ScanType type = INCLUSIVE, T init = T(), int useNumGPU = 1);
00152
00153 template <typename InputIterator, typename OutputIterator>
00154 void CL(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, ScanType type = INCLUSIVE, typename InputIterator::value_type init = typename InputIterator::value_type(), int useNumGPU = 1);
00155
00156 private:
00157 template <typename InputIterator, typename OutputIterator>
00158 void scanSingle_CL(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, ScanType type, typename InputIterator::value_type init, int deviceID);
00159
00160 template <typename InputIterator, typename OutputIterator>
00161 void scanNumDevices_CL(InputIterator inputBegin, InputIterator inputEnd, OutputIterator outputBegin, ScanType type, typename InputIterator::value_type init, int numDevices);
00162
00163 template <typename T>
00164 T scanLargeVectorRecursively_CL(DeviceMemPointer_CL<T>* input, DeviceMemPointer_CL<T>* output, std::vector<DeviceMemPointer_CL<T>*>& blockSums, unsigned int numElements, int level, ScanType type, T init, int deviceID);
00165
00166 private:
00167 std::vector<std::pair<cl_kernel, Device_CL*> > m_scanKernels_CL;
00168 std::vector<std::pair<cl_kernel, Device_CL*> > m_scanUpdateKernels_CL;
00169 std::vector<std::pair<cl_kernel, Device_CL*> > m_scanAddKernels_CL;
00170
00171 void replaceText(std::string& text, std::string find, std::string replace);
00172 void createOpenCLProgram();
00173 #endif
00174
00175 };
00176
00177 }
00178
00179 #include "src/scan.inl"
00180
00181 #include "src/scan_cpu.inl"
00182
00183 #ifdef SKEPU_OPENMP
00184 #include "src/scan_omp.inl"
00185 #endif
00186
00187 #ifdef SKEPU_OPENCL
00188 #include "src/scan_cl.inl"
00189 #endif
00190
00191 #ifdef SKEPU_CUDA
00192 #include "src/scan_cu.inl"
00193 #endif
00194
00195 #endif