|
SkePU 0.7
|
00001 00005 #ifndef DEVICE_MEM_POINTER_CU_H 00006 #define DEVICE_MEM_POINTER_CU_H 00007 00008 #ifdef SKEPU_CUDA 00009 00010 #include <iostream> 00011 #include <cuda.h> 00012 00013 #include "debug.h" 00014 00015 #include "device_cu.h" 00016 00017 00018 namespace skepu 00019 { 00020 00033 template <typename T> 00034 class DeviceMemPointer_CU 00035 { 00036 00037 public: 00038 DeviceMemPointer_CU(T* start, int numElements, Device_CU *device); 00039 DeviceMemPointer_CU(T* start, int rows, int cols, Device_CU *device, bool usePitch=false); 00040 00041 DeviceMemPointer_CU(T* root, T* start, int numElements, Device_CU *device); 00042 ~DeviceMemPointer_CU(); 00043 00044 void copyHostToDevice(int numElements = -1) const; 00045 void copyDeviceToHost(int numElements = -1) const; 00046 T* getDeviceDataPointer() const; 00047 int getDeviceID() const; 00048 void changeDeviceData(); 00049 00050 size_t m_pitch; 00051 00052 size_t m_rows; 00053 00054 size_t m_cols; 00055 00056 // marks first initialization, useful when want to separate actual CUDA allocation and memory copy (HTD) such as when using mulit-GPU CUDA. 00057 mutable bool m_initialized; 00058 00059 private: 00060 T* m_hostDataPointer; 00061 T* m_deviceDataPointer; 00062 int m_numElements; 00063 int m_deviceID; 00064 00065 Device_CU *m_dev; 00066 00067 bool m_usePitch; 00068 00069 mutable bool deviceDataHasChanged; 00070 }; 00071 00080 template <typename T> 00081 DeviceMemPointer_CU<T>::DeviceMemPointer_CU(T* start, int numElements, Device_CU *device) : m_hostDataPointer(start), m_numElements(numElements), m_dev(device), m_rows(1), m_cols(numElements), m_pitch(numElements), m_initialized(false), m_usePitch(false) 00082 { 00083 cudaError_t err; 00084 size_t sizeVec = numElements*sizeof(T); 00085 00086 DEBUG_TEXT_LEVEL2("Alloc: " <<numElements <<"\n") 00087 00088 m_deviceID = m_dev->getDeviceID(); 00089 00090 cudaSetDevice(m_deviceID); 00091 00092 err = cudaMalloc((void**)&m_deviceDataPointer, sizeVec); 00093 if(err != cudaSuccess){std::cerr<<"Error allocating memory on device\n";} 00094 00095 deviceDataHasChanged = false; 00096 } 00097 00098 00099 00110 template <typename T> 00111 DeviceMemPointer_CU<T>::DeviceMemPointer_CU(T* start, int rows, int cols, Device_CU *device, bool usePitch) : m_hostDataPointer(start), m_numElements(rows*cols), m_rows(rows), m_cols(cols), m_dev(device), m_initialized(false), m_usePitch(usePitch) 00112 { 00113 cudaError_t err; 00114 size_t sizeVec = m_numElements*sizeof(T); 00115 00116 DEBUG_TEXT_LEVEL2("Alloc: " <<m_numElements <<"\n") 00117 00118 m_deviceID = m_dev->getDeviceID(); 00119 00120 cudaSetDevice(m_deviceID); 00121 00122 if(m_usePitch) 00123 { 00124 err = cudaMallocPitch((void**)&m_deviceDataPointer, &m_pitch, cols * sizeof(T), rows); 00125 m_pitch = (m_pitch)/sizeof(T); 00126 } 00127 else 00128 { 00129 err = cudaMalloc((void**)&m_deviceDataPointer, sizeVec); 00130 m_pitch = cols; 00131 } 00132 00133 if(err != cudaSuccess){std::cerr<<"Error allocating memory on device\n";} 00134 00135 deviceDataHasChanged = false; 00136 } 00137 00138 00148 template <typename T> 00149 DeviceMemPointer_CU<T>::DeviceMemPointer_CU(T* root, T* start, int numElements, Device_CU *device) : m_hostDataPointer(start), m_numElements(numElements), m_dev(device), m_rows(1), m_cols(numElements), m_pitch(numElements), m_initialized(false), m_usePitch(false) 00150 { 00151 cudaError_t err; 00152 size_t sizeVec = numElements*sizeof(T); 00153 00154 DEBUG_TEXT_LEVEL2("Alloc: " <<numElements <<"\n") 00155 00156 m_deviceID = m_dev->getDeviceID(); 00157 00158 cudaSetDevice(m_deviceID); 00159 00160 err = cudaMalloc((void**)&m_deviceDataPointer, sizeVec); 00161 if(err != cudaSuccess){std::cerr<<"Error allocating memory on device\n";} 00162 00163 deviceDataHasChanged = false; 00164 } 00165 00169 template <typename T> 00170 DeviceMemPointer_CU<T>::~DeviceMemPointer_CU() 00171 { 00172 DEBUG_TEXT_LEVEL2("DeAlloc: " <<m_numElements <<"\n") 00173 00174 cudaSetDevice(m_deviceID); 00175 00176 cudaFree(m_deviceDataPointer); 00177 } 00178 00184 template <typename T> 00185 void DeviceMemPointer_CU<T>::copyHostToDevice(int numElements) const 00186 { 00187 if(m_hostDataPointer != NULL) 00188 { 00189 DEBUG_TEXT_LEVEL1("HOST_TO_DEVICE: "<<((numElements==-1)? m_numElements: numElements)<<"!!!\n") 00190 00191 cudaError_t err; 00192 size_t sizeVec; 00193 00194 // used for pitch allocation. 00195 int _rows, _cols; 00196 00197 if(numElements < 1) 00198 { 00199 numElements = m_numElements; 00200 } 00201 if(m_usePitch) 00202 { 00203 if( (numElements%m_cols)!=0 || (numElements/m_cols)<1 ) // using pitch option, memory copy must be proper, respecting rows and cols 00204 { 00205 std::cerr<<"Error! Cannot copy data using pitch option when size mismatches with rows and columns. numElements: "<<numElements<<", rows:"<< m_rows <<", m_cols: "<<m_cols<<"\n"; 00206 } 00207 00208 _rows = numElements/m_cols; 00209 _cols = m_cols; 00210 } 00211 00212 sizeVec = numElements*sizeof(T); 00213 00214 cudaSetDevice(m_deviceID); 00215 00216 #ifdef USE_PINNED_MEMORY 00217 if(m_usePitch) 00218 err = cudaMemcpy2DAsync(m_deviceDataPointer,m_pitch*sizeof(T),m_hostDataPointer,_cols*sizeof(T), _cols*sizeof(T), _rows, cudaMemcpyHostToDevice, m_dev->stream); 00219 else 00220 err = cudaMemcpyAsync(m_deviceDataPointer, m_hostDataPointer, sizeVec, cudaMemcpyHostToDevice, m_dev->stream); 00221 #else 00222 if(m_usePitch) 00223 err = cudaMemcpy2D(m_deviceDataPointer,m_pitch*sizeof(T),m_hostDataPointer,_cols*sizeof(T), _cols*sizeof(T), _rows, cudaMemcpyHostToDevice); 00224 else 00225 err = cudaMemcpy(m_deviceDataPointer, m_hostDataPointer, sizeVec, cudaMemcpyHostToDevice); 00226 #endif 00227 00228 if(err != cudaSuccess){std::cerr<<"Error copying data to device\n" <<cudaGetErrorString(err) <<"\n";} 00229 00230 if(!m_initialized) // set that it is initialized 00231 m_initialized = true; 00232 00233 deviceDataHasChanged = false; 00234 } 00235 } 00236 00242 template <typename T> 00243 void DeviceMemPointer_CU<T>::copyDeviceToHost(int numElements) const 00244 { 00245 if(deviceDataHasChanged && m_hostDataPointer != NULL) 00246 { 00247 DEBUG_TEXT_LEVEL1("DEVICE_TO_HOST: "<<((numElements==-1)? m_numElements: numElements)<<"!!!\n") 00248 00249 cudaError_t err; 00250 size_t sizeVec; 00251 00252 // used for pitch allocation. 00253 int _rows, _cols; 00254 00255 if(numElements < 1) 00256 { 00257 numElements = m_numElements; 00258 } 00259 if(m_usePitch) 00260 { 00261 if( (numElements%m_cols)!=0 || (numElements/m_cols)<1 ) // using pitch option, memory copy must be proper, respecting rows and cols 00262 { 00263 std::cerr<<"Error! Cannot copy data using pitch option when size mismatches with rows and columns. numElements: "<<numElements<<", rows:"<< m_rows <<", m_cols: "<<m_cols<<"\n"; 00264 } 00265 00266 _rows = numElements/m_cols; 00267 _cols = m_cols; 00268 } 00269 00270 sizeVec = numElements*sizeof(T); 00271 00272 cudaSetDevice(m_deviceID); 00273 00274 #ifdef USE_PINNED_MEMORY 00275 if(m_usePitch) 00276 err = cudaMemcpy2DAsync(m_hostDataPointer,_cols*sizeof(T),m_deviceDataPointer,m_pitch*sizeof(T), _cols*sizeof(T), _rows, cudaMemcpyDeviceToHost, m_dev->stream); 00277 else 00278 err = cudaMemcpyAsync(m_hostDataPointer, m_deviceDataPointer, sizeVec, cudaMemcpyDeviceToHost, m_dev->stream); 00279 if(numElements!=-5) // if want not to wait, specify -5 00280 cudaStreamSynchronize(m_dev->stream); 00281 #else 00282 if(m_usePitch) 00283 err = cudaMemcpy2D(m_hostDataPointer,_cols*sizeof(T),m_deviceDataPointer,m_pitch*sizeof(T), _cols*sizeof(T), _rows, cudaMemcpyDeviceToHost); 00284 else 00285 err = cudaMemcpy(m_hostDataPointer, m_deviceDataPointer, sizeVec, cudaMemcpyDeviceToHost); 00286 #endif 00287 if(err != cudaSuccess){std::cerr<<"Error copying data from device: " <<cudaGetErrorString(err) <<"\n";} 00288 00289 deviceDataHasChanged = false; 00290 } 00291 } 00292 00296 template <typename T> 00297 T* DeviceMemPointer_CU<T>::getDeviceDataPointer() const 00298 { 00299 return m_deviceDataPointer; 00300 } 00301 00305 template <typename T> 00306 int DeviceMemPointer_CU<T>::getDeviceID() const 00307 { 00308 return m_deviceID; 00309 } 00310 00314 template <typename T> 00315 void DeviceMemPointer_CU<T>::changeDeviceData() 00316 { 00317 deviceDataHasChanged = true; 00318 } 00319 00320 } 00321 00322 #endif 00323 00324 #endif 00325
1.7.4