SkePU 0.7
include/skepu/src/device_mem_pointer_cu.h
Go to the documentation of this file.
00001 
00005 #ifndef DEVICE_MEM_POINTER_CU_H
00006 #define DEVICE_MEM_POINTER_CU_H
00007 
00008 #ifdef SKEPU_CUDA
00009 
00010 #include <iostream>
00011 #include <cuda.h>
00012 
00013 #include "debug.h"
00014 
00015 #include "device_cu.h"
00016 
00017 
00018 namespace skepu
00019 {
00020 
00033 template <typename T>
00034 class DeviceMemPointer_CU
00035 {
00036 
00037 public:
00038     DeviceMemPointer_CU(T* start, int numElements, Device_CU *device);
00039     DeviceMemPointer_CU(T* start, int rows, int cols, Device_CU *device, bool usePitch=false);
00040     
00041     DeviceMemPointer_CU(T* root, T* start, int numElements, Device_CU *device);
00042     ~DeviceMemPointer_CU();
00043 
00044     void copyHostToDevice(int numElements = -1) const;
00045     void copyDeviceToHost(int numElements = -1) const;
00046     T* getDeviceDataPointer() const;
00047     int getDeviceID() const;
00048     void changeDeviceData();
00049     
00050     size_t m_pitch;
00051     
00052     size_t m_rows;
00053     
00054     size_t m_cols;
00055     
00056     // marks first initialization, useful when want to separate actual CUDA allocation and memory copy (HTD) such as when using mulit-GPU CUDA.
00057     mutable bool m_initialized; 
00058 
00059 private:
00060     T* m_hostDataPointer;
00061     T* m_deviceDataPointer;
00062     int m_numElements;
00063     int m_deviceID;
00064     
00065     Device_CU *m_dev;
00066     
00067     bool m_usePitch;
00068 
00069     mutable bool deviceDataHasChanged;
00070 };
00071 
00080 template <typename T>
00081 DeviceMemPointer_CU<T>::DeviceMemPointer_CU(T* start, int numElements, Device_CU *device) : m_hostDataPointer(start), m_numElements(numElements), m_dev(device), m_rows(1), m_cols(numElements), m_pitch(numElements), m_initialized(false), m_usePitch(false)
00082 {
00083     cudaError_t err;
00084     size_t sizeVec = numElements*sizeof(T);
00085 
00086     DEBUG_TEXT_LEVEL2("Alloc: " <<numElements <<"\n")
00087     
00088     m_deviceID = m_dev->getDeviceID();
00089     
00090     cudaSetDevice(m_deviceID);
00091 
00092     err = cudaMalloc((void**)&m_deviceDataPointer, sizeVec);
00093     if(err != cudaSuccess){std::cerr<<"Error allocating memory on device\n";}
00094 
00095     deviceDataHasChanged = false;
00096 }
00097 
00098 
00099 
00110 template <typename T>
00111 DeviceMemPointer_CU<T>::DeviceMemPointer_CU(T* start, int rows, int cols, Device_CU *device, bool usePitch) : m_hostDataPointer(start), m_numElements(rows*cols), m_rows(rows), m_cols(cols), m_dev(device), m_initialized(false), m_usePitch(usePitch)
00112 {
00113     cudaError_t err;
00114     size_t sizeVec = m_numElements*sizeof(T);
00115 
00116     DEBUG_TEXT_LEVEL2("Alloc: " <<m_numElements <<"\n")
00117     
00118     m_deviceID = m_dev->getDeviceID();
00119     
00120     cudaSetDevice(m_deviceID);
00121     
00122     if(m_usePitch)
00123     {
00124         err = cudaMallocPitch((void**)&m_deviceDataPointer, &m_pitch, cols * sizeof(T), rows);  
00125         m_pitch = (m_pitch)/sizeof(T);
00126     }
00127     else
00128     {
00129         err = cudaMalloc((void**)&m_deviceDataPointer, sizeVec);
00130         m_pitch = cols;
00131     }
00132 
00133     if(err != cudaSuccess){std::cerr<<"Error allocating memory on device\n";}
00134 
00135     deviceDataHasChanged = false;
00136 }
00137 
00138 
00148 template <typename T>
00149 DeviceMemPointer_CU<T>::DeviceMemPointer_CU(T* root, T* start, int numElements, Device_CU *device) : m_hostDataPointer(start), m_numElements(numElements), m_dev(device), m_rows(1), m_cols(numElements), m_pitch(numElements), m_initialized(false), m_usePitch(false)
00150 {
00151     cudaError_t err;
00152     size_t sizeVec = numElements*sizeof(T);
00153 
00154     DEBUG_TEXT_LEVEL2("Alloc: " <<numElements <<"\n")
00155     
00156     m_deviceID = m_dev->getDeviceID();
00157     
00158     cudaSetDevice(m_deviceID);
00159 
00160     err = cudaMalloc((void**)&m_deviceDataPointer, sizeVec);
00161     if(err != cudaSuccess){std::cerr<<"Error allocating memory on device\n";}
00162 
00163     deviceDataHasChanged = false;
00164 }
00165 
00169 template <typename T>
00170 DeviceMemPointer_CU<T>::~DeviceMemPointer_CU()
00171 {
00172     DEBUG_TEXT_LEVEL2("DeAlloc: " <<m_numElements <<"\n")
00173     
00174     cudaSetDevice(m_deviceID);
00175 
00176     cudaFree(m_deviceDataPointer);
00177 }
00178 
00184 template <typename T>
00185 void DeviceMemPointer_CU<T>::copyHostToDevice(int numElements) const
00186 {
00187     if(m_hostDataPointer != NULL)
00188     {
00189         DEBUG_TEXT_LEVEL1("HOST_TO_DEVICE: "<<((numElements==-1)? m_numElements: numElements)<<"!!!\n")
00190 
00191         cudaError_t err;
00192         size_t sizeVec;
00193         
00194         // used for pitch allocation.
00195         int _rows, _cols;
00196 
00197         if(numElements < 1)
00198         {
00199             numElements = m_numElements;
00200         }
00201         if(m_usePitch)
00202         {
00203             if( (numElements%m_cols)!=0 || (numElements/m_cols)<1 ) // using pitch option, memory copy must be proper, respecting rows and cols
00204             {
00205                 std::cerr<<"Error! Cannot copy data using pitch option when size mismatches with rows and columns. numElements: "<<numElements<<",  rows:"<< m_rows <<", m_cols: "<<m_cols<<"\n";
00206             }
00207             
00208             _rows = numElements/m_cols;
00209             _cols = m_cols;
00210         }
00211         
00212         sizeVec = numElements*sizeof(T);
00213             
00214         cudaSetDevice(m_deviceID);    
00215 
00216         #ifdef USE_PINNED_MEMORY
00217             if(m_usePitch)
00218                 err = cudaMemcpy2DAsync(m_deviceDataPointer,m_pitch*sizeof(T),m_hostDataPointer,_cols*sizeof(T), _cols*sizeof(T), _rows, cudaMemcpyHostToDevice, m_dev->stream);    
00219             else    
00220                 err = cudaMemcpyAsync(m_deviceDataPointer, m_hostDataPointer, sizeVec, cudaMemcpyHostToDevice, m_dev->stream);
00221         #else
00222             if(m_usePitch)
00223                 err = cudaMemcpy2D(m_deviceDataPointer,m_pitch*sizeof(T),m_hostDataPointer,_cols*sizeof(T), _cols*sizeof(T), _rows, cudaMemcpyHostToDevice);    
00224             else    
00225                 err = cudaMemcpy(m_deviceDataPointer, m_hostDataPointer, sizeVec, cudaMemcpyHostToDevice);
00226         #endif
00227         
00228         if(err != cudaSuccess){std::cerr<<"Error copying data to device\n" <<cudaGetErrorString(err) <<"\n";}
00229         
00230         if(!m_initialized) // set that it is initialized
00231             m_initialized = true; 
00232 
00233         deviceDataHasChanged = false;
00234     }
00235 }
00236 
00242 template <typename T>
00243 void DeviceMemPointer_CU<T>::copyDeviceToHost(int numElements) const
00244 {
00245     if(deviceDataHasChanged && m_hostDataPointer != NULL)
00246     {
00247         DEBUG_TEXT_LEVEL1("DEVICE_TO_HOST: "<<((numElements==-1)? m_numElements: numElements)<<"!!!\n")
00248 
00249         cudaError_t err;
00250         size_t sizeVec;
00251 
00252         // used for pitch allocation.
00253         int _rows, _cols;
00254 
00255         if(numElements < 1)
00256         {
00257             numElements = m_numElements;
00258         }
00259         if(m_usePitch)
00260         {
00261             if( (numElements%m_cols)!=0 || (numElements/m_cols)<1 ) // using pitch option, memory copy must be proper, respecting rows and cols
00262             {
00263                 std::cerr<<"Error! Cannot copy data using pitch option when size mismatches with rows and columns. numElements: "<<numElements<<",  rows:"<< m_rows <<", m_cols: "<<m_cols<<"\n";
00264             }
00265             
00266             _rows = numElements/m_cols;
00267             _cols = m_cols;
00268         }
00269         
00270         sizeVec = numElements*sizeof(T);
00271             
00272         cudaSetDevice(m_deviceID);
00273              
00274         #ifdef USE_PINNED_MEMORY
00275             if(m_usePitch)
00276                 err = cudaMemcpy2DAsync(m_hostDataPointer,_cols*sizeof(T),m_deviceDataPointer,m_pitch*sizeof(T), _cols*sizeof(T), _rows, cudaMemcpyDeviceToHost, m_dev->stream);    
00277             else    
00278                 err = cudaMemcpyAsync(m_hostDataPointer, m_deviceDataPointer, sizeVec, cudaMemcpyDeviceToHost, m_dev->stream);
00279             if(numElements!=-5) // if want not to wait, specify -5
00280                 cudaStreamSynchronize(m_dev->stream);
00281         #else
00282             if(m_usePitch)
00283                 err = cudaMemcpy2D(m_hostDataPointer,_cols*sizeof(T),m_deviceDataPointer,m_pitch*sizeof(T), _cols*sizeof(T), _rows, cudaMemcpyDeviceToHost);    
00284             else    
00285                 err = cudaMemcpy(m_hostDataPointer, m_deviceDataPointer, sizeVec, cudaMemcpyDeviceToHost);
00286         #endif
00287         if(err != cudaSuccess){std::cerr<<"Error copying data from device: " <<cudaGetErrorString(err) <<"\n";}
00288 
00289         deviceDataHasChanged = false;
00290     }
00291 }
00292 
00296 template <typename T>
00297 T* DeviceMemPointer_CU<T>::getDeviceDataPointer() const
00298 {
00299     return m_deviceDataPointer;
00300 }
00301 
00305 template <typename T>
00306 int DeviceMemPointer_CU<T>::getDeviceID() const
00307 {
00308     return m_deviceID;
00309 }
00310 
00314 template <typename T>
00315 void DeviceMemPointer_CU<T>::changeDeviceData()
00316 {
00317     deviceDataHasChanged = true;
00318 }
00319 
00320 }
00321 
00322 #endif
00323 
00324 #endif
00325 
 All Classes Namespaces Files Functions Enumerations Friends Defines