5 #ifndef DEVICE_MEM_POINTER_MATRIX_CU_H
6 #define DEVICE_MEM_POINTER_MATRIX_CU_H
// Pointer to the device-side buffer; it is filled in by cudaMalloc/cudaMallocPitch and released with cudaFree further down in this file.
50 T* m_deviceDataPointer;
// Element count of the matrix (rows times columns). The enclosing method's signature is not shown in this listing.
60 return m_rows * m_cols;
// Dirty flag: set to true by changeDeviceData(), reset to false after allocation and after each copy,
// and checked by copyDeviceToHost() before transferring. Declared mutable because the copy routines that reset it are const.
63 mutable bool deviceDataHasChanged;
// Constructor body (the signature is not part of this listing): allocates the device buffer for a rows x cols matrix of T.
// Byte size used by the linear (non-pitched) allocation below.
// NOTE(review): rows and cols are int, so rows*cols is evaluated in int before being widened by sizeof(T); confirm large matrices cannot overflow here.
79 size_t sizeVec = rows*cols*
sizeof(T);
81 DEBUG_TEXT_LEVEL2(
"Alloc: " << rows*cols <<
"\n")
// Make the device identified by m_deviceID current before allocating. The return value is not checked.
85 cudaSetDevice(m_deviceID);
87 m_usePitch = usePitch;
// Pitched allocation: rows rows of cols*sizeof(T) bytes each; cudaMallocPitch writes the resulting row pitch (in bytes) to m_pitch.
// NOTE(review): the condition selecting between this and the linear allocation is not visible here; presumably it tests m_usePitch.
90 err = cudaMallocPitch((
void**)&m_deviceDataPointer, &m_pitch, cols *
sizeof(T), rows);
// Convert the byte pitch into a count of T elements; the copy routines multiply m_pitch by sizeof(T) again to get bytes back.
// NOTE(review): this integer division truncates when the byte pitch is not a multiple of sizeof(T); confirm for the T types in use.
91 m_pitch = (m_pitch)/
sizeof(T);
// Linear allocation of sizeVec bytes.
95 err = cudaMalloc((
void**)&m_deviceDataPointer, sizeVec);
// NOTE(review): on failure only a message on std::cerr is visible in this listing; no other handling is shown.
98 if(err != cudaSuccess)
100 std::cerr<<
"Error allocating memory on device\n";
// A freshly allocated buffer holds nothing that needs to be copied back to the host.
103 deviceDataHasChanged =
false;
// Destructor: releases the device buffer held in m_deviceDataPointer.
112 template <
typename T>
115 DEBUG_TEXT_LEVEL2(
"DeAlloc: " <<
"\n")
// Make the owning device current, then free the allocation. Neither return value is checked.
117 cudaSetDevice(m_deviceID);
119 cudaFree(m_deviceDataPointer);
// copyHostToDevice: uploads the host buffer (m_hostDataPointer) into the device buffer (m_deviceDataPointer).
// rows / cols select how much to copy; -1 means "use the stored m_rows / m_cols".
128 template <
typename T>
// Nothing is copied when there is no host buffer.
131 if(m_hostDataPointer != NULL)
133 DEBUG_TEXT_LEVEL2(
"HOST_TO_DEVICE!!!\n")
// Resolve the -1 defaults to the stored matrix dimensions.
139 _rows = ((rows == -1) ? m_rows : rows);
140 _cols = ((cols == -1) ? m_cols : cols);
// Byte count for the linear (non-pitched) copies.
142 sizeVec = _rows * _cols *
sizeof(T);
144 cudaSetDevice(m_deviceID);
// With USE_PINNED_MEMORY the asynchronous copy variants are issued on m_dev->stream; the later pair of copies are the blocking variants.
// NOTE(review): the #else/#endif lines and the conditions choosing between the 2D (pitched) and linear copies are not visible in this listing.
// NOTE(review): no stream synchronisation is visible after the asynchronous uploads here; confirm callers order later work on the same stream.
146 #ifdef USE_PINNED_MEMORY
// 2D copy: destination row pitch is m_pitch elements converted to bytes; host rows are read with a stride of _cols elements; _rows rows of _cols*sizeof(T) bytes are transferred.
148 err = cudaMemcpy2DAsync(m_deviceDataPointer,m_pitch*
sizeof(T),m_hostDataPointer,_cols*
sizeof(T), _cols*
sizeof(T), _rows, cudaMemcpyHostToDevice, m_dev->stream);
150 err = cudaMemcpyAsync(m_deviceDataPointer, m_hostDataPointer, sizeVec, cudaMemcpyHostToDevice, m_dev->stream);
// Blocking counterparts of the two copies above, with the same pitch/width/height arguments.
153 err = cudaMemcpy2D(m_deviceDataPointer,m_pitch*
sizeof(T),m_hostDataPointer,_cols*
sizeof(T), _cols*
sizeof(T), _rows, cudaMemcpyHostToDevice);
155 err = cudaMemcpy(m_deviceDataPointer, m_hostDataPointer, sizeVec, cudaMemcpyHostToDevice);
// A failed copy is reported on std::cerr together with the CUDA error string.
158 if(err != cudaSuccess)
160 std::cerr<<
"Error copying data to device\n" <<cudaGetErrorString(err) <<
"\n";
// Clear the dirty flag after the upload, so copyDeviceToHost() will skip its transfer until changeDeviceData() is called.
163 deviceDataHasChanged =
false;
// copyDeviceToHost: downloads the device buffer (m_deviceDataPointer) into the host buffer (m_hostDataPointer).
// rows / cols select how much to copy; -1 means "use the stored m_rows / m_cols".
173 template <
typename T>
// Transfer only when the device copy was flagged as modified and a host buffer exists.
176 if(deviceDataHasChanged && m_hostDataPointer != NULL)
178 DEBUG_TEXT_LEVEL2(
"DEVICE_TO_HOST!!!\n")
// Resolve the -1 defaults to the stored matrix dimensions.
185 _rows = ((rows == -1) ? m_rows : rows);
186 _cols = ((cols == -1) ? m_cols : cols);
// Byte count for the linear (non-pitched) copies.
188 sizeVec = _rows * _cols *
sizeof(T);
190 cudaSetDevice(m_deviceID);
// With USE_PINNED_MEMORY the asynchronous copy variants are issued on m_dev->stream; the later pair of copies are the blocking variants.
// NOTE(review): the #else/#endif lines and the conditions choosing between the 2D (pitched) and linear copies are not visible in this listing.
192 #ifdef USE_PINNED_MEMORY
// 2D copy: host rows are written with a stride of _cols elements; device row pitch is m_pitch elements converted to bytes; _rows rows of _cols*sizeof(T) bytes are transferred.
194 err = cudaMemcpy2DAsync(m_hostDataPointer,_cols*
sizeof(T),m_deviceDataPointer,m_pitch*
sizeof(T), _cols*
sizeof(T), _rows, cudaMemcpyDeviceToHost, m_dev->stream);
196 err = cudaMemcpyAsync(m_hostDataPointer, m_deviceDataPointer, sizeVec, cudaMemcpyDeviceToHost, m_dev->stream);
// Wait for the stream so the host buffer is complete before it is used.
// NOTE(review): it is not visible here whether this wait also covers the 2D asynchronous path above; confirm against the full file.
197 cudaStreamSynchronize(m_dev->stream);
// Blocking counterparts of the two copies above, with the same pitch/width/height arguments.
200 err = cudaMemcpy2D(m_hostDataPointer,_cols*
sizeof(T),m_deviceDataPointer,m_pitch*
sizeof(T), _cols*
sizeof(T), _rows, cudaMemcpyDeviceToHost);
202 err = cudaMemcpy(m_hostDataPointer, m_deviceDataPointer, sizeVec, cudaMemcpyDeviceToHost);
// A failed copy is reported on std::cerr together with the CUDA error string.
205 if(err != cudaSuccess)
207 std::cerr<<
"Error copying data from device: " <<cudaGetErrorString(err) <<
"\n";
// Clear the dirty flag after the download.
210 deviceDataHasChanged =
false;
// getDeviceDataPointer: returns the raw pointer to the device buffer. It does not touch deviceDataHasChanged.
217 template <
typename T>
220 return m_deviceDataPointer;
// Template header of getDeviceID() const; the body is not part of this listing.
226 template <
typename T>
// changeDeviceData: marks the device copy as modified so that the next copyDeviceToHost() performs its transfer.
235 template <
typename T>
238 deviceDataHasChanged =
true;
~DeviceMemPointer_Matrix_CU()
Definition: device_mem_pointer_matrix_cu.h:113
void copyHostToDevice(int rows=-1, int cols=-1) const
Definition: device_mem_pointer_matrix_cu.h:129
void changeDeviceData()
Definition: device_mem_pointer_matrix_cu.h:236
T * getDeviceDataPointer() const
Definition: device_mem_pointer_matrix_cu.h:218
DeviceMemPointer_Matrix_CU(T *start, int rows, int cols, Device_CU *device, bool usePitch=false)
Definition: device_mem_pointer_matrix_cu.h:76
int getDeviceID() const
Definition: device_mem_pointer_matrix_cu.h:227
Contains a class declaration for the object that represents a CUDA device.
A class representing a CUDA 2D device memory allocation for Matrix container.
Definition: device_mem_pointer_matrix_cu.h:34
unsigned int getDeviceID() const
Definition: device_cu.h:346
A class representing a CUDA device.
Definition: device_cu.h:30
void copyDeviceToHost(int rows=-1, int cols=-1) const
Definition: device_mem_pointer_matrix_cu.h:174