SkePU(integratedwithStarPU)  0.8.1
 All Classes Namespaces Files Functions Enumerations Friends Macros Groups Pages
device_mem_pointer_cu.h
Go to the documentation of this file.
1 
5 #ifndef DEVICE_MEM_POINTER_CU_H
6 #define DEVICE_MEM_POINTER_CU_H
7 
8 #ifdef SKEPU_CUDA
9 
10 #include <iostream>
11 #include <cuda.h>
12 #include <vector>
13 #include <algorithm>
14 #include <iterator>
15 
16 #include "debug.h"
17 
18 namespace skepu
19 {
20 
33 template <typename T>
35 {
36 
37 public:
38  DeviceMemPointer_CU(T* root, T* start, int numElements, int deviceID, int totalVecSize=-1);
40 
41  void copyHostToDevice(int numElements = -1, bool copyLast=true) const;
42  void copyDeviceToHost(int numElements = -1, bool copyLast=true) const;
43  void copyDeviceToDevice(T* copyToPointer,int numElements,int dstOffset = 0, int srcOffset = 0) const;
44 
45  T* getDeviceDataPointer() const;
46  int getDeviceID() const;
47  void changeDeviceData();
48 
49 private:
50 
51 
52  void copyHostToDevice_internal(T* srcPtr, T* destPtr, int numElements, int dstOffset=0) const;
53 
54  #ifndef DEFAULT_LMC
55  mutable std::vector<bool> vecMask;
56  #endif
57 
58  T* m_rootHostDataPointer;
59  T* m_effectiveHostDataPointer;
60  T* m_hostDataPointer;
61 
62 
63  T* m_deviceDataPointer;
64  T* m_effectiveDeviceDataPointer;
65 
66  int m_numElements;
67  int m_effectiveNumElements;
68  int m_deviceID;
69 
70  mutable bool deviceDataHasChanged;
71 };
72 
81 template <typename T>
82 DeviceMemPointer_CU<T>::DeviceMemPointer_CU(T* root, T* start, int numElements, int deviceID, int totalVecSize) : m_rootHostDataPointer(root), m_effectiveHostDataPointer(start), m_hostDataPointer(start), m_numElements(numElements), m_effectiveNumElements(numElements), m_deviceID(deviceID)
83 {
84  cudaError_t err;
85  size_t sizeVec = numElements*sizeof(T);
86 
87  DEBUG_TEXT_LEVEL2("Alloc: " <<numElements <<"\n")
88 
89 #ifdef SKEPU_MEASURE_TIME_DISTRIBUTION
90 devMemAllocTimer.start();
91 #endif
92  err = cudaMalloc((void**)&m_deviceDataPointer, sizeVec);
93  if(err != cudaSuccess){std::cerr<<"Error allocating memory on device\n";}
94 
95  m_effectiveDeviceDataPointer = m_deviceDataPointer;
96 
97  #ifndef DEFAULT_LMC
98  #ifdef USE_PESSIMISTIC_LMC
99  emptyRanges = std::vector<std::pair<int, int> >();
100  #endif
101  vecMask = std::vector<bool>( ((totalVecSize == -1) ? numElements: totalVecSize), false); // Sets mask for total vector
102  #endif
103 
104 #ifdef SKEPU_MEASURE_TIME_DISTRIBUTION
105 devMemAllocTimer.stop();
106 #endif
107  deviceDataHasChanged = false;
108 }
109 
113 template <typename T>
115 {
116  DEBUG_TEXT_LEVEL2("DeAlloc: " <<m_numElements <<"\n")
117 
118  cudaFree(m_deviceDataPointer);
119 }
120 
127 template <typename T>
128 void DeviceMemPointer_CU<T>::copyHostToDevice(int numElements, bool copyLast) const
129 {
130  if(m_hostDataPointer != NULL)
131  {
132  DEBUG_TEXT_LEVEL2("HOST_TO_DEVICE!!!\n")
133 
134  cudaError_t err;
135  size_t sizeVec;
136 
137  int totElements;
138  if(numElements == -1)
139  if(copyLast)
140  totElements = m_numElements;
141  else
142  totElements = m_effectiveNumElements;
143  else
144  totElements = numElements;
145 
146  sizeVec = totElements*sizeof(T);
147 
148 #ifdef SKEPU_MEASURE_TIME_DISTRIBUTION
149 #ifdef SKEPU_MEASURE_ONLY_COPY
150 cudaThreadSynchronize();
151 #endif
152 copyUpTimer.start();
153 #endif
154  T* tmpPointer;
155  if(copyLast)
156  {
157  err = cudaMemcpy(m_deviceDataPointer, m_hostDataPointer, sizeVec, cudaMemcpyHostToDevice);
158  tmpPointer = m_hostDataPointer;
159  }
160  else
161  {
162  err = cudaMemcpy(m_effectiveDeviceDataPointer, m_effectiveHostDataPointer, sizeVec, cudaMemcpyHostToDevice);
163  tmpPointer = m_effectiveHostDataPointer;
164  }
165 
166  if(err != cudaSuccess){std::cerr<<"Error copying data to device\n" <<cudaGetErrorString(err) <<"\n";}
167 #ifdef SKEPU_MEASURE_TIME_DISTRIBUTION
168 #ifdef SKEPU_MEASURE_ONLY_COPY
169 cudaThreadSynchronize();
170 #endif
171 copyUpTimer.stop();
172 #endif
173 
174  #ifndef DEFAULT_LMC // reset flag for bits copied back
175  int _limit= (tmpPointer+totElements)-m_rootHostDataPointer;
176  for(int i=(tmpPointer-m_rootHostDataPointer); i<_limit ; i++)
177  {
178  vecMask[i]=false;
179  }
180  #endif
181 
182  deviceDataHasChanged = false;
183  }
184 }
185 
186 
187 
188 
189 
198 template <typename T>
199 void DeviceMemPointer_CU<T>::copyDeviceToDevice(T* copyToPointer,int numElements, int dstOffset, int srcOffset) const
200 {
201  if(m_hostDataPointer != NULL)
202  {
203  DEBUG_TEXT_LEVEL2("DEVICE_TO_DEVICE!!!\n")
204 
205  cudaError_t err;
206  size_t sizeVec;
207 
208  if(numElements == -1)
209  sizeVec = m_numElements*sizeof(T);
210  else
211  sizeVec = numElements*sizeof(T);
212 
213  #ifdef SKEPU_MEASURE_TIME_DISTRIBUTION
214  #ifdef SKEPU_MEASURE_ONLY_COPY
215  cudaThreadSynchronize();
216  #endif
217  copyUpTimer.start();
218  #endif
219 
220  err = cudaMemcpy(copyToPointer+dstOffset, m_effectiveDeviceDataPointer+srcOffset, sizeVec, cudaMemcpyDeviceToDevice);
221 
222  if(err != cudaSuccess){std::cerr<<"Error copying data device to device\n" <<cudaGetErrorString(err) <<"\n";}
223  #ifdef SKEPU_MEASURE_TIME_DISTRIBUTION
224  #ifdef SKEPU_MEASURE_ONLY_COPY
225  cudaThreadSynchronize();
226  #endif
227  copyUpTimer.stop();
228  #endif
229  }
230 }
231 
232 
233 
240 template <typename T>
241 void DeviceMemPointer_CU<T>::copyDeviceToHost(int numElements, bool copyLast) const
242 {
243  if(deviceDataHasChanged && m_hostDataPointer != NULL)
244  {
245  DEBUG_TEXT_LEVEL2("DEVICE_TO_HOST!!!\n")
246 
247  cudaError_t err;
248  size_t sizeVec;
249 
250  #ifndef DEFAULT_LMC // ignore flags set by user especially copyLast
251  bool isWrite=false;
252  int lower = -1;
253  int upper = -1;
254  int _limit = ( (m_effectiveHostDataPointer+m_effectiveNumElements)-m_rootHostDataPointer);
255 
256  for(int i=(m_effectiveHostDataPointer-m_rootHostDataPointer); i< _limit;i++)
257  {
258  if(!isWrite && vecMask[i])
259  {
260  lower = (m_rootHostDataPointer + i) - m_effectiveHostDataPointer;
261  isWrite = true;
262  vecMask[i] = false;
263  }
264  else if( (isWrite && !vecMask[i]) || (isWrite && vecMask[i] && i==(_limit-1) ) )
265  {
266  upper = (m_rootHostDataPointer + i) - m_effectiveHostDataPointer;
267  if(isWrite && vecMask[i] && i==(_limit-1))
268  {
269  upper++;
270  vecMask[i] = false;
271  }
272  isWrite = false;
273  }
274  if(lower!=-1 && upper!=-1) // Need to write back thsi range
275  {
276  sizeVec = (upper-lower)*sizeof(T);
277 
278  #ifdef SKEPU_MEASURE_TIME_DISTRIBUTION
279  #ifdef SKEPU_MEASURE_ONLY_COPY
280  cudaThreadSynchronize();
281  #endif
282  copyDownTimer.start();
283  #endif
284 
285  err = cudaMemcpy(m_effectiveHostDataPointer+lower, m_effectiveDeviceDataPointer+lower, sizeVec, cudaMemcpyDeviceToHost);
286  if(err != cudaSuccess){std::cerr<<"Error copying data from device: " <<cudaGetErrorString(err) <<"\n";}
287 
288  #ifdef SKEPU_MEASURE_TIME_DISTRIBUTION
289  #ifdef SKEPU_MEASURE_ONLY_COPY
290  cudaThreadSynchronize();
291  #endif
292  copyDownTimer.stop();
293  #endif
294 
295  isWrite=false;
296  lower = -1;
297  upper = -1;
298  }
299  }
300 
301  deviceDataHasChanged = false; //
302 
303  #else
304  int totElements;
305  if(numElements == -1)
306  if(copyLast)
307  totElements = m_numElements;
308  else
309  totElements = m_effectiveNumElements;
310  else
311  totElements = numElements;
312 
313  sizeVec = totElements*sizeof(T);
314 
315  #ifdef SKEPU_MEASURE_TIME_DISTRIBUTION
316  #ifdef SKEPU_MEASURE_ONLY_COPY
317  cudaThreadSynchronize();
318  #endif
319  copyDownTimer.start();
320  #endif
321  if(copyLast)
322  err = cudaMemcpy(m_hostDataPointer, m_deviceDataPointer, sizeVec, cudaMemcpyDeviceToHost);
323  else
324  err = cudaMemcpy(m_effectiveHostDataPointer, m_effectiveDeviceDataPointer, sizeVec, cudaMemcpyDeviceToHost);
325 
326  if(err != cudaSuccess){std::cerr<<"Error copying data from device: " <<cudaGetErrorString(err) <<"\n";}
327 
328  #ifdef SKEPU_MEASURE_TIME_DISTRIBUTION
329  #ifdef SKEPU_MEASURE_ONLY_COPY
330  cudaThreadSynchronize();
331  #endif
332  copyDownTimer.stop();
333  #endif
334 
335  deviceDataHasChanged = false;
336 
337  #endif
338 
339  }
340 }
341 
350 template <typename T>
351 void DeviceMemPointer_CU<T>::copyHostToDevice_internal(T* srcPointer,T* dstPointer, int numElements, int dstOffset) const
352 {
353  if(m_hostDataPointer != NULL)
354  {
355  DEBUG_TEXT_LEVEL2("DEVICE_TO_DEVICE!!!\n")
356 
357  cudaError_t err;
358  size_t sizeVec = numElements*sizeof(T);
359 
360  #ifdef SKEPU_MEASURE_TIME_DISTRIBUTION
361  #ifdef SKEPU_MEASURE_ONLY_COPY
362  cudaThreadSynchronize();
363  #endif
364  copyUpTimer.start();
365  #endif
366 
367  err = cudaMemcpy(dstPointer+dstOffset, srcPointer, sizeVec, cudaMemcpyHostToDevice);
368 
369  if(err != cudaSuccess){std::cerr<<"Error copying data device to device\n" <<cudaGetErrorString(err) <<"\n";}
370  #ifdef SKEPU_MEASURE_TIME_DISTRIBUTION
371  #ifdef SKEPU_MEASURE_ONLY_COPY
372  cudaThreadSynchronize();
373  #endif
374  copyUpTimer.stop();
375  #endif
376  }
377 }
378 
379 
380 
381 
385 template <typename T>
387 {
388  return m_deviceDataPointer;
389 }
390 
394 template <typename T>
396 {
397  return m_deviceID;
398 }
399 
403 template <typename T>
405 {
406  deviceDataHasChanged = true;
407 
408  #ifndef DEFAULT_LMC
409  // Set modified flag for last interval.. (NB: not concurrency safe on same device)
410  int _limit = (m_hostDataPointer+m_numElements)-m_rootHostDataPointer;
411  for(int i=(m_hostDataPointer-m_rootHostDataPointer); i< _limit; i++)
412  {
413  vecMask[i] = true;
414  }
415  #endif
416 }
417 
418 }
419 
420 #endif
421 
422 #endif
423 
~DeviceMemPointer_CU()
Definition: device_mem_pointer_cu.h:114
int getDeviceID() const
Definition: device_mem_pointer_cu.h:395
DeviceMemPointer_CU(T *root, T *start, int numElements, int deviceID, int totalVecSize=-1)
Definition: device_mem_pointer_cu.h:82
Defines a few macros that can be used to output text when debugging. The macros use std::cerr...
T * getDeviceDataPointer() const
Definition: device_mem_pointer_cu.h:386
void copyDeviceToHost(int numElements=-1, bool copyLast=true) const
Definition: device_mem_pointer_cu.h:241
void copyHostToDevice(int numElements=-1, bool copyLast=true) const
Definition: device_mem_pointer_cu.h:128
void changeDeviceData()
Definition: device_mem_pointer_cu.h:404
void copyDeviceToDevice(T *copyToPointer, int numElements, int dstOffset=0, int srcOffset=0) const
Definition: device_mem_pointer_cu.h:199
A class representing a CUDA device memory allocation.
Definition: device_mem_pointer_cu.h:34