SkePU  1.2
 All Classes Namespaces Files Functions Variables Enumerations Friends Macros Groups Pages
matrix.h
Go to the documentation of this file.
1 
5 #ifndef MATRIX_H
6 #define MATRIX_H
7 
8 #include <iostream>
9 #include <fstream>
10 #include <sstream>
11 #include <cstdlib>
12 
13 #include <vector>
14 #include <map>
15 
16 #ifdef SKEPU_OPENCL
17 #ifdef USE_MAC_OPENCL
18 #include <OpenCL/opencl.h>
19 #else
20 #include <CL/cl.h>
21 #endif
23 #endif
24 
25 
26 
27 #ifdef SKEPU_CUDA
29 #include <cuda.h>
30 #endif
31 
32 #include "skepu/src/malloc_allocator.h"
33 #include "skepu/src/environment.h"
34 
35 
36 
37 namespace skepu
38 {
39 
/*!
 * Can be used to specify whether the access is row-wise or column-wise.
 */
enum AccessType
{
    ROW_WISE, // C style: iterate row by row
    COL_WISE  // Fortran style: iterate column by column
};
50 
51 
52 
71 template<typename T>
72 class Matrix
73 {
74 
75  // typedefs
76 public:
77 
#ifdef SKEPU_CUDA
    // Handle type returned by updateDevice_CU() and stored in the CUDA pointer maps.
    // NOTE(review): restored from usage elsewhere in this class - confirm against upstream.
    typedef DeviceMemPointer_CU<T>* device_pointer_type_cu;
#endif

#ifdef SKEPU_OPENCL
    // Handle type returned by updateDevice_CL() and stored in the OpenCL pointer map.
    // NOTE(review): class name assumed by analogy with the CUDA typedef - confirm against upstream.
    typedef DeviceMemPointer_CL<T>* device_pointer_type_cl;
#endif
85 
86 #ifdef USE_PINNED_MEMORY
87  typedef std::vector<T, malloc_allocator<T> > container_type;
88  typedef typename std::vector<T, malloc_allocator<T> >::iterator vector_iterator;
89  typedef typename std::vector<T, malloc_allocator<T> >::size_type size_type;
90  typedef typename std::vector<T, malloc_allocator<T> >::value_type value_type;
91  typedef typename std::vector<T, malloc_allocator<T> >::difference_type difference_type;
92  typedef typename std::vector<T, malloc_allocator<T> >::pointer pointer;
93  typedef typename std::vector<T, malloc_allocator<T> >::reference reference;
94  typedef typename std::vector<T, malloc_allocator<T> >::const_reference const_reference;
95  typedef typename std::vector<T, malloc_allocator<T> >::const_iterator const_iterator;
96  typedef typename std::vector<T, malloc_allocator<T> >::const_reverse_iterator const_reverse_iterator;
97 #else
98  typedef std::vector<T> container_type;
99  typedef typename std::vector<T>::iterator vector_iterator;
100  typedef typename std::vector<T>::size_type size_type;
101  typedef typename std::vector<T>::value_type value_type;
102  typedef typename std::vector<T>::difference_type difference_type;
103  typedef typename std::vector<T>::pointer pointer;
104  typedef typename std::vector<T>::reference reference;
105  typedef typename std::vector<T>::const_reference const_reference;
106  typedef typename std::vector<T>::const_iterator const_iterator;
107  typedef typename std::vector<T>::const_reverse_iterator const_reverse_iterator;
108 #endif
109 
110 public: //-- For Testing --//
111 
112  void setValidFlag(bool val)
113  {
114  m_valid = val;
115  }
116 
121  {
122  return &m_data[0];
123  }
124 
130  friend std::ostream& operator<<(std::ostream &os, Matrix<T>& matrix)
131  {
132  matrix.updateHost();
133 
134  os << "Matrix: ("<< matrix.total_rows() <<" X "<<matrix.total_cols()<<")\n";
135  for(size_type i=0; i<matrix.size(); i++)
136  {
137  os<<(matrix(i))<<" ";
138  if((i+1)%(matrix.total_cols())==0)
139  os << "\n";
140  }
141  os<<"\n";
142  return os;
143  }
144 
145 
155  void randomize(int min = 0, int max = RAND_MAX)
156  {
158 
159  for(size_type i = 0; i < size(); i++)
160  {
161  m_data.at(i) = (T)( rand() % (int)(max-min+1) + min);
162  // m_data.at(i) = min + (T)rand()/((T)RAND_MAX/(max-min));
163  }
164  }
165 
174  void save(const std::string& filename)
175  {
176  updateHost();
177 
178  std::ofstream file(filename.c_str());
179 
180  if (file.is_open())
181  {
182  for(size_type i = 0; i < m_data.size(); ++i)
183  {
184  file<<m_data.at(i) <<" ";
185  }
186  file.close();
187  }
188  else
189  {
190  std::cout<<"Unable to open file\n";
191  }
192  }
193 
204  void load(const std::string& filename, size_type rowWidth, size_type numRows = 0)
205  {
207 
208  std::ifstream file(filename.c_str());
209 
210  if (file.is_open())
211  {
212  std::string line;
213  getline (file,line);
214  std::istringstream ss(line);
215  T num;
216  clear();
217 
218  //Load all elements
219  if(numRows == 0)
220  {
221  while(ss >> num)
222  {
223  push_back(num);
224  }
225  }
226  // Load only numElements elements
227  else
228  {
229  for(size_type i = 0; i < (numRows*rowWidth); ++i)
230  {
231  ss >> num;
232  push_back(num);
233  }
234  }
235 
236  m_cols = rowWidth;
237  m_rows = (size()/rowWidth);
238 
239  file.close();
240  }
241  else
242  {
243  std::cout<<"Unable to open file\n";
244  }
245  }
246 
247 
248 // Constructors, destructors
249 public:
250 
255  {
256 #ifdef SKEPU_OPENCL
257  releaseDeviceAllocations_CL();
258 #endif
259 
260 #ifdef SKEPU_CUDA
261  releaseDeviceAllocations_CU();
262 #endif
263 
264  if(m_transpose_matrix)
265  delete m_transpose_matrix;
266  }
267 
268 
274  Matrix(size_type _rows, size_type _cols): m_rows(_rows), m_cols(_cols), m_data(m_rows * m_cols), m_dataChanged(false), m_transpose_matrix(0), m_noValidDeviceCopy(true), m_valid(true)
275  {
276 #ifdef SKEPU_OPENCL
277  m_transposeKernels_CL = &(Environment<T>::getInstance()->m_transposeKernels_CL);
278 #endif
279  }
280 
287  Matrix(size_type _rows, size_type _cols, const T& val): m_rows(_rows), m_cols(_cols),m_data(m_rows * m_cols, val), m_dataChanged(false), m_transpose_matrix(0), m_noValidDeviceCopy(true), m_valid(true)
288  {
289 #ifdef SKEPU_OPENCL
290  m_transposeKernels_CL = &(Environment<T>::getInstance()->m_transposeKernels_CL);
291 #endif
292  }
293 
294 
301  Matrix(const Matrix<T>& copy): m_noValidDeviceCopy(true), m_valid(true)
302  {
303  copy.updateHost();
304  this->m_rows = copy.m_rows;
305  this->m_cols = copy.m_cols;
306  this->m_data= copy.m_data;
307  this->m_transpose_matrix = copy.m_transpose_matrix;
308  this->m_dataChanged = copy.m_dataChanged;
309 
310 #ifdef SKEPU_OPENCL
311  this->m_transposeKernels_CL = copy.m_transposeKernels_CL;
312 #endif
313  }
314 
315 private:
316  Matrix(): m_rows(0), m_cols(0),m_data(), m_dataChanged(false), m_transpose_matrix(0), m_noValidDeviceCopy(true), m_valid(true)
317  {}
318 
319 public:
320 
325  size_type size() const
326  {
327  return m_data.size();
328  }
329 
334  size_type total_rows() const
335  {
336  return m_rows;
337  }
338 
343  size_type total_cols() const
344  {
345  return m_cols;
346  }
347 
348  // highly dangerous, use with care.
349  T *getAddress()
350  {
351  return &m_data[0];
352  }
353 
359  {
360  size_type tmp = m_rows;
361  m_rows=m_cols;
362  m_cols = tmp;
363 
364  if(m_transpose_matrix && m_transpose_matrix->total_rows()==m_cols && m_transpose_matrix->total_cols()==m_rows && !m_dataChanged)
365  m_transpose_matrix->change_layout();
366  }
367 
368 private:
369 #ifdef SKEPU_CUDA
370 // std::map<std::pair< int, std::pair< T*, size_type > >, device_pointer_type_cu > m_deviceMemPointers_CU;
371  std::map<std::pair< T*, size_type >, device_pointer_type_cu > m_deviceMemPointers_CU[MAX_GPU_DEVICES];
372 
374  mutable std::map<std::pair< T*, size_type >, device_pointer_type_cu > m_deviceMemPointers_Modified_CU[MAX_GPU_DEVICES];
375 #endif
376 
377 #ifdef SKEPU_OPENCL
378  std::map<std::pair< cl_device_id, std::pair< T*, size_type > >, device_pointer_type_cl > m_deviceMemPointers_CL;
379 #endif
380 
381  size_type m_rows, m_cols;
382  bool m_dataChanged;
383  bool m_noValidDeviceCopy;
384 
385 
386 #ifdef USE_PINNED_MEMORY
387  mutable std::vector<T, malloc_allocator<T> > m_data;
388 #else
389  mutable std::vector<T> m_data;
390 #endif
391 
392  mutable bool m_valid;
394  // for col_iterator,
395  mutable Matrix<T> *m_transpose_matrix;
396 
397  template<typename Type>
398  void item_swap(Type &t1, Type &t2);
399 
400 
401 // External classes
402 public:
403  class iterator;
404 
405  class proxy_elem;
406 
407 public: //-- Operators --//
408 
// Resize, assignment, comparison and sub-matrix operations.
// Declarations only: the cross-reference index of this listing places the bodies in matrix.inl.
409  void resize(size_type _rows, size_type _cols, T val = T());
410 
// Assignment from another matrix, and from a single element value.
411  Matrix<T>& operator=(const Matrix<T>& other);
412  Matrix<T>& operator=(const T& elem);
413 
// NOTE(review): none of the comparison operators is const-qualified, so they
// cannot be invoked on a const Matrix; adding const would also require
// changing the definitions in matrix.inl.
414  bool operator==(const Matrix<T>& c1);
415  bool operator!=(const Matrix<T>& c1);
416  bool operator<(const Matrix<T>& c1);
417  bool operator>(const Matrix<T>& c1);
418  bool operator<=(const Matrix<T>& c1);
419  bool operator>=(const Matrix<T>& c1);
420 
421  Matrix<T>& subsection(size_type row, size_type col, size_type rowWidth, size_type colWidth);
422 
423 // #if SKEPU_DEBUG>0
424  std::string m_nameVerbose; // for debugging useful
425 // #endif
426 
427 public: //-- STL vector regular interface --//
428 
// Declarations only: the cross-reference index of this listing places the bodies in matrix.inl.
429  //Iterators
// begin()/end() exist for the whole matrix and, with a row argument, for a single row.
430  iterator begin();
431  const_iterator begin() const;
432  iterator begin(unsigned row);
433  const_iterator begin(unsigned row) const;
434 
435  iterator end();
436  const_iterator end() const;
437  iterator end(unsigned row);
438  const_iterator end(unsigned row) const;
439 
440  //Capacity
441  size_type capacity() const;
442 
443  void flush();
444  bool empty() const;
445 
446  //Element access
// Non-const accessors return a proxy_elem, const accessors a const reference.
447  proxy_elem at(size_type row, size_type col);
448  const T& at(size_type row, size_type col) const;
449 
// NOTE(review): the non-const row_back/row_front return size_type, whereas the
// non-const col_back/col_front below return proxy_elem - verify this asymmetry is intended.
450  size_type row_back(size_type row);
451  const T& row_back(size_type row) const;
452 
453  size_type row_front(size_type row);
454  const T& row_front(size_type row) const;
455 
456  proxy_elem col_back(size_type col);
457  const T& col_back(size_type col) const;
458 
459  proxy_elem col_front(size_type col);
460  const T& col_front(size_type col) const;
461 
462  void clear();
463 
464  iterator erase( iterator loc );
465  iterator erase( iterator start, iterator end );
466 
467  void swap(Matrix<T>& from);
468 
469 public: //-- Additions to interface --//
470 
// Backend-specific additions; each group is compiled only when its backend macro is defined.
// The cross-reference index of this listing places the bodies in matrix_cl.inl / matrix_cu.inl / matrix.inl.
471 #ifdef SKEPU_OPENCL
// Two overloads: one taking a (rows, cols) extent, one taking only cols.
472  device_pointer_type_cl updateDevice_CL(T* start, size_type rows, size_type cols, Device_CL* device, bool copy);
473  device_pointer_type_cl updateDevice_CL(T* start, size_type cols, Device_CL* device, bool copy);
474  void flush_CL();
475 #endif
476 
477 #ifdef SKEPU_CUDA
478  void copyDataToAnInvalidDeviceCopy(DeviceMemPointer_CU<T> *copy, unsigned int deviceID, unsigned int streamID = 0);
// NOTE(review): the two updateDevice_CU overloads order their trailing
// parameters differently (streamID position) - check call sites carefully.
479  device_pointer_type_cu updateDevice_CU(T* start, size_type rows, size_type cols, unsigned int deviceID, unsigned int streamID, bool copy, bool writeAccess, bool usePitch, bool markOnlyLocalCopiesInvalid=false);
480  device_pointer_type_cu updateDevice_CU(T* start, size_type cols, unsigned int deviceID, bool copy, bool writeAccess, bool markOnlyLocalCopiesInvalid=false, unsigned int streamID = 0);
481  void flush_CU();
482 
483  bool isMatrixOnDevice_CU(int deviceID);
484  bool isModified_CU(unsigned int deviceID);
485 #endif
486 
// Element access by (row, col) or by flat index.
487  // Care about device data
488  const T& operator()(const size_type row, const size_type col) const;
489 
490  // Care about device data
491  T& operator()(const size_type row, const size_type col);
492 
493  // Does not care about device data, use with care
494  T& operator()(const size_type index);
495 
496  // Care about device data
497  const T& operator[](const size_type index) const;
498 
499  // Care about device data
500  T& operator[](const size_type index);
501 
502 
507 
508 
// One transpose routine per backend; operator~ selects among them at compile time.
// The cross-reference index of this listing places the bodies in matrix_transpose.inl.
509  void transpose_CPU();
510 
511 #ifdef SKEPU_OPENMP
512  void transpose_OMP();
513 #endif
514 
515 #ifdef SKEPU_CUDA
516  void transpose_CU(Device_CU *device);
517 #endif
518 
519 #ifdef SKEPU_OPENCL
520  void transpose_CL(unsigned int deviceID);
// Set by the public constructors to point at the Environment's m_transposeKernels_CL.
521  std::vector<std::pair<cl_kernel, Device_CL*> > *m_transposeKernels_CL;
522 #endif
523 
524  // unary transpose operator
525  inline Matrix<T>& operator~()
526  {
527  if(m_transpose_matrix && m_transpose_matrix->m_rows==m_cols && m_transpose_matrix->m_cols==m_rows && !m_dataChanged)
528  return *m_transpose_matrix;
529 
530 #if defined(SKEPU_CUDA)
531  transpose_CU(Environment<int>::getInstance()->m_devices_CU.at(Environment<int>::getInstance()->bestCUDADevID));
532 #elif defined(SKEPU_OPENCL)
533  transpose_CL(0);
534 #elif defined(SKEPU_OPENMP)
535  transpose_OMP();
536 #else
537  transpose_CPU();
538 #endif
539 
540  m_dataChanged = false;
541 
542  return *m_transpose_matrix;
543  }
544 
545  // To be able to explicitly force updates without flushing entire matrix.
546  // Could be used with operator () above to avoid unneccesary function calls
547  // due to implicit synch.
548 
549  void updateHost() const;
550  void invalidateDeviceData();
554 
555 
// Compound-assignment operators, each overloaded for a matrix and for a single element value.
// All return a const reference to a Matrix.
// Declarations only: the cross-reference index of this listing places the bodies in matrix.inl.
556  const Matrix<T>& operator+=(const Matrix<T>& rhs);
557  const Matrix<T>& operator+=(const T& rhs);
558 
559  const Matrix<T>& operator-=(const Matrix<T>& rhs);
560  const Matrix<T>& operator-=(const T& rhs);
561 
562  const Matrix<T>& operator*=(const Matrix<T>& rhs);
563  const Matrix<T>& operator*=(const T& rhs);
564 
565  const Matrix<T>& operator/=(const Matrix<T>& rhs);
566  const Matrix<T>& operator/=(const T& rhs);
567 
568  const Matrix<T>& operator%=(const Matrix<T>& rhs);
569  const Matrix<T>& operator%=(const T& rhs);
570 
571 private:
572 
// Private backend-specific helpers; the destructor calls the releaseDeviceAllocations_* pair.
573 #ifdef SKEPU_OPENCL
574  void updateHost_CL() const;
575  void invalidateDeviceData_CL();
576  void releaseDeviceAllocations_CL();
577 #endif
578 
579 #ifdef SKEPU_CUDA
// deviceID defaults to -1 in all three CUDA helpers.
// NOTE(review): -1 presumably means "all devices" - confirm in matrix_cu.inl.
580  void updateHost_CU(int deviceID = -1) const;
581  void invalidateDeviceData_CU(int deviceID = -1);
582  void releaseDeviceAllocations_CU(int deviceID = -1);
583 #endif
584 
585 }; // end class Matrix...
586 
587 
588 } // end namespace skepu
589 
590 #include "src/matrix_iterator.inl"
591 
592 #include "src/matrix_proxy.inl"
593 #include "src/matrix.inl"
594 
595 #include "src/matrix_transpose.inl"
596 
597 
598 #ifdef SKEPU_OPENCL
599 #include "src/matrix_cl.inl"
600 #endif
601 
602 #ifdef SKEPU_CUDA
603 #include "src/matrix_cu.inl"
604 #endif
605 
606 #endif
607 
608 
609 
Contains the definitions of non-backend specific member functions for the Matrix container.
bool isMatrixOnDevice_CU(int deviceID)
Definition: matrix_cu.inl:147
iterator begin()
Definition: matrix.inl:381
Contains a class declaration for an object which represents an OpenCL device memory allocation for co...
Contains a class declaration for an object which represents a CUDA device memory allocation for Vect...
size_type capacity() const
Definition: matrix.inl:479
void clear()
Definition: matrix.inl:707
proxy_elem at(size_type row, size_type col)
Definition: matrix.inl:505
bool operator>(const Matrix< T > &c1)
Definition: matrix.inl:914
size_type row_front(size_type row)
Definition: matrix.inl:612
Matrix(size_type _rows, size_type _cols, const T &val)
Definition: matrix.h:287
Matrix< T > & subsection(size_type row, size_type col, size_type rowWidth, size_type colWidth)
Definition: matrix.inl:550
A matrix container class (2D matrix), internally uses 1D container (std::vector) to store elements in...
Definition: matrix.h:72
Contains the definitions of OpenCL specific member functions of the Matrix class. ...
void load(const std::string &filename, size_type rowWidth, size_type numRows=0)
Loads the Matrix from a file.
Definition: matrix.h:204
void transpose_OMP()
A method to take Matrix transpose on OpenMP backend.
Definition: matrix_transpose.inl:54
Contains the definitions of the Matrix transpose functions for CPU, CUDA, OpenMP and OpenCL...
size_type total_cols() const
Definition: matrix.h:343
#define MAX_GPU_DEVICES
Definition: globals.h:43
const Matrix< T > & operator-=(const Matrix< T > &rhs)
Definition: matrix.inl:126
bool operator!=(const Matrix< T > &c1)
Definition: matrix.inl:887
void transpose_CPU()
A method to take Matrix transpose on CPU backend.
Definition: matrix_transpose.inl:16
A matrix iterator class that traverses row-wise.
Definition: matrix_iterator.inl:20
void transpose_CU(Device_CU *device)
A method to take Matrix transpose on CUDA backend. Always uses 1 CUDA GPU for transpose even if multi...
Definition: matrix_transpose.inl:169
void save(const std::string &filename)
Saves content of Matrix to a file.
Definition: matrix.h:174
T * GetArrayRep()
Definition: matrix.h:120
A class representing an OpenCL device memory allocation for container.
Definition: device_mem_pointer_cl.h:38
const Matrix< T > & operator%=(const Matrix< T > &rhs)
Definition: matrix.inl:242
Matrix< T > & operator=(const Matrix< T > &other)
Definition: matrix.inl:33
T min(T a, T b)
Definition: mapoverlap_convol_kernels.h:212
const T & operator[](const size_type index) const
Definition: matrix.inl:836
void copyDataToAnInvalidDeviceCopy(DeviceMemPointer_CU< T > *copy, unsigned int deviceID, unsigned int streamID=0)
Used by updateDevice_CU function to copy data to a device copy.. the device copy could be a new one (...
Definition: matrix_cu.inl:20
size_type row_back(size_type row)
Definition: matrix.inl:579
bool empty() const
Definition: matrix.inl:490
void flush_CU()
Flushes the matrix.
Definition: matrix_cu.inl:388
proxy_elem col_back(size_type col)
Definition: matrix.inl:644
iterator end()
Definition: matrix.inl:430
void transpose_CL(unsigned int deviceID)
A method to take Matrix transpose on OpenCL backend. Always uses 1 OpenCL device for transpose even i...
Definition: matrix_transpose.inl:225
const Matrix< T > & operator/=(const Matrix< T > &rhs)
Definition: matrix.inl:203
void flush_CL()
Flushes the matrix.
Definition: matrix_cl.inl:92
T max(T a, T b)
Definition: mapoverlap_convol_kernels.h:203
size_type size() const
Definition: matrix.h:325
Contains the definitions of CUDA specific member functions of the Matrix class.
void invalidateDeviceData()
Definition: matrix.inl:308
const Matrix< T > & operator+=(const Matrix< T > &rhs)
Definition: matrix.inl:87
AccessType
Can be used to specify whether the access is row-wise or column-wise.
Definition: matrix.h:45
void updateHostAndInvalidateDevice()
Definition: matrix.inl:336
void updateHostAndReleaseDeviceAllocations()
Definition: matrix.inl:363
void flush()
Definition: matrix.inl:770
void resize(size_type _rows, size_type _cols, T val=T())
Definition: matrix.inl:52
A class representing a CUDA device memory allocation for container.
Definition: device_mem_pointer_cu.h:58
A class representing an execution environment.
Definition: environment.h:80
void releaseDeviceAllocations()
Definition: matrix.inl:346
void updateHost() const
Definition: matrix.inl:287
static Environment * getInstance()
Definition: environment.inl:90
Contains a class declaration for Environment class.
Matrix(size_type _rows, size_type _cols)
Definition: matrix.h:274
const Matrix< T > & operator*=(const Matrix< T > &rhs)
Definition: matrix.inl:165
device_pointer_type_cu updateDevice_CU(T *start, size_type rows, size_type cols, unsigned int deviceID, unsigned int streamID, bool copy, bool writeAccess, bool usePitch, bool markOnlyLocalCopiesInvalid=false)
Update device with matrix content.
Definition: matrix_cu.inl:176
proxy_elem col_front(size_type col)
Definition: matrix.inl:679
A class representing a CUDA device.
Definition: device_cu.h:30
A class representing an OpenCL device.
Definition: device_cl.h:36
const T & operator()(const size_type row, const size_type col) const
Definition: matrix.inl:790
Contains the definitions of Matrix::iterator class.
device_pointer_type_cl updateDevice_CL(T *start, size_type rows, size_type cols, Device_CL *device, bool copy)
Update device with matrix content.
Definition: matrix_cl.inl:27
void change_layout()
Definition: matrix.h:358
A class representing the column-wise iterator for the Matrix data-type.
bool operator>=(const Matrix< T > &c1)
Definition: matrix.inl:941
void randomize(int min=0, int max=RAND_MAX)
Randomizes the Matrix.
Definition: matrix.h:155
size_type total_rows() const
Definition: matrix.h:334
Contains the definitions of the Matrix::proxy_elem class.
void swap(Matrix< T > &from)
Definition: matrix.inl:719
bool operator==(const Matrix< T > &c1)
Definition: matrix.inl:874
bool isModified_CU(unsigned int deviceID)
Definition: matrix_cu.inl:484
Matrix(const Matrix< T > &copy)
Definition: matrix.h:301
~Matrix()
Definition: matrix.h:254