SkePU(integratedwithStarPU)  0.8.1
 All Classes Namespaces Files Functions Enumerations Friends Macros Groups Pages
matrix.h
1 #ifndef MATRIX_H
2 #define MATRIX_H
3 
4 #include <iostream>
5 #include <fstream>
6 #include <sstream>
7 #include <cstdlib>
8 
9 #include <vector>
10 
11 #include <map>
12 
13 
14 
15 #include <starpu.h>
16 
17 #include "skepu/src/environment.h"
18 
19 #include "skepu/src/malloc_allocator.h"
20 
21 
22 
23 
24 
25 namespace skepu
26 {
27 
34  {
35  ROW_WISE, //C style iterating from rows
36  COL_WISE // fortran style iterating from columns
37  };
38 
39 
40 
41  #ifdef UNEVEN_PART
42 /* This filter function takes a vector, and divides it into uneven two nparts with different no of non zero entries. */
43 static void matrix_filter_uneven_func(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nchunks)
44 {
45  struct starpu_matrix_interface *matrix_father = (struct starpu_matrix_interface *) father_interface;
46  struct starpu_matrix_interface *matrix_child = (struct starpu_matrix_interface *) child_interface;
47 
48  uint32_t nx = matrix_father->nx;
49  uint32_t ny = matrix_father->ny;
50  size_t elemsize = matrix_father->elemsize;
51 
52  #ifdef SKEPU_PARTDIM
53  assert(nchunks <= ny && nchunks==SKEPU_PARTDIM);
54  #else
55  assert(nchunks <= ny && nchunks==2);
56  #endif
57 
58  uint32_t chunk_size = SKEPU_PARTS[id];
59 
60  size_t offset = 0;
61  for(int i=0; i<(id); ++i)
62  {
63  offset += SKEPU_PARTS[i] * (matrix_father->ld) * elemsize;
64  }
65 
66  std::cerr << "chunk_size: " << chunk_size << ", offset: " << offset << "\n";
67 
68  uint32_t child_ny = chunk_size;
69 
70  matrix_child->ld = matrix_father->ld;
71  matrix_child->nx = nx;
72  matrix_child->ny = child_ny;
73  matrix_child->elemsize = elemsize;
74 
75  if (matrix_father->ptr)
76  matrix_child->ptr = matrix_father->ptr + offset;
77 
78  if (matrix_father->dev_handle)
79  {
80  matrix_child->dev_handle = matrix_father->dev_handle;
81  matrix_child->offset = matrix_father->offset + offset;
82  }
83 }
84 
85 #endif
86 
87 
88 
102 template<typename T>
103 class Matrix
104 {
105 
106 private:
107 
108  void unpartitionMatrix() const
109  {
110  DEBUG_TEXT_LEVEL1("***** MATRIX UNPARTIOTIINING xparts: "<< xparts <<", yparts: "<< yparts <<" *****\n")
111  if(xparts>1 && yparts >1)
112  starpu_data_unpartition(matrix_handle, 0); // need to check as it might only unpartition only one filter.
113  else if(xparts>1 || yparts >1)
114  starpu_data_unpartition(matrix_handle, 0); // no matter x or y filter, unpartitioning logic is same.
115  xparts=1;
116  yparts=1;
117  }
118 
122  void release_acquire()
123  {
124  if(isAcquire)
125  {
126  if(xparts>1 || yparts>1) // partitioning
127  {
128  DEBUG_TEXT_LEVEL1("***** MATRIX RELEASE **** xparts: "<< xparts << ", yparts: "<< yparts <<"\n")
129 
130  if(xparts > 1 && yparts > 1) // 2d partitioning
131  {
132  for (int y = 0; y < yparts; y++)
133  for (int x = 0; x < xparts; x++)
134  {
135  starpu_data_release(starpu_data_get_sub_data(matrix_handle, 2, x, y));
136  }
137  }
138  else // 1D partitioning
139  {
140  int parts = (xparts>1) ? xparts : yparts;
141  // parts should be equal to what parts variable points to
142  assert (parts == starpu_data_get_nb_children(matrix_handle));
143  for (int x = 0; x < parts; x++)
144  {
145  starpu_data_release(starpu_data_get_sub_data(matrix_handle, 1, x));
146  }
147  }
148  }
149  else // no partitioning
150  {
151  DEBUG_TEXT_LEVEL1("***** MATRIX RELEASE ****\n")
152  starpu_data_release(matrix_handle);
153  }
154  isAcquire = false;
155  }
156  }
157 
158 
159  public:
160 
161 
162  starpu_data_handle_t matrix_handle;
163  mutable bool isOnStarPU;
164  mutable bool isReadBack;
165  mutable bool isWriteBack;
166  mutable int xparts; // horizontal partitioning
167  mutable int yparts; // vertical partitioning
168 
169  mutable bool isAcquire;
170  mutable enum starpu_data_access_mode mode;
171 
172  struct starpu_data_filter matrix_filter_x;
173  struct starpu_data_filter matrix_filter_y;
174  //enum starpu_data_access_mode accessMode;
175 
179  starpu_data_handle_t registerMatrix()
180  {
181  release_acquire();
182 
183  if(!isOnStarPU)
184  {
185  starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)&m_data[0], m_cols, m_cols, m_rows, sizeof(m_data[0]));
186  isOnStarPU=true;
187  DEBUG_TEXT_LEVEL1("***** MATRIX REGISTERING **** rows: "<< m_rows <<" cols: "<< m_cols <<" \n")
188  }
189  else if(xparts>1 || yparts >1)
190  {
191  unpartitionMatrix();
192  }
193  return matrix_handle;
194  }
195 
199  void unregisterMatrix(bool update=true)
200  {
201  if(isOnStarPU)
202  {
203  release_acquire(); // Should we release before unregister? Yes!
204 
205  if(xparts>1 || yparts >1) // do we need to unpartition before unregistering? Yes!
206  {
207  unpartitionMatrix();
208  }
209 
210  if(update)
211  starpu_data_unregister(matrix_handle);
212 
213  else // dont need to get updated data back
214  starpu_data_unregister_no_coherency(matrix_handle);
215 
216  isOnStarPU = false;
217  isAcquire = false;
218  }
219  }
220 
224  starpu_data_handle_t registerPartitions(int _xparts=1, int _yparts=1)
225  {
226  release_acquire();
227 
228  if(!isOnStarPU) // if not registered, register it first
229  {
230  starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)&m_data[0], m_cols, m_cols, m_rows, sizeof(m_data[0]));
231  isOnStarPU=true;
232  DEBUG_TEXT_LEVEL1("***** MATRIX REGISTERING **** rows: "<< m_rows <<" cols: "<< m_cols <<" \n")
233  }
234 
235  if(_xparts == xparts && _yparts==yparts)
236  return matrix_handle;
237  if(xparts>1 || yparts>1)
238  {
239  unpartitionMatrix();
240  }
241  if(_xparts>1 && _yparts>1)
242  {
243  matrix_filter_x.nchildren = _xparts;
244  matrix_filter_y.nchildren = _yparts;
245  starpu_data_map_filters(matrix_handle, 2, &matrix_filter_x, &matrix_filter_y);
246  DEBUG_TEXT_LEVEL1("***** MATRIX PARTIOTIINING xparts: "<< _xparts <<", yparts: "<< _yparts <<" *****\n")
247  xparts = _xparts;
248  yparts = _yparts;
249  }
250  else if(_xparts>1)
251  {
252  matrix_filter_x.nchildren = _xparts;
253  starpu_data_partition(matrix_handle, &matrix_filter_x);
254  DEBUG_TEXT_LEVEL1("***** MATRIX PARTIOTIINING xparts: "<< _xparts <<" *****\n")
255  xparts = _xparts;
256  }
257  else if(_yparts>1)
258  {
259  matrix_filter_y.nchildren = _yparts;
260  starpu_data_partition(matrix_handle, &matrix_filter_y);
261  DEBUG_TEXT_LEVEL1("***** MATRIX PARTIOTIINING yparts: "<< _yparts <<" *****\n")
262  yparts = _yparts;
263  }
264 
265  return matrix_handle;
266  }
267 
268 
269 
270  // typedefs
271 #ifdef USE_PINNED_MEMORY
272  typedef std::vector<T, malloc_allocator<T> > container_type;
273  typedef typename std::vector<T, malloc_allocator<T> >::size_type size_type;
274  typedef typename std::vector<T, malloc_allocator<T> >::value_type value_type;
275  typedef typename std::vector<T, malloc_allocator<T> >::difference_type difference_type;
276  typedef typename std::vector<T, malloc_allocator<T> >::pointer pointer;
277  typedef typename std::vector<T, malloc_allocator<T> >::reference reference;
278  typedef typename std::vector<T, malloc_allocator<T> >::const_reference const_reference;
279  typedef typename std::vector<T, malloc_allocator<T> >::const_iterator const_iterator;
280  typedef typename std::vector<T, malloc_allocator<T> >::const_reverse_iterator const_reverse_iterator;
281 #else
282  typedef std::vector<T> container_type;
283  typedef typename std::vector<T>::size_type size_type;
284  typedef typename std::vector<T>::value_type value_type;
285  typedef typename std::vector<T>::difference_type difference_type;
286  typedef typename std::vector<T>::pointer pointer;
287  typedef typename std::vector<T>::reference reference;
288  typedef typename std::vector<T>::const_reference const_reference;
289  typedef typename std::vector<T>::const_iterator const_iterator;
290  typedef typename std::vector<T>::const_reverse_iterator const_reverse_iterator;
291 #endif
292 
293  public: //-- For Testing --//
294 
299  {
300  return &m_data[0];
301  }
302 
308  friend std::ostream& operator<<(std::ostream &os, Matrix<T>& matrix)
309  {
310  matrix.acquireRead();
311 
312  os << "Matrix: ("<< matrix.total_rows() <<" X "<<matrix.total_cols()<<")\n";
313  for(int i=0;i<matrix.size();i++)
314  {
315  os<<(matrix(i))<<" ";
316  if((i+1)%(matrix.total_cols())==0)
317  os << "\n";
318  }
319  os<<"\n";
320  return os;
321  }
322 
323 
333  void randomize(int min = 0, int max = RAND_MAX)
334  {
335  unregisterMatrix(false);
336 
337  for(unsigned int i = 0; i < size(); i++)
338  {
339  m_data.at(i) = (T)( rand() % max + min);
340  }
341  }
342 
351  void save(const std::string& filename)
352  {
353  std::ofstream file(filename.c_str());
354 
355  if (file.is_open())
356  {
357  for(unsigned int i = 0; i < m_data.size(); ++i)
358  {
359  file<<m_data.at(i) <<" ";
360  }
361  file.close();
362  }
363  else
364  {
365  std::cout<<"Unable to open file\n";
366  }
367  }
368 
379  void load(const std::string& filename, int rowWidth, int numRows = 0)
380  {
381  std::ifstream file(filename.c_str());
382 
383  if (file.is_open())
384  {
385  std::string line;
386  getline (file,line);
387  std::istringstream ss(line);
388  T num;
389  clear();
390 
391  //Load all elements
392  if(numRows == 0)
393  {
394  while(ss >> num)
395  {
396  push_back(num);
397  }
398  }
399  // Load only numElements elements
400  else
401  {
402  for(int i = 0; i < (numRows*rowWidth); ++i)
403  {
404  ss >> num;
405  push_back(num);
406  }
407  }
408 
409  m_cols = rowWidth;
410  m_rows = (size()/rowWidth);
411 
412  file.close();
413  }
414  else
415  {
416  std::cout<<"Unable to open file\n";
417  }
418  }
419 
420 
421 
422 
423 // Constructors, destructors
424 public:
425 
430  {
431  DEBUG_TEXT_LEVEL1("***** MATRIX DESTRUCTOR **** rows: "<< total_rows()<<", cols: "<< total_cols() <<"\n")
432  if(isOnStarPU)
433  {
434  release_acquire();
435 
436  if(xparts>1 || yparts>1)
437  unpartitionMatrix();
438 
439  starpu_data_unregister(matrix_handle);
440  }
441  }
442 
443 
449  Matrix(size_type _rows, size_type _cols): m_rows(_rows), m_cols(_cols), m_data(m_rows * m_cols), transpose_matrix(0)
450  {
451  isOnStarPU = false;
452  isAcquire = false;
453  xparts=1;
454  yparts=1;
455 
456  memset(&matrix_filter_x, 0, sizeof(matrix_filter_x));
457 
458  #ifdef UNEVEN_PART
459  matrix_filter_x.filter_func=matrix_filter_uneven_func;
460  #else
461  matrix_filter_x.filter_func=starpu_vertical_block_filter_func; // starpu_matrix_filter_vertical_block
462  #endif
463 
464  memset(&matrix_filter_y, 0, sizeof(matrix_filter_y));
465  matrix_filter_y.filter_func=starpu_block_filter_func; // starpu_matrix_filter_block
466  }
467 
474  Matrix(size_type _rows, size_type _cols, const T& val): m_rows(_rows), m_cols(_cols),m_data(m_rows * m_cols, val), transpose_matrix(0)
475  {
476  isOnStarPU = false;
477  isAcquire = false;
478  xparts=1;
479  yparts=1;
480  memset(&matrix_filter_x, 0, sizeof(matrix_filter_x));
481 
482  #ifdef UNEVEN_PART
483  matrix_filter_x.filter_func=matrix_filter_uneven_func;
484  #else
485  matrix_filter_x.filter_func=starpu_vertical_block_filter_func; // starpu_matrix_filter_vertical_block
486  #endif
487 
488  memset(&matrix_filter_y, 0, sizeof(matrix_filter_y));
489  matrix_filter_y.filter_func=starpu_block_filter_func;
490  }
491 
492 
499  Matrix(const Matrix<T>& copy)
500  {
501  copy.acquireRead();
502 
503  this->m_rows = copy.total_rows();
504  this->m_cols = copy.total_cols();
505  this->m_data= copy.m_data;
506  this->transpose_matrix = copy.transpose_matrix;
507 
508  isOnStarPU = copy.isOnStarPU;
509  isAcquire = copy.isAcquire;
510 
511  matrix_filter_x = copy.matrix_filter_x;
512  matrix_filter_y = copy.matrix_filter_y;
513 
514  xparts=copy.xparts;
515  yparts=copy.yparts;
516  }
517 
518  public:
519 
524  size_type size() const { return m_data.size();}
525 
530  size_type total_rows() const {return m_rows; }
531 
536  size_type total_cols() const {return m_cols; }
537 
543  {
544  int tmp = m_rows;
545  m_rows=m_cols;
546  m_cols = tmp;
547  }
548 
549 
550 
551  private:
552  size_type m_rows, m_cols;
553 
554 // A custom memory allocator is written to support pinned memory allocation for CUDA. Enabled by defining USE_PINNED_MEMORY macro
555 #ifdef USE_PINNED_MEMORY
556  mutable std::vector<T, malloc_allocator<T> > m_data;
557 #else
558  mutable std::vector<T> m_data;
559 #endif
560 
561 
562  // for col_iterator, not tested
563  mutable Matrix<T> *transpose_matrix;
564 
565  template<typename Type>
566  void item_swap(Type &t1, Type &t2);
567 
568 
569  // External classes
570 public:
571  //template <AccessType accessType=ROW_WISE>
572  class iterator;
573 
574  class col_iterator;
575 
576  class proxy_elem;
577 
578 
579 
580 
581 public: //-- Operators --//
582 
583  void resize(size_type _rows, size_type _cols, T val = T());
584 
585  Matrix<T>& operator=(Matrix<T>& other);
586  Matrix<T>& operator=(const T& elem);
587 
588  bool operator==(const Matrix<T>& c1);
589  bool operator!=(const Matrix<T>& c1);
590  bool operator<(const Matrix<T>& c1);
591  bool operator>(const Matrix<T>& c1);
592  bool operator<=(const Matrix<T>& c1);
593  bool operator>=(const Matrix<T>& c1);
594 
595  Matrix<T>& subsection(size_type row, size_type col, size_type rowWidth, size_type colWidth);
596 
597 
598 public: //-- STL vector regular interface --//
599  //Iterators
600  iterator begin();
601  const_iterator begin() const;
602  iterator begin(unsigned row);
603  const_iterator begin(unsigned row) const;
604 
605  iterator end();
606  const_iterator end() const;
607  iterator end(unsigned row);
608  const_iterator end(unsigned row) const;
609 
610  //Column Iterators, taking transpose and using its iterator, work only for const_iterators
611  // iterator col_begin();
612  std::pair<const_iterator, const_iterator> col_iterator_range();
613  col_iterator col_begin();
614 
615  // iterator col_end();
616  col_iterator col_end();
617 
618 
619  //Capacity
620  size_type capacity() const;
621 
622 
623 
624  void flush();
625 
626 
627 
628  bool empty() const;
629 
630  //Element access
631  proxy_elem at(size_type row, size_type col);
632  const T& at(size_type row, size_type col) const;
633 
634  size_type row_back(size_type row);
635  const T& row_back(size_type row) const;
636 
637  size_type row_front(size_type row);
638  const T& row_front(size_type row) const;
639 
640  proxy_elem col_back(size_type col);
641  const T& col_back(size_type col) const;
642 
643  proxy_elem col_front(size_type col);
644  const T& col_front(size_type col) const;
645 
646  void clear();
647 
648  iterator erase( iterator loc );
649  iterator erase( iterator start, iterator end );
650 
651  void swap(Matrix<T>& from);
652 
653 public: //-- Additions to interface --//
654 
655  // Does care about device data
656 // proxy_elem operator()(const size_type row, const size_type col) const;
657 
658  // Does care about device data
659  const T& operator()(const size_type row, const size_type col) const;
660 
661  T& operator()(const size_type row, const size_type col);
662 
663  // Does not care about device data, use with care
664  T& operator()(const size_type index);
665 
666  // Does not care about device data, use with care
667  T& operator[](const size_type index);
668 
669  // unary transpose operator
670  inline Matrix<T> operator~()
671  {
672  Matrix<T> temp(total_cols(),total_rows());
673  for (size_t i=0; i < total_rows(); i++)
674  for (size_t j=0; j < total_cols(); j++)
675  {
676  T x = (*this)(i,j);
677  temp(j,i) = x;
678  }
679  return temp;
680  }
681 
682  Matrix<T>& get_transpose_matrix(bool updated = false)
683  {
684  if(transpose_matrix==0 || updated)
685  {
686  transpose_matrix = new Matrix<T>(total_cols(),total_rows());
687  for (size_t i=0; i < total_rows(); i++)
688  for (size_t j=0; j < total_cols(); j++)
689  {
690  T x = (*this)(i,j);
691  (*transpose_matrix)(j,i) = x;
692  }
693  }
694  return (*transpose_matrix);
695  }
696 
697 
698 
699  // To be able to explicitly force updates without flushing entire matrix.
700  // Could be used with operator () above to avoid unneccesary function calls
701  // due to implicit synch.
702 
703  void acquireRead() const;
704  void acquireReadWrite();
705 
706 // void updateHost() const;
707 // void invalidateDevice();
708 // void updateHostAndInvalidateDevice();
709 
710 
711  const Matrix<T>& operator+=(const Matrix<T>& rhs);
712  const Matrix<T>& operator+=(const T& rhs);
713 
714  const Matrix<T>& operator-=(const Matrix<T>& rhs);
715  const Matrix<T>& operator-=(const T& rhs);
716 
717  const Matrix<T>& operator*=(const Matrix<T>& rhs);
718  const Matrix<T>& operator*=(const T& rhs);
719 
720  const Matrix<T>& operator/=(const Matrix<T>& rhs);
721  const Matrix<T>& operator/=(const T& rhs);
722 
723  const Matrix<T>& operator%=(const Matrix<T>& rhs);
724  const Matrix<T>& operator%=(const T& rhs);
725 };
726 
727 }
728 
729 #include "src/matrix_iterator.inl"
730 
731 #include "src/matrix_col_iterator.inl"
732 
733 #include "src/matrix_proxy.inl"
734 #include "src/matrix.inl"
735 
736 #endif
737 
738 
T & operator[](const size_type index)
Definition: matrix.inl:947
void acquireReadWrite()
Definition: matrix.inl:335
iterator begin()
Definition: matrix.inl:421
size_type capacity() const
Definition: matrix.inl:567
void unregisterMatrix(bool update=true)
Definition: matrix.h:199
void clear()
Definition: matrix.inl:794
bool operator>(const Matrix< T > &c1)
Definition: matrix.inl:1012
Matrix(size_type _rows, size_type _cols, const T &val)
Definition: matrix.h:474
Matrix< T > & subsection(size_type row, size_type col, size_type rowWidth, size_type colWidth)
Definition: matrix.inl:636
A matrix container class (2D matrix), internally uses 1D container (std::vector). ...
Definition: matrix.h:103
std::pair< const_iterator, const_iterator > col_iterator_range()
Definition: matrix.inl:472
size_type total_cols() const
Definition: matrix.h:536
const Matrix< T > & operator-=(const Matrix< T > &rhs)
Definition: matrix.inl:126
bool operator!=(const Matrix< T > &c1)
Definition: matrix.inl:985
void save(const std::string &filename)
Saves content of Matrix to a file.
Definition: matrix.h:351
col_iterator col_end()
Definition: matrix.inl:552
T * GetArrayRep()
Definition: matrix.h:298
const Matrix< T > & operator%=(const Matrix< T > &rhs)
Definition: matrix.inl:242
void acquireRead() const
Definition: matrix.inl:290
col_iterator col_begin()
Definition: matrix.inl:487
starpu_data_handle_t registerPartitions(int _xparts=1, int _yparts=1)
To register Matrix to StarPU. This method can create partitions of the matrix.
Definition: matrix.h:224
bool empty() const
Definition: matrix.inl:578
iterator end()
Definition: matrix.inl:503
const Matrix< T > & operator/=(const Matrix< T > &rhs)
Definition: matrix.inl:203
size_type size() const
Definition: matrix.h:524
const Matrix< T > & operator+=(const Matrix< T > &rhs)
Definition: matrix.inl:87
AccessType
Can be used to specify whether the access is row-wise or column-wise.
Definition: matrix.h:33
void load(const std::string &filename, int rowWidth, int numRows=0)
Loads the Matrix from a file.
Definition: matrix.h:379
void flush()
Definition: matrix.inl:857
void resize(size_type _rows, size_type _cols, T val=T())
Definition: matrix.inl:51
starpu_data_handle_t registerMatrix()
To register Matrix with StarPU. Does not create partitions of Matrix.
Definition: matrix.h:179
Contains a class declaration for Environment class.
Matrix(size_type _rows, size_type _cols)
Definition: matrix.h:449
const Matrix< T > & operator*=(const Matrix< T > &rhs)
Definition: matrix.inl:165
Matrix< T > & operator=(Matrix< T > &other)
Definition: matrix.inl:35
void change_layout()
Definition: matrix.h:542
bool operator>=(const Matrix< T > &c1)
Definition: matrix.inl:1039
void randomize(int min=0, int max=RAND_MAX)
Randomizes the Matrix.
Definition: matrix.h:333
size_type total_rows() const
Definition: matrix.h:530
void swap(Matrix< T > &from)
Definition: matrix.inl:806
bool operator==(const Matrix< T > &c1)
Definition: matrix.inl:972
Matrix(const Matrix< T > &copy)
Definition: matrix.h:499
~Matrix()
Definition: matrix.h:429