SkePU(integratedwithStarPU) 0.8
include_starpu/skepu/matrix.h
00001 #ifndef MATRIX_H
00002 #define MATRIX_H
00003 
00004 #include <iostream>
00005 #include <fstream>
00006 #include <sstream>
00007 #include <cstdlib>
00008 
00009 #include <vector>
00010 
00011 #include <map>
00012 
00013 
00014 
00015 #include <starpu.h>
00016 
00017 #include "skepu/src/environment.h"
00018 
00019 #include "skepu/src/malloc_allocator.h"
00020 
00021 
00022 
00023 
00024 
00025 namespace skepu
00026 {
00027   
00033  enum AccessType
00034  {
00035   ROW_WISE, //C style iterating from rows
00036   COL_WISE // fortran style iterating from columns 
00037  };
00038 
00039  
00040  
00054 template<typename T>
00055 class Matrix
00056 {
00057 
00058 private:
00059 
00060     void unpartitionMatrix() const
00061     {
00062         DEBUG_TEXT_LEVEL1("*****  MATRIX UNPARTIOTIINING xparts: "<< xparts <<",  yparts: "<< yparts <<" *****\n")
00063         if(xparts>1 && yparts >1)
00064           starpu_data_unpartition(matrix_handle, 0); // need to check as it might only unpartition only one filter.
00065         else if(xparts>1 || yparts >1)
00066           starpu_data_unpartition(matrix_handle, 0); // no matter x or y filter, unpartitioning logic is same.
00067         xparts=1;
00068         yparts=1;     
00069     }
00070     
00074     void release_acquire()
00075     {
00076         if(isAcquire)
00077         {
00078             if(xparts>1 || yparts>1) // partitioning
00079             {
00080                 DEBUG_TEXT_LEVEL1("*****  MATRIX RELEASE **** xparts: "<< xparts << ",  yparts: "<< yparts <<"\n")
00081                 
00082                 if(xparts > 1 && yparts > 1)  // 2d partitioning
00083                 {
00084                     for (int y = 0; y < yparts; y++) 
00085                         for (int x = 0; x < xparts; x++) 
00086                         {
00087                             starpu_data_release(starpu_data_get_sub_data(matrix_handle, 2, x, y));
00088                         }
00089                 }
00090                 else // 1D partitioning
00091                 {
00092                     int parts = (xparts>1) ? xparts : yparts;
00093                     // parts should be equal to what parts variable points to
00094                     assert (parts == starpu_data_get_nb_children(matrix_handle));
00095                     for (int x = 0; x < parts; x++) 
00096                     {
00097                         starpu_data_release(starpu_data_get_sub_data(matrix_handle, 1, x));
00098                     }
00099                 }
00100             }
00101             else // no partitioning
00102             {
00103                 DEBUG_TEXT_LEVEL1("*****  MATRIX RELEASE ****\n")
00104                 starpu_data_release(matrix_handle);
00105             }
00106             isAcquire = false;
00107         }
00108     }
00109 
00110   
00111   public:
00112   
00113 
00114     starpu_data_handle_t matrix_handle;
00115     mutable bool isOnStarPU;
00116     mutable bool isReadBack;
00117     mutable bool isWriteBack;
00118     mutable int xparts; // horizontal partitioning
00119     mutable int yparts; // vertical partitioning
00120     
00121     mutable bool isAcquire;
00122     mutable enum starpu_access_mode mode;
00123     
00124     struct starpu_data_filter matrix_filter_x;
00125     struct starpu_data_filter matrix_filter_y;
00126     //enum enum starpu_access_mode accessMode;
00127     
00131     starpu_data_handle_t registerMatrix()
00132     {
00133         release_acquire();
00134         
00135         if(!isOnStarPU)
00136         {
00137             starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)&m_data[0], m_cols, m_cols, m_rows, sizeof(m_data[0]));
00138             isOnStarPU=true;
00139             DEBUG_TEXT_LEVEL1("*****  MATRIX REGISTERING **** rows: "<< m_rows <<" cols: "<< m_cols <<" \n")
00140         }
00141         else if(xparts>1 || yparts >1)
00142         {
00143             unpartitionMatrix();
00144         }
00145         return matrix_handle;
00146     }
00147     
00151     void unregisterMatrix(bool update=true)
00152     {
00153         if(isOnStarPU)
00154         {
00155             release_acquire(); // Should we release before unregister? Yes!
00156                 
00157             if(xparts>1 || yparts >1) // do we need to unpartition before unregistering? Yes!
00158             {
00159                 unpartitionMatrix();
00160             }
00161             
00162             if(update)  
00163                 starpu_data_unregister(matrix_handle);
00164                 
00165             else // dont need to get updated data back
00166                 starpu_data_unregister_no_coherency(matrix_handle);
00167                 
00168             isOnStarPU = false;
00169             isAcquire = false;      
00170         }
00171     }
00172     
00176     starpu_data_handle_t registerPartitions(int _xparts=1, int _yparts=1)
00177     {
00178         release_acquire();
00179         
00180         if(!isOnStarPU) // if not registered, register it first
00181         {
00182             starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)&m_data[0], m_cols, m_cols, m_rows, sizeof(m_data[0]));
00183             isOnStarPU=true;
00184             DEBUG_TEXT_LEVEL1("*****   MATRIX REGISTERING **** rows: "<< m_rows <<" cols: "<< m_cols <<" \n")
00185         }
00186         
00187         if(_xparts == xparts && _yparts==yparts)
00188             return matrix_handle;   
00189         if(xparts>1 || yparts>1)
00190         {
00191             unpartitionMatrix();
00192         }
00193         if(_xparts>1 && _yparts>1)
00194         {
00195             matrix_filter_x.nchildren = _xparts;
00196             matrix_filter_y.nchildren = _yparts;    
00197             starpu_data_map_filters(matrix_handle, 2, &matrix_filter_x, &matrix_filter_y);
00198             DEBUG_TEXT_LEVEL1("*****  MATRIX PARTIOTIINING xparts: "<< _xparts <<",  yparts: "<< _yparts <<" *****\n")
00199             xparts = _xparts;
00200             yparts = _yparts;
00201         }
00202         else if(_xparts>1)
00203         {
00204             matrix_filter_x.nchildren = _xparts;
00205             starpu_data_partition(matrix_handle, &matrix_filter_x);
00206             DEBUG_TEXT_LEVEL1("*****  MATRIX PARTIOTIINING xparts: "<< _xparts <<" *****\n")
00207             xparts = _xparts;
00208         }
00209         else if(_yparts>1)
00210         {
00211             matrix_filter_y.nchildren = _yparts;
00212             starpu_data_partition(matrix_handle, &matrix_filter_y);
00213             DEBUG_TEXT_LEVEL1("*****  MATRIX PARTIOTIINING yparts: "<< _yparts <<" *****\n")
00214             yparts = _yparts;
00215         }
00216         
00217         return matrix_handle;
00218     }
00219     
00220     
00221    
00222    // typedefs
00223 #ifdef USE_PINNED_MEMORY
00224     typedef std::vector<T, malloc_allocator<T> > container_type;
00225     typedef typename std::vector<T, malloc_allocator<T> >::size_type size_type;
00226     typedef typename std::vector<T, malloc_allocator<T> >::value_type value_type;
00227     typedef typename std::vector<T, malloc_allocator<T> >::difference_type difference_type;
00228     typedef typename std::vector<T, malloc_allocator<T> >::pointer pointer;
00229     typedef typename std::vector<T, malloc_allocator<T> >::reference reference;
00230     typedef typename std::vector<T, malloc_allocator<T> >::const_reference const_reference;
00231     typedef typename std::vector<T, malloc_allocator<T> >::const_iterator const_iterator;
00232     typedef typename std::vector<T, malloc_allocator<T> >::const_reverse_iterator const_reverse_iterator;
00233 #else
00234     typedef std::vector<T> container_type;
00235     typedef typename std::vector<T>::size_type size_type;
00236     typedef typename std::vector<T>::value_type value_type;
00237     typedef typename std::vector<T>::difference_type difference_type;
00238     typedef typename std::vector<T>::pointer pointer;
00239     typedef typename std::vector<T>::reference reference;
00240     typedef typename std::vector<T>::const_reference const_reference;
00241     typedef typename std::vector<T>::const_iterator const_iterator;
00242     typedef typename std::vector<T>::const_reverse_iterator const_reverse_iterator;
00243 #endif
00244   
00245   public: //-- For Testing --//
00246     
00250      T* GetArrayRep()
00251      {
00252        return &m_data[0];
00253      }
00254    
00260     friend std::ostream& operator<<(std::ostream &os, Matrix<T>& matrix)
00261     {
00262         matrix.acquireRead();
00263         
00264         os << "Matrix: ("<< matrix.total_rows() <<" X "<<matrix.total_cols()<<")\n";
00265         for(int i=0;i<matrix.size();i++)
00266         {
00267           os<<(matrix(i))<<" ";
00268           if((i+1)%(matrix.total_cols())==0)
00269              os << "\n";
00270         }
00271         os<<"\n";
00272         return os;
00273     }   
00274 
00275     
00285     void randomize(int min = 0, int max = RAND_MAX)
00286     {
00287         unregisterMatrix(false);
00288 
00289         for(unsigned int i = 0; i < size(); i++)
00290         {
00291             m_data.at(i) = (T)( rand() % max + min);
00292         }
00293     }
00294 
00303     void save(const std::string& filename)
00304     {
00305         std::ofstream file(filename.c_str());
00306 
00307         if (file.is_open())
00308         {
00309             for(unsigned int i = 0; i < m_data.size(); ++i)
00310             {
00311                 file<<m_data.at(i) <<" ";
00312             }
00313             file.close();
00314         }
00315         else
00316         {
00317             std::cout<<"Unable to open file\n";
00318         }
00319     }
00320 
00331     void load(const std::string& filename, int rowWidth, int numRows = 0)
00332     {
00333         std::ifstream file(filename.c_str());
00334 
00335         if (file.is_open())
00336         {
00337             std::string line;
00338             getline (file,line);
00339             std::istringstream ss(line);
00340             T num;
00341             clear();
00342 
00343             //Load all elements
00344             if(numRows == 0)
00345             {
00346                 while(ss >> num)
00347                 {
00348                     push_back(num);
00349                 }
00350             }
00351             // Load only numElements elements
00352             else
00353             {
00354                 for(int i = 0; i < (numRows*rowWidth); ++i)
00355                 {
00356                     ss >> num;
00357                     push_back(num);
00358                 }
00359             }
00360         
00361         m_cols = rowWidth;
00362         m_rows = (size()/rowWidth);
00363 
00364             file.close();
00365         }
00366         else
00367         {
00368             std::cout<<"Unable to open file\n";
00369         }
00370     }
00371     
00372     
00373 
00374   
00375 // Constructors, destructors
00376 public:
00377       
00381     ~Matrix()
00382     {
00383         DEBUG_TEXT_LEVEL1("*****  MATRIX DESTRUCTOR **** rows: "<< total_rows()<<",  cols: "<< total_cols() <<"\n")
00384         if(isOnStarPU)
00385         {
00386             release_acquire();
00387             
00388             if(xparts>1 || yparts>1)
00389                 unpartitionMatrix();
00390             
00391 //          starpu_data_unregister(matrix_handle);
00392         }  
00393      }
00394 
00395 
00401  Matrix(size_type _rows, size_type _cols): m_rows(_rows), m_cols(_cols), m_data(m_rows * m_cols), transpose_matrix(0)
00402     {
00403         isOnStarPU = false;
00404         isAcquire = false;
00405         xparts=1;
00406         yparts=1;
00407         memset(&matrix_filter_x, 0, sizeof(matrix_filter_x));
00408         matrix_filter_x.filter_func=starpu_vertical_block_filter_func;
00409         
00410         memset(&matrix_filter_y, 0, sizeof(matrix_filter_y));
00411         matrix_filter_y.filter_func=starpu_block_filter_func;
00412     }
00413     
00420  Matrix(size_type _rows, size_type _cols, const T& val): m_rows(_rows), m_cols(_cols),m_data(m_rows * m_cols, val), transpose_matrix(0)
00421     {
00422         isOnStarPU = false;
00423         isAcquire = false;
00424         xparts=1;
00425         yparts=1;
00426         memset(&matrix_filter_x, 0, sizeof(matrix_filter_x));
00427         matrix_filter_x.filter_func=starpu_vertical_block_filter_func;
00428         
00429         memset(&matrix_filter_y, 0, sizeof(matrix_filter_y));
00430         matrix_filter_y.filter_func=starpu_block_filter_func;
00431     }
00432     
00433     
00440     Matrix(const Matrix<T>& copy)
00441     {
00442        copy.acquireRead();
00443       
00444        this->m_rows = copy.total_rows();
00445        this->m_cols = copy.total_cols();
00446        this->m_data= copy.m_data;
00447        this->transpose_matrix = copy.transpose_matrix;
00448        
00449        isOnStarPU = copy.isOnStarPU;
00450        isAcquire = copy.isAcquire;
00451        
00452        matrix_filter_x = copy.matrix_filter_x;
00453        matrix_filter_y = copy.matrix_filter_y;
00454        
00455        xparts=copy.xparts;
00456        yparts=copy.yparts;
00457     }
00458 
00459  public:
00460 
00465     size_type size() const { return m_data.size();}
00466     
00471     size_type total_rows() const {return m_rows; }
00472     
00477     size_type total_cols() const {return m_cols; }
00478 
00483     void change_layout()
00484     {  
00485       int tmp = m_rows;
00486       m_rows=m_cols;
00487       m_cols = tmp;
00488     }
00489     
00490 
00491 
00492  private:
00493     size_type m_rows, m_cols;
00494     
00495 // A custom memory allocator is written to support pinned memory allocation for CUDA. Enabled by defining USE_PINNED_MEMORY macro
00496 #ifdef USE_PINNED_MEMORY
00497     mutable std::vector<T, malloc_allocator<T> > m_data;
00498 #else
00499     mutable std::vector<T> m_data;
00500 #endif
00501 
00502     
00503     // for col_iterator, not tested 
00504     mutable Matrix<T> *transpose_matrix;
00505 
00506     template<typename Type>
00507     void item_swap(Type &t1, Type &t2);
00508     
00509     
00510     // External classes
00511 public:
00512         //template <AccessType accessType=ROW_WISE>
00513     class iterator;
00514     
00515     class col_iterator;
00516     
00517     class proxy_elem;
00518     
00519     
00520     
00521 
00522 public: //-- Operators --//
00523 
00524     void resize(size_type _rows, size_type _cols, T val = T());
00525 
00526     Matrix<T>& operator=(Matrix<T>& other);
00527     Matrix<T>& operator=(const T& elem);
00528 
00529     bool operator==(const Matrix<T>& c1);
00530     bool operator!=(const Matrix<T>& c1);
00531     bool operator<(const Matrix<T>& c1);
00532     bool operator>(const Matrix<T>& c1);
00533     bool operator<=(const Matrix<T>& c1);
00534     bool operator>=(const Matrix<T>& c1);
00535 
00536     Matrix<T>& subsection(size_type row, size_type col, size_type rowWidth, size_type colWidth);
00537     
00538 
00539 public: //-- STL vector regular interface --//
00540     //Iterators
00541     iterator begin();
00542     const_iterator begin() const;
00543     iterator begin(unsigned row);
00544     const_iterator begin(unsigned row) const;
00545 
00546     iterator end();
00547     const_iterator end() const;
00548     iterator end(unsigned row);
00549     const_iterator end(unsigned row) const;
00550     
00551     //Column Iterators, taking transpose and using its iterator, work only for const_iterators
00552     // iterator col_begin();
00553     std::pair<const_iterator, const_iterator> col_iterator_range();
00554     col_iterator col_begin();
00555 
00556    // iterator col_end();
00557     col_iterator col_end();
00558 
00559 
00560     //Capacity
00561     size_type capacity() const;
00562 
00563     
00564 
00565     void flush();
00566     
00567     
00568     
00569     bool empty() const;
00570 
00571     //Element access
00572     proxy_elem at(size_type row, size_type col);
00573     const T& at(size_type row, size_type col) const;
00574 
00575     size_type row_back(size_type row);
00576     const T& row_back(size_type row) const;
00577 
00578     size_type row_front(size_type row);
00579     const T& row_front(size_type row) const;
00580 
00581     proxy_elem col_back(size_type col);
00582     const T& col_back(size_type col) const;
00583 
00584     proxy_elem col_front(size_type col);
00585     const T& col_front(size_type col) const;
00586 
00587     void clear();
00588 
00589     iterator erase( iterator loc );
00590     iterator erase( iterator start, iterator end );
00591 
00592     void swap(Matrix<T>& from);
00593 
00594 public: //-- Additions to interface --//   
00595 
00596     // Does care about device data
00597 //    proxy_elem operator()(const size_type row, const size_type col) const;
00598     
00599     // Does care about device data
00600     const T& operator()(const size_type row, const size_type col) const;
00601     
00602     T& operator()(const size_type row, const size_type col);
00603     
00604     // Does not care about device data, use with care
00605     T& operator()(const size_type index);
00606     
00607     // Does not care about device data, use with care
00608     T& operator[](const size_type index);
00609     
00610     // unary transpose operator
00611   inline Matrix<T> operator~()
00612   {
00613      Matrix<T> temp(total_cols(),total_rows());
00614      for (size_t i=0; i < total_rows(); i++)
00615       for (size_t j=0; j < total_cols(); j++)
00616       {
00617          T x = (*this)(i,j);
00618      temp(j,i) = x;
00619       }
00620      return temp;
00621   }
00622   
00623   Matrix<T>& get_transpose_matrix(bool updated = false)
00624   {
00625     if(transpose_matrix==0 || updated)
00626     {
00627       transpose_matrix = new Matrix<T>(total_cols(),total_rows());
00628       for (size_t i=0; i < total_rows(); i++)
00629       for (size_t j=0; j < total_cols(); j++)
00630       {
00631       T x = (*this)(i,j);
00632       (*transpose_matrix)(j,i) = x;
00633       }
00634     }
00635     return (*transpose_matrix);
00636   }
00637 
00638     
00639 
00640     // To be able to explicitly force updates without flushing entire matrix.
00641     // Could be used with operator () above to avoid unneccesary function calls
00642     // due to implicit synch.
00643     
00644     void acquireRead() const;
00645     void acquireReadWrite();
00646     
00647 //    void updateHost() const;
00648 //    void invalidateDevice();
00649 //    void updateHostAndInvalidateDevice();
00650     
00651     
00652     const Matrix<T>& operator+=(const Matrix<T>& rhs);
00653     const Matrix<T>& operator+=(const T& rhs);
00654     
00655     const Matrix<T>& operator-=(const Matrix<T>& rhs);
00656     const Matrix<T>& operator-=(const T& rhs);
00657     
00658     const Matrix<T>& operator*=(const Matrix<T>& rhs);
00659     const Matrix<T>& operator*=(const T& rhs);
00660     
00661     const Matrix<T>& operator/=(const Matrix<T>& rhs);
00662     const Matrix<T>& operator/=(const T& rhs);
00663     
00664     const Matrix<T>& operator%=(const Matrix<T>& rhs);
00665     const Matrix<T>& operator%=(const T& rhs);
00666 };
00667 
00668 }
00669 
00670 #include "src/matrix_iterator.inl"
00671 
00672 #include "src/matrix_col_iterator.inl"
00673 
00674 #include "src/matrix_proxy.inl"
00675 #include "src/matrix.inl"
00676 
00677 #endif
00678 
00679 
 All Classes Namespaces Files Functions Enumerations Friends Defines