SkePU 0.7
include_starpu/skepu/matrix.h
00001 #ifndef MATRIX_H
00002 #define MATRIX_H
00003 
00004 #include <iostream>
00005 #include <fstream>
00006 #include <sstream>
00007 #include <cstdlib>
00008 
00009 #include <vector>
00010 
00011 #include <map>
00012 
00013 
00014 
00015 #include <starpu.h>
00016 
00017 #include "skepu/src/debug.h"
00018 
00019 #include "skepu/src/malloc_allocator.h"
00020 
00021 #include "globals.h"
00022 
00023 
00024 
00025 
00026 namespace skepu
00027 {
00028   
00034  enum AccessType
00035  {
00036   ROW_WISE, //C style iterating from rows
00037   COL_WISE // fortran style iterating from columns 
00038  };
00039 
00040  
00041  
00057 template<typename T>
00058 class Matrix
00059 {
00060 
00061 private:
00062 
00063     void unpartitionMatrix() const
00064     {
00065         DEBUG_TEXT_LEVEL1("*****  MATRIX UNPARTIOTIINING xparts: "<< xparts <<",  yparts: "<< yparts <<" *****\n")
00066         if(xparts>1 && yparts >1)
00067           starpu_data_unpartition(matrix_handle, 0); // need to check as it might only unpartition only one filter.
00068         else if(xparts>1 || yparts >1)
00069           starpu_data_unpartition(matrix_handle, 0); // no matter x or y filter, unpartitioning logic is same.
00070         xparts=1;
00071         yparts=1;     
00072     }
00073     
00077     void release_acquire()
00078     {
00079         if(isAcquire)
00080         {
00081             if(xparts>1 || yparts>1) // partitioning
00082             {
00083                 DEBUG_TEXT_LEVEL1("*****  MATRIX RELEASE **** xparts: "<< xparts << ",  yparts: "<< yparts <<"\n")
00084                 
00085                 if(xparts > 1 && yparts > 1)  // 2d partitioning
00086                 {
00087                     for (int y = 0; y < yparts; y++) 
00088                         for (int x = 0; x < xparts; x++) 
00089                         {
00090                             starpu_data_release(starpu_data_get_sub_data(matrix_handle, 2, x, y));
00091                         }
00092                 }
00093                 else // 1D partitioning
00094                 {
00095                     int parts = (xparts>1) ? xparts : yparts;
00096                     // parts should be equal to what parts variable points to
00097                     assert (parts == starpu_data_get_nb_children(matrix_handle));
00098                     for (int x = 0; x < parts; x++) 
00099                     {
00100                         starpu_data_release(starpu_data_get_sub_data(matrix_handle, 1, x));
00101                     }
00102                 }
00103             }
00104             else // no partitioning
00105             {
00106                 DEBUG_TEXT_LEVEL1("*****  MATRIX RELEASE ****\n")
00107                 starpu_data_release(matrix_handle);
00108             }
00109             isAcquire = false;
00110         }
00111     }
00112 
00113   
00114   public:
00115   
00116 
00117     starpu_data_handle matrix_handle;
00118     mutable bool isOnStarPU;
00119     mutable bool isReadBack;
00120     mutable bool isWriteBack;
00121     mutable int xparts; // horizontal partitioning
00122     mutable int yparts; // vertical partitioning
00123     
00124     mutable bool isAcquire;
00125     mutable starpu_access_mode mode;
00126     
00127     struct starpu_data_filter matrix_filter_x;
00128     struct starpu_data_filter matrix_filter_y;
00129     //starpu_access_mode accessMode;
00130     
00134     starpu_data_handle registerMatrix()
00135     {
00136         release_acquire();
00137         
00138         if(!isOnStarPU)
00139         {
00140             starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)&m_data[0], m_cols, m_cols, m_rows, sizeof(m_data[0]));
00141             isOnStarPU=true;
00142             DEBUG_TEXT_LEVEL1("*****  MATRIX REGISTERING **** rows: "<< m_rows <<" cols: "<< m_cols <<" \n")
00143         }
00144         else if(xparts>1 || yparts >1)
00145         {
00146             unpartitionMatrix();
00147         }
00148         return matrix_handle;
00149     }
00150     
00154     void unregisterMatrix(bool update=true)
00155     {
00156         if(isOnStarPU)
00157         {
00158             release_acquire(); // Should we release before unregister? Yes!
00159                 
00160             if(xparts>1 || yparts >1) // do we need to unpartition before unregistering? Yes!
00161             {
00162                 unpartitionMatrix();
00163             }
00164             
00165             if(update)  
00166                 starpu_data_unregister(matrix_handle);
00167                 
00168             else // dont need to get updated data back
00169                 starpu_data_unregister_no_coherency(matrix_handle);
00170                 
00171             isOnStarPU = false;
00172             isAcquire = false;      
00173         }
00174     }
00175     
00179     starpu_data_handle registerPartitions(int _xparts=1, int _yparts=1)
00180     {
00181         release_acquire();
00182         
00183         if(!isOnStarPU) // if not registered, register it first
00184         {
00185             starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)&m_data[0], m_cols, m_cols, m_rows, sizeof(m_data[0]));
00186             isOnStarPU=true;
00187             DEBUG_TEXT_LEVEL1("*****   MATRIX REGISTERING **** rows: "<< m_rows <<" cols: "<< m_cols <<" \n")
00188         }
00189         
00190         if(_xparts == xparts && _yparts==yparts)
00191             return matrix_handle;   
00192         if(xparts>1 || yparts>1)
00193         {
00194             unpartitionMatrix();
00195         }
00196         if(_xparts>1 && _yparts>1)
00197         {
00198             matrix_filter_x.nchildren = _xparts;
00199             matrix_filter_y.nchildren = _yparts;    
00200             starpu_data_map_filters(matrix_handle, 2, &matrix_filter_x, &matrix_filter_y);
00201             DEBUG_TEXT_LEVEL1("*****  MATRIX PARTIOTIINING xparts: "<< _xparts <<",  yparts: "<< _yparts <<" *****\n")
00202             xparts = _xparts;
00203             yparts = _yparts;
00204         }
00205         else if(_xparts>1)
00206         {
00207             matrix_filter_x.nchildren = _xparts;
00208             starpu_data_partition(matrix_handle, &matrix_filter_x);
00209             DEBUG_TEXT_LEVEL1("*****  MATRIX PARTIOTIINING xparts: "<< _xparts <<" *****\n")
00210             xparts = _xparts;
00211         }
00212         else if(_yparts>1)
00213         {
00214             matrix_filter_y.nchildren = _yparts;
00215             starpu_data_partition(matrix_handle, &matrix_filter_y);
00216             DEBUG_TEXT_LEVEL1("*****  MATRIX PARTIOTIINING yparts: "<< _yparts <<" *****\n")
00217             yparts = _yparts;
00218         }
00219         
00220         return matrix_handle;
00221     }
00222     
00223     
00224    
00225    // typedefs
00226 #ifdef USE_PINNED_MEMORY
00227     typedef std::vector<T, malloc_allocator<T> > container_type;
00228     typedef typename std::vector<T, malloc_allocator<T> >::size_type size_type;
00229     typedef typename std::vector<T, malloc_allocator<T> >::value_type value_type;
00230     typedef typename std::vector<T, malloc_allocator<T> >::difference_type difference_type;
00231     typedef typename std::vector<T, malloc_allocator<T> >::pointer pointer;
00232     typedef typename std::vector<T, malloc_allocator<T> >::reference reference;
00233     typedef typename std::vector<T, malloc_allocator<T> >::const_reference const_reference;
00234     typedef typename std::vector<T, malloc_allocator<T> >::const_iterator const_iterator;
00235     typedef typename std::vector<T, malloc_allocator<T> >::const_reverse_iterator const_reverse_iterator;
00236 #else
00237     typedef std::vector<T> container_type;
00238     typedef typename std::vector<T>::size_type size_type;
00239     typedef typename std::vector<T>::value_type value_type;
00240     typedef typename std::vector<T>::difference_type difference_type;
00241     typedef typename std::vector<T>::pointer pointer;
00242     typedef typename std::vector<T>::reference reference;
00243     typedef typename std::vector<T>::const_reference const_reference;
00244     typedef typename std::vector<T>::const_iterator const_iterator;
00245     typedef typename std::vector<T>::const_reverse_iterator const_reverse_iterator;
00246 #endif
00247   
00248   public: //-- For Testing --//
00249     
00253      T* GetArrayRep()
00254      {
00255        return &m_data[0];
00256      }
00257    
00263     friend std::ostream& operator<<(std::ostream &os, Matrix<T>& matrix)
00264     {
00265         matrix.acquireRead();
00266         
00267         os << "Matrix: ("<< matrix.total_rows() <<" X "<<matrix.total_cols()<<")\n";
00268         for(int i=0;i<matrix.size();i++)
00269         {
00270           os<<(matrix(i))<<" ";
00271           if((i+1)%(matrix.total_cols())==0)
00272              os << "\n";
00273         }
00274         os<<"\n";
00275         return os;
00276     }   
00277 
00278     
00288     void randomize(int min = 0, int max = RAND_MAX)
00289     {
00290         unregisterMatrix(false);
00291 
00292         for(unsigned int i = 0; i < size(); i++)
00293         {
00294             m_data.at(i) = (T)( rand() % max + min);
00295         }
00296     }
00297 
00306     void save(const std::string& filename)
00307     {
00308         std::ofstream file(filename.c_str());
00309 
00310         if (file.is_open())
00311         {
00312             for(unsigned int i = 0; i < m_data.size(); ++i)
00313             {
00314                 file<<m_data.at(i) <<" ";
00315             }
00316             file.close();
00317         }
00318         else
00319         {
00320             std::cout<<"Unable to open file\n";
00321         }
00322     }
00323 
00334     void load(const std::string& filename, int rowWidth, int numRows = 0)
00335     {
00336         std::ifstream file(filename.c_str());
00337 
00338         if (file.is_open())
00339         {
00340             std::string line;
00341             getline (file,line);
00342             std::istringstream ss(line);
00343             T num;
00344             clear();
00345 
00346             //Load all elements
00347             if(numRows == 0)
00348             {
00349                 while(ss >> num)
00350                 {
00351                     push_back(num);
00352                 }
00353             }
00354             // Load only numElements elements
00355             else
00356             {
00357                 for(int i = 0; i < (numRows*rowWidth); ++i)
00358                 {
00359                     ss >> num;
00360                     push_back(num);
00361                 }
00362             }
00363         
00364         m_cols = rowWidth;
00365         m_rows = (size()/rowWidth);
00366 
00367             file.close();
00368         }
00369         else
00370         {
00371             std::cout<<"Unable to open file\n";
00372         }
00373     }
00374     
00375     
00376 
00377   
00378 // Constructors, destructors
00379 public:
00380       
00384     ~Matrix()
00385     {
00386         DEBUG_TEXT_LEVEL1("*****  MATRIX DESTRUCTOR **** rows: "<< total_rows()<<",  cols: "<< total_cols() <<"\n")
00387         if(isOnStarPU)
00388         {
00389             release_acquire();
00390             if(xparts>1 || yparts>1)
00391                 unpartitionMatrix();
00392                 
00393             starpu_data_unregister(matrix_handle);
00394         }  
00395      }
00396 
00397 
00403  Matrix(size_type _rows, size_type _cols): m_rows(_rows), m_cols(_cols), m_data(m_rows * m_cols), transpose_matrix(0)
00404     {
00405         isOnStarPU = false;
00406         isAcquire = false;
00407         xparts=1;
00408         yparts=1;
00409         memset(&matrix_filter_x, 0, sizeof(matrix_filter_x));
00410         matrix_filter_x.filter_func=starpu_vertical_block_filter_func;
00411         
00412         memset(&matrix_filter_y, 0, sizeof(matrix_filter_y));
00413         matrix_filter_y.filter_func=starpu_block_filter_func;
00414     }
00415     
00422  Matrix(size_type _rows, size_type _cols, const T& val): m_rows(_rows), m_cols(_cols),m_data(m_rows * m_cols, val), transpose_matrix(0)
00423     {
00424         isOnStarPU = false;
00425         isAcquire = false;
00426         xparts=1;
00427         yparts=1;
00428         memset(&matrix_filter_x, 0, sizeof(matrix_filter_x));
00429         matrix_filter_x.filter_func=starpu_vertical_block_filter_func;
00430         
00431         memset(&matrix_filter_y, 0, sizeof(matrix_filter_y));
00432         matrix_filter_y.filter_func=starpu_block_filter_func;
00433     }
00434     
00435     
00442     Matrix(const Matrix<T>& copy)
00443     {
00444        copy.acquireRead();
00445       
00446        this->m_rows = copy.total_rows();
00447        this->m_cols = copy.total_cols();
00448        this->m_data= copy.m_data;
00449        this->transpose_matrix = copy.transpose_matrix;
00450        
00451        isOnStarPU = copy.isOnStarPU;
00452        isAcquire = copy.isAcquire;
00453        
00454        matrix_filter_x = copy.matrix_filter_x;
00455        matrix_filter_y = copy.matrix_filter_y;
00456        
00457        xparts=copy.xparts;
00458        yparts=copy.yparts;
00459     }
00460 
00461  public:
00462 
00467     size_type size() const { return m_data.size();}
00468     
00473     size_type total_rows() const {return m_rows; }
00474     
00479     size_type total_cols() const {return m_cols; }
00480 
00485     void change_layout()
00486     {  
00487       int tmp = m_rows;
00488       m_rows=m_cols;
00489       m_cols = tmp;
00490     }
00491     
00492 
00493 
00494  private:
00495     size_type m_rows, m_cols;
00496     
00497 // A custom memory allocator is written to support pinned memory allocation for CUDA. Enabled by defining USE_PINNED_MEMORY macro
00498 #ifdef USE_PINNED_MEMORY
00499     mutable std::vector<T, malloc_allocator<T> > m_data;
00500 #else
00501     mutable std::vector<T> m_data;
00502 #endif
00503 
00504     
00505     // for col_iterator, not tested 
00506     mutable Matrix<T> *transpose_matrix;
00507 
00508     template<typename Type>
00509     void item_swap(Type &t1, Type &t2);
00510     
00511     
00512     // External classes
00513 public:
00514         //template <AccessType accessType=ROW_WISE>
00515     class iterator;
00516     
00517     class col_iterator;
00518     
00519     class proxy_elem;
00520     
00521     
00522     
00523 
00524 public: //-- Operators --//
00525 
00526     void resize(size_type _rows, size_type _cols, T val = T());
00527 
00528     Matrix<T>& operator=(Matrix<T>& other);
00529     Matrix<T>& operator=(const T& elem);
00530 
00531     bool operator==(const Matrix<T>& c1);
00532     bool operator!=(const Matrix<T>& c1);
00533     bool operator<(const Matrix<T>& c1);
00534     bool operator>(const Matrix<T>& c1);
00535     bool operator<=(const Matrix<T>& c1);
00536     bool operator>=(const Matrix<T>& c1);
00537 
00538     Matrix<T>& subsection(size_type row, size_type col, size_type rowWidth, size_type colWidth);
00539     
00540 
00541 public: //-- STL vector regular interface --//
00542     //Iterators
00543     iterator begin();
00544     const_iterator begin() const;
00545     iterator begin(unsigned row);
00546     const_iterator begin(unsigned row) const;
00547 
00548     iterator end();
00549     const_iterator end() const;
00550     iterator end(unsigned row);
00551     const_iterator end(unsigned row) const;
00552     
00553     //Column Iterators, taking transpose and using its iterator, work only for const_iterators
00554     // iterator col_begin();
00555     std::pair<const_iterator, const_iterator> col_iterator_range();
00556     col_iterator col_begin();
00557 
00558    // iterator col_end();
00559     col_iterator col_end();
00560 
00561 
00562     //Capacity
00563     size_type capacity() const;
00564 
00565     
00566 
00567     void flush();
00568     
00569     
00570     
00571     bool empty() const;
00572 
00573     //Element access
00574     proxy_elem at(size_type row, size_type col);
00575     const T& at(size_type row, size_type col) const;
00576 
00577     size_type row_back(size_type row);
00578     const T& row_back(size_type row) const;
00579 
00580     size_type row_front(size_type row);
00581     const T& row_front(size_type row) const;
00582 
00583     proxy_elem col_back(size_type col);
00584     const T& col_back(size_type col) const;
00585 
00586     proxy_elem col_front(size_type col);
00587     const T& col_front(size_type col) const;
00588 
00589     void clear();
00590 
00591     iterator erase( iterator loc );
00592     iterator erase( iterator start, iterator end );
00593 
00594     void swap(Matrix<T>& from);
00595 
00596 public: //-- Additions to interface --//   
00597 
00598     // Does care about device data
00599 //    proxy_elem operator()(const size_type row, const size_type col) const;
00600     
00601     // Does care about device data
00602     const T& operator()(const size_type row, const size_type col) const;
00603     
00604     T& operator()(const size_type row, const size_type col);
00605     
00606     // Does not care about device data, use with care
00607     T& operator()(const size_type index);
00608     
00609     // unary transpose operator
00610   inline Matrix<T> operator~()
00611   {
00612      Matrix<T> temp(total_cols(),total_rows());
00613      for (size_t i=0; i < total_rows(); i++)
00614       for (size_t j=0; j < total_cols(); j++)
00615       {
00616          T x = (*this)(i,j);
00617      temp(j,i) = x;
00618       }
00619      return temp;
00620   }
00621   
00622   Matrix<T>& get_transpose_matrix(bool updated = false)
00623   {
00624     if(transpose_matrix==0 || updated)
00625     {
00626       transpose_matrix = new Matrix<T>(total_cols(),total_rows());
00627       for (size_t i=0; i < total_rows(); i++)
00628       for (size_t j=0; j < total_cols(); j++)
00629       {
00630       T x = (*this)(i,j);
00631       (*transpose_matrix)(j,i) = x;
00632       }
00633     }
00634     return (*transpose_matrix);
00635   }
00636 
00637     
00638 
00639     // To be able to explicitly force updates without flushing entire matrix.
00640     // Could be used with operator () above to avoid unneccesary function calls
00641     // due to implicit synch.
00642     
00643     void acquireRead() const;
00644     void acquireReadWrite();
00645     
00646 //    void updateHost() const;
00647 //    void invalidateDevice();
00648 //    void updateHostAndInvalidateDevice();
00649     
00650     
00651     const Matrix<T>& operator+=(const Matrix<T>& rhs);
00652     const Matrix<T>& operator+=(const T& rhs);
00653     
00654     const Matrix<T>& operator-=(const Matrix<T>& rhs);
00655     const Matrix<T>& operator-=(const T& rhs);
00656     
00657     const Matrix<T>& operator*=(const Matrix<T>& rhs);
00658     const Matrix<T>& operator*=(const T& rhs);
00659     
00660     const Matrix<T>& operator/=(const Matrix<T>& rhs);
00661     const Matrix<T>& operator/=(const T& rhs);
00662     
00663     const Matrix<T>& operator%=(const Matrix<T>& rhs);
00664     const Matrix<T>& operator%=(const T& rhs);
00665 };
00666 
00667 }
00668 
00669 #include "src/matrix_iterator.inl"
00670 
00671 #include "src/matrix_col_iterator.inl"
00672 
00673 #include "src/matrix_proxy.inl"
00674 #include "src/matrix.inl"
00675 
00676 #endif
00677 
00678 
 All Classes Namespaces Files Functions Enumerations Friends Defines