|
SkePU(integratedwithStarPU) 0.8
|
00001 #ifndef MATRIX_H 00002 #define MATRIX_H 00003 00004 #include <iostream> 00005 #include <fstream> 00006 #include <sstream> 00007 #include <cstdlib> 00008 00009 #include <vector> 00010 00011 #include <map> 00012 00013 00014 00015 #include <starpu.h> 00016 00017 #include "skepu/src/environment.h" 00018 00019 #include "skepu/src/malloc_allocator.h" 00020 00021 00022 00023 00024 00025 namespace skepu 00026 { 00027 00033 enum AccessType 00034 { 00035 ROW_WISE, //C style iterating from rows 00036 COL_WISE // fortran style iterating from columns 00037 }; 00038 00039 00040 00054 template<typename T> 00055 class Matrix 00056 { 00057 00058 private: 00059 00060 void unpartitionMatrix() const 00061 { 00062 DEBUG_TEXT_LEVEL1("***** MATRIX UNPARTIOTIINING xparts: "<< xparts <<", yparts: "<< yparts <<" *****\n") 00063 if(xparts>1 && yparts >1) 00064 starpu_data_unpartition(matrix_handle, 0); // need to check as it might only unpartition only one filter. 00065 else if(xparts>1 || yparts >1) 00066 starpu_data_unpartition(matrix_handle, 0); // no matter x or y filter, unpartitioning logic is same. 00067 xparts=1; 00068 yparts=1; 00069 } 00070 00074 void release_acquire() 00075 { 00076 if(isAcquire) 00077 { 00078 if(xparts>1 || yparts>1) // partitioning 00079 { 00080 DEBUG_TEXT_LEVEL1("***** MATRIX RELEASE **** xparts: "<< xparts << ", yparts: "<< yparts <<"\n") 00081 00082 if(xparts > 1 && yparts > 1) // 2d partitioning 00083 { 00084 for (int y = 0; y < yparts; y++) 00085 for (int x = 0; x < xparts; x++) 00086 { 00087 starpu_data_release(starpu_data_get_sub_data(matrix_handle, 2, x, y)); 00088 } 00089 } 00090 else // 1D partitioning 00091 { 00092 int parts = (xparts>1) ? xparts : yparts; 00093 // parts should be equal to what parts variable points to 00094 assert (parts == starpu_data_get_nb_children(matrix_handle)); 00095 for (int x = 0; x < parts; x++) 00096 { 00097 starpu_data_release(starpu_data_get_sub_data(matrix_handle, 1, x)); 00098 } 00099 } 00100 } 00101 else // no partitioning 00102 { 00103 DEBUG_TEXT_LEVEL1("***** MATRIX RELEASE ****\n") 00104 starpu_data_release(matrix_handle); 00105 } 00106 isAcquire = false; 00107 } 00108 } 00109 00110 00111 public: 00112 00113 00114 starpu_data_handle_t matrix_handle; 00115 mutable bool isOnStarPU; 00116 mutable bool isReadBack; 00117 mutable bool isWriteBack; 00118 mutable int xparts; // horizontal partitioning 00119 mutable int yparts; // vertical partitioning 00120 00121 mutable bool isAcquire; 00122 mutable enum starpu_access_mode mode; 00123 00124 struct starpu_data_filter matrix_filter_x; 00125 struct starpu_data_filter matrix_filter_y; 00126 //enum enum starpu_access_mode accessMode; 00127 00131 starpu_data_handle_t registerMatrix() 00132 { 00133 release_acquire(); 00134 00135 if(!isOnStarPU) 00136 { 00137 starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)&m_data[0], m_cols, m_cols, m_rows, sizeof(m_data[0])); 00138 isOnStarPU=true; 00139 DEBUG_TEXT_LEVEL1("***** MATRIX REGISTERING **** rows: "<< m_rows <<" cols: "<< m_cols <<" \n") 00140 } 00141 else if(xparts>1 || yparts >1) 00142 { 00143 unpartitionMatrix(); 00144 } 00145 return matrix_handle; 00146 } 00147 00151 void unregisterMatrix(bool update=true) 00152 { 00153 if(isOnStarPU) 00154 { 00155 release_acquire(); // Should we release before unregister? Yes! 00156 00157 if(xparts>1 || yparts >1) // do we need to unpartition before unregistering? Yes! 00158 { 00159 unpartitionMatrix(); 00160 } 00161 00162 if(update) 00163 starpu_data_unregister(matrix_handle); 00164 00165 else // dont need to get updated data back 00166 starpu_data_unregister_no_coherency(matrix_handle); 00167 00168 isOnStarPU = false; 00169 isAcquire = false; 00170 } 00171 } 00172 00176 starpu_data_handle_t registerPartitions(int _xparts=1, int _yparts=1) 00177 { 00178 release_acquire(); 00179 00180 if(!isOnStarPU) // if not registered, register it first 00181 { 00182 starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)&m_data[0], m_cols, m_cols, m_rows, sizeof(m_data[0])); 00183 isOnStarPU=true; 00184 DEBUG_TEXT_LEVEL1("***** MATRIX REGISTERING **** rows: "<< m_rows <<" cols: "<< m_cols <<" \n") 00185 } 00186 00187 if(_xparts == xparts && _yparts==yparts) 00188 return matrix_handle; 00189 if(xparts>1 || yparts>1) 00190 { 00191 unpartitionMatrix(); 00192 } 00193 if(_xparts>1 && _yparts>1) 00194 { 00195 matrix_filter_x.nchildren = _xparts; 00196 matrix_filter_y.nchildren = _yparts; 00197 starpu_data_map_filters(matrix_handle, 2, &matrix_filter_x, &matrix_filter_y); 00198 DEBUG_TEXT_LEVEL1("***** MATRIX PARTIOTIINING xparts: "<< _xparts <<", yparts: "<< _yparts <<" *****\n") 00199 xparts = _xparts; 00200 yparts = _yparts; 00201 } 00202 else if(_xparts>1) 00203 { 00204 matrix_filter_x.nchildren = _xparts; 00205 starpu_data_partition(matrix_handle, &matrix_filter_x); 00206 DEBUG_TEXT_LEVEL1("***** MATRIX PARTIOTIINING xparts: "<< _xparts <<" *****\n") 00207 xparts = _xparts; 00208 } 00209 else if(_yparts>1) 00210 { 00211 matrix_filter_y.nchildren = _yparts; 00212 starpu_data_partition(matrix_handle, &matrix_filter_y); 00213 DEBUG_TEXT_LEVEL1("***** MATRIX PARTIOTIINING yparts: "<< _yparts <<" *****\n") 00214 yparts = _yparts; 00215 } 00216 00217 return matrix_handle; 00218 } 00219 00220 00221 00222 // typedefs 00223 #ifdef USE_PINNED_MEMORY 00224 typedef std::vector<T, malloc_allocator<T> > container_type; 00225 typedef typename std::vector<T, malloc_allocator<T> >::size_type size_type; 00226 typedef typename std::vector<T, malloc_allocator<T> >::value_type value_type; 00227 typedef typename std::vector<T, malloc_allocator<T> >::difference_type difference_type; 00228 typedef typename std::vector<T, malloc_allocator<T> >::pointer pointer; 00229 typedef typename std::vector<T, malloc_allocator<T> >::reference reference; 00230 typedef typename std::vector<T, malloc_allocator<T> >::const_reference const_reference; 00231 typedef typename std::vector<T, malloc_allocator<T> >::const_iterator const_iterator; 00232 typedef typename std::vector<T, malloc_allocator<T> >::const_reverse_iterator const_reverse_iterator; 00233 #else 00234 typedef std::vector<T> container_type; 00235 typedef typename std::vector<T>::size_type size_type; 00236 typedef typename std::vector<T>::value_type value_type; 00237 typedef typename std::vector<T>::difference_type difference_type; 00238 typedef typename std::vector<T>::pointer pointer; 00239 typedef typename std::vector<T>::reference reference; 00240 typedef typename std::vector<T>::const_reference const_reference; 00241 typedef typename std::vector<T>::const_iterator const_iterator; 00242 typedef typename std::vector<T>::const_reverse_iterator const_reverse_iterator; 00243 #endif 00244 00245 public: //-- For Testing --// 00246 00250 T* GetArrayRep() 00251 { 00252 return &m_data[0]; 00253 } 00254 00260 friend std::ostream& operator<<(std::ostream &os, Matrix<T>& matrix) 00261 { 00262 matrix.acquireRead(); 00263 00264 os << "Matrix: ("<< matrix.total_rows() <<" X "<<matrix.total_cols()<<")\n"; 00265 for(int i=0;i<matrix.size();i++) 00266 { 00267 os<<(matrix(i))<<" "; 00268 if((i+1)%(matrix.total_cols())==0) 00269 os << "\n"; 00270 } 00271 os<<"\n"; 00272 return os; 00273 } 00274 00275 00285 void randomize(int min = 0, int max = RAND_MAX) 00286 { 00287 unregisterMatrix(false); 00288 00289 for(unsigned int i = 0; i < size(); i++) 00290 { 00291 m_data.at(i) = (T)( rand() % max + min); 00292 } 00293 } 00294 00303 void save(const std::string& filename) 00304 { 00305 std::ofstream file(filename.c_str()); 00306 00307 if (file.is_open()) 00308 { 00309 for(unsigned int i = 0; i < m_data.size(); ++i) 00310 { 00311 file<<m_data.at(i) <<" "; 00312 } 00313 file.close(); 00314 } 00315 else 00316 { 00317 std::cout<<"Unable to open file\n"; 00318 } 00319 } 00320 00331 void load(const std::string& filename, int rowWidth, int numRows = 0) 00332 { 00333 std::ifstream file(filename.c_str()); 00334 00335 if (file.is_open()) 00336 { 00337 std::string line; 00338 getline (file,line); 00339 std::istringstream ss(line); 00340 T num; 00341 clear(); 00342 00343 //Load all elements 00344 if(numRows == 0) 00345 { 00346 while(ss >> num) 00347 { 00348 push_back(num); 00349 } 00350 } 00351 // Load only numElements elements 00352 else 00353 { 00354 for(int i = 0; i < (numRows*rowWidth); ++i) 00355 { 00356 ss >> num; 00357 push_back(num); 00358 } 00359 } 00360 00361 m_cols = rowWidth; 00362 m_rows = (size()/rowWidth); 00363 00364 file.close(); 00365 } 00366 else 00367 { 00368 std::cout<<"Unable to open file\n"; 00369 } 00370 } 00371 00372 00373 00374 00375 // Constructors, destructors 00376 public: 00377 00381 ~Matrix() 00382 { 00383 DEBUG_TEXT_LEVEL1("***** MATRIX DESTRUCTOR **** rows: "<< total_rows()<<", cols: "<< total_cols() <<"\n") 00384 if(isOnStarPU) 00385 { 00386 release_acquire(); 00387 00388 if(xparts>1 || yparts>1) 00389 unpartitionMatrix(); 00390 00391 // starpu_data_unregister(matrix_handle); 00392 } 00393 } 00394 00395 00401 Matrix(size_type _rows, size_type _cols): m_rows(_rows), m_cols(_cols), m_data(m_rows * m_cols), transpose_matrix(0) 00402 { 00403 isOnStarPU = false; 00404 isAcquire = false; 00405 xparts=1; 00406 yparts=1; 00407 memset(&matrix_filter_x, 0, sizeof(matrix_filter_x)); 00408 matrix_filter_x.filter_func=starpu_vertical_block_filter_func; 00409 00410 memset(&matrix_filter_y, 0, sizeof(matrix_filter_y)); 00411 matrix_filter_y.filter_func=starpu_block_filter_func; 00412 } 00413 00420 Matrix(size_type _rows, size_type _cols, const T& val): m_rows(_rows), m_cols(_cols),m_data(m_rows * m_cols, val), transpose_matrix(0) 00421 { 00422 isOnStarPU = false; 00423 isAcquire = false; 00424 xparts=1; 00425 yparts=1; 00426 memset(&matrix_filter_x, 0, sizeof(matrix_filter_x)); 00427 matrix_filter_x.filter_func=starpu_vertical_block_filter_func; 00428 00429 memset(&matrix_filter_y, 0, sizeof(matrix_filter_y)); 00430 matrix_filter_y.filter_func=starpu_block_filter_func; 00431 } 00432 00433 00440 Matrix(const Matrix<T>& copy) 00441 { 00442 copy.acquireRead(); 00443 00444 this->m_rows = copy.total_rows(); 00445 this->m_cols = copy.total_cols(); 00446 this->m_data= copy.m_data; 00447 this->transpose_matrix = copy.transpose_matrix; 00448 00449 isOnStarPU = copy.isOnStarPU; 00450 isAcquire = copy.isAcquire; 00451 00452 matrix_filter_x = copy.matrix_filter_x; 00453 matrix_filter_y = copy.matrix_filter_y; 00454 00455 xparts=copy.xparts; 00456 yparts=copy.yparts; 00457 } 00458 00459 public: 00460 00465 size_type size() const { return m_data.size();} 00466 00471 size_type total_rows() const {return m_rows; } 00472 00477 size_type total_cols() const {return m_cols; } 00478 00483 void change_layout() 00484 { 00485 int tmp = m_rows; 00486 m_rows=m_cols; 00487 m_cols = tmp; 00488 } 00489 00490 00491 00492 private: 00493 size_type m_rows, m_cols; 00494 00495 // A custom memory allocator is written to support pinned memory allocation for CUDA. Enabled by defining USE_PINNED_MEMORY macro 00496 #ifdef USE_PINNED_MEMORY 00497 mutable std::vector<T, malloc_allocator<T> > m_data; 00498 #else 00499 mutable std::vector<T> m_data; 00500 #endif 00501 00502 00503 // for col_iterator, not tested 00504 mutable Matrix<T> *transpose_matrix; 00505 00506 template<typename Type> 00507 void item_swap(Type &t1, Type &t2); 00508 00509 00510 // External classes 00511 public: 00512 //template <AccessType accessType=ROW_WISE> 00513 class iterator; 00514 00515 class col_iterator; 00516 00517 class proxy_elem; 00518 00519 00520 00521 00522 public: //-- Operators --// 00523 00524 void resize(size_type _rows, size_type _cols, T val = T()); 00525 00526 Matrix<T>& operator=(Matrix<T>& other); 00527 Matrix<T>& operator=(const T& elem); 00528 00529 bool operator==(const Matrix<T>& c1); 00530 bool operator!=(const Matrix<T>& c1); 00531 bool operator<(const Matrix<T>& c1); 00532 bool operator>(const Matrix<T>& c1); 00533 bool operator<=(const Matrix<T>& c1); 00534 bool operator>=(const Matrix<T>& c1); 00535 00536 Matrix<T>& subsection(size_type row, size_type col, size_type rowWidth, size_type colWidth); 00537 00538 00539 public: //-- STL vector regular interface --// 00540 //Iterators 00541 iterator begin(); 00542 const_iterator begin() const; 00543 iterator begin(unsigned row); 00544 const_iterator begin(unsigned row) const; 00545 00546 iterator end(); 00547 const_iterator end() const; 00548 iterator end(unsigned row); 00549 const_iterator end(unsigned row) const; 00550 00551 //Column Iterators, taking transpose and using its iterator, work only for const_iterators 00552 // iterator col_begin(); 00553 std::pair<const_iterator, const_iterator> col_iterator_range(); 00554 col_iterator col_begin(); 00555 00556 // iterator col_end(); 00557 col_iterator col_end(); 00558 00559 00560 //Capacity 00561 size_type capacity() const; 00562 00563 00564 00565 void flush(); 00566 00567 00568 00569 bool empty() const; 00570 00571 //Element access 00572 proxy_elem at(size_type row, size_type col); 00573 const T& at(size_type row, size_type col) const; 00574 00575 size_type row_back(size_type row); 00576 const T& row_back(size_type row) const; 00577 00578 size_type row_front(size_type row); 00579 const T& row_front(size_type row) const; 00580 00581 proxy_elem col_back(size_type col); 00582 const T& col_back(size_type col) const; 00583 00584 proxy_elem col_front(size_type col); 00585 const T& col_front(size_type col) const; 00586 00587 void clear(); 00588 00589 iterator erase( iterator loc ); 00590 iterator erase( iterator start, iterator end ); 00591 00592 void swap(Matrix<T>& from); 00593 00594 public: //-- Additions to interface --// 00595 00596 // Does care about device data 00597 // proxy_elem operator()(const size_type row, const size_type col) const; 00598 00599 // Does care about device data 00600 const T& operator()(const size_type row, const size_type col) const; 00601 00602 T& operator()(const size_type row, const size_type col); 00603 00604 // Does not care about device data, use with care 00605 T& operator()(const size_type index); 00606 00607 // Does not care about device data, use with care 00608 T& operator[](const size_type index); 00609 00610 // unary transpose operator 00611 inline Matrix<T> operator~() 00612 { 00613 Matrix<T> temp(total_cols(),total_rows()); 00614 for (size_t i=0; i < total_rows(); i++) 00615 for (size_t j=0; j < total_cols(); j++) 00616 { 00617 T x = (*this)(i,j); 00618 temp(j,i) = x; 00619 } 00620 return temp; 00621 } 00622 00623 Matrix<T>& get_transpose_matrix(bool updated = false) 00624 { 00625 if(transpose_matrix==0 || updated) 00626 { 00627 transpose_matrix = new Matrix<T>(total_cols(),total_rows()); 00628 for (size_t i=0; i < total_rows(); i++) 00629 for (size_t j=0; j < total_cols(); j++) 00630 { 00631 T x = (*this)(i,j); 00632 (*transpose_matrix)(j,i) = x; 00633 } 00634 } 00635 return (*transpose_matrix); 00636 } 00637 00638 00639 00640 // To be able to explicitly force updates without flushing entire matrix. 00641 // Could be used with operator () above to avoid unneccesary function calls 00642 // due to implicit synch. 00643 00644 void acquireRead() const; 00645 void acquireReadWrite(); 00646 00647 // void updateHost() const; 00648 // void invalidateDevice(); 00649 // void updateHostAndInvalidateDevice(); 00650 00651 00652 const Matrix<T>& operator+=(const Matrix<T>& rhs); 00653 const Matrix<T>& operator+=(const T& rhs); 00654 00655 const Matrix<T>& operator-=(const Matrix<T>& rhs); 00656 const Matrix<T>& operator-=(const T& rhs); 00657 00658 const Matrix<T>& operator*=(const Matrix<T>& rhs); 00659 const Matrix<T>& operator*=(const T& rhs); 00660 00661 const Matrix<T>& operator/=(const Matrix<T>& rhs); 00662 const Matrix<T>& operator/=(const T& rhs); 00663 00664 const Matrix<T>& operator%=(const Matrix<T>& rhs); 00665 const Matrix<T>& operator%=(const T& rhs); 00666 }; 00667 00668 } 00669 00670 #include "src/matrix_iterator.inl" 00671 00672 #include "src/matrix_col_iterator.inl" 00673 00674 #include "src/matrix_proxy.inl" 00675 #include "src/matrix.inl" 00676 00677 #endif 00678 00679
1.7.4