18 #include <OpenCL/opencl.h>
32 #include "skepu/src/malloc_allocator.h"
86 #ifdef USE_PINNED_MEMORY
87 typedef std::vector<T, malloc_allocator<T> > container_type;
88 typedef typename std::vector<T, malloc_allocator<T> >
::iterator vector_iterator;
89 typedef typename std::vector<T, malloc_allocator<T> >::size_type size_type;
90 typedef typename std::vector<T, malloc_allocator<T> >::value_type value_type;
91 typedef typename std::vector<T, malloc_allocator<T> >::difference_type difference_type;
92 typedef typename std::vector<T, malloc_allocator<T> >::pointer pointer;
93 typedef typename std::vector<T, malloc_allocator<T> >::reference reference;
94 typedef typename std::vector<T, malloc_allocator<T> >::const_reference const_reference;
95 typedef typename std::vector<T, malloc_allocator<T> >::const_iterator const_iterator;
96 typedef typename std::vector<T, malloc_allocator<T> >::const_reverse_iterator const_reverse_iterator;
98 typedef std::vector<T> container_type;
99 typedef typename std::vector<T>::iterator vector_iterator;
100 typedef typename std::vector<T>::size_type size_type;
101 typedef typename std::vector<T>::value_type value_type;
102 typedef typename std::vector<T>::difference_type difference_type;
103 typedef typename std::vector<T>::pointer pointer;
104 typedef typename std::vector<T>::reference reference;
105 typedef typename std::vector<T>::const_reference const_reference;
106 typedef typename std::vector<T>::const_iterator const_iterator;
107 typedef typename std::vector<T>::const_reverse_iterator const_reverse_iterator;
112 void setValidFlag(
bool val)
130 friend std::ostream& operator<<(std::ostream &os, Matrix<T>& matrix)
134 os <<
"Matrix: ("<< matrix.total_rows() <<
" X "<<matrix.total_cols()<<
")\n";
135 for(size_type i=0; i<matrix.size(); i++)
137 os<<(matrix(i))<<
" ";
138 if((i+1)%(matrix.total_cols())==0)
159 for(size_type i = 0; i <
size(); i++)
161 m_data.at(i) = (T)( rand() % (int)(
max-
min+1) +
min);
174 void save(
const std::string& filename)
178 std::ofstream file(filename.c_str());
182 for(size_type i = 0; i < m_data.size(); ++i)
184 file<<m_data.at(i) <<
" ";
190 std::cout<<
"Unable to open file\n";
204 void load(
const std::string& filename, size_type rowWidth, size_type numRows = 0)
208 std::ifstream file(filename.c_str());
214 std::istringstream ss(line);
229 for(size_type i = 0; i < (numRows*rowWidth); ++i)
237 m_rows = (
size()/rowWidth);
243 std::cout<<
"Unable to open file\n";
257 releaseDeviceAllocations_CL();
261 releaseDeviceAllocations_CU();
264 if(m_transpose_matrix)
265 delete m_transpose_matrix;
274 Matrix(size_type _rows, size_type _cols): m_rows(_rows), m_cols(_cols), m_data(m_rows * m_cols), m_dataChanged(false), m_transpose_matrix(0), m_noValidDeviceCopy(true), m_valid(true)
287 Matrix(size_type _rows, size_type _cols,
const T& val): m_rows(_rows), m_cols(_cols),m_data(m_rows * m_cols, val), m_dataChanged(false), m_transpose_matrix(0), m_noValidDeviceCopy(true), m_valid(true)
304 this->m_rows = copy.m_rows;
305 this->m_cols = copy.m_cols;
306 this->m_data= copy.m_data;
307 this->m_transpose_matrix = copy.m_transpose_matrix;
308 this->m_dataChanged = copy.m_dataChanged;
311 this->m_transposeKernels_CL = copy.m_transposeKernels_CL;
316 Matrix(): m_rows(0), m_cols(0),m_data(), m_dataChanged(false), m_transpose_matrix(0), m_noValidDeviceCopy(true), m_valid(true)
327 return m_data.size();
360 size_type tmp = m_rows;
364 if(m_transpose_matrix && m_transpose_matrix->total_rows()==m_cols && m_transpose_matrix->total_cols()==m_rows && !m_dataChanged)
365 m_transpose_matrix->change_layout();
371 std::map<std::pair< T*, size_type >, device_pointer_type_cu > m_deviceMemPointers_CU[
MAX_GPU_DEVICES];
374 mutable std::map<std::pair< T*, size_type >, device_pointer_type_cu > m_deviceMemPointers_Modified_CU[
MAX_GPU_DEVICES];
378 std::map<std::pair< cl_device_id, std::pair< T*, size_type > >, device_pointer_type_cl > m_deviceMemPointers_CL;
381 size_type m_rows, m_cols;
383 bool m_noValidDeviceCopy;
386 #ifdef USE_PINNED_MEMORY
387 mutable std::vector<T, malloc_allocator<T> > m_data;
389 mutable std::vector<T> m_data;
392 mutable bool m_valid;
397 template<
typename Type>
398 void item_swap(Type &t1, Type &t2);
409 void resize(size_type _rows, size_type _cols, T val = T());
416 bool operator<(const Matrix<T>& c1);
418 bool operator<=(const Matrix<T>& c1);
421 Matrix<T>&
subsection(size_type row, size_type col, size_type rowWidth, size_type colWidth);
424 std::string m_nameVerbose;
431 const_iterator
begin()
const;
432 iterator
begin(
unsigned row);
433 const_iterator
begin(
unsigned row)
const;
436 const_iterator
end()
const;
437 iterator
end(
unsigned row);
438 const_iterator
end(
unsigned row)
const;
447 proxy_elem
at(size_type row, size_type col);
448 const T&
at(size_type row, size_type col)
const;
451 const T&
row_back(size_type row)
const;
457 const T&
col_back(size_type col)
const;
464 iterator erase( iterator loc );
465 iterator erase( iterator start, iterator
end );
479 device_pointer_type_cu
updateDevice_CU(T* start, size_type rows, size_type cols,
unsigned int deviceID,
unsigned int streamID,
bool copy,
bool writeAccess,
bool usePitch,
bool markOnlyLocalCopiesInvalid=
false);
480 device_pointer_type_cu
updateDevice_CU(T* start, size_type cols,
unsigned int deviceID,
bool copy,
bool writeAccess,
bool markOnlyLocalCopiesInvalid=
false,
unsigned int streamID = 0);
488 const T&
operator()(
const size_type row,
const size_type col)
const;
491 T&
operator()(
const size_type row,
const size_type col);
497 const T&
operator[](
const size_type index)
const;
521 std::vector<std::pair<cl_kernel, Device_CL*> > *m_transposeKernels_CL;
527 if(m_transpose_matrix && m_transpose_matrix->m_rows==m_cols && m_transpose_matrix->m_cols==m_rows && !m_dataChanged)
528 return *m_transpose_matrix;
530 #if defined(SKEPU_CUDA)
532 #elif defined(SKEPU_OPENCL)
534 #elif defined(SKEPU_OPENMP)
540 m_dataChanged =
false;
542 return *m_transpose_matrix;
574 void updateHost_CL()
const;
575 void invalidateDeviceData_CL();
576 void releaseDeviceAllocations_CL();
580 void updateHost_CU(
int deviceID = -1)
const;
581 void invalidateDeviceData_CU(
int deviceID = -1);
582 void releaseDeviceAllocations_CU(
int deviceID = -1);
Contains the definitions of non-backend specific member functions for the Matrix container.
bool isMatrixOnDevice_CU(int deviceID)
Definition: matrix_cu.inl:147
iterator begin()
Definition: matrix.inl:381
Contains a class declaration for an object which represents an OpenCL device memory allocation for co...
Contains a class declaration for an object which represents a CUDA device memory allocation for Vect...
size_type capacity() const
Definition: matrix.inl:479
void clear()
Definition: matrix.inl:707
proxy_elem at(size_type row, size_type col)
Definition: matrix.inl:505
bool operator>(const Matrix< T > &c1)
Definition: matrix.inl:914
size_type row_front(size_type row)
Definition: matrix.inl:612
Matrix(size_type _rows, size_type _cols, const T &val)
Definition: matrix.h:287
Matrix< T > & subsection(size_type row, size_type col, size_type rowWidth, size_type colWidth)
Definition: matrix.inl:550
A matrix container class (2D matrix), internally uses 1D container (std::vector) to store elements in...
Definition: matrix.h:72
Contains the definitions of OpenCL specific member functions of the Matrix class. ...
void load(const std::string &filename, size_type rowWidth, size_type numRows=0)
Loads the Matrix from a file.
Definition: matrix.h:204
void transpose_OMP()
A method to take Matrix transpose on OpenMP backend.
Definition: matrix_transpose.inl:54
Contains the definitions of the Matrix transpose functions for CPU, CUDA, OpenMP and OpenCL...
size_type total_cols() const
Definition: matrix.h:343
#define MAX_GPU_DEVICES
Definition: globals.h:43
const Matrix< T > & operator-=(const Matrix< T > &rhs)
Definition: matrix.inl:126
bool operator!=(const Matrix< T > &c1)
Definition: matrix.inl:887
void transpose_CPU()
A method to take Matrix transpose on CPU backend.
Definition: matrix_transpose.inl:16
A matrix iterator class that traverses row-wise.
Definition: matrix_iterator.inl:20
void transpose_CU(Device_CU *device)
A method to take Matrix transpose on CUDA backend. Always uses 1 CUDA GPU for transpose even if multi...
Definition: matrix_transpose.inl:169
void save(const std::string &filename)
Saves content of Matrix to a file.
Definition: matrix.h:174
T * GetArrayRep()
Definition: matrix.h:120
A class representing an OpenCL device memory allocation for container.
Definition: device_mem_pointer_cl.h:38
const Matrix< T > & operator%=(const Matrix< T > &rhs)
Definition: matrix.inl:242
Matrix< T > & operator=(const Matrix< T > &other)
Definition: matrix.inl:33
T min(T a, T b)
Definition: mapoverlap_convol_kernels.h:212
const T & operator[](const size_type index) const
Definition: matrix.inl:836
void copyDataToAnInvalidDeviceCopy(DeviceMemPointer_CU< T > *copy, unsigned int deviceID, unsigned int streamID=0)
Used by the updateDevice_CU function to copy data to a device copy. The device copy could be a new one (...
Definition: matrix_cu.inl:20
size_type row_back(size_type row)
Definition: matrix.inl:579
bool empty() const
Definition: matrix.inl:490
void flush_CU()
Flushes the matrix.
Definition: matrix_cu.inl:388
proxy_elem col_back(size_type col)
Definition: matrix.inl:644
iterator end()
Definition: matrix.inl:430
void transpose_CL(unsigned int deviceID)
A method to take Matrix transpose on OpenCL backend. Always uses 1 OpenCL device for transpose even i...
Definition: matrix_transpose.inl:225
const Matrix< T > & operator/=(const Matrix< T > &rhs)
Definition: matrix.inl:203
void flush_CL()
Flushes the matrix.
Definition: matrix_cl.inl:92
T max(T a, T b)
Definition: mapoverlap_convol_kernels.h:203
size_type size() const
Definition: matrix.h:325
Contains the definitions of CUDA specific member functions of the Matrix class.
void invalidateDeviceData()
Definition: matrix.inl:308
const Matrix< T > & operator+=(const Matrix< T > &rhs)
Definition: matrix.inl:87
AccessType
Can be used to specify whether the access is row-wise or column-wise.
Definition: matrix.h:45
void updateHostAndInvalidateDevice()
Definition: matrix.inl:336
void updateHostAndReleaseDeviceAllocations()
Definition: matrix.inl:363
void flush()
Definition: matrix.inl:770
void resize(size_type _rows, size_type _cols, T val=T())
Definition: matrix.inl:52
A class representing a CUDA device memory allocation for container.
Definition: device_mem_pointer_cu.h:58
A class representing an execution environment.
Definition: environment.h:80
void releaseDeviceAllocations()
Definition: matrix.inl:346
void updateHost() const
Definition: matrix.inl:287
static Environment * getInstance()
Definition: environment.inl:90
Contains a class declaration for Environment class.
Matrix(size_type _rows, size_type _cols)
Definition: matrix.h:274
const Matrix< T > & operator*=(const Matrix< T > &rhs)
Definition: matrix.inl:165
device_pointer_type_cu updateDevice_CU(T *start, size_type rows, size_type cols, unsigned int deviceID, unsigned int streamID, bool copy, bool writeAccess, bool usePitch, bool markOnlyLocalCopiesInvalid=false)
Update device with matrix content.
Definition: matrix_cu.inl:176
proxy_elem col_front(size_type col)
Definition: matrix.inl:679
A class representing a CUDA device.
Definition: device_cu.h:30
A class representing an OpenCL device.
Definition: device_cl.h:36
const T & operator()(const size_type row, const size_type col) const
Definition: matrix.inl:790
Contains the definitions of Matrix::iterator class.
device_pointer_type_cl updateDevice_CL(T *start, size_type rows, size_type cols, Device_CL *device, bool copy)
Update device with matrix content.
Definition: matrix_cl.inl:27
void change_layout()
Definition: matrix.h:358
A class representing the column-wise iterator for the Matrix data-type.
bool operator>=(const Matrix< T > &c1)
Definition: matrix.inl:941
void randomize(int min=0, int max=RAND_MAX)
Randomizes the Matrix.
Definition: matrix.h:155
size_type total_rows() const
Definition: matrix.h:334
Contains the definitions of the Matrix::proxy_elem class.
void swap(Matrix< T > &from)
Definition: matrix.inl:719
bool operator==(const Matrix< T > &c1)
Definition: matrix.inl:874
bool isModified_CU(unsigned int deviceID)
Definition: matrix_cu.inl:484
Matrix(const Matrix< T > &copy)
Definition: matrix.h:301
~Matrix()
Definition: matrix.h:254