A matrix container class (2D matrix) that internally uses a 1D container (std::vector) to store elements in a contiguous memory allocation. More...

#include <matrix.h>

Classes
class	iterator
	A matrix iterator class that traverses row-wise. More...

class	proxy_elem
	A proxy class representing one element of Matrix. More...

Public Member Functions
T *	GetArrayRep ()

void	randomize (int min=0, int max=RAND_MAX)
	Randomizes the Matrix. More...

void	save (const std::string &filename)
	Saves content of Matrix to a file. More...

void	load (const std::string &filename, size_type rowWidth, size_type numRows=0)
	Loads the Matrix from a file. More...

	~Matrix ()

	Matrix (size_type _rows, size_type _cols)

	Matrix (size_type _rows, size_type _cols, const T &val)

	Matrix (const Matrix< T > &copy)

size_type	size () const

size_type	total_rows () const

size_type	total_cols () const

void	change_layout ()

void	resize (size_type _rows, size_type _cols, T val=T())

Matrix< T > &	operator= (const Matrix< T > &other)

Matrix< T > &	operator= (const T &elem)

bool	operator== (const Matrix< T > &c1)

bool	operator!= (const Matrix< T > &c1)

bool	operator< (const Matrix< T > &c1)

bool	operator> (const Matrix< T > &c1)

bool	operator<= (const Matrix< T > &c1)

bool	operator>= (const Matrix< T > &c1)

Matrix< T > &	subsection (size_type row, size_type col, size_type rowWidth, size_type colWidth)

iterator	begin ()

const_iterator	begin () const

iterator	begin (unsigned row)

const_iterator	begin (unsigned row) const

iterator	end ()

const_iterator	end () const

iterator	end (unsigned row)

const_iterator	end (unsigned row) const

size_type	capacity () const

void	flush ()

bool	empty () const

proxy_elem	at (size_type row, size_type col)

const T &	at (size_type row, size_type col) const

size_type	row_back (size_type row)

const T &	row_back (size_type row) const

size_type	row_front (size_type row)

const T &	row_front (size_type row) const

proxy_elem	col_back (size_type col)

const T &	col_back (size_type col) const

proxy_elem	col_front (size_type col)

const T &	col_front (size_type col) const

void	clear ()

void	swap (Matrix< T > &from)

device_pointer_type_cl	updateDevice_CL (T *start, size_type rows, size_type cols, Device_CL *device, bool copy)
	Update device with matrix content. More...

device_pointer_type_cl	updateDevice_CL (T *start, size_type cols, Device_CL *device, bool copy)
	Update device with matrix content. More...

void	flush_CL ()
	Flushes the matrix. More...

void	copyDataToAnInvalidDeviceCopy (DeviceMemPointer_CU< T > *copy, unsigned int deviceID, unsigned int streamID=0)
	Used by the updateDevice_CU function to copy data to a device copy. The device copy could be a new one (just created) or an existing one with stale (marked invalid) data. It tries to copy data from copies in existing device memory, then from host memory, and in the end from other device memories. It can partially copy data from different sources in the process. More...

device_pointer_type_cu	updateDevice_CU (T *start, size_type rows, size_type cols, unsigned int deviceID, unsigned int streamID, bool copy, bool writeAccess, bool usePitch, bool markOnlyLocalCopiesInvalid=false)
	Update device with matrix content. More...

device_pointer_type_cu	updateDevice_CU (T *start, size_type cols, unsigned int deviceID, bool copy, bool writeAccess, bool markOnlyLocalCopiesInvalid=false, unsigned int streamID=0)
	Update device with matrix content. More...

void	flush_CU ()
	Flushes the matrix. More...

bool	isMatrixOnDevice_CU (int deviceID)

bool	isModified_CU (unsigned int deviceID)

const T &	operator() (const size_type row, const size_type col) const

T &	operator() (const size_type row, const size_type col)

T &	operator() (const size_type index)

const T &	operator[] (const size_type index) const

T &	operator[] (const size_type index)

void	transpose_CPU ()
	A method to take Matrix transpose on CPU backend.

void	transpose_OMP ()
	A method to take Matrix transpose on OpenMP backend.

void	transpose_CU (Device_CU *device)
	A method to take Matrix transpose on CUDA backend. Always uses 1 CUDA GPU for transpose even if multiple GPUs are available.

void	transpose_CL (unsigned int deviceID)
	A method to take Matrix transpose on OpenCL backend. Always uses 1 OpenCL device for transpose even if multiple OpenCL devices are available.

void	updateHost () const

void	invalidateDeviceData ()

void	updateHostAndInvalidateDevice ()

void	releaseDeviceAllocations ()

void	updateHostAndReleaseDeviceAllocations ()

const Matrix< T > &	operator+= (const Matrix< T > &rhs)

const Matrix< T > &	operator+= (const T &rhs)

const Matrix< T > &	operator-= (const Matrix< T > &rhs)

const Matrix< T > &	operator-= (const T &rhs)

const Matrix< T > &	operator*= (const Matrix< T > &rhs)

const Matrix< T > &	operator*= (const T &rhs)

const Matrix< T > &	operator/= (const Matrix< T > &rhs)

const Matrix< T > &	operator/= (const T &rhs)

const Matrix< T > &	operator%= (const Matrix< T > &rhs)

const Matrix< T > &	operator%= (const T &rhs)

Friends
std::ostream &	operator<< (std::ostream &os, Matrix< T > &matrix)
	Overloaded stream operator, for testing purposes. More...

Detailed Description

template<typename T>
class skepu::Matrix< T >

A matrix container class (2D matrix) that internally uses a 1D container (std::vector) to store elements in a contiguous memory allocation.

A skepu::Matrix is a 2D container that internally uses a 1D std::vector to store elements in a contiguous memory allocation. Its interface and behaviour are largely compatible with skepu::Vector and std::vector but with some additions and variations. Instead of the regular element, it sometimes returns a proxy element so it can distinguish between reads and writes. It also keeps track of which parts of it are currently allocated and uploaded to the GPU. If a computation is done, changing the matrix in the GPU memory, it is not directly transferred back to the host memory. Instead, the Matrix waits until an element is accessed before any copying is done.

It also implements support for allocating and de-allocating page-locked memory using cudaMallocHost and cudaFreeHost. This could help in running asynchronous operations, especially when using multiple CUDA devices. It can be enabled by defining the USE_PINNED_MEMORY flag in the skeleton program.

Constructor & Destructor Documentation

template<typename T>

skepu::Matrix< T >::~Matrix ( )

inline

Destructor, used to deallocate memory, mainly device memory.

template<typename T>

skepu::Matrix< T >::Matrix	(	size_type	_rows,
		size_type	_cols
	)

inline

Constructor, used to allocate memory ($_rows * _cols$).

Parameters

_rows	Number of rows in the matrix.
_cols	Number of columns in the matrix.

References skepu::Environment< T >::getInstance().

Here is the call graph for this function:

template<typename T>

skepu::Matrix< T >::Matrix	(	size_type	_rows,
		size_type	_cols,
		const T &	val
	)

inline

Constructor, used to allocate memory ($_rows * _cols$), with a value to initialize all elements.

Parameters

_rows	Number of rows in the matrix.
_cols	Number of columns in the matrix.
val	A value to initialize all elements.

References skepu::Environment< T >::getInstance().

Here is the call graph for this function:

template<typename T>

skepu::Matrix< T >::Matrix ( const Matrix< T > & copy )

inline

Copy Constructor, used to assign copy of another matrix.

Parameters

copy	Matrix that is being assigned.

Update the matrix before assigning it to assign latest copy.

Member Function Documentation

template<typename T >

Matrix< T >::proxy_elem skepu::Matrix< T >::at	(	size_type	row,
		size_type	col
	)

Please refer to the documentation of std::vector.

Returns a proxy_elem instead of an ordinary element. The proxy_elem usually behaves like an ordinary element, but there might be exceptions.

Referenced by skepu::MapOverlap2D< MapOverlap2DFunc >::CL(), skepu::Generate< GenerateFunc >::CL(), and skepu::MapArray< MapArrayFunc >::CL().

template<typename T >

const T & skepu::Matrix< T >::at	(	size_type	row,
		size_type	col
	)		const

Please refer to the documentation of std::vector. Uses row and col instead of single index.

Parameters

row	Index of row to get.
col	Index of column to get.

Returns: a const reference to T element at position identified by row,column index.

template<typename T >

Matrix< T >::iterator skepu::Matrix< T >::begin ( )

Please refer to the documentation of std::vector and skepu::Matrix::iterator.

Referenced by skepu::MapOverlap< MapOverlapFunc >::CL(), skepu::MapOverlap< MapOverlapFunc >::CPU(), skepu::MapOverlap< MapOverlapFunc >::CU(), and skepu::MapOverlap< MapOverlapFunc >::OMP().

template<typename T >

Matrix< T >::const_iterator skepu::Matrix< T >::begin ( ) const

Please refer to the documentation of std::vector and skepu::Matrix::iterator.

template<typename T >

Matrix< T >::iterator skepu::Matrix< T >::begin ( unsigned row )

Please refer to the documentation of std::vector and skepu::Matrix::iterator. Uses row to get an iterator for that row.

Parameters

row	The index of row from where to start iterator.

template<typename T >

Matrix< T >::const_iterator skepu::Matrix< T >::begin ( unsigned row ) const

Please refer to the documentation of std::vector and skepu::Matrix::iterator. Uses row to get an iterator for that row.

Parameters

row	The index of row from where to start iterator.

template<typename T >

Matrix< T >::size_type skepu::Matrix< T >::capacity ( ) const

Please refer to the documentation of std::vector.

template<typename T>

void skepu::Matrix< T >::change_layout ( )

inline

A small utility to change rows and columns numbers with each other. A Matrix (4x7) will become (7x4) after this function call without changing the actual values. Not similar to transpose where you actually change the values.

template<typename T >

void skepu::Matrix< T >::clear ( )

Please refer to the documentation of std::vector. Invalidates all copies before clear.

Referenced by skepu::MapOverlap< MapOverlapFunc >::CL(), skepu::MapOverlap< MapOverlapFunc >::CPU(), skepu::MapOverlap< MapOverlapFunc >::CU(), skepu::Matrix< T >::load(), and skepu::MapOverlap< MapOverlapFunc >::OMP().

template<typename T >

Matrix< T >::proxy_elem skepu::Matrix< T >::col_back ( size_type col )

Returns proxy of last element in column.

Returns a proxy_elem instead of an ordinary element. The proxy_elem usually behaves like an ordinary element, but there might be exceptions. col: Index of the column.

template<typename T >

const T & skepu::Matrix< T >::col_back ( size_type col ) const

Returns last element in column.

col Index of the column.

template<typename T >

Matrix< T >::proxy_elem skepu::Matrix< T >::col_front ( size_type col )

Returns proxy of first element in column.

Returns a proxy_elem instead of an ordinary element. The proxy_elem usually behaves like an ordinary element, but there might be exceptions. col: Index of the column.

template<typename T >

const T & skepu::Matrix< T >::col_front ( size_type col ) const

Returns first element in column.

col Index of the column.

template<typename T >

void skepu::Matrix< T >::copyDataToAnInvalidDeviceCopy	(	DeviceMemPointer_CU< T > *	copy,
		unsigned int	deviceID,
		unsigned int	streamID = `0`
	)

Used by the updateDevice_CU function to copy data to a device copy. The device copy could be a new one (just created) or an existing one with stale (marked invalid) data. It tries to copy data from copies in existing device memory, then from host memory, and in the end from other device memories. It can partially copy data from different sources in the process.

Parameters

copy	it is the actual copy that the data is written to...
deviceID	id of the device where this copy belongs...

First check for copies within the same device that are overlapping and valid. Yes, there could be >1 copies, e.g., 2 overlapping "valid" copies if none of them is modified, or 2 non-overlapping "valid" copies if at least one of them is written.

sizeUpdStr is passed by reference; it will be updated inside the called function.

if "src" copy has modified contents then copy those contents to current "dst" copy but keep modified flag set for "src" copy now, if u read "dst" copy then no problem, "src" copy has "modified" flag, dst has no such flag if host or other copies need contents, they get from "src" but if u write "dst" copy then also no problem as later code in this function will mark "src" as invalid copy as "dst" has latest modified contents. now if host or other need to copy data they can copy from "dst" keep that

At one point in time, there could be at >one valid copy per each device for a container

if still there exist some parts (ranges) in copy that cannot be found in valid copies present in current device memory...

if main copy (Host) is valid then copy from there as copying from other device memories' valid copies wont be much faster than HTD?

sizeUpdStr is passed by reference; it will be updated inside the called function.

unfortunately, main copy is invalid so need to look for copies in other device memories...

if peer access is enabled for all of them then satt Bismillah, i.e. can transfer directly from other GPUs' copies...

Copies from valid overlapping copies in other device memories

sizeUpdStr is passed by reference; it will be updated inside the called function.

it is possible that some parts are not copied yet (not present in device memories) so can copy them from host

sizeUpdStr is passed by reference; it will be updated inside the called function.

If peer access is not enabled, then copy all overlapping "modified" copies from other device memories to the host main copy and then copy it from there. This does not guarantee that the main copy is valid, as there might be some non-overlapping copies in the current or other devices that are modified and not written back to the main copy, but at least it ensures that it is safe to copy the overlapping parts.

Copies all overlapping copies from other devices back to host and mark them as invalid

first copy it back to host... internally set modified flag to false

remove this copy now from list of copies to be updated back to host

stupid condition as copy is not updated inside this loop, m_numOfRanges is not modified here

sizeUpdStr is passed by reference; it will be updated inside the called function.

now do actual Copy from all possible sources, HTD and DTD from within same and from other devices... internally sets the m_valid flag for this copy

reset ranges to default range which is total copy

References skepu::DeviceMemPointer_CU< T >::copiesOverlapInf(), skepu::DeviceMemPointer_CU< T >::copyAllRangesToDevice(), skepu::DeviceMemPointer_CU< T >::copyInfFromHostToDevice(), skepu::DeviceMemPointer_CU< T >::deviceDataHasChanged(), skepu::Environment< T >::getInstance(), skepu::DeviceMemPointer_CU< T >::isCopyValid(), MAX_COPYINF_SIZE, MAX_GPU_DEVICES, and skepu::DeviceMemPointer_CU< T >::resetRanges().

Here is the call graph for this function:

template<typename T >

bool skepu::Matrix< T >::empty ( ) const

Please refer to the documentation of std::vector.

template<typename T >

Matrix< T >::iterator skepu::Matrix< T >::end ( )

Please refer to the documentation of std::vector and skepu::Matrix::iterator.

template<typename T >

Matrix< T >::const_iterator skepu::Matrix< T >::end ( ) const

Please refer to the documentation of std::vector and skepu::Matrix::iterator.

template<typename T >

Matrix< T >::iterator skepu::Matrix< T >::end ( unsigned row )

Please refer to the documentation of std::vector and skepu::Matrix::iterator. Get iterator to last element of row.

Parameters

row	Index of row the iterator will point to the last element.

template<typename T >

Matrix< T >::const_iterator skepu::Matrix< T >::end ( unsigned row ) const

Please refer to the documentation of std::vector and skepu::Matrix::iterator. Get iterator to last element of row.

Parameters

row	Index of row the iterator will point to the last element.

template<typename T >

void skepu::Matrix< T >::flush ( )

Flushes the matrix, synchronizing it with the device then release all device allocations.

template<typename T >

void skepu::Matrix< T >::flush_CL ( )

Flushes the matrix.

First it updates the matrix from all its device allocations, then it releases all allocations.

template<typename T >

void skepu::Matrix< T >::flush_CU ( )

Flushes the matrix.

First it updates the matrix from all its device allocations, then it releases all allocations.

template<typename T>

T* skepu::Matrix< T >::GetArrayRep ( )

inline

Get array representation

template<typename T >

void skepu::Matrix< T >::invalidateDeviceData ( )

inline

Invalidates (mark copies data invalid) all device data that this matrix has allocated.

this flag is used to track whether contents in main matrix are changed so that the contents of the transpose matrix that was taken earlier need to be updated again... normally invalidation occurs when contents are changed so good place to update this flag (?)

Referenced by skepu::Matrix< T >::load(), and skepu::Matrix< T >::randomize().

template<typename T >

bool skepu::Matrix< T >::isMatrixOnDevice_CU ( int deviceID )

Can be used to query whether matrix is already available on a device or not.

template<typename T >

bool skepu::Matrix< T >::isModified_CU ( unsigned int deviceID )

Can be used to query whether matrix is modified on a device or not.

template<typename T>

void skepu::Matrix< T >::load	(	const std::string &	filename,
		size_type	rowWidth,
		size_type	numRows = `0`
	)

inline

Loads the Matrix from a file.

Reads a variable number of elements from a file. In the file, all elements should be in ASCII on one line with whitespace between each element. Mainly for testing purposes.

Parameters

filename	Name of file to load from.
rowWidth	The width of a row. All rows get same amount of width.
numRows	The number of rows to be loaded. Default value 0 means all rows.

References skepu::Matrix< T >::clear(), skepu::Matrix< T >::invalidateDeviceData(), and skepu::Matrix< T >::size().

Here is the call graph for this function:

template<typename T >

bool skepu::Matrix< T >::operator!= ( const Matrix< T > & c1 )

Please refer to the documentation of std::vector.

template<typename T >

const Matrix< T > & skepu::Matrix< T >::operator%= ( const Matrix< T > & rhs )

Takes the modulo of the current matrix with the rhs matrix, element wise. Two matrices must be of same size.

Parameters

rhs	The matrix which is used in taking mod with the current matrix.

template<typename T >

const Matrix< T > & skepu::Matrix< T >::operator%= ( const T & rhs )

Takes the modulo of all elements in the current matrix with a scalar value.

Parameters

rhs	The value which is used in taking mod to current matrix.

template<typename T >

const T & skepu::Matrix< T >::operator()	(	const size_type	row,
		const size_type	col
	)		const

Behaves like operator[] and, unlike skepu::Vector, it cares about synchronizing with device. Can be used to access elements row and column wise.

Parameters

row	Index to a specific row of the Matrix.
col	Index to a specific column of the Matrix.

template<typename T >

T & skepu::Matrix< T >::operator()	(	const size_type	row,
		const size_type	col
	)

Behaves like operator[] and, unlike skepu::Vector, it cares about synchronizing with device. Can be used to access elements row and column wise.

Parameters

row	Index to a specific row of the Matrix.
col	Index to a specific column of the Matrix.

template<typename T >

T & skepu::Matrix< T >::operator() ( const size_type index )

Behaves like operator[] but does not care about synchronizing with device. Can be used when accessing many elements quickly so that no synchronization overhead affects performance. Make sure to properly synch with device by calling updateHost etc before use.

Parameters

index Index of element assuming continuous Matrix row-wise storage. To facilitate access using single indexing

template<typename T >

const Matrix< T > & skepu::Matrix< T >::operator*= ( const Matrix< T > & rhs )

Multiplies the current matrix by the rhs matrix, element wise. Two matrices must be of same size. NB it is not matrix multiplication.

Parameters

rhs	The matrix which is used in multiplication to current matrix.

template<typename T >

const Matrix< T > & skepu::Matrix< T >::operator*= ( const T & rhs )

Multiplies all elements in the current matrix by a scalar value.

Parameters

rhs	The value which is used in multiplication to current matrix.

template<typename T >

const Matrix< T > & skepu::Matrix< T >::operator+= ( const Matrix< T > & rhs )

Adds the rhs matrix, element wise, to the current matrix. Two matrices must be of same size.

Parameters

rhs	The matrix which is used in addition to current matrix.

template<typename T >

const Matrix< T > & skepu::Matrix< T >::operator+= ( const T & rhs )

Adds a scalar value to all elements in the current matrix.

Parameters

rhs	The value which is used in addition to current matrix.

template<typename T >

const Matrix< T > & skepu::Matrix< T >::operator-= ( const Matrix< T > & rhs )

Subtracts the rhs matrix, element wise, from the current matrix. Two matrices must be of same size.

Parameters

rhs	The matrix which is used in subtraction to current matrix.

template<typename T >

const Matrix< T > & skepu::Matrix< T >::operator-= ( const T & rhs )

Subtracts a scalar value from all elements in the current matrix.

Parameters

rhs	The value which is used in subtraction to current matrix.

template<typename T >

const Matrix< T > & skepu::Matrix< T >::operator/= ( const Matrix< T > & rhs )

Divides the current matrix by the rhs matrix, element wise. Two matrices must be of same size. NB it is an element-wise operation, not a matrix operation.

Parameters

rhs	The matrix which is used in division to current matrix.

template<typename T >

const Matrix< T > & skepu::Matrix< T >::operator/= ( const T & rhs )

Divides all elements in the current matrix by a scalar value.

Parameters

rhs	The value which is used in division to current matrix.

template<typename T >

bool skepu::Matrix< T >::operator< ( const Matrix< T > & c1 )

Please refer to the documentation of std::vector.

template<typename T >

bool skepu::Matrix< T >::operator<= ( const Matrix< T > & c1 )

Please refer to the documentation of std::vector.

template<typename T >

Matrix< T > & skepu::Matrix< T >::operator= ( const Matrix< T > & other )

Copies the matrix: copies the row and column counts along with the data.

template<typename T >

Matrix< T > & skepu::Matrix< T >::operator= ( const T & elem )

To initialize a matrix with some scalar value.

Parameters

elem	The element you want to assign to all elements of the matrix.

template<typename T >

bool skepu::Matrix< T >::operator== ( const Matrix< T > & c1 )

Please refer to the documentation of std::vector.

template<typename T >

bool skepu::Matrix< T >::operator> ( const Matrix< T > & c1 )

Please refer to the documentation of std::vector.

template<typename T >

bool skepu::Matrix< T >::operator>= ( const Matrix< T > & c1 )

Please refer to the documentation of std::vector.

template<typename T >

const T & skepu::Matrix< T >::operator[] ( const size_type index ) const

An operator[] that cares about synchronizing with device. Can be used when accessing elements considering consecutive storage.

Parameters

index Index of element assuming continuous Matrix row-wise storage. To facilitate access using single indexing

template<typename T >

T & skepu::Matrix< T >::operator[] ( const size_type index )

An operator[] that cares about synchronizing with device. Can be used when accessing elements considering consecutive storage.

Parameters

index Index of element assuming continuous Matrix row-wise storage. To facilitate access using single indexing

template<typename T>

void skepu::Matrix< T >::randomize	(	int	min = `0`,
		int	max = `RAND_MAX`
	)

inline

Randomizes the Matrix.

Sets each element of the Matrix to a random number between min and max. The numbers are generated as integers but are cast to the type of the matrix.

Parameters

min	The smallest number an element can become.
max	The largest number an element can become.

References skepu::Matrix< T >::invalidateDeviceData(), skepu::max(), skepu::min(), and skepu::Matrix< T >::size().

Here is the call graph for this function:

template<typename T >

void skepu::Matrix< T >::releaseDeviceAllocations ( )

inline

Removes the data copies allocated on devices.

template<typename T >

void skepu::Matrix< T >::resize	(	size_type	_rows,
		size_type	_cols,
		T	val = `T()`
	)

Resizes the matrix; invalidates all copies before resizing.

References skepu::min().

Referenced by skepu::MapOverlap< MapOverlapFunc >::CL(), skepu::MapOverlap< MapOverlapFunc >::CPU(), skepu::MapOverlap< MapOverlapFunc >::CU(), and skepu::MapOverlap< MapOverlapFunc >::OMP().

Here is the call graph for this function:

template<typename T >

Matrix< T >::size_type skepu::Matrix< T >::row_back ( size_type row )

Return index of last element of row.

Parameters

row	Index of the row.

template<typename T >

const T & skepu::Matrix< T >::row_back ( size_type row ) const

Return last element of row.

Parameters

row	Index of the row.

template<typename T >

Matrix< T >::size_type skepu::Matrix< T >::row_front ( size_type row )

Return index of first element of row in 1D container.

Parameters

row	Index of the row.

template<typename T >

const T & skepu::Matrix< T >::row_front ( size_type row ) const

Return first element of row.

Parameters

row	Index of the row.

template<typename T>

void skepu::Matrix< T >::save ( const std::string & filename )

inline

Saves content of Matrix to a file.

Outputs the matrix as text on one line with space between elements to the specified file. Mainly for testing purposes.

Parameters

filename Name of file to save to.

References skepu::Matrix< T >::updateHost().

Here is the call graph for this function:

template<typename T>

size_type skepu::Matrix< T >::size ( ) const

inline

Returns total size of Matrix.

Returns: size of the Matrix.

Referenced by skepu::Reduce< ReduceFunc, ReduceFunc >::CL(), skepu::Matrix< T >::load(), and skepu::Matrix< T >::randomize().

template<typename T >

Matrix< T > & skepu::Matrix< T >::subsection	(	size_type	row,
		size_type	col,
		size_type	rowWidth,
		size_type	colWidth
	)

To get a subsection of matrix. This will create a separate copy.

Parameters

row	Index of row to get.
rowWidth	Width of the row of new Matrix.
col	Index of column to get.
colWidth	Width of column of new Matrix.

template<typename T >

void skepu::Matrix< T >::swap ( Matrix< T > & from )

Please refer to the documentation of std::vector. Updates and invalidate both Matrices before swapping.

template<typename T>

size_type skepu::Matrix< T >::total_cols ( ) const

inline

Returns total number of columns in the Matrix.

Returns: columns in the Matrix.

template<typename T>

size_type skepu::Matrix< T >::total_rows ( ) const

inline

Returns total number of rows in the Matrix.

Returns: rows in the Matrix.

template<typename T >

Matrix< T >::device_pointer_type_cl skepu::Matrix< T >::updateDevice_CL	(	T *	start,
		size_type	rows,
		size_type	cols,
		Device_CL *	device,
		bool	copy
	)

Update device with matrix content.

Update device with a Matrix range by specifying rowsize and column size. This allows to create rowwise partitions. If Matrix does not have an allocation on the device for the current range, create a new allocation and if specified, also copy Matrix data to device. Saves newly allocated ranges to m_deviceMemPointers_CL so matrix can keep track of where and what it has stored on devices.

Parameters

start	Pointer to first element in range to be updated with device.
rows	Number of rows.
cols	Number of columns.
device	Pointer to the device that should be synched with.
copy	Boolean value that tells whether to only allocate or also copy matrix data to device. True copies, False only allocates.

References skepu::DeviceMemPointer_CL< T >::copyHostToDevice(), and skepu::Device_CL::getDeviceID().

Referenced by skepu::Matrix< T >::transpose_CL().

Here is the call graph for this function:

template<typename T >

Matrix< T >::device_pointer_type_cl skepu::Matrix< T >::updateDevice_CL	(	T *	start,
		size_type	cols,
		Device_CL *	device,
		bool	copy
	)

Update device with matrix content.

Update device with a Matrix range by specifying rowsize only as number of rows is assumed to be 1 in this case. Helper function, useful for scenarios where matrix need to be treated like Vector 1D. If Matrix does not have an allocation on the device for the current range, create a new allocation and if specified, also copy Matrix data to device. Saves newly allocated ranges to m_deviceMemPointers_CL so matrix can keep track of where and what it has stored on devices.

Parameters

start	Pointer to first element in range to be updated with device.
cols	Number of columns.
device	Pointer to the device that should be synched with.
copy	Boolean value that tells whether to only allocate or also copy matrix data to device. True copies, False only allocates.

template<typename T >

Matrix< T >::device_pointer_type_cu skepu::Matrix< T >::updateDevice_CU	(	T *	start,
		size_type	rows,
		size_type	cols,
		unsigned int	deviceID,
		unsigned int	streamID,
		bool	copy,
		bool	writeAccess,
		bool	usePitch,
		bool	markOnlyLocalCopiesInvalid = `false`
	)

Update device with matrix content.

Update device with a Matrix range by specifying rowsize and column size. This allows to create rowwise partitions. If Matrix does not have an allocation on the device for the current range, create a new allocation and if specified, also copy Matrix data to device. Saves newly allocated ranges to m_deviceMemPointers_CU so matrix can keep track of where and what it has stored on devices.

Parameters

start	Pointer to first element in range to be updated with device.
rows	Number of rows.
cols	Number of columns.
deviceID	Integer specifying the device that should be synched with.
streamID	specifies which cuda stream to use
copy	Boolean value that tells whether to only allocate or also copy matrix data to device. True copies, False only allocates.
writeAccess	specifies whether this copy is going to be read or written...
usePitch	To allow 2D CUDA memory allocation which can result in padding inserted to make coalescing work.
markOnlyLocalCopiesInvalid	This is for optimizations in multi-GPU execution, passed to true to only mark parent and local copies within that device memory as invalid...

m_noValidDeviceCopy is an optimization flag which is true when there is no valid device copy... used to skip invalidDeviceCOpy function call just like updateHost() is only called when m_valid is not set

TODO: BEFORE returning, MARK all other copies as invalid if you are writing this copy and they are overlapping with this copy

add this copy to modified list... This list keeps track of copies that have modified data which is not written back.. so far

First, mark parent copy invalid...

this is possible considering gpu-gpu transfers and in some other cases e.g. map(v1 RW); ... map2(..., v1 Written);

if not fully overlapped then need to transfer as some data should be written back to device memory if fully overlapped then no need to update it as it is overwritten in current copy...

should delete this copy from this list as it needs not to be updated back...

mark copy invalid

TODO: Mark all overlapping copies from all devices as invalid

this is possible considering gpu-gpu transfers and in some other cases e.g. map(v1 RW); ... map2(..., v1 Written);

if not fully overlapped then need to transfer as some data should be written back to device memory if fully overlapped then no need to update it as it is overwritten in current copy...

should delete this copy from this list as it needs not to be updated back...

mark copy invalid

TODO: Update main copy valid flag... set it to "true", i.e., valid, if there exist no modified device copy

Mark all overlapping copies from all devices as invalid

References skepu::DeviceMemPointer_CU< T >::copyHostToDevice(), skepu::DeviceMemPointer_CU< T >::doCopiesOverlap(), skepu::Environment< T >::getInstance(), skepu::DeviceMemPointer_CU< T >::isCopyValid(), and MAX_GPU_DEVICES.

Referenced by skepu::Matrix< T >::transpose_CU().

Here is the call graph for this function:

template<typename T >

Matrix< T >::device_pointer_type_cu skepu::Matrix< T >::updateDevice_CU	(	T *	start,
		size_type	cols,
		unsigned int	deviceID,
		bool	copy,
		bool	writeAccess,
		bool	markOnlyLocalCopiesInvalid = `false`,
		unsigned int	streamID = `0`
	)

Update device with matrix content.

Update device with a Matrix range by specifying rowsize only as number of rows is assumed to be 1 in this case. Helper function, useful for scenarios where matrix need to be treated like Vector 1D. If Matrix does not have an allocation on the device for the current range, create a new allocation and if specified, also copy Matrix data to device. Saves newly allocated ranges to m_deviceMemPointers_CU so matrix can keep track of where and what it has stored on devices.

Parameters

start	Pointer to first element in range to be updated with device.
cols	Number of columns.
deviceID	Integer specifying the device that should be synched with.
streamID	specifies which CUDA stream to use
copy	Boolean value that tells whether to only allocate or also copy matrix data to device. True copies, False only allocates.
writeAccess	specifies whether this copy is going to be read or written...
markOnlyLocalCopiesInvalid	This is for optimizations in multi-GPU execution, passed to true to only mark parent and local copies within that device memory as invalid...

template<typename T >

void skepu::Matrix< T >::updateHost ( ) const

inline

Updates the matrix from its device allocations.

the m_valid logic is only implemented for CUDA backend. The OpenCL still uses the old memory management mechanism

Referenced by skepu::Matrix< T >::save().

template<typename T >

void skepu::Matrix< T >::updateHostAndInvalidateDevice ( )

inline

First updates the matrix from its device allocations. Then invalidates (mark copies data invalid) the data allocated on devices.

template<typename T >

void skepu::Matrix< T >::updateHostAndReleaseDeviceAllocations ( )

inline

First updates the matrix from its device allocations. Then removes the data copies allocated on devices.

Friends And Related Function Documentation

template<typename T>

std::ostream& operator<<	(	std::ostream &	os,
		Matrix< T > &	matrix
	)

friend

Overloaded stream operator, for testing purposes.

Outputs the matrix rowwise having one row on each line.

The documentation for this class was generated from the following files:

include/skepu/matrix.h
include/skepu/src/matrix.inl
include/skepu/src/matrix_cl.inl
include/skepu/src/matrix_cu.inl
include/skepu/src/matrix_transpose.inl

Classes

Public Member Functions

Friends

Detailed Description

template<typename T> class skepu::Matrix< T >

Constructor & Destructor Documentation

Member Function Documentation

Friends And Related Function Documentation

template<typename T>
class skepu::Matrix< T >