SkePU  1.2
 All Classes Namespaces Files Functions Variables Enumerations Friends Macros Groups Pages
reduce_common.h
Go to the documentation of this file.
1 
5 #ifndef REDUCE_COMMON_HELPERS_H
6 #define REDUCE_COMMON_HELPERS_H
7 
8 
9 namespace skepu
10 {
11 
12 #ifdef SKEPU_OPENMP
13 
17 template <typename ReduceFunc, typename T>
18 void ompRegularWorkload(ReduceFunc *reduceFunc, SparseMatrix<T>& input, T *result_array, const unsigned int &numThreads)
19 {
20  size_t rows = input.total_rows();
21  size_t rowsPerThread=rows/numThreads;
22  size_t restRows = rows%numThreads;
23 
24  unsigned int myid;
25  size_t firstRow, lastRow;
26  T psum;
27 
28  // we divide the "N" remainder rows to first "N" threads instead of giving it to last thread to achieve better load balancing
29  #pragma omp parallel private(myid, firstRow, lastRow, psum) default(shared)
30  {
31  myid = omp_get_thread_num();
32 
33  firstRow = myid*rowsPerThread;
34 
35  if(myid!=0)
36  firstRow += (myid<restRows)? myid:restRows;
37 
38  if(myid < restRows)
39  lastRow = firstRow+rowsPerThread+1;
40  else
41  lastRow = firstRow+rowsPerThread;
42 
43  for(size_t r= firstRow; r<lastRow; ++r)
44  {
45  typename SparseMatrix<T>::iterator it = input.begin(r);
46 
47  size_t size= it.size();
48  if(size>0)
49  {
50  psum = it[0];
51 
52  for(size_t c=1; c<size; c++)
53  {
54  psum = reduceFunc->CPU(psum, it[c]);
55  }
56  }
57  else
58  psum = T();
59 
60  result_array[r] = psum;
61  }
62  }
63 }
64 
65 
66 
70 template <typename ReduceFunc, typename T>
71 void ompIrregularWorkload(ReduceFunc *reduceFunc, SparseMatrix<T>& input, T *result_array)
72 {
73  size_t rows = input.total_rows();
74 
75  T psum;
76 
77  // determine schedule at runtime, can be guided, dynamic with different chunk sizes
78  #pragma omp parallel for private(psum) default(shared) schedule(runtime)
79  for(size_t r= 0; r<rows; ++r)
80  {
81  typename SparseMatrix<T>::iterator it = input.begin(r);
82 
83  size_t size= it.size();
84  if(size>0)
85  {
86  psum = it[0];
87 
88  for(size_t c=1; c<size; c++)
89  {
90  psum = reduceFunc->CPU(psum, it[c]);
91  }
92  }
93  else
94  psum = T();
95 
96  result_array[r] = psum;
97  }
98 }
99 
100 
101 #endif
102 
103 
104 
105 
106 }
107 
108 #endif
109 
A sparse matrix container class that mainly stores its data in CSR format.
Definition: sparse_matrix.h:73
iterator begin(unsigned row)
Definition: sparse_matrix.inl:346
size_t total_rows() const
Definition: sparse_matrix.h:336
A sparse matrix iterator class that traverses row-wise.
Definition: sparse_matrix_iterator.inl:20
void ompRegularWorkload(ReduceFunc *reduceFunc, SparseMatrix< T > &input, T *result_array, const unsigned int &numThreads)
Definition: reduce_common.h:18
void ompIrregularWorkload(ReduceFunc *reduceFunc, SparseMatrix< T > &input, T *result_array)
Definition: reduce_common.h:71