SkePU  1.2
 All Classes Namespaces Files Functions Variables Enumerations Friends Macros Groups Pages
trainer.h
1 #ifndef __TRAINER__H_
2 #define __TRAINER__H_
3 
4 
5 #include <iostream>
6 #include <cassert>
7 #include <map>
8 #include "environment.h"
9 
10 
11 #ifndef TRAINING_RUNS
12 #define TRAINING_RUNS 1
13 #endif
14 
15 namespace skepu
16 {
17 
18 
/*!
 * Compile-time tag type that carries an integer value.
 * An instance can be passed as an extra argument to pick an overload
 * according to the wrapped value, which is exposed as \c data.
 */
template <int Value>
struct identity
{
    static const int data = Value;
};
27 
28 
/*!
 * Primary template of the execution plan, parameterised on the number of
 * dimensions. It declares no members; the data lives in the explicit
 * specialisations.
 */
template <unsigned int dimens>
struct ExecPlanNew {};
36 
40 template <>
41 struct ExecPlanNew<1>
42 {
43  std::map<std::pair<size_t,size_t>, ImplType> m_data;
44  bool calibrated;
45  int idx;
46 
47  ExecPlanNew<1>() : calibrated(false), idx(-1) {}
48 };
49 
/*!
 * A point in a two-dimensional problem-size space.
 *
 * Points are ordered lexicographically: first by \c dim1, then by \c dim2,
 * which makes the type usable as a key in ordered containers.
 * Copying is handled by the compiler-generated member-wise copy operations.
 */
struct Point2D
{
    size_t dim1;
    size_t dim2;

    /*! Creates the point (0, 0) so that no member is ever left indeterminate. */
    Point2D() : dim1(0), dim2(0) {}

    /*! Creates the point (\p _d1, \p _d2). */
    Point2D(size_t _d1, size_t _d2) : dim1(_d1), dim2(_d2) {}

    /*! Strict weak ordering: compares \c dim1 first and \c dim2 on ties. */
    bool operator < (const Point2D &p) const
    {
        if (dim1 != p.dim1)
            return dim1 < p.dim1;

        return dim2 < p.dim2;
    }
};
76 
80 template <>
81 struct ExecPlanNew<2>
82 {
83  std::map< std::pair<Point2D, Point2D>, ImplType> m_data;
84  bool isOpen;
85  std::multimap< std::pair<Point2D, Point2D>, std::pair<Point2D, ImplType> > openPoints;
86 
87  ExecPlanNew<2>() : isOpen(false), calibrated(false), idx(-1) {}
88 
89  bool calibrated;
90  int idx;
91 };
92 
/*!
 * Counter starting at 0; it is not used anywhere in this header.
 * NOTE(review): presumably used to hand out Node ids in trainer.inl -- confirm.
 *
 * Declared `static` because this is a header: a namespace-scope variable with
 * external linkage would be defined once per including translation unit and
 * fail to link as soon as two of them include this file. With internal linkage
 * every translation unit gets its own counter.
 */
static int treeNodeId = 0;
94 
/*!
 * Extra information that the user wants to pass to the function wrapper of an
 * implementation. A default-constructed object has \c actDimensions set to -1
 * and both pointers null.
 */
struct ExtraData
{
    int actDimensions;
    int *memUp;
    int *memDown;

    ExtraData()
        : actDimensions(-1),
          memUp(NULL),
          memDown(NULL)
    {
    }
};
107 
108 
112 struct ImpDetail
113 {
114  ImpDetail(std::string _impName, ImplType _impType, void (*_impPtr)(void*)):impName(_impName), impType(_impType), impPtr(_impPtr)
115  {}
116 
117  std::string impName;
118  ImplType impType;
119 
120  void (*impPtr)(void*);
121 };
122 
126 struct TrainingData
127 {
128 public:
129  TrainingData(std::vector<size_t> &_problemSize, unsigned int _nImpls): problemSize(_problemSize), nImpls(_nImpls), extra(NULL), callBackFunction(NULL), callBackFunctionMapReduce(NULL)
130  {
131  assert(nImpls > 0 && nImpls <= MAX_EXEC_PLANS);
132  for(size_t i=0; i<nImpls; ++i)
133  {
134  exec_time[i] = -1;
135  }
136  }
137  unsigned int dimens;
138 
145 // int localityIdx;
146 
147  unsigned int nImpls;
148 
149  ExtraData *extra;
150  std::vector<size_t> &problemSize;
151  double exec_time[MAX_EXEC_PLANS];
152  void (*callBackFunction)(void*, size_t*, unsigned int);
153  void (*callBackFunctionMapReduce)(void*, void*, size_t*, unsigned int);
154 };
155 
156 
166 struct Point
167 {
168 public:
169  Point(std::vector<size_t> _problemSize, unsigned int _nImpls) : problemSize(_problemSize), nImpls(_nImpls)
170  {
171  assert(nImpls > 0 && nImpls <= MAX_EXEC_PLANS);
172  for(unsigned int i=0; i<nImpls; ++i)
173  {
174  cpuImpTime[i].second = -1;
175  ompImpTime[i].second = -1;
176  cudaImpTime[i].second = -1;
177  bestImpTime[i].second = -1;
178  cpuImpTime[i].first = NULL;
179  ompImpTime[i].first = NULL;
180  cudaImpTime[i].first = NULL;
181  bestImpTime[i].first = NULL;
182  }
183  }
184  std::vector<size_t> problemSize;
185  std::pair<ImpDetail*, double> cpuImpTime[MAX_EXEC_PLANS];
186  std::pair<ImpDetail*, double> ompImpTime[MAX_EXEC_PLANS];
187  std::pair<ImpDetail*, double> cudaImpTime[MAX_EXEC_PLANS];
188  std::pair<ImpDetail*, double> bestImpTime[MAX_EXEC_PLANS];
189 
191  unsigned int nImpls;
192 };
193 
194 
198 std::string getStringImplType(ImplType type)
199 {
200  switch(type)
201  {
202  case IMPL_CPU:
203  return "CPU";
204  case IMPL_OMP:
205  return "OMP";
206  case IMPL_CUDA:
207  return "CUDA";
208  default:
209  assert(false);
210  }
211  return "";
212 }
213 
/*!
 * Plain counters gathered while an execution plan is built from the tree.
 * The struct has no constructor, so the caller is responsible for zeroing it
 * before the counters are incremented.
 */
struct StatsTuner
{
    unsigned maxDepth;
    unsigned numLeafClosedRanges; ///< leaves whose range is closed
    unsigned numLeafTotalRanges;  ///< all leaves
    unsigned numTotalRanges;      ///< all visited nodes
    unsigned numTrainingPoints;   ///< total number of points that are traversed
    unsigned numTrainingExec;
};
226 
227 
235 struct Node
236 {
237 public:
238  Node(std::vector<size_t> &_lowBounds, std::vector<size_t> &_uppBounds, unsigned int _nImpls): lowBounds(_lowBounds), uppBounds(_uppBounds), level(0), father(NULL), id(-1), nImpls(_nImpls)
239  {
240  for(int i=0; i<MAX_EXEC_PLANS; ++i)
241  {
242  bestNodeImp[i] = NULL;
243  isClosed[i] = false;
244  }
245  }
246 
247  unsigned int nImpls;
248  std::vector<Point*> points; // represent each border point....
249  std::vector<size_t> uppBounds; // represent low bounds of this area covered by this node...
250  std::vector<size_t> lowBounds; // represent upper bounds of this area covered by this node...
251  Node *father;
252  int id;
253  std::vector<Node*> childs;
254  ImpDetail* bestNodeImp[MAX_EXEC_PLANS];
255  bool isClosed[MAX_EXEC_PLANS];
256  unsigned int level;
257 
261  friend std::ostream& operator<<(std::ostream& os, const Node& node)
262  {
263  std::string padding = "";
264  for(unsigned int i=0; i<node.level; ++i)
265  padding += "----";
266  os << padding << "\"" << node.level << "\" ( ";
267  for (unsigned int i=0; i<node.lowBounds.size(); ++i)
268  {
269  if(i!= node.lowBounds.size()-1)
270  os << node.lowBounds[i] << ",";
271  else
272  os << node.lowBounds[i];
273  }
274  os << " --- ";
275  for (unsigned int i=0; i<node.uppBounds.size(); ++i)
276  {
277  if(i!= node.uppBounds.size()-1)
278  os << node.uppBounds[i] << ",";
279  else
280  os << node.uppBounds[i];
281  }
282  for(unsigned int i=0; i<node.nImpls; ++i)
283  {
284  if(i == 0)
285  os << ") ";
286 
287  if(node.isClosed[i])
288  {
289  assert(node.bestNodeImp[i] != NULL);
290  os << " " << getStringImplType(node.bestNodeImp[i]->impType) << " ";
291  }
292  else
293  os << " [OPEN] ";
294 
295  if(i == (node.nImpls-1))
296  os << "\n";
297  }
298 
299  for(unsigned int i=0; i<node.childs.size(); ++i)
300  {
301  os << *(node.childs[i]);
302  }
303  return os;
304  }
305 
306 
310  ~Node() // need to just call delete on root node as it internally delete recursively....
311  {
312  if(childs.empty() == false) // non-leaf node...
313  {
314  // first delete child nodes, recursively...
315  for(unsigned int i=0; i<childs.size(); ++i)
316  {
317 // DEBUG_TUNING_LEVEL3("Recursive *delete["<<i<<"]->points.size(): " << childs[i]->points.size() << "\n");
318  delete childs[i];
319  }
320  childs.clear();
321 
322  for(unsigned int i=0; i<points.size(); ++i)
323  {
324  delete points[i];
325  }
326  points.clear();
327  }
328  else // leaf node...
329  {
330  for(unsigned int i=0; i<points.size(); ++i)
331  {
332  delete points[i];
333  }
334  points.clear();
335  }
336  }
337 
338 
339  template <int dimens>
340  void constructExecPlanNew(ExecPlanNew<dimens> &plan, StatsTuner &stats, int idx)
341  {
342  constructExecPlanNew(plan, stats, idx, identity<dimens>());
343  }
344 
345 
346 
347 private:
348 
349  template <unsigned int dimens>
350  void constructExecPlanNew(ExecPlanNew<dimens> &plan, StatsTuner &stats, int idx, identity<dimens>)
351  {
352  assert(false);
353  }
354 
355  void constructExecPlanNew(ExecPlanNew<1> &plan, StatsTuner &stats, int idx, identity<1>)
356  {
357  assert(uppBounds.size() == lowBounds.size() && uppBounds.size() == 1);
358 
359  stats.numTrainingPoints += 2;
360  stats.numTotalRanges++;
361 
362  if(childs.empty() == false)
363  {
364  for(unsigned int i=0; i<childs.size(); ++i)
365  {
366 // DEBUG_TUNING_LEVEL2("Recursive *childs["<<i<<"]->points.size(): " << childs[i]->points.size() << "\n");
367  childs[i]->constructExecPlanNew<1>(plan, stats, idx);
368  }
369  }
370  else // leaf nodes...
371  {
372  stats.numLeafTotalRanges++;
373 
374  if(isClosed[idx])
375  {
376  stats.numLeafClosedRanges++;
377 
378  DEBUG_TUNING_LEVEL3("Closed space: " << points.size() << ", " << lowBounds[0] << " - " << uppBounds[0] << "\n");
379  assert(bestNodeImp[idx] != NULL);
380  assert( plan.m_data.insert(std::make_pair(std::make_pair(lowBounds[0], uppBounds[0]), bestNodeImp[idx]->impType)).second == true);
381  }
382  else
383  {
384  assert(points.size() > 1);
385  DEBUG_TUNING_LEVEL3("Open space: " << points.size() << ", " << lowBounds[0] << " - " << uppBounds[0] << "\n");
386 
388  assert(points[0]->bestImpTime[idx].first != NULL && points[1]->bestImpTime[idx].first != NULL);
389  assert(points[0]->bestImpTime[idx].first->impType != points[1]->bestImpTime[idx].first->impType);
390 
391  std::pair<ImplType, double> bestPoint1(points[0]->bestImpTime[idx].first->impType, points[0]->bestImpTime[idx].second);
392  std::pair<ImplType, double> bestPoint2(points[1]->bestImpTime[idx].first->impType, points[1]->bestImpTime[idx].second);
393 
394  double secBestPoint1 = 0;
395  switch(points[1]->bestImpTime[idx].first->impType)
396  {
397  case IMPL_CPU:
398  secBestPoint1 = points[0]->cpuImpTime[idx].second;
399  break;
400  case IMPL_OMP:
401  secBestPoint1 = points[0]->ompImpTime[idx].second;
402  break;
403  case IMPL_CUDA:
404  secBestPoint1 = points[0]->cudaImpTime[idx].second;
405  break;
406  default:
407  assert(false);
408  }
409 
410  std::pair<ImplType, double> secondBestPoint1(points[1]->bestImpTime[idx].first->impType, secBestPoint1);
411 
412  double secBestPoint2 = 0;
413  switch(points[0]->bestImpTime[idx].first->impType)
414  {
415  case IMPL_CPU:
416  secBestPoint2 = points[1]->cpuImpTime[idx].second;
417  break;
418  case IMPL_OMP:
419  secBestPoint2 = points[1]->ompImpTime[idx].second;
420  break;
421  case IMPL_CUDA:
422  secBestPoint2 = points[1]->cudaImpTime[idx].second;
423  break;
424  default:
425  assert(false);
426  }
427 
428  std::pair<ImplType, double> secondBestPoint2(points[0]->bestImpTime[idx].first->impType, secBestPoint2);
429 
430 // std::cerr << "Point1: Best: " << getStringImplType(bestPoint1.first) << ", " << bestPoint1.second << "\n";
431 // std::cerr << "Point2: Best: " << getStringImplType(bestPoint2.first) << ", " << bestPoint2.second << "\n";
432 // std::cerr << "Point1: Second: " << getStringImplType(secondBestPoint1.first) << ", " << secondBestPoint1.second << "\n";
433 // std::cerr << "Point2: Second: " << getStringImplType(secondBestPoint2.first) << ", " << secondBestPoint2.second << "\n";
434 
435  //Line1
436  double A1 = secondBestPoint2.second - bestPoint1.second; //sorted[lastBestImpl][i].second.first - sorted[lastBestImpl][lastIdx].second.first;
437  ssize_t B1 = lowBounds[0] - uppBounds[0]; // sorted[lastBestImpl][lastIdx].first - sorted[lastBestImpl][i].first;
438  double C1 = A1*(lowBounds[0]) + B1*(bestPoint1.second);
439 
440  double A2 = bestPoint2.second - secondBestPoint1.second; //sorted[lastBestImpl][i].second.first - sorted[lastBestImpl][lastIdx].second.first;
441  ssize_t B2 = B1; // sorted[lastBestImpl][lastIdx].first - sorted[lastBestImpl][i].first;
442  double C2 = A2*(lowBounds[0]) + B2*(secondBestPoint1.second);
443 
444  double delta = A1*B2 - A2*B1;
445 
446  assert(delta != 0);
447 
448  size_t x = (size_t)((B2*C1 - B1*C2)/delta);
449 
450  DEBUG_TUNING_LEVEL3("------------------------------------------\n");
451  DEBUG_TUNING_LEVEL3("(" << lowBounds[0] << "-" << x << ") " << getStringImplType(bestPoint1.first) << "\n");
452  DEBUG_TUNING_LEVEL3("(" << (x+1) << "-" << uppBounds[0] << ") " << getStringImplType(bestPoint2.first) << "\n");
453  DEBUG_TUNING_LEVEL3("------------------------------------------\n");
454 
455  assert( plan.m_data.insert(std::make_pair(std::make_pair(lowBounds[0], x), bestPoint1.first)).second == true );
456  assert( plan.m_data.insert(std::make_pair(std::make_pair(x+1, uppBounds[0]), bestPoint2.first)).second == true ); // to ensure that data is inserted always...
457  }
458  }
459 
460  if(plan.m_data.empty() == false)
461  plan.calibrated = true;
462 
463  plan.idx = idx;
464  }
465 
466 };
467 
468 
469 
470 
471 
478 class Trainer
479 {
480 public:
481  Trainer(std::vector<ImpDetail*> &impls, std::vector<size_t> &lowerBounds, std::vector<size_t> &upperBounds, unsigned int maxDepth, unsigned int _nImpls, ExtraData &extra, void (*_callBack1)(void*, size_t*, unsigned int), void (*_callBack2)(void*, void*, size_t*, unsigned int), bool Oversampling = false);
482  void train();
483 
484  template <unsigned int dimens>
485  void constructExecPlanNew(ExecPlanNew<dimens> *plan, StatsTuner &stats);
486 
490  void compressExecPlanNew(ExecPlanNew<1> &plan)
491  {
492  ExecPlanNew<1> newPlan;
493  std::pair< std::pair<size_t, size_t >, ImplType> prevBest;
494  bool init=false;
495  for(std::map<std::pair<size_t,size_t>, ImplType>::iterator it = plan.m_data.begin(); it != plan.m_data.end(); ++it)
496  {
497  if(!init)
498  {
499  prevBest = (*it);
500  init = true;
501  continue;
502  }
503 
504  if(it->second == prevBest.second)
505  {
506  assert(prevBest.first.second = it->first.first-1);
507  prevBest.first.second = it->first.second;
508  }
509  else
510  {
511  assert(newPlan.m_data.insert(prevBest).second == true);
512  prevBest = (*it);
513  }
514  }
515 
516  if(init) // add the last one after the loop...
517  assert(newPlan.m_data.insert(prevBest).second == true);
518 
519  newPlan.idx = plan.idx;
520  plan = newPlan;
521  }
522 
523  ~Trainer()
524  {
525  if(m_tree != NULL)
526  delete m_tree;
527 
528  m_tree = NULL;
529  }
530 
531 
532 
533 private:
534  void train(Node* &rootNode, bool discardFirst);
535  void generateSubSpaces(std::vector<size_t> &baseLowBound, std::vector<size_t> &baseUppBound, Node *father);
536  void generateAllPossibleCombinations(std::vector<size_t> &lowBound, std::vector<size_t> &uppBound, std::vector<std::vector<size_t> > &combinations, bool overSample);
537 
538  std::vector<size_t> &m_lowerBounds;
539  std::vector<size_t> &m_upperBounds;
540  unsigned int m_maxDepth;
541  bool m_overSampling;
542  ExtraData &m_extra;
543  std::vector<ImpDetail*> &m_impls;
544  void (*callBackFunction)(void*, size_t*, unsigned int);
545  void (*callBackFunctionMapReduce)(void*, void*, size_t*, unsigned int);
546 
547  unsigned int nImpls;
548 
549 public:
550  Node *m_tree; //[MAX_EXEC_PLANS];
551 };
552 
553 } // end namespace skepu
554 
555 #include "trainer.inl"
556 
557 #endif
558 
559 
Definition: trainer.h:235
Trainer(std::vector< ImpDetail * > &impls, std::vector< size_t > &lowerBounds, std::vector< size_t > &upperBounds, unsigned int maxDepth, unsigned int _nImpls, ExtraData &extra, void(*_callBack1)(void *, size_t *, unsigned int), void(*_callBack2)(void *, void *, size_t *, unsigned int), bool Oversampling=false)
Definition: trainer.inl:69
Definition: trainer.h:166
void compressExecPlanNew(ExecPlanNew< 1 > &plan)
Definition: trainer.h:490
void constructExecPlanNew(ExecPlanNew< dimens > *plan, StatsTuner &stats)
Definition: trainer.inl:103
end Node class...
Definition: trainer.h:478
unsigned int nImpls
Definition: trainer.h:191
void train()
Definition: trainer.inl:78
Contains a class declaration for Environment class.
~Node()
Definition: trainer.h:310
Any extra information that the user wants to pass to the function wrapper for implementations can be speci...
Definition: trainer.h:99
friend std::ostream & operator<<(std::ostream &os, const Node &node)
Definition: trainer.h:261