crown
1.0.0
|
00001 // The following definitions might be exchanged for others, 00002 // when integrating the following code into the framework. 00003 // The code implements a greedy heuristic to loadbalance the tasks 00004 // over cores such that the l2-norm of the load vector is minimized. 00005 // This should also minimize the l3-norm of the load vector. 00006 // J. Keller, July 14, 2014 00007 00008 #include <stdio.h> 00009 #include <stdlib.h> 00010 #include <string.h> 00011 #include <math.h> 00012 #include <iostream> 00013 #include <sstream> 00014 #include <fstream> 00015 #include <cstdlib> 00016 #include <set> 00017 #include <map> 00018 00019 #include <crown/allocation.h> 00020 #include <crown/mapping.h> 00021 #include <crown/scaling.h> 00022 #include <crown/crown.h> 00023 #include <crown/CrownMappingLTLG.hpp> 00024 #include <crown/CrownConfigBinary.hpp> 00025 00026 #include <pelib/XMLSchedule.hpp> 00027 #include <pelib/AmplInput.hpp> 00028 #include <pelib/AmplOutput.hpp> 00029 #include <pelib/Vector.hpp> 00030 #include <pelib/Matrix.hpp> 00031 #include <pelib/Set.hpp> 00032 #include <pelib/GraphML.hpp> 00033 #include <pelib/Schedule.hpp> 00034 #include <pelib/Task.hpp> 00035 #include <pelib/time.h> 00036 00037 #include <pelib/ParseException.hpp> 00038 #include <pelib/AmplInputScalar.hpp> 00039 #include <pelib/AmplInputVector.hpp> 00040 #include <pelib/AmplInputSet.hpp> 00041 #include <pelib/AmplInputMatrix.hpp> 00042 #include <pelib/Scalar.hpp> 00043 #include <pelib/Algebra.hpp> 00044 #include <pelib/AmplOutputScalar.hpp> 00045 #include <pelib/AmplOutputVector.hpp> 00046 #include <pelib/AmplOutputSet.hpp> 00047 #include <pelib/AmplOutputMatrix.hpp> 00048 #include <pelib/AmplOutput.hpp> 00049 00050 #include <boost/numeric/ublas/matrix.hpp> 00051 #include <boost/numeric/ublas/io.hpp> 00052 00053 #ifdef debug 00054 #undef debug 00055 #endif 00056 00057 #if LONGEST_WIDEST_LOWEST_TASK_FIRST 00058 #undef LONGEST_WIDEST_TASK_FIRST 00059 #define LONGEST_WIDEST_TASK_FIRST 1 00060 #endif 00061 00062 #if LONGEST_WIDEST_TASK_FIRST 00063 #undef LONGEST_TASK_FIRST 00064 #define LONGEST_TASK_FIRST 1 00065 #endif 00066 00067 #if 01 00068 #define debug(var) cout << "[" << __FILE__ << ":" << __FUNCTION__ << ":" << __LINE__ << "] " << #var << " = \"" << var << "\"" << endl; 00069 #else 00070 #define debug(var) 00071 #endif 00072 00073 using namespace std; 00074 using namespace pelib; 00075 using namespace pelib::crown; 00076 using namespace boost::numeric::ublas; 00077 00078 static const long long int nsec_in_sec = 1000000000; 00079 static int ROOTNUMBER = 1; 00080 00081 // Group structure containing the number of the group, its height, width descendants and ancestors 00082 struct Group 00083 { 00084 int number; 00085 float time; 00086 int width; 00087 std::vector<int> sons; 00088 std::vector<int> parents; 00089 00090 Group(int gn, int gt, int gw) 00091 { 00092 number = gn; 00093 time = gt; 00094 width = gw; 00095 sons = std::vector<int>(); 00096 parents = std::vector<int>(); 00097 } 00098 }; 00099 00100 typedef std::vector<pair<int, int> > mapping_t; 00101 typedef std::map<int, float> RowType; 00102 typedef std::map<int, RowType> MatrixType; 00103 00104 static map<int, Group*> groupmap; 00105 00106 // Needed to compare two int elements in a group number set 00107 struct cmp_groupnumber_time : binary_function <int, int, bool> 00108 { 00109 bool operator()(const int& x, const int& y) const 00110 { 00111 // Operand groups' time 00112 float xtime = groupmap[x]->time; 00113 float ytime = groupmap[y]->time; 00114 00115 return xtime == ytime ? x < y : xtime < ytime; 00116 } 00117 }; 00118 00119 static map<int, multiset<int, cmp_groupnumber_time> > groupqueue; 00120 00121 static Algebra *rec; 00122 static mapping_t schedule; 00123 static map<int, float> vector_Wi; 00124 static map<int, float> vector_Tau; 00125 static map<int, float> vector_wi; 00126 static MatrixType matrix_e; 00127 static MatrixType matrix_tree; 00128 static MatrixType matrix_groups; 00129 static int n, p; 00130 00131 static float update_group_time(int gn, float time_offset, int task_wi) 00132 { 00133 float res = 0; 00134 if(gn > 0) 00135 { 00136 float time = groupmap[gn]->time; 00137 groupqueue.find(task_wi)->second.erase(gn); 00138 groupmap[gn]->time += time_offset; 00139 groupqueue.find(task_wi)->second.insert(gn); 00140 res = time + time_offset; 00141 00142 } 00143 else 00144 { 00145 // Not supposed to be here, something is wrong with the group number 00146 debug("Something is wrong with the group you try to update, you may want to check the allocation phase"); 00147 debug(gn); 00148 } 00149 00150 return res; 00151 } 00152 00153 static float update_time(int gn, float time_offset, int task_wi) 00154 { 00155 double son, max; 00156 00157 max = update_group_time(gn, time_offset, task_wi); 00158 if (gn > ROOTNUMBER) { 00159 for(std::vector<int>::iterator it = groupmap[gn]->sons.begin() ; it != groupmap[gn]->sons.end() ; it++) 00160 { 00161 son = update_group_time(*it, time_offset, groupmap[*it]->width); 00162 if(son > max) 00163 { 00164 max = son; 00165 } 00166 } 00167 for(std::vector<int>::iterator it = groupmap[gn]->parents.begin() ; it != groupmap[gn]->parents.end(); it++) 00168 { 00169 update_group_time(*it, max - groupmap[*it]->time, groupmap[*it]->width); 00170 } 00171 } 00172 return max; 00173 } 00174 00175 static float Tau(int task) 00176 { 00177 return vector_Tau.find(task)->second; 00178 } 00179 00180 static float wi(int task) 00181 { 00182 int return_wi = (int)vector_wi.find(task)->second; 00183 00184 return return_wi; 00185 } 00186 00187 static float e(int task, int p) 00188 { 00189 return matrix_e.find(task)->second.find(p)->second; 00190 } 00191 00192 static float work(int task, int p) 00193 { 00194 float return_work = Tau(task) / e(task, p); 00195 return return_work; 00196 } 00197 00198 static float time(int task, int p) 00199 { 00200 float return_time = work(task, p) / (float)p; 00201 return return_time; 00202 } 00203 00204 #if LONGEST_TASK_FIRST 00205 // Needed tp compare two group_time_t elements in the set whose definition comes right after 00206 struct cmp_task : binary_function <int, int, bool> 00207 { 00208 bool 00209 operator()(const int& x, const int& y) const 00210 { 00211 #if LONGEST_WIDEST_TASK_FIRST 00212 if(time(x, wi(x)) == time(y, wi(y))) 00213 { 00214 #if LONGEST_WIDEST_LOWEST_TASK_FIRST 00215 if(wi(x) == wi(y)) 00216 { 00217 return x < y; 00218 } 00219 else 00220 { 00221 #endif 00222 return wi(x) > wi(y); 00223 #if LONGEST_WIDEST_LOWEST_TASK_FIRST 00224 } 00225 #endif 00226 } 00227 else 00228 { 00229 #endif 00230 return time(x, (int)wi(x)) > time(y, (int)wi(y)); 00231 #if LONGEST_WIDEST_TASK_FIRST 00232 } 00233 #endif 00234 } 00235 }; 00236 static multiset<int, cmp_task> ordered_tasks; 00237 #endif 00238 00239 static void parse() 00240 { 00241 schedule = mapping_t(); 00242 vector_Wi = map<int, float>(); 00243 vector_Tau = map<int, float>(); 00244 vector_wi = map<int, float>(); 00245 matrix_e = MatrixType(); 00246 AmplInput data(AmplInput::floatHandlers()); 00247 groupmap = map<int, Group*>(); 00248 groupqueue = map<int, multiset<int, cmp_groupnumber_time> >(); 00249 00250 // Read all values 00251 n = (int)rec->find<Scalar<float> >("n")->getValue(); 00252 p = (int)rec->find<Scalar<float> >("p")->getValue(); 00253 00254 vector_Wi = rec->find<Vector<int, float> >("Wi")->getValues(); 00255 vector_Tau = rec->find<Vector<int, float> >("Tau")->getValues(); 00256 vector_wi = rec->find<Vector<int, float> >("wi")->getValues(); 00257 matrix_e = rec->find<Matrix<int, int, float> >("e")->getValues(); 00258 matrix_groups = rec->find<Matrix<int, int, float> >(CrownConfig::crownConfigGroups)->getValues(); 00259 matrix_tree = rec->find<Matrix<int, int, float> >(CrownConfig::crownConfigTree)->getValues(); 00260 00261 // creating the group map and setting the width : p(2p-1) = 2p²-p = O(p²) 00262 for(unsigned int i = 1 ; i <= matrix_groups.size() ; ++i) 00263 { 00264 groupmap.insert(std::make_pair(i, new Group(i, 0, 0))); 00265 for(unsigned int j = 1 ; j <= matrix_groups.begin()->second.size() ; ++j) 00266 { 00267 if(matrix_groups.find(i)->second.find(j)->second) groupmap[i]->width++; 00268 } 00269 } 00270 00271 // setting the inheritance : (2p-1)² = 4p²-4p+3 = O(p²) 00272 for(unsigned int i = 1 ; i <= matrix_tree.size() ; ++i) 00273 { 00274 for(unsigned int j = 1 ; j <= matrix_tree.begin()->second.size() ; ++j) 00275 { 00276 if(matrix_tree.find(i)->second.find(j)->second) 00277 { 00278 groupmap[j]->sons.push_back(i); 00279 groupmap[i]->parents.push_back(j); 00280 } 00281 } 00282 } 00283 00284 // creating the group queue 00285 for(map<int, Group*>::iterator it = groupmap.begin() ; it != groupmap.end() ; ++it) 00286 { 00287 if(groupqueue.find(it->second->width) == groupqueue.end()) 00288 { 00289 groupqueue.insert(pair<int, multiset<int, cmp_groupnumber_time> >(it->second->width, multiset<int, cmp_groupnumber_time>())); 00290 } 00291 groupqueue.find(it->second->width)->second.insert(it->first); 00292 } 00293 } 00294 00295 static float group_recursive_load(int g, int b, int p, float *group) 00296 { 00297 int i; 00298 float load, max_load = 0; 00299 00300 for(i = g * b; i < (g + 1) * b && i < 2 * p; i++) 00301 { 00302 load = group_recursive_load(i, b, p, group); 00303 if(load > max_load) max_load = load; 00304 } 00305 00306 return max_load + group[g - 1]; 00307 } 00308 00309 CrownMappingLTLG::CrownMappingLTLG(const CrownConfig *config, const CrownAllocation *alloc, bool showOutput, bool showError) : CrownMapping(config, alloc, showOutput, showError) 00310 { 00311 /* Do nothing else */ 00312 } 00313 00314 CrownMappingLTLG::CrownMappingLTLG(const Algebra ¶m, const CrownConfig *config, const CrownAllocation *alloc, bool showOutput, bool showError): CrownMapping(param, config, alloc, showOutput, showError) 00315 { 00316 /* Do nothing else */ 00317 } 00318 00319 CrownMappingLTLG::CrownMappingLTLG(const Taskgraph &tg, const Platform &pt, const Algebra ¶m, const CrownConfig *config, const CrownAllocation *alloc, bool showOutput, bool showError): CrownMapping(tg, pt, param, config, alloc, showOutput, showError) 00320 { 00321 /* Do nothing else */ 00322 } 00323 00324 CrownMappingLTLG::CrownMappingLTLG(const CrownMappingLTLG &src) : CrownMapping(src) 00325 { 00326 // Do nothing else 00327 } 00328 00329 Algebra CrownMappingLTLG::map(const Algebra &alg, std::map<const string, double> &stats) const 00330 { 00331 Algebra input = alg; 00332 struct timespec start, stop, total_time; 00333 00334 rec = &input; 00335 parse(); 00336 clock_gettime(CLOCK_MONOTONIC, &start); 00337 00338 #if LONGEST_TASK_FIRST 00339 ordered_tasks = multiset<int, cmp_task>(); 00340 for(int i = 0 ; i < n ; i++) // O(n) 00341 { 00342 ordered_tasks.insert(i+1); // O(log n) 00343 } 00344 #endif 00345 00346 // Use a temporary vector to reduce asymptotic time with n 00347 std::vector<pair<int, float> > schedule_vector; 00348 00349 #if LONGEST_TASK_FIRST 00350 for(set<int>::iterator iter = ordered_tasks.begin() ; iter != ordered_tasks.end() ; iter++) 00351 #else 00352 for(int i = 0 ; i < n ; i++) 00353 #endif 00354 { 00355 int task; 00356 #if LONGEST_TASK_FIRST 00357 task = *iter; 00358 #else 00359 task = i + 1; 00360 #endif 00361 float task_wi = wi(task); 00362 float task_time = time(task, (int)task_wi); 00363 multiset<int, cmp_groupnumber_time> groups = groupqueue.find(task_wi)->second; 00364 int group = *(groups.begin()); 00365 00366 schedule_vector.push_back(pair<int, int>(task, group)); 00367 update_time(group, task_time, task_wi); 00368 00369 } 00370 00371 clock_gettime(CLOCK_MONOTONIC, &stop); 00372 // Build the final schedule cppelib structure from schedule_vector 00373 // Outside time measurement 00374 for(std::vector<pair<int, float> >::iterator iter = schedule_vector.begin(); iter != schedule_vector.end(); iter++) 00375 { 00376 ::schedule.push_back(pair<int, int>(iter->first, (int)iter->second)); 00377 } 00378 00379 pelib_timespec_subtract(&total_time, &stop, &start); 00380 stats.insert(pair<const string, double>("time_mapping", (total_time.tv_sec * nsec_in_sec + total_time.tv_nsec) / (float)nsec_in_sec)); 00381 // Make 00382 std::map<int, float> vector_group; 00383 bool all_sequential = true; 00384 for(mapping_t::iterator iter = ::schedule.begin(); iter != ::schedule.end(); iter++) 00385 { 00386 all_sequential = all_sequential && wi(iter->first) == 1; 00387 vector_group.insert(pair<int, int>(iter->first, iter->second)); 00388 // debug(iter->first); 00389 // debug(iter->second); 00390 } 00391 Vector<int, float> group("group", vector_group); 00392 rec->insert(&group); 00393 00394 //AmplInput(AmplInput::intFloatHandlers()).dump(cout, rec->find<Vector<int, float> >("group")); 00395 00396 return input; 00397 } 00398 00399 float CrownMappingLTLG::complexity(const Taskgraph &tg, const Platform &pt, const Algebra ¶m, const CrownConfig* config) const 00400 { 00401 Algebra alg = tg.buildAlgebra(pt); 00402 alg = alg.merge(pt.buildAlgebra()); 00403 alg = alg.merge(param); 00404 if(config == NULL) 00405 { 00406 CrownConfigBinary conf; 00407 config = &conf; 00408 } 00409 std::map<const string, double> stats; 00410 alg = alg.merge(config->configure(tg, pt, param, stats)); return complexity(alg); 00411 } 00412 00413 float CrownMappingLTLG::complexity(const pelib::Algebra &input) const 00414 { 00415 float n = input.find<Scalar<float> >("n")->getValue(); 00416 float p = input.find<Scalar<float> >("p")->getValue(); 00417 return n * (p * log(p) + log(p) * log(p)); 00418 } 00419 00420 std::string CrownMappingLTLG::getShortDescription() const 00421 { 00422 stringstream ss; 00423 std::streamsize p = ss.precision(); 00424 ss.precision(4); 00425 ss << "newLTLG"; 00426 ss.precision(p); 00427 return ss.str(); 00428 } 00429 00430 CrownMapping* CrownMappingLTLG::clone() const 00431 { 00432 return new CrownMappingLTLG(*this); 00433 }