crown
1.0.0
|
00001 // The following definitions might be exchanged for others, 00002 // when integrating the following code into the framework. 00003 // The code implements a greedy heuristic to loadbalance the tasks 00004 // over cores such that the l2-norm of the load vector is minimized. 00005 // This should also minimize the l3-norm of the load vector. 00006 // J. Keller, July 14, 2014 00007 00008 #include <stdio.h> 00009 #include <stdlib.h> 00010 #include <string.h> 00011 #include <math.h> 00012 #include <iostream> 00013 #include <sstream> 00014 00015 #include <crown/allocation.h> 00016 #include <crown/mapping.h> 00017 #include <crown/scaling.h> 00018 #include <crown/crown.h> 00019 #include <crown/annealing.h> 00020 #include <crown/CrownCompositeAnnealing.hpp> 00021 #include <crown/CrownConfigBinary.hpp> 00022 #include <crown/CrownException.hpp> 00023 #include <crown/CrownAllocationFastest.hpp> 00024 #include <crown/CrownMappingLTLG.hpp> 00025 #include <crown/CrownScalingHeight.hpp> 00026 00027 #include <pelib/AmplInput.hpp> 00028 #include <pelib/Schedule.hpp> 00029 #include <pelib/Scalar.hpp> 00030 #include <pelib/Vector.hpp> 00031 #include <pelib/Matrix.hpp> 00032 #include <pelib/Set.hpp> 00033 #include <pelib/time.h> 00034 00035 #ifdef debug 00036 #undef debug 00037 #endif 00038 00039 #define debug(var) cout << "[" << __FILE__ << ":" << __FUNCTION__ << ":" << __LINE__ << "] " << #var << " = \"" << (var) << "\"" << endl; 00040 00041 using namespace std; 00042 using namespace pelib; 00043 using namespace pelib::crown; 00044 00045 // Quality assessment parameters 00046 static const long long int nsec_in_sec = 1000000000; 00047 const float CrownCompositeAnnealing::default_init_temperature = 8; 00048 const float CrownCompositeAnnealing::default_cooling_factor = 0.6; 00049 const float CrownCompositeAnnealing::default_final_temperature = 0.9; 00050 const float CrownCompositeAnnealing::default_max_transformations = 2; 00051 const float CrownCompositeAnnealing::default_max_new_states = 2; 00052 const float CrownCompositeAnnealing::default_distance = 0.5; 00053 00054 Algebra 00055 CrownCompositeAnnealing::neighbor(const Algebra &solution, float tasks, float cores) const 00056 { 00057 Algebra schedule = solution; 00058 // distance == 1 -> change all neighbors by +- Wi processors 00059 // distance == 0 -> change only one neighbor by 1 processor 00060 00061 map<int, float> vector_wi = schedule.find<Vector<int, float> >("wi")->getValues(); 00062 //int b = (int)schedule.find<Scalar<float> >("b")->getValue(); 00063 int b = 2; 00064 int p = (int)schedule.find<Scalar<float> >("p")->getValue(); 00065 const Vector<int, float> *Wi = schedule.find<Vector<int, float> >("Wi"); 00066 map<int, float> vector_Wi = Wi->getValues(); 00067 int n = vector_wi.size(); 00068 00069 int num_tasks = ((int)((float)rand() / (float)RAND_MAX * n * tasks)); 00070 while (num_tasks > 0 && vector_Wi.size() > 0) 00071 { 00072 int size = vector_Wi.size(); 00073 map<int, float>::iterator itask = vector_Wi.begin(); 00074 int index = rand() % size; 00075 00076 for(int i = 0; i < index; i++) 00077 { 00078 itask++; 00079 } 00080 00081 // If a task is sequential, there is no neighbor to find there, remove that task from list and continue 00082 if(itask->second < 2) 00083 { 00084 vector_Wi.erase(itask); 00085 continue; 00086 } 00087 00088 // Task maximal width > 1 (Wit > 0) 00089 int task = itask->first; 00090 int Wit = (int)vector_Wi[task]; 00091 Wit = Wit > p ? p : Wit; 00092 Wit = (int)(log(Wit) / log(b)); 00093 00094 // task current allocated width 00095 int wi = (int)(log(vector_wi.find(task)->second) / log(b)); 00096 00097 int amplitude = Wit * (int)cores; // + 1 to count the number of possibilities, -1 to remove the current allocation from possible new allocation 00098 amplitude = amplitude < 1 ? 1 : amplitude; // Make sure at least one transformation is performed 00099 00100 // Count how many spots are available below and above current wi 00101 int avail_lower = wi; 00102 int avail_higher = Wit - wi; 00103 00104 // Cound how many spots we need below and above current wi 00105 int wi_lower, wi_higher; 00106 if(rand() % 2 == 0) 00107 { 00108 wi_lower = (int)floor((float)amplitude / 2); 00109 wi_higher = (int)ceil((float)amplitude / 2); 00110 } 00111 else 00112 { 00113 wi_lower = (int)ceil((float)amplitude / 2); 00114 wi_higher = (int)floor((float)amplitude / 2); 00115 } 00116 00117 int len_lower, len_higher; 00118 if(avail_lower >= wi_lower && avail_higher >=wi_higher) 00119 { 00120 len_lower = wi_lower; 00121 len_higher = wi_higher; 00122 } 00123 else if(avail_lower < wi_lower) 00124 { 00125 len_lower = avail_lower; 00126 len_higher = wi_higher + (wi_lower - avail_lower); 00127 } 00128 else if(avail_higher < wi_higher) 00129 { 00130 len_higher = avail_higher; 00131 len_lower = wi_lower + (wi_higher - avail_higher); 00132 } 00133 else 00134 { 00135 // Not enough space to fit the amplitude, aborting 00136 cerr << "# Not enough allocation opportunities to fit the amplitude" << endl; 00137 cerr << "# Task " << task << "; p = " << p << "; log2(Wi) = " << Wit << "; log2(wi) = " << wi << "; amplitude = " << amplitude << "; avail_lower = " << avail_lower << "; avail_higher = " << avail_higher << "; wi_lower = " << wi_lower << "; wi_higher = " << wi_higher << endl; 00138 abort(); 00139 } 00140 00141 int alternative = ((int)rand() % (amplitude)) + 1; // Start counting the new allocation number with 1 00142 int new_wi = 0; 00143 if(alternative <= len_lower) 00144 { 00145 new_wi = wi - (len_lower - alternative + 1); 00146 } 00147 else if(alternative <= len_lower + len_higher) 00148 { 00149 new_wi = wi + alternative - len_lower; 00150 } 00151 else 00152 { 00153 cerr << "# Chosen alternative is beyond the spots available" << endl; 00154 cerr << "# Task " << task << "; p = " << p << "; log2(Wi) = " << Wit << "; log2(wi) = " << wi << "; amplitude = " << amplitude << "; avail_lower = " << avail_lower << "; avail_higher = " << avail_higher << "; wi_lower = " << wi_lower << "; wi_higher = " << wi_higher << "; alternative = " << alternative << endl; 00155 len_lower = avail_lower; 00156 len_higher = wi_higher + (wi_lower - avail_lower); 00157 } 00158 00159 if(wi == new_wi || new_wi > Wit || new_wi < 0) 00160 { 00161 cerr << "# Task " << task << "; Wi " << Wit << "; wi " << wi << "; amplitude " << amplitude << "; new_wi " << new_wi << endl; 00162 abort(); 00163 } 00164 00165 new_wi = (int)pow((double)b, (double)new_wi); 00166 if(new_wi == 64) 00167 { 00168 cerr << "# Task " << task << "; Wi " << Wit << "; wi " << wi << "; amplitude " << amplitude << "; new_wi " << new_wi << endl; 00169 } 00170 vector_wi[task] = new_wi; 00171 00172 // Done with this task, continue with the next 00173 vector_Wi.erase(itask); 00174 num_tasks--; 00175 00176 } 00177 00178 Vector<int, float> wi("wi", vector_wi); 00179 schedule.insert(&wi); 00180 00181 return schedule; 00182 } 00183 00184 float 00185 CrownCompositeAnnealing::energy(const Algebra &solution) 00186 { 00187 Algebra rec = crownToSchedule(solution); 00188 int n = rec.find<Scalar<float> >("n")->getValue(); 00189 int p = rec.find<Scalar<float> >("p")->getValue(); 00190 float M = rec.find<Scalar<float> >("M")->getValue(); 00191 float kappa = rec.find<Scalar<float> >("kappa")->getValue(); 00192 float eta = rec.find<Scalar<float> >("eta")->getValue(); 00193 float zeta = rec.find<Scalar<float> >("zeta")->getValue(); 00194 float alpha = rec.find<Scalar<float> >("alpha")->getValue(); 00195 Vector<int, float> tau = rec.find<Vector<int, float> >("Tau"); 00196 const Vector<int, string> *nameptr = rec.find<Vector<int, string> >("name"); 00197 Vector<int, string> name("name", nameptr != NULL ? nameptr->getValues() : map<int, string>()); 00198 Vector<int, float> Wi = rec.find<Vector<int, float> >("Wi"); 00199 Vector<int, float> frequency = rec.find<Vector<int, float> >("frequency"); 00200 Set<float> F = rec.find<Set<float> >("F"); 00201 Matrix<int, int, float> e = rec.find<Matrix<int, int, float> >("e"); 00202 Matrix<int, int, float> schedule = rec.find<Matrix<int, int, float> >("schedule"); 00203 Vector<int, float> wi = rec.find<Vector<int, float> >("wi"); 00204 vector<double> busy_task(n, 0); 00205 vector<double> area_task(n, 0); 00206 vector<double> met_task(n, 0); 00207 00208 float lowest_freq = *(F.getValues().begin()); 00209 kappa = kappa * lowest_freq; 00210 eta = eta * (zeta * pow(lowest_freq, alpha) + (1 - zeta) * (lowest_freq + kappa)) / zeta; 00211 double idle_energy = p * M * pow(lowest_freq, alpha); 00212 double busy_energy = 0; 00213 00214 map<float, int> core_active; 00215 for(size_t i = 0; i < (size_t)p; i++) 00216 { 00217 core_active[i + 1] = 0; 00218 } 00219 int total_core_active = 0; 00220 00221 // Iterate through all task names until we find the dummy task; 00222 vector<size_t> dummy_index; 00223 size_t index = 0; 00224 for(map<int, string>::const_iterator i = name.getValues().begin(); i != name.getValues().end(); i++, index++) 00225 { 00226 stringstream ss; 00227 ss << DUMMY_TASK << " " << index + 1; 00228 if(string(i->second).compare(ss.str()) == 0) 00229 { 00230 dummy_index.push_back(index + 1); 00231 } 00232 } 00233 00234 // Now evaluate the quality of this schedule 00235 float total_area = 0; 00236 for(map<int, map<int, float> >::const_iterator i = schedule.getValues().begin(); i != schedule.getValues().end(); i++) 00237 { 00238 //int core = i->first; 00239 for(map<int, float>::const_iterator j = i->second.begin(); j != i->second.end(); j++) 00240 { 00241 //int index = j->first; 00242 int task = j->second; 00243 //cout << "Core " << core << ", task " << task << endl; 00244 00245 // Check if the task index is a dummy task 00246 bool dummy = false; 00247 for(size_t k = 0; k < dummy_index.size(); k++) 00248 { 00249 if((size_t)task == dummy_index[k]) 00250 { 00251 dummy = true; 00252 break; 00253 } 00254 } 00255 00256 // We don't take dummy task power consumption into account 00257 if(task > 0 && !dummy) 00258 { 00259 float width_t = wi.getValues().find(task)->second; 00260 double time = (double)tau.find(task) / (width_t * e.find(width_t, task) * (double)frequency.find(task)); 00261 total_area += time; 00262 float power = (zeta * pow(frequency.find(task), alpha) + (1 - zeta) * (frequency.find(task) + kappa)) / zeta; 00263 busy_energy += time * power; 00264 00265 // Mark this core as active 00266 if(core_active[i->first] == 0) 00267 { 00268 core_active[i->first] = 1; 00269 total_core_active++; 00270 } 00271 } 00272 } 00273 } 00274 00275 idle_energy = (M * (total_core_active) - total_area) * eta; 00276 00277 return zeta * busy_energy + (1 - zeta) * idle_energy; 00278 } 00279 00280 Algebra 00281 CrownCompositeAnnealing::solve(const Algebra &tg, const Algebra &pt, const pelib::Algebra ¶meters, std::map<const std::basic_string<char>, double> &stats) const 00282 { 00283 Algebra param = parameters; 00284 const Scalar<float> *b = param.find<Scalar<float> >("b"); 00285 Scalar<float> bb("b", 2); 00286 if(b == NULL) 00287 { 00288 param.insert(new Scalar<float>("b", 2)); 00289 b = param.find<Scalar<float> >("b"); 00290 b = &bb; 00291 } 00292 float p = pt.find<Scalar<float> >("p")->getValue(); 00293 Algebra best_schedule; 00294 00295 if(b != NULL && p == pow(b->getValue(), floor(log(p) / log(b->getValue())))) 00296 { 00297 struct timespec start, stop, total_time; 00298 //Algebra schedule = tg.merge(pt); 00299 //schedule = schedule.merge(param); 00300 clock_gettime(CLOCK_MONOTONIC, &start); 00301 00302 // Run Crown binary 00303 Algebra schedule = scheduler->solve(tg, pt, param, stats); 00304 //Algebra schedule = crown_binary(best, 0); 00305 00306 //best_schedule = crown_binary_annealing_allocation(schedule, this->init_temperature, this->cooling_factor, this->final_temperature, this->max_transformations, this->max_new_states, this->distance); 00307 00308 schedule = this->neighbor(schedule, distance, 0); 00309 00310 float m; 00311 int counter = 0; 00312 float T = init_temperature; 00313 00314 Algebra best = schedule; 00315 //float best_q = quality(best); 00316 float best_q = energy(best); 00317 Vector<int, float> *best_solution = best.find<Vector<int, float> >("wi")->clone(); 00318 float q = best_q; //quality(schedule); 00319 00320 while (T > final_temperature) 00321 { 00322 int new_state = 0; 00323 00324 for(int i = 0; i < max_transformations; i++) 00325 { 00326 // Keep track of the solution of last iteration 00327 Vector<int, float> *old_solution = schedule.find<Vector<int, float> >("wi")->clone(); 00328 float old_q = q; 00329 00330 // Transform the solution 00331 schedule = neighbor(schedule, distance, 0); 00332 schedule = this->mapping->map(schedule, stats); 00333 schedule = this->scaling->scale(schedule, stats); 00334 //schedule = mapping_ltlg(schedule); 00335 //schedule = frequency_height(schedule); 00336 00337 m = schedule.find<Scalar<float> >("m")->getValue(); 00338 //q = quality(schedule); 00339 q = energy(schedule); 00340 00341 // If the new energy is lower than the previous one, then we accept this solution 00342 if (q - old_q <= 0 && m >= 0) 00343 { 00344 delete old_solution; 00345 00346 // If this solution is better than the best found so far, then keep this best solution 00347 if (q < best_q) 00348 { 00349 best_solution = schedule.find<Vector<int, float> >("wi")->clone(); 00350 best_q = q; 00351 } 00352 00353 // One new state validated 00354 new_state++; 00355 } 00356 else 00357 { 00358 // Randomly accept a degrading solution, more likely when the temperature is higher 00359 if (rand() / RAND_MAX <= exp(-(q - old_q) / T) && m >= 0) 00360 { 00361 // Update current solution 00362 delete old_solution; 00363 00364 // One new state validated 00365 new_state++; 00366 } 00367 else 00368 { 00369 // Revert old solution 00370 schedule.insert(old_solution); 00371 q = old_q; 00372 } 00373 } 00374 00375 // Too much successful transformations 00376 if(new_state == max_new_states) 00377 { 00378 break; 00379 } 00380 } 00381 00382 T = T * cooling_factor; 00383 counter++; 00384 } 00385 00386 schedule.insert(best_solution); 00387 schedule = this->mapping->map(schedule, stats); 00388 schedule = this->scaling->scale(schedule, stats); 00389 //schedule = mapping_ltlg(schedule); 00390 //schedule = frequency_height(schedule); 00391 00392 clock_gettime(CLOCK_MONOTONIC, &stop); 00393 pelib_timespec_subtract(&total_time, &stop, &start); 00394 00395 // Report mapping quality 00396 //float best_quality = best_schedule.find<Scalar<float> >("quality")->getValue(); 00397 //stats.insert(pair<string, double>("quality", best_quality)); 00398 00399 // Report optimization time 00400 float time = (float)(total_time.tv_sec * nsec_in_sec + total_time.tv_nsec) / (float)nsec_in_sec; 00401 stats.insert(pair<string, double>("time", time)); 00402 00403 return schedule; 00404 } 00405 else 00406 { 00407 throw CrownException("Simulated annealing does not support non-power of two number of cores or non-balanced binary configured Crown Scheduler"); 00408 } 00409 } 00410 00411 void 00412 CrownCompositeAnnealing::initialize(const CrownMapping *mapping, const CrownScaling *scaling) 00413 { 00414 CrownAllocationFastest alloc; 00415 if(mapping == NULL) 00416 { 00417 this->mapping = new CrownMappingLTLG(config, &alloc, showOutput, showError); 00418 } 00419 else 00420 { 00421 this->mapping = mapping->clone(); 00422 } 00423 00424 if(scaling == NULL) 00425 { 00426 this->scaling = new CrownScalingHeight(config, &alloc, mapping, showOutput, showError); 00427 } 00428 else 00429 { 00430 this->scaling = scaling->clone(); 00431 } 00432 } 00433 00434 CrownCompositeAnnealing::CrownCompositeAnnealing(float init_temperature, float cooling_factor, float final_temperature, float max_transformations, float max_new_states, float distance, const CrownScheduler *scheduler, const CrownMapping *map, const CrownScaling *scale, bool showOutput, bool showError) : CrownComposite(scheduler, showOutput, showError) 00435 { 00436 this->init_temperature = init_temperature; 00437 this->cooling_factor = cooling_factor; 00438 this->final_temperature = final_temperature; 00439 this->max_transformations = max_transformations; 00440 this->max_new_states = max_new_states; 00441 this->distance = distance; 00442 initialize(map, scale); 00443 } 00444 00445 CrownCompositeAnnealing::CrownCompositeAnnealing(const Algebra ¶m, float init_temperature, float cooling_factor, float final_temperature, float max_transformations, float max_new_states, float distance, const CrownScheduler *scheduler, const CrownMapping *map, const CrownScaling *scale, bool showOutput, bool showError) : CrownComposite(param, scheduler, showOutput, showError) 00446 { 00447 this->init_temperature = init_temperature; 00448 this->cooling_factor = cooling_factor; 00449 this->final_temperature = final_temperature; 00450 this->max_transformations = max_transformations; 00451 this->max_new_states = max_new_states; 00452 this->distance = distance; 00453 initialize(map, scale); 00454 } 00455 00456 CrownCompositeAnnealing::CrownCompositeAnnealing(const Taskgraph &tg, const Platform &pt, const Algebra ¶m, float init_temperature, float cooling_factor, float final_temperature, float max_transformations, float max_new_states, float distance, const CrownScheduler *scheduler, const CrownMapping *map, const CrownScaling *scale, bool showOutput, bool showError) : CrownComposite(tg, pt, param, scheduler, showOutput, showError) 00457 { 00458 this->init_temperature = init_temperature; 00459 this->cooling_factor = cooling_factor; 00460 this->final_temperature = final_temperature; 00461 this->max_transformations = max_transformations; 00462 this->max_new_states = max_new_states; 00463 this->distance = distance; 00464 initialize(map, scale); 00465 } 00466 00467 CrownCompositeAnnealing::CrownCompositeAnnealing(const CrownCompositeAnnealing &src) : CrownComposite(src.tg, src.pt, src.param, src.scheduler, src.showOutput, src.showError) 00468 { 00469 this->init_temperature = init_temperature; 00470 this->cooling_factor = cooling_factor; 00471 this->final_temperature = final_temperature; 00472 this->max_transformations = max_transformations; 00473 this->max_new_states = max_new_states; 00474 this->distance = distance; 00475 initialize(src.mapping, src.scaling); 00476 } 00477 00478 float 00479 CrownCompositeAnnealing::complexity(const Algebra &problem) const 00480 { 00481 return 0; 00482 //return max_transform * ceil(log(temperature / final) / log(1 / cooling)); 00483 } 00484 00485 float 00486 CrownCompositeAnnealing::complexity(const Taskgraph&, const Platform&, const Algebra&) const 00487 { 00488 return 0; 00489 //return max_transform * ceil(log(temperature / final) / log(1 / cooling)); 00490 } 00491 00492 CrownCompositeAnnealing* 00493 CrownCompositeAnnealing::clone() const 00494 { 00495 CrownCompositeAnnealing *sched = new CrownCompositeAnnealing(*this); 00496 return sched; 00497 } 00498 00499 Schedule 00500 CrownCompositeAnnealing::schedule(const Taskgraph &tg, const Platform &pt, std::map<const string, double>& stats) const 00501 { 00502 Algebra param = this->param; 00503 00504 // TODO: Make scheduler work without scalar b 00505 param.insert(new Scalar<float>("b", 2)); 00506 param = param.merge(scheduler->addCrownConfig(tg, pt, param, stats)); 00507 00508 // Take task name out 00509 Algebra taskgraph = tg.buildAlgebra(pt); 00510 Vector<int, string> name = *taskgraph.find<Vector<int, string> >("name"); 00511 taskgraph.remove("name"); 00512 00513 //Perform actual work 00514 Algebra schedule = solve(taskgraph, pt.buildAlgebra(), param, stats); 00515 00516 // Put back task names 00517 schedule.insert(new Vector<int, string>(name)); 00518 00519 // Convert crown schedule to regular algebra schedule 00520 schedule = CrownScheduler::crownToSchedule(schedule); 00521 00522 // Add task starting time 00523 schedule = Schedule::addStartTime(schedule, tg, pt); 00524 00525 // report scheduling complexity 00526 float complexity = this->complexity(tg, pt, param); 00527 stats.insert(pair<string, double>("complexity", complexity)); 00528 00529 float M = tg.getDeadline(pt); 00530 float m = schedule.find<Scalar<float> >("m")->getValue(); 00531 Schedule *sched; 00532 if(m < 0 && M > 0) 00533 { 00534 stats.insert(pair<string, double>("feasible", 0)); 00535 sched = new Schedule(getShortDescription(), tg.getName(), Schedule::table()); 00536 } 00537 else 00538 { 00539 stats.insert(pair<string, double>("feasible", 1)); 00540 sched = new Schedule(getShortDescription(), tg.getName(), schedule); 00541 } 00542 00543 Schedule final = *sched; 00544 delete sched; 00545 return final; 00546 } 00547 00548 Schedule 00549 CrownCompositeAnnealing::schedule(const Taskgraph &tg, const Platform &pt, const Algebra ¶m, std::map<const string, double>& stats) const 00550 { 00551 return schedule(tg, pt, stats); 00552 } 00553 00554 std::string 00555 CrownCompositeAnnealing::getShortDescription() const 00556 { 00557 return string("Ann.") + scheduler->getShortDescription(); 00558 } 00559