crown  1.0.0
src/CrownCompositeAnnealing.cpp
Go to the documentation of this file.
00001 // The following definitions might be exchanged for others,
00002 // when integrating the following code into the framework.
00003 // The code implements a greedy heuristic to loadbalance the tasks
00004 // over cores such that the l2-norm of the load vector is minimized.
00005 // This should also minimize the l3-norm of the load vector.
00006 // J. Keller, July 14, 2014
00007 
00008 #include <stdio.h>
00009 #include <stdlib.h>
00010 #include <string.h>
00011 #include <math.h>
00012 #include <iostream>
00013 #include <sstream>
00014 
00015 #include <crown/allocation.h>
00016 #include <crown/mapping.h>
00017 #include <crown/scaling.h>
00018 #include <crown/crown.h>
00019 #include <crown/annealing.h>
00020 #include <crown/CrownCompositeAnnealing.hpp>
00021 #include <crown/CrownConfigBinary.hpp>
00022 #include <crown/CrownException.hpp>
00023 #include <crown/CrownAllocationFastest.hpp>
00024 #include <crown/CrownMappingLTLG.hpp>
00025 #include <crown/CrownScalingHeight.hpp>
00026 
00027 #include <pelib/AmplInput.hpp>
00028 #include <pelib/Schedule.hpp>
00029 #include <pelib/Scalar.hpp>
00030 #include <pelib/Vector.hpp>
00031 #include <pelib/Matrix.hpp>
00032 #include <pelib/Set.hpp>
00033 #include <pelib/time.h>
00034 
00035 #ifdef debug
00036 #undef debug
00037 #endif
00038 
00039 #define debug(var) cout << "[" << __FILE__ << ":" << __FUNCTION__ << ":" << __LINE__ << "] " << #var << " = \"" << (var) << "\"" << endl;
00040 
00041 using namespace std;
00042 using namespace pelib;
00043 using namespace pelib::crown;
00044 
00045 // Quality assessment parameters
00046 static const long long int nsec_in_sec = 1000000000;
00047 const float CrownCompositeAnnealing::default_init_temperature = 8;
00048 const float CrownCompositeAnnealing::default_cooling_factor = 0.6;
00049 const float CrownCompositeAnnealing::default_final_temperature = 0.9;
00050 const float CrownCompositeAnnealing::default_max_transformations = 2;
00051 const float CrownCompositeAnnealing::default_max_new_states = 2;
00052 const float CrownCompositeAnnealing::default_distance = 0.5;
00053 
00054 Algebra
00055 CrownCompositeAnnealing::neighbor(const Algebra &solution, float tasks, float cores) const
00056 {
00057         Algebra schedule = solution;
00058         // distance == 1 -> change all neighbors by +- Wi processors
00059         // distance == 0 -> change only one neighbor by 1 processor
00060 
00061         map<int, float> vector_wi = schedule.find<Vector<int, float> >("wi")->getValues();
00062         //int b = (int)schedule.find<Scalar<float> >("b")->getValue();
00063         int b = 2;
00064         int p = (int)schedule.find<Scalar<float> >("p")->getValue();
00065         const Vector<int, float> *Wi = schedule.find<Vector<int, float> >("Wi");
00066         map<int, float> vector_Wi = Wi->getValues();
00067         int n = vector_wi.size();
00068 
00069         int num_tasks = ((int)((float)rand() / (float)RAND_MAX * n * tasks));
00070         while (num_tasks > 0 && vector_Wi.size() > 0)
00071         {
00072                 int size = vector_Wi.size();
00073                 map<int, float>::iterator itask = vector_Wi.begin();
00074                 int index = rand() % size;
00075 
00076                 for(int i = 0; i < index; i++)
00077                 {
00078                         itask++;
00079                 }
00080 
00081                 // If a task is sequential, there is no neighbor to find there, remove that task from list and continue
00082                 if(itask->second < 2)
00083                 {
00084                         vector_Wi.erase(itask);
00085                         continue;
00086                 }
00087 
00088                 // Task maximal width > 1 (Wit > 0)
00089                 int task = itask->first;
00090                 int Wit = (int)vector_Wi[task];
00091                 Wit = Wit > p ? p : Wit;
00092                 Wit = (int)(log(Wit) / log(b));
00093 
00094                 // task current allocated width
00095                 int wi = (int)(log(vector_wi.find(task)->second) / log(b));
00096 
00097                 int amplitude = Wit * (int)cores; // + 1 to count the number of possibilities, -1 to remove the current allocation from possible new allocation
00098                 amplitude = amplitude < 1 ? 1 : amplitude; // Make sure at least one transformation is performed
00099 
00100                 // Count how many spots are available below and above current wi
00101                 int avail_lower = wi;
00102                 int avail_higher = Wit - wi;
00103         
00104                 // Cound how many spots we need below and above current wi
00105                 int wi_lower, wi_higher;
00106                 if(rand() % 2 == 0)
00107                 {
00108                         wi_lower = (int)floor((float)amplitude / 2);
00109                         wi_higher = (int)ceil((float)amplitude / 2);
00110                 }
00111                 else
00112                 {
00113                         wi_lower = (int)ceil((float)amplitude / 2);
00114                         wi_higher = (int)floor((float)amplitude / 2);
00115                 }
00116                 
00117                 int len_lower, len_higher;
00118                 if(avail_lower >= wi_lower && avail_higher >=wi_higher)
00119                 {
00120                         len_lower = wi_lower;
00121                         len_higher = wi_higher;
00122                 }
00123                 else if(avail_lower < wi_lower)
00124                 {
00125                         len_lower = avail_lower;
00126                         len_higher = wi_higher + (wi_lower - avail_lower);
00127                 }
00128                 else if(avail_higher < wi_higher)
00129                 {
00130                         len_higher = avail_higher;
00131                         len_lower = wi_lower + (wi_higher - avail_higher);
00132                 }
00133                 else
00134                 {
00135                         // Not enough space to fit the amplitude, aborting
00136                         cerr << "# Not enough allocation opportunities to fit the amplitude" << endl;
00137                         cerr << "# Task " << task << "; p = " << p << "; log2(Wi) = " << Wit << "; log2(wi) = " << wi << "; amplitude = " << amplitude << "; avail_lower = " << avail_lower << "; avail_higher = " << avail_higher << "; wi_lower = " << wi_lower << "; wi_higher = " << wi_higher << endl; 
00138                         abort();
00139                 }
00140 
00141                 int alternative = ((int)rand() % (amplitude)) + 1; // Start counting the new allocation number with 1
00142                 int new_wi = 0;
00143                 if(alternative <= len_lower)
00144                 {
00145                         new_wi = wi - (len_lower - alternative + 1);
00146                 }
00147                 else if(alternative <= len_lower + len_higher)
00148                 {
00149                         new_wi = wi + alternative - len_lower;
00150                 }
00151                 else
00152                 {
00153                         cerr << "# Chosen alternative is beyond the spots available" << endl;
00154                         cerr << "# Task " << task << "; p = " << p << "; log2(Wi) = " << Wit << "; log2(wi) = " << wi << "; amplitude = " << amplitude << "; avail_lower = " << avail_lower << "; avail_higher = " << avail_higher << "; wi_lower = " << wi_lower << "; wi_higher = " << wi_higher << "; alternative = " << alternative << endl; 
00155                         len_lower = avail_lower;
00156                         len_higher = wi_higher + (wi_lower - avail_lower);
00157                 }
00158                         
00159                 if(wi == new_wi || new_wi > Wit || new_wi < 0)
00160                 {
00161                         cerr << "# Task " << task << "; Wi " << Wit << "; wi " << wi << "; amplitude " << amplitude << "; new_wi " << new_wi << endl;
00162                         abort();
00163                 }
00164 
00165                 new_wi = (int)pow((double)b, (double)new_wi);
00166                 if(new_wi == 64)
00167                 {
00168                         cerr << "# Task " << task << "; Wi " << Wit << "; wi " << wi << "; amplitude " << amplitude << "; new_wi " << new_wi << endl;
00169                 }
00170                 vector_wi[task] = new_wi;
00171 
00172                 // Done with this task, continue with the next
00173                 vector_Wi.erase(itask);
00174                 num_tasks--;
00175 
00176         }
00177 
00178         Vector<int, float> wi("wi", vector_wi);
00179         schedule.insert(&wi);
00180 
00181         return schedule;
00182 }
00183 
00184 float
00185 CrownCompositeAnnealing::energy(const Algebra &solution)
00186 {
00187         Algebra rec = crownToSchedule(solution);
00188         int n = rec.find<Scalar<float> >("n")->getValue();
00189         int p = rec.find<Scalar<float> >("p")->getValue();
00190         float M = rec.find<Scalar<float> >("M")->getValue();
00191         float kappa = rec.find<Scalar<float> >("kappa")->getValue();
00192         float eta = rec.find<Scalar<float> >("eta")->getValue();
00193         float zeta = rec.find<Scalar<float> >("zeta")->getValue();
00194         float alpha = rec.find<Scalar<float> >("alpha")->getValue();
00195         Vector<int, float> tau = rec.find<Vector<int, float> >("Tau");
00196         const Vector<int, string> *nameptr = rec.find<Vector<int, string> >("name");
00197         Vector<int, string> name("name", nameptr != NULL ? nameptr->getValues() : map<int, string>());
00198         Vector<int, float> Wi = rec.find<Vector<int, float> >("Wi");
00199         Vector<int, float> frequency = rec.find<Vector<int, float> >("frequency");
00200         Set<float> F = rec.find<Set<float> >("F");
00201         Matrix<int, int, float> e = rec.find<Matrix<int, int, float> >("e");
00202         Matrix<int, int, float> schedule = rec.find<Matrix<int, int, float> >("schedule");
00203         Vector<int, float> wi = rec.find<Vector<int, float> >("wi");
00204         vector<double> busy_task(n, 0);
00205         vector<double> area_task(n, 0);
00206         vector<double> met_task(n, 0);
00207         
00208         float lowest_freq = *(F.getValues().begin());
00209         kappa = kappa * lowest_freq;
00210         eta = eta * (zeta * pow(lowest_freq, alpha) + (1 - zeta) * (lowest_freq + kappa)) / zeta;
00211         double idle_energy = p * M * pow(lowest_freq, alpha);
00212         double busy_energy = 0;
00213 
00214         map<float, int> core_active;
00215         for(size_t i = 0; i < (size_t)p; i++)
00216         {
00217                 core_active[i + 1] = 0;
00218         }
00219         int total_core_active = 0;
00220 
00221         // Iterate through all task names until we find the dummy task;
00222         vector<size_t> dummy_index;
00223         size_t index = 0;
00224         for(map<int, string>::const_iterator i = name.getValues().begin(); i != name.getValues().end(); i++, index++)
00225         {
00226                 stringstream ss;
00227                 ss << DUMMY_TASK << " " << index + 1;
00228                 if(string(i->second).compare(ss.str()) == 0)
00229                 {
00230                         dummy_index.push_back(index + 1);       
00231                 }
00232         }
00233 
00234         // Now evaluate the quality of this schedule
00235         float total_area = 0;
00236         for(map<int, map<int, float> >::const_iterator i = schedule.getValues().begin(); i != schedule.getValues().end(); i++)
00237         {
00238                 //int core = i->first;
00239                 for(map<int, float>::const_iterator j = i->second.begin(); j != i->second.end(); j++)
00240                 {
00241                         //int index = j->first;
00242                         int task = j->second;
00243                         //cout << "Core " << core << ", task " << task << endl;
00244 
00245                         // Check if the task index is a dummy task
00246                         bool dummy = false;
00247                         for(size_t k = 0; k < dummy_index.size(); k++)
00248                         {
00249                                 if((size_t)task == dummy_index[k])
00250                                 {
00251                                         dummy = true;
00252                                         break;
00253                                 }
00254                         }
00255 
00256                         // We don't take dummy task power consumption into account
00257                         if(task > 0 && !dummy)
00258                         {
00259                                 float width_t = wi.getValues().find(task)->second;
00260                                 double time = (double)tau.find(task) / (width_t * e.find(width_t, task) * (double)frequency.find(task));
00261                                 total_area += time;
00262                                 float power = (zeta * pow(frequency.find(task), alpha) + (1 - zeta) * (frequency.find(task) + kappa)) / zeta;
00263                                 busy_energy += time * power;
00264 
00265                                 // Mark this core as active
00266                                 if(core_active[i->first] == 0)
00267                                 {
00268                                         core_active[i->first] = 1;
00269                                         total_core_active++;
00270                                 }
00271                         }
00272                 }
00273         }
00274 
00275         idle_energy = (M * (total_core_active) - total_area) * eta;
00276 
00277         return zeta * busy_energy + (1 - zeta) * idle_energy;
00278 }
00279 
00280 Algebra
00281 CrownCompositeAnnealing::solve(const Algebra &tg, const Algebra &pt, const pelib::Algebra &parameters, std::map<const std::basic_string<char>, double> &stats) const
00282 {
00283         Algebra param = parameters;
00284         const Scalar<float> *b = param.find<Scalar<float> >("b");
00285         Scalar<float> bb("b", 2);
00286         if(b == NULL)
00287         {
00288                 param.insert(new Scalar<float>("b", 2));
00289                 b = param.find<Scalar<float> >("b");
00290                 b = &bb;
00291         }
00292         float p = pt.find<Scalar<float> >("p")->getValue();
00293         Algebra best_schedule;
00294 
00295         if(b != NULL && p == pow(b->getValue(), floor(log(p) / log(b->getValue()))))
00296         {
00297                 struct timespec start, stop, total_time;
00298                 //Algebra schedule = tg.merge(pt);
00299                 //schedule = schedule.merge(param);
00300                 clock_gettime(CLOCK_MONOTONIC, &start);
00301 
00302                 // Run Crown binary
00303                 Algebra schedule = scheduler->solve(tg, pt, param, stats);
00304                 //Algebra schedule = crown_binary(best, 0);
00305 
00306                 //best_schedule = crown_binary_annealing_allocation(schedule, this->init_temperature, this->cooling_factor, this->final_temperature, this->max_transformations, this->max_new_states, this->distance);
00307 
00308         schedule = this->neighbor(schedule, distance, 0);
00309 
00310         float m;
00311         int counter = 0;
00312         float T = init_temperature;
00313 
00314         Algebra best = schedule;
00315         //float best_q = quality(best);
00316         float best_q = energy(best);
00317         Vector<int, float> *best_solution = best.find<Vector<int, float> >("wi")->clone();
00318         float q = best_q; //quality(schedule);
00319 
00320         while (T > final_temperature)
00321         {
00322                 int new_state = 0;
00323 
00324                 for(int i = 0; i < max_transformations; i++)
00325                 {
00326                         // Keep track of the solution of last iteration
00327                         Vector<int, float> *old_solution = schedule.find<Vector<int, float> >("wi")->clone();
00328                         float old_q = q;
00329 
00330                         // Transform the solution
00331                         schedule = neighbor(schedule, distance, 0);
00332                         schedule = this->mapping->map(schedule, stats);
00333                         schedule = this->scaling->scale(schedule, stats);
00334                         //schedule = mapping_ltlg(schedule);
00335                         //schedule = frequency_height(schedule);
00336 
00337                         m = schedule.find<Scalar<float> >("m")->getValue();
00338                         //q = quality(schedule);
00339                         q = energy(schedule);
00340                         
00341                         // If the new energy is lower than the previous one, then we accept this solution
00342                         if (q - old_q <= 0 && m >= 0)
00343                         {
00344                                 delete old_solution;
00345 
00346                                 // If this solution is better than the best found so far, then keep this best solution
00347                                 if (q < best_q)
00348                                 {
00349                                         best_solution = schedule.find<Vector<int, float> >("wi")->clone();
00350                                         best_q = q;
00351                                 }
00352 
00353                                 // One new state validated
00354                                 new_state++;
00355                         }
00356                         else
00357                         {
00358                                 // Randomly accept a degrading solution, more likely when the temperature is higher
00359                                 if (rand() / RAND_MAX <= exp(-(q - old_q) / T) && m >= 0)
00360                                 {
00361                                         // Update current solution
00362                                         delete old_solution;
00363 
00364                                         // One new state validated
00365                                         new_state++;
00366                                 }
00367                                 else
00368                                 {
00369                                         // Revert old solution 
00370                                         schedule.insert(old_solution);
00371                                         q = old_q;
00372                                 }
00373                         }
00374                         
00375                         // Too much successful transformations
00376                         if(new_state == max_new_states) 
00377                         {
00378                                 break;
00379                         }
00380                 }
00381         
00382                 T = T * cooling_factor;
00383                 counter++;
00384         }
00385 
00386         schedule.insert(best_solution);
00387         schedule = this->mapping->map(schedule, stats);
00388         schedule = this->scaling->scale(schedule, stats);
00389         //schedule = mapping_ltlg(schedule);
00390         //schedule = frequency_height(schedule);
00391 
00392                 clock_gettime(CLOCK_MONOTONIC, &stop);
00393                 pelib_timespec_subtract(&total_time, &stop, &start);
00394 
00395                 // Report mapping quality
00396                 //float best_quality = best_schedule.find<Scalar<float> >("quality")->getValue();
00397                 //stats.insert(pair<string, double>("quality", best_quality));
00398 
00399                 // Report optimization time
00400                 float time = (float)(total_time.tv_sec * nsec_in_sec + total_time.tv_nsec) / (float)nsec_in_sec;
00401                 stats.insert(pair<string, double>("time", time));
00402 
00403                 return schedule;
00404         }
00405         else
00406         {
00407                 throw CrownException("Simulated annealing does not support non-power of two number of cores or non-balanced binary configured Crown Scheduler");
00408         }
00409 }
00410 
00411 void
00412 CrownCompositeAnnealing::initialize(const CrownMapping *mapping, const CrownScaling *scaling)
00413 {
00414         CrownAllocationFastest alloc;
00415         if(mapping == NULL)
00416         {
00417                 this->mapping = new CrownMappingLTLG(config, &alloc, showOutput, showError);
00418         }
00419         else
00420         {
00421                 this->mapping = mapping->clone();
00422         }
00423 
00424         if(scaling == NULL)
00425         {
00426                 this->scaling = new CrownScalingHeight(config, &alloc, mapping, showOutput, showError);
00427         }
00428         else
00429         {
00430                 this->scaling = scaling->clone();
00431         }
00432 }
00433 
00434 CrownCompositeAnnealing::CrownCompositeAnnealing(float init_temperature, float cooling_factor, float final_temperature, float max_transformations, float max_new_states, float distance, const CrownScheduler *scheduler, const CrownMapping *map, const CrownScaling *scale, bool showOutput, bool showError) : CrownComposite(scheduler, showOutput, showError)
00435 {
00436         this->init_temperature = init_temperature;
00437         this->cooling_factor = cooling_factor;
00438         this->final_temperature = final_temperature;
00439         this->max_transformations = max_transformations;
00440         this->max_new_states = max_new_states;
00441         this->distance = distance;
00442         initialize(map, scale);
00443 }
00444 
00445 CrownCompositeAnnealing::CrownCompositeAnnealing(const Algebra &param, float init_temperature, float cooling_factor, float final_temperature, float max_transformations, float max_new_states, float distance, const CrownScheduler *scheduler, const CrownMapping *map, const CrownScaling *scale, bool showOutput, bool showError) : CrownComposite(param, scheduler, showOutput, showError)
00446 {
00447         this->init_temperature = init_temperature;
00448         this->cooling_factor = cooling_factor;
00449         this->final_temperature = final_temperature;
00450         this->max_transformations = max_transformations;
00451         this->max_new_states = max_new_states;
00452         this->distance = distance;
00453         initialize(map, scale);
00454 }
00455 
00456 CrownCompositeAnnealing::CrownCompositeAnnealing(const Taskgraph &tg, const Platform &pt, const Algebra &param, float init_temperature, float cooling_factor, float final_temperature, float max_transformations, float max_new_states, float distance, const CrownScheduler *scheduler, const CrownMapping *map, const CrownScaling *scale, bool showOutput, bool showError) : CrownComposite(tg, pt, param, scheduler, showOutput, showError)
00457 {
00458         this->init_temperature = init_temperature;
00459         this->cooling_factor = cooling_factor;
00460         this->final_temperature = final_temperature;
00461         this->max_transformations = max_transformations;
00462         this->max_new_states = max_new_states;
00463         this->distance = distance;
00464         initialize(map, scale);
00465 }
00466 
00467 CrownCompositeAnnealing::CrownCompositeAnnealing(const CrownCompositeAnnealing &src) : CrownComposite(src.tg, src.pt, src.param, src.scheduler, src.showOutput, src.showError)
00468 {
00469         this->init_temperature = init_temperature;
00470         this->cooling_factor = cooling_factor;
00471         this->final_temperature = final_temperature;
00472         this->max_transformations = max_transformations;
00473         this->max_new_states = max_new_states;
00474         this->distance = distance;
00475         initialize(src.mapping, src.scaling);
00476 }
00477 
00478 float
00479 CrownCompositeAnnealing::complexity(const Algebra &problem) const
00480 {
00481         return 0;
00482         //return max_transform * ceil(log(temperature / final) / log(1 / cooling));
00483 }
00484 
00485 float
00486 CrownCompositeAnnealing::complexity(const Taskgraph&, const Platform&, const Algebra&) const
00487 {
00488         return 0;
00489         //return max_transform * ceil(log(temperature / final) / log(1 / cooling));
00490 }
00491 
00492 CrownCompositeAnnealing*
00493 CrownCompositeAnnealing::clone() const
00494 {
00495         CrownCompositeAnnealing *sched = new CrownCompositeAnnealing(*this);
00496         return sched;
00497 }
00498 
00499 Schedule
00500 CrownCompositeAnnealing::schedule(const Taskgraph &tg, const Platform &pt, std::map<const string, double>& stats) const
00501 {
00502         Algebra param = this->param;
00503 
00504         // TODO: Make scheduler work without scalar b
00505         param.insert(new Scalar<float>("b", 2));
00506         param = param.merge(scheduler->addCrownConfig(tg, pt, param, stats));
00507 
00508         // Take task name out
00509         Algebra taskgraph = tg.buildAlgebra(pt);
00510         Vector<int, string> name = *taskgraph.find<Vector<int, string> >("name");
00511         taskgraph.remove("name");
00512 
00513         //Perform actual work
00514         Algebra schedule = solve(taskgraph, pt.buildAlgebra(), param, stats);
00515 
00516         // Put back task names
00517         schedule.insert(new Vector<int, string>(name));
00518 
00519         // Convert crown schedule to regular algebra schedule
00520         schedule = CrownScheduler::crownToSchedule(schedule);
00521 
00522         // Add task starting time
00523         schedule = Schedule::addStartTime(schedule, tg, pt);
00524 
00525         // report scheduling complexity
00526         float complexity = this->complexity(tg, pt, param);
00527         stats.insert(pair<string, double>("complexity", complexity));
00528 
00529         float M = tg.getDeadline(pt);
00530         float m = schedule.find<Scalar<float> >("m")->getValue();
00531         Schedule *sched;
00532         if(m < 0 && M > 0)
00533         {
00534                 stats.insert(pair<string, double>("feasible", 0));
00535                 sched = new Schedule(getShortDescription(), tg.getName(), Schedule::table());
00536         }
00537         else
00538         {
00539                 stats.insert(pair<string, double>("feasible", 1));
00540                 sched = new Schedule(getShortDescription(), tg.getName(), schedule);
00541         }
00542 
00543         Schedule final = *sched;
00544         delete sched;
00545         return final;
00546 }
00547 
00548 Schedule
00549 CrownCompositeAnnealing::schedule(const Taskgraph &tg, const Platform &pt, const Algebra &param, std::map<const string, double>& stats) const
00550 {
00551         return schedule(tg, pt, stats);
00552 }
00553 
00554 std::string
00555 CrownCompositeAnnealing::getShortDescription() const
00556 {
00557         return string("Ann.") + scheduler->getShortDescription();
00558 }
00559