crown  1.0.0
src/CrownMappingLTLG.cpp
Go to the documentation of this file.
00001 // The following definitions might be exchanged for others,
00002 // when integrating the following code into the framework.
00003 // The code implements a greedy heuristic to loadbalance the tasks
00004 // over cores such that the l2-norm of the load vector is minimized.
00005 // This should also minimize the l3-norm of the load vector.
00006 // J. Keller, July 14, 2014
00007 
00008 #include <stdio.h>
00009 #include <stdlib.h>
00010 #include <string.h>
00011 #include <math.h>
00012 #include <iostream>
00013 #include <sstream>
00014 #include <fstream>
00015 #include <cstdlib>
00016 #include <set>
00017 #include <map>
00018 
00019 #include <crown/allocation.h>
00020 #include <crown/mapping.h>
00021 #include <crown/scaling.h>
00022 #include <crown/crown.h>
00023 #include <crown/CrownMappingLTLG.hpp>
00024 #include <crown/CrownConfigBinary.hpp>
00025 
00026 #include <pelib/XMLSchedule.hpp>
00027 #include <pelib/AmplInput.hpp>
00028 #include <pelib/AmplOutput.hpp>
00029 #include <pelib/Vector.hpp>
00030 #include <pelib/Matrix.hpp>
00031 #include <pelib/Set.hpp>
00032 #include <pelib/GraphML.hpp>
00033 #include <pelib/Schedule.hpp>
00034 #include <pelib/Task.hpp>
00035 #include <pelib/time.h>
00036 
00037 #include <pelib/ParseException.hpp>
00038 #include <pelib/AmplInputScalar.hpp>
00039 #include <pelib/AmplInputVector.hpp>
00040 #include <pelib/AmplInputSet.hpp>
00041 #include <pelib/AmplInputMatrix.hpp>
00042 #include <pelib/Scalar.hpp>
00043 #include <pelib/Algebra.hpp>
00044 #include <pelib/AmplOutputScalar.hpp>
00045 #include <pelib/AmplOutputVector.hpp>
00046 #include <pelib/AmplOutputSet.hpp>
00047 #include <pelib/AmplOutputMatrix.hpp>
00048 #include <pelib/AmplOutput.hpp>
00049 
00050 #include <boost/numeric/ublas/matrix.hpp>
00051 #include <boost/numeric/ublas/io.hpp>
00052 
00053 #ifdef debug
00054 #undef debug
00055 #endif
00056 
00057 #if LONGEST_WIDEST_LOWEST_TASK_FIRST
00058 #undef LONGEST_WIDEST_TASK_FIRST
00059 #define LONGEST_WIDEST_TASK_FIRST 1
00060 #endif
00061 
00062 #if LONGEST_WIDEST_TASK_FIRST
00063 #undef LONGEST_TASK_FIRST
00064 #define LONGEST_TASK_FIRST 1
00065 #endif
00066 
00067 #if 01
00068 #define debug(var) cout << "[" << __FILE__ << ":" << __FUNCTION__ << ":" << __LINE__ << "] " << #var << " = \"" << var << "\"" << endl;
00069 #else
00070 #define debug(var)
00071 #endif
00072 
00073 using namespace std;
00074 using namespace pelib;
00075 using namespace pelib::crown;
00076 using namespace boost::numeric::ublas;
00077 
00078 static const long long int nsec_in_sec = 1000000000;
00079 static int ROOTNUMBER = 1;
00080 
00081 // Group structure containing the number of the group, its height, width descendants and ancestors
00082 struct Group
00083 {
00084     int number;
00085     float time;
00086     int width;
00087     std::vector<int> sons;
00088     std::vector<int> parents;
00089 
00090     Group(int gn, int gt, int gw)
00091         {
00092             number = gn;
00093             time = gt;
00094             width = gw;
00095             sons = std::vector<int>();
00096             parents = std::vector<int>();
00097         }
00098 };
00099 
00100 typedef std::vector<pair<int, int> > mapping_t;
00101 typedef std::map<int, float> RowType;
00102 typedef std::map<int, RowType> MatrixType;
00103 
00104 static map<int, Group*> groupmap;
00105 
00106 // Needed to compare two int elements in a group number set
00107 struct cmp_groupnumber_time : binary_function <int, int, bool>
00108 {
00109     bool operator()(const int& x, const int& y) const
00110         {
00111             // Operand groups' time
00112             float xtime = groupmap[x]->time;
00113             float ytime = groupmap[y]->time;
00114 
00115             return xtime == ytime ? x < y : xtime < ytime;
00116         }
00117 };
00118 
00119 static map<int, multiset<int, cmp_groupnumber_time> > groupqueue;
00120 
00121 static Algebra *rec;
00122 static mapping_t schedule;
00123 static map<int, float> vector_Wi;
00124 static map<int, float> vector_Tau;
00125 static map<int, float> vector_wi;
00126 static MatrixType matrix_e;
00127 static MatrixType matrix_tree;
00128 static MatrixType matrix_groups;
00129 static int n, p;
00130 
00131 static float update_group_time(int gn, float time_offset, int task_wi)
00132 {
00133     float res = 0;
00134     if(gn > 0)
00135     {
00136         float time = groupmap[gn]->time;
00137         groupqueue.find(task_wi)->second.erase(gn);
00138         groupmap[gn]->time += time_offset;
00139         groupqueue.find(task_wi)->second.insert(gn);
00140         res = time + time_offset;
00141         
00142     }
00143     else
00144     {
00145         // Not supposed to be here, something is wrong with the group number
00146         debug("Something is wrong with the group you try to update, you may want to check the allocation phase");
00147         debug(gn);
00148     }
00149 
00150     return res;
00151 }
00152 
00153 static float update_time(int gn, float time_offset, int task_wi)
00154 {
00155     double son, max;
00156     
00157     max = update_group_time(gn, time_offset, task_wi);
00158     if (gn > ROOTNUMBER) {
00159         for(std::vector<int>::iterator it = groupmap[gn]->sons.begin() ; it != groupmap[gn]->sons.end() ; it++)
00160         {
00161             son = update_group_time(*it, time_offset, groupmap[*it]->width);
00162             if(son > max)
00163             {
00164                 max = son;
00165             }
00166         }
00167         for(std::vector<int>::iterator it = groupmap[gn]->parents.begin() ; it != groupmap[gn]->parents.end(); it++)
00168         {
00169             update_group_time(*it, max - groupmap[*it]->time, groupmap[*it]->width);
00170         }
00171     }
00172     return max;
00173 }
00174 
00175 static float Tau(int task)
00176 {
00177     return vector_Tau.find(task)->second;
00178 }
00179 
00180 static float wi(int task)
00181 {
00182     int return_wi = (int)vector_wi.find(task)->second;
00183 
00184     return return_wi;
00185 }
00186 
00187 static float e(int task, int p)
00188 {
00189     return matrix_e.find(task)->second.find(p)->second;
00190 }
00191 
00192 static float work(int task, int p)
00193 {
00194     float return_work = Tau(task) / e(task, p);
00195     return return_work;
00196 }
00197 
00198 static float time(int task, int p)
00199 {
00200     float return_time = work(task, p) / (float)p;
00201     return return_time;
00202 }
00203 
00204 #if LONGEST_TASK_FIRST
00205 // Needed tp compare two group_time_t elements in the set whose definition comes right after
00206 struct cmp_task : binary_function <int, int, bool>
00207 {
00208     bool
00209     operator()(const int& x, const int& y) const
00210         {
00211 #if LONGEST_WIDEST_TASK_FIRST
00212             if(time(x, wi(x)) == time(y, wi(y)))
00213             {
00214 #if LONGEST_WIDEST_LOWEST_TASK_FIRST
00215                 if(wi(x) == wi(y))
00216                 {
00217                     return x < y;
00218                 }
00219                 else
00220                 {
00221 #endif
00222                     return wi(x) > wi(y);
00223 #if LONGEST_WIDEST_LOWEST_TASK_FIRST
00224                 }
00225 #endif
00226             }
00227             else
00228             {
00229 #endif
00230                 return time(x, (int)wi(x)) > time(y, (int)wi(y));
00231 #if LONGEST_WIDEST_TASK_FIRST
00232             }
00233 #endif
00234         }
00235 };
00236 static multiset<int, cmp_task> ordered_tasks;
00237 #endif
00238 
00239 static void parse()
00240 {
00241     schedule = mapping_t();
00242     vector_Wi = map<int, float>();
00243     vector_Tau = map<int, float>();
00244     vector_wi = map<int, float>();
00245     matrix_e = MatrixType();
00246     AmplInput data(AmplInput::floatHandlers());
00247     groupmap = map<int, Group*>();
00248     groupqueue = map<int, multiset<int, cmp_groupnumber_time> >();
00249     
00250     // Read all values
00251     n = (int)rec->find<Scalar<float> >("n")->getValue();
00252     p = (int)rec->find<Scalar<float> >("p")->getValue();
00253 
00254     vector_Wi = rec->find<Vector<int, float> >("Wi")->getValues();
00255     vector_Tau = rec->find<Vector<int, float> >("Tau")->getValues();
00256     vector_wi = rec->find<Vector<int, float> >("wi")->getValues();
00257     matrix_e = rec->find<Matrix<int, int, float> >("e")->getValues();
00258     matrix_groups = rec->find<Matrix<int, int, float> >(CrownConfig::crownConfigGroups)->getValues();
00259     matrix_tree = rec->find<Matrix<int, int, float> >(CrownConfig::crownConfigTree)->getValues();
00260 
00261     // creating the group map and setting the width : p(2p-1) = 2p²-p = O(p²)
00262     for(unsigned int i = 1 ; i <= matrix_groups.size() ; ++i)
00263     {
00264         groupmap.insert(std::make_pair(i, new Group(i, 0, 0)));
00265         for(unsigned int j = 1 ; j <= matrix_groups.begin()->second.size() ; ++j)
00266         {
00267             if(matrix_groups.find(i)->second.find(j)->second) groupmap[i]->width++;
00268         }
00269     }
00270 
00271     // setting the inheritance : (2p-1)² = 4p²-4p+3 = O(p²)
00272     for(unsigned int i = 1 ; i <= matrix_tree.size() ; ++i)
00273     {
00274         for(unsigned int j = 1 ; j <= matrix_tree.begin()->second.size() ; ++j)
00275         {
00276             if(matrix_tree.find(i)->second.find(j)->second)
00277             {
00278                 groupmap[j]->sons.push_back(i);
00279                 groupmap[i]->parents.push_back(j);
00280             }
00281         }
00282     }
00283 
00284     // creating the group queue
00285     for(map<int, Group*>::iterator it = groupmap.begin() ; it != groupmap.end() ; ++it)
00286     {
00287         if(groupqueue.find(it->second->width) == groupqueue.end())
00288         {
00289             groupqueue.insert(pair<int, multiset<int, cmp_groupnumber_time> >(it->second->width, multiset<int, cmp_groupnumber_time>()));
00290         }
00291         groupqueue.find(it->second->width)->second.insert(it->first);
00292     }
00293 }
00294 
00295 static float group_recursive_load(int g, int b, int p, float *group)
00296 {
00297     int i;
00298     float load, max_load = 0;
00299 
00300     for(i = g * b; i < (g + 1) * b && i < 2 * p; i++)
00301     {
00302         load = group_recursive_load(i, b, p, group);
00303         if(load > max_load) max_load = load;
00304     }
00305 
00306     return max_load + group[g - 1];
00307 }
00308 
00309 CrownMappingLTLG::CrownMappingLTLG(const CrownConfig *config, const CrownAllocation *alloc, bool showOutput, bool showError) : CrownMapping(config, alloc, showOutput, showError)
00310 {
00311     /* Do nothing else */
00312 }
00313 
00314 CrownMappingLTLG::CrownMappingLTLG(const Algebra &param, const CrownConfig *config, const CrownAllocation *alloc, bool showOutput, bool showError): CrownMapping(param, config, alloc, showOutput, showError)
00315 {
00316     /* Do nothing else */
00317 }
00318 
00319 CrownMappingLTLG::CrownMappingLTLG(const Taskgraph &tg, const Platform &pt, const Algebra &param, const CrownConfig *config, const CrownAllocation *alloc, bool showOutput, bool showError): CrownMapping(tg, pt, param, config, alloc, showOutput, showError)
00320 {
00321     /* Do nothing else */
00322 }
00323 
00324 CrownMappingLTLG::CrownMappingLTLG(const CrownMappingLTLG &src) : CrownMapping(src)
00325 {
00326     // Do nothing else
00327 }
00328 
00329 Algebra CrownMappingLTLG::map(const Algebra &alg, std::map<const string, double> &stats) const
00330 {
00331     Algebra input = alg;
00332     struct timespec start, stop, total_time;
00333 
00334     rec = &input;
00335     parse();
00336     clock_gettime(CLOCK_MONOTONIC, &start);
00337     
00338 #if LONGEST_TASK_FIRST
00339     ordered_tasks = multiset<int, cmp_task>();
00340     for(int i = 0 ; i < n ; i++) // O(n)
00341     {
00342         ordered_tasks.insert(i+1); // O(log n)
00343     }
00344 #endif
00345     
00346     // Use a temporary vector to reduce asymptotic time with n
00347     std::vector<pair<int, float> > schedule_vector;
00348 
00349 #if LONGEST_TASK_FIRST
00350     for(set<int>::iterator iter = ordered_tasks.begin() ; iter != ordered_tasks.end() ; iter++)
00351 #else
00352         for(int i = 0 ; i < n ; i++)
00353 #endif
00354         {
00355             int task;
00356 #if LONGEST_TASK_FIRST
00357             task = *iter;
00358 #else
00359             task = i + 1;
00360 #endif
00361             float task_wi = wi(task);
00362             float task_time = time(task, (int)task_wi);
00363             multiset<int, cmp_groupnumber_time> groups = groupqueue.find(task_wi)->second;
00364             int group = *(groups.begin());
00365             
00366             schedule_vector.push_back(pair<int, int>(task, group));
00367             update_time(group, task_time, task_wi);
00368 
00369         }
00370 
00371     clock_gettime(CLOCK_MONOTONIC, &stop);
00372     // Build the final schedule cppelib structure from schedule_vector
00373     // Outside time measurement
00374     for(std::vector<pair<int, float> >::iterator iter = schedule_vector.begin(); iter != schedule_vector.end(); iter++)
00375     {
00376         ::schedule.push_back(pair<int, int>(iter->first, (int)iter->second));
00377     }
00378     
00379     pelib_timespec_subtract(&total_time, &stop, &start);
00380     stats.insert(pair<const string, double>("time_mapping", (total_time.tv_sec * nsec_in_sec + total_time.tv_nsec) / (float)nsec_in_sec));
00381     // Make
00382     std::map<int, float> vector_group;
00383     bool all_sequential = true;
00384     for(mapping_t::iterator iter = ::schedule.begin(); iter != ::schedule.end(); iter++)
00385     {
00386         all_sequential = all_sequential && wi(iter->first) == 1;
00387         vector_group.insert(pair<int, int>(iter->first, iter->second));
00388         // debug(iter->first);
00389         // debug(iter->second);
00390     }
00391     Vector<int, float> group("group", vector_group);
00392     rec->insert(&group);
00393     
00394     //AmplInput(AmplInput::intFloatHandlers()).dump(cout, rec->find<Vector<int, float> >("group"));
00395     
00396     return input;
00397 }
00398 
00399 float CrownMappingLTLG::complexity(const Taskgraph &tg, const Platform &pt, const Algebra &param, const CrownConfig* config) const
00400 {
00401     Algebra alg = tg.buildAlgebra(pt);
00402     alg = alg.merge(pt.buildAlgebra());
00403     alg = alg.merge(param);
00404     if(config == NULL)
00405     {
00406         CrownConfigBinary conf;
00407         config = &conf;
00408     }
00409     std::map<const string, double> stats;
00410     alg = alg.merge(config->configure(tg, pt, param, stats));    return complexity(alg);
00411 }
00412 
00413 float CrownMappingLTLG::complexity(const pelib::Algebra &input) const
00414 {
00415     float n = input.find<Scalar<float> >("n")->getValue();
00416     float p = input.find<Scalar<float> >("p")->getValue();
00417     return n * (p * log(p) + log(p) * log(p));
00418 }
00419 
00420 std::string CrownMappingLTLG::getShortDescription() const
00421 {
00422     stringstream ss;
00423     std::streamsize p = ss.precision();
00424     ss.precision(4);
00425     ss << "newLTLG";
00426     ss.precision(p);
00427     return ss.str();
00428 }
00429 
00430 CrownMapping* CrownMappingLTLG::clone() const
00431 {
00432     return new CrownMappingLTLG(*this);
00433 }