22 #include "TStopwatch.h" 38 events = std::vector< std::vector<Event*> >(1);
47 setTrainingEvents(trainingEvents);
61 for(
unsigned int i=0;
i < trees.size();
i++)
74 Event*
e = trainingEvents[0];
79 events = std::vector< std::vector<Event*> >();
83 for(
unsigned int i=0;
i<e->
data.size();
i++)
85 events.push_back(trainingEvents);
103 if(i<trees.size())
return trees[i];
136 std::cout << std::endl <<
"Listing Events... " << std::endl;
138 for(
unsigned int i=0;
i <
e.size();
i++)
140 std::cout << std::endl <<
"Variable " <<
i <<
" vector contents: " << std::endl;
141 for(
unsigned int j=0;
j<
e[
i].size();
j++)
143 e[
i][
j]->outputEvent();
169 return e1->
id < e2->
id;
180 for(
unsigned int i=0;
i<
e.size();
i++)
203 std::vector<double>
v(
events.size(), 0);
207 for(
unsigned int j=0;
j < trees.size();
j++)
209 trees[
j]->rankVariables(
v);
212 double max = *std::max_element(
v.begin(),
v.end());
215 for(
unsigned int i=0;
i <
v.size();
i++)
222 std::vector< std::pair<double, Int_t> >
w(
events.size());
224 for(
unsigned int i=0;
i<
v.size();
i++)
226 w[
i] = std::pair<double, Int_t>(
v[
i],
i);
230 std::sort(
w.begin(),
w.end());
233 for(
int i=(
v.size()-1);
i>=0;
i--)
250 std::ofstream splitvaluefile;
251 splitvaluefile.open(savefilename);
255 std::vector<std::vector<double>>
v(
events.size(), std::vector<double>());
260 for(
unsigned int j=0;
j<trees.size();
j++)
262 trees[
j]->getSplitValues(v);
266 for(
unsigned int i=0;
i<v.size();
i++)
274 for(
unsigned int i=1;
i<v.size();
i++)
277 for(
unsigned int j=0;
j<v[
i].size();
j++)
279 std::stringstream ss;
281 ss << std::scientific << v[
i][
j];
283 splitValues+=ss.str().c_str();
286 splitValues=splitValues(1,splitValues.Length());
287 splitvaluefile << splitValues << std::endl << std::endl;;
304 for(std::list<Node*>::iterator it=tn.begin(); it!=tn.end(); it++)
307 std::vector<Event*>&
v = (*it)->getEvents()[0];
313 fit = learningRate*
fit;
316 (*it)->setFitValue(fit);
320 for(
unsigned int j=0;
j<v.size();
j++)
328 (*it)->getEvents() = std::vector< std::vector<Event*> >();
344 for(std::list<Node*>::iterator it=tn.begin(); it!=tn.end(); it++)
346 std::vector<Event*>&
v = (*it)->getEvents()[0];
347 double fit = (*it)->getFitValue();
351 for(
unsigned int j=0;
j<v.size();
j++)
358 (*it)->getEvents() = std::vector< std::vector<Event*> >();
385 for(
unsigned int i=0;
i< (unsigned) treeLimit;
i++)
389 trees.push_back(tree);
393 updateRegTargets(tree, learningRate, l);
396 std::ostringstream ss;
397 ss << savetreesdirectory <<
"/" <<
i <<
".xml";
399 const char*
c = s.c_str();
418 if(numtrees > trees.size())
421 numtrees = trees.size();
425 for(
unsigned int i=0;
i < numtrees;
i++)
428 appendCorrection(eventsp,
i);
456 if(numtrees > trees.size())
459 numtrees = trees.size();
463 for(
unsigned int i=0;
i < numtrees;
i++)
466 appendCorrection(e,
i);
494 trees = std::vector<emtf::Tree*>(numTrees);
498 for(
unsigned int i=0;
i < numTrees;
i++)
502 std::stringstream ss;
503 ss << directory <<
"/" <<
i <<
".xml";
506 trees[
i]->loadFromXML(
edm::FileInPath(ss.str().c_str()).fullPath().c_str());
523 subSample = std::vector< std::vector<Event*> >(
events.size()) ;
524 size_t subSampleSize = fraction*
events[0].size();
533 for(
unsigned int i=0;
i<subSample.size();
i++)
538 sortEventVectors(subSample);
556 trees = std::vector<emtf::Tree*>(treeLimit);
569 for(
unsigned int i=0;
i< (unsigned) treeLimit;
i++)
572 prepareRandomSubsample(fraction);
574 trees[
i]->buildTree(nodeLimit);
578 trees[
i]->filterEvents(
events[0]);
581 updateRegTargets(trees[
i], learningRate, l);
584 std::ostringstream ss;
585 ss <<
"trees/" << i <<
".xml";
587 const char*
c = s.c_str();
589 trees[
i]->saveToXML(c);
static Int_t sortingIndex
std::vector< emtf::Event * > getTrainingEvents()
void setTrainingEvents(std::vector< emtf::Event * > &trainingEvents)
void predictEvents(std::vector< emtf::Event * > &eventsp, unsigned int trees)
void updateRegTargets(emtf::Tree *tree, double learningRate, L1TLossFunction *l)
void prepareRandomSubsample(double fraction)
void rankVariables(std::vector< int > &rank)
void doRegression(Int_t nodeLimit, Int_t treeLimit, double learningRate, L1TLossFunction *l, const char *savetreesdirectory, bool saveTrees)
void loadL1TForestFromXML(const char *directory, unsigned int numTrees)
void doStochasticRegression(Int_t nodeLimit, Int_t treeLimit, double learningRate, double fraction, L1TLossFunction *l)
emtf::Tree * getTree(unsigned int i)
bidiiter shuffle(bidiiter begin, bidiiter end, size_t num_random)
U second(std::pair< T, U > const &p)
void updateEvents(emtf::Tree *tree)
std::vector< Double_t > data
virtual Double_t target(emtf::Event *e)=0
def unique(seq, keepstr=True)
virtual Double_t fit(std::vector< emtf::Event * > &v)=0
std::list< Node * > & getTerminalNodes()
void buildTree(Int_t nodeLimit)
bool compareEvents(Event *e1, Event *e2)
void sortEventVectors(std::vector< std::vector< emtf::Event * > > &e)
Node * filterEvent(Event *e)
void listEvents(std::vector< std::vector< emtf::Event * > > &e)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
void predictEvent(emtf::Event *e, unsigned int trees)
void saveToXML(const char *filename)
void appendCorrection(std::vector< emtf::Event * > &eventsp, Int_t treenum)
void saveSplitValues(const char *savefilename)
void filterEvents(std::vector< Event * > &tEvents)
bool compareEventsById(Event *e1, Event *e2)