23 #include "TStopwatch.h"
44 events = std::vector< std::vector<Event*> >(1);
51 Forest::Forest(std::vector<Event*>& trainingEvents, std::vector<Event*>& testingEvents)
68 for(
unsigned int i=0;
i <
trees.size();
i++)
73 for(
unsigned int j=0;
j <
events[0].size();
j++)
91 Event*
e = trainingEvents[0];
95 events = std::vector< std::vector<Event*> >();
97 for(
unsigned int i=0;
i<e->
data.size();
i++)
99 events.push_back(trainingEvents);
152 std::cout << std::endl <<
"Listing Events... " << std::endl;
154 for(
unsigned int i=0;
i <
e.size();
i++)
156 std::cout << std::endl <<
"Variable " <<
i <<
" vector contents: " << std::endl;
157 for(
unsigned int j=0;
j<
e[
i].size();
j++)
159 e[
i][
j]->outputEvent();
185 return e1->
id < e2->
id;
196 for(
unsigned int i=0;
i<
e.size();
i++)
219 std::vector<Double_t>
v(
events.size(), 0);
221 std::cout << std::endl <<
"Ranking Variables by Net Error Reduction... " << std::endl;
223 for(
unsigned int j=0;
j <
trees.size();
j++)
228 Double_t
max = *std::max_element(
v.begin(),
v.end());
231 for(
unsigned int i=0;
i <
v.size();
i++)
238 std::vector< std::pair<Double_t, Int_t> >
w(
events.size());
240 for(
unsigned int i=0;
i<
v.size();
i++)
242 w[
i] = std::pair<Double_t, Int_t>(
v[
i],
i);
246 std::sort(
w.begin(),
w.end());
249 for(
int i=(
v.size()-1);
i>=0;
i--)
251 std::cout <<
"x" <<
w[
i].second <<
": " <<
w[
i].first << std::endl;
254 std::cout << std::endl <<
"Done." << std::endl << std::endl;
273 for(std::list<Node*>::iterator it=tn.begin(); it!=tn.end(); it++)
276 std::vector<Event*>&
v = (*it)->getEvents()[0];
279 Double_t fit = l->
fit(v);
282 fit = learningRate*fit;
285 (*it)->setFitValue(fit);
289 for(
unsigned int j=0;
j<v.size();
j++)
297 (*it)->getEvents() = std::vector< std::vector<Event*> >();
313 for(std::list<Node*>::iterator it=tn.begin(); it!=tn.end(); it++)
315 std::vector<Event*>&
v = (*it)->getEvents()[0];
316 Double_t fit = (*it)->getFitValue();
320 for(
unsigned int j=0;
j<v.size();
j++)
327 (*it)->getEvents() = std::vector< std::vector<Event*> >();
339 std::cout << std::endl <<
"--Building Forest..." << std::endl << std::endl;
347 std::cout <<
"Sorting event vectors..." << std::endl;
354 for(
unsigned int i=0;
i< (unsigned) treeLimit;
i++)
356 std::cout <<
"++Building Tree " <<
i <<
"... " << std::endl;
358 trees.push_back(tree);
365 std::ostringstream
ss;
366 ss << savetreesdirectory <<
"/" <<
i <<
".xml";
368 const char*
c = s.c_str();
373 std::cout << std::endl <<
"Done." << std::endl << std::endl;
387 if(numtrees >
trees.size())
390 numtrees =
trees.size();
394 for(
unsigned int i=0;
i < numtrees;
i++)
425 trees = std::vector<Tree*>(numTrees);
429 for(
unsigned int i=0;
i < numTrees;
i++)
433 std::stringstream
ss;
434 ss << directory <<
"/" <<
i <<
".xml";
454 size_t subSampleSize = fraction*
events[0].size();
486 trees = std::vector<Tree*>(treeLimit);
493 std::cout << std::endl <<
"Running stochastic regression ... " << std::endl;
494 std::cout <<
"# Nodes: " << nodeLimit << std::endl;
495 std::cout <<
"Learning Rate: " << learningRate << std::endl;
496 std::cout <<
"Bagging Fraction: " << fraction << std::endl;
500 for(
unsigned int i=0;
i< (unsigned) treeLimit;
i++)
505 trees[
i]->buildTree(nodeLimit);
515 std::ostringstream
ss;
516 ss <<
"trees/" << i <<
".xml";
518 const char*
c = s.c_str();
523 std::cout << std::endl <<
"Done." << std::endl << std::endl;
525 std::cout << std::endl <<
"Total calculation time: " << timer.RealTime() << std::endl;
542 std::ofstream trainData;
543 trainData.open(
"training.data");
545 std::ofstream testData;
546 testData.open(
"testing.data");
550 std::vector<Event*>
v(n);
552 events = std::vector< std::vector<Event*> >(3, std::vector<Event*>(
n));
555 std::cout << std::endl <<
"Generating " << n <<
" events..." << std::endl;
558 for(
unsigned int i=0;
i< (unsigned) n;
i++)
562 std::vector<Double_t>
x(3);
572 x[1] += r.Gaus(0,sigma);
573 x[2] += r.Gaus(0,sigma);
587 for(
unsigned int i=0;
i <
events.size();
i++)
593 for(
unsigned int i=0;
i< (unsigned) m;
i++)
597 std::vector<Double_t>
x(3);
602 x[1] += r.Gaus(0,sigma);
603 x[2] += r.Gaus(0,sigma);
627 for(
unsigned int i=0;
i< (unsigned) n;
i++)
void generate(Int_t numTrainEvents, Int_t numTestEvents, Double_t sigma)
void prepareRandomSubsample(Double_t fraction)
std::vector< std::vector< Event * > > events
virtual Double_t fit(std::vector< Event * > &v)=0
virtual Double_t target(Event *e)=0
std::vector< Event * > getTestEvents()
std::vector< Event * > testEvents
void appendCorrection(std::vector< Event * > eventsp, Int_t treenum)
void predictEvents(std::vector< Event * > eventsp, unsigned int trees)
bidiiter shuffle(bidiiter begin, bidiiter end, size_t num_random)
void buildTree(Int_t nodeLimit)
static Int_t sortingIndex
void loadForestFromXML(const char *directory, unsigned int numTrees)
std::vector< Double_t > rankVariables()
void updateEvents(Tree *tree)
void doStochasticRegression(Int_t nodeLimit, Int_t treeLimit, Double_t learningRate, Double_t fraction, LossFunction *l)
void sortEventVectors(std::vector< std::vector< Event * > > &e)
void updateRegTargets(Tree *tree, Double_t learningRate, LossFunction *l)
bool compareEvents(Event *e1, Event *e2)
std::vector< Tree * > trees
std::vector< std::vector< Event * > > subSample
std::list< Node * > & getTerminalNodes()
void filterEvents(std::vector< Event * > &tEvents)
void listEvents(std::vector< std::vector< Event * > > &e)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
void setTrainingEvents(std::vector< Event * > &trainingEvents)
void saveToXML(const char *filename)
std::vector< Event * > getTrainingEvents()
bool compareEventsById(Event *e1, Event *e2)
void doRegression(Int_t nodeLimit, Int_t treeLimit, Double_t learningRate, LossFunction *l, const char *savetreesdirectory, bool saveTrees)
std::vector< Double_t > data
void setTestEvents(std::vector< Event * > &testingEvents)