CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
List of all members | Public Member Functions | Private Attributes
Forest Class Reference

#include <Forest.h>

Public Member Functions

void appendCorrection (std::vector< Event * > eventsp, Int_t treenum)
 
void doRegression (Int_t nodeLimit, Int_t treeLimit, Double_t learningRate, LossFunction *l, const char *savetreesdirectory, bool saveTrees)
 
void doStochasticRegression (Int_t nodeLimit, Int_t treeLimit, Double_t learningRate, Double_t fraction, LossFunction *l)
 
 Forest ()
 
 Forest (std::vector< Event * > &trainingEvents, std::vector< Event * > &testEvents)
 
void generate (Int_t numTrainEvents, Int_t numTestEvents, Double_t sigma)
 
std::vector< Event * > getTestEvents ()
 
std::vector< Event * > getTrainingEvents ()
 
void listEvents (std::vector< std::vector< Event * > > &e)
 Lists the events in each event vector. More...
 
void loadForestFromXML (const char *directory, unsigned int numTrees)
 
void predictEvents (std::vector< Event * > eventsp, unsigned int trees)
 
void prepareRandomSubsample (Double_t fraction)
 
std::vector< Double_t > rankVariables ()
 
void setTestEvents (std::vector< Event * > &testingEvents)
 
void setTrainingEvents (std::vector< Event * > &trainingEvents)
 
unsigned int size ()
 
void sortEventVectors (std::vector< std::vector< Event * > > &e)
 
void updateEvents (Tree *tree)
 
void updateRegTargets (Tree *tree, Double_t learningRate, LossFunction *l)
 
 ~Forest ()
 

Private Attributes

std::vector< std::vector
< Event * > > 
events
 
std::vector< std::vector
< Event * > > 
subSample
 
std::vector< Event * > testEvents
 
std::vector< Tree * > trees
 

Detailed Description

Definition at line 9 of file Forest.h.

Constructor & Destructor Documentation

Forest::Forest ( )

Definition at line 42 of file Forest.cc.

References events.

43 {
44  events = std::vector< std::vector<Event*> >(1);
45 }
std::vector< std::vector< Event * > > events
Definition: Forest.h:53
Forest::Forest ( std::vector< Event * > &  trainingEvents,
std::vector< Event * > &  testEvents 
)

Definition at line 51 of file Forest.cc.

References setTestEvents(), and setTrainingEvents().

52 {
53  setTrainingEvents(trainingEvents);
54  setTestEvents(testingEvents);
55 }
void setTrainingEvents(std::vector< Event * > &trainingEvents)
Definition: Forest.cc:87
void setTestEvents(std::vector< Event * > &testingEvents)
Definition: Forest.cc:107
Forest::~Forest ( )

Definition at line 61 of file Forest.cc.

References events, i, j, testEvents, and trees.

62 {
63 // When the forest is destroyed it will delete the trees as well as the
64 // events from the training and testing sets.
65 // The user may want the events to remain after they destroy the forest;
66 // this should be changed in future upgrades.
67 
68  for(unsigned int i=0; i < trees.size(); i++)
69  {
70  delete trees[i];
71  }
72 
73  for(unsigned int j=0; j < events[0].size(); j++)
74  {
75  delete events[0][j];
76  }
77 
78  for(unsigned int j=0; j < testEvents.size(); j++)
79  {
80  delete testEvents[j];
81  }
82 }
int i
Definition: DBlmapReader.cc:9
std::vector< std::vector< Event * > > events
Definition: Forest.h:53
std::vector< Event * > testEvents
Definition: Forest.h:55
int j
Definition: DBlmapReader.cc:9
std::vector< Tree * > trees
Definition: Forest.h:56

Member Function Documentation

void Forest::appendCorrection ( std::vector< Event * >  eventsp,
Int_t  treenum 
)

Definition at line 405 of file Forest.cc.

References Tree::filterEvents(), MainPageGenerator::tree, trees, and updateEvents().

Referenced by predictEvents().

406 {
407 // Update the prediction by appending the next correction.
408 
409  Tree* tree = trees[treenum];
410  tree->filterEvents(eventsp);
411 
412  // Update the events with their new prediction.
413  updateEvents(tree);
414 }
void updateEvents(Tree *tree)
Definition: Forest.cc:305
Definition: Tree.h:16
std::vector< Tree * > trees
Definition: Forest.h:56
void filterEvents(std::vector< Event * > &tEvents)
Definition: Tree.cc:163
void Forest::doRegression ( Int_t  nodeLimit,
Int_t  treeLimit,
Double_t  learningRate,
LossFunction *  l,
const char *  savetreesdirectory,
bool  saveTrees 
)

Definition at line 335 of file Forest.cc.

References Tree::buildTree(), EnergyCorrector::c, gather_cfg::cout, events, i, alignCSCRings::s, Tree::saveToXML(), sortEventVectors(), contentValuesCheck::ss, AlCaHLTBitMon_QueryRunRegistry::string, MainPageGenerator::tree, trees, and updateRegTargets().

336 {
337 // Build the forest using the training sample.
338 
339  std::cout << std::endl << "--Building Forest..." << std::endl << std::endl;
340 
341  // The trees work with a matrix of events where the rows have the same set of events. Each row however
342  // is sorted according to the feature variable given by event->data[row].
343  // If we only had one set of events we would have to sort it according to the
344  // feature variable every time we want to calculate the best split point for that feature.
345  // By keeping sorted copies we avoid the sorting operation during split point calculation
346  // and save computation time. If we do not sort each of the rows the regression will fail.
347  std::cout << "Sorting event vectors..." << std::endl;
349 
350  // See how long the regression takes.
351  TStopwatch timer;
352  timer.Start(kTRUE);
353 
354  for(unsigned int i=0; i< (unsigned) treeLimit; i++)
355  {
356  std::cout << "++Building Tree " << i << "... " << std::endl;
357  Tree* tree = new Tree(events);
358  trees.push_back(tree);
359  tree->buildTree(nodeLimit);
360 
361  // Update the targets for the next tree to fit.
362  updateRegTargets(tree, learningRate, l);
363 
364  // Save trees to xml in some directory.
365  std::ostringstream ss;
366  ss << savetreesdirectory << "/" << i << ".xml";
367  std::string s = ss.str();
368  const char* c = s.c_str();
369 
370  if(saveTrees) tree->saveToXML(c);
371  }
372  std::cout << std::endl;
373  std::cout << std::endl << "Done." << std::endl << std::endl;
374 
375 // std::cout << std::endl << "Total calculation time: " << timer.RealTime() << std::endl;
376 }
int i
Definition: DBlmapReader.cc:9
std::vector< std::vector< Event * > > events
Definition: Forest.h:53
void buildTree(Int_t nodeLimit)
Definition: Tree.cc:107
Definition: Tree.h:16
void sortEventVectors(std::vector< std::vector< Event * > > &e)
Definition: Forest.cc:191
void updateRegTargets(Tree *tree, Double_t learningRate, LossFunction *l)
Definition: Forest.cc:263
std::vector< Tree * > trees
Definition: Forest.h:56
tuple cout
Definition: gather_cfg.py:145
void saveToXML(const char *filename)
Definition: Tree.cc:255
void Forest::doStochasticRegression ( Int_t  nodeLimit,
Int_t  treeLimit,
Double_t  learningRate,
Double_t  fraction,
LossFunction *  l 
)

Definition at line 475 of file Forest.cc.

References EnergyCorrector::c, gather_cfg::cout, events, i, prepareRandomSubsample(), alignCSCRings::s, sortEventVectors(), contentValuesCheck::ss, AlCaHLTBitMon_QueryRunRegistry::string, subSample, trees, and updateRegTargets().

476 {
477 // If the fraction of events to use is one then this algorithm is slower than doRegression due to the fact
478 // that we have to sort the events every time we extract a subsample. Without random sampling we simply
479 // use all of the events and keep them sorted.
480 
481 // Anyways, this algorithm uses a portion of the events to train each tree. All of the events are updated
482 // afterwards with the results from the subsample built tree.
483 
484  // Prepare some things.
486  trees = std::vector<Tree*>(treeLimit);
487 
488  // See how long the regression takes.
489  TStopwatch timer;
490  timer.Start(kTRUE);
491 
492  // Output the current settings.
493  std::cout << std::endl << "Running stochastic regression ... " << std::endl;
494  std::cout << "# Nodes: " << nodeLimit << std::endl;
495  std::cout << "Learning Rate: " << learningRate << std::endl;
496  std::cout << "Bagging Fraction: " << fraction << std::endl;
497  std::cout << std::endl;
498 
499 
500  for(unsigned int i=0; i< (unsigned) treeLimit; i++)
501  {
502  // Build the tree using a random subsample.
504  trees[i] = new Tree(subSample);
505  trees[i]->buildTree(nodeLimit);
506 
507  // Fit all of the events based upon the tree we built using
508  // the subsample of events.
509  trees[i]->filterEvents(events[0]);
510 
511  // Update the targets for the next tree to fit.
512  updateRegTargets(trees[i], learningRate, l);
513 
514  // Save trees to xml in some directory.
515  std::ostringstream ss;
516  ss << "trees/" << i << ".xml";
517  std::string s = ss.str();
518  const char* c = s.c_str();
519 
520  trees[i]->saveToXML(c);
521  }
522 
523  std::cout << std::endl << "Done." << std::endl << std::endl;
524 
525  std::cout << std::endl << "Total calculation time: " << timer.RealTime() << std::endl;
526 }
void prepareRandomSubsample(Double_t fraction)
Definition: Forest.cc:446
int i
Definition: DBlmapReader.cc:9
std::vector< std::vector< Event * > > events
Definition: Forest.h:53
Definition: Tree.h:16
void sortEventVectors(std::vector< std::vector< Event * > > &e)
Definition: Forest.cc:191
void updateRegTargets(Tree *tree, Double_t learningRate, LossFunction *l)
Definition: Forest.cc:263
std::vector< Tree * > trees
Definition: Forest.h:56
std::vector< std::vector< Event * > > subSample
Definition: Forest.h:54
tuple cout
Definition: gather_cfg.py:145
void Forest::generate ( Int_t  numTrainEvents,
Int_t  numTestEvents,
Double_t  sigma 
)

Definition at line 531 of file Forest.cc.

References gather_cfg::cout, Event::data, alignCSCRings::e, events, f, i, Event::id, visualization-live-secondInstance_cfg::m, gen::n, Event::predictedValue, alignCSCRings::r, Event::sortingIndex, testEvents, Event::trueValue, findQualityFiles::v, and x.

532 {
533 // Generate events to use for the building and testing of the forest.
534 // We keep as many copies of the events as there are variables.
535 // And we store these copies in the events vector of vectors.
536 // events[0] is a vector sorted by var 0, events[1] by var 1, etc.
537 // All of the vectors have the same events, but each vector is just
538 // sorted by a different variable.
539 
540  // Store these in case we need them
541  // for plotting or troubleshooting.
542  std::ofstream trainData;
543  trainData.open("training.data");
544 
545  std::ofstream testData;
546  testData.open("testing.data");
547 
548  // Prepare our containers.
549  TRandom3 r(0);
550  std::vector<Event*> v(n);
551 
552  events = std::vector< std::vector<Event*> >(3, std::vector<Event*>(n));
553  testEvents = std::vector<Event*>(m);
554 
555  std::cout << std::endl << "Generating " << n << " events..." << std::endl;
556 
557  // Generate the data set we will use to build the forest.
558  for(unsigned int i=0; i< (unsigned) n; i++)
559  {
560  // data[0] is the target, which is determined
561  // by the other variables data[1], data[2] ...
562  std::vector<Double_t> x(3);
563  x[1] = r.Rndm();
564  x[2] = r.Rndm();
565 
566  // Store the variable which is determined by the others.
567  // Our target for BDT prediction.
568  x[0] = x[1]*x[2];
569 
570 
571  // Add noise to the determining variables.
572  x[1] += r.Gaus(0,sigma);
573  x[2] += r.Gaus(0,sigma);
574 
575  // Create the event.
576  Event* e = new Event();
577  v[i]=e;
578 
579  // Store the event.
580  e->predictedValue = 0;
581  e->trueValue = x[0];
582  e->data = x;
583  e->id = i;
584  }
585 
586  // Set up the events matrix and the events vector.
587  for(unsigned int i=0; i < events.size(); i++)
588  {
589  events[i] = v;
590  }
591 
592  // Generate a separate data set for testing.
593  for(unsigned int i=0; i< (unsigned) m; i++)
594  {
595  // data[0] is the target, which is determined
596  // by the other variables data[1], data[2] ....
597  std::vector<Double_t> x(3);
598  x[1] = r.Rndm();
599  x[2] = r.Rndm();
600  x[0] = x[1]*x[2];
601 
602  x[1] += r.Gaus(0,sigma);
603  x[2] += r.Gaus(0,sigma);
604 
605  // Create the event.
606  Event* e = new Event();
607  Event* f = new Event();
608 
609  testEvents[i] = e;
610 
611  // Store the event.
612  e->predictedValue = 0;
613  e->trueValue = x[0];
614  e->data = x;
615  e->id = i;
616 
617  f->predictedValue = 0;
618  f->trueValue = x[0];
619  f->data = x;
620  f->id = i;
621 
622  }
623 
624  // Sort the events by the target variable.
626 
627  for(unsigned int i=0; i< (unsigned) n; i++)
628  {
629  // Argh, write to files if ye want, matie.
630  }
631 
632  trainData.close();
633  testData.close();
634 }
Double_t predictedValue
Definition: Event.h:20
int i
Definition: DBlmapReader.cc:9
Int_t id
Definition: Event.h:29
std::vector< std::vector< Event * > > events
Definition: Forest.h:53
std::vector< Event * > testEvents
Definition: Forest.h:55
Definition: Event.h:16
static Int_t sortingIndex
Definition: Event.h:28
double f[11][100]
tuple cout
Definition: gather_cfg.py:145
Double_t trueValue
Definition: Event.h:19
std::vector< Double_t > data
Definition: Event.h:30
std::vector< Event * > Forest::getTestEvents ( )

Definition at line 125 of file Forest.cc.

References testEvents.

125 { return testEvents; }
std::vector< Event * > testEvents
Definition: Forest.h:55
std::vector< Event * > Forest::getTrainingEvents ( )

Definition at line 118 of file Forest.cc.

References events.

118 { return events[0]; }
std::vector< std::vector< Event * > > events
Definition: Forest.h:53
void Forest::listEvents ( std::vector< std::vector< Event * > > &  e)

Lists the events in each event vector. There are multiple copies of the events vector; each copy is sorted according to a different determining variable.

Definition at line 147 of file Forest.cc.

References gather_cfg::cout, alignCSCRings::e, i, and j.

148 {
149 // Simply list the events in each event vector. We have multiple copies
150 // of the events vector. Each copy is sorted according to a different
151 // determining variable.
152  std::cout << std::endl << "Listing Events... " << std::endl;
153 
154  for(unsigned int i=0; i < e.size(); i++)
155  {
156  std::cout << std::endl << "Variable " << i << " vector contents: " << std::endl;
157  for(unsigned int j=0; j<e[i].size(); j++)
158  {
159  e[i][j]->outputEvent();
160  }
161  std::cout << std::endl;
162  }
163 }
int i
Definition: DBlmapReader.cc:9
int j
Definition: DBlmapReader.cc:9
tuple cout
Definition: gather_cfg.py:145
void Forest::loadForestFromXML ( const char *  directory,
unsigned int  numTrees 
)

Definition at line 420 of file Forest.cc.

References contentValuesFiles::fullPath, i, contentValuesCheck::ss, and trees.

421 {
422 // Load a forest that has already been created and stored into XML somewhere.
423 
424  // Initialize the vector of trees.
425  trees = std::vector<Tree*>(numTrees);
426 
427  // Load the Forest.
428  //std::cout << std::endl << "Loading Forest from XML ... " << std::endl;
429  for(unsigned int i=0; i < numTrees; i++)
430  {
431  trees[i] = new Tree();
432 
433  std::stringstream ss;
434  ss << directory << "/" << i << ".xml";
435 
436  trees[i]->loadFromXML(edm::FileInPath(ss.str().c_str()).fullPath().c_str());
437  }
438 
439  // std::cout << "Done." << std::endl << std::endl;
440 }
int i
Definition: DBlmapReader.cc:9
Definition: Tree.h:16
std::vector< Tree * > trees
Definition: Forest.h:56
void Forest::predictEvents ( std::vector< Event * >  eventsp,
unsigned int  trees 
)

Definition at line 382 of file Forest.cc.

References appendCorrection(), i, and trees.

Referenced by CalculatePt_FullPrecision(), and getPt().

383 {
384 // Predict values for eventsp by running them through the forest up to numtrees.
385 
386  //std::cout << "Using " << numtrees << " trees from the forest to predict events ... " << std::endl;
387  if(numtrees > trees.size())
388  {
389  // std::cout << std::endl << "!! Input greater than the forest size. Using forest.size() = " << trees.size() << " to predict instead." << std::endl;
390  numtrees = trees.size();
391  }
392 
393  // i iterates through the trees in the forest. Each tree corrects the last prediction.
394  for(unsigned int i=0; i < numtrees; i++)
395  {
396  //std::cout << "++Tree " << i << "..." << std::endl;
397  appendCorrection(eventsp, i);
398  }
399 }
int i
Definition: DBlmapReader.cc:9
void appendCorrection(std::vector< Event * > eventsp, Int_t treenum)
Definition: Forest.cc:405
std::vector< Tree * > trees
Definition: Forest.h:56
void Forest::prepareRandomSubsample ( Double_t  fraction)

Definition at line 446 of file Forest.cc.

References begin, end, events, i, shuffle(), sortEventVectors(), subSample, and findQualityFiles::v.

Referenced by doStochasticRegression().

447 {
448 // We use this for Stochastic Gradient Boosting. Basically you
449 // take a subsample of the training events and build a tree using
450 // those. Then use the tree built from the subsample to update
451 // the predictions for all the events.
452 
453  subSample = std::vector< std::vector<Event*> >(events.size()) ;
454  size_t subSampleSize = fraction*events[0].size();
455 
456  // Randomize the first subSampleSize events in events[0].
457  shuffle(events[0].begin(), events[0].end(), subSampleSize);
458 
459  // Get a copy of the random subset we just made.
460  std::vector<Event*> v(events[0].begin(), events[0].begin()+subSampleSize);
461 
462  // Initialize and sort the subSample collection.
463  for(unsigned int i=0; i<subSample.size(); i++)
464  {
465  subSample[i] = v;
466  }
467 
469 }
int i
Definition: DBlmapReader.cc:9
std::vector< std::vector< Event * > > events
Definition: Forest.h:53
bidiiter shuffle(bidiiter begin, bidiiter end, size_t num_random)
Definition: Utilities.h:26
#define end
Definition: vmac.h:37
void sortEventVectors(std::vector< std::vector< Event * > > &e)
Definition: Forest.cc:191
std::vector< std::vector< Event * > > subSample
Definition: Forest.h:54
#define begin
Definition: vmac.h:30
std::vector< Double_t > Forest::rankVariables ( )

Definition at line 207 of file Forest.cc.

References gather_cfg::cout, events, i, j, bookConverter::max, trees, findQualityFiles::v, and w.

208 {
209 // This function ranks the determining variables according to their importance
210 // in determining the fit. Use a low learning rate for better results.
211 // Separates completely useless variables from useful ones well,
212 // but isn't the best at separating variables of similar importance.
213 // This is calculated using the error reduction on the training set. The function
214 // should be changed to use the testing set, but this works fine for now.
215 // I will try to change this in the future.
216 
217  // Initialize the vector v, which will store the total error reduction
218  // for each variable i in v[i].
219  std::vector<Double_t> v(events.size(), 0);
220 
221  std::cout << std::endl << "Ranking Variables by Net Error Reduction... " << std::endl;
222 
223  for(unsigned int j=0; j < trees.size(); j++)
224  {
225  trees[j]->rankVariables(v);
226  }
227 
228  Double_t max = *std::max_element(v.begin(), v.end());
229 
230  // Scale the importance. Maximum importance = 100.
231  for(unsigned int i=0; i < v.size(); i++)
232  {
233  v[i] = 100*v[i]/max;
234  }
235 
236  // Change the storage format so that we can keep the index
237  // and the value associated after sorting.
238  std::vector< std::pair<Double_t, Int_t> > w(events.size());
239 
240  for(unsigned int i=0; i<v.size(); i++)
241  {
242  w[i] = std::pair<Double_t, Int_t>(v[i],i);
243  }
244 
245  // Sort so that we can output in order of importance.
246  std::sort(w.begin(),w.end());
247 
248  // Output the results.
249  for(int i=(v.size()-1); i>=0; i--)
250  {
251  std::cout << "x" << w[i].second << ": " << w[i].first << std::endl;
252  }
253 
254  std::cout << std::endl << "Done." << std::endl << std::endl;
255  return v;
256 
257 }
int i
Definition: DBlmapReader.cc:9
std::vector< std::vector< Event * > > events
Definition: Forest.h:53
const double w
Definition: UKUtility.cc:23
int j
Definition: DBlmapReader.cc:9
std::vector< Tree * > trees
Definition: Forest.h:56
tuple cout
Definition: gather_cfg.py:145
void Forest::setTestEvents ( std::vector< Event * > &  testingEvents)

Definition at line 107 of file Forest.cc.

References testEvents.

Referenced by Forest().

108 {
109 // tell the forest which events to use for testing
110  testEvents = testingEvents;
111 }
std::vector< Event * > testEvents
Definition: Forest.h:55
void Forest::setTrainingEvents ( std::vector< Event * > &  trainingEvents)

Definition at line 87 of file Forest.cc.

References Event::data, alignCSCRings::e, events, and i.

Referenced by Forest().

88 {
89 // tell the forest which events to use for training
90 
91  Event* e = trainingEvents[0];
92  //unsigned int numrows = e->data.size();
93 
94  // Reset the events matrix.
95  events = std::vector< std::vector<Event*> >();
96 
97  for(unsigned int i=0; i<e->data.size(); i++)
98  {
99  events.push_back(trainingEvents);
100  }
101 }
int i
Definition: DBlmapReader.cc:9
std::vector< std::vector< Event * > > events
Definition: Forest.h:53
Definition: Event.h:16
std::vector< Double_t > data
Definition: Event.h:30
unsigned int Forest::size ( void  )

Definition at line 131 of file Forest.cc.

References trees.

132 {
133 // Return the number of trees in the forest.
134  return trees.size();
135 }
std::vector< Tree * > trees
Definition: Forest.h:56
void Forest::sortEventVectors ( std::vector< std::vector< Event * > > &  e)

Definition at line 191 of file Forest.cc.

References begin, compareEvents(), alignCSCRings::e, end, i, and Event::sortingIndex.

Referenced by doRegression(), doStochasticRegression(), and prepareRandomSubsample().

192 {
193 // When a node chooses the optimum split point and split variable it needs
194 // the events to be sorted according to the variable it is considering.
195 
196  for(unsigned int i=0; i<e.size(); i++)
197  {
199  std::sort(e[i].begin(), e[i].end(), compareEvents);
200  }
201 }
int i
Definition: DBlmapReader.cc:9
static Int_t sortingIndex
Definition: Event.h:28
#define end
Definition: vmac.h:37
bool compareEvents(Event *e1, Event *e2)
Definition: Forest.cc:173
#define begin
Definition: vmac.h:30
void Forest::updateEvents ( Tree *  tree)

Definition at line 305 of file Forest.cc.

References alignCSCRings::e, Tree::getTerminalNodes(), j, Event::predictedValue, and findQualityFiles::v.

Referenced by appendCorrection().

306 {
307 // Prepare the test events for the next tree.
308 
309  // Get the list of terminal nodes for this tree.
310  std::list<Node*>& tn = tree->getTerminalNodes();
311 
312  // Loop through the terminal nodes.
313  for(std::list<Node*>::iterator it=tn.begin(); it!=tn.end(); it++)
314  {
315  std::vector<Event*>& v = (*it)->getEvents()[0];
316  Double_t fit = (*it)->getFitValue();
317 
318  // Loop through each event in the terminal region and update the
319  // global event it maps to.
320  for(unsigned int j=0; j<v.size(); j++)
321  {
322  Event* e = v[j];
323  e->predictedValue += fit;
324  }
325 
326  // Release memory.
327  (*it)->getEvents() = std::vector< std::vector<Event*> >();
328  }
329 }
Double_t predictedValue
Definition: Event.h:20
Definition: Event.h:16
int j
Definition: DBlmapReader.cc:9
std::list< Node * > & getTerminalNodes()
Definition: Tree.cc:75
void Forest::updateRegTargets ( Tree *  tree,
Double_t  learningRate,
LossFunction *  l 
)

Definition at line 263 of file Forest.cc.

References Event::data, alignCSCRings::e, LossFunction::fit(), Tree::getTerminalNodes(), j, Event::predictedValue, LossFunction::target(), and findQualityFiles::v.

Referenced by doRegression(), and doStochasticRegression().

264 {
265 // Prepare the global vector of events for the next tree.
266 // Update the fit for each event and set the new target value
267 // for the next tree.
268 
269  // Get the list of terminal nodes for this tree.
270  std::list<Node*>& tn = tree->getTerminalNodes();
271 
272  // Loop through the terminal nodes.
273  for(std::list<Node*>::iterator it=tn.begin(); it!=tn.end(); it++)
274  {
275  // Get the events in the current terminal region.
276  std::vector<Event*>& v = (*it)->getEvents()[0];
277 
278  // Fit the events depending on the loss function criteria.
279  Double_t fit = l->fit(v);
280 
281  // Scale the rate at which the algorithm converges.
282  fit = learningRate*fit;
283 
284  // Store the official fit value in the terminal node.
285  (*it)->setFitValue(fit);
286 
287  // Loop through each event in the terminal region and update the
288  // target for the next tree.
289  for(unsigned int j=0; j<v.size(); j++)
290  {
291  Event* e = v[j];
292  e->predictedValue += fit;
293  e->data[0] = l->target(e);
294  }
295 
296  // Release memory.
297  (*it)->getEvents() = std::vector< std::vector<Event*> >();
298  }
299 }
Double_t predictedValue
Definition: Event.h:20
virtual Double_t fit(std::vector< Event * > &v)=0
virtual Double_t target(Event *e)=0
Definition: Event.h:16
int j
Definition: DBlmapReader.cc:9
std::list< Node * > & getTerminalNodes()
Definition: Tree.cc:75
std::vector< Double_t > data
Definition: Event.h:30

Member Data Documentation

std::vector< std::vector<Event*> > Forest::events
private
std::vector< std::vector<Event*> > Forest::subSample
private

Definition at line 54 of file Forest.h.

Referenced by doStochasticRegression(), and prepareRandomSubsample().

std::vector<Event*> Forest::testEvents
private

Definition at line 55 of file Forest.h.

Referenced by generate(), getTestEvents(), setTestEvents(), and ~Forest().

std::vector<Tree*> Forest::trees
private