CMS 3D CMS Logo

List of all members | Public Member Functions | Private Attributes
emtf::Forest Class Reference

#include <Forest.h>

Public Member Functions

void appendCorrection (std::vector< Event *> &eventsp, int treenum)
 
void appendCorrection (Event *e, int treenum)
 
void doRegression (int nodeLimit, int treeLimit, double learningRate, LossFunction *l, const char *savetreesdirectory, bool saveTrees)
 
void doStochasticRegression (int nodeLimit, int treeLimit, double learningRate, double fraction, LossFunction *l)
 
 Forest ()
 
 Forest (std::vector< Event *> &trainingEvents)
 
 Forest (const Forest &forest)
 
 Forest (Forest &&forest)=default
 
void generate (int numTrainEvents, int numTestEvents, double sigma)
 
std::vector< Event * > getTrainingEvents ()
 
Tree * getTree (unsigned int i)
 
void listEvents (std::vector< std::vector< Event *> > &e)
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! More...
 
void loadForestFromXML (const char *directory, unsigned int numTrees)
 
void loadFromCondPayload (const L1TMuonEndCapForest::DForest &payload)
 
Forest & operator= (const Forest &forest)
 
void predictEvent (Event *e, unsigned int trees)
 
void predictEvents (std::vector< Event *> &eventsp, unsigned int trees)
 
void prepareRandomSubsample (double fraction)
 
void rankVariables (std::vector< int > &rank)
 
void saveSplitValues (const char *savefilename)
 
void setTrainingEvents (std::vector< Event *> &trainingEvents)
 
unsigned int size ()
 
void sortEventVectors (std::vector< std::vector< Event *> > &e)
 
void updateEvents (Tree *tree)
 
void updateRegTargets (Tree *tree, double learningRate, LossFunction *l)
 
 ~Forest ()
 

Private Attributes

std::vector< std::vector< Event * > > events
 
std::vector< std::vector< Event * > > subSample
 
std::vector< Tree * > trees
 

Detailed Description

Definition at line 12 of file Forest.h.

Constructor & Destructor Documentation

◆ Forest() [1/4]

Forest::Forest ( )

Definition at line 40 of file Forest.cc.

References events.

40 { events = std::vector<std::vector<Event*>>(1); }
std::vector< std::vector< Event * > > events
Definition: Forest.h:66

◆ Forest() [2/4]

Forest::Forest ( std::vector< Event *> &  trainingEvents)

Definition at line 46 of file Forest.cc.

References setTrainingEvents().

46 { setTrainingEvents(trainingEvents); }
void setTrainingEvents(std::vector< Event *> &trainingEvents)
Definition: Forest.cc:87

◆ ~Forest()

Forest::~Forest ( )

Definition at line 52 of file Forest.cc.

References mps_fire::i, and trees.

52  {
53  // When the forest is destroyed it will delete the trees as well as the
54  // events from the training and testing sets.
55  // The user may want the events to remain after they destroy the forest
56  // this should be changed in future upgrades.
57 
58  for (unsigned int i = 0; i < trees.size(); i++) {
59  if (trees[i])
60  delete trees[i];
61  }
62 }
std::vector< Tree * > trees
Definition: Forest.h:68

◆ Forest() [3/4]

Forest::Forest ( const Forest &  forest)

Definition at line 64 of file Forest.cc.

References HcalDetIdTransform::transform(), MainPageGenerator::tree, and trees.

64  {
65  transform(forest.trees.cbegin(), forest.trees.cend(), back_inserter(trees), [](const Tree* tree) {
66  return new Tree(*tree);
67  });
68 }
std::vector< Tree * > trees
Definition: Forest.h:68
Definition: tree.py:1
unsigned transform(const HcalDetId &id, unsigned transformCode)

◆ Forest() [4/4]

emtf::Forest::Forest ( Forest &&  forest)
default

Member Function Documentation

◆ appendCorrection() [1/2]

void Forest::appendCorrection ( std::vector< Event *> &  eventsp,
int  treenum 
)

Definition at line 426 of file Forest.cc.

References trees, and updateEvents().

Referenced by predictEvent(), and predictEvents().

426  {
427  // Update the prediction by appending the next correction.
428 
429  Tree* tree = trees[treenum];
430  tree->filterEvents(eventsp);
431 
432  // Update the events with their new prediction.
433  updateEvents(tree);
434 }
void updateEvents(Tree *tree)
Definition: Forest.cc:328
std::vector< Tree * > trees
Definition: Forest.h:68
Definition: tree.py:1

◆ appendCorrection() [2/2]

void Forest::appendCorrection ( Event *  e,
int  treenum 
)

Definition at line 463 of file Forest.cc.

References MillePedeFileConverter_cfg::e, l1tTowerCalibrationProducer_cfi::fit, emtf::Node::getFitValue(), and trees.

463  {
464  // Update the prediction by appending the next correction.
465 
466  Tree* tree = trees[treenum];
467  Node* terminalNode = tree->filterEvent(e);
468 
469  // Update the event with its new prediction.
470  double fit = terminalNode->getFitValue();
471  e->predictedValue += fit;
472 }
double getFitValue()
Definition: Node.cc:112
std::vector< Tree * > trees
Definition: Forest.h:68
Definition: tree.py:1

◆ doRegression()

void Forest::doRegression ( int  nodeLimit,
int  treeLimit,
double  learningRate,
LossFunction *  l,
const char *  savetreesdirectory,
bool  saveTrees 
)

Definition at line 355 of file Forest.cc.

References c, events, mps_fire::i, cmsLHEtoEOSManager::l, alignCSCRings::s, sortEventVectors(), contentValuesCheck::ss, AlCaHLTBitMon_QueryRunRegistry::string, trees, and updateRegTargets().

360  {
361  // Build the forest using the training sample.
362 
363  //std::cout << std::endl << "--Building Forest..." << std::endl << std::endl;
364 
365  // The trees work with a matrix of events where the rows have the same set of events. Each row however
366  // is sorted according to the feature variable given by event->data[row].
367  // If we only had one set of events we would have to sort it according to the
368  // feature variable every time we want to calculate the best split point for that feature.
369  // By keeping sorted copies we avoid the sorting operation during split point calculation
370  // and save computation time. If we do not sort each of the rows the regression will fail.
371  //std::cout << "Sorting event vectors..." << std::endl;
372  sortEventVectors(events);
373 
374  // See how long the regression takes.
375  TStopwatch timer;
376  timer.Start(kTRUE);
377 
378  for (unsigned int i = 0; i < (unsigned)treeLimit; i++) {
379  // std::cout << "++Building Tree " << i << "... " << std::endl;
380  Tree* tree = new Tree(events);
381  trees.push_back(tree);
382  tree->buildTree(nodeLimit);
383 
384  // Update the targets for the next tree to fit.
385  updateRegTargets(tree, learningRate, l);
386 
387  // Save trees to xml in some directory.
388  std::ostringstream ss;
389  ss << savetreesdirectory << "/" << i << ".xml";
390  std::string s = ss.str();
391  const char* c = s.c_str();
392 
393  if (saveTrees)
394  tree->saveToXML(c);
395  }
396  //std::cout << std::endl;
397  //std::cout << std::endl << "Done." << std::endl << std::endl;
398 
399  // std::cout << std::endl << "Total calculation time: " << timer.RealTime() << std::endl;
400 }
std::vector< std::vector< Event * > > events
Definition: Forest.h:66
void updateRegTargets(Tree *tree, double learningRate, LossFunction *l)
Definition: Forest.cc:289
void sortEventVectors(std::vector< std::vector< Event *> > &e)
Definition: Forest.cc:181
std::vector< Tree * > trees
Definition: Forest.h:68
Definition: tree.py:1

◆ doStochasticRegression()

void Forest::doStochasticRegression ( int  nodeLimit,
int  treeLimit,
double  learningRate,
double  fraction,
LossFunction *  l 
)

Definition at line 548 of file Forest.cc.

References c, events, HLT_2022v15_cff::fraction, mps_fire::i, cmsLHEtoEOSManager::l, prepareRandomSubsample(), alignCSCRings::s, sortEventVectors(), contentValuesCheck::ss, AlCaHLTBitMon_QueryRunRegistry::string, subSample, trees, and updateRegTargets().

549  {
550  // If the fraction of events to use is one then this algorithm is slower than doRegression due to the fact
551  // that we have to sort the events every time we extract a subsample. Without random sampling we simply
552  // use all of the events and keep them sorted.
553 
554  // Anyways, this algorithm uses a portion of the events to train each tree. All of the events are updated
555  // afterwards with the results from the subsample built tree.
556 
557  // Prepare some things.
558  sortEventVectors(events);
559  trees = std::vector<Tree*>(treeLimit);
560 
561  // See how long the regression takes.
562  TStopwatch timer;
563  timer.Start(kTRUE);
564 
565  // Output the current settings.
566  // std::cout << std::endl << "Running stochastic regression ... " << std::endl;
567  //std::cout << "# Nodes: " << nodeLimit << std::endl;
568  //std::cout << "Learning Rate: " << learningRate << std::endl;
569  //std::cout << "Bagging Fraction: " << fraction << std::endl;
570  //std::cout << std::endl;
571 
572  for (unsigned int i = 0; i < (unsigned)treeLimit; i++) {
573  // Build the tree using a random subsample.
574  prepareRandomSubsample(fraction);
575  trees[i] = new Tree(subSample);
576  trees[i]->buildTree(nodeLimit);
577 
578  // Fit all of the events based upon the tree we built using
579  // the subsample of events.
580  trees[i]->filterEvents(events[0]);
581 
582  // Update the targets for the next tree to fit.
583  updateRegTargets(trees[i], learningRate, l);
584 
585  // Save trees to xml in some directory.
586  std::ostringstream ss;
587  ss << "trees/" << i << ".xml";
588  std::string s = ss.str();
589  const char* c = s.c_str();
590 
591  trees[i]->saveToXML(c);
592  }
593 
594  //std::cout << std::endl << "Done." << std::endl << std::endl;
595 
596  //std::cout << std::endl << "Total calculation time: " << timer.RealTime() << std::endl;
597 }
std::vector< std::vector< Event * > > events
Definition: Forest.h:66
void prepareRandomSubsample(double fraction)
Definition: Forest.cc:521
std::vector< std::vector< Event * > > subSample
Definition: Forest.h:67
void updateRegTargets(Tree *tree, double learningRate, LossFunction *l)
Definition: Forest.cc:289
void sortEventVectors(std::vector< std::vector< Event *> > &e)
Definition: Forest.cc:181
std::vector< Tree * > trees
Definition: Forest.h:68

◆ generate()

void emtf::Forest::generate ( int  numTrainEvents,
int  numTestEvents,
double  sigma 
)

◆ getTrainingEvents()

std::vector< Event * > Forest::getTrainingEvents ( )

Definition at line 107 of file Forest.cc.

References events.

107 { return events[0]; }
std::vector< std::vector< Event * > > events
Definition: Forest.h:66

◆ getTree()

Tree * Forest::getTree ( unsigned int  i)

Definition at line 114 of file Forest.cc.

References mps_fire::i, and trees.

Referenced by L1TMuonEndCapForestESProducer::produce().

114  {
115  if (/*i>=0 && */ i < trees.size())
116  return trees[i];
117  else {
118  //std::cout << i << "is an invalid input for getTree. Out of range." << std::endl;
119  return nullptr;
120  }
121 }
std::vector< Tree * > trees
Definition: Forest.h:68

◆ listEvents()

void Forest::listEvents ( std::vector< std::vector< Event *> > &  e)

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

Definition at line 142 of file Forest.cc.

References gather_cfg::cout, MillePedeFileConverter_cfg::e, mps_fire::i, and dqmiolumiharvest::j.

142  {
143  // Simply list the events in each event vector. We have multiple copies
144  // of the events vector. Each copy is sorted according to a different
145  // determining variable.
146  std::cout << std::endl << "Listing Events... " << std::endl;
147 
148  for (unsigned int i = 0; i < e.size(); i++) {
149  std::cout << std::endl << "Variable " << i << " vector contents: " << std::endl;
150  for (unsigned int j = 0; j < e[i].size(); j++) {
151  e[i][j]->outputEvent();
152  }
153  std::cout << std::endl;
154  }
155 }

◆ loadForestFromXML()

void Forest::loadForestFromXML ( const char *  directory,
unsigned int  numTrees 
)

Definition at line 477 of file Forest.cc.

References createBeamHaloJobs::directory, contentValuesFiles::fullPath, mps_fire::i, contentValuesCheck::ss, and trees.

477  {
478  // Load a forest that has already been created and stored into XML somewhere.
479 
480  // Initialize the vector of trees.
481  trees = std::vector<Tree*>(numTrees);
482 
483  // Load the Forest.
484  // std::cout << std::endl << "Loading Forest from XML ... " << std::endl;
485  for (unsigned int i = 0; i < numTrees; i++) {
486  trees[i] = new Tree();
487 
488  std::stringstream ss;
489  ss << directory << "/" << i << ".xml";
490 
491  trees[i]->loadFromXML(edm::FileInPath(ss.str().c_str()).fullPath().c_str());
492  }
493 
494  //std::cout << "Done." << std::endl << std::endl;
495 }
std::vector< Tree * > trees
Definition: Forest.h:68

◆ loadFromCondPayload()

void Forest::loadFromCondPayload ( const L1TMuonEndCapForest::DForest &  payload)

Definition at line 497 of file Forest.cc.

References mps_fire::i, and trees.

497  {
498  // Load a forest that has already been created and stored in CondDB.
499  // Initialize the vector of trees.
500  unsigned int numTrees = forest.size();
501 
502  // clean-up leftovers from previous initialization (if any)
503  for (unsigned int i = 0; i < trees.size(); i++) {
504  if (trees[i])
505  delete trees[i];
506  }
507 
508  trees = std::vector<Tree*>(numTrees);
509 
510  // Load the Forest.
511  for (unsigned int i = 0; i < numTrees; i++) {
512  trees[i] = new Tree();
513  trees[i]->loadFromCondPayload(forest[i]);
514  }
515 }
std::vector< Tree * > trees
Definition: Forest.h:68

◆ operator=()

Forest & Forest::operator= ( const Forest &  forest)

Definition at line 70 of file Forest.cc.

References mps_fire::i, HcalDetIdTransform::transform(), MainPageGenerator::tree, and trees.

70  {
71  for (unsigned int i = 0; i < trees.size(); i++) {
72  if (trees[i])
73  delete trees[i];
74  }
75  trees.resize(0);
76 
77  transform(forest.trees.cbegin(), forest.trees.cend(), back_inserter(trees), [](const Tree* tree) {
78  return new Tree(*tree);
79  });
80  return *this;
81 }
std::vector< Tree * > trees
Definition: Forest.h:68
Definition: tree.py:1
unsigned transform(const HcalDetId &id, unsigned transformCode)

◆ predictEvent()

void Forest::predictEvent ( Event *  e,
unsigned int  trees 
)

Definition at line 440 of file Forest.cc.

References appendCorrection(), MillePedeFileConverter_cfg::e, mps_fire::i, and trees.

Referenced by PtAssignmentEngine2016::calculate_pt_xml(), and PtAssignmentEngine2017::calculate_pt_xml().

440  {
441  // Predict values for eventsp by running them through the forest up to numtrees.
442 
443  //std::cout << "Using " << numtrees << " trees from the forest to predict events ... " << std::endl;
444  if (numtrees > trees.size()) {
445  //std::cout << std::endl << "!! Input greater than the forest size. Using forest.size() = " << trees.size() << " to predict instead." << std::endl;
446  numtrees = trees.size();
447  }
448 
449  // just like in line #2470 of https://root.cern.ch/doc/master/MethodBDT_8cxx_source.html for gradient boosting
450  e->predictedValue = trees[0]->getBoostWeight();
451 
452  // i iterates through the trees in the forest. Each tree corrects the last prediction.
453  for (unsigned int i = 0; i < numtrees; i++) {
454  //std::cout << "++Tree " << i << "..." << std::endl;
455  appendCorrection(e, i);
456  }
457 }
std::vector< Tree * > trees
Definition: Forest.h:68
void appendCorrection(std::vector< Event *> &eventsp, int treenum)
Definition: Forest.cc:426

◆ predictEvents()

void Forest::predictEvents ( std::vector< Event *> &  eventsp,
unsigned int  trees 
)

Definition at line 406 of file Forest.cc.

References appendCorrection(), mps_fire::i, and trees.

406  {
407  // Predict values for eventsp by running them through the forest up to numtrees.
408 
409  //std::cout << "Using " << numtrees << " trees from the forest to predict events ... " << std::endl;
410  if (numtrees > trees.size()) {
411  //std::cout << std::endl << "!! Input greater than the forest size. Using forest.size() = " << trees.size() << " to predict instead." << std::endl;
412  numtrees = trees.size();
413  }
414 
415  // i iterates through the trees in the forest. Each tree corrects the last prediction.
416  for (unsigned int i = 0; i < numtrees; i++) {
417  //std::cout << "++Tree " << i << "..." << std::endl;
418  appendCorrection(eventsp, i);
419  }
420 }
std::vector< Tree * > trees
Definition: Forest.h:68
void appendCorrection(std::vector< Event *> &eventsp, int treenum)
Definition: Forest.cc:426

◆ prepareRandomSubsample()

void Forest::prepareRandomSubsample ( double  fraction)

Definition at line 521 of file Forest.cc.

References events, HLT_2022v15_cff::fraction, mps_fire::i, emtf::shuffle(), sortEventVectors(), subSample, and findQualityFiles::v.

Referenced by doStochasticRegression().

521  {
522  // We use this for Stochastic Gradient Boosting. Basically you
523  // take a subsample of the training events and build a tree using
524  // those. Then use the tree built from the subsample to update
525  // the predictions for all the events.
526 
527  subSample = std::vector<std::vector<Event*>>(events.size());
528  size_t subSampleSize = fraction * events[0].size();
529 
530  // Randomize the first subSampleSize events in events[0].
531  shuffle(events[0].begin(), events[0].end(), subSampleSize);
532 
533  // Get a copy of the random subset we just made.
534  std::vector<Event*> v(events[0].begin(), events[0].begin() + subSampleSize);
535 
536  // Initialize and sort the subSample collection.
537  for (unsigned int i = 0; i < subSample.size(); i++) {
538  subSample[i] = v;
539  }
540 
541  sortEventVectors(subSample);
542 }
std::vector< std::vector< Event * > > events
Definition: Forest.h:66
std::vector< std::vector< Event * > > subSample
Definition: Forest.h:67
void sortEventVectors(std::vector< std::vector< Event *> > &e)
Definition: Forest.cc:181
bidiiter shuffle(bidiiter begin, bidiiter end, size_t num_random)
Definition: Utilities.h:27

◆ rankVariables()

void Forest::rankVariables ( std::vector< int > &  rank)

Definition at line 195 of file Forest.cc.

References events, mps_fire::i, dqmiolumiharvest::j, SiStripPI::max, edm::second(), jetsAK4_CHS_cff::sort, trees, findQualityFiles::v, and w().

195  {
196  // This function ranks the determining variables according to their importance
197  // in determining the fit. Use a low learning rate for better results.
198  // Separates completely useless variables from useful ones well,
199  // but isn't the best at separating variables of similar importance.
200  // This is calculated using the error reduction on the training set. The function
201  // should be changed to use the testing set, but this works fine for now.
202  // I will try to change this in the future.
203 
204  // Initialize the vector v, which will store the total error reduction
205  // for each variable i in v[i].
206  std::vector<double> v(events.size(), 0);
207 
208  //std::cout << std::endl << "Ranking Variables by Net Error Reduction... " << std::endl;
209 
210  for (unsigned int j = 0; j < trees.size(); j++) {
211  trees[j]->rankVariables(v);
212  }
213 
214  double max = *std::max_element(v.begin(), v.end());
215 
216  // Scale the importance. Maximum importance = 100.
217  for (unsigned int i = 0; i < v.size(); i++) {
218  v[i] = 100 * v[i] / max;
219  }
220 
221  // Change the storage format so that we can keep the index
222  // and the value associated after sorting.
223  std::vector<std::pair<double, int>> w(events.size());
224 
225  for (unsigned int i = 0; i < v.size(); i++) {
226  w[i] = std::pair<double, int>(v[i], i);
227  }
228 
229  // Sort so that we can output in order of importance.
230  std::sort(w.begin(), w.end());
231 
232  // Output the results.
233  for (int i = (v.size() - 1); i >= 0; i--) {
234  rank.push_back(w[i].second);
235  // std::cout << "x" << w[i].second << ": " << w[i].first << std::endl;
236  }
237 
238  // std::cout << std::endl << "Done." << std::endl << std::endl;
239 }
std::vector< std::vector< Event * > > events
Definition: Forest.h:66
T w() const
U second(std::pair< T, U > const &p)
std::vector< Tree * > trees
Definition: Forest.h:68

◆ saveSplitValues()

void Forest::saveSplitValues ( const char *  savefilename)

Definition at line 245 of file Forest.cc.

References events, mps_fire::i, dqmiolumiharvest::j, jetsAK4_CHS_cff::sort, contentValuesCheck::ss, trees, tier0::unique(), and findQualityFiles::v.

245  {
246  // This function gathers all of the split values from the forest and puts them into lists.
247 
248  std::ofstream splitvaluefile;
249  splitvaluefile.open(savefilename);
250 
251  // Initialize the matrix v, which will store the list of split values
252  // for each variable i in v[i].
253  std::vector<std::vector<double>> v(events.size(), std::vector<double>());
254 
255  //std::cout << std::endl << "Gathering split values... " << std::endl;
256 
257  // Gather the split values from each tree in the forest.
258  for (unsigned int j = 0; j < trees.size(); j++) {
259  trees[j]->getSplitValues(v);
260  }
261 
262  // Sort the lists of split values and remove the duplicates.
263  for (unsigned int i = 0; i < v.size(); i++) {
264  std::sort(v[i].begin(), v[i].end());
265  v[i].erase(unique(v[i].begin(), v[i].end()), v[i].end());
266  }
267 
268  // Output the results after removing duplicates.
269  // The 0th variable is special and is not used for splitting, so we start at 1.
270  for (unsigned int i = 1; i < v.size(); i++) {
271  TString splitValues;
272  for (unsigned int j = 0; j < v[i].size(); j++) {
273  std::stringstream ss;
274  ss.precision(14);
275  ss << std::scientific << v[i][j];
276  splitValues += ",";
277  splitValues += ss.str().c_str();
278  }
279 
280  splitValues = splitValues(1, splitValues.Length());
281  splitvaluefile << splitValues << std::endl << std::endl;
282  ;
283  }
284 }
std::vector< std::vector< Event * > > events
Definition: Forest.h:66
def unique(seq, keepstr=True)
Definition: tier0.py:24
std::vector< Tree * > trees
Definition: Forest.h:68

◆ setTrainingEvents()

void Forest::setTrainingEvents ( std::vector< Event *> &  trainingEvents)

Definition at line 87 of file Forest.cc.

References MillePedeFileConverter_cfg::e, events, and mps_fire::i.

Referenced by Forest().

87  {
88  // tell the forest which events to use for training
89 
90  Event* e = trainingEvents[0];
91  // Unused variable
92  // unsigned int numrows = e->data.size();
93 
94  // Reset the events matrix.
95  events = std::vector<std::vector<Event*>>();
96 
97  for (unsigned int i = 0; i < e->data.size(); i++) {
98  events.push_back(trainingEvents);
99  }
100 }
std::vector< std::vector< Event * > > events
Definition: Forest.h:66

◆ size()

unsigned int Forest::size ( void  )

Definition at line 127 of file Forest.cc.

References trees.

Referenced by ntupleDataFormat._Collection::__iter__(), ntupleDataFormat._Collection::__len__(), and L1TMuonEndCapForestESProducer::produce().

127  {
128  // Return the number of trees in the forest.
129  return trees.size();
130 }
std::vector< Tree * > trees
Definition: Forest.h:68

◆ sortEventVectors()

void Forest::sortEventVectors ( std::vector< std::vector< Event *> > &  e)

Definition at line 181 of file Forest.cc.

References compareEvents(), MillePedeFileConverter_cfg::e, mps_fire::i, jetsAK4_CHS_cff::sort, and emtf::Event::sortingIndex.

Referenced by doRegression(), doStochasticRegression(), and prepareRandomSubsample().

181  {
182  // When a node chooses the optimum split point and split variable it needs
183  // the events to be sorted according to the variable it is considering.
184 
185  for (unsigned int i = 0; i < e.size(); i++) {
186  Event::sortingIndex = i;
187  std::sort(e[i].begin(), e[i].end(), compareEvents);
188  }
189 }
bool compareEvents(Event *e1, Event *e2)
Definition: Forest.cc:165
static int sortingIndex
Definition: Event.h:27

◆ updateEvents()

void Forest::updateEvents ( Tree *  tree)

Definition at line 328 of file Forest.cc.

References MillePedeFileConverter_cfg::e, l1tTowerCalibrationProducer_cfi::fit, dqmiolumiharvest::j, and findQualityFiles::v.

Referenced by appendCorrection().

328  {
329  // Prepare the test events for the next tree.
330 
331  // Get the list of terminal nodes for this tree.
332  std::list<Node*>& tn = tree->getTerminalNodes();
333 
334  // Loop through the terminal nodes.
335  for (std::list<Node*>::iterator it = tn.begin(); it != tn.end(); it++) {
336  std::vector<Event*>& v = (*it)->getEvents()[0];
337  double fit = (*it)->getFitValue();
338 
339  // Loop through each event in the terminal region and update the
340  // global event it maps to.
341  for (unsigned int j = 0; j < v.size(); j++) {
342  Event* e = v[j];
343  e->predictedValue += fit;
344  }
345 
346  // Release memory.
347  (*it)->getEvents() = std::vector<std::vector<Event*>>();
348  }
349 }
Definition: tree.py:1

◆ updateRegTargets()

void Forest::updateRegTargets ( Tree *  tree,
double  learningRate,
LossFunction *  l 
)

Definition at line 289 of file Forest.cc.

References MillePedeFileConverter_cfg::e, l1tTowerCalibrationProducer_cfi::fit, dqmiolumiharvest::j, cmsLHEtoEOSManager::l, and findQualityFiles::v.

Referenced by doRegression(), and doStochasticRegression().

289  {
290  // Prepare the global vector of events for the next tree.
291  // Update the fit for each event and set the new target value
292  // for the next tree.
293 
294  // Get the list of terminal nodes for this tree.
295  std::list<Node*>& tn = tree->getTerminalNodes();
296 
297  // Loop through the terminal nodes.
298  for (std::list<Node*>::iterator it = tn.begin(); it != tn.end(); it++) {
299  // Get the events in the current terminal region.
300  std::vector<Event*>& v = (*it)->getEvents()[0];
301 
302  // Fit the events depending on the loss function criteria.
303  double fit = l->fit(v);
304 
305  // Scale the rate at which the algorithm converges.
306  fit = learningRate * fit;
307 
308  // Store the official fit value in the terminal node.
309  (*it)->setFitValue(fit);
310 
311  // Loop through each event in the terminal region and update the
312  // target for the next tree.
313  for (unsigned int j = 0; j < v.size(); j++) {
314  Event* e = v[j];
315  e->predictedValue += fit;
316  e->data[0] = l->target(e);
317  }
318 
319  // Release memory.
320  (*it)->getEvents() = std::vector<std::vector<Event*>>();
321  }
322 }
Definition: tree.py:1

Member Data Documentation

◆ events

std::vector<std::vector<Event*> > emtf::Forest::events
private

◆ subSample

std::vector<std::vector<Event*> > emtf::Forest::subSample
private

Definition at line 67 of file Forest.h.

Referenced by doStochasticRegression(), and prepareRandomSubsample().

◆ trees

std::vector<Tree*> emtf::Forest::trees
private