CMS 3D CMS Logo

List of all members | Public Member Functions | Private Attributes
emtf::Node Class Reference

#include <Node.h>

Public Member Functions

void calcOptimumSplit ()
 
void filterEventsToDaughters ()
 
Node * filterEventToDaughter (Event *e)
 
double getAvgError ()
 
double getErrorReduction ()
 
std::vector< std::vector< Event * > > & getEvents ()
 
double getFitValue ()
 
Node * getLeftDaughter ()
 
std::string getName ()
 
int getNumEvents ()
 
Node * getParent ()
 
Node * getRightDaughter ()
 
double getSplitValue ()
 
int getSplitVariable ()
 
double getTotalError ()
 
void listEvents ()
 
 Node ()
 
 Node (std::string cName)
 
 Node (Node &&)=default
 
 Node (const Node &)=delete
 
Node & operator= (const Node &)=delete
 
void setAvgError (double sAvgError)
 
void setErrorReduction (double sErrorReduction)
 
void setEvents (std::vector< std::vector< Event *> > &sEvents)
 
void setFitValue (double sFitValue)
 
void setLeftDaughter (Node *sLeftDaughter)
 
void setName (std::string sName)
 
void setNumEvents (int sNumEvents)
 
void setParent (Node *sParent)
 
void setRightDaughter (Node *sRightDaughter)
 
void setSplitValue (double sSplitValue)
 
void setSplitVariable (int sSplitVar)
 
void setTotalError (double sTotalError)
 
void theMiracleOfChildBirth ()
 
 ~Node ()
 

Private Attributes

double avgError
 
double errorReduction
 
std::vector< std::vector< Event * > > events
 
double fitValue
 
Node * leftDaughter
 
std::string name
 
int numEvents
 
Node * parent
 
Node * rightDaughter
 
double splitValue
 
int splitVariable
 
double totalError
 

Detailed Description

Definition at line 12 of file Node.h.

Constructor & Destructor Documentation

◆ Node() [1/4]

Node::Node ( )

Definition at line 32 of file Node.cc.

References avgError, errorReduction, leftDaughter, name, parent, rightDaughter, splitValue, splitVariable, and totalError.

Referenced by theMiracleOfChildBirth().

32  {
33  name = "";
34  leftDaughter = nullptr;
35  rightDaughter = nullptr;
36  parent = nullptr;
37  splitValue = -99;
38  splitVariable = -1;
39  avgError = -1;
40  totalError = -1;
41  errorReduction = -1;
42 }
Node * leftDaughter
Definition: Node.h:67
int splitVariable
Definition: Node.h:72
std::string name
Definition: Node.h:65
Node * parent
Definition: Node.h:69
double errorReduction
Definition: Node.h:74
double splitValue
Definition: Node.h:71
Node * rightDaughter
Definition: Node.h:68
double avgError
Definition: Node.h:76
double totalError
Definition: Node.h:75

◆ Node() [2/4]

Node::Node ( std::string  cName)

Definition at line 44 of file Node.cc.

References avgError, errorReduction, leftDaughter, name, parent, rightDaughter, splitValue, splitVariable, and totalError.

44  {
45  name = cName;
46  leftDaughter = nullptr;
47  rightDaughter = nullptr;
48  parent = nullptr;
49  splitValue = -99;
50  splitVariable = -1;
51  avgError = -1;
52  totalError = -1;
53  errorReduction = -1;
54 }
Node * leftDaughter
Definition: Node.h:67
int splitVariable
Definition: Node.h:72
std::string name
Definition: Node.h:65
Node * parent
Definition: Node.h:69
double errorReduction
Definition: Node.h:74
double splitValue
Definition: Node.h:71
Node * rightDaughter
Definition: Node.h:68
double avgError
Definition: Node.h:76
double totalError
Definition: Node.h:75

◆ ~Node()

Node::~Node ( )

Definition at line 60 of file Node.cc.

References leftDaughter, and rightDaughter.

60  {
61  // Recursively delete all nodes in the tree.
62  if (leftDaughter)
63  delete leftDaughter;
64  if (rightDaughter)
65  delete rightDaughter;
66 }
Node * leftDaughter
Definition: Node.h:67
Node * rightDaughter
Definition: Node.h:68

◆ Node() [3/4]

emtf::Node::Node ( Node &&  )
default

◆ Node() [4/4]

emtf::Node::Node ( const Node & )
delete

Member Function Documentation

◆ calcOptimumSplit()

void Node::calcOptimumSplit ( )

Definition at line 143 of file Node.cc.

References avgError, data, errorReduction, events, fitValue, mps_fire::i, numEvents, splitValue, splitVariable, SUM, filterCSVwithJSON::target, totalError, and findQualityFiles::v.

Referenced by emtf::Tree::buildTree().

143  {
144  // Determines the split variable and split point which would most reduce the error for the given node (region).
145  // In the process we calculate the fitValue and Error. The general algorithm is based upon Luis Torgo's thesis.
146  // Check out the reference for a more in depth outline. This part is chapter 3.
147 
148  // Initialize some variables.
149  double bestSplitValue = 0;
150  int bestSplitVariable = -1;
151  double bestErrorReduction = -1;
152 
153  double SUM = 0;
154  double SSUM = 0;
155  numEvents = events[0].size();
156 
157  double candidateErrorReduction = -1;
158 
159  // Calculate the sum of the target variables and the sum of
160  // the target variables squared. We use these later.
161  for (unsigned int i = 0; i < events[0].size(); i++) {
162  double target = events[0][i]->data[0];
163  SUM += target;
164  SSUM += target * target;
165  }
166 
167  unsigned int numVars = events.size();
168 
169  // Calculate the best split point for each variable
170  for (unsigned int variableToCheck = 1; variableToCheck < numVars; variableToCheck++) {
171  // The sum of the target variables in the left, right nodes
172  double SUMleft = 0;
173  double SUMright = SUM;
174 
175  // The number of events in the left, right nodes
176  int nleft = 1;
177  int nright = events[variableToCheck].size() - 1;
178 
179  int candidateSplitVariable = variableToCheck;
180 
181  std::vector<Event*>& v = events[variableToCheck];
182 
183  // Find the best split point for this variable
184  for (unsigned int i = 1; i < v.size(); i++) {
185  // As the candidate split point iterates, the number of events in the
186  // left/right node increases/decreases and SUMleft/right increases/decreases.
187 
188  SUMleft = SUMleft + v[i - 1]->data[0];
189  SUMright = SUMright - v[i - 1]->data[0];
190 
191  // No need to check the split point if x on both sides is equal
192  if (v[i - 1]->data[candidateSplitVariable] < v[i]->data[candidateSplitVariable]) {
193  // Finding the maximum error reduction for Least Squares boils down to maximizing
194  // the following statement.
195  candidateErrorReduction = SUMleft * SUMleft / nleft + SUMright * SUMright / nright - SUM * SUM / numEvents;
196  // std::cout << "candidateErrorReduction= " << candidateErrorReduction << std::endl << std::endl;
197 
198  // if the new candidate is better than the current best, then we have a new overall best.
199  if (candidateErrorReduction > bestErrorReduction) {
200  bestErrorReduction = candidateErrorReduction;
201  bestSplitValue = (v[i - 1]->data[candidateSplitVariable] + v[i]->data[candidateSplitVariable]) / 2;
202  bestSplitVariable = candidateSplitVariable;
203  }
204  }
205 
206  nright = nright - 1;
207  nleft = nleft + 1;
208  }
209  }
210 
211  // Store the information gained from our computations.
212 
213  // The fit value is the average for least squares.
214  fitValue = SUM / numEvents;
215  // std::cout << "fitValue= " << fitValue << std::endl;
216 
217  // n*[ <y^2>-k^2 ]
218  totalError = SSUM - SUM * SUM / numEvents;
219  // std::cout << "totalError= " << totalError << std::endl;
220 
221  // [ <y^2>-k^2 ]
222  avgError = totalError / numEvents;
223  // std::cout << "avgError= " << avgError << std::endl;
224 
225  errorReduction = bestErrorReduction;
226  // std::cout << "errorReduction= " << errorReduction << std::endl;
227 
228  splitVariable = bestSplitVariable;
229  // std::cout << "splitVariable= " << splitVariable << std::endl;
230 
231  splitValue = bestSplitValue;
232  // std::cout << "splitValue= " << splitValue << std::endl;
233 
234  //if(bestSplitVariable == -1) std::cout << "splitVar = -1. numEvents = " << numEvents << ". errRed = " << errorReduction << std::endl;
235 }
int splitVariable
Definition: Node.h:72
double fitValue
Definition: Node.h:78
int numEvents
Definition: Node.h:79
double errorReduction
Definition: Node.h:74
double splitValue
Definition: Node.h:71
std::vector< std::vector< Event * > > events
Definition: Node.h:81
double avgError
Definition: Node.h:76
double totalError
Definition: Node.h:75
#define SUM(A, B)
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:79

◆ filterEventsToDaughters()

void Node::filterEventsToDaughters ( )

Definition at line 267 of file Node.cc.

References MillePedeFileConverter_cfg::e, events, getEvents(), mps_fire::i, dqmiolumiharvest::j, cmsLHEtoEOSManager::l, leftDaughter, rightDaughter, setNumEvents(), splitValue, splitVariable, and pfDeepBoostedJetPreprocessParams_cfi::sv.

Referenced by emtf::Tree::buildTree(), and emtf::Tree::filterEventsRecursive().

267  {
268  // Keeping sorted copies of the event vectors allows us to save on
269  // computation time. That way we don't have to resort the events
270  // each time we calculate the splitpoint for a node. We sort them once.
271  // Every time we split a node, we simply filter them down correctly
272  // preserving the order. This way we have O(n) efficiency instead
273  // of O(nlogn) efficiency.
274 
275  // Anyways, this function takes events from the parent node
276  // and filters an event into the left or right daughter
277  // node depending on whether it is < or > the split point
278  // for the given split variable.
279 
280  unsigned int sv = splitVariable;
281  double sp = splitValue;
282 
283  Node* left = leftDaughter;
284  Node* right = rightDaughter;
285 
286  std::vector<std::vector<Event*> > l(events.size());
287  std::vector<std::vector<Event*> > r(events.size());
288 
289  for (unsigned int i = 0; i < events.size(); i++) {
290  for (unsigned int j = 0; j < events[i].size(); j++) {
291  Event* e = events[i][j];
292  // Prevent out-of-bounds access
293  if (sv >= e->data.size())
294  continue;
295  if (e->data[sv] < sp)
296  l[i].push_back(e);
297  if (e->data[sv] > sp)
298  r[i].push_back(e);
299  }
300  }
301 
302  events = std::vector<std::vector<Event*> >();
303 
304  left->getEvents().swap(l);
305  right->getEvents().swap(r);
306 
307  // Set the number of events in the node.
308  left->setNumEvents(left->getEvents()[0].size());
309  right->setNumEvents(right->getEvents()[0].size());
310 }
Node * leftDaughter
Definition: Node.h:67
int splitVariable
Definition: Node.h:72
double splitValue
Definition: Node.h:71
std::vector< std::vector< Event * > > & getEvents()
Definition: Node.cc:132
Node * rightDaughter
Definition: Node.h:68
std::vector< std::vector< Event * > > events
Definition: Node.h:81
void setNumEvents(int sNumEvents)
Definition: Node.cc:126

◆ filterEventToDaughter()

Node * Node::filterEventToDaughter ( Event * e)

Definition at line 314 of file Node.cc.

References MillePedeFileConverter_cfg::e, leftDaughter, rightDaughter, splitValue, splitVariable, and pfDeepBoostedJetPreprocessParams_cfi::sv.

Referenced by emtf::Tree::filterEventRecursive().

314  {
315  // Anyways, this function takes an event from the parent node
316  // and filters an event into the left or right daughter
317  // node depending on whether it is < or >= the split point
318  // for the given split variable.
319 
320  unsigned int sv = splitVariable;
321  double sp = splitValue;
322 
323  Node* left = leftDaughter;
324  Node* right = rightDaughter;
325  Node* nextNode = nullptr;
326 
327  // Prevent out-of-bounds access
328  if (left == nullptr || right == nullptr || sv >= e->data.size())
329  return nullptr;
330 
331  if (e->data[sv] < sp)
332  nextNode = left;
333  if (e->data[sv] >= sp)
334  nextNode = right;
335 
336  return nextNode;
337 }
Node * leftDaughter
Definition: Node.h:67
int splitVariable
Definition: Node.h:72
double splitValue
Definition: Node.h:71
Node * rightDaughter
Definition: Node.h:68

◆ getAvgError()

double Node::getAvgError ( )

Definition at line 122 of file Node.cc.

References avgError.

Referenced by emtf::Tree::copyFrom().

122 { return avgError; }
double avgError
Definition: Node.h:76

◆ getErrorReduction()

double Node::getErrorReduction ( )

Definition at line 80 of file Node.cc.

References errorReduction.

Referenced by emtf::Tree::copyFrom(), and emtf::Tree::rankVariablesRecursive().

80 { return errorReduction; }
double errorReduction
Definition: Node.h:74

◆ getEvents()

std::vector< std::vector< Event * > > & Node::getEvents ( )

Definition at line 132 of file Node.cc.

References events.

Referenced by emtf::Tree::filterEvents(), and filterEventsToDaughters().

132 { return events; }
std::vector< std::vector< Event * > > events
Definition: Node.h:81

◆ getFitValue()

double Node::getFitValue ( )

◆ getLeftDaughter()

Node * Node::getLeftDaughter ( )

◆ getName()

std::string Node::getName ( void  )

Definition at line 74 of file Node.cc.

References name.

Referenced by emtf::Tree::copyFrom(), plotting.Plot::draw(), and emtf::Tree::saveToXML().

74 { return name; }
std::string name
Definition: Node.h:65

◆ getNumEvents()

int Node::getNumEvents ( )

Definition at line 128 of file Node.cc.

References numEvents.

Referenced by emtf::Tree::calcError(), and emtf::Tree::copyFrom().

128 { return numEvents; }
int numEvents
Definition: Node.h:79

◆ getParent()

Node * Node::getParent ( )

Definition at line 96 of file Node.cc.

References parent.

96 { return parent; }
Node * parent
Definition: Node.h:69

◆ getRightDaughter()

Node * Node::getRightDaughter ( )

◆ getSplitValue()

double Node::getSplitValue ( )

◆ getSplitVariable()

int Node::getSplitVariable ( )

◆ getTotalError()

double Node::getTotalError ( )

Definition at line 118 of file Node.cc.

References totalError.

Referenced by emtf::Tree::copyFrom().

118 { return totalError; }
double totalError
Definition: Node.h:75

◆ listEvents()

void Node::listEvents ( )

Definition at line 239 of file Node.cc.

References gather_cfg::cout, events, mps_fire::i, and dqmiolumiharvest::j.

239  {
240  std::cout << std::endl << "Listing Events... " << std::endl;
241 
242  for (unsigned int i = 0; i < events.size(); i++) {
243  std::cout << std::endl << "Variable " << i << " vector contents: " << std::endl;
244  for (unsigned int j = 0; j < events[i].size(); j++) {
245  events[i][j]->outputEvent();
246  }
247  std::cout << std::endl;
248  }
249 }
std::vector< std::vector< Event * > > events
Definition: Node.h:81

◆ operator=()

Node& emtf::Node::operator= ( const Node & )
delete

◆ setAvgError()

void Node::setAvgError ( double  sAvgError)

Definition at line 120 of file Node.cc.

References avgError.

120 { avgError = sAvgError; }
double avgError
Definition: Node.h:76

◆ setErrorReduction()

void Node::setErrorReduction ( double  sErrorReduction)

Definition at line 78 of file Node.cc.

References errorReduction.

78 { errorReduction = sErrorReduction; }
double errorReduction
Definition: Node.h:74

◆ setEvents()

void Node::setEvents ( std::vector< std::vector< Event *> > &  sEvents)

Definition at line 134 of file Node.cc.

References events, and numEvents.

Referenced by emtf::Tree::Tree().

134  {
135  events = sEvents;
136  numEvents = events[0].size();
137 }
int numEvents
Definition: Node.h:79
std::vector< std::vector< Event * > > events
Definition: Node.h:81

◆ setFitValue()

void Node::setFitValue ( double  sFitValue)

Definition at line 110 of file Node.cc.

References fitValue.

Referenced by emtf::Tree::loadFromCondPayloadRecursive(), and emtf::Tree::loadFromXMLRecursive().

110 { fitValue = sFitValue; }
double fitValue
Definition: Node.h:78

◆ setLeftDaughter()

void Node::setLeftDaughter ( Node * sLeftDaughter)

Definition at line 84 of file Node.cc.

References leftDaughter.

84 { leftDaughter = sLeftDaughter; }
Node * leftDaughter
Definition: Node.h:67

◆ setName()

void Node::setName ( std::string  sName)

Definition at line 72 of file Node.cc.

References name.

72 { name = sName; }
std::string name
Definition: Node.h:65

◆ setNumEvents()

void Node::setNumEvents ( int  sNumEvents)

Definition at line 126 of file Node.cc.

References numEvents.

Referenced by filterEventsToDaughters().

126 { numEvents = sNumEvents; }
int numEvents
Definition: Node.h:79

◆ setParent()

void Node::setParent ( Node * sParent)

Definition at line 94 of file Node.cc.

References parent.

Referenced by emtf::Tree::copyFrom(), and theMiracleOfChildBirth().

94 { parent = sParent; }
Node * parent
Definition: Node.h:69

◆ setRightDaughter()

void Node::setRightDaughter ( Node * sRightDaughter)

Definition at line 88 of file Node.cc.

References rightDaughter.

88 { rightDaughter = sRightDaughter; }
Node * rightDaughter
Definition: Node.h:68

◆ setSplitValue()

void Node::setSplitValue ( double  sSplitValue)

Definition at line 100 of file Node.cc.

References splitValue.

Referenced by emtf::Tree::loadFromCondPayloadRecursive(), and emtf::Tree::loadFromXMLRecursive().

100 { splitValue = sSplitValue; }
double splitValue
Definition: Node.h:71

◆ setSplitVariable()

void Node::setSplitVariable ( int  sSplitVar)

Definition at line 104 of file Node.cc.

References splitVariable.

Referenced by emtf::Tree::loadFromCondPayloadRecursive(), and emtf::Tree::loadFromXMLRecursive().

104 { splitVariable = sSplitVar; }
int splitVariable
Definition: Node.h:72

◆ setTotalError()

void Node::setTotalError ( double  sTotalError)

Definition at line 116 of file Node.cc.

References totalError.

116 { totalError = sTotalError; }
double totalError
Definition: Node.h:75

◆ theMiracleOfChildBirth()

void Node::theMiracleOfChildBirth ( )

Definition at line 253 of file Node.cc.

References leftDaughter, name, Node(), rightDaughter, and setParent().

Referenced by emtf::Tree::buildTree(), emtf::Tree::loadFromCondPayloadRecursive(), and emtf::Tree::loadFromXMLRecursive().

253  {
254  // Create Daughter Nodes
255  Node* left = new Node(name + " left");
256  Node* right = new Node(name + " right");
257 
258  // Link the Nodes Appropriately
259  leftDaughter = left;
260  rightDaughter = right;
261  left->setParent(this);
262  right->setParent(this);
263 }
Node * leftDaughter
Definition: Node.h:67
Node()
Definition: Node.cc:32
std::string name
Definition: Node.h:65
Node * rightDaughter
Definition: Node.h:68
void setParent(Node *sParent)
Definition: Node.cc:94

Member Data Documentation

◆ avgError

double emtf::Node::avgError
private

Definition at line 76 of file Node.h.

Referenced by calcOptimumSplit(), getAvgError(), Node(), and setAvgError().

◆ errorReduction

double emtf::Node::errorReduction
private

Definition at line 74 of file Node.h.

Referenced by calcOptimumSplit(), getErrorReduction(), Node(), and setErrorReduction().

◆ events

std::vector<std::vector<Event *> > emtf::Node::events
private

◆ fitValue

double emtf::Node::fitValue
private

Definition at line 78 of file Node.h.

Referenced by calcOptimumSplit(), getFitValue(), and setFitValue().

◆ leftDaughter

Node* emtf::Node::leftDaughter
private

◆ name

std::string emtf::Node::name
private

Definition at line 65 of file Node.h.

Referenced by ElectronMVAID.ElectronMVAID::__call__(), FWLite.ElectronMVAID::__call__(), dirstructure.Directory::__create_pie_image(), DisplayManager.DisplayManager::__del__(), dqm_interfaces.DirID::__eq__(), dirstructure.Directory::__get_full_path(), dirstructure.Comparison::__get_img_name(), dataset.Dataset::__getDataType(), dataset.Dataset::__getFileInfoList(), dirstructure.Comparison::__make_image(), core.autovars.NTupleVariable::__repr__(), core.autovars.NTupleObjectType::__repr__(), core.autovars.NTupleObject::__repr__(), core.autovars.NTupleCollection::__repr__(), dirstructure.Directory::__repr__(), dqm_interfaces.DirID::__repr__(), dirstructure.Comparison::__repr__(), config.Service::__setattr__(), config.CFG::__str__(), counter.Counter::__str__(), average.Average::__str__(), FWLite.WorkingPoints::_reformat_cut_definitions(), core.autovars.NTupleObjectType::addSubObjects(), core.autovars.NTupleObjectType::addVariables(), core.autovars.NTupleObjectType::allVars(), dataset.CMSDataset::buildListOfFiles(), dataset.LocalDataset::buildListOfFiles(), dataset.CMSDataset::buildListOfFilesDBS(), dirstructure.Directory::calcStats(), validation.Sample::digest(), python.rootplot.utilities.Hist::divide(), python.rootplot.utilities.Hist::divide_wilson(), DisplayManager.DisplayManager::Draw(), core.autovars.NTupleVariable::fillBranch(), core.autovars.NTupleObject::fillBranches(), core.autovars.NTupleCollection::fillBranchesScalar(), core.autovars.NTupleCollection::fillBranchesVector(), core.autovars.NTupleCollection::get_cpp_declaration(), core.autovars.NTupleCollection::get_cpp_wrapper_class(), core.autovars.NTupleCollection::get_py_wrapper_class(), utils.StatisticalTest::get_status(), getName(), production_tasks.Task::getname(), dataset.CMSDataset::getPrimaryDatasetEntries(), dataset.PrivateDataset::getPrimaryDatasetEntries(), VIDSelectorBase.VIDSelectorBase::initialize(), core.autovars.NTupleVariable::makeBranch(), core.autovars.NTupleObject::makeBranches(), 
core.autovars.NTupleCollection::makeBranchesScalar(), core.autovars.NTupleCollection::makeBranchesVector(), Node(), dirstructure.Directory::print_report(), dataset.BaseDataset::printInfo(), dataset.Dataset::printInfo(), production_tasks.MonitorJobs::run(), setName(), python.rootplot.utilities.Hist::TGraph(), python.rootplot.utilities.Hist::TH1F(), theMiracleOfChildBirth(), counter.Counter::write(), and average.Average::write().

◆ numEvents

int emtf::Node::numEvents
private

Definition at line 79 of file Node.h.

Referenced by calcOptimumSplit(), getNumEvents(), setEvents(), and setNumEvents().

◆ parent

Node* emtf::Node::parent
private

◆ rightDaughter

Node* emtf::Node::rightDaughter
private

◆ splitValue

double emtf::Node::splitValue
private

◆ splitVariable

int emtf::Node::splitVariable
private

◆ totalError

double emtf::Node::totalError
private

Definition at line 75 of file Node.h.

Referenced by calcOptimumSplit(), getTotalError(), Node(), and setTotalError().