CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
Tree.cc
Go to the documentation of this file.
1 // Tree.cxx //
3 // =====================================================================//
4 // This is the object implementation of a decision tree. //
5 // References include //
6 // *Elements of Statistical Learning by Hastie, //
7 // Tibshirani, and Friedman. //
8 // *Greedy Function Approximation: A Gradient Boosting Machine. //
9 // Friedman. The Annals of Statistics, Vol. 29, No. 5. Oct 2001. //
10 // *Inductive Learning of Tree-based Regression Models. Luis Torgo. //
11 // //
13 
15 // _______________________Includes_______________________________________//
17 
19 #include <iostream>
20 #include <sstream>
21 
23 // _______________________Constructor(s)________________________________//
25 
27 {
28  rootNode = new Node("root");
29 
30  terminalNodes.push_back(rootNode);
31  numTerminalNodes = 1;
32 }
33 
34 Tree::Tree(std::vector< std::vector<Event*> >& cEvents)
35 {
36  rootNode = new Node("root");
37  rootNode->setEvents(cEvents);
38 
39  terminalNodes.push_back(rootNode);
40  numTerminalNodes = 1;
41 }
43 // _______________________Destructor____________________________________//
45 
46 
48 {
49 // When the tree is destroyed it will delete all of the nodes in the tree.
50 // The deletion begins with the root node and continues recursively.
51  delete rootNode;
52 }
53 
55 // ______________________Get/Set________________________________________//
57 
58 void Tree::setRootNode(Node *sRootNode)
59 {
60  rootNode = sRootNode;
61 }
62 
64 {
65  return rootNode;
66 }
67 
68 // ----------------------------------------------------------------------
69 
70 void Tree::setTerminalNodes(std::list<Node*>& sTNodes)
71 {
72  terminalNodes = sTNodes;
73 }
74 
75 std::list<Node*>& Tree::getTerminalNodes()
76 {
77  return terminalNodes;
78 }
79 
80 // ----------------------------------------------------------------------
81 
83 {
84  return numTerminalNodes;
85 }
86 
88 // ______________________Performance____________________________________//
90 
92 {
93 // Loop through the separate predictive regions (terminal nodes) and
94 // add up the errors to get the error of the entire space.
95 
96  Double_t totalSquaredError = 0;
97 
98  for(std::list<Node*>::iterator it=terminalNodes.begin(); it!=terminalNodes.end(); it++)
99  {
100  totalSquaredError += (*it)->getTotalError();
101  }
102  rmsError = sqrt( totalSquaredError/rootNode->getNumEvents() );
103 }
104 
105 // ----------------------------------------------------------------------
106 
// Grow the tree by one split per call, then recurse while
// numTerminalNodes < nodeLimit.
//
// Each call scans terminalNodes for the node with the largest
// getErrorReduction(), gives it two daughters, swaps it in the terminal
// list for those daughters, filters its events into them, computes the
// daughters' optimum splits and refreshes rmsError through calcError().
//
// nodeLimit: recursion continues while numTerminalNodes is below this value.
//            The limit is only tested after a split has been made, so one
//            split is attempted on every call.
//
// NOTE(review): the listing numbers embedded in this copy skip 115 and 140,
// so two statements appear to be missing from this view. Nothing visible in
// this body changes numTerminalNodes -- confirm against the original file.
107 void Tree::buildTree(Int_t nodeLimit)
108 {
109  // We greedily pick the best terminal node to split.
110  Double_t bestNodeErrorReduction = -1;
// NOTE(review): nodeToSplit has no initializer; it is only assigned when
// some terminal node reports an error reduction greater than -1.
111  Node* nodeToSplit;
112 
// First call on a fresh tree (a single terminal node).
113  if(numTerminalNodes == 1)
114  {
116  calcError();
117 // std::cout << std::endl << " " << numTerminalNodes << " Nodes : " << rmsError << std::endl;
118  }
119 
// Linear scan for the terminal node with the largest error reduction.
// The comparison is strict, so the first of several equal maxima wins.
120  for(std::list<Node*>::iterator it=terminalNodes.begin(); it!=terminalNodes.end(); it++)
121  {
122  if( (*it)->getErrorReduction() > bestNodeErrorReduction )
123  {
124  bestNodeErrorReduction = (*it)->getErrorReduction();
125  nodeToSplit = (*it);
126  }
127  }
128 
129  // Create daughter nodes, and link the nodes together appropriately.
130  nodeToSplit->theMiracleOfChildBirth();
131 
132  // Get left and right daughters for reference.
133  Node* left = nodeToSplit->getLeftDaughter();
134  Node* right = nodeToSplit->getRightDaughter();
135 
136  // Update the list of terminal nodes.
137  terminalNodes.remove(nodeToSplit);
138  terminalNodes.push_back(left);
139  terminalNodes.push_back(right);
141 
142  // Filter the events from the parent into the daughters.
143  nodeToSplit->filterEventsToDaughters();
144 
145  // Calculate the best splits for the new nodes.
146  left->calcOptimumSplit();
147  right->calcOptimumSplit();
148 
149  // See if the error reduces as we add more nodes.
150  calcError();
151 
// This condition is always true (any integer modulo 1 is 0); the guarded
// block contains only a commented-out progress print.
152  if(numTerminalNodes % 1 == 0)
153  {
154 // std::cout << " " << numTerminalNodes << " Nodes : " << rmsError << std::endl;
155  }
156 
157  // Repeat until done.
158  if(numTerminalNodes < nodeLimit) buildTree(nodeLimit);
159 }
160 
161 // ----------------------------------------------------------------------
162 
// Hand a flat vector of events to an already-built tree.
//
// tEvents: the events to fit; they are copied into slot 0 of the root
//          node's event container, replacing whatever the root held.
//
// NOTE(review): the listing numbers embedded in this copy skip 174, so the
// statement that actually pushes the events down to the terminal nodes
// appears to be missing from this view -- confirm against the original file.
163 void Tree::filterEvents(std::vector<Event*>& tEvents)
164 {
165 // Use trees which have already been built to fit a bunch of events
166 // given by the tEvents vector.
167 
168  // Set the events to be filtered.
// Reset the root's container to a single empty inner vector, then copy
// tEvents into that slot.
169  rootNode->getEvents() = std::vector< std::vector<Event*> >(1);
170  rootNode->getEvents()[0] = tEvents;
171 
172  // The tree now knows about the events it needs to fit.
173  // Filter them into a predictive region (terminal node).
175 }
176 
177 // ----------------------------------------------------------------------
178 
180 {
181 // Filter the events repeatedly into the daughter nodes until they
182 // fall into a terminal node.
183 
184  Node* left = node->getLeftDaughter();
185  Node* right = node->getRightDaughter();
186 
187  if(left == 0 || right == 0) return;
188 
189  node->filterEventsToDaughters();
190 
191  filterEventsRecursive(left);
192  filterEventsRecursive(right);
193 }
194 
195 // ----------------------------------------------------------------------
196 
197 
198 void Tree::rankVariablesRecursive(Node* node, std::vector<Double_t>& v)
199 {
200 // We recursively go through all of the nodes in the tree and find the
201 // total error reduction for each variable. The one with the most
202 // error reduction should be the most important.
203 
204  Node* left = node->getLeftDaughter();
205  Node* right = node->getRightDaughter();
206 
207  if(left==0 || right==0) return;
208 
209  Int_t sv = node->getSplitVariable();
210  Double_t er = node->getErrorReduction();
211 
212  // Add error reduction to the current total for the appropriate
213  // variable.
214  v[sv] += er;
215 
216  rankVariablesRecursive(left, v);
217  rankVariablesRecursive(right, v);
218 
219 }
220 
221 // ----------------------------------------------------------------------
222 
// Entry point for variable ranking; v receives the ranking totals.
//
// NOTE(review): as it appears in this copy the body is empty and v is left
// untouched. The embedded listing numbers skip 225, so the statement that
// does the work appears to be missing from this view -- confirm against the
// original file.
223 void Tree::rankVariables(std::vector<Double_t>& v)
224 {
226 }
227 
229 // ______________________Storage/Retrieval______________________________//
231 
232 template <typename T>
234 {
235 // Convert a number to a string.
236  std::stringstream ss;
237  ss << num;
238  std::string s = ss.str();
239  return s;
240 }
241 
242 // ----------------------------------------------------------------------
243 
244 void Tree::addXMLAttributes(TXMLEngine* xml, Node* node, XMLNodePointer_t np)
245 {
246  // Convert Node members into XML attributes
247  // and add them to the XMLEngine.
248  xml->NewAttr(np, 0, "splitVar", numToStr(node->getSplitVariable()).c_str());
249  xml->NewAttr(np, 0, "splitVal", numToStr(node->getSplitValue()).c_str());
250  xml->NewAttr(np, 0, "fitVal", numToStr(node->getFitValue()).c_str());
251 }
252 
253 // ----------------------------------------------------------------------
254 
255 void Tree::saveToXML(const char* c)
256 {
257 
258  TXMLEngine* xml = new TXMLEngine();
259 
260  // Add the root node.
261  XMLNodePointer_t root = xml->NewChild(0, 0, rootNode->getName().c_str());
262  addXMLAttributes(xml, rootNode, root);
263 
264  // Recursively write the tree to XML.
265  saveToXMLRecursive(xml, rootNode, root);
266 
267  // Make the XML Document.
268  XMLDocPointer_t xmldoc = xml->NewDoc();
269  xml->DocSetRootElement(xmldoc, root);
270 
271  // Save to file.
272  xml->SaveDoc(xmldoc, c);
273 
274  // Clean up.
275  xml->FreeDoc(xmldoc);
276  delete xml;
277 }
278 
279 // ----------------------------------------------------------------------
280 
281 void Tree::saveToXMLRecursive(TXMLEngine* xml, Node* node, XMLNodePointer_t np)
282 {
283  Node* l = node->getLeftDaughter();
284  Node* r = node->getRightDaughter();
285 
286  if(l==0 || r==0) return;
287 
288  // Add children to the XMLEngine.
289  XMLNodePointer_t left = xml->NewChild(np, 0, "left");
290  XMLNodePointer_t right = xml->NewChild(np, 0, "right");
291 
292  // Add attributes to the children.
293  addXMLAttributes(xml, l, left);
294  addXMLAttributes(xml, r, right);
295 
296  // Recurse.
297  saveToXMLRecursive(xml, l, left);
298  saveToXMLRecursive(xml, r, right);
299 }
300 
301 // ----------------------------------------------------------------------
302 
303 void Tree::loadFromXML(const char* filename)
304 {
305  // First create the engine.
306  TXMLEngine* xml = new TXMLEngine();
307 
308  // Now try to parse xml file.
309  XMLDocPointer_t xmldoc = xml->ParseFile(filename);
310  if (xmldoc==0)
311  {
312  delete xml;
313  return;
314  }
315 
316  // Get access to main node of the xml file.
317  XMLNodePointer_t mainnode = xml->DocGetRootElement(xmldoc);
318 
319  // Recursively connect nodes together.
320  loadFromXMLRecursive(xml, mainnode, rootNode);
321 
322  // Release memory before exit
323  xml->FreeDoc(xmldoc);
324  delete xml;
325 }
326 
327 // ----------------------------------------------------------------------
328 
// Copy the split information stored on one XML element into one tree node
// and, if the element has two children, create the node's daughters and
// continue into them.
//
// xml:   engine used to read the document.
// xnode: XML element to read from.
// tnode: tree node that receives the split variable, split value and fit
//        value; dereferenced unchecked.
//
// The three attributes are taken by position, not by name: first is parsed
// as the split variable (Int_t), second as the split value and third as the
// fit value (both Double_t). This matches the order in which
// addXMLAttributes writes "splitVar", "splitVal", "fitVal".
//
// NOTE(review): the listing numbers embedded in this copy skip 383, so one
// statement after the terminal-node update appears to be missing from this
// view -- confirm against the original file.
329 void Tree::loadFromXMLRecursive(TXMLEngine* xml, XMLNodePointer_t xnode, Node* tnode)
330 {
331 
332  // Get the split information from xml.
333  XMLAttrPointer_t attr = xml->GetFirstAttr(xnode);
334  std::vector<std::string> splitInfo(3);
// NOTE(review): exactly three attributes are read with no check that attr
// or the returned value is non-null; an element with fewer attributes is
// not handled here -- confirm what GetAttrValue returns in that case.
335  for(unsigned int i=0; i<3; i++)
336  {
337  splitInfo[i] = xml->GetAttrValue(attr);
338  attr = xml->GetNextAttr(attr);
339  }
340 
341  // Convert strings into numbers.
// One stringstream is reused for all three conversions; str("") empties its
// buffer and clear() resets its state flags between uses. The results of
// the extractions are not checked.
342  std::stringstream converter;
343  Int_t splitVar;
344  Double_t splitVal;
345  Double_t fitVal;
346 
347  converter << splitInfo[0];
348  converter >> splitVar;
349  converter.str("");
350  converter.clear();
351 
352  converter << splitInfo[1];
353  converter >> splitVal;
354  converter.str("");
355  converter.clear();
356 
357  converter << splitInfo[2];
358  converter >> fitVal;
359  converter.str("");
360  converter.clear();
361 
362  // Store gathered splitInfo into the node object.
363  tnode->setSplitVariable(splitVar);
364  tnode->setSplitValue(splitVal);
365  tnode->setFitValue(fitVal);
366 
367  // Get the xml daughters of the current xml node.
// NOTE(review): GetNext(xleft) runs before xleft is tested for null --
// presumably the engine tolerates a null argument; confirm.
368  XMLNodePointer_t xleft = xml->GetChild(xnode);
369  XMLNodePointer_t xright = xml->GetNext(xleft);
370 
371  // If there are no daughters we are done.
372  if(xleft == 0 || xright == 0) return;
373 
374  // If there are daughters link the node objects appropriately.
375  tnode->theMiracleOfChildBirth();
376  Node* tleft = tnode->getLeftDaughter();
377  Node* tright = tnode->getRightDaughter();
378 
379  // Update the list of terminal nodes.
// tnode stops being terminal; its two new daughters take its place.
380  terminalNodes.remove(tnode);
381  terminalNodes.push_back(tleft);
382  terminalNodes.push_back(tright);
384 
// Descend: left element/daughter first, then right.
385  loadFromXMLRecursive(xml, xleft, tleft);
386  loadFromXMLRecursive(xml, xright, tright);
387 }
Double_t getFitValue()
Definition: Node.cc:155
int i
Definition: DBlmapReader.cc:9
void filterEventsRecursive(Node *node)
Definition: Tree.cc:179
Definition: Node.h:10
Int_t getNumEvents()
Definition: Node.cc:189
Node * getLeftDaughter()
Definition: Node.cc:99
void rankVariablesRecursive(Node *node, std::vector< Double_t > &v)
Definition: Tree.cc:198
void loadFromXML(const char *filename)
Definition: Tree.cc:303
void setSplitValue(Double_t sSplitValue)
Definition: Node.cc:128
~Tree()
Definition: Tree.cc:47
void setTerminalNodes(std::list< Node * > &sTNodes)
Definition: Tree.cc:70
string xmldoc
Some module's global variables.
std::list< Node * > terminalNodes
Definition: Tree.h:48
void buildTree(Int_t nodeLimit)
Definition: Tree.cc:107
Double_t getSplitValue()
Definition: Node.cc:133
void saveToXMLRecursive(TXMLEngine *xml, Node *node, XMLNodePointer_t np)
Definition: Tree.cc:281
int np
Definition: AMPTWrapper.h:33
void theMiracleOfChildBirth()
Definition: Node.cc:329
std::string getName()
Definition: Node.cc:75
Node * getRightDaughter()
Definition: Node.cc:109
T sqrt(T t)
Definition: SSEVec.h:18
void setRootNode(Node *sRootNode)
Definition: Tree.cc:58
Int_t getSplitVariable()
Definition: Node.cc:143
void filterEventsToDaughters()
Definition: Node.cc:344
std::vector< std::vector< Event * > > & getEvents()
Definition: Node.cc:196
void setEvents(std::vector< std::vector< Event * > > &sEvents)
Definition: Node.cc:201
Double_t getErrorReduction()
Definition: Node.cc:87
Node * getRootNode()
Definition: Tree.cc:63
Tree()
Definition: Tree.cc:26
void setFitValue(Double_t sFitValue)
Definition: Node.cc:150
std::list< Node * > & getTerminalNodes()
Definition: Tree.cc:75
Double_t rmsError
Definition: Tree.h:50
void addXMLAttributes(TXMLEngine *xml, Node *node, XMLNodePointer_t np)
Definition: Tree.cc:244
void filterEvents(std::vector< Event * > &tEvents)
Definition: Tree.cc:163
Int_t numTerminalNodes
Definition: Tree.h:49
tuple filename
Definition: lut2db_cfg.py:20
void setSplitVariable(Int_t sSplitVar)
Definition: Node.cc:138
Node * rootNode
Definition: Tree.h:47
void calcError()
Definition: Tree.cc:91
void rankVariables(std::vector< Double_t > &v)
Definition: Tree.cc:223
edm::TrieNode< PDet > Node
void saveToXML(const char *filename)
Definition: Tree.cc:255
std::string numToStr(T num)
Definition: Utilities.h:43
void loadFromXMLRecursive(TXMLEngine *xml, XMLNodePointer_t node, Node *tnode)
Definition: Tree.cc:329
long double T
void calcOptimumSplit()
Definition: Node.cc:210
Int_t getNumTerminalNodes()
Definition: Tree.cc:82