CMS 3D CMS Logo

SprRootReader.cc

Go to the documentation of this file.
00001 // $Id: SprRootReader.cc,v 1.5 2007/10/30 18:56:14 narsky Exp $
00002 
00003 #include "PhysicsTools/StatPatternRecognition/interface/SprExperiment.hh"
00004 #include "PhysicsTools/StatPatternRecognition/interface/SprRootReader.hh"
00005 #include "PhysicsTools/StatPatternRecognition/interface/SprData.hh"
00006 #include "PhysicsTools/StatPatternRecognition/interface/SprAbsFilter.hh"
00007 #include "PhysicsTools/StatPatternRecognition/interface/SprEmptyFilter.hh"
00008 #include "PhysicsTools/StatPatternRecognition/interface/SprPreFilter.hh"
00009 
00010 #include <TFile.h>
00011 #include <TTree.h>
00012 #include <TLeaf.h>
00013 #include <TObjArray.h>
00014 
00015 #include <stdlib.h>
00016 #include <utility>
00017 #include <fstream>
00018 #include <sstream>
00019 #include <iostream>
00020 #include <cassert>
00021 #include <memory>
00022 #include <algorithm>
00023 #include <iterator>
00024 
00025 using namespace std;
00026 
00027 SprRootReader::SprRootReader(SprPreFilter* filter)
00028   : 
00029   SprAbsReader(filter),
00030   treeNames_(),
00031   treeClasses_(),
00032   leafNames_(),
00033   fileObjects_(),
00034   hasSpecialClassifier_(false),
00035   classifierVarName_()
00036 {}
00037 
00038 // parses the text file to read names of root files
00039 // defers reading of those to readRootObjects()
00040 SprAbsFilter* SprRootReader::read(const char* filename)
00041 {
00042   ifstream file(filename);
00043   if (not file) {
00044     cerr << "Unable to open " << filename << endl;
00045     return 0;
00046   }
00047   
00048   string line;
00049   double weight = 1.0;
00050   
00051   // if the weight is never set, we can save some time
00052   bool weightHasChanged = false;
00053   cout << "Parsing File: " << filename << endl;
00054   
00055   while (getline(file, line)) {
00056     if (line.find('#') != string::npos) {
00057       line.erase(line.find_first_of('#'));
00058     }
00059     if (line.find_first_not_of(' ') == string::npos)
00060       continue;
00061 
00062     istringstream inString(line);
00063     vector<string> lineFields;
00064     string fieldDummy;
00065     while (inString >> fieldDummy)
00066       lineFields.push_back(fieldDummy);
00067     
00068     assert( lineFields.size() > 1 );
00069 
00070     if      (lineFields.at(0) == "Tree:") {
00071       assert( treeNames_.empty() );
00072       copy(&lineFields[1],&lineFields[lineFields.size()],
00073            back_inserter(treeNames_));
00074       assert( !treeNames_.empty() );
00075     }
00076     else if (lineFields.at(0) == "TreeClass:") {
00077       assert( treeClasses_.empty() );
00078       for( int i=1;i<lineFields.size();i++ )
00079         treeClasses_.push_back(atoi(lineFields[i].c_str()));
00080       if( treeNames_.size() != treeClasses_.size() ) {
00081         cerr << "If you supply TreeClass, you must supply as many " 
00082              << "tree classes as you supplied trees, one per tree." << endl;
00083         return 0;
00084       }
00085     }      
00086     else if (lineFields.at(0) == "ClassVariable:") {
00087       //Accept variable name of TrueClass       
00088       if(hasSpecialClassifier_){
00089         cout<<"WARNING - True class variable was already chosen as "
00090             <<classifierVarName_<<" will be overwritten to "
00091             <<lineFields.at(1)
00092             <<"\nPlease change your Run File"<<endl;
00093       }           
00094       hasSpecialClassifier_ = true;
00095       classifierVarName_ = lineFields.at(1);
00096     } 
00097     else if (lineFields.at(0) == "WeightVariable:") {
00098       //Accept variable name of TrueClass
00099       weightHasChanged = true;
00100       assert( weightLeafNames_.empty() );
00101       copy(&lineFields[1],&lineFields[lineFields.size()],
00102            back_inserter(weightLeafNames_)); 
00103     } 
00104     else if (lineFields.at(0) == "Leaves:") {
00105       copy(&lineFields[1],&lineFields[lineFields.size()],
00106            back_inserter(leafNames_)); 
00107     } 
00108     else if (lineFields.at(0) == "Weight:") {
00109       weightHasChanged = true;
00110       istringstream s(lineFields.at(1));
00111       s >> weight;
00112     } 
00113     else if (lineFields.at(0) == "File:") {
00114 
00115       assert( lineFields.size() > 1 );
00116 
00117       FileInfo thisFile;
00118       thisFile.name = lineFields.at(1);
00119 
00120       thisFile.start = 0;
00121       thisFile.end = -1;
00122 
00123       if( lineFields.size() > 2 ) {
00124         istringstream 
00125           dummyIn(string(lineFields.at(2),
00126                          0,
00127                          lineFields.at(2).find_first_of('-')));
00128         if( !(dummyIn >> thisFile.start) ) {
00129           thisFile.start = 0;
00130         }
00131         dummyIn.clear();
00132         dummyIn.str(string(lineFields.at(2), 
00133                            lineFields.at(2).find_first_of('-')+1, 
00134                            string::npos));
00135         if( !(dummyIn >> thisFile.end) ) {
00136           thisFile.end = -1;
00137         }
00138         dummyIn.clear();
00139       }
00140 
00141       thisFile.fileClass = 0;
00142       
00143       if( lineFields.size() > 3 ) {
00144         istringstream dummyIn(lineFields.at(3));
00145         if (not (dummyIn >> thisFile.fileClass)) {
00146           thisFile.fileClass = 0;
00147           cout << dummyIn.get();
00148         }
00149       }
00150       
00151       thisFile.weight = weight;
00152       fileObjects_.push_back(thisFile);
00153       
00154       cout << "Found file: " << thisFile.name
00155            << " start: " << thisFile.start
00156            << " end: " << thisFile.end
00157            << " class: " << thisFile.fileClass
00158            << " weight: " << thisFile.weight
00159            << endl;
00160     }
00161   }
00162   
00163   if(hasSpecialClassifier_){
00164     cout << "True class value is given by leaf " 
00165          << classifierVarName_ << endl;
00166   }
00167 
00168   if(weightLeafNames_.size()){
00169     cout<<"A variable determined weight has been chosen, the value"
00170         <<" assigned to ";
00171     for(int i = 0; i < weightLeafNames_.size(); i++){
00172       if(i%5 == 0) cout<<"\n\t";
00173       if(i == 0)  cout<<weightLeafNames_[i];
00174       else cout<<" * "<<weightLeafNames_[i];
00175     }
00176     cout<<"\n will be used for the weight."<<endl;
00177   }
00178   
00179   return readRootObjects(weightHasChanged);
00180 }
00181 
00182 SprAbsFilter* SprRootReader::readRootObjects(bool needToCalcWeights)
00183 {
00184   auto_ptr<SprData> data(new SprData);
00185   vector<double> weights;
00186 
00187   // set up pre-filter vars
00188   if( filter_!=0 && !filter_->setVars(leafNames_) ) {
00189     cerr << "Unable to apply pre-filter requirements." << endl;
00190     return 0;
00191   }
00192   
00193   // get a new list of variables
00194   vector<string> transformed;
00195   if( filter_ != 0 ) {
00196     if( !filter_->transformVars(leafNames_,transformed) ) {
00197       cerr << "Pre-filter is unable to transform variables." << endl;
00198       return 0;
00199     }
00200   }
00201   if( transformed.empty() ) transformed = leafNames_; 
00202   
00203   // set up data vars
00204   if( !data->setVars(transformed) ) {
00205     cerr << "Unable to set variable list for input data." << endl;
00206     return 0;
00207   }
00208 
00209   // loop over files
00210   for( vector<FileInfo>::const_iterator fileIter = fileObjects_.begin(); 
00211        fileIter != fileObjects_.end(); ++fileIter) {
00212     TFile f(fileIter->name.c_str());
00213 
00214     // loop over trees
00215     for( vector<string>::const_iterator treeIter = treeNames_.begin();
00216          treeIter != treeNames_.end(); ++treeIter ) {
00217       TTree* tree = dynamic_cast<TTree*>(f.Get(treeIter->c_str()));
00218       if( tree == 0 ) {
00219         cout<< "Tree " << treeIter->c_str() << " not found in file "
00220             << fileIter->name.c_str() << endl;
00221         continue;
00222       }
00223       int istart = fileIter->start;
00224       int iend   = fileIter->end;
00225       if( iend < 0 ) iend = tree->GetEntries();
00226       cout << "Reading File: " << fileIter->name.c_str()
00227            << " for Tree: " << treeIter->c_str()
00228            << " (" << iend-istart << " events)" << endl;
00229       map<string, TLeaf*> leaves;
00230 
00231       // leaves
00232       for (vector<string>::const_iterator leafIter = leafNames_.begin(); 
00233            leafIter != leafNames_.end(); ++leafIter) {
00234 
00235         TLeaf* tempLeaf = tree->GetLeaf(leafIter->c_str());
00236         
00237         if(tempLeaf == 0){
00238           cerr << "No Leaf associated with variable "
00239                << leafIter->c_str() << " ...aborting." <<endl;
00240           abort();
00241         }
00242         leaves.insert(make_pair(*leafIter, tempLeaf));
00243       }
00244 
00245       // events
00246       for (int iEvent=istart; iEvent<iend; ++iEvent) {
00247         if( tree->GetEntry(iEvent) <= 0 ) {
00248           cerr << "Unable to read event " << iEvent 
00249                << " from tree " << treeIter->c_str() 
00250                << " in file " << fileIter->name.c_str() 
00251                << ". Aborting event loop." << endl;
00252           break;
00253         }
00254         vector<double> row;
00255         for (vector<string>::const_iterator leafIter = leafNames_.begin(); 
00256              leafIter != leafNames_.end(); ++leafIter) {
00257           // Take always the first entry
00258           row.push_back(leaves[*leafIter]->GetValue(0));
00259         }
00260 
00261         // get class
00262         int assignedClass = fileIter->fileClass;
00263         // TreeClass overrides fileClass
00264         if( !treeClasses_.empty() ) {
00265           int nTree = treeIter - treeNames_.begin();
00266           assignedClass = treeClasses_[nTree];
00267         }
00268         // special ClassVariable overrides fileClass and TreeClass
00269         if( hasSpecialClassifier_ ) {
00270           TLeaf* classLeaf = tree->GetLeaf(classifierVarName_.c_str());
00271           if( classLeaf == 0 ) {
00272             cerr << "No Leaf associated with classifier variable. Aborting."
00273                  << endl;
00274             abort();
00275           }
00276           else {
00277             assignedClass = (int) classLeaf->GetValue(0);
00278           }
00279         }
00280     
00281         float assignedWeight = fileIter->weight;
00282         for (int i = 0; i < weightLeafNames_.size(); i++) {   
00283           TLeaf* tempLeaf = tree->GetLeaf(weightLeafNames_[i].c_str());
00284           if(tempLeaf == 0){
00285             cerr<<"No Leaf associated with variable "
00286                 <<weightLeafNames_[i]<<" - probably"
00287                 <<" a typo - please fix this"<<endl;
00288             abort();
00289           }
00290           else {
00291             assignedWeight *= (float) tempLeaf->GetValue(0);
00292           } 
00293         }
00294 
00295         // pass filter?
00296         if( filter_!=0 && !filter_->pass(assignedClass,row) ) continue;
00297         
00298         // compute user-defined class
00299         if( filter_!=0 ) {
00300           pair<int,bool> computedClass = filter_->computeClass(row);
00301           if( computedClass.second ) 
00302             assignedClass = computedClass.first;
00303         }
00304         
00305         // transform coordinates
00306         if( filter_ != 0 ) {
00307           vector<double> vNew;
00308           if( filter_->transformCoords(row,vNew) )
00309             data->insert(assignedClass,vNew);
00310           else {
00311             cerr << "Pre-filter is unable to transform coordinates." << endl;
00312             return 0;
00313           }
00314         }
00315         else
00316           data->insert(assignedClass,row);
00317         
00318         weights.push_back(assignedWeight);
00319       }
00320     }
00321     f.Close();
00322   }
00323 
00324   // exit
00325   if (needToCalcWeights)
00326     return new SprEmptyFilter(data.release(), weights, true);
00327   return new SprEmptyFilter(data.release(), true);
00328 }
00329 
00330 
00331 bool SprRootReader::chooseVars(const std::set<std::string>& vars)
00332 {
00333   cerr << "Unable to choose variables: "
00334        << "SprRootReader::chooseVars() not implemented." << endl;
00335   return false;
00336 }
00337 
00338 
00339 bool SprRootReader::chooseAllBut(const std::set<std::string>& vars)
00340 {
00341   cerr << "Unable to choose variables: "
00342        << "SprRootReader::chooseAllBut() not implemented." << endl;
00343   return false;
00344 }
00345 
00346 
00347 void SprRootReader::chooseAll()
00348 {
00349   vector<FileInfo>::iterator fileIter = fileObjects_.begin();
00350   TFile f(fileIter->name.c_str());
00351   TTree* tree = dynamic_cast<TTree*>(f.Get(treeNames_[0].c_str()));
00352     
00353   if(tree == 0) {
00354     cerr << "Tree " << treeNames_[0] << " not found in file "
00355          << fileIter->name.c_str() << endl;
00356     cerr << "No variables will be selected." << endl;
00357     return;
00358   }
00359   
00360   TObjArray* leafArray = tree->GetListOfLeaves();
00361   TIter leafIter(leafArray);
00362   leafIter.Reset();
00363   
00364   leafNames_.clear();
00365   TLeaf* thisLeaf = 0;
00366   while( (thisLeaf = (TLeaf*)leafIter.Next()) != 0 ){
00367     leafNames_.push_back(thisLeaf->GetName());
00368   }
00369 }

Generated on Tue Jun 9 17:42:03 2009 for CMSSW by  doxygen 1.5.4