00001
00002
00003 #include "PhysicsTools/StatPatternRecognition/interface/SprExperiment.hh"
00004 #include "PhysicsTools/StatPatternRecognition/interface/SprRootReader.hh"
00005 #include "PhysicsTools/StatPatternRecognition/interface/SprData.hh"
00006 #include "PhysicsTools/StatPatternRecognition/interface/SprAbsFilter.hh"
00007 #include "PhysicsTools/StatPatternRecognition/interface/SprEmptyFilter.hh"
00008 #include "PhysicsTools/StatPatternRecognition/interface/SprPreFilter.hh"
00009
00010 #include <TFile.h>
00011 #include <TTree.h>
00012 #include <TLeaf.h>
00013 #include <TObjArray.h>
00014
00015 #include <stdlib.h>
00016 #include <utility>
00017 #include <fstream>
00018 #include <sstream>
00019 #include <iostream>
00020 #include <cassert>
00021 #include <memory>
00022 #include <algorithm>
00023 #include <iterator>
00024
00025 using namespace std;
00026
00027 SprRootReader::SprRootReader(SprPreFilter* filter)
00028 :
00029 SprAbsReader(filter),
00030 treeNames_(),
00031 treeClasses_(),
00032 leafNames_(),
00033 fileObjects_(),
00034 hasSpecialClassifier_(false),
00035 classifierVarName_()
00036 {}
00037
00038
00039
00040 SprAbsFilter* SprRootReader::read(const char* filename)
00041 {
00042 ifstream file(filename);
00043 if (not file) {
00044 cerr << "Unable to open " << filename << endl;
00045 return 0;
00046 }
00047
00048 string line;
00049 double weight = 1.0;
00050
00051
00052 bool weightHasChanged = false;
00053 cout << "Parsing File: " << filename << endl;
00054
00055 while (getline(file, line)) {
00056 if (line.find('#') != string::npos) {
00057 line.erase(line.find_first_of('#'));
00058 }
00059 if (line.find_first_not_of(' ') == string::npos)
00060 continue;
00061
00062 istringstream inString(line);
00063 vector<string> lineFields;
00064 string fieldDummy;
00065 while (inString >> fieldDummy)
00066 lineFields.push_back(fieldDummy);
00067
00068 assert( lineFields.size() > 1 );
00069
00070 if (lineFields.at(0) == "Tree:") {
00071 assert( treeNames_.empty() );
00072 copy(&lineFields[1],&lineFields[lineFields.size()],
00073 back_inserter(treeNames_));
00074 assert( !treeNames_.empty() );
00075 }
00076 else if (lineFields.at(0) == "TreeClass:") {
00077 assert( treeClasses_.empty() );
00078 for( int i=1;i<lineFields.size();i++ )
00079 treeClasses_.push_back(atoi(lineFields[i].c_str()));
00080 if( treeNames_.size() != treeClasses_.size() ) {
00081 cerr << "If you supply TreeClass, you must supply as many "
00082 << "tree classes as you supplied trees, one per tree." << endl;
00083 return 0;
00084 }
00085 }
00086 else if (lineFields.at(0) == "ClassVariable:") {
00087
00088 if(hasSpecialClassifier_){
00089 cout<<"WARNING - True class variable was already chosen as "
00090 <<classifierVarName_<<" will be overwritten to "
00091 <<lineFields.at(1)
00092 <<"\nPlease change your Run File"<<endl;
00093 }
00094 hasSpecialClassifier_ = true;
00095 classifierVarName_ = lineFields.at(1);
00096 }
00097 else if (lineFields.at(0) == "WeightVariable:") {
00098
00099 weightHasChanged = true;
00100 assert( weightLeafNames_.empty() );
00101 copy(&lineFields[1],&lineFields[lineFields.size()],
00102 back_inserter(weightLeafNames_));
00103 }
00104 else if (lineFields.at(0) == "Leaves:") {
00105 copy(&lineFields[1],&lineFields[lineFields.size()],
00106 back_inserter(leafNames_));
00107 }
00108 else if (lineFields.at(0) == "Weight:") {
00109 weightHasChanged = true;
00110 istringstream s(lineFields.at(1));
00111 s >> weight;
00112 }
00113 else if (lineFields.at(0) == "File:") {
00114
00115 assert( lineFields.size() > 1 );
00116
00117 FileInfo thisFile;
00118 thisFile.name = lineFields.at(1);
00119
00120 thisFile.start = 0;
00121 thisFile.end = -1;
00122
00123 if( lineFields.size() > 2 ) {
00124 istringstream
00125 dummyIn(string(lineFields.at(2),
00126 0,
00127 lineFields.at(2).find_first_of('-')));
00128 if( !(dummyIn >> thisFile.start) ) {
00129 thisFile.start = 0;
00130 }
00131 dummyIn.clear();
00132 dummyIn.str(string(lineFields.at(2),
00133 lineFields.at(2).find_first_of('-')+1,
00134 string::npos));
00135 if( !(dummyIn >> thisFile.end) ) {
00136 thisFile.end = -1;
00137 }
00138 dummyIn.clear();
00139 }
00140
00141 thisFile.fileClass = 0;
00142
00143 if( lineFields.size() > 3 ) {
00144 istringstream dummyIn(lineFields.at(3));
00145 if (not (dummyIn >> thisFile.fileClass)) {
00146 thisFile.fileClass = 0;
00147 cout << dummyIn.get();
00148 }
00149 }
00150
00151 thisFile.weight = weight;
00152 fileObjects_.push_back(thisFile);
00153
00154 cout << "Found file: " << thisFile.name
00155 << " start: " << thisFile.start
00156 << " end: " << thisFile.end
00157 << " class: " << thisFile.fileClass
00158 << " weight: " << thisFile.weight
00159 << endl;
00160 }
00161 }
00162
00163 if(hasSpecialClassifier_){
00164 cout << "True class value is given by leaf "
00165 << classifierVarName_ << endl;
00166 }
00167
00168 if(weightLeafNames_.size()){
00169 cout<<"A variable determined weight has been chosen, the value"
00170 <<" assigned to ";
00171 for(int i = 0; i < weightLeafNames_.size(); i++){
00172 if(i%5 == 0) cout<<"\n\t";
00173 if(i == 0) cout<<weightLeafNames_[i];
00174 else cout<<" * "<<weightLeafNames_[i];
00175 }
00176 cout<<"\n will be used for the weight."<<endl;
00177 }
00178
00179 return readRootObjects(weightHasChanged);
00180 }
00181
00182 SprAbsFilter* SprRootReader::readRootObjects(bool needToCalcWeights)
00183 {
00184 auto_ptr<SprData> data(new SprData);
00185 vector<double> weights;
00186
00187
00188 if( filter_!=0 && !filter_->setVars(leafNames_) ) {
00189 cerr << "Unable to apply pre-filter requirements." << endl;
00190 return 0;
00191 }
00192
00193
00194 vector<string> transformed;
00195 if( filter_ != 0 ) {
00196 if( !filter_->transformVars(leafNames_,transformed) ) {
00197 cerr << "Pre-filter is unable to transform variables." << endl;
00198 return 0;
00199 }
00200 }
00201 if( transformed.empty() ) transformed = leafNames_;
00202
00203
00204 if( !data->setVars(transformed) ) {
00205 cerr << "Unable to set variable list for input data." << endl;
00206 return 0;
00207 }
00208
00209
00210 for( vector<FileInfo>::const_iterator fileIter = fileObjects_.begin();
00211 fileIter != fileObjects_.end(); ++fileIter) {
00212 TFile f(fileIter->name.c_str());
00213
00214
00215 for( vector<string>::const_iterator treeIter = treeNames_.begin();
00216 treeIter != treeNames_.end(); ++treeIter ) {
00217 TTree* tree = dynamic_cast<TTree*>(f.Get(treeIter->c_str()));
00218 if( tree == 0 ) {
00219 cout<< "Tree " << treeIter->c_str() << " not found in file "
00220 << fileIter->name.c_str() << endl;
00221 continue;
00222 }
00223 int istart = fileIter->start;
00224 int iend = fileIter->end;
00225 if( iend < 0 ) iend = tree->GetEntries();
00226 cout << "Reading File: " << fileIter->name.c_str()
00227 << " for Tree: " << treeIter->c_str()
00228 << " (" << iend-istart << " events)" << endl;
00229 map<string, TLeaf*> leaves;
00230
00231
00232 for (vector<string>::const_iterator leafIter = leafNames_.begin();
00233 leafIter != leafNames_.end(); ++leafIter) {
00234
00235 TLeaf* tempLeaf = tree->GetLeaf(leafIter->c_str());
00236
00237 if(tempLeaf == 0){
00238 cerr << "No Leaf associated with variable "
00239 << leafIter->c_str() << " ...aborting." <<endl;
00240 abort();
00241 }
00242 leaves.insert(make_pair(*leafIter, tempLeaf));
00243 }
00244
00245
00246 for (int iEvent=istart; iEvent<iend; ++iEvent) {
00247 if( tree->GetEntry(iEvent) <= 0 ) {
00248 cerr << "Unable to read event " << iEvent
00249 << " from tree " << treeIter->c_str()
00250 << " in file " << fileIter->name.c_str()
00251 << ". Aborting event loop." << endl;
00252 break;
00253 }
00254 vector<double> row;
00255 for (vector<string>::const_iterator leafIter = leafNames_.begin();
00256 leafIter != leafNames_.end(); ++leafIter) {
00257
00258 row.push_back(leaves[*leafIter]->GetValue(0));
00259 }
00260
00261
00262 int assignedClass = fileIter->fileClass;
00263
00264 if( !treeClasses_.empty() ) {
00265 int nTree = treeIter - treeNames_.begin();
00266 assignedClass = treeClasses_[nTree];
00267 }
00268
00269 if( hasSpecialClassifier_ ) {
00270 TLeaf* classLeaf = tree->GetLeaf(classifierVarName_.c_str());
00271 if( classLeaf == 0 ) {
00272 cerr << "No Leaf associated with classifier variable. Aborting."
00273 << endl;
00274 abort();
00275 }
00276 else {
00277 assignedClass = (int) classLeaf->GetValue(0);
00278 }
00279 }
00280
00281 float assignedWeight = fileIter->weight;
00282 for (int i = 0; i < weightLeafNames_.size(); i++) {
00283 TLeaf* tempLeaf = tree->GetLeaf(weightLeafNames_[i].c_str());
00284 if(tempLeaf == 0){
00285 cerr<<"No Leaf associated with variable "
00286 <<weightLeafNames_[i]<<" - probably"
00287 <<" a typo - please fix this"<<endl;
00288 abort();
00289 }
00290 else {
00291 assignedWeight *= (float) tempLeaf->GetValue(0);
00292 }
00293 }
00294
00295
00296 if( filter_!=0 && !filter_->pass(assignedClass,row) ) continue;
00297
00298
00299 if( filter_!=0 ) {
00300 pair<int,bool> computedClass = filter_->computeClass(row);
00301 if( computedClass.second )
00302 assignedClass = computedClass.first;
00303 }
00304
00305
00306 if( filter_ != 0 ) {
00307 vector<double> vNew;
00308 if( filter_->transformCoords(row,vNew) )
00309 data->insert(assignedClass,vNew);
00310 else {
00311 cerr << "Pre-filter is unable to transform coordinates." << endl;
00312 return 0;
00313 }
00314 }
00315 else
00316 data->insert(assignedClass,row);
00317
00318 weights.push_back(assignedWeight);
00319 }
00320 }
00321 f.Close();
00322 }
00323
00324
00325 if (needToCalcWeights)
00326 return new SprEmptyFilter(data.release(), weights, true);
00327 return new SprEmptyFilter(data.release(), true);
00328 }
00329
00330
00331 bool SprRootReader::chooseVars(const std::set<std::string>& vars)
00332 {
00333 cerr << "Unable to choose variables: "
00334 << "SprRootReader::chooseVars() not implemented." << endl;
00335 return false;
00336 }
00337
00338
00339 bool SprRootReader::chooseAllBut(const std::set<std::string>& vars)
00340 {
00341 cerr << "Unable to choose variables: "
00342 << "SprRootReader::chooseAllBut() not implemented." << endl;
00343 return false;
00344 }
00345
00346
00347 void SprRootReader::chooseAll()
00348 {
00349 vector<FileInfo>::iterator fileIter = fileObjects_.begin();
00350 TFile f(fileIter->name.c_str());
00351 TTree* tree = dynamic_cast<TTree*>(f.Get(treeNames_[0].c_str()));
00352
00353 if(tree == 0) {
00354 cerr << "Tree " << treeNames_[0] << " not found in file "
00355 << fileIter->name.c_str() << endl;
00356 cerr << "No variables will be selected." << endl;
00357 return;
00358 }
00359
00360 TObjArray* leafArray = tree->GetListOfLeaves();
00361 TIter leafIter(leafArray);
00362 leafIter.Reset();
00363
00364 leafNames_.clear();
00365 TLeaf* thisLeaf = 0;
00366 while( (thisLeaf = (TLeaf*)leafIter.Next()) != 0 ){
00367 leafNames_.push_back(thisLeaf->GetName());
00368 }
00369 }