CMS 3D CMS Logo

SprPCAApp.cc File Reference

#include "PhysicsTools/StatPatternRecognition/interface/SprExperiment.hh"
#include "PhysicsTools/StatPatternRecognition/interface/SprAbsFilter.hh"
#include "PhysicsTools/StatPatternRecognition/interface/SprData.hh"
#include "PhysicsTools/StatPatternRecognition/interface/SprAbsReader.hh"
#include "PhysicsTools/StatPatternRecognition/interface/SprAbsWriter.hh"
#include "PhysicsTools/StatPatternRecognition/interface/SprDataFeeder.hh"
#include "PhysicsTools/StatPatternRecognition/interface/SprRWFactory.hh"
#include "PhysicsTools/StatPatternRecognition/interface/SprStringParser.hh"
#include "PhysicsTools/StatPatternRecognition/interface/SprPCATransformer.hh"
#include "PhysicsTools/StatPatternRecognition/interface/SprTransformerFilter.hh"
#include <stdlib.h>
#include <unistd.h>
#include <cassert>
#include <iostream>
#include <vector>
#include <set>
#include <string>
#include <memory>

Go to the source code of this file.

Functions

void help (const char *prog)
int main (int argc, char **argv)


Function Documentation

void help ( const char *  prog  ) 

Definition at line 25 of file SprPCAApp.cc.

References std::cout and std::endl.

// Print the command-line usage summary of this program to standard output.
//
// prog: program name shown in the "Usage:" line (main passes argv[0]).
void help(const char* prog)
{
  // One entry per option line; the trailing padding inside each literal is
  // part of the emitted text and is kept unchanged.
  static const char* const optionLines[] = {
    "\t-h --- help                                        ",
    "\t-o output Tuple file                               ",
    "\t-a input ascii file mode (see SprSimpleReader.hh)  ",
    "\t-A save output data in ascii instead of Root       ",
    "\t-y list of input classes (see SprAbsFilter.hh)     ",
    "\t-v verbose level (0=silent default,1,2)            ",
    "\t-V include only these input variables              ",
    "\t-z exclude input variables from the list           "
  };
  const size_t nOptionLines = sizeof(optionLines) / sizeof(optionLines[0]);

  std::cout << "Usage:  " << prog
            << " training_data_file output_file_for_PCA_coefficients"
            << std::endl;
  std::cout << "\t Options: " << std::endl;
  for( size_t k = 0; k < nOptionLines; ++k )
    std::cout << optionLines[k] << std::endl;
  std::cout << "\t\t Variables must be listed in quotes and separated by commas."
            << std::endl;
}

int main ( int  argc,
char **  argv 
)

Definition at line 43 of file SprPCAApp.cc.

References c, std::cerr, std::cout, std::endl, filter, help(), i, and vars.

00044 {
00045   // check command line
00046   if( argc < 2 ) {
00047     help(argv[0]);
00048     return 1;
00049   }
00050 
00051   // init
00052   string tupleFile;
00053   int readMode = 0;
00054   SprRWFactory::DataType writeMode = SprRWFactory::Root;
00055   int verbose = 0;
00056   string outFile;
00057   string includeList, excludeList;
00058   string inputClassesString;
00059   string stringVarsDoNotFeed;
00060 
00061   // decode command line
00062   int c;
00063   extern char* optarg;
00064   //  extern int optind;
00065   while( (c = getopt(argc,argv,"ho:a:Ay:v:K:DV:z:Z:")) != EOF ) {
00066     switch( c )
00067       {
00068       case 'h' :
00069         help(argv[0]);
00070         return 1;
00071       case 'o' :
00072         tupleFile = optarg;
00073         break;
00074       case 'a' :
00075         readMode = (optarg==0 ? 0 : atoi(optarg));
00076         break;
00077       case 'A' :
00078         writeMode = SprRWFactory::Ascii;
00079         break;
00080       case 'y' :
00081         inputClassesString = optarg;
00082         break;
00083       case 'v' :
00084         verbose = (optarg==0 ? 0 : atoi(optarg));
00085         break;
00086       case 'V' :
00087         includeList = optarg;
00088         break;
00089       case 'z' :
00090         excludeList = optarg;
00091         break;
00092       case 'Z' :
00093         stringVarsDoNotFeed = optarg;
00094         break;
00095       }
00096   }
00097 
00098   // 2 arguments after all options.
00099   string trFile = argv[argc-2];
00100   string pcaFile = argv[argc-1];
00101   if( trFile.empty() ) {
00102     cerr << "No training file is specified." << endl;
00103     return 1;
00104   }
00105   if( pcaFile.empty() ) {
00106     cerr << "No file for storing PCA coefficients is specified." << endl;
00107     return 1;
00108   }
00109 
00110   // make reader
00111   SprRWFactory::DataType inputType 
00112     = ( readMode==0 ? SprRWFactory::Root : SprRWFactory::Ascii );
00113   auto_ptr<SprAbsReader> reader(SprRWFactory::makeReader(inputType,readMode));
00114 
00115   // include variables
00116   set<string> includeSet;
00117   if( !includeList.empty() ) {
00118     vector<vector<string> > includeVars;
00119     SprStringParser::parseToStrings(includeList.c_str(),includeVars);
00120     assert( !includeVars.empty() );
00121     for( int i=0;i<includeVars[0].size();i++ ) 
00122       includeSet.insert(includeVars[0][i]);
00123     if( !reader->chooseVars(includeSet) ) {
00124       cerr << "Unable to include variables in training set." << endl;
00125       return 2;
00126     }
00127     else {
00128       cout << "Following variables have been included in optimization: ";
00129       for( set<string>::const_iterator 
00130              i=includeSet.begin();i!=includeSet.end();i++ )
00131         cout << "\"" << *i << "\"" << " ";
00132       cout << endl;
00133     }
00134   }
00135 
00136   // exclude variables
00137   set<string> excludeSet;
00138   if( !excludeList.empty() ) {
00139     vector<vector<string> > excludeVars;
00140     SprStringParser::parseToStrings(excludeList.c_str(),excludeVars);
00141     assert( !excludeVars.empty() );
00142     for( int i=0;i<excludeVars[0].size();i++ ) 
00143       excludeSet.insert(excludeVars[0][i]);
00144     if( !reader->chooseAllBut(excludeSet) ) {
00145       cerr << "Unable to exclude variables from training set." << endl;
00146       return 2;
00147     }
00148     else {
00149       cout << "Following variables have been excluded from optimization: ";
00150       for( set<string>::const_iterator 
00151              i=excludeSet.begin();i!=excludeSet.end();i++ )
00152         cout << "\"" << *i << "\"" << " ";
00153       cout << endl;
00154     }
00155   }
00156 
00157   // read training data from file
00158   auto_ptr<SprAbsFilter> filter(reader->read(trFile.c_str()));
00159   if( filter.get() == 0 ) {
00160     cerr << "Unable to read data from file " << trFile.c_str() << endl;
00161     return 2;
00162   }
00163   vector<string> vars;
00164   filter->vars(vars);
00165   cout << "Read data from file " << trFile.c_str() 
00166        << " for variables";
00167   for( int i=0;i<vars.size();i++ ) 
00168     cout << " \"" << vars[i].c_str() << "\"";
00169   cout << endl;
00170   cout << "Total number of points read: " << filter->size() << endl;
00171 
00172   // filter training data by class
00173   vector<SprClass> inputClasses;
00174   if( !filter->filterByClass(inputClassesString.c_str()) ) {
00175     cerr << "Cannot choose input classes for string " 
00176          << inputClassesString << endl;
00177     return 2;
00178   }
00179   filter->classes(inputClasses);
00180   assert( inputClasses.size() > 1 );
00181   cout << "Training data filtered by class." << endl;
00182   for( int i=0;i<inputClasses.size();i++ ) {
00183     cout << "Points in class " << inputClasses[i] << ":   " 
00184          << filter->ptsInClass(inputClasses[i]) << endl;
00185   }
00186 
00187   // compute PCA
00188   SprPCATransformer pca;
00189   if( !pca.train(filter.get(),verbose) ) {
00190     cerr << "Unable to compute PCA coefficients." << endl;
00191     return 3;
00192   }
00193   cout << "Computed PCA coefficients." << endl;
00194 
00195   // store PCA
00196   if( !pca.store(pcaFile.c_str()) ) {
00197     cerr << "Unable to store PCA coefficients." << endl;
00198     return 4;
00199   }
00200   cout << "Stored PCA coefficients." << endl;
00201 
00202   // make histogram if requested
00203   if( tupleFile.empty() ) return 0;
00204 
00205   // make a writer
00206   auto_ptr<SprAbsWriter> tuple(SprRWFactory::makeWriter(writeMode,"training"));
00207   if( !tuple->init(tupleFile.c_str()) ) {
00208     cerr << "Unable to open output file " << tupleFile.c_str() << endl;
00209     return 5;
00210   }
00211 
00212   // transform data
00213   SprTransformerFilter trans(filter.get());
00214   bool replaceOriginalData = true;
00215   if( !trans.transform(&pca,replaceOriginalData) ) {
00216     cerr << "Unable to transform input data." << endl;
00217     return 6;
00218   }
00219 
00220   // feed
00221   SprDataFeeder feeder(&trans,tuple.get());
00222   if( !feeder.feed(1000) ) {
00223     cerr << "Cannot feed data into file " << tupleFile.c_str() << endl;
00224     return 9;
00225   }
00226 
00227   // exit
00228   return 0;
00229 }


Generated on Tue Jun 9 17:55:01 2009 for CMSSW by  doxygen 1.5.4