CMS 3D CMS Logo

SprPlotter.cc

Go to the documentation of this file.
00001 //$Id: SprPlotter.cc,v 1.1 2007/09/21 22:32:10 narsky Exp $
00002 
00003 #include "PhysicsTools/StatPatternRecognition/interface/SprExperiment.hh"
00004 #include "PhysicsTools/StatPatternRecognition/interface/SprPlotter.hh"
00005 #include "PhysicsTools/StatPatternRecognition/interface/SprAbsTwoClassCriterion.hh"
00006 
00007 #include <algorithm>
00008 #include <functional>
00009 #include <iostream>
00010 #include <cmath>
00011 
00012 using namespace std;
00013 
00014 
00015 struct SPCmpPairDDFirst
00016   : public binary_function<pair<double,double>,pair<double,double>,bool> {
00017   bool operator()(const pair<double,double>& l, const pair<double,double>& r)
00018     const {
00019     return (l.first < r.first);
00020   }
00021 };
00022 
00023 
00024 SprPlotter::SprPlotter(const std::vector<Response>& responses) 
00025   : 
00026   responses_(responses), 
00027   crit_(0), 
00028   useAbsolute_(false), 
00029   scaleS_(1.), 
00030   scaleB_(1.),
00031   sigW_(0),
00032   bgrW_(0),
00033   sigN_(0),
00034   bgrN_(0)
00035 {
00036   bool status = this->init();
00037   assert( status );
00038 }
00039 
00040 
00041 SprPlotter::SprPlotter(const SprPlotter& other)
00042   :
00043   responses_(other.responses_), 
00044   crit_(other.crit_), 
00045   useAbsolute_(other.useAbsolute_), 
00046   scaleS_(other.scaleS_), 
00047   scaleB_(other.scaleB_),
00048   sigW_(other.sigW_),
00049   bgrW_(other.bgrW_),
00050   sigN_(other.sigN_),
00051   bgrN_(other.bgrN_)
00052 {}
00053 
00054 
00055 bool SprPlotter::init()
00056 {
00057   sigW_ = 0;
00058   bgrW_ = 0;
00059   sigN_ = 0;
00060   bgrN_ = 0;
00061   for( int i=0;i<responses_.size();i++ ) {
00062     if(      responses_[i].cls == 0 ) {
00063       bgrN_++;
00064       bgrW_ += responses_[i].weight;
00065     }
00066     else if( responses_[i].cls == 1 ) {
00067       sigN_++;
00068       sigW_ += responses_[i].weight;
00069     }
00070   }
00071   if( sigW_<SprUtils::eps() || bgrW_<SprUtils::eps() 
00072       || sigN_==0 || bgrN_==0 ) {
00073     cerr << "One of categories missing in the vector of responses." << endl;
00074     return false;
00075   }
00076   return true;
00077 }
00078 
00079 
00080 bool SprPlotter::setScaleFactors(double scaleS, double scaleB)
00081 {
00082   if( useAbsolute_ ) {
00083     scaleS_ = scaleS;
00084     scaleB_ = scaleB;
00085     return true;
00086   }
00087   cerr << "Use setAbsolute() first to choose absolute values "
00088        << "instead of relative efficiencies." << endl;
00089   return false;
00090 }
00091 
00092 
00093 bool SprPlotter::backgroundCurve(const std::vector<double>& signalEff,
00094                                  const char* classifier,
00095                                  std::vector<FigureOfMerit>& bgrndEff) const
00096 {
00097   // sanity check
00098   string sclassifier = classifier;
00099   if( sclassifier.empty() ) {
00100     cerr << "No classifier has been specified for plotting." << endl;
00101     return false;
00102   }
00103   if( signalEff.empty() ) {
00104     cerr << "No signal efficiencies specified." << endl;
00105     return false;
00106   }
00107 
00108   // sort signal efficiencies and prepare vectors
00109   vector<double> sigSortedEff(signalEff);
00110   stable_sort(sigSortedEff.begin(),sigSortedEff.end());
00111 
00112   // prepare vectors
00113   vector<pair<double,double> > signal;
00114   vector<pair<double,double> > bgrnd;
00115   if( !this->fillSandB(sclassifier,signal,bgrnd) ) {
00116     cerr << "Unable to fill out signal and background vectors in " 
00117          << "SprPlotter::backgroundCurve." << endl;
00118     return false;
00119   }
00120 
00121   // check sizes
00122   if( signal.empty() || bgrnd.empty() ) {
00123     cerr << "One of categories is empty for classifier " 
00124          << sclassifier.c_str() 
00125          << " in SprPlotter::backgroundCurve." << endl;
00126     return false;
00127   }
00128 
00129   // sort
00130   stable_sort(bgrnd.begin(),bgrnd.end(),not2(SPCmpPairDDFirst()));
00131   stable_sort(signal.begin(),signal.end(),not2(SPCmpPairDDFirst()));
00132 
00133   // if absolute values, check max allowed signal value
00134   int maxsize = sigSortedEff.size();
00135   vector<double>::iterator maxeff = sigSortedEff.end();
00136   if( useAbsolute_ ) {
00137     maxeff = find_if(sigSortedEff.begin(),sigSortedEff.end(),
00138                      bind2nd(greater<double>(),sigW_));
00139   }
00140   else {
00141     maxeff = find_if(sigSortedEff.begin(),sigSortedEff.end(),
00142                      bind2nd(greater<double>(),1.));
00143   }
00144   if( maxeff != sigSortedEff.end() )
00145     maxsize = maxeff-sigSortedEff.begin();
00146   if( maxsize == 0 ) {
00147     cerr << "Values of signal efficiency out of range." << endl;
00148     return false;
00149   }
00150 
00151   // find dividing point in classifier response
00152   vector<double> cuts(maxsize,signal[0].first);
00153   double w = 0;
00154   int startDivider = 0;
00155   int i = 0;
00156   while( i<signal.size() && startDivider<maxsize ) {
00157     w += signal[i].second;
00158     for( int divider=startDivider;divider<maxsize;divider++ ) {
00159       if( (useAbsolute_ && scaleS_*w>sigSortedEff[divider]) 
00160           || (!useAbsolute_ && (w/sigW_)>sigSortedEff[divider]) ) {
00161         if( i > 0 )
00162           cuts[divider] = 0.5 * (signal[i].first + signal[i-1].first);
00163         startDivider = divider + 1;
00164       }
00165       else
00166         break;
00167     }// end divider loop
00168     i++;
00169   }
00170         
00171   // find background fractions
00172   double defFOMvalue = 0;
00173   if( useAbsolute_ && crit_!=0 ) 
00174     defFOMvalue = crit_->fom(0,scaleB_*bgrW_,scaleS_*sigW_,0);
00175   FigureOfMerit defaultFOM(cuts[cuts.size()-1],
00176                            (useAbsolute_ ? bgrW_ : 1),
00177                            bgrN_,defFOMvalue);
00178   bgrndEff.clear();
00179   bgrndEff.resize(sigSortedEff.size(),defaultFOM);
00180   w = 0;
00181   startDivider = 0;
00182   i = 0;
00183   while( i<bgrnd.size() && startDivider<maxsize ) {
00184     for( int divider=startDivider;divider<maxsize;divider++ ) {
00185       if( bgrnd[i].first < cuts[divider] ) {
00186         if( useAbsolute_ ) {
00187           bgrndEff[divider]
00188             = FigureOfMerit(cuts[divider],scaleB_*w,i,
00189                             ( crit_==0 ? 0 : 
00190                               crit_->fom(scaleB_*(bgrW_-w),scaleB_*w,
00191                                          sigSortedEff[divider],
00192                                          scaleS_*sigW_
00193                                          -sigSortedEff[divider])));
00194         }
00195         else {
00196           bgrndEff[divider] = FigureOfMerit(cuts[divider],w/bgrW_,i);
00197         }
00198         startDivider = divider + 1;
00199       }
00200       else
00201         break;
00202     }// end divider loop
00203     w += bgrnd[i].second;
00204     i++;
00205   }
00206 
00207   // exit
00208   return true;
00209 }
00210 
00211 
00212 bool SprPlotter::fillSandB(const std::string& sclassifier,
00213                            std::vector<std::pair<double,double> >& signal,
00214                            std::vector<std::pair<double,double> >& bgrnd) const
00215 {
00216   // init
00217   signal.clear();
00218   bgrnd.clear();
00219 
00220   // fill them
00221   for( int i=0;i<responses_.size();i++ ) {
00222     // check if classifier is present
00223     map<string,double>::const_iterator found = 
00224       responses_[i].response.find(sclassifier);
00225     if( found == responses_[i].response.end() ) {
00226       cerr << "Point " << i << " does not have response value " 
00227            << "for classifier " << sclassifier.c_str() << endl;
00228       return false;
00229     }
00230 
00231     // fill out signal and background vectors
00232     int cls = responses_[i].cls;
00233     double w = responses_[i].weight;
00234     if(      cls == 0 )
00235       bgrnd.push_back(pair<double,double>(found->second,w));
00236     else if( cls == 1 )
00237       signal.push_back(pair<double,double>(found->second,w));
00238   }
00239 
00240   // exit
00241   return true;
00242 }
00243 
00244 
00245 int SprPlotter::histogram(const char* classifier, 
00246                           double xlo, double xhi, double dx,
00247                           std::vector<std::pair<double,double> >& sigHist,
00248                           std::vector<std::pair<double,double> >& bgrHist) 
00249   const
00250 {
00251   // sanity check
00252   const string sclassifier = classifier;
00253   if( sclassifier.empty() || dx<=0. || xlo>=xhi ) {
00254     cerr << "Invalid histogram parameters entered." << endl;
00255     return 0;
00256   }
00257 
00258   // prepare vectors
00259   vector<pair<double,double> > signal;
00260   vector<pair<double,double> > bgrnd;
00261 
00262   // fill them
00263   if( !this->fillSandB(sclassifier,signal,bgrnd) ) {
00264     cerr << "Unable to fill out signal and background vectors " 
00265          << "in SprPlotter::histogram." << endl;
00266     return 0;
00267   }
00268 
00269   // sort in ascending order
00270   stable_sort(bgrnd.begin(),bgrnd.end(),SPCmpPairDDFirst());
00271   stable_sort(signal.begin(),signal.end(),SPCmpPairDDFirst());
00272 
00273   // book histos
00274   int nbin = int(floor((xhi-xlo)/dx)) + 1;
00275   sigHist.clear(); bgrHist.clear();
00276   sigHist.resize(nbin); bgrHist.resize(nbin);
00277 
00278   // init indices
00279   int jsig(0), jbgr(0);
00280 
00281   // check starting indices
00282   if( !signal.empty() ) {
00283     if( signal[0].first < xlo ) {
00284       bool lbreak = false;
00285       for( int j=0;j<signal.size();j++ ) {
00286         if( signal[j].first >= xlo ) {
00287           jsig = j;
00288           lbreak = true;
00289           break;
00290         }
00291       }
00292       if( !lbreak ) jsig = signal.size();
00293     }
00294   }
00295   if( !bgrnd.empty() ) {
00296     if( bgrnd[jbgr].first < xlo ) {
00297       bool lbreak = false;
00298       for( int j=jbgr;j<bgrnd.size();j++ ) {
00299         if( bgrnd[j].first >= xlo ) {
00300           jbgr = j;
00301           lbreak = true;
00302           break;
00303         }
00304       }
00305       if( !lbreak ) jbgr = bgrnd.size();
00306     }
00307   }
00308       
00309   // fill histos
00310   for( int i=0;i<nbin;i++ ) {
00311     double xa = xlo + i*dx;
00312     double xb = xa + dx;
00313     
00314     // signal
00315     double wsig = 0;
00316     unsigned nsig = 0;
00317     bool lbreak = false;
00318     for( int j=jsig;j<signal.size();j++ ) {
00319       if( signal[j].first >= xb ) {
00320         jsig = j;
00321         lbreak = true;
00322         break;
00323       }
00324       wsig += signal[j].second;
00325       nsig++;
00326     }
00327     double errS = ( nsig>0 ? scaleS_*wsig/sqrt(double(nsig)) : 0 );
00328     sigHist[i] = pair<double,double>(scaleS_*wsig,errS);
00329     if( !lbreak ) jsig = signal.size();
00330 
00331     // background
00332     double wbgr = 0;
00333     unsigned nbgr = 0;
00334     lbreak = false;
00335     for( int j=jbgr;j<bgrnd.size();j++ ) {
00336       if( bgrnd[j].first >= xb ) {
00337         jbgr = j;
00338         lbreak = true;
00339         break;
00340       }
00341       wbgr += bgrnd[j].second;
00342       nbgr++;
00343     }
00344     double errB = ( nbgr>0 ? scaleB_*wbgr/sqrt(double(nbgr)) : 0 );
00345     bgrHist[i] = pair<double,double>(scaleB_*wbgr,errB);
00346     if( !lbreak ) jbgr = bgrnd.size();
00347   }// end bin loop
00348 
00349   // sanity check
00350   assert( sigHist.size() == bgrHist.size() );
00351 
00352   // exit
00353   return nbin;
00354 }

Generated on Tue Jun 9 17:42:03 2009 for CMSSW by  doxygen 1.5.4