00001
00002
00003 #include "PhysicsTools/StatPatternRecognition/interface/SprExperiment.hh"
00004 #include "PhysicsTools/StatPatternRecognition/interface/SprPlotter.hh"
00005 #include "PhysicsTools/StatPatternRecognition/interface/SprAbsTwoClassCriterion.hh"
00006
00007 #include <algorithm>
00008 #include <functional>
00009 #include <iostream>
00010 #include <cmath>
00011
00012 using namespace std;
00013
00014
00015 struct SPCmpPairDDFirst
00016 : public binary_function<pair<double,double>,pair<double,double>,bool> {
00017 bool operator()(const pair<double,double>& l, const pair<double,double>& r)
00018 const {
00019 return (l.first < r.first);
00020 }
00021 };
00022
00023
00024 SprPlotter::SprPlotter(const std::vector<Response>& responses)
00025 :
00026 responses_(responses),
00027 crit_(0),
00028 useAbsolute_(false),
00029 scaleS_(1.),
00030 scaleB_(1.),
00031 sigW_(0),
00032 bgrW_(0),
00033 sigN_(0),
00034 bgrN_(0)
00035 {
00036 bool status = this->init();
00037 assert( status );
00038 }
00039
00040
00041 SprPlotter::SprPlotter(const SprPlotter& other)
00042 :
00043 responses_(other.responses_),
00044 crit_(other.crit_),
00045 useAbsolute_(other.useAbsolute_),
00046 scaleS_(other.scaleS_),
00047 scaleB_(other.scaleB_),
00048 sigW_(other.sigW_),
00049 bgrW_(other.bgrW_),
00050 sigN_(other.sigN_),
00051 bgrN_(other.bgrN_)
00052 {}
00053
00054
00055 bool SprPlotter::init()
00056 {
00057 sigW_ = 0;
00058 bgrW_ = 0;
00059 sigN_ = 0;
00060 bgrN_ = 0;
00061 for( int i=0;i<responses_.size();i++ ) {
00062 if( responses_[i].cls == 0 ) {
00063 bgrN_++;
00064 bgrW_ += responses_[i].weight;
00065 }
00066 else if( responses_[i].cls == 1 ) {
00067 sigN_++;
00068 sigW_ += responses_[i].weight;
00069 }
00070 }
00071 if( sigW_<SprUtils::eps() || bgrW_<SprUtils::eps()
00072 || sigN_==0 || bgrN_==0 ) {
00073 cerr << "One of categories missing in the vector of responses." << endl;
00074 return false;
00075 }
00076 return true;
00077 }
00078
00079
00080 bool SprPlotter::setScaleFactors(double scaleS, double scaleB)
00081 {
00082 if( useAbsolute_ ) {
00083 scaleS_ = scaleS;
00084 scaleB_ = scaleB;
00085 return true;
00086 }
00087 cerr << "Use setAbsolute() first to choose absolute values "
00088 << "instead of relative efficiencies." << endl;
00089 return false;
00090 }
00091
00092
00093 bool SprPlotter::backgroundCurve(const std::vector<double>& signalEff,
00094 const char* classifier,
00095 std::vector<FigureOfMerit>& bgrndEff) const
00096 {
00097
00098 string sclassifier = classifier;
00099 if( sclassifier.empty() ) {
00100 cerr << "No classifier has been specified for plotting." << endl;
00101 return false;
00102 }
00103 if( signalEff.empty() ) {
00104 cerr << "No signal efficiencies specified." << endl;
00105 return false;
00106 }
00107
00108
00109 vector<double> sigSortedEff(signalEff);
00110 stable_sort(sigSortedEff.begin(),sigSortedEff.end());
00111
00112
00113 vector<pair<double,double> > signal;
00114 vector<pair<double,double> > bgrnd;
00115 if( !this->fillSandB(sclassifier,signal,bgrnd) ) {
00116 cerr << "Unable to fill out signal and background vectors in "
00117 << "SprPlotter::backgroundCurve." << endl;
00118 return false;
00119 }
00120
00121
00122 if( signal.empty() || bgrnd.empty() ) {
00123 cerr << "One of categories is empty for classifier "
00124 << sclassifier.c_str()
00125 << " in SprPlotter::backgroundCurve." << endl;
00126 return false;
00127 }
00128
00129
00130 stable_sort(bgrnd.begin(),bgrnd.end(),not2(SPCmpPairDDFirst()));
00131 stable_sort(signal.begin(),signal.end(),not2(SPCmpPairDDFirst()));
00132
00133
00134 int maxsize = sigSortedEff.size();
00135 vector<double>::iterator maxeff = sigSortedEff.end();
00136 if( useAbsolute_ ) {
00137 maxeff = find_if(sigSortedEff.begin(),sigSortedEff.end(),
00138 bind2nd(greater<double>(),sigW_));
00139 }
00140 else {
00141 maxeff = find_if(sigSortedEff.begin(),sigSortedEff.end(),
00142 bind2nd(greater<double>(),1.));
00143 }
00144 if( maxeff != sigSortedEff.end() )
00145 maxsize = maxeff-sigSortedEff.begin();
00146 if( maxsize == 0 ) {
00147 cerr << "Values of signal efficiency out of range." << endl;
00148 return false;
00149 }
00150
00151
00152 vector<double> cuts(maxsize,signal[0].first);
00153 double w = 0;
00154 int startDivider = 0;
00155 int i = 0;
00156 while( i<signal.size() && startDivider<maxsize ) {
00157 w += signal[i].second;
00158 for( int divider=startDivider;divider<maxsize;divider++ ) {
00159 if( (useAbsolute_ && scaleS_*w>sigSortedEff[divider])
00160 || (!useAbsolute_ && (w/sigW_)>sigSortedEff[divider]) ) {
00161 if( i > 0 )
00162 cuts[divider] = 0.5 * (signal[i].first + signal[i-1].first);
00163 startDivider = divider + 1;
00164 }
00165 else
00166 break;
00167 }
00168 i++;
00169 }
00170
00171
00172 double defFOMvalue = 0;
00173 if( useAbsolute_ && crit_!=0 )
00174 defFOMvalue = crit_->fom(0,scaleB_*bgrW_,scaleS_*sigW_,0);
00175 FigureOfMerit defaultFOM(cuts[cuts.size()-1],
00176 (useAbsolute_ ? bgrW_ : 1),
00177 bgrN_,defFOMvalue);
00178 bgrndEff.clear();
00179 bgrndEff.resize(sigSortedEff.size(),defaultFOM);
00180 w = 0;
00181 startDivider = 0;
00182 i = 0;
00183 while( i<bgrnd.size() && startDivider<maxsize ) {
00184 for( int divider=startDivider;divider<maxsize;divider++ ) {
00185 if( bgrnd[i].first < cuts[divider] ) {
00186 if( useAbsolute_ ) {
00187 bgrndEff[divider]
00188 = FigureOfMerit(cuts[divider],scaleB_*w,i,
00189 ( crit_==0 ? 0 :
00190 crit_->fom(scaleB_*(bgrW_-w),scaleB_*w,
00191 sigSortedEff[divider],
00192 scaleS_*sigW_
00193 -sigSortedEff[divider])));
00194 }
00195 else {
00196 bgrndEff[divider] = FigureOfMerit(cuts[divider],w/bgrW_,i);
00197 }
00198 startDivider = divider + 1;
00199 }
00200 else
00201 break;
00202 }
00203 w += bgrnd[i].second;
00204 i++;
00205 }
00206
00207
00208 return true;
00209 }
00210
00211
00212 bool SprPlotter::fillSandB(const std::string& sclassifier,
00213 std::vector<std::pair<double,double> >& signal,
00214 std::vector<std::pair<double,double> >& bgrnd) const
00215 {
00216
00217 signal.clear();
00218 bgrnd.clear();
00219
00220
00221 for( int i=0;i<responses_.size();i++ ) {
00222
00223 map<string,double>::const_iterator found =
00224 responses_[i].response.find(sclassifier);
00225 if( found == responses_[i].response.end() ) {
00226 cerr << "Point " << i << " does not have response value "
00227 << "for classifier " << sclassifier.c_str() << endl;
00228 return false;
00229 }
00230
00231
00232 int cls = responses_[i].cls;
00233 double w = responses_[i].weight;
00234 if( cls == 0 )
00235 bgrnd.push_back(pair<double,double>(found->second,w));
00236 else if( cls == 1 )
00237 signal.push_back(pair<double,double>(found->second,w));
00238 }
00239
00240
00241 return true;
00242 }
00243
00244
00245 int SprPlotter::histogram(const char* classifier,
00246 double xlo, double xhi, double dx,
00247 std::vector<std::pair<double,double> >& sigHist,
00248 std::vector<std::pair<double,double> >& bgrHist)
00249 const
00250 {
00251
00252 const string sclassifier = classifier;
00253 if( sclassifier.empty() || dx<=0. || xlo>=xhi ) {
00254 cerr << "Invalid histogram parameters entered." << endl;
00255 return 0;
00256 }
00257
00258
00259 vector<pair<double,double> > signal;
00260 vector<pair<double,double> > bgrnd;
00261
00262
00263 if( !this->fillSandB(sclassifier,signal,bgrnd) ) {
00264 cerr << "Unable to fill out signal and background vectors "
00265 << "in SprPlotter::histogram." << endl;
00266 return 0;
00267 }
00268
00269
00270 stable_sort(bgrnd.begin(),bgrnd.end(),SPCmpPairDDFirst());
00271 stable_sort(signal.begin(),signal.end(),SPCmpPairDDFirst());
00272
00273
00274 int nbin = int(floor((xhi-xlo)/dx)) + 1;
00275 sigHist.clear(); bgrHist.clear();
00276 sigHist.resize(nbin); bgrHist.resize(nbin);
00277
00278
00279 int jsig(0), jbgr(0);
00280
00281
00282 if( !signal.empty() ) {
00283 if( signal[0].first < xlo ) {
00284 bool lbreak = false;
00285 for( int j=0;j<signal.size();j++ ) {
00286 if( signal[j].first >= xlo ) {
00287 jsig = j;
00288 lbreak = true;
00289 break;
00290 }
00291 }
00292 if( !lbreak ) jsig = signal.size();
00293 }
00294 }
00295 if( !bgrnd.empty() ) {
00296 if( bgrnd[jbgr].first < xlo ) {
00297 bool lbreak = false;
00298 for( int j=jbgr;j<bgrnd.size();j++ ) {
00299 if( bgrnd[j].first >= xlo ) {
00300 jbgr = j;
00301 lbreak = true;
00302 break;
00303 }
00304 }
00305 if( !lbreak ) jbgr = bgrnd.size();
00306 }
00307 }
00308
00309
00310 for( int i=0;i<nbin;i++ ) {
00311 double xa = xlo + i*dx;
00312 double xb = xa + dx;
00313
00314
00315 double wsig = 0;
00316 unsigned nsig = 0;
00317 bool lbreak = false;
00318 for( int j=jsig;j<signal.size();j++ ) {
00319 if( signal[j].first >= xb ) {
00320 jsig = j;
00321 lbreak = true;
00322 break;
00323 }
00324 wsig += signal[j].second;
00325 nsig++;
00326 }
00327 double errS = ( nsig>0 ? scaleS_*wsig/sqrt(double(nsig)) : 0 );
00328 sigHist[i] = pair<double,double>(scaleS_*wsig,errS);
00329 if( !lbreak ) jsig = signal.size();
00330
00331
00332 double wbgr = 0;
00333 unsigned nbgr = 0;
00334 lbreak = false;
00335 for( int j=jbgr;j<bgrnd.size();j++ ) {
00336 if( bgrnd[j].first >= xb ) {
00337 jbgr = j;
00338 lbreak = true;
00339 break;
00340 }
00341 wbgr += bgrnd[j].second;
00342 nbgr++;
00343 }
00344 double errB = ( nbgr>0 ? scaleB_*wbgr/sqrt(double(nbgr)) : 0 );
00345 bgrHist[i] = pair<double,double>(scaleB_*wbgr,errB);
00346 if( !lbreak ) jbgr = bgrnd.size();
00347 }
00348
00349
00350 assert( sigHist.size() == bgrHist.size() );
00351
00352
00353 return nbin;
00354 }