CMS 3D CMS Logo

SprBumpHunter.cc

Go to the documentation of this file.
00001 //$Id: SprBumpHunter.cc,v 1.2 2007/09/21 22:32:09 narsky Exp $
00002 
00003 #include "PhysicsTools/StatPatternRecognition/interface/SprExperiment.hh"
00004 #include "PhysicsTools/StatPatternRecognition/interface/SprBumpHunter.hh"
00005 #include "PhysicsTools/StatPatternRecognition/interface/SprAbsTwoClassCriterion.hh"
00006 #include "PhysicsTools/StatPatternRecognition/interface/SprUtils.hh"
00007 
00008 #include <stdio.h>
00009 #include <cassert>
00010 #include <algorithm>
00011 #include <functional>
00012 #include <cmath>
00013 
00014 using namespace std;
00015 
00016 
00017 struct SBHCmpPairFirst 
00018   : public binary_function<pair<double,int>,pair<double,int>,bool> {
00019   bool operator()(const pair<double,int>& l, const pair<double,int>& r)
00020     const {
00021     return (l.first < r.first);
00022   }
00023 };
00024 
00025 
00026 SprBumpHunter::SprBumpHunter(SprAbsFilter* data, 
00027                              const SprAbsTwoClassCriterion* crit,
00028                              int nbump,
00029                              int nmin,
00030                              double apeel)
00031   :
00032   SprAbsClassifier(data),
00033   crit_(crit),
00034   nbump_(nbump),
00035   nmin_(nmin),
00036   apeel_(apeel),
00037   box_(new SprBoxFilter(data)),
00038   boxes_(),
00039   fom_(),
00040   n0_(),
00041   n1_(),
00042   w0_(),
00043   w1_(),
00044   nsplit_(0),
00045   cls0_(0),
00046   cls1_(1)
00047 {
00048   assert( crit_ != 0 );
00049   assert( nbump_ > 0 );
00050   assert( nmin_ > 0 );
00051   assert( apeel_ > 0 );
00052   this->setClasses();
00053 }
00054 
00055 
00056 void SprBumpHunter::setClasses() 
00057 {
00058   vector<SprClass> classes;
00059   box_->classes(classes);
00060   int size = classes.size();
00061   if( size > 0 ) cls0_ = classes[0];
00062   if( size > 1 ) cls1_ = classes[1];
00063   cout << "Classes for bump hunter are set to " 
00064        << cls0_ << " " << cls1_ << endl;
00065 }
00066 
00067 
00068 bool SprBumpHunter::train(int verbose)
00069 {
00070   // continue until all bumps are found
00071   SprBox limits;
00072   while( fom_.size() < nbump_ ) {
00073     // init
00074     int status = -1;
00075     double fom0(0), w0(0), w1(0);
00076     unsigned n0(0), n1(0);
00077 
00078     // find box by shrinking
00079     while( (status = this->shrink(limits,n0,n1,w0,w1,fom0,verbose)) == 0 ) {
00080       if( (nsplit_++%100) == 0 ) 
00081         cout << "Performing shrinking split " << (nsplit_-1) << " ..." << endl;
00082     }
00083     if( status < 0 ) return false;
00084 
00085     // expand box
00086     while( (status = this->expand(limits,n0,n1,w0,w1,fom0,verbose)) == 0 ) {
00087       if( (nsplit_++%100) == 0 ) 
00088         cout << "Performing expanding split " << (nsplit_-1) << " ..." << endl;
00089     }
00090     if( status < 0 ) return false;
00091 
00092     // box found
00093     if( w1 < SprUtils::eps() ) {
00094       cout << "Unable to find a new box. Exiting." << endl;
00095       break;
00096     }
00097     cout << "Found box " << fom_.size() << endl;
00098     fom_.push_back(fom0);
00099     n0_.push_back(n0);
00100     n1_.push_back(n1);
00101     w0_.push_back(w0);
00102     w1_.push_back(w1);
00103     boxes_.push_back(limits);
00104 
00105     // exclude the found box
00106     if( fom_.size() < nbump_ ) {
00107       SprData forRemoval(false);
00108       for( int i=0;i<box_->size();i++ ) {
00109         forRemoval.insert((*box_)[i]);
00110       }
00111       if( verbose > 0 ) {
00112         cout << "Will remove " << forRemoval.size() 
00113              << " points for the found box." << endl;
00114       }
00115       box_->clear();
00116       int beforeRemoval = box_->size();
00117       if( !box_->fastRemove(&forRemoval) ) {
00118         cerr << "Cannot remove data from box." << endl;
00119         return false;
00120       }
00121       int afterRemoval = box_->size();
00122       if( verbose > 0 ) {
00123         cout << "Sample has been reduced from " 
00124              << beforeRemoval << " to " << afterRemoval << " points." << endl;
00125       }
00126       SprBoxFilter* box = new SprBoxFilter(box_);
00127       delete box_;
00128       box_ = box;
00129       assert( box_->size() == afterRemoval );
00130       
00131       // reset limits
00132       limits.clear();
00133     }
00134   }
00135 
00136   // exit
00137   return !fom_.empty();
00138 }
00139 
00140 
00141 bool SprBumpHunter::reset()
00142 {
00143   delete box_;
00144   box_ = new SprBoxFilter(data_);
00145   boxes_.clear();
00146   fom_.clear();
00147   n0_.clear();
00148   n1_.clear();
00149   w0_.clear();
00150   w1_.clear();
00151   nsplit_ = 0;
00152   return true;
00153 }
00154 
00155 
00156 bool SprBumpHunter::setData(SprAbsFilter* data)
00157 {
00158   assert( data != 0 );
00159   data_ = data;
00160   return this->reset();
00161 }
00162 
00163 
00164 void SprBumpHunter::print(std::ostream& os) const
00165 {
00166   os << "Bumps: " << boxes_.size() << " " << SprVersion << endl;
00167   os << "-------------------------------------------------------" << endl;
00168   vector<string> vars;
00169   box_->vars(vars);
00170   for( int i=0;i<boxes_.size();i++ ) {
00171     int size = boxes_[i].size();
00172     char s [200];
00173     sprintf(s,"Bump %6i    Size %-4i    FOM=%-10g W0=%-10g W1=%-10g N0=%-10i N1=%-10i",i,size,fom_[i],w0_[i],w1_[i],n0_[i],n1_[i]);
00174     os << s << endl;
00175     for( SprBox::const_iterator j=boxes_[i].begin();j!=boxes_[i].end();j++ ) {
00176       assert( j->first < vars.size() );
00177       char s [200];
00178       sprintf(s,"Variable %30s    Limits  %15g %15g",
00179               vars[j->first].c_str(),j->second.first,j->second.second);
00180       os << s << endl;
00181     }
00182     os << "-------------------------------------------------------" << endl;
00183   }
00184 }
00185 
00186 
00187 SprTrainedDecisionTree* SprBumpHunter::makeTrained() const
00188 {
00189   // make tree
00190   SprTrainedDecisionTree* t =  new SprTrainedDecisionTree(boxes_);
00191 
00192   // vars
00193   vector<string> vars;
00194   data_->vars(vars);
00195   t->setVars(vars);
00196 
00197   // exit
00198   return t;
00199 }
00200 
00201 
00202 bool SprBumpHunter::sort(int dsort, 
00203                          std::vector<std::vector<int> >& sorted,
00204                          std::vector<std::vector<double> >& division) const
00205 {
00206   // sanity check
00207   int size = box_->size();
00208   if( size == 0 ) {
00209     cerr << "Unable to sort an empty box." << endl;
00210     return false;
00211   }
00212 
00213   // init
00214   unsigned dim = box_->dim();
00215   sorted.clear();
00216   sorted.resize(dim,vector<int>(size));
00217   division.clear();
00218   division.resize(dim);
00219 
00220   // loop through dimensions
00221   for( int d=0;d<dim;d++ ) {
00222     // check dim
00223     if( dsort>=0 && d!=dsort ) continue;
00224 
00225     // make vector
00226     vector<pair<double,int> > r(size);
00227 
00228     // loop through points
00229     for( int j=0;j<size;j++ )
00230       r[j] = pair<double,int>((*box_)[j]->x_[d],j);
00231     
00232     // sort
00233     stable_sort(r.begin(),r.end(),SBHCmpPairFirst());
00234     
00235     // fill out sorted indices
00236     division[d].push_back(SprUtils::min());
00237     double xprev = r[0].first;
00238     sorted[d][0] = r[0].second;
00239     for( int j=1;j<size;j++ ) {
00240       sorted[d][j] = r[j].second;
00241       double xcurr = r[j].first;
00242       if( (xcurr-xprev) > SprUtils::eps() ) {
00243         division[d].push_back(0.5*(xcurr+xprev));
00244         xprev = xcurr;
00245       }
00246     }
00247     division[d].push_back(SprUtils::max());
00248   }
00249 
00250   // exit
00251   return true;
00252 }
00253 
00254 
00255 int SprBumpHunter::shrink(SprBox& limits,
00256                           unsigned& n0, unsigned& n1,
00257                           double& w0, double& w1, double& fom0, int verbose)
00258 {
00259   // sanity check
00260   if( box_->empty() ) {
00261     if( verbose > 0 ) {
00262       cout << "Unable to shrink - the box is empty." << endl;
00263     }
00264     return 1;
00265   }
00266 
00267   // save initial number of events
00268   int initSize = box_->size();
00269 
00270   // sort
00271   vector<vector<int> > sorted;
00272   vector<vector<double> > division;
00273   if( !this->sort(-1,sorted,division) ) {
00274     cerr << "Unable to sort box." << endl;
00275     return -1;
00276   }
00277 
00278   // set limits
00279   if( verbose > 1 ) {
00280     cout << "Limits:" << endl;
00281     for( SprBox::const_iterator d=limits.begin();d!=limits.end();d++ ) {
00282       cout << "Dimension " << d->first << "    " 
00283            << d->second.first << " " << d->second.second << endl;
00284     }
00285   }
00286 
00287   // check number of events
00288   n0 = box_->ptsInClass(cls0_);
00289   n1 = box_->ptsInClass(cls1_);
00290   int ntot = n0 + n1;
00291   if( ntot < nmin_ ) {
00292     if( verbose > 1 ) {
00293       cout << "Split failed - not enough events." << endl;
00294     }
00295     return 1;
00296   }
00297 
00298   // get total weights
00299   w0 = box_->weightInClass(cls0_);
00300   w1 = box_->weightInClass(cls1_);
00301   if( w0<SprUtils::eps() || w1<SprUtils::eps() ) {
00302     if( verbose > 1 ) {
00303       cout << "Data missing one of categories." << endl;
00304     }
00305     return 1;
00306   }
00307 
00308   // compute fom
00309   fom0 = crit_->fom(0,w0,w1,0);
00310 
00311   // message
00312   if( verbose > 1 ) {
00313     cout << "===================" << endl;
00314     cout << "Splitting with " << w0 << " background and " 
00315          << w1 << " signal weights and " << ntot << " events." << endl;
00316     cout << "Starting FOM=" << fom0 << endl;
00317   }
00318 
00319   // init
00320   unsigned dim = box_->dim();
00321   vector<double> fom(dim,SprUtils::min());
00322   SprBox savedBox;
00323 
00324   // loop through dimensions
00325   for( int d=0;d<dim;d++ ) {
00326 
00327     // check divisions
00328     if( division[d].empty() ) continue;
00329 
00330     // make vectors
00331     vector<double> flo, fhi;
00332 
00333     // loop through points to find low cuts
00334     double wmis0 = w0;
00335     double wcor1 = w1;
00336     double wmis1(0), wcor0(0);
00337     int nmis0 = n0;
00338     int ncor1 = n1;
00339     int nmis1(0), ncor0(0);
00340     int istart(0), isplit(0);
00341     for( int k=0;k<division[d].size();k++ ) {
00342       double z = division[d][k];
00343       bool lbreak = false;
00344       for( isplit=istart;isplit<sorted[d].size();isplit++ ) {
00345         if( ((*box_)[sorted[d][isplit]])->x_[d] > z ) {
00346           lbreak = true;
00347           break;
00348         }
00349       }
00350       if( !lbreak ) isplit = sorted[d].size();
00351       for( int i=istart;i<isplit;i++ ) {
00352         int index = sorted[d][i];
00353         const SprPoint* p = (*box_)[index];
00354         double w = box_->w(index);
00355         if(      p->class_ == cls0_ ) {
00356           wmis0 -= w;
00357           wcor0 += w;
00358           nmis0--;
00359           ncor0++;
00360         }
00361         else if( p->class_ == cls1_ ) {
00362           wcor1 -= w;
00363           wmis1 += w;
00364           ncor1--;
00365           nmis1++;
00366         }
00367       }
00368       istart = isplit;
00369       if( (wcor0+wmis1) > apeel_*(w0+w1) ) break;
00370       if( (ncor1+nmis0) < nmin_ ) break;
00371       flo.push_back(crit_->fom(wcor0,wmis0,wcor1,wmis1));
00372       if( verbose > 2 ) {
00373         cout << "Imposing shrinking low split K=" << k 
00374              << " in dimension " << d << " at location=" << z
00375              << " with FOM=" << flo[flo.size()-1]
00376              << endl;
00377       }
00378     }
00379 
00380     // loop through points to find high cuts
00381     wmis0 = w0; wcor1 = w1;
00382     wmis1 = 0; wcor0 = 0;
00383     nmis0 = n0; ncor1 = n1;
00384     nmis1 = 0; ncor0 = 0;
00385     istart = sorted[d].size()-1;
00386     for( int k=division[d].size()-1;k>-1;k-- ) {
00387       double z = division[d][k];
00388       bool lbreak = false;
00389       for( isplit=istart;isplit>-1;isplit-- ) {
00390         if( ((*box_)[sorted[d][isplit]])->x_[d] < z ) {
00391           lbreak = true;
00392           break;
00393         }
00394       }
00395       if( !lbreak ) isplit = -1;
00396       for( int i=istart;i>isplit;i-- ) {
00397         int index = sorted[d][i];
00398         const SprPoint* p = (*box_)[index];
00399         double w = box_->w(index);
00400         if(      p->class_ == cls0_ ) {
00401           wmis0 -= w;
00402           wcor0 += w;
00403           nmis0--;
00404           ncor0++;
00405         }
00406         else if( p->class_ == cls1_ ) {
00407           wcor1 -= w;
00408           wmis1 += w;
00409           ncor1--;
00410           nmis1++;
00411         }
00412       }
00413       istart = isplit;
00414       if( (wcor0+wmis1) > apeel_*(w0+w1) ) break;
00415       if( (ncor1+nmis0) < nmin_ ) break;
00416       fhi.push_back(crit_->fom(wcor0,wmis0,wcor1,wmis1));
00417       if( verbose > 2 ) {
00418         cout << "Imposing shrinking high split K=" << k 
00419              << " in dimension " << d << "  at location=" << z
00420              << " with FOM=" << fhi[fhi.size()-1]
00421              << endl;
00422       }
00423     }
00424 
00425     // find optimal point and sign of cut
00426     if( flo.empty() && fhi.empty() ) continue;
00427     int klo(-1), khi(-1);
00428     double lfom = SprUtils::min();
00429     double hfom = SprUtils::min();
00430     if( !flo.empty() ) {
00431       vector<double>::iterator ilo = max_element(flo.begin(),flo.end());
00432       klo = ilo - flo.begin();
00433       lfom = *ilo;
00434     }
00435     if( !fhi.empty() ) {
00436       vector<double>::iterator ihi = max_element(fhi.begin(),fhi.end());
00437       khi = ihi - fhi.begin();
00438       hfom = *ihi;
00439     }
00440     if( lfom>hfom && klo!=0 && klo!=(division[d].size()-1) ) {
00441       if( !savedBox.insert(pair<const unsigned,SprInterval>(d,(SprUtils::lowerBound(division[d][klo]))[0])).second ) {
00442         cerr << "Unable to insert interval for dimension " << d << endl;
00443         return -1;
00444       }
00445       fom[d] = lfom;
00446     }
00447     else if( khi!=0 && khi!=(division[d].size()-1) ) {
00448       if( !savedBox.insert(pair<const unsigned,SprInterval>(d,(SprUtils::upperBound(division[d][division[d].size()-1-khi]))[0])).second ) {
00449         cerr << "Unable to insert interval for dimension " << d << endl;
00450         return -1;
00451       }
00452       fom[d] = hfom;
00453     }
00454   }
00455 
00456   // find optimal fom
00457   vector<double>::iterator imax = max_element(fom.begin(),fom.end());
00458 
00459   // update box
00460   bool lexit = true;
00461   if( *imax > fom0 ) {
00462     lexit = false;
00463     int d = imax - fom.begin();
00464     SprBox::const_iterator imposed = savedBox.find(d);
00465     assert( imposed != savedBox.end() );
00466     SprInterval z = imposed->second;
00467     if( verbose > 0 ) {
00468       cout << "Making shrinking split " << nsplit_ 
00469            << " in dimension " << d << " with " 
00470            << n1 << " signal and " << n0 << " background events" 
00471            << "     FOM=" << *imax 
00472            << "    interval=" << z.first << " " << z.second << endl;
00473     }
00474 
00475     // update limits
00476     SprBox::iterator found = limits.find(d);
00477     if( found == limits.end() ) {
00478       limits.insert(pair<const unsigned,SprInterval>(d,z));
00479     }
00480     else {
00481       if( found->second.first < z.first )
00482         found->second.first = z.first;
00483       if( found->second.second > z.second )
00484         found->second.second = z.second;
00485       assert( found->second.first < found->second.second );
00486     }
00487 
00488     // cut off tails
00489     box_->setBox(limits);
00490     if( !box_->filter() ) {
00491       cerr << "Cannot filter box." << endl;
00492       return -1;
00493     }
00494 
00495     // print out
00496     if( verbose > 1 ) {
00497       cout << "Imposed cut in dimension " << d << "  : "
00498            << found->second.first << " " <<  found->second.second 
00499            << "  Box reduced to " << box_->size() << " events." << endl;
00500     }
00501 
00502     // check if the size has changed
00503     if( box_->size() >= initSize ) {
00504       if( verbose > 0 ) 
00505         cout << "Warning: box has not been reduced." << endl;
00506       lexit = true;
00507     }
00508   }
00509 
00510   // see if time to exit
00511   if( lexit ) {
00512     // reset weights
00513     w0 = box_->weightInClass(cls0_);
00514     w1 = box_->weightInClass(cls1_);
00515     n0 = box_->ptsInClass(cls0_);
00516     n1 = box_->ptsInClass(cls1_);
00517     fom0 = crit_->fom(0,w0,w1,0);
00518 
00519     // message
00520     if( verbose > 0 ) {
00521       cout << "Found box " << boxes_.size() << " with "
00522            << w0 << " background and " << w1 << " signal weights;   "
00523            << n0 << " background and " << n1 << " signal events"
00524            << "     FOM=" << fom0  << endl;
00525     }
00526     return 1;
00527   }
00528 
00529   // exit
00530   if( verbose > 1 )
00531     cout << "===================" << endl;
00532   return 0;
00533 }
00534 
00535 
00536 int SprBumpHunter::expand(SprBox& limits, 
00537                           unsigned& n0, unsigned& n1,
00538                           double& w0, double& w1, double& fom0, int verbose)
00539 {
00540   // sanity check
00541   if( box_->empty() ) {
00542     if( verbose > 0 ) {
00543       cout << "Unable to expand - the box is empty." << endl;
00544     }
00545     return 1;
00546   }
00547 
00548   // save initial number of events
00549   int initSize = box_->size();
00550 
00551   // get total weights, fom and node type
00552   w0 = box_->weightInClass(cls0_);
00553   w1 = box_->weightInClass(cls1_);
00554 
00555   // check number of events
00556   n0 = box_->ptsInClass(cls0_);
00557   n1 = box_->ptsInClass(cls1_);
00558   int ntot = n0 + n1;
00559 
00560   // compute fom
00561   fom0 = crit_->fom(0,w0,w1,0);
00562 
00563   // message
00564   if( verbose > 1 ) {
00565     cout << "===================" << endl;
00566     cout << "Expanding with " << w0 << " background and " 
00567          << w1 << " signal weights and " << ntot << " events." << endl;
00568     cout << "Starting FOM=" << fom0 << endl;
00569   }
00570 
00571   // init
00572   unsigned dim = box_->dim();
00573   vector<double> fom(dim,SprUtils::min());
00574   SprBox savedBox = limits;
00575 
00576   // set limits
00577   if( verbose > 1 ) {
00578     cout << "Limits:" << endl;
00579     for( SprBox::const_iterator d=limits.begin();d!=limits.end();d++ ) {
00580       cout << "Dimension " << d->first << "    " 
00581            << d->second.first << " " << d->second.second << endl;
00582     }
00583   }
00584 
00585   // loop through dimensions
00586   for( int d=0;d<dim;d++ ) {
00587     // reset limits
00588     box_->setBox(limits);
00589 
00590     // check if any limits exist    
00591     SprBox::iterator found = savedBox.find(d);
00592     if( found == savedBox.end() ) {
00593       if( verbose > 1 )
00594         cout << "No cuts on dimension " << d << " imposed. " 
00595              << " will not attempt to expand."<< endl;
00596       continue;
00597     }
00598 
00599     // look at both sides of the box
00600     for( int side=-1;side<2;side+=2 ) {
00601 
00602       // compute starting weights
00603       if( verbose > 1 ) {
00604         cout << "Resetting cuts on dimension " << d << " at "
00605              << "     Low=" << found->second.first 
00606              << "    High=" << found->second.second << endl;
00607       }
00608       box_->setRange(d,found->second);
00609       if( !box_->filter() ) {
00610         cerr << "Cannot filter box." << endl;
00611         return -1;
00612       }
00613       if( box_->empty() ) {
00614         cerr << "Box is empty after cutting on dimension " << d 
00615              << " at " << found->second.first << " " 
00616              << found->second.second << endl;
00617         return 1;
00618       }
00619 
00620       // get total weights
00621       double w0d = box_->weightInClass(cls0_);
00622       double w1d = box_->weightInClass(cls1_);
00623       if( verbose > 1 ) {
00624         cout << "Starting optimization in dimension " << d
00625              << " with " << w1d << " signal and "
00626              << w0d << " background weights." << endl;
00627       }
00628       double startfom = crit_->fom(0,w0d,w1d,0);
00629 
00630       // check number of events
00631       int n0d = box_->ptsInClass(cls0_);
00632       int n1d = box_->ptsInClass(cls1_);
00633 
00634       // invert cut on dimension d to accept events outside the box
00635       SprInterval outer;
00636       if(      side < 0 )
00637         outer = SprInterval(SprUtils::min(),found->second.first);
00638       else if( side > 0 )
00639         outer = SprInterval(found->second.second,SprUtils::max());
00640       box_->setRange(d,outer);// invert cut on dimension d
00641 
00642       // filter
00643       if( !box_->filter() ) {
00644         cerr << "Cannot filter box." << endl;
00645         return -1;
00646       }
00647       if( box_->empty() ) continue;
00648 
00649       // compute weights
00650       double w0add = box_->weightInClass(cls0_);
00651       double w1add = box_->weightInClass(cls1_);
00652       if( verbose > 1 ) {
00653         cout << "Expanding in dimension " << d << " into area with " 
00654              << w0add << " background and " 
00655              << w1add << " signal events." << endl;
00656       }
00657       if( w1add < SprUtils::eps() ) continue;// do nothing if added area empty
00658 
00659       // sort
00660       vector<vector<int> > sorted;
00661       vector<vector<double> > division;
00662       if( !this->sort(d,sorted,division) ) {
00663         cerr << "Unable to sort box." << endl;
00664         return -1;
00665       }
00666 
00667       // prepare weights and fom
00668       double wmis0(w0add), wcor1(w1add);
00669       double wmis1(0), wcor0(0);
00670       int nmis0(n0d), ncor1(n1d);
00671       int nmis1(0), ncor0(0);
00672       vector<double> fomnew;
00673 
00674       // loop through points and find cuts
00675       if(      side < 0 ) {// lower cut
00676         int istart(0), isplit(0);
00677         for( int k=0;k<division[d].size();k++ ) {
00678           double z = division[d][k];
00679           bool lbreak = false;
00680           for( isplit=istart;isplit<sorted[d].size();isplit++ ) {
00681             if( ((*box_)[sorted[d][isplit]])->x_[d] > z ) {
00682               lbreak = true;
00683               break;
00684             }
00685           }
00686           if( !lbreak ) isplit = sorted[d].size();
00687           for( int i=istart;i<isplit;i++ ) {
00688             int index = sorted[d][i];
00689             const SprPoint* p = (*box_)[index];
00690             double w = box_->w(index);
00691             if(      p->class_ == cls0_ ) {
00692               wmis0 -= w;
00693               wcor0 += w;
00694               nmis0--;
00695               ncor0++;
00696             }
00697             else if( p->class_ == cls1_ ) {
00698               wcor1 -= w;
00699               wmis1 += w;
00700               ncor1--;
00701               nmis1++;
00702             }
00703           }
00704           istart = isplit;
00705           fomnew.push_back(crit_->fom(wcor0,wmis0+w0d,wcor1+w1d,wmis1));
00706           if( verbose > 2 ) {
00707             cout << "Imposing expanding low split K=" << k 
00708                  << " in dimension " << d << " with FOM=" 
00709                  << fomnew[fomnew.size()-1] << endl;
00710           }
00711         }
00712       }
00713       else if( side > 0 ) {// high cut
00714         int istart(sorted[d].size()-1), isplit(0);
00715         for( int k=division[d].size()-1;k>=0;k-- ) {
00716           double z = division[d][k];
00717           bool lbreak = false;
00718           for( isplit=istart;isplit>=0;isplit-- ) {
00719             if( ((*box_)[sorted[d][isplit]])->x_[d] < z ) {
00720               lbreak = true;
00721               break;
00722             }
00723           }
00724           if( !lbreak ) isplit = -1;
00725           for( int i=istart;i>isplit;i-- ) {
00726             int index = sorted[d][i];
00727             const SprPoint* p = (*box_)[index];
00728             double w = box_->w(index);
00729             if(      p->class_ == cls0_ ) {
00730               wmis0 -= w;
00731               wcor0 += w;
00732               nmis0--;
00733               ncor0++;
00734             }
00735             else if( p->class_ == cls1_ ) {
00736               wcor1 -= w;
00737               wmis1 += w;
00738               ncor1--;
00739               nmis1++;
00740             }
00741           }
00742           istart = isplit;
00743           fomnew.push_back(crit_->fom(wcor0,wmis0+w0d,wcor1+w1d,wmis1));
00744           if( verbose > 2 ) {
00745             cout << "Imposing expanding high split K=" << k 
00746                  << " in dimension " << d 
00747                  << " with FOM=" << fomnew[fomnew.size()-1] << endl;
00748           }
00749         }
00750       }// end side > 0
00751 
00752       // find optimal point and sign of cut
00753       vector<double>::iterator imax = max_element(fomnew.begin(),fomnew.end());
00754       int k = imax - fomnew.begin();
00755       double maxfom = *imax;
00756 
00757       // print out
00758       if( verbose > 1 ) {
00759         cout << "FOM's for dimension " << d 
00760              << ":   Side=" << side << "  Best FOM=" << maxfom 
00761              << "   Start FOM=" << startfom << "  Init FOM=" << fom0 
00762              << "   K=" << k << endl;
00763       }
00764 
00765       // find optimal cut
00766       if( maxfom > startfom ) {
00767         fom[d] = maxfom;
00768         if(      side < 0 ) {
00769           double z = division[d][k];
00770           if( found->second.first > z ) found->second.first = z;
00771         }
00772         else if( side > 0 ) {
00773           double z = division[d][division[d].size()-1-k];
00774           if( found->second.second < z ) found->second.second = z;
00775         }
00776       }
00777     }// end side loop
00778     if( verbose > 1 ) {
00779       cout << "Found the best cut on dimension " << d << " at " 
00780            << "    Low=" << found->second.first 
00781            << "   High=" << found->second.second 
00782            << "  with FOM=" << fom[d] << endl;
00783     }
00784   }// end dim loop
00785 
00786   // find optimal fom
00787   vector<double>::iterator imax = max_element(fom.begin(),fom.end());
00788 
00789   // update box
00790   bool lexit = true;
00791   if( *imax > fom0 ) {
00792     lexit = false;
00793     int d = imax - fom.begin();
00794     SprBox::const_iterator imposed = savedBox.find(d);
00795     assert( imposed != savedBox.end() );
00796     SprInterval z = imposed->second;
00797     if( verbose > 0 ) {
00798       cout << "Making expansion " << nsplit_ 
00799            << " in dimension " << d << " with " 
00800            << n1 << " signal and " << n0 << " background events" 
00801            << "     FOM=" << *imax 
00802            << "    interval=" << z.first << " " << z.second << endl;
00803     }
00804 
00805     // update limits
00806     SprBox::iterator found = limits.find(d);
00807     if( found != limits.end() ) {
00808       if( found->second.first > z.first )
00809         found->second.first = z.first;
00810       if( found->second.second < z.second )
00811         found->second.second = z.second;
00812       assert( found->second.first < found->second.second );
00813     }
00814 
00815     // cut off tails
00816     box_->setBox(limits);
00817     if( !box_->filter() ) {
00818       cerr << "Cannot filter box." << endl;
00819       return -1;
00820     }
00821 
00822     // check if the size has changed
00823     if( box_->size() <= initSize ) {
00824       if( verbose > 0 )
00825         cout << "Warning: box has not been increased." << endl;
00826       lexit = true;
00827     }
00828   }
00829 
00830   // see if time to exit
00831   if( lexit ) {
00832     // cut off tails
00833     box_->setBox(limits);
00834     if( !box_->filter() ) {
00835       cerr << "Cannot filter box." << endl;
00836       return -1;
00837     }
00838 
00839     // recompute weights
00840     w0 = box_->weightInClass(cls0_);
00841     w1 = box_->weightInClass(cls1_);
00842     n0 = box_->ptsInClass(cls0_);
00843     n1 = box_->ptsInClass(cls1_);
00844     fom0 = crit_->fom(0,w0,w1,0);
00845     
00846     // message
00847     if( verbose > 0 ) {
00848       cout << "Found box " << boxes_.size() << " with "
00849            << w0 << " background and " << w1 << " signal weights;   "
00850            << n0 << " background and " << n1 << " signal events"
00851            << "     FOM=" << fom0  << endl;
00852     }
00853     return 1;
00854   }
00855 
00856   // exit
00857   if( verbose > 1 )
00858     cout << "===================" << endl;
00859   return 0;
00860 }
00861 
00862 
00863 
00864 
00865 

Generated on Tue Jun 9 17:42:00 2009 for CMSSW by  doxygen 1.5.4