CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_9_patch3/src/HiggsAnalysis/CombinedLimit/src/MultiDimFit.cc

Go to the documentation of this file.
00001 #include "../interface/MultiDimFit.h"
00002 #include <stdexcept>
00003 #include <cmath>
00004 
00005 #include "TMath.h"
00006 #include "RooArgSet.h"
00007 #include "RooArgList.h"
00008 #include "RooRandom.h"
00009 #include "RooAbsData.h"
00010 #include "RooFitResult.h"
00011 #include "../interface/RooMinimizerOpt.h"
00012 #include <RooStats/ModelConfig.h>
00013 #include "../interface/Combine.h"
00014 #include "../interface/CascadeMinimizer.h"
00015 #include "../interface/CloseCoutSentry.h"
00016 #include "../interface/utils.h"
00017 
00018 #include <Math/Minimizer.h>
00019 #include <Math/MinimizerOptions.h>
00020 #include <Math/QuantFuncMathCore.h>
00021 #include <Math/ProbFunc.h>
00022 
00023 using namespace RooStats;
00024 
00025 MultiDimFit::Algo MultiDimFit::algo_ = None;
00026 std::vector<std::string>  MultiDimFit::poi_;
00027 std::vector<RooRealVar *> MultiDimFit::poiVars_;
00028 std::vector<float>        MultiDimFit::poiVals_;
00029 RooArgList                MultiDimFit::poiList_;
00030 float                     MultiDimFit::deltaNLL_ = 0;
00031 unsigned int MultiDimFit::points_ = 50;
00032 unsigned int MultiDimFit::firstPoint_ = 0;
00033 unsigned int MultiDimFit::lastPoint_  = std::numeric_limits<unsigned int>::max();
00034 bool MultiDimFit::floatOtherPOIs_ = false;
00035 unsigned int MultiDimFit::nOtherFloatingPoi_ = 0;
00036 bool MultiDimFit::fastScan_ = false;
00037 
00038 
00039 MultiDimFit::MultiDimFit() :
00040     FitterAlgoBase("MultiDimFit specific options")
00041 {
00042     options_.add_options()
00043         ("algo",  boost::program_options::value<std::string>()->default_value("none"), "Algorithm to compute uncertainties")
00044         ("poi,P",   boost::program_options::value<std::vector<std::string> >(&poi_), "Parameters of interest to fit (default = all)")
00045         ("floatOtherPOIs",   boost::program_options::value<bool>(&floatOtherPOIs_)->default_value(floatOtherPOIs_), "POIs other than the selected ones will be kept freely floating (1) or fixed (0, default)")
00046         ("points",  boost::program_options::value<unsigned int>(&points_)->default_value(points_), "Points to use for grid or contour scans")
00047         ("firstPoint",  boost::program_options::value<unsigned int>(&firstPoint_)->default_value(firstPoint_), "First point to use")
00048         ("lastPoint",  boost::program_options::value<unsigned int>(&lastPoint_)->default_value(lastPoint_), "Last point to use")
00049         ("fastScan", "Do a fast scan, evaluating the likelihood without profiling it.")
00050        ;
00051 }
00052 
00053 void MultiDimFit::applyOptions(const boost::program_options::variables_map &vm) 
00054 {
00055     applyOptionsBase(vm);
00056     std::string algo = vm["algo"].as<std::string>();
00057     if (algo == "none") {
00058         algo_ = None;
00059     } else if (algo == "singles") {
00060         algo_ = Singles;
00061     } else if (algo == "cross") {
00062         algo_ = Cross;
00063     } else if (algo == "grid") {
00064         algo_ = Grid;
00065     } else if (algo == "random") {
00066         algo_ = RandomPoints;
00067     } else if (algo == "contour2d") {
00068         algo_ = Contour2D;
00069     } else throw std::invalid_argument(std::string("Unknown algorithm: "+algo));
00070     fastScan_ = (vm.count("fastScan") > 0);
00071 }
00072 
00073 bool MultiDimFit::runSpecific(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) { 
00074     // one-time initialization of POI variables, TTree branches, ...
00075     static int isInit = false;
00076     if (!isInit) { initOnce(w, mc_s); isInit = true; }
00077 
00078     // Get PDF
00079     RooAbsPdf &pdf = *mc_s->GetPdf();
00080 
00081     // Process POI not in list
00082     nOtherFloatingPoi_ = 0;
00083     RooLinkedListIter iterP = mc_s->GetParametersOfInterest()->iterator();
00084     for (RooAbsArg *a = (RooAbsArg*) iterP.Next(); a != 0; a = (RooAbsArg*) iterP.Next()) {
00085         if (poiList_.contains(*a)) continue;
00086         RooRealVar *rrv = dynamic_cast<RooRealVar *>(a);
00087         if (rrv == 0) { std::cerr << "MultiDimFit: Parameter of interest " << a->GetName() << " which is not a RooRealVar will be ignored" << std::endl; continue; }
00088         rrv->setConstant(!floatOtherPOIs_);
00089         if (floatOtherPOIs_) nOtherFloatingPoi_++;
00090     }
00091  
00092     // start with a best fit
00093     const RooCmdArg &constrainCmdArg = withSystematics  ? RooFit::Constrain(*mc_s->GetNuisanceParameters()) : RooFit::NumCPU(1);
00094     std::auto_ptr<RooFitResult> res(doFit(pdf, data, (algo_ == Singles ? poiList_ : RooArgList()), constrainCmdArg)); 
00095     if (res.get() || keepFailures_) {
00096         for (int i = 0, n = poi_.size(); i < n; ++i) {
00097             poiVals_[i] = poiVars_[i]->getVal();
00098         }
00099         Combine::commitPoint(/*expected=*/false, /*quantile=*/1.);
00100     }
00101     std::auto_ptr<RooAbsReal> nll;
00102     if (algo_ != None && algo_ != Singles) {
00103         nll.reset(pdf.createNLL(data, constrainCmdArg, RooFit::Extended(pdf.canBeExtended())));
00104     } 
00105     switch(algo_) {
00106         case None: 
00107             if (verbose > 0) {
00108                 std::cout << "\n --- MultiDimFit ---" << std::endl;
00109                 std::cout << "best fit parameter values: "  << std::endl;
00110                 int len = poi_[0].length();
00111                 for (int i = 0, n = poi_.size(); i < n; ++i) {
00112                     len = std::max<int>(len, poi_[i].length());
00113                 }
00114                 for (int i = 0, n = poi_.size(); i < n; ++i) {
00115                     printf("   %*s :  %+8.3f\n", len, poi_[i].c_str(), poiVals_[i]);
00116                 }
00117             }
00118             break;
00119         case Singles: if (res.get()) doSingles(*res); break;
00120         case Cross: doBox(*nll, cl, "box", true); break;
00121         case Grid: doGrid(*nll); break;
00122         case RandomPoints: doRandomPoints(*nll); break;
00123         case Contour2D: doContour2D(*nll); break;
00124     }
00125     
00126     return true;
00127 }
00128 
00129 void MultiDimFit::initOnce(RooWorkspace *w, RooStats::ModelConfig *mc_s) {
00130     RooArgSet mcPoi(*mc_s->GetParametersOfInterest());
00131     if (poi_.empty()) {
00132         RooLinkedListIter iterP = mc_s->GetParametersOfInterest()->iterator();
00133         for (RooAbsArg *a = (RooAbsArg*) iterP.Next(); a != 0; a = (RooAbsArg*) iterP.Next()) {
00134             poi_.push_back(a->GetName());
00135         }
00136     }
00137     for (std::vector<std::string>::const_iterator it = poi_.begin(), ed = poi_.end(); it != ed; ++it) {
00138         RooAbsArg *a = mcPoi.find(it->c_str());
00139         if (a == 0) throw std::invalid_argument(std::string("Parameter of interest ")+*it+" not in model.");
00140         RooRealVar *rrv = dynamic_cast<RooRealVar *>(a);
00141         if (rrv == 0) throw std::invalid_argument(std::string("Parameter of interest ")+*it+" not a RooRealVar.");
00142         poiVars_.push_back(rrv);
00143         poiVals_.push_back(rrv->getVal());
00144         poiList_.add(*rrv);
00145     }
00146     // then add the branches to the tree (at the end, so there are no resizes)
00147     for (int i = 0, n = poi_.size(); i < n; ++i) {
00148         Combine::addBranch(poi_[i].c_str(), &poiVals_[i], (poi_[i]+"/F").c_str()); 
00149     }
00150     Combine::addBranch("deltaNLL", &deltaNLL_, "deltaNLL/F");
00151 }
00152 
00153 void MultiDimFit::doSingles(RooFitResult &res)
00154 {
00155     std::cout << "\n --- MultiDimFit ---" << std::endl;
00156     std::cout << "best fit parameter values and profile-likelihood uncertainties: "  << std::endl;
00157     int len = poi_[0].length();
00158     for (int i = 0, n = poi_.size(); i < n; ++i) {
00159         len = std::max<int>(len, poi_[i].length());
00160     }
00161     for (int i = 0, n = poi_.size(); i < n; ++i) {
00162         RooRealVar *rf = dynamic_cast<RooRealVar*>(res.floatParsFinal().find(poi_[i].c_str()));
00163         double bestFitVal = rf->getVal();
00164 
00165         double hiErr = +(rf->hasRange("err68") ? rf->getMax("err68") - bestFitVal : rf->getAsymErrorHi());
00166         double loErr = -(rf->hasRange("err68") ? rf->getMin("err68") - bestFitVal : rf->getAsymErrorLo());
00167         double maxError = std::max<double>(std::max<double>(hiErr, loErr), rf->getError());
00168 
00169         if (fabs(hiErr) < 0.001*maxError) hiErr = -bestFitVal + rf->getMax();
00170         if (fabs(loErr) < 0.001*maxError) loErr = +bestFitVal - rf->getMin();
00171 
00172         double hiErr95 = +(do95_ && rf->hasRange("err95") ? rf->getMax("err95") - bestFitVal : 0);
00173         double loErr95 = -(do95_ && rf->hasRange("err95") ? rf->getMin("err95") - bestFitVal : 0);
00174 
00175         poiVals_[i] = bestFitVal - loErr; Combine::commitPoint(true, /*quantile=*/0.32);
00176         poiVals_[i] = bestFitVal + hiErr; Combine::commitPoint(true, /*quantile=*/0.32);
00177         if (do95_ && rf->hasRange("err95")) {
00178             poiVals_[i] = rf->getMax("err95"); Combine::commitPoint(true, /*quantile=*/0.05);
00179             poiVals_[i] = rf->getMin("err95"); Combine::commitPoint(true, /*quantile=*/0.05);
00180             poiVals_[i] = bestFitVal;
00181             printf("   %*s :  %+8.3f   %+6.3f/%+6.3f (68%%)    %+6.3f/%+6.3f (95%%) \n", len, poi_[i].c_str(), 
00182                     poiVals_[i], -loErr, hiErr, loErr95, -hiErr95);
00183         } else {
00184             poiVals_[i] = bestFitVal;
00185             printf("   %*s :  %+8.3f   %+6.3f/%+6.3f (68%%)\n", len, poi_[i].c_str(), 
00186                     poiVals_[i], -loErr, hiErr);
00187         }
00188     }
00189 }
00190 
00191 void MultiDimFit::doGrid(RooAbsReal &nll) 
00192 {
00193     unsigned int n = poi_.size();
00194     if (poi_.size() > 2) throw std::logic_error("Don't know how to do a grid with more than 2 POIs.");
00195     double nll0 = nll.getVal();
00196 
00197     std::vector<double> p0(n), pmin(n), pmax(n);
00198     for (unsigned int i = 0; i < n; ++i) {
00199         p0[i] = poiVars_[i]->getVal();
00200         pmin[i] = poiVars_[i]->getMin();
00201         pmax[i] = poiVars_[i]->getMax();
00202         poiVars_[i]->setConstant(true);
00203     }
00204 
00205     CascadeMinimizer minim(nll, CascadeMinimizer::Constrained);
00206     minim.setStrategy(minimizerStrategy_);
00207     std::auto_ptr<RooArgSet> params(nll.getParameters((const RooArgSet *)0));
00208     RooArgSet snap; params->snapshot(snap);
00209     //snap.Print("V");
00210     if (n == 1) {
00211         for (unsigned int i = 0; i < points_; ++i) {
00212             double x =  pmin[0] + (i+0.5)*(pmax[0]-pmin[0])/points_; 
00213             if (verbose > 1) std::cout << "Point " << i << "/" << points_ << " " << poiVars_[0]->GetName() << " = " << x << std::endl;
00214             *params = snap; 
00215             poiVals_[0] = x;
00216             poiVars_[0]->setVal(x);
00217             // now we minimize
00218             {   
00219                 //CloseCoutSentry sentry(verbose < 3);    
00220                 bool ok = fastScan_ ? true : minim.minimize(verbose-1);
00221                 if (ok) {
00222                     deltaNLL_ = nll.getVal() - nll0;
00223                     double qN = 2*(deltaNLL_);
00224                     double prob = ROOT::Math::chisquared_cdf_c(qN, n+nOtherFloatingPoi_);
00225                     Combine::commitPoint(true, /*quantile=*/prob);
00226                 }
00227             } 
00228         }
00229     } else if (n == 2) {
00230         unsigned int sqrn = ceil(sqrt(double(points_)));
00231         unsigned int ipoint = 0, nprint = ceil(0.005*sqrn*sqrn);
00232         RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::CountErrors);
00233         CloseCoutSentry sentry(verbose < 2);    
00234         for (unsigned int i = 0; i < sqrn; ++i) {
00235             for (unsigned int j = 0; j < sqrn; ++j, ++ipoint) {
00236                 if (ipoint < firstPoint_) continue;
00237                 if (ipoint > lastPoint_)  break;
00238                 *params = snap; 
00239                 double x =  pmin[0] + (i+0.5)*(pmax[0]-pmin[0])/sqrn; 
00240                 double y =  pmin[1] + (j+0.5)*(pmax[1]-pmin[1])/sqrn; 
00241                 if (verbose && (ipoint % nprint == 0)) {
00242                          fprintf(sentry.trueStdOut(), "Point %d/%d, (i,j) = (%d,%d), %s = %f, %s = %f\n",
00243                                         ipoint,sqrn*sqrn, i,j, poiVars_[0]->GetName(), x, poiVars_[1]->GetName(), y);
00244                 }
00245                 poiVals_[0] = x;
00246                 poiVals_[1] = y;
00247                 poiVars_[0]->setVal(x);
00248                 poiVars_[1]->setVal(y);
00249                 nll.clearEvalErrorLog(); nll.getVal();
00250                 if (nll.numEvalErrors() > 0) { 
00251                     deltaNLL_ = 9999; Combine::commitPoint(true, /*quantile=*/0); 
00252                     continue;
00253                 }
00254                 // now we minimize
00255                 {   
00256                     bool ok = fastScan_ ? true : minim.minimize(verbose-1);
00257                     if (ok) {
00258                         deltaNLL_ = nll.getVal() - nll0;
00259                         double qN = 2*(deltaNLL_);
00260                         double prob = ROOT::Math::chisquared_cdf_c(qN, n+nOtherFloatingPoi_);
00261                         Combine::commitPoint(true, /*quantile=*/prob);
00262                     }
00263                 } 
00264             }
00265         }
00266     }
00267 }
00268 
00269 void MultiDimFit::doRandomPoints(RooAbsReal &nll) 
00270 {
00271     double nll0 = nll.getVal();
00272     for (unsigned int i = 0, n = poi_.size(); i < n; ++i) {
00273         poiVars_[i]->setConstant(true);
00274     }
00275 
00276     CascadeMinimizer minim(nll, CascadeMinimizer::Constrained);
00277     minim.setStrategy(minimizerStrategy_);
00278     unsigned int n = poi_.size();
00279     for (unsigned int j = 0; j < points_; ++j) {
00280         for (unsigned int i = 0; i < n; ++i) {
00281             poiVars_[i]->randomize();
00282             poiVals_[i] = poiVars_[i]->getVal(); 
00283         }
00284         // now we minimize
00285         {   
00286             CloseCoutSentry sentry(verbose < 3);    
00287             bool ok = minim.minimize(verbose-1);
00288             if (ok) {
00289                 double qN = 2*(nll.getVal() - nll0);
00290                 double prob = ROOT::Math::chisquared_cdf_c(qN, n+nOtherFloatingPoi_);
00291                 Combine::commitPoint(true, /*quantile=*/prob);
00292             }
00293         } 
00294     }
00295 }
00296 
00297 void MultiDimFit::doContour2D(RooAbsReal &nll) 
00298 {
00299     if (poi_.size() != 2) throw std::logic_error("Contour2D works only in 2 dimensions");
00300     RooRealVar *xv = poiVars_[0]; double x0 = poiVals_[0]; float &x = poiVals_[0];
00301     RooRealVar *yv = poiVars_[1]; double y0 = poiVals_[1]; float &y = poiVals_[1];
00302 
00303     double threshold = nll.getVal() + 0.5*ROOT::Math::chisquared_quantile_c(1-cl,2+nOtherFloatingPoi_);
00304     if (verbose>0) std::cout << "Best fit point is for " << xv->GetName() << ", "  << yv->GetName() << " =  " << x0 << ", " << y0 << std::endl;
00305 
00306     // make a box
00307     doBox(nll, cl, "box");
00308     double xMin = xv->getMin("box"), xMax = xv->getMax("box");
00309     double yMin = yv->getMin("box"), yMax = yv->getMax("box");
00310 
00311     verbose--; // reduce verbosity to avoid messages from findCrossing
00312     // ===== Get relative min/max of x for several fixed y values =====
00313     yv->setConstant(true);
00314     for (unsigned int j = 0; j <= points_; ++j) {
00315         // take points uniformly spaced in polar angle in the case of a perfect circle
00316         double yc = 0.5*(yMax + yMin), yr = 0.5*(yMax - yMin);
00317         yv->setVal( yc + yr * std::cos(j*M_PI/double(points_)) );
00318         // ===== Get the best fit x (could also do without profiling??) =====
00319         xv->setConstant(false);  xv->setVal(x0);
00320         CascadeMinimizer minimXI(nll, CascadeMinimizer::Unconstrained, xv);
00321         minimXI.setStrategy(minimizerStrategy_);
00322         {
00323             CloseCoutSentry sentry(verbose < 3);    
00324             minimXI.minimize(verbose-1);
00325         }
00326         double xc = xv->getVal(); xv->setConstant(true);
00327         if (verbose>-1) std::cout << "Best fit " << xv->GetName() << " for  " << yv->GetName() << " = " << yv->getVal() << " is at " << xc << std::endl;
00328         // ===== Then get the range =====
00329         CascadeMinimizer minim(nll, CascadeMinimizer::Constrained);
00330         double xup = findCrossing(minim, nll, *xv, threshold, xc, xMax);
00331         if (!std::isnan(xup)) { 
00332             x = xup; y = yv->getVal(); Combine::commitPoint(true, /*quantile=*/1-cl);
00333             if (verbose>-1) std::cout << "Minimum of " << xv->GetName() << " at " << cl << " CL for " << yv->GetName() << " = " << y << " is " << x << std::endl;
00334         }
00335         
00336         double xdn = findCrossing(minim, nll, *xv, threshold, xc, xMin);
00337         if (!std::isnan(xdn)) { 
00338             x = xdn; y = yv->getVal(); Combine::commitPoint(true, /*quantile=*/1-cl);
00339             if (verbose>-1) std::cout << "Maximum of " << xv->GetName() << " at " << cl << " CL for " << yv->GetName() << " = " << y << " is " << x << std::endl;
00340         }
00341     }
00342 
00343     verbose++; // restore verbosity
00344 }
00345 void MultiDimFit::doBox(RooAbsReal &nll, double cl, const char *name, bool commitPoints)  {
00346     unsigned int n = poi_.size();
00347     double nll0 = nll.getVal(), threshold = nll0 + 0.5*ROOT::Math::chisquared_quantile_c(1-cl,n+nOtherFloatingPoi_);
00348 
00349     std::vector<double> p0(n);
00350     for (unsigned int i = 0; i < n; ++i) {
00351         p0[i] = poiVars_[i]->getVal();
00352         poiVars_[i]->setConstant(false);
00353     }
00354 
00355     verbose--; // reduce verbosity due to findCrossing
00356     for (unsigned int i = 0; i < n; ++i) {
00357         RooRealVar *xv = poiVars_[i];
00358         xv->setConstant(true);
00359         CascadeMinimizer minimX(nll, CascadeMinimizer::Constrained);
00360         minimX.setStrategy(minimizerStrategy_);
00361 
00362         for (unsigned int j = 0; j < n; ++j) poiVars_[j]->setVal(p0[j]);
00363         double xMin = findCrossing(minimX, nll, *xv, threshold, p0[i], xv->getMin()); 
00364         if (!std::isnan(xMin)) { 
00365             if (verbose > -1) std::cout << "Minimum of " << xv->GetName() << " at " << cl << " CL for all others floating is " << xMin << std::endl;
00366             for (unsigned int j = 0; j < n; ++j) poiVals_[j] = poiVars_[j]->getVal();
00367             if (commitPoints) Combine::commitPoint(true, /*quantile=*/1-cl);
00368         } else {
00369             xMin = xv->getMin();
00370             for (unsigned int j = 0; j < n; ++j) poiVals_[j] = poiVars_[j]->getVal();
00371             double prob = ROOT::Math::chisquared_cdf_c(2*(nll.getVal() - nll0), n+nOtherFloatingPoi_);
00372             if (commitPoints) Combine::commitPoint(true, /*quantile=*/prob);
00373             if (verbose > -1) std::cout << "Minimum of " << xv->GetName() << " at " << cl << " CL for all others floating is " << xMin << " (on the boundary, p-val " << prob << ")" << std::endl;
00374         }
00375         
00376         for (unsigned int j = 0; j < n; ++j) poiVars_[j]->setVal(p0[j]);
00377         double xMax = findCrossing(minimX, nll, *xv, threshold, p0[i], xv->getMax()); 
00378         if (!std::isnan(xMax)) { 
00379             if (verbose > -1) std::cout << "Maximum of " << xv->GetName() << " at " << cl << " CL for all others floating is " << xMax << std::endl;
00380             for (unsigned int j = 0; j < n; ++j) poiVals_[j] = poiVars_[j]->getVal();
00381             if (commitPoints) Combine::commitPoint(true, /*quantile=*/1-cl);
00382         } else {
00383             xMax = xv->getMax();
00384             double prob = ROOT::Math::chisquared_cdf_c(2*(nll.getVal() - nll0), n+nOtherFloatingPoi_);
00385             for (unsigned int j = 0; j < n; ++j) poiVals_[j] = poiVars_[j]->getVal();
00386             if (commitPoints) Combine::commitPoint(true, /*quantile=*/prob);
00387             if (verbose > -1) std::cout << "Maximum of " << xv->GetName() << " at " << cl << " CL for all others floating is " << xMax << " (on the boundary, p-val " << prob << ")" << std::endl;
00388         }
00389 
00390         xv->setRange(name, xMin, xMax);
00391         xv->setConstant(false);
00392     }
00393     verbose++; // restore verbosity 
00394 }