CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_5_3_13_patch3/src/HiggsAnalysis/CombinedLimit/src/GoodnessOfFit.cc

Go to the documentation of this file.
00001 #include "../interface/GoodnessOfFit.h"
00002 #include <RooRealVar.h>
00003 #include <RooArgSet.h>
00004 #include <RooRandom.h>
00005 #include <RooDataSet.h>
00006 #include <RooFitResult.h>
00007 #include <RooPlot.h>
00008 #include <RooProdPdf.h>
00009 #include <RooSimultaneous.h>
00010 #include <RooAddPdf.h>
00011 #include <RooConstVar.h>
00012 #include <RooDataSet.h>
00013 #include <RooDataHist.h>
00014 #include <RooHistPdf.h>
00015 #include <TCanvas.h>
00016 #include <TStyle.h>
00017 #include <TH2.h>
00018 #include <TFile.h>
00019 #include <RooStats/ModelConfig.h>
00020 #include "../interface/Combine.h"
00021 #include "../interface/ProfileLikelihood.h"
00022 #include "../interface/CloseCoutSentry.h"
00023 #include "../interface/RooSimultaneousOpt.h"
00024 #include "../interface/utils.h"
00025 
00026 
00027 #include <Math/MinimizerOptions.h>
00028 
00029 using namespace RooStats;
00030 
00031 std::string GoodnessOfFit::algo_;
00032 std::string GoodnessOfFit::minimizerAlgo_ = "Minuit2";
00033 float       GoodnessOfFit::minimizerTolerance_ = 1e-4;
00034 int         GoodnessOfFit::minimizerStrategy_  = 1;
00035 float       GoodnessOfFit::mu_ = 0.0;
00036 bool        GoodnessOfFit::fixedMu_ = false;
00037 
00038 GoodnessOfFit::GoodnessOfFit() :
00039     LimitAlgo("GoodnessOfFit specific options")
00040 {
00041     options_.add_options()
00042         ("algorithm",          boost::program_options::value<std::string>(&algo_), "Goodness of fit algorithm. Currently, the only option is 'saturated' (which works for binned models only)")
00043         ("minimizerAlgo",      boost::program_options::value<std::string>(&minimizerAlgo_)->default_value(minimizerAlgo_), "Choice of minimizer (Minuit vs Minuit2)")
00044         ("minimizerTolerance", boost::program_options::value<float>(&minimizerTolerance_)->default_value(minimizerTolerance_),  "Tolerance for minimizer")
00045         ("minimizerStrategy",  boost::program_options::value<int>(&minimizerStrategy_)->default_value(minimizerStrategy_),      "Stragegy for minimizer")
00046         ("fixedSignalStrength", boost::program_options::value<float>(&mu_)->default_value(mu_),  "Compute the goodness of fit for a fixed signal strength. If not specified, it's left floating")
00047     ;
00048 }
00049 
00050 void GoodnessOfFit::applyOptions(const boost::program_options::variables_map &vm) 
00051 {
00052     fixedMu_ = !vm["fixedSignalStrength"].defaulted();
00053     if (algo_ == "saturated") std::cout << "Will use saturated models to compute goodness of fit for a binned likelihood" << std::endl;
00054     else throw std::invalid_argument("GoodnessOfFit: algorithm "+algo_+" not supported");
00055 }
00056 
00057 bool GoodnessOfFit::run(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) { 
00058   ProfileLikelihood::MinimizerSentry minimizerConfig(minimizerAlgo_, minimizerTolerance_);
00059 
00060   RooRealVar *r = dynamic_cast<RooRealVar *>(mc_s->GetParametersOfInterest()->first());
00061   if (fixedMu_) { r->setVal(mu_); r->setConstant(true); }
00062   if (algo_ == "saturated") return runSaturatedModel(w, mc_s, mc_b, data, limit, limitErr, hint);
00063   return false;  
00064 }
00065 
00066 bool GoodnessOfFit::runSaturatedModel(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) { 
00067   RooAbsPdf *pdf_nominal = mc_s->GetPdf();
00068   // now I need to make the saturated pdf
00069   std::auto_ptr<RooAbsPdf> saturated;
00070   // factorize away constraints anyway
00071   RooArgList constraints;
00072   RooAbsPdf *obsOnlyPdf = utils::factorizePdf(*mc_s->GetObservables(), *pdf_nominal, constraints);
00073   // case 1:
00074   RooSimultaneous *sim = dynamic_cast<RooSimultaneous *>(obsOnlyPdf);
00075   if (sim) {
00076       RooAbsCategoryLValue *cat = (RooAbsCategoryLValue *) sim->indexCat().Clone();
00077       std::auto_ptr<TList> datasets(data.split(*cat, true));
00078       int nbins = cat->numBins((const char *)0);
00079       RooArgSet newPdfs;
00080       TString satname = TString::Format("%s_saturated", sim->GetName());
00081       RooSimultaneous *satsim = (typeid(*sim) == typeid(RooSimultaneousOpt)) ? new RooSimultaneousOpt(satname, "", *cat) : new RooSimultaneous(satname, "", *cat); 
00082       for (int ic = 0, nc = nbins; ic < nc; ++ic) {
00083           cat->setBin(ic);
00084           RooAbsPdf *pdfi = sim->getPdf(cat->getLabel());
00085           if (pdfi == 0) continue;
00086           RooAbsData *datai = (RooAbsData *) datasets->FindObject(cat->getLabel());
00087           if (datai == 0) throw std::runtime_error(std::string("Error: missing dataset for category label ")+cat->getLabel());
00088           RooAbsPdf *saturatedPdfi = makeSaturatedPdf(*datai);
00089           //delete datai;
00090           if (constraints.getSize() > 0) {
00091             RooArgList terms(constraints); terms.add(*saturatedPdfi);
00092             RooProdPdf *prodpdf = new RooProdPdf(TString::Format("%s_constr", saturatedPdfi->GetName()), "", terms);
00093             prodpdf->addOwnedComponents(RooArgSet(*saturatedPdfi));
00094             saturatedPdfi = prodpdf;
00095           }
00096           satsim->addPdf(*saturatedPdfi, cat->getLabel());
00097           satsim->addOwnedComponents(RooArgSet(*saturatedPdfi));
00098       }
00099       saturated.reset(satsim);
00100   } else {
00101       RooAbsPdf *saturatedPdfi = makeSaturatedPdf(data);
00102       if (constraints.getSize() > 0) {
00103           RooArgList terms(constraints); terms.add(*saturatedPdfi);
00104           RooProdPdf *prodpdf = new RooProdPdf(TString::Format("%s_constr", saturatedPdfi->GetName()), "", terms);
00105           prodpdf->addOwnedComponents(RooArgSet(*saturatedPdfi));
00106           saturatedPdfi = prodpdf;
00107       }
00108       saturated.reset(saturatedPdfi);
00109   }
00110 
00111   CloseCoutSentry sentry(verbose < 2);
00112   // let's assume fits converge, for a while
00113   const RooCmdArg &minim = RooFit::Minimizer(ROOT::Math::MinimizerOptions::DefaultMinimizerType().c_str(),
00114                                              ROOT::Math::MinimizerOptions::DefaultMinimizerAlgo().c_str());
00115   std::auto_ptr<RooFitResult> result_nominal(pdf_nominal->fitTo(data, RooFit::Save(1), minim, RooFit::Strategy(minimizerStrategy_), RooFit::Hesse(0), RooFit::Constrain(*mc_s->GetNuisanceParameters())));
00116   std::auto_ptr<RooFitResult> result_saturated(saturated->fitTo(data, RooFit::Save(1), minim, RooFit::Strategy(minimizerStrategy_), RooFit::Hesse(0), RooFit::Constrain(*mc_s->GetNuisanceParameters())));
00117   sentry.clear();
00118 
00119   saturated.reset();
00120   for (int i = 0, n = tempData_.size(); i < n; ++i) delete tempData_[i]; 
00121   tempData_.clear();
00122 
00123   if (result_nominal.get()   == 0) return false;
00124   if (result_saturated.get() == 0) return false;
00125 
00126   double nll_nominal   = result_nominal->minNll();
00127   double nll_saturated = result_saturated->minNll();
00128   if (fabs(nll_nominal) > 1e10 || fabs(nll_saturated) > 1e10) return false;
00129   limit = 2*(nll_nominal-nll_saturated);
00130 
00131   std::cout << "\n --- GoodnessOfFit --- " << std::endl;
00132   std::cout << "Best fit test statistic: " << limit << std::endl;
00133   return true;
00134 }
00135 
00136 RooAbsPdf * GoodnessOfFit::makeSaturatedPdf(RooAbsData &data) {
00137   if (verbose > 1) std::cout << "Generating saturated model for " << data.GetName() << std::endl;
00138   RooDataHist *rdh = new RooDataHist(TString::Format("%s_binned", data.GetName()), "", *data.get(), data); tempData_.push_back(rdh);
00139   if (verbose > 1) utils::printRDH(rdh);
00140   RooHistPdf *hpdf = new RooHistPdf(TString::Format("%s_shape", data.GetName()), "", *rdh->get(), *rdh);
00141   RooConstVar *norm = new RooConstVar(TString::Format("%s_norm", data.GetName()), "", data.sumEntries());
00142   // we use RooAddPdf because this works with CachingNLL
00143   RooAddPdf *ret = new RooAddPdf(TString::Format("%s_saturated", data.GetName()), "", RooArgList(*hpdf), RooArgList(*norm));
00144   ret->addOwnedComponents(RooArgSet(*norm));
00145   ret->addOwnedComponents(RooArgSet(*hpdf));
00146   return ret;
00147 }
00148 
00149