CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
GoodnessOfFit.cc
Go to the documentation of this file.
1 #include "../interface/GoodnessOfFit.h"
2 #include <RooRealVar.h>
3 #include <RooArgSet.h>
4 #include <RooRandom.h>
5 #include <RooDataSet.h>
6 #include <RooFitResult.h>
7 #include <RooPlot.h>
8 #include <RooProdPdf.h>
9 #include <RooSimultaneous.h>
10 #include <RooAddPdf.h>
11 #include <RooConstVar.h>
12 #include <RooDataSet.h>
13 #include <RooDataHist.h>
14 #include <RooHistPdf.h>
15 #include <TCanvas.h>
16 #include <TStyle.h>
17 #include <TH2.h>
18 #include <TFile.h>
19 #include <RooStats/ModelConfig.h>
20 #include "../interface/Combine.h"
21 #include "../interface/ProfileLikelihood.h"
22 #include "../interface/CloseCoutSentry.h"
23 #include "../interface/RooSimultaneousOpt.h"
24 #include "../interface/utils.h"
25 
26 
27 #include <Math/MinimizerOptions.h>
28 
29 using namespace RooStats;
30 
31 std::string GoodnessOfFit::algo_;
32 std::string GoodnessOfFit::minimizerAlgo_ = "Minuit2";
35 float GoodnessOfFit::mu_ = 0.0;
36 bool GoodnessOfFit::fixedMu_ = false;
37 
39  LimitAlgo("GoodnessOfFit specific options")
40 {
41  options_.add_options()
42  ("algorithm", boost::program_options::value<std::string>(&algo_), "Goodness of fit algorithm. Currently, the only option is 'saturated' (which works for binned models only)")
43  ("minimizerAlgo", boost::program_options::value<std::string>(&minimizerAlgo_)->default_value(minimizerAlgo_), "Choice of minimizer (Minuit vs Minuit2)")
44  ("minimizerTolerance", boost::program_options::value<float>(&minimizerTolerance_)->default_value(minimizerTolerance_), "Tolerance for minimizer")
45  ("minimizerStrategy", boost::program_options::value<int>(&minimizerStrategy_)->default_value(minimizerStrategy_), "Stragegy for minimizer")
46  ("fixedSignalStrength", boost::program_options::value<float>(&mu_)->default_value(mu_), "Compute the goodness of fit for a fixed signal strength. If not specified, it's left floating")
47  ;
48 }
49 
50 void GoodnessOfFit::applyOptions(const boost::program_options::variables_map &vm)
51 {
52  fixedMu_ = !vm["fixedSignalStrength"].defaulted();
53  if (algo_ == "saturated") std::cout << "Will use saturated models to compute goodness of fit for a binned likelihood" << std::endl;
54  else throw std::invalid_argument("GoodnessOfFit: algorithm "+algo_+" not supported");
55 }
56 
57 bool GoodnessOfFit::run(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) {
59 
60  RooRealVar *r = dynamic_cast<RooRealVar *>(mc_s->GetParametersOfInterest()->first());
61  if (fixedMu_) { r->setVal(mu_); r->setConstant(true); }
62  if (algo_ == "saturated") return runSaturatedModel(w, mc_s, mc_b, data, limit, limitErr, hint);
63  return false;
64 }
65 
66 bool GoodnessOfFit::runSaturatedModel(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) {
67  RooAbsPdf *pdf_nominal = mc_s->GetPdf();
68  // now I need to make the saturated pdf
69  std::auto_ptr<RooAbsPdf> saturated;
70  // factorize away constraints anyway
71  RooArgList constraints;
72  RooAbsPdf *obsOnlyPdf = utils::factorizePdf(*mc_s->GetObservables(), *pdf_nominal, constraints);
73  // case 1:
74  RooSimultaneous *sim = dynamic_cast<RooSimultaneous *>(obsOnlyPdf);
75  if (sim) {
76  RooAbsCategoryLValue *cat = (RooAbsCategoryLValue *) sim->indexCat().Clone();
77  std::auto_ptr<TList> datasets(data.split(*cat, true));
78  int nbins = cat->numBins((const char *)0);
79  RooArgSet newPdfs;
80  TString satname = TString::Format("%s_saturated", sim->GetName());
81  RooSimultaneous *satsim = (typeid(*sim) == typeid(RooSimultaneousOpt)) ? new RooSimultaneousOpt(satname, "", *cat) : new RooSimultaneous(satname, "", *cat);
82  for (int ic = 0, nc = nbins; ic < nc; ++ic) {
83  cat->setBin(ic);
84  RooAbsPdf *pdfi = sim->getPdf(cat->getLabel());
85  if (pdfi == 0) continue;
86  RooAbsData *datai = (RooAbsData *) datasets->FindObject(cat->getLabel());
87  if (datai == 0) throw std::runtime_error(std::string("Error: missing dataset for category label ")+cat->getLabel());
88  RooAbsPdf *saturatedPdfi = makeSaturatedPdf(*datai);
89  //delete datai;
90  if (constraints.getSize() > 0) {
91  RooArgList terms(constraints); terms.add(*saturatedPdfi);
92  RooProdPdf *prodpdf = new RooProdPdf(TString::Format("%s_constr", saturatedPdfi->GetName()), "", terms);
93  prodpdf->addOwnedComponents(RooArgSet(*saturatedPdfi));
94  saturatedPdfi = prodpdf;
95  }
96  satsim->addPdf(*saturatedPdfi, cat->getLabel());
97  satsim->addOwnedComponents(RooArgSet(*saturatedPdfi));
98  }
99  saturated.reset(satsim);
100  } else {
101  RooAbsPdf *saturatedPdfi = makeSaturatedPdf(data);
102  if (constraints.getSize() > 0) {
103  RooArgList terms(constraints); terms.add(*saturatedPdfi);
104  RooProdPdf *prodpdf = new RooProdPdf(TString::Format("%s_constr", saturatedPdfi->GetName()), "", terms);
105  prodpdf->addOwnedComponents(RooArgSet(*saturatedPdfi));
106  saturatedPdfi = prodpdf;
107  }
108  saturated.reset(saturatedPdfi);
109  }
110 
111  CloseCoutSentry sentry(verbose < 2);
112  // let's assume fits converge, for a while
113  const RooCmdArg &minim = RooFit::Minimizer(ROOT::Math::MinimizerOptions::DefaultMinimizerType().c_str(),
114  ROOT::Math::MinimizerOptions::DefaultMinimizerAlgo().c_str());
115  std::auto_ptr<RooFitResult> result_nominal(pdf_nominal->fitTo(data, RooFit::Save(1), minim, RooFit::Strategy(minimizerStrategy_), RooFit::Hesse(0), RooFit::Constrain(*mc_s->GetNuisanceParameters())));
116  std::auto_ptr<RooFitResult> result_saturated(saturated->fitTo(data, RooFit::Save(1), minim, RooFit::Strategy(minimizerStrategy_), RooFit::Hesse(0), RooFit::Constrain(*mc_s->GetNuisanceParameters())));
117  sentry.clear();
118 
119  saturated.reset();
120  for (int i = 0, n = tempData_.size(); i < n; ++i) delete tempData_[i];
121  tempData_.clear();
122 
123  if (result_nominal.get() == 0) return false;
124  if (result_saturated.get() == 0) return false;
125 
126  double nll_nominal = result_nominal->minNll();
127  double nll_saturated = result_saturated->minNll();
128  if (fabs(nll_nominal) > 1e10 || fabs(nll_saturated) > 1e10) return false;
129  limit = 2*(nll_nominal-nll_saturated);
130 
131  std::cout << "\n --- GoodnessOfFit --- " << std::endl;
132  std::cout << "Best fit test statistic: " << limit << std::endl;
133  return true;
134 }
135 
136 RooAbsPdf * GoodnessOfFit::makeSaturatedPdf(RooAbsData &data) {
137  if (verbose > 1) std::cout << "Generating saturated model for " << data.GetName() << std::endl;
138  RooDataHist *rdh = new RooDataHist(TString::Format("%s_binned", data.GetName()), "", *data.get(), data); tempData_.push_back(rdh);
139  if (verbose > 1) utils::printRDH(rdh);
140  RooHistPdf *hpdf = new RooHistPdf(TString::Format("%s_shape", data.GetName()), "", *rdh->get(), *rdh);
141  RooConstVar *norm = new RooConstVar(TString::Format("%s_norm", data.GetName()), "", data.sumEntries());
142  // we use RooAddPdf because this works with CachingNLL
143  RooAddPdf *ret = new RooAddPdf(TString::Format("%s_saturated", data.GetName()), "", RooArgList(*hpdf), RooArgList(*norm));
144  ret->addOwnedComponents(RooArgSet(*norm));
145  ret->addOwnedComponents(RooArgSet(*hpdf));
146  return ret;
147 }
148 
149 
static bool fixedMu_
Definition: GoodnessOfFit.h:34
int i
Definition: DBlmapReader.cc:9
RooAbsPdf * makeSaturatedPdf(RooAbsData &data)
virtual void applyOptions(const boost::program_options::variables_map &vm)
static float minimizerTolerance_
Definition: GoodnessOfFit.h:30
virtual bool runSaturatedModel(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint)
Definition: sim.h:19
RooAbsPdf * factorizePdf(const RooArgSet &observables, RooAbsPdf &pdf, RooArgList &constraints)
Definition: utils.cc:80
void printRDH(RooAbsData *data)
Definition: utils.cc:30
static std::string minimizerAlgo_
Definition: GoodnessOfFit.h:29
static int minimizerStrategy_
Definition: GoodnessOfFit.h:31
Setup Minimizer configuration on creation, reset the previous one on destruction. ...
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
std::vector< RooAbsData * > tempData_
Definition: GoodnessOfFit.h:38
tuple cout
Definition: gather_cfg.py:121
virtual bool run(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint)
boost::program_options::options_description options_
Definition: LimitAlgo.h:31
static std::string algo_
Definition: GoodnessOfFit.h:27
T w() const
static float mu_
Definition: GoodnessOfFit.h:33