00001 #include "../interface/GoodnessOfFit.h"
00002 #include <RooRealVar.h>
00003 #include <RooArgSet.h>
00004 #include <RooRandom.h>
00005 #include <RooDataSet.h>
00006 #include <RooFitResult.h>
00007 #include <RooPlot.h>
00008 #include <RooProdPdf.h>
00009 #include <RooSimultaneous.h>
00010 #include <RooAddPdf.h>
00011 #include <RooConstVar.h>
00012 #include <RooDataSet.h>
00013 #include <RooDataHist.h>
00014 #include <RooHistPdf.h>
00015 #include <TCanvas.h>
00016 #include <TStyle.h>
00017 #include <TH2.h>
00018 #include <TFile.h>
00019 #include <RooStats/ModelConfig.h>
00020 #include "../interface/Combine.h"
00021 #include "../interface/ProfileLikelihood.h"
00022 #include "../interface/CloseCoutSentry.h"
00023 #include "../interface/RooSimultaneousOpt.h"
00024 #include "../interface/utils.h"
00025
00026
00027 #include <Math/MinimizerOptions.h>
00028
00029 using namespace RooStats;
00030
00031 std::string GoodnessOfFit::algo_;
00032 std::string GoodnessOfFit::minimizerAlgo_ = "Minuit2";
00033 float GoodnessOfFit::minimizerTolerance_ = 1e-4;
00034 int GoodnessOfFit::minimizerStrategy_ = 1;
00035 float GoodnessOfFit::mu_ = 0.0;
00036 bool GoodnessOfFit::fixedMu_ = false;
00037
00038 GoodnessOfFit::GoodnessOfFit() :
00039 LimitAlgo("GoodnessOfFit specific options")
00040 {
00041 options_.add_options()
00042 ("algorithm", boost::program_options::value<std::string>(&algo_), "Goodness of fit algorithm. Currently, the only option is 'saturated' (which works for binned models only)")
00043 ("minimizerAlgo", boost::program_options::value<std::string>(&minimizerAlgo_)->default_value(minimizerAlgo_), "Choice of minimizer (Minuit vs Minuit2)")
00044 ("minimizerTolerance", boost::program_options::value<float>(&minimizerTolerance_)->default_value(minimizerTolerance_), "Tolerance for minimizer")
00045 ("minimizerStrategy", boost::program_options::value<int>(&minimizerStrategy_)->default_value(minimizerStrategy_), "Stragegy for minimizer")
00046 ("fixedSignalStrength", boost::program_options::value<float>(&mu_)->default_value(mu_), "Compute the goodness of fit for a fixed signal strength. If not specified, it's left floating")
00047 ;
00048 }
00049
00050 void GoodnessOfFit::applyOptions(const boost::program_options::variables_map &vm)
00051 {
00052 fixedMu_ = !vm["fixedSignalStrength"].defaulted();
00053 if (algo_ == "saturated") std::cout << "Will use saturated models to compute goodness of fit for a binned likelihood" << std::endl;
00054 else throw std::invalid_argument("GoodnessOfFit: algorithm "+algo_+" not supported");
00055 }
00056
00057 bool GoodnessOfFit::run(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) {
00058 ProfileLikelihood::MinimizerSentry minimizerConfig(minimizerAlgo_, minimizerTolerance_);
00059
00060 RooRealVar *r = dynamic_cast<RooRealVar *>(mc_s->GetParametersOfInterest()->first());
00061 if (fixedMu_) { r->setVal(mu_); r->setConstant(true); }
00062 if (algo_ == "saturated") return runSaturatedModel(w, mc_s, mc_b, data, limit, limitErr, hint);
00063 return false;
00064 }
00065
00066 bool GoodnessOfFit::runSaturatedModel(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) {
00067 RooAbsPdf *pdf_nominal = mc_s->GetPdf();
00068
00069 std::auto_ptr<RooAbsPdf> saturated;
00070
00071 RooArgList constraints;
00072 RooAbsPdf *obsOnlyPdf = utils::factorizePdf(*mc_s->GetObservables(), *pdf_nominal, constraints);
00073
00074 RooSimultaneous *sim = dynamic_cast<RooSimultaneous *>(obsOnlyPdf);
00075 if (sim) {
00076 RooAbsCategoryLValue *cat = (RooAbsCategoryLValue *) sim->indexCat().Clone();
00077 std::auto_ptr<TList> datasets(data.split(*cat, true));
00078 int nbins = cat->numBins((const char *)0);
00079 RooArgSet newPdfs;
00080 TString satname = TString::Format("%s_saturated", sim->GetName());
00081 RooSimultaneous *satsim = (typeid(*sim) == typeid(RooSimultaneousOpt)) ? new RooSimultaneousOpt(satname, "", *cat) : new RooSimultaneous(satname, "", *cat);
00082 for (int ic = 0, nc = nbins; ic < nc; ++ic) {
00083 cat->setBin(ic);
00084 RooAbsPdf *pdfi = sim->getPdf(cat->getLabel());
00085 if (pdfi == 0) continue;
00086 RooAbsData *datai = (RooAbsData *) datasets->FindObject(cat->getLabel());
00087 if (datai == 0) throw std::runtime_error(std::string("Error: missing dataset for category label ")+cat->getLabel());
00088 RooAbsPdf *saturatedPdfi = makeSaturatedPdf(*datai);
00089
00090 if (constraints.getSize() > 0) {
00091 RooArgList terms(constraints); terms.add(*saturatedPdfi);
00092 RooProdPdf *prodpdf = new RooProdPdf(TString::Format("%s_constr", saturatedPdfi->GetName()), "", terms);
00093 prodpdf->addOwnedComponents(RooArgSet(*saturatedPdfi));
00094 saturatedPdfi = prodpdf;
00095 }
00096 satsim->addPdf(*saturatedPdfi, cat->getLabel());
00097 satsim->addOwnedComponents(RooArgSet(*saturatedPdfi));
00098 }
00099 saturated.reset(satsim);
00100 } else {
00101 RooAbsPdf *saturatedPdfi = makeSaturatedPdf(data);
00102 if (constraints.getSize() > 0) {
00103 RooArgList terms(constraints); terms.add(*saturatedPdfi);
00104 RooProdPdf *prodpdf = new RooProdPdf(TString::Format("%s_constr", saturatedPdfi->GetName()), "", terms);
00105 prodpdf->addOwnedComponents(RooArgSet(*saturatedPdfi));
00106 saturatedPdfi = prodpdf;
00107 }
00108 saturated.reset(saturatedPdfi);
00109 }
00110
00111 CloseCoutSentry sentry(verbose < 2);
00112
00113 const RooCmdArg &minim = RooFit::Minimizer(ROOT::Math::MinimizerOptions::DefaultMinimizerType().c_str(),
00114 ROOT::Math::MinimizerOptions::DefaultMinimizerAlgo().c_str());
00115 std::auto_ptr<RooFitResult> result_nominal(pdf_nominal->fitTo(data, RooFit::Save(1), minim, RooFit::Strategy(minimizerStrategy_), RooFit::Hesse(0), RooFit::Constrain(*mc_s->GetNuisanceParameters())));
00116 std::auto_ptr<RooFitResult> result_saturated(saturated->fitTo(data, RooFit::Save(1), minim, RooFit::Strategy(minimizerStrategy_), RooFit::Hesse(0), RooFit::Constrain(*mc_s->GetNuisanceParameters())));
00117 sentry.clear();
00118
00119 saturated.reset();
00120 for (int i = 0, n = tempData_.size(); i < n; ++i) delete tempData_[i];
00121 tempData_.clear();
00122
00123 if (result_nominal.get() == 0) return false;
00124 if (result_saturated.get() == 0) return false;
00125
00126 double nll_nominal = result_nominal->minNll();
00127 double nll_saturated = result_saturated->minNll();
00128 if (fabs(nll_nominal) > 1e10 || fabs(nll_saturated) > 1e10) return false;
00129 limit = 2*(nll_nominal-nll_saturated);
00130
00131 std::cout << "\n --- GoodnessOfFit --- " << std::endl;
00132 std::cout << "Best fit test statistic: " << limit << std::endl;
00133 return true;
00134 }
00135
00136 RooAbsPdf * GoodnessOfFit::makeSaturatedPdf(RooAbsData &data) {
00137 if (verbose > 1) std::cout << "Generating saturated model for " << data.GetName() << std::endl;
00138 RooDataHist *rdh = new RooDataHist(TString::Format("%s_binned", data.GetName()), "", *data.get(), data); tempData_.push_back(rdh);
00139 if (verbose > 1) utils::printRDH(rdh);
00140 RooHistPdf *hpdf = new RooHistPdf(TString::Format("%s_shape", data.GetName()), "", *rdh->get(), *rdh);
00141 RooConstVar *norm = new RooConstVar(TString::Format("%s_norm", data.GetName()), "", data.sumEntries());
00142
00143 RooAddPdf *ret = new RooAddPdf(TString::Format("%s_saturated", data.GetName()), "", RooArgList(*hpdf), RooArgList(*norm));
00144 ret->addOwnedComponents(RooArgSet(*norm));
00145 ret->addOwnedComponents(RooArgSet(*hpdf));
00146 return ret;
00147 }
00148
00149