00001 #include "../interface/MaxLikelihoodFit.h"
00002 #include "RooRealVar.h"
00003 #include "RooArgSet.h"
00004 #include "RooRandom.h"
00005 #include "RooDataSet.h"
00006 #include "RooFitResult.h"
00007 #include "RooSimultaneous.h"
00008 #include "RooAddPdf.h"
00009 #include "RooProdPdf.h"
00010 #include "RooConstVar.h"
00011 #include "RooPlot.h"
00012 #include "RooTrace.h"
00013 #include <RooMinimizer.h>
00014 #include "TCanvas.h"
00015 #include "TStyle.h"
00016 #include "TH2.h"
00017 #include "TFile.h"
00018 #include <RooStats/ModelConfig.h>
00019 #include "../interface/Combine.h"
00020 #include "../interface/ProfileLikelihood.h"
00021 #include "../interface/ProfiledLikelihoodRatioTestStatExt.h"
00022 #include "../interface/CloseCoutSentry.h"
00023 #include "../interface/utils.h"
00024
00025
00026 #include <Math/MinimizerOptions.h>
00027
00028 using namespace RooStats;
00029
00030 std::string MaxLikelihoodFit::name_ = "";
00031 std::string MaxLikelihoodFit::minos_ = "poi";
00032 std::string MaxLikelihoodFit::out_ = ".";
00033 bool MaxLikelihoodFit::makePlots_ = false;
00034 float MaxLikelihoodFit::rebinFactor_ = 1.0;
00035 std::string MaxLikelihoodFit::signalPdfNames_ = "shapeSig*";
00036 std::string MaxLikelihoodFit::backgroundPdfNames_ = "shapeBkg*";
00037 bool MaxLikelihoodFit::saveNormalizations_ = false;
00038 bool MaxLikelihoodFit::justFit_ = false;
00039 bool MaxLikelihoodFit::noErrors_ = false;
00040 bool MaxLikelihoodFit::reuseParams_ = false;
00041
00042
00043 MaxLikelihoodFit::MaxLikelihoodFit() :
00044 FitterAlgoBase("MaxLikelihoodFit specific options")
00045 {
00046 options_.add_options()
00047 ("minos", boost::program_options::value<std::string>(&minos_)->default_value(minos_), "Compute MINOS errors for: 'none', 'poi', 'all'")
00048 ("out", boost::program_options::value<std::string>(&out_)->default_value(out_), "Directory to put output in")
00049 ("plots", "Make plots")
00050 ("rebinFactor", boost::program_options::value<float>(&rebinFactor_)->default_value(rebinFactor_), "Rebin by this factor before plotting (does not affect fitting!)")
00051 ("signalPdfNames", boost::program_options::value<std::string>(&signalPdfNames_)->default_value(signalPdfNames_), "Names of signal pdfs in plots (separated by ,)")
00052 ("backgroundPdfNames", boost::program_options::value<std::string>(&backgroundPdfNames_)->default_value(backgroundPdfNames_), "Names of background pdfs in plots (separated by ',')")
00053 ("saveNormalizations", "Save post-fit normalizations of all components of the pdfs")
00054 ("justFit", "Just do the S+B fit, don't do the B-only one, don't save output file")
00055 ("noErrors", "Don't compute uncertainties on the best fit value")
00056 ("initFromBonly", "Use the vlaues of the nuisance parameters from the background only fit as the starting point for the s+b fit")
00057 ;
00058
00059
00060 nToys=0; fitStatus_=0; mu_=0;numbadnll_=-1;nll_nll0_=-1; nll_bonly_=-1;nll_sb_=-1;
00061 }
00062
00063 MaxLikelihoodFit::~MaxLikelihoodFit(){
00064
00065 delete globalObservables_;
00066 delete nuisanceParameters_;
00067 }
00068
00069 void MaxLikelihoodFit::setToyNumber(const int iToy){
00070 currentToy_ = iToy;
00071 }
00072 void MaxLikelihoodFit::setNToys(const int iToy){
00073 nToys = iToy;
00074 }
00075 void MaxLikelihoodFit::applyOptions(const boost::program_options::variables_map &vm)
00076 {
00077 applyOptionsBase(vm);
00078 makePlots_ = vm.count("plots");
00079 name_ = vm["name"].defaulted() ? std::string() : vm["name"].as<std::string>();
00080 saveNormalizations_ = vm.count("saveNormalizations");
00081 justFit_ = vm.count("justFit");
00082 noErrors_ = vm.count("noErrors");
00083 reuseParams_ = vm.count("initFromBonly");
00084 if (justFit_) { out_ = "none"; makePlots_ = false; saveNormalizations_ = false; reuseParams_ = false;}
00085
00086 }
00087
00088 bool MaxLikelihoodFit::runSpecific(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) {
00089
00090 if (reuseParams_ && minos_!="none"){
00091 std::cout << "Cannot reuse b-only fit params when running minos. Parameters will be reset when running S+B fit"<<std::endl;
00092 reuseParams_=false;
00093 }
00094
00095 if (!justFit_ && out_ != "none"){
00096 if (currentToy_ < 1){
00097 fitOut.reset(TFile::Open((out_+"/mlfit"+name_+".root").c_str(), "RECREATE"));
00098 createFitResultTrees(*mc_s);
00099 }
00100 }
00101
00102 RooRealVar *r = dynamic_cast<RooRealVar *>(mc_s->GetParametersOfInterest()->first());
00103
00104 TCanvas *c1 = 0;
00105 if (makePlots_) {
00106 utils::tdrStyle();
00107 c1 = new TCanvas("c1","c1");
00108 }
00109
00110
00111 r->setVal(preFitValue_);
00112 if (makePlots_) {
00113 std::vector<RooPlot *> plots = utils::makePlots(*mc_s->GetPdf(), data, signalPdfNames_.c_str(), backgroundPdfNames_.c_str(), rebinFactor_);
00114 for (std::vector<RooPlot *>::iterator it = plots.begin(), ed = plots.end(); it != ed; ++it) {
00115 (*it)->Draw();
00116 c1->Print((out_+"/"+(*it)->GetName()+"_prefit.png").c_str());
00117 if (fitOut.get() && currentToy_< 1) fitOut->WriteTObject(*it, (std::string((*it)->GetName())+"_prefit").c_str());
00118 }
00119 }
00120
00121
00122
00123 if (currentToy_ < 1){
00124 const RooArgSet *nuis = mc_s->GetNuisanceParameters();
00125 const RooArgSet *globalObs = mc_s->GetGlobalObservables();
00126 if (!justFit_ && nuis && globalObs ) {
00127 std::auto_ptr<RooAbsPdf> nuisancePdf(utils::makeNuisancePdf(*mc_s));
00128 std::auto_ptr<RooDataSet> globalData(new RooDataSet("globalData","globalData", *globalObs));
00129 globalData->add(*globalObs);
00130 RooFitResult *res_prefit = 0;
00131 {
00132 CloseCoutSentry sentry(verbose < 2);
00133 res_prefit = nuisancePdf->fitTo(*globalData,
00134 RooFit::Save(1),
00135 RooFit::Minimizer(ROOT::Math::MinimizerOptions::DefaultMinimizerType().c_str(), ROOT::Math::MinimizerOptions::DefaultMinimizerAlgo().c_str()),
00136 RooFit::Strategy(minimizerStrategy_),
00137 RooFit::Minos(minos_ == "all")
00138 );
00139 }
00140 if (fitOut.get() ) fitOut->WriteTObject(res_prefit, "nuisances_prefit_res");
00141 if (fitOut.get() ) fitOut->WriteTObject(nuis->snapshot(), "nuisances_prefit");
00142
00143 nuisancePdf.reset();
00144 globalData.reset();
00145 delete res_prefit;
00146
00147 } else if (nuis) {
00148 if (fitOut.get() ) fitOut->WriteTObject(nuis->snapshot(), "nuisances_prefit");
00149 }
00150 }
00151
00152 RooFitResult *res_b = 0, *res_s = 0;
00153 const RooCmdArg &constCmdArg_s = withSystematics ? RooFit::Constrain(*mc_s->GetNuisanceParameters()) : RooFit::NumCPU(1);
00154 const RooCmdArg &minosCmdArg = minos_ == "poi" ? RooFit::Minos(*mc_s->GetParametersOfInterest()) : RooFit::Minos(minos_ != "none");
00155 w->loadSnapshot("clean");
00156 r->setVal(0.0); r->setConstant(true);
00157
00158
00159 if (currentToy_<1) nll.reset(mc_s->GetPdf()->createNLL(data,constCmdArg_s,RooFit::Extended(mc_s->GetPdf()->canBeExtended())));
00160
00161 double nll0 = nll->getVal();
00162
00163 if (justFit_) {
00164
00165 } else if (minos_ != "all") {
00166 RooArgList minos;
00167 res_b = doFit(*mc_s->GetPdf(), data, minos, constCmdArg_s, true, true);
00168 nll_bonly_=nll->getVal()-nll0;
00169 } else {
00170 CloseCoutSentry sentry(verbose < 2);
00171 res_b = mc_s->GetPdf()->fitTo(data,
00172 RooFit::Save(1),
00173 RooFit::Minimizer(ROOT::Math::MinimizerOptions::DefaultMinimizerType().c_str(), ROOT::Math::MinimizerOptions::DefaultMinimizerAlgo().c_str()),
00174 RooFit::Strategy(minimizerStrategy_),
00175 RooFit::Extended(mc_s->GetPdf()->canBeExtended()),
00176 constCmdArg_s, minosCmdArg
00177 );
00178 if (res_b) nll_bonly_ = nll->getVal() - nll0;
00179
00180 }
00181
00182 if (res_b) {
00183 if (verbose > 1) res_b->Print("V");
00184 if (fitOut.get()) {
00185 if (currentToy_< 1) fitOut->WriteTObject(res_b,"fit_b");
00186 setFitResultTrees(mc_s->GetNuisanceParameters(),nuisanceParameters_);
00187 setFitResultTrees(mc_s->GetGlobalObservables(),globalObservables_);
00188 fitStatus_ = res_b->status();
00189 }
00190 numbadnll_=res_b->numInvalidNLL();
00191
00192 if (makePlots_) {
00193 std::vector<RooPlot *> plots = utils::makePlots(*mc_b->GetPdf(), data, signalPdfNames_.c_str(), backgroundPdfNames_.c_str(), rebinFactor_);
00194 for (std::vector<RooPlot *>::iterator it = plots.begin(), ed = plots.end(); it != ed; ++it) {
00195 c1->cd(); (*it)->Draw();
00196 c1->Print((out_+"/"+(*it)->GetName()+"_fit_b.png").c_str());
00197 if (fitOut.get() && currentToy_< 1) fitOut->WriteTObject(*it, (std::string((*it)->GetName())+"_fit_b").c_str());
00198 }
00199 }
00200
00201 if (saveNormalizations_ && currentToy_<1) {
00202 RooArgSet *norms = new RooArgSet();
00203 norms->setName("norm_fit_b");
00204 getNormalizations(mc_s->GetPdf(), *mc_s->GetObservables(), *norms);
00205 if (fitOut.get()) fitOut->WriteTObject(norms, "norm_fit_b");
00206 delete norms;
00207 }
00208
00209 if (makePlots_ && currentToy_<1) {
00210 TH2 *corr = res_b->correlationHist();
00211 c1->SetLeftMargin(0.25); c1->SetBottomMargin(0.25);
00212 corr->SetTitle("Correlation matrix of fit parameters");
00213 gStyle->SetPaintTextFormat(res_b->floatParsFinal().getSize() > 10 ? ".1f" : ".2f");
00214 gStyle->SetOptStat(0);
00215 corr->SetMarkerSize(res_b->floatParsFinal().getSize() > 10 ? 2 : 1);
00216 corr->Draw("COLZ TEXT");
00217 c1->Print((out_+"/covariance_fit_b.png").c_str());
00218 c1->SetLeftMargin(0.16); c1->SetBottomMargin(0.13);
00219 if (fitOut.get()) fitOut->WriteTObject(corr, "covariance_fit_b");
00220 }
00221 }
00222 else {
00223 fitStatus_=-1;
00224 numbadnll_=-1;
00225 }
00226 mu_=r->getVal();
00227 if (t_fit_b_) t_fit_b_->Fill();
00228
00229 delete res_b;
00230
00231 if (!reuseParams_) w->loadSnapshot("clean");
00232 r->setVal(preFitValue_); r->setConstant(false);
00233 if (minos_ != "all") {
00234 RooArgList minos; if (minos_ == "poi") minos.add(*r);
00235 res_s = doFit(*mc_s->GetPdf(), data, minos, constCmdArg_s, !noErrors_, true);
00236 nll_sb_ = nll->getVal()-nll0;
00237 } else {
00238 CloseCoutSentry sentry(verbose < 2);
00239 res_s = mc_s->GetPdf()->fitTo(data,
00240 RooFit::Save(1),
00241 RooFit::Minimizer(ROOT::Math::MinimizerOptions::DefaultMinimizerType().c_str(), ROOT::Math::MinimizerOptions::DefaultMinimizerAlgo().c_str()),
00242 RooFit::Strategy(minimizerStrategy_),
00243 RooFit::Extended(mc_s->GetPdf()->canBeExtended()),
00244 constCmdArg_s, minosCmdArg
00245 );
00246 if (res_s) nll_sb_= nll->getVal()-nll0;
00247
00248 }
00249 if (res_s) {
00250 limit = r->getVal();
00251 limitErr = r->getError();
00252 if (verbose > 1) res_s->Print("V");
00253 if (fitOut.get()){
00254 if (currentToy_<1) fitOut->WriteTObject(res_s, "fit_s");
00255
00256 setFitResultTrees(mc_s->GetNuisanceParameters(),nuisanceParameters_);
00257 setFitResultTrees(mc_s->GetGlobalObservables(),globalObservables_);
00258 fitStatus_ = res_s->status();
00259 numbadnll_ = res_s->numInvalidNLL();
00260
00261
00262 nll_nll0_ = nll_sb_ - nll_bonly_;
00263 }
00264
00265 if (makePlots_) {
00266 std::vector<RooPlot *> plots = utils::makePlots(*mc_s->GetPdf(), data, signalPdfNames_.c_str(), backgroundPdfNames_.c_str(), rebinFactor_);
00267 for (std::vector<RooPlot *>::iterator it = plots.begin(), ed = plots.end(); it != ed; ++it) {
00268 c1->cd(); (*it)->Draw();
00269 c1->Print((out_+"/"+(*it)->GetName()+"_fit_s.png").c_str());
00270 if (fitOut.get() && currentToy_< 1) fitOut->WriteTObject(*it, (std::string((*it)->GetName())+"_fit_s").c_str());
00271 }
00272 }
00273
00274 if (saveNormalizations_&& currentToy_< 1) {
00275 RooArgSet *norms = new RooArgSet();
00276 norms->setName("norm_fit_s");
00277 getNormalizations(mc_s->GetPdf(), *mc_s->GetObservables(), *norms);
00278 if (fitOut.get() ) fitOut->WriteTObject(norms, "norm_fit_s");
00279 delete norms;
00280 }
00281
00282 if (makePlots_&& currentToy_< 1) {
00283 TH2 *corr = res_s->correlationHist();
00284 c1->SetLeftMargin(0.25); c1->SetBottomMargin(0.25);
00285 corr->SetTitle("Correlation matrix of fit parameters");
00286 gStyle->SetPaintTextFormat(res_s->floatParsFinal().getSize() > 10 ? ".1f" : ".2f");
00287 gStyle->SetOptStat(0);
00288 corr->SetMarkerSize(res_s->floatParsFinal().getSize() > 10 ? 2 : 1);
00289 corr->Draw("COLZ TEXT");
00290 c1->Print((out_+"/covariance_fit_s.png").c_str());
00291 c1->SetLeftMargin(0.16); c1->SetBottomMargin(0.13);
00292 if (fitOut.get() ) fitOut->WriteTObject(corr, "covariance_fit_s");
00293 }
00294 } else {
00295 fitStatus_=-1;
00296 numbadnll_=-1;
00297 nll_nll0_ = -1;
00298 }
00299 mu_=r->getVal();
00300 if (t_fit_sb_) t_fit_sb_->Fill();
00301
00302 if (res_s) {
00303 RooRealVar *rf = dynamic_cast<RooRealVar*>(res_s->floatParsFinal().find(r->GetName()));
00304 double bestFitVal = rf->getVal();
00305
00306 double hiErr = +(rf->hasRange("err68") ? rf->getMax("err68") - bestFitVal : rf->getAsymErrorHi());
00307 double loErr = -(rf->hasRange("err68") ? rf->getMin("err68") - bestFitVal : rf->getAsymErrorLo());
00308 double maxError = std::max<double>(std::max<double>(hiErr, loErr), rf->getError());
00309
00310 if (fabs(hiErr) < 0.001*maxError) hiErr = -bestFitVal + rf->getMax();
00311 if (fabs(loErr) < 0.001*maxError) loErr = +bestFitVal - rf->getMin();
00312
00313 double hiErr95 = +(do95_ && rf->hasRange("err95") ? rf->getMax("err95") - bestFitVal : 0);
00314 double loErr95 = -(do95_ && rf->hasRange("err95") ? rf->getMin("err95") - bestFitVal : 0);
00315
00316 limit = bestFitVal; limitErr = 0;
00317 if (!noErrors_) Combine::commitPoint(true, 0.5);
00318 limit = bestFitVal - loErr; limitErr = 0;
00319 if (!noErrors_) Combine::commitPoint(true, 0.16);
00320 limit = bestFitVal + hiErr; limitErr = 0;
00321 if (!noErrors_) Combine::commitPoint(true, 0.84);
00322 if (do95_ && rf->hasRange("err95") && !noErrors_) {
00323 limit = rf->getMax("err95"); Combine::commitPoint(true, 0.975);
00324 limit = rf->getMin("err95"); Combine::commitPoint(true, 0.025);
00325 }
00326
00327 limit = bestFitVal;
00328 limitErr = maxError;
00329 std::cout << "\n --- MaxLikelihoodFit ---" << std::endl;
00330 std::cout << "Best fit " << r->GetName() << ": " << rf->getVal() << " "<< -loErr << "/+" << +hiErr << " (68% CL)" << std::endl;
00331 if (do95_) {
00332 std::cout << " " << r->GetName() << ": " << rf->getVal() << " "<< -loErr95 << "/+" << +hiErr95 << " (95% CL)" << std::endl;
00333 }
00334 } else {
00335 std::cout << "\n --- MaxLikelihoodFit ---" << std::endl;
00336 std::cout << "Fit failed." << std::endl;
00337 }
00338
00339 if (currentToy_==nToys-1 || nToys==0 ) {
00340
00341 if (fitOut.get()) {
00342 fitOut->cd();
00343 t_fit_sb_->Write(); t_fit_b_->Write();
00344 fitOut.release()->Close();
00345 }
00346
00347 }
00348 bool fitreturn = (res_s!=0);
00349 delete res_s;
00350
00351 std::cout << "nll S+B -> "<<nll_sb_ << " nll B -> " << nll_bonly_ <<std::endl;
00352 return fitreturn;
00353 }
00354
00355 void MaxLikelihoodFit::getNormalizations(RooAbsPdf *pdf, const RooArgSet &obs, RooArgSet &out) {
00356 RooSimultaneous *sim = dynamic_cast<RooSimultaneous *>(pdf);
00357 if (sim != 0) {
00358 RooAbsCategoryLValue &cat = const_cast<RooAbsCategoryLValue &>(sim->indexCat());
00359 for (int i = 0, n = cat.numBins((const char *)0); i < n; ++i) {
00360 cat.setBin(i);
00361 RooAbsPdf *pdfi = sim->getPdf(cat.getLabel());
00362 if (pdfi) getNormalizations(pdfi, obs, out);
00363 }
00364 return;
00365 }
00366 RooProdPdf *prod = dynamic_cast<RooProdPdf *>(pdf);
00367 if (prod != 0) {
00368 RooArgList list(prod->pdfList());
00369 for (int i = 0, n = list.getSize(); i < n; ++i) {
00370 RooAbsPdf *pdfi = (RooAbsPdf *) list.at(i);
00371 if (pdfi->dependsOn(obs)) getNormalizations(pdfi, obs, out);
00372 }
00373 return;
00374 }
00375 RooAddPdf *add = dynamic_cast<RooAddPdf *>(pdf);
00376 if (add != 0) {
00377 RooArgList list(add->coefList());
00378 for (int i = 0, n = list.getSize(); i < n; ++i) {
00379 RooAbsReal *coeff = (RooAbsReal *) list.at(i);
00380 out.addOwned(*(new RooConstVar(coeff->GetName(), "", coeff->getVal())));
00381 }
00382 return;
00383 }
00384 }
00385
00386
00387 void MaxLikelihoodFit::setFitResultTrees(const RooArgSet *args, double * vals){
00388
00389 TIterator* iter(args->createIterator());
00390 int count=0;
00391
00392 for (TObject *a = iter->Next(); a != 0; a = iter->Next()) {
00393 RooRealVar *rrv = dynamic_cast<RooRealVar *>(a);
00394 std::string name = rrv->GetName();
00395 vals[count]=rrv->getVal();
00396 count++;
00397 }
00398 delete iter;
00399 return;
00400 }
00401
00402 void MaxLikelihoodFit::createFitResultTrees(const RooStats::ModelConfig &mc){
00403
00404
00405
00406
00407 t_fit_b_ = new TTree("tree_fit_b","tree_fit_b");
00408 t_fit_sb_ = new TTree("tree_fit_sb","tree_fit_sb");
00409
00410 t_fit_b_->Branch("fit_status",&fitStatus_,"fit_status/Int_t");
00411 t_fit_sb_->Branch("fit_status",&fitStatus_,"fit_status/Int_t");
00412
00413 t_fit_b_->Branch("mu",&mu_,"mu/Double_t");
00414 t_fit_sb_->Branch("mu",&mu_,"mu/Double_t");
00415
00416 t_fit_b_->Branch("numbadnll",&numbadnll_,"numbadnll/Int_t");
00417 t_fit_sb_->Branch("numbadnll",&numbadnll_,"numbadnll/Int_t");
00418
00419 t_fit_b_->Branch("nll_min",&nll_bonly_,"nll_min/Double_t");
00420 t_fit_sb_->Branch("nll_min",&nll_sb_,"nll_min/Double_t");
00421
00422 t_fit_sb_->Branch("nll_nll0",&nll_nll0_,"nll_nll0/Double_t");
00423
00424
00425 const RooArgSet *cons = mc.GetGlobalObservables();
00426 const RooArgSet *nuis = mc.GetNuisanceParameters();
00427
00428 globalObservables_ = new double[cons->getSize()];
00429 nuisanceParameters_= new double[nuis->getSize()];
00430
00431 int count=0;
00432 TIterator* iter_c(cons->createIterator());
00433 for (TObject *a = iter_c->Next(); a != 0; a = iter_c->Next()) {
00434 RooRealVar *rrv = dynamic_cast<RooRealVar *>(a);
00435 std::string name = rrv->GetName();
00436 globalObservables_[count]=0;
00437 t_fit_sb_->Branch(name.c_str(),&(globalObservables_[count]),Form("%s/Double_t",name.c_str()));
00438 t_fit_b_->Branch(name.c_str(),&(globalObservables_[count]),Form("%s/Double_t",name.c_str()));
00439 count++;
00440 }
00441
00442 count = 0;
00443 TIterator* iter_n(nuis->createIterator());
00444 for (TObject *a = iter_n->Next(); a != 0; a = iter_n->Next()) {
00445 RooRealVar *rrv = dynamic_cast<RooRealVar *>(a);
00446 std::string name = rrv->GetName();
00447 nuisanceParameters_[count] = 0;
00448 t_fit_sb_->Branch(name.c_str(),&(nuisanceParameters_[count])),Form("%s/Double_t",name.c_str());
00449 t_fit_b_->Branch(name.c_str(),&(nuisanceParameters_[count]),Form("%s/Double_t",name.c_str()));
00450 count++;
00451 }
00452 std::cout << "Created Branches" <<std::endl;
00453 return;
00454 }