4 #include "../interface/Combine.h"
19 #include <TGraphErrors.h>
20 #include <TIterator.h>
25 #include <TStopwatch.h>
28 #include <RooAbsData.h>
29 #include <RooAbsPdf.h>
30 #include <RooArgSet.h>
31 #include <RooCustomizer.h>
32 #include <RooDataHist.h>
33 #include <RooDataSet.h>
34 #include <RooFitResult.h>
35 #include <RooMsgService.h>
37 #include <RooRandom.h>
38 #include <RooRealVar.h>
39 #include <RooUniform.h>
40 #include <RooWorkspace.h>
42 #include <RooStats/HLFactory.h>
43 #include <RooStats/RooStatsUtils.h>
44 #include <RooStats/ModelConfig.h>
46 #include <boost/filesystem.hpp>
47 #include <boost/program_options.hpp>
50 #include "../interface/LimitAlgo.h"
51 #include "../interface/utils.h"
52 #include "../interface/CloseCoutSentry.h"
53 #include "../interface/RooSimultaneousOpt.h"
54 #include "../interface/ToyMCSamplerOpt.h"
55 #include "../interface/AsimovUtils.h"
57 using namespace RooStats;
58 using namespace RooFit;
76 statOptions_(
"Common statistics options"),
77 ioOptions_(
"Common input-output options"),
78 miscOptions_(
"Common miscellaneous options"),
79 rMin_(std::numeric_limits<float>::quiet_NaN()),
80 rMax_(std::numeric_limits<float>::quiet_NaN()) {
81 namespace po = boost::program_options;
83 (
"systematics,S", po::value<bool>(&
withSystematics)->default_value(
true),
"Add systematic uncertainties")
84 (
"cl,C", po::value<float>(&
cl)->default_value(0.95),
"Confidence Level")
85 (
"rMin", po::value<float>(&
rMin_),
"Override minimum value for signal strength")
86 (
"rMax", po::value<float>(&
rMax_),
"Override maximum value for signal strength")
87 (
"prior", po::value<std::string>(&
prior_)->default_value(
"flat"),
"Prior to use, for methods that require it and if it's not already in the input file: 'flat' (default), '1/sqrt(r)'")
88 (
"significance",
"Compute significance instead of upper limit (works only for some methods)")
89 (
"lowerLimit",
"Compute the lower limit instead of the upper limit (works only for some methods)")
90 (
"hintStatOnly",
"Ignore systematics when computing the hint")
91 (
"toysNoSystematics",
"Generate all toys with the central value of the nuisance parameters, without fluctuating them")
92 (
"toysFrequentist",
"Generate all toys in the frequentist way. Note that this affects the toys generated by option '-t' that happen in the main loop, not the ones within the Hybrid(New) algorithm.")
93 (
"expectSignal", po::value<float>(&
expectSignal_)->default_value(0.),
"If set to non-zero, generate *signal* toys instead of background ones, with the specified signal strength.")
94 (
"unbinned,U",
"Generate unbinned datasets instead of binned ones (works only for extended pdfs)")
95 (
"generateBinnedWorkaround",
"Make binned datasets generating unbinned ones and then binnning them. Workaround for a bug in RooFit.")
98 (
"saveWorkspace",
"Save workspace to output root file")
99 (
"workspaceName,w", po::value<std::string>(&
workspaceName_)->default_value(
"w"),
"Workspace name, when reading it from or writing it to a rootfile.")
100 (
"modelConfigName", po::value<std::string>(&
modelConfigName_)->default_value(
"ModelConfig"),
"ModelConfig name, when reading it from or writing it to a rootfile.")
101 (
"modelConfigNameB", po::value<std::string>(&
modelConfigNameB_)->default_value(
"%s_bonly"),
"Name of the ModelConfig for b-only hypothesis.\n"
102 "If not present, it will be made from the singal model taking zero signal strength.\n"
103 "A '%s' in the name will be replaced with the modelConfigName.")
104 (
"validateModel,V",
"Perform some sanity checks on the model and abort if they fail.")
105 (
"saveToys",
"Save results of toy MC in output file")
108 (
"newGenerator", po::value<bool>(&
newGen_)->default_value(
true),
"Use new generator code for toys, fixes all issues with binned and mixed generation (equivalent of --newToyMC but affects the top-level toys from option '-t' instead of the ones within the HybridNew)")
109 (
"optimizeSimPdf", po::value<bool>(&
optSimPdf_)->default_value(
true),
"Turn on special optimizations of RooSimultaneous. On by default, you can turn it off if it doesn't work for your workspace.")
110 (
"rebuildSimPdf", po::value<bool>(&
rebuildSimPdf_)->default_value(
false),
"Rebuild simultaneous pdf from scratch to make sure constraints are correct (not needed in CMS workspaces)")
111 (
"compile",
"Compile expressions instead of interpreting them")
112 (
"tempDir", po::value<bool>(&
makeTempDir_)->default_value(
false),
"Run the program from a temporary directory (automatically on for text datacards or if 'compile' is activated)")
113 (
"guessGenMode",
"Guess if to generate binned or unbinned based on dataset");
119 std::cout <<
">>> including systematics" << std::endl;
121 std::cout <<
">>> no systematics included" << std::endl;
136 char modelBName[1024];
140 mass_ = vm[
"mass"].as<
float>();
145 bool Combine::mklimit(RooWorkspace *
w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &
data,
double &
limit,
double &limitErr) {
149 double hint = 0, hintErr = 0;
bool hashint =
false;
153 hashint =
hintAlgo->
run(w, mc_s, mc_b, data, hint, hintErr, 0);
156 hashint =
hintAlgo->
run(w, mc_s, mc_b, data, hint, hintErr, 0);
160 ret =
algo->
run(w, mc_s, mc_b, data, limit, limitErr, (hashint ? &hint : 0));
162 std::cerr <<
"Caught exception " << ex.what() << std::endl;
172 timer.Stop();
t_cpu_ = timer.CpuTime()/60.;
t_real_ = timer.RealTime()/60.;
173 printf(
"Done in %.2f min (cpu), %.2f min (real)\n",
t_cpu_,
t_real_);
179 TFile *tfile; std::string
file,
path;
180 ToCleanUp() : tfile(0),
file(
""),
path(
"") {}
182 if (tfile) { tfile->Close();
delete tfile; }
184 if (unlink(
file.c_str()) == -1)
std::cerr <<
"Failed to delete temporary file " <<
file <<
": " << strerror(errno) << std::endl;
186 if (!
path.empty()) { boost::filesystem::remove_all(
path); }
191 ToCleanUp garbageCollect;
193 TString tmpDir =
"", tmpFile =
"",
pwd(gSystem->pwd());
195 tmpDir =
"roostats-XXXXXX"; tmpFile =
"model";
196 mkdtemp(const_cast<char *>(tmpDir.Data()));
197 gSystem->cd(tmpDir.Data());
198 garbageCollect.path = tmpDir.Data();
199 }
else if (!hlfFile.EndsWith(
".hlf") && !hlfFile.EndsWith(
".root")) {
200 tmpFile =
"roostats-XXXXXX";
201 mktemp(const_cast<char *>(tmpFile.Data()));
204 bool isTextDatacard =
false,
isBinary =
false;
205 TString fileToLoad = (hlfFile[0] ==
'/' ? hlfFile :
pwd+
"/"+hlfFile);
206 if (!boost::filesystem::exists(fileToLoad.Data()))
throw std::invalid_argument((
"File "+fileToLoad+
" does not exist").
Data());
207 if (hlfFile.EndsWith(
".hlf") ) {
209 }
else if (hlfFile.EndsWith(
".root")) {
212 TString txtFile = fileToLoad.Data();
213 TString
options = TString::Format(
" -m %f -D %s",
mass_, dataset.c_str());
216 if (
verbose > 1) options += TString::Format(
" --verbose %d",
verbose-1);
221 int status = gSystem->Exec(
"text2workspace.py "+options+
" '"+txtFile+
"' -b -o "+tmpFile+
".root");
222 isBinary =
true; fileToLoad = tmpFile+
".root";
223 if (status != 0 || !boost::filesystem::exists(fileToLoad.Data())) {
224 throw std::invalid_argument(
"Failed to convert the input datacard from LandS to RooStats format. The lines above probably contain more information about the error.");
226 garbageCollect.file = fileToLoad;
229 if (getenv(
"CMSSW_BASE")) {
230 gSystem->AddIncludePath(TString::Format(
" -I%s/src ", getenv(
"CMSSW_BASE")));
231 if (
verbose > 3)
std::cout <<
"Adding " << getenv(
"CMSSW_BASE") <<
"/src to include path" << std::endl;
233 if (getenv(
"ROOFITSYS")) {
234 gSystem->AddIncludePath(TString::Format(
" -I%s/include ", getenv(
"ROOFITSYS")));
235 if (
verbose > 3)
std::cout <<
"Adding " << getenv(
"ROOFITSYS") <<
"/include to include path" << std::endl;
240 RooWorkspace *
w = 0; RooStats::ModelConfig *mc = 0, *mc_bonly = 0;
241 std::auto_ptr<RooStats::HLFactory> hlf(0);
243 TFile *fIn = TFile::Open(fileToLoad);
244 garbageCollect.tfile = fIn;
246 w =
dynamic_cast<RooWorkspace *
>(fIn->Get(
workspaceName_.c_str()));
248 std::cerr <<
"Could not find workspace '" <<
workspaceName_ <<
"' in file " << fileToLoad << std::endl; fIn->ls();
249 throw std::invalid_argument(
"Missing Workspace");
252 RooRealVar *MH = w->var(
"MH");
254 if (
verbose > 2)
std::cerr <<
"Setting variable 'MH' in workspace to the higgs mass " <<
mass_ << std::endl;
257 mc =
dynamic_cast<RooStats::ModelConfig *
>(w->genobj(
modelConfigName_.c_str()));
258 mc_bonly =
dynamic_cast<RooStats::ModelConfig *
>(w->genobj(
modelConfigNameB_.c_str()));
261 throw std::invalid_argument(
"Missing ModelConfig");
264 const RooArgSet *POI = mc->GetParametersOfInterest();
265 if (POI == 0 || POI->getSize() == 0)
throw std::invalid_argument(
"ModelConfig '"+
modelConfigName_+
"' does not define parameters of interest.");
266 if (POI->getSize() > 1)
std::cerr <<
"ModelConfig '" <<
modelConfigName_ <<
"' defines more than one parameter of interest. This is not supported in some statistical methods." << std::endl;
267 if (mc->GetObservables() == 0)
throw std::invalid_argument(
"ModelConfig '"+
modelConfigName_+
"' does not define observables.");
268 if (mc->GetPdf() == 0)
throw std::invalid_argument(
"ModelConfig '"+
modelConfigName_+
"' does not define a pdf.");
269 if (
rebuildSimPdf_ &&
typeid(*mc->GetPdf()) ==
typeid(RooSimultaneous)) {
270 RooSimultaneous *newpdf =
utils::rebuildSimPdf(*mc->GetObservables(),
dynamic_cast<RooSimultaneous*
>(mc->GetPdf()));
274 if (
optSimPdf_ &&
typeid(*mc->GetPdf()) ==
typeid(RooSimultaneous)) {
281 std::cerr <<
"Will make one from the signal ModelConfig '" <<
modelConfigName_ <<
"' setting signal strenth '" << POI->first()->GetName() <<
"' to zero" << std::endl;
282 w->factory(
"_zero_[0]");
283 RooCustomizer make_model_s(*mc->GetPdf(),
"_model_bonly_");
284 make_model_s.replaceArg(*POI->first(), *w->var(
"_zero_"));
285 RooAbsPdf *model_b =
dynamic_cast<RooAbsPdf *
>(make_model_s.build());
286 model_b->SetName(
"_model_bonly_");
288 mc_bonly =
new RooStats::ModelConfig(*mc);
289 mc_bonly->SetPdf(*model_b);
292 hlf.reset(
new RooStats::HLFactory(
"factory", fileToLoad));
295 std::cerr <<
"Could not read HLF from file " << (hlfFile[0] ==
'/' ? hlfFile :
pwd+
"/"+hlfFile) << std::endl;
298 RooRealVar *MH = w->var(
"MH");
300 std::cerr <<
"Could not find MH var in workspace '" <<
workspaceName_ <<
"' in file " << fileToLoad << std::endl;
301 throw std::invalid_argument(
"Missing MH");
304 if (w->set(
"observables") == 0)
throw std::invalid_argument(
"The model must define a RooArgSet 'observables'");
305 if (w->set(
"POI") == 0)
throw std::invalid_argument(
"The model must define a RooArgSet 'POI' for the parameters of interest");
306 if (w->pdf(
"model_b") == 0)
throw std::invalid_argument(
"The model must define a RooAbsPdf 'model_b'");
307 if (w->pdf(
"model_s") == 0)
throw std::invalid_argument(
"The model must define a RooAbsPdf 'model_s'");
311 mc->SetPdf(*w->pdf(
"model_s"));
312 mc->SetObservables(*w->set(
"observables"));
313 mc->SetParametersOfInterest(*w->set(
"POI"));
314 if (w->set(
"nuisances")) mc->SetNuisanceParameters(*w->set(
"nuisances"));
315 if (w->set(
"globalObservables")) mc->SetGlobalObservables(*w->set(
"globalObservables"));
316 if (w->pdf(
"prior")) mc->SetNuisanceParameters(*w->pdf(
"prior"));
320 mc_bonly->SetPdf(*w->pdf(
"model_b"));
321 mc_bonly->SetObservables(*w->set(
"observables"));
322 mc_bonly->SetParametersOfInterest(*w->set(
"POI"));
323 if (w->set(
"nuisances")) mc_bonly->SetNuisanceParameters(*w->set(
"nuisances"));
324 if (w->set(
"globalObservables")) mc_bonly->SetGlobalObservables(*w->set(
"globalObservables"));
325 if (w->pdf(
"prior")) mc_bonly->SetNuisanceParameters(*w->pdf(
"prior"));
332 const RooArgSet * observables = mc->GetObservables();
333 const RooArgSet * POI = mc->GetParametersOfInterest();
334 const RooArgSet * nuisances = mc->GetNuisanceParameters();
335 if (dynamic_cast<RooRealVar*>(POI->first()) == 0)
throw std::invalid_argument(
"First parameter of interest is not a RooRealVar");
337 if (w->data(dataset.c_str()) == 0) {
338 if (isTextDatacard) {
339 RooDataSet *data_obs =
new RooDataSet(dataset.c_str(), dataset.c_str(), *observables);
340 data_obs->add(*observables);
341 w->import(*data_obs);
343 std::cout <<
"Dataset " << dataset.c_str() <<
" not found." << std::endl;
357 if (mc->GetPriorPdf() == 0) {
359 RooAbsPdf *
prior =
new RooUniform(
"prior",
"prior",*POI);
361 mc->SetPriorPdf(*prior);
362 }
else if (
prior_ ==
"1/sqrt(r)") {
363 w->factory(TString::Format(
"EXPR::prior(\\\"1/sqrt(@0)\\\",%s)", POI->first()->GetName()));
364 mc->SetPriorPdf(*w->pdf(
"prior"));
365 }
else if (!
prior_.empty() && w->pdf(
prior_.c_str()) != 0) {
366 std::cout <<
"Will use prior '" <<
prior_ <<
"' in from the input workspace" << std::endl;
367 mc->SetPriorPdf(*w->pdf(
prior_.c_str()));
369 std::cerr <<
"Unknown prior '" <<
prior_ <<
"'. It's not 'flat' '1/sqrt(r)' or the name of a pdf in the model.\n" << std::endl;
370 throw std::invalid_argument(
"Bad prior");
375 std::cout <<
"The signal model has no nuisance parameters. Please run the limit tool with no systematics (option -S 0)." << std::endl;
376 std::cout <<
"To make things easier, I will assume you have done it." << std::endl;
379 std::cout <<
"Will set nuisance parameters to constants: " ;
390 if (mc_bonly->GetGlobalObservables())
utils::setAllConstant(*mc_bonly->GetGlobalObservables(),
true);
393 w->saveSnapshot(
"clean", w->allVars());
402 bool isExtended = mc_bonly->GetPdf()->canBeExtended();
403 RooAbsData *dobs = w->data(dataset.c_str());
404 RooAbsPdf *genPdf =
expectSignal_ > 0 ? mc->GetPdf() : mc_bonly->GetPdf();
407 if (
guessGenMode_ && genPdf->InheritsFrom(
"RooSimultaneous") && (dobs != 0)) {
419 w->saveSnapshot(
"reallyClean", w->allVars());
420 if (dobs == 0)
throw std::invalid_argument(
"Frequentist Asimov datasets can't be generated without a real dataset to fit");
421 RooArgSet gobsAsimov;
423 if (mc->GetGlobalObservables()) {
424 RooArgSet gobs(*mc->GetGlobalObservables());
427 w->saveSnapshot(
"clean", w->allVars());
432 }
else if (isExtended) {
434 throw std::invalid_argument(
"Asimov datasets can only be generated binned");
436 dobs = genPdf->generateBinned(*observables,RooFit::Extended(),RooFit::Asimov());
439 dobs = genPdf->generate(*observables,1,RooFit::Asimov());
441 }
else if (dobs == 0) {
442 std::cerr <<
"No observed data '" << dataset <<
"' in the workspace. Cannot compute limit.\n" << std::endl;
448 std::cout <<
"Computing limit starting from " << (iToy == 0 ?
"observation" :
"expected outcome") << std::endl;
450 if (
mklimit(w,mc,mc_bonly,*dobs,limit,limitErr)) tree->Fill();
454 std::vector<double> limitHistory;
455 std::auto_ptr<RooAbsPdf> nuisancePdf;
458 unsigned int nLimits = 0;
459 w->loadSnapshot(
"clean");
460 RooDataSet *systDs = 0;
462 if (nuisances == 0)
throw std::logic_error(
"Running with systematics enabled, but nuisances not defined.");
465 if (mc->GetGlobalObservables() == 0)
throw std::logic_error(
"Cannot use toysFrequentist with no global observables");
466 w->saveSnapshot(
"reallyClean", w->allVars());
468 if (dobs == 0)
throw std::logic_error(
"Cannot use toysFrequentist with no input dataset");
470 genPdf->fitTo(*dobs, RooFit::Save(1), RooFit::Minimizer(
"Minuit2",
"minimize"), RooFit::Strategy(0), RooFit::Hesse(0), RooFit::Constrain(*(
expectSignal_ ?mc:mc_bonly)->GetNuisanceParameters()));
472 w->saveSnapshot(
"clean", w->allVars());
473 systDs = nuisancePdf->generate(*mc->GetGlobalObservables(), nToys);
475 systDs = nuisancePdf->generate(*nuisances, nToys);
478 std::auto_ptr<RooArgSet> vars(genPdf->getVariables());
480 for (iToy = 1; iToy <= nToys; ++iToy) {
482 RooAbsData *absdata_toy = 0;
484 w->loadSnapshot(
"clean");
487 *vars = *systDs->get(iToy-1);
492 std::cout <<
"Generate toy " << iToy <<
"/" << nToys << std::endl;
495 absdata_toy = newToyMC.
generate(weightVar_);
497 absdata_toy = genPdf->generate(*observables,RooFit::Extended());
499 std::auto_ptr<RooDataSet> unbinn(genPdf->generate(*observables,RooFit::Extended()));
500 absdata_toy =
new RooDataHist(
"toy",
"binned toy", *observables, *unbinn);
502 absdata_toy = genPdf->generateBinned(*observables,RooFit::Extended());
505 RooDataSet *data_toy = genPdf->generate(*observables,1);
506 absdata_toy = data_toy;
509 absdata_toy =
dynamic_cast<RooAbsData *
>(
readToysFromHere->Get(TString::Format(
"toys/toy_%d",iToy)));
510 if (absdata_toy == 0) {
517 w->loadSnapshot(
"clean");
519 if (
mklimit(w,mc,mc_bonly,*absdata_toy,limit,limitErr)) {
523 limitHistory.push_back(limit);
526 writeToysHere->WriteTObject(absdata_toy, TString::Format(
"toy_%d", iToy));
530 if (weightVar_)
delete weightVar_;
533 for (std::vector<double>::const_iterator itl = limitHistory.begin(); itl != limitHistory.end(); ++itl) {
534 rms +=
pow(*itl-expLimit, 2);
537 rms =
sqrt(rms/(nLimits-1)/nLimits);
538 cout <<
"mean expected limit: r < " << expLimit <<
" +/- " << rms <<
" @ " <<
cl*100 <<
"%CL (" <<nLimits <<
" toyMC)" << endl;
540 cout <<
"mean expected limit: r < " << expLimit <<
" @ " <<
cl*100 <<
"%CL (" <<nLimits <<
" toyMC)" << endl;
542 sort(limitHistory.begin(), limitHistory.end());
544 double medianLimit = (nLimits % 2 == 0 ? 0.5*(limitHistory[nLimits/2-1]+limitHistory[nLimits/2]) : limitHistory[nLimits/2]);
545 cout <<
"median expected limit: r < " << medianLimit <<
" @ " <<
cl*100 <<
"%CL (" <<nLimits <<
" toyMC)" << endl;
546 double hi68 = limitHistory[min<int>(nLimits-1, ceil(0.84 * nLimits))];
547 double lo68 = limitHistory[min<int>(nLimits-1, floor(0.16 * nLimits))];
548 double hi95 = limitHistory[min<int>(nLimits-1, ceil(0.975 * nLimits))];
549 double lo95 = limitHistory[min<int>(nLimits-1, floor(0.025 * nLimits))];
550 cout <<
" 68% expected band : " << lo68 <<
" < r < " << hi68 << endl;
551 cout <<
" 95% expected band : " << lo95 <<
" < r < " << hi95 << endl;
565 tree_->Branch(name,address,leaflist);
static void addBranch(const char *name, void *address, const char *leaflist)
Add a branch to the output tree (for advanced use or debugging only)
TDirectory * readToysFromHere
bool setAllConstant(const RooAbsCollection &coll, bool constant=true)
set all RooRealVars to constants. return true if at least one changed status
static PFTauRenderPlugin instance
void guessChannelMode(RooSimultaneous &simPdf, RooAbsData &simData, bool verbose=false)
RooAbsData * generateAsimov(RooRealVar *&weightVar)
boost::program_options::options_description statOptions_
boost::program_options::options_description miscOptions_
virtual void setNToys(const int)
RooAbsPdf * makeNuisancePdf(RooStats::ModelConfig &model, const char *name="nuisancePdf")
RooAbsData * asimovDatasetWithFit(RooStats::ModelConfig *mc, RooAbsData &realdata, RooAbsCollection &snapshot, double poiValue=0.0, int verbose=0)
Generate asimov dataset from best fit value of nuisance parameters, and fill in snapshot of correspon...
TDirectory * writeToysHere
static void commitPoint(bool expected, float quantile)
Save a point into the output tree. Usually if expected = false, quantile should be set to -1 (except ...
RooSimultaneous * rebuildSimPdf(const RooArgSet &observables, RooSimultaneous *pdf)
void printPdf(RooAbsPdf *pdf)
boost::program_options::options_description ioOptions_
Float_t g_quantileExpected_
RooAbsData * generate(RooRealVar *&weightVar, const RooDataSet *protoData=NULL, int forceEvents=0)
void run(TString hlfFile, const std::string &dataset, double &limit, double &limitErr, int &iToy, TTree *tree, int nToys)
bool generateBinnedWorkaround_
std::string workspaceName_
bool checkModel(const RooStats::ModelConfig &model, bool throwOnFail=false)
std::string modelConfigNameB_
bool mklimit(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr)
std::string modelConfigName_
void printRAD(const RooAbsData *d)
char data[epos_bytes_allocation]
virtual void setToyNumber(const int)
Power< A, B >::type pow(const A &a, const B &b)
virtual bool run(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint)=0
void applyOptions(const boost::program_options::variables_map &vm)