1 #include "../interface/MarkovChainMC.h"
5 #include "RooRealVar.h"
7 #include "RooUniform.h"
8 #include "RooWorkspace.h"
9 #include "RooFitResult.h"
10 #include "RooRandom.h"
11 #ifndef ROOT_THnSparse
13 #define ROOT_THnSparse
15 #include "RooStats/MCMCCalculator.h"
16 #include "RooStats/MCMCInterval.h"
17 #include "RooStats/ModelConfig.h"
18 #include "RooStats/ProposalHelper.h"
19 #include "RooStats/ProposalFunction.h"
20 #include "RooStats/RooStatsUtils.h"
21 #include "../interface/Combine.h"
22 #include "../interface/TestProposal.h"
23 #include "../interface/DebugProposal.h"
24 #include "../interface/CloseCoutSentry.h"
25 #include "../interface/RooFitGlobalKillSentry.h"
26 #include "../interface/JacknifeQuantile.h"
28 #include "../interface/ProfilingTools.h"
30 using namespace RooStats;
57 LimitAlgo(
"Markov Chain MC specific options")
60 (
"iteration,i", boost::program_options::value<unsigned int>(&
iterations_)->default_value(
iterations_),
"Number of iterations")
61 (
"tries", boost::program_options::value<unsigned int>(&
tries_)->default_value(
tries_),
"Number of times to run the MCMC on the same data")
62 (
"burnInSteps,b", boost::program_options::value<unsigned int>(&
burnInSteps_)->default_value(
burnInSteps_),
"Burn in steps (absolute number)")
63 (
"burnInFraction", boost::program_options::value<float>(&
burnInFraction_)->default_value(
burnInFraction_),
"Burn in steps (fraction of total accepted steps)")
64 (
"adaptiveBurnIn", boost::program_options::value<bool>(&
adaptiveBurnIn_)->default_value(
adaptiveBurnIn_),
"Adaptively determine burn in steps (experimental!).")
66 "Proposal function to use: 'fit', 'uniform', 'gaus', 'ortho' (also known as 'test')")
67 (
"runMinos",
"Run MINOS when fitting the data")
68 (
"noReset",
"Don't reset variable state after fit")
69 (
"updateHint",
"Update hint with the results")
70 (
"updateProposalParams",
72 "Control ProposalHelper::SetUpdateProposalParameters")
73 (
"propHelperWidthRangeDivisor",
75 "Sets the fractional size of the gaussians in the proposal")
77 "When using 'ortho' proposal, always step also the parameter of interest. On by default, as it improves convergence, but you can turn it off (e.g. if you turn off --optimizeSimPdf)")
78 (
"propHelperUniformFraction",
80 "Add a fraction of uniform proposals to the algorithm")
84 "crop range of all parameters to N times their uncertainty")
85 (
"truncatedMeanFraction",
87 "Discard this fraction of the results before computing the mean and rms")
89 "When averaging multiple runs, ignore results that are more far away from the median than the inter-quartile range")
92 "set range of integration equal to this number of times the hinted limit")
93 (
"saveChain",
"Save MarkovChain to output file")
94 (
"noSlimChain",
"Include also nuisance parameters in the chain that is saved to file")
95 (
"mergeChains",
"Merge MarkovChains instead of averaging limits")
96 (
"readChains",
"Just read MarkovChains from toysFile instead of running MCMC directly")
108 throw std::invalid_argument(
"MarkovChainMC: unsupported proposal");
115 mass_ = vm[
"mass"].as<
float>();
124 bool MarkovChainMC::run(RooWorkspace *
w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &
data,
double &
limit,
double &limitErr,
const double *hint) {
126 std::cerr <<
"Sorry, the multi-gaussian proposal does not work without systematics.\n" <<
127 "Uniform proposal will be used instead.\n" << std::endl;
136 modelNDF_ = mc_s->GetParametersOfInterest()->getSize();
139 double suma = 0;
int num = 0;
140 double savhint = (hint ? *hint : -1);
const double *thehint = hint;
141 std::vector<double> limits;
143 readChains(*mc_s->GetParametersOfInterest(), limits);
145 for (
unsigned int i = 0;
i <
tries_; ++
i) {
146 if (
int nacc =
runOnce(w,mc_s,mc_b,data,limit,limitErr,thehint)) {
148 if (
verbose > 1)
std::cout <<
"Limit from this run: " << limit << std::endl;
149 limits.push_back(limit);
150 if (
updateHint_ && tries_ > 1 && limit > savhint) {
151 if (
verbose > 0)
std::cout <<
"Updating hint from " << savhint <<
" to " << limit << std::endl;
152 savhint =
limit; thehint = &savhint;
158 if (num == 0)
return false;
163 std::cout <<
"Limit from averaging: " << limit <<
" +/- " << limitErr << std::endl;
165 RooStats::MarkovChain *merged =
mergeChains(*mc_s->GetParametersOfInterest(), limits);
167 limitFromChain(limit, limitErr, *mc_s->GetParametersOfInterest(), *merged, 0);
168 std::cout <<
"Limit from merged chain: " << limit <<
" +/- " << limitErr << std::endl;
173 std::cout <<
"\n -- MarkovChainMC -- " <<
"\n";
174 RooRealVar *
r =
dynamic_cast<RooRealVar *
>(mc_s->GetParametersOfInterest()->first());
176 std::cout <<
"Limit: " << r->GetName() <<
" < " << limit <<
" +/- " << limitErr <<
" @ " <<
cl * 100 <<
"% CL (" << num <<
" tries)" << std::endl;
179 std::cout <<
"Limit: " << r->GetName() <<
" < " << limit <<
" @ " <<
cl * 100 <<
"% CL" << std::endl;
184 int MarkovChainMC::runOnce(RooWorkspace *
w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &
data,
double &
limit,
double &limitErr,
const double *hint)
const {
185 RooArgList poi(*mc_s->GetParametersOfInterest());
186 RooRealVar *
r =
dynamic_cast<RooRealVar *
>(poi.first());
188 if ((hint != 0) && (*hint > r->getMin())) {
193 throw std::logic_error(
"MarkovChainMC: running with systematics enabled, but nuisances not defined.");
196 w->loadSnapshot(
"clean");
197 std::auto_ptr<RooFitResult> fit(0);
200 fit.reset(mc_s->GetPdf()->fitTo(data, RooFit::Save(), RooFit::Minos(
runMinos_)));
202 if (fit.get() == 0) {
std::cerr <<
"Fit failed." << std::endl;
return false; }
203 if (
verbose > 1) fit->Print(
"V");
204 if (!
noReset_) w->loadSnapshot(
"clean");
208 const RooArgList &fpf = fit->floatParsFinal();
209 for (
int i = 0,
n = fpf.getSize();
i <
n; ++
i) {
210 RooRealVar *fv =
dynamic_cast<RooRealVar *
>(fpf.at(
i));
211 if (std::string(r->GetName()) == fv->GetName())
continue;
212 RooRealVar *
v = w->var(fv->GetName());
213 double min = v->getMin(),
max = v->getMax();
214 if (fv->hasAsymError(
false)) {
217 }
else if (fv->hasError(
false)) {
222 std::cout <<
" " << fv->GetName() <<
"[" << v->getMin() <<
", " << v->getMax() <<
"] -> [" << min <<
", " <<
max <<
"]" << std::endl;
224 v->setMin(min); v->setMax(
max);
228 std::auto_ptr<ProposalFunction> ownedPdfProp;
229 ProposalFunction* pdfProp = 0;
234 ownedPdfProp.reset(
new UniformProposal());
235 pdfProp = ownedPdfProp.get();
239 ph.SetVariables(fit->floatParsFinal());
240 ph.SetCovMatrix(fit->covarianceMatrix());
241 pdfProp = ph.GetProposalFunction();
245 ph.SetVariables(*mc_s->GetNuisanceParameters());
247 pdfProp = ph.GetProposalFunction();
251 pdfProp = ownedPdfProp.get();
261 MCMCCalculator mc(data, *mc_s);
263 mc.SetConfidenceLevel(
cl);
265 mc.SetProposalFunction(
debugProposal_ > 0 ? *pdfDebugProp : *pdfProp);
266 mc.SetLeftSideTailFraction(0);
268 if (
typeid(*mc_s->GetPriorPdf()) ==
typeid(RooUniform)) {
269 mc.SetPriorPdf(*((RooAbsPdf *)0));
272 std::auto_ptr<MCMCInterval> mcInt;
274 mcInt.reset((MCMCInterval*)mc.GetInterval());
275 }
catch (std::length_error &ex) {
278 if (mcInt.get() == 0)
return false;
284 limit = mcInt->UpperLimit(*r);
289 RooStats::MarkovChain *chain =
slimChain(*mc_s->GetParametersOfInterest(), *mcInt->GetChain());
292 return chain->Size();
294 return mcInt->GetChain()->Size();
299 std::vector<double> limits(limitsIn);
300 int num = limits.size();
305 double median = (num % 2 ? limits[num/2] : 0.5*(limits[num/2] + limits[num/2+1]));
306 double iqr = limits[3*num/4] - limits[num/4];
308 double min = median - iqr,
max = median + iqr;
310 while (start <
end && limits[start] < min) ++
start;
311 while (
end > start && limits[
end] > max) --
end;
314 limit = 0; limitErr = 0;
315 for (
int k = start;
k <=
end; ++
k) limit += limits[
k];
317 for (
int k = start;
k <=
end;
k++) limitErr += (limits[
k]-limit)*(limits[
k]-
limit);
318 limitErr = (num > 1 ?
sqrt(limitErr/(num*(num-1))) : 0);
319 std::cout <<
"Result from truncated mean: " << limit <<
" +/- " << limitErr << std::endl;
325 std::cout <<
"Median of limits (simple): " << qn.first <<
" +/- " << qn.second << std::endl;
326 std::cout <<
"Median of limits (sectioning): " << qs.first <<
" +/- " << qs.second << std::endl;
327 std::cout <<
"Median of limits (jacknife): " << qj.first <<
" +/- " << qj.second << std::endl;
333 double median = (num % 2 ? limits[num/2] : 0.5*(limits[num/2] + limits[num/2+1]));
334 for (
int k = 0;
k < noutl; ++
k) {
343 limit = 0; limitErr = 0;
344 for (
int k = 0;
k <
num;
k++) limit += limits[
k];
346 for (
int k = 0;
k <
num;
k++) limitErr += (limits[
k]-limit)*(limits[
k]-
limit);
347 limitErr = (num > 1 ?
sqrt(limitErr/(num*(num-1))) : 0);
353 std::vector<double> limitsSorted(limits);
std::sort(limitsSorted.begin(), limitsSorted.end());
354 double lmin = limitsSorted.front(), lmax = limitsSorted.back();
355 if (limitsSorted.size() > 5) {
356 int n = limitsSorted.size();
357 double lmedian = limitsSorted[n/2];
358 lmin = lmedian - 2*(+lmedian - limitsSorted[1*n/4]);
359 lmax = lmedian + 2*(-lmedian + limitsSorted[3*n/4]);
361 if (
chains_.GetSize() == 0)
throw std::runtime_error(
"No chains to merge");
364 RooStats::MarkovChain *merged =
new RooStats::MarkovChain(
"Merged",
"",pars);
367 for (RooStats::MarkovChain *other = (RooStats::MarkovChain *) iter.Next();
369 other = (RooStats::MarkovChain *) iter.Next(), ++
index) {
370 if (limits[index] < lmin || limits[index] > lmax)
continue;
372 if (
verbose > 1)
std::cout <<
"Adding chain of " << other->Size() <<
" entries, skipping the first " << burninSteps <<
"; individual limit " << limits[
index] << std::endl;
373 for (
int i = burninSteps,
n = other->Size();
i <
n; ++
i) {
374 RooArgSet
point(*other->Get(
i));
375 double nllval = other->NLL();
376 double weight = other->Weight();
377 merged->Add(
point,nllval,weight);
378 if (
verbose > 2 && (
i % 500 == 0))
std::cout <<
" added " <<
i <<
"/" << other->Size() <<
" entries." << std::endl;
389 if (!
readToysFromHere)
throw std::logic_error(
"Cannot use readChains: option toysFile not specified, or input file empty");
391 if (!toyDir)
throw std::logic_error(
"Cannot use readChains: empty toy dir in input file empty");
392 TString
prefix = TString::Format(
"MarkovChain_mh%g_",
mass_);
393 TIter
next(toyDir->GetListOfKeys()); TKey *
k;
394 while ((k = (TKey *)
next()) != 0) {
395 if (TString(k->GetName()).Index(prefix) != 0)
continue;
396 RooStats::MarkovChain *toy =
dynamic_cast<RooStats::MarkovChain *
>(toyDir->Get(k->GetName()));
397 if (toy == 0)
continue;
399 if (
verbose > 1)
std::cout <<
" limit " << mylim <<
" +/- " << myerr << std::endl;
409 limits.push_back(mylim);
417 if (burnInSteps < 0) {
421 #if 1 // This is much faster and gives the same result
422 QuantileCalculator qc(*chain.GetAsConstDataSet(), poi.first()->GetName(), burnInSteps);
425 MCMCInterval
interval(
"",poi,chain);
426 RooArgList axes(poi);
427 interval.SetConfidenceLevel(
cl);
428 interval.SetIntervalType(MCMCInterval::kTailFraction);
429 interval.SetLeftSideTailFraction(0);
430 interval.SetNumBurnInSteps(burnInSteps);
431 interval.SetAxes(axes);
432 limit = interval.UpperLimit((RooRealVar&)*poi.first());
435 interval.SetChain(*(RooStats::MarkovChain *)0);
440 RooStats::MarkovChain *
443 RooArgSet poilvalue(poi);
445 RooStats::MarkovChain *
ret =
new RooStats::MarkovChain(
"",
"",poilvalue);
446 for (
int i = 0,
n = chain.Size();
i <
n; ++
i) {
447 RooArgSet
entry(*chain.Get(
i));
448 Double_t nll = chain.NLL();
449 Double_t
weight = chain.Weight();
450 if (
i) ret->AddFast(
entry, nll, weight);
451 else ret->Add(
entry, nll, weight);
459 int n = chain.Size();
460 std::vector<double> nll(n);
461 for (
int i = 0;
i <
n; ++
i) {
462 nll[
i] = chain.NLL(
i);
465 double minnll = nll[0];
466 for (
int i = 0;
i <
n; ++
i) {
if (nll[
i] < minnll) minnll = nll[
i]; }
468 for (
int i = 0;
i <
n; ++
i) nll[
i] -= minnll;
475 for (start = n-1; start >= 0; --
start) {
476 if (nll[start] > maxcut)
break;
484 std::vector<int>
entries(nchunks, 0);
485 std::vector<double>
mean(nchunks, .0);
486 const RooDataSet *
data = chain.GetAsConstDataSet();
487 const RooRealVar *
r =
dynamic_cast<const RooRealVar *
>(data->get()->find(poi.first()->GetName()));
488 int n = data->numEntries(), chunksize = ceil(n/
double(nchunks));
489 for (
int i = 0, chunk = 0;
i <
n;
i++) {
491 if (
i > 0 &&
i % chunksize == 0) chunk++;
493 mean[chunk] += r->getVal();
495 for (
int c = 0;
c < nchunks; ++
c) { mean[
c] /= entries[
c]; }
497 std::vector<double> dists, dists25;
498 for (
int c = 0;
c < nchunks; ++
c) {
499 for (
int c2 = 0; c2 < nchunks; ++c2) {
500 if (c2 !=
c) dists.push_back(fabs(mean[
c]-mean[c2]));
503 dists25.push_back(dists[ceil(0.25*nchunks)]/mean[
c]);
507 std::sort(dists25.begin(), dists25.end());
508 double tolerance = 10*dists25[ceil(0.25*nchunks)];
512 bool converged =
true;
513 std::vector<int> friends(nchunks, 0), foes(nchunks, 0);
514 for (
int c = 0;
c < nchunks; ++
c) {
515 for (
int c2 =
c+1; c2 < nchunks; ++c2) {
516 if (c2 ==
c)
continue;
517 if (fabs(mean[
c] - mean[c2]) < tolerance*mean[
c]) {
524 if (friends[
c] >= 2 && foes[
c] > 1) {
static bool updateProposalParams_
TDirectory * readToysFromHere
static float cropNSigmas_
static ProposalType proposalType_
void limitFromChain(double &limit, double &limitErr, const RooArgSet &poi, RooStats::MarkovChain &chain, int burnInSteps=-1)
static float proposalHelperWidthRangeDivisor_
static bool adaptiveTruncation_
do adaptive truncated mean
static float proposalHelperUniformFraction_
static float hintSafetyFactor_
Safety factor for hint (integrate up to this number of times the hinted limit)
int get(const char *name)
static bool readChains_
Read chains from file instead of running them.
static float truncatedMeanFraction_
Ignore up to this fraction of results if they're too far from the median.
static unsigned int iterations_
Propose this number of points for the chain.
static bool alwaysStepPoi_
TDirectory * writeToysHere
RooStats::MarkovChain * mergeChains(const RooArgSet &poi, const std::vector< double > &limits) const
float mass_
Mass of the Higgs boson (goes into the name of the saved chains)
const T & max(const T &a, const T &b)
static float burnInFraction_
Discard these fraction of points.
void swap(edm::DataFrameContainer &lhs, edm::DataFrameContainer &rhs)
static std::string proposalTypeName_
std::pair< std::string, MonitorElement * > entry
static bool adaptiveBurnIn_
Adaptive burn-in (experimental!)
static bool saveChain_
Save Markov Chain in output file.
static int debugProposal_
std::pair< double, double > quantileAndError(double quantile, Method method)
bool run(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint)
int stationarityTest(const RooStats::MarkovChain &chain, const RooArgSet &poi, int nchunks) const
note: this is still being developed
void limitAndError(double &limit, double &limitErr, const std::vector< double > &limits) const
int runOnce(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) const
static bool noSlimChain_
Leave all parameters in the markov chain, not just the POI.
RooStats::MarkovChain * slimChain(const RooArgSet &poi, const RooStats::MarkovChain &chain) const
static unsigned int burnInSteps_
Discard these points.
virtual void applyOptions(const boost::program_options::variables_map &vm)
char data[epos_bytes_allocation]
int modelNDF_
Number of degrees of freedom of the problem, approximately.
static unsigned int tries_
compute the limit N times
int guessBurnInSteps(const RooStats::MarkovChain &chain) const
boost::program_options::options_description options_
static bool mergeChains_
Merge chains instead of averaging limits.
*vegas h *****************************************************used in the default bin number in original ***version of VEGAS is ***a higher bin number might help to derive a more precise ***grade subtle point
void readChains(const RooArgSet &poi, std::vector< double > &limits)