Reference Manual

Go to the documentation of this file.
00001 #include "../interface/MarkovChainMC.h"
00002 #include <stdexcept> 
00003 #include <cmath> 
00004 #include "TKey.h"
00005 #include "RooRealVar.h"
00006 #include "RooArgSet.h"
00007 #include "RooUniform.h"
00008 #include "RooWorkspace.h"
00009 #include "RooFitResult.h"
00010 #include "RooRandom.h"
00011 #ifndef ROOT_THnSparse
00012 class THnSparse;
00013 #define ROOT_THnSparse
00014 #endif
00015 #include "RooStats/MCMCCalculator.h"
00016 #include "RooStats/MCMCInterval.h"
00017 #include "RooStats/ModelConfig.h"
00018 #include "RooStats/ProposalHelper.h"
00019 #include "RooStats/ProposalFunction.h"
00020 #include "RooStats/RooStatsUtils.h"
00021 #include "../interface/Combine.h"
00022 #include "../interface/TestProposal.h"
00023 #include "../interface/DebugProposal.h"
00024 #include "../interface/CloseCoutSentry.h"
00025 #include "../interface/RooFitGlobalKillSentry.h"
00026 #include "../interface/JacknifeQuantile.h"
00027 
00028 #include "../interface/ProfilingTools.h"
00029 
00030 using namespace RooStats;
00031 
00032 std::string MarkovChainMC::proposalTypeName_ = "ortho";
00033 MarkovChainMC::ProposalType MarkovChainMC::proposalType_ = TestP;
00034 bool MarkovChainMC::runMinos_ = false;
00035 bool MarkovChainMC::noReset_ = false;
00036 bool MarkovChainMC::updateProposalParams_ = false;
00037 bool MarkovChainMC::updateHint_ = false;
00038 unsigned int MarkovChainMC::iterations_ = 10000;
00039 unsigned int MarkovChainMC::burnInSteps_ = 200;
00040 float MarkovChainMC::burnInFraction_ = 0.25;
00041 bool  MarkovChainMC::adaptiveBurnIn_ = false;
00042 unsigned int MarkovChainMC::tries_ = 10;
00043 float MarkovChainMC::truncatedMeanFraction_ = 0.0;
00044 bool MarkovChainMC::adaptiveTruncation_ = true;
00045 float MarkovChainMC::hintSafetyFactor_ = 5.;
00046 bool MarkovChainMC::saveChain_ = false;
00047 bool MarkovChainMC::noSlimChain_ = false;
00048 bool MarkovChainMC::mergeChains_ = false;
00049 bool MarkovChainMC::readChains_ = false;
00050 float MarkovChainMC::proposalHelperWidthRangeDivisor_ = 5.;
00051 float MarkovChainMC::proposalHelperUniformFraction_ = 0.0;
00052 bool  MarkovChainMC::alwaysStepPoi_ = true;
00053 float MarkovChainMC::cropNSigmas_ = 0;
00054 int   MarkovChainMC::debugProposal_ = false;
00055 
00056 MarkovChainMC::MarkovChainMC() : 
00057     LimitAlgo("Markov Chain MC specific options") 
00058 {
00059     options_.add_options()
00060         ("iteration,i", boost::program_options::value<unsigned int>(&iterations_)->default_value(iterations_), "Number of iterations")
00061         ("tries", boost::program_options::value<unsigned int>(&tries_)->default_value(tries_), "Number of times to run the MCMC on the same data")
00062         ("burnInSteps,b", boost::program_options::value<unsigned int>(&burnInSteps_)->default_value(burnInSteps_), "Burn in steps (absolute number)")
00063         ("burnInFraction", boost::program_options::value<float>(&burnInFraction_)->default_value(burnInFraction_), "Burn in steps (fraction of total accepted steps)")
00064         ("adaptiveBurnIn", boost::program_options::value<bool>(&adaptiveBurnIn_)->default_value(adaptiveBurnIn_), "Adaptively determine burn in steps (experimental!).")
00065         ("proposal", boost::program_options::value<std::string>(&proposalTypeName_)->default_value(proposalTypeName_), 
00066                               "Proposal function to use: 'fit', 'uniform', 'gaus', 'ortho' (also known as 'test')")
00067         ("runMinos",          "Run MINOS when fitting the data")
00068         ("noReset",           "Don't reset variable state after fit")
00069         ("updateHint",        "Update hint with the results")
00070         ("updateProposalParams", 
00071                 boost::program_options::value<bool>(&updateProposalParams_)->default_value(updateProposalParams_), 
00072                 "Control ProposalHelper::SetUpdateProposalParameters")
00073         ("propHelperWidthRangeDivisor", 
00074                 boost::program_options::value<float>(&proposalHelperWidthRangeDivisor_)->default_value(proposalHelperWidthRangeDivisor_), 
00075                 "Sets the fractional size of the gaussians in the proposal")
00076         ("alwaysStepPOI", boost::program_options::value<bool>(&alwaysStepPoi_)->default_value(alwaysStepPoi_),
00077                             "When using 'ortho' proposal, always step also the parameter of interest. On by default, as it improves convergence, but you can turn it off (e.g. if you turn off --optimizeSimPdf)")
00078         ("propHelperUniformFraction", 
00079                 boost::program_options::value<float>(&proposalHelperUniformFraction_)->default_value(proposalHelperUniformFraction_), 
00080                 "Add a fraction of uniform proposals to the algorithm")
00081         ("debugProposal", boost::program_options::value<int>(&debugProposal_)->default_value(debugProposal_), "Printout the first N proposals")
00082         ("cropNSigmas", 
00083                 boost::program_options::value<float>(&cropNSigmas_)->default_value(cropNSigmas_),
00084                 "crop range of all parameters to N times their uncertainty") 
00085         ("truncatedMeanFraction", 
00086                 boost::program_options::value<float>(&truncatedMeanFraction_)->default_value(truncatedMeanFraction_), 
00087                 "Discard this fraction of the results before computing the mean and rms")
00088         ("adaptiveTruncation", boost::program_options::value<bool>(&adaptiveTruncation_)->default_value(adaptiveTruncation_),
00089                             "When averaging multiple runs, ignore results that are more far away from the median than the inter-quartile range")
00090         ("hintSafetyFactor",
00091                 boost::program_options::value<float>(&hintSafetyFactor_)->default_value(hintSafetyFactor_),
00092                 "set range of integration equal to this number of times the hinted limit")
00093         ("saveChain", "Save MarkovChain to output file")
00094         ("noSlimChain", "Include also nuisance parameters in the chain that is saved to file")
00095         ("mergeChains", "Merge MarkovChains instead of averaging limits")
00096         ("readChains", "Just read MarkovChains from toysFile instead of running MCMC directly")
00097     ;
00098 }
00099 
00100 void MarkovChainMC::applyOptions(const boost::program_options::variables_map &vm) {
00101     if      (proposalTypeName_ == "fit")     proposalType_ = FitP;
00102     else if (proposalTypeName_ == "uniform") proposalType_ = UniformP;
00103     else if (proposalTypeName_ == "gaus")    proposalType_ = MultiGaussianP;
00104     else if (proposalTypeName_ == "ortho")   proposalType_ = TestP;
00105     else if (proposalTypeName_ == "test")    proposalType_ = TestP;
00106     else {
00107         std::cerr << "MarkovChainMC: proposal type " << proposalTypeName_ << " not known." << "\n" << options_ << std::endl;
00108         throw std::invalid_argument("MarkovChainMC: unsupported proposal");
00109     }
00110         
00111     runMinos_ = vm.count("runMinos");
00112     noReset_  = vm.count("noReset");
00113     updateHint_  = vm.count("updateHint");
00114 
00115     mass_ = vm["mass"].as<float>();
00116     saveChain_   = vm.count("saveChain");
00117     noSlimChain_   = vm.count("noSlimChain");
00118     mergeChains_ = vm.count("mergeChains");
00119     readChains_  = vm.count("readChains");
00120 
00121     if (mergeChains_ && !saveChain_ && !readChains_) chains_.SetOwner(true);
00122 }
00123 
00124 bool MarkovChainMC::run(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) {
00125   if (proposalType_ == MultiGaussianP && !withSystematics) { 
00126       std::cerr << "Sorry, the multi-gaussian proposal does not work without systematics.\n" << 
00127                    "Uniform proposal will be used instead.\n" << std::endl;
00128       proposalType_ = UniformP;
00129   }
00130 
00131   RooFitGlobalKillSentry silence(verbose > 0 ? RooFit::INFO : RooFit::WARNING);
00132 
00133   CloseCoutSentry coutSentry(verbose <= 0); // close standard output and error, so that we don't flood them with minuit messages
00134 
00135   // Get degrees of freedom
00136   modelNDF_ = mc_s->GetParametersOfInterest()->getSize(); 
00137   if (withSystematics) modelNDF_ += mc_s->GetNuisanceParameters()->getSize();
00138 
00139   double suma = 0; int num = 0;
00140   double savhint = (hint ? *hint : -1); const double *thehint = hint;
00141   std::vector<double> limits;
00142   if (readChains_)  {
00143       readChains(*mc_s->GetParametersOfInterest(), limits);
00144   } else {
00145       for (unsigned int i = 0; i < tries_; ++i) {
00146           if (int nacc = runOnce(w,mc_s,mc_b,data,limit,limitErr,thehint)) {
00147               suma += nacc;
00148               if (verbose > 1) std::cout << "Limit from this run: " << limit << std::endl;
00149               limits.push_back(limit);
00150               if (updateHint_ && tries_ > 1 && limit > savhint) { 
00151                 if (verbose > 0) std::cout << "Updating hint from " << savhint << " to " << limit << std::endl;
00152                 savhint = limit; thehint = &savhint; 
00153               }
00154           }
00155       }
00156   } 
00157   num = limits.size();
00158   if (num == 0) return false;
00159   // average acceptance
00160   suma  = suma / (num * double(iterations_));
00161   limitAndError(limit, limitErr, limits);
00162   if (mergeChains_) {
00163     std::cout << "Limit from averaging:    " << limit << " +/- " << limitErr << std::endl;
00164     // copy constructors don't work, so we just have to leak memory :-(
00165     RooStats::MarkovChain *merged = mergeChains(*mc_s->GetParametersOfInterest(), limits);
00166     // set burn-in to zero, since steps have already been discarded when merging
00167     limitFromChain(limit, limitErr, *mc_s->GetParametersOfInterest(), *merged, 0);
00168     std::cout << "Limit from merged chain: " << limit << " +/- " << limitErr << std::endl;
00169   }
00170   coutSentry.clear();
00171 
00172   if (verbose >= 0) {
00173       std::cout << "\n -- MarkovChainMC -- " << "\n";
00174       RooRealVar *r = dynamic_cast<RooRealVar *>(mc_s->GetParametersOfInterest()->first());
00175       if (num > 1) {
00176           std::cout << "Limit: " << r->GetName() <<" < " << limit << " +/- " << limitErr << " @ " << cl * 100 << "% CL (" << num << " tries)" << std::endl;
00177           if (verbose > 0 && !readChains_) std::cout << "Average chain acceptance: " << suma << std::endl;
00178       } else {
00179           std::cout << "Limit: " << r->GetName() <<" < " << limit << " @ " << cl * 100 << "% CL" << std::endl;
00180       }
00181   }
00182   return true;
00183 }
00184 int MarkovChainMC::runOnce(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) const {
00185   RooArgList poi(*mc_s->GetParametersOfInterest());
00186   RooRealVar *r = dynamic_cast<RooRealVar *>(poi.first());
00187 
00188   if ((hint != 0) && (*hint > r->getMin())) {
00189     r->setMax(hintSafetyFactor_*(*hint));
00190   }
00191 
00192   if (withSystematics && (mc_s->GetNuisanceParameters() == 0)) {
00193     throw std::logic_error("MarkovChainMC: running with systematics enabled, but nuisances not defined.");
00194   }
00195   
00196   w->loadSnapshot("clean");
00197   std::auto_ptr<RooFitResult> fit(0);
00198   if (proposalType_ == FitP || (cropNSigmas_ > 0)) {
00199       CloseCoutSentry coutSentry(verbose <= 1); // close standard output and error, so that we don't flood them with minuit messages
00200       fit.reset(mc_s->GetPdf()->fitTo(data, RooFit::Save(), RooFit::Minos(runMinos_)));
00201       coutSentry.clear();
00202       if (fit.get() == 0) { std::cerr << "Fit failed." << std::endl; return false; }
00203       if (verbose > 1) fit->Print("V");
00204       if (!noReset_) w->loadSnapshot("clean");
00205   }
00206 
00207   if (cropNSigmas_ > 0) {
00208       const RooArgList &fpf = fit->floatParsFinal();
00209       for (int i = 0, n = fpf.getSize(); i < n; ++i) {
00210           RooRealVar *fv = dynamic_cast<RooRealVar *>(fpf.at(i));
00211           if (std::string(r->GetName()) == fv->GetName()) continue;
00212           RooRealVar *v  = w->var(fv->GetName());
00213           double min = v->getMin(), max = v->getMax();
00214           if (fv->hasAsymError(false)) {
00215               min = (std::max(v->getMin(), fv->getVal() + cropNSigmas_ * fv->getAsymErrorLo()));
00216               max = (std::min(v->getMax(), fv->getVal() + cropNSigmas_ * fv->getAsymErrorHi()));
00217           } else if (fv->hasError(false)) {
00218               min = (std::max(v->getMin(), fv->getVal() - cropNSigmas_ * fv->getError()));
00219               max = (std::min(v->getMax(), fv->getVal() + cropNSigmas_ * fv->getError()));
00220           }
00221           if (verbose > 1) {
00222               std::cout << "  " << fv->GetName() << "[" << v->getMin() << ", " << v->getMax() << "] -> [" << min << ", " << max << "]" << std::endl;
00223           }
00224           v->setMin(min); v->setMax(max);
00225       }
00226   }
00227 
00228   std::auto_ptr<ProposalFunction> ownedPdfProp; 
00229   ProposalFunction* pdfProp = 0;
00230   ProposalHelper ph;
00231   switch (proposalType_) {
00232     case UniformP:  
00233         if (verbose) std::cout << "Using uniform proposal" << std::endl;
00234         ownedPdfProp.reset(new UniformProposal());
00235         pdfProp = ownedPdfProp.get();
00236         break;
00237     case FitP:
00238         if (verbose) std::cout << "Using fit proposal" << std::endl;
00239         ph.SetVariables(fit->floatParsFinal());
00240         ph.SetCovMatrix(fit->covarianceMatrix());
00241         pdfProp = ph.GetProposalFunction();
00242         break;
00243     case MultiGaussianP:
00244         if (verbose) std::cout << "Using multi-gaussian proposal" << std::endl;
00245         ph.SetVariables(*mc_s->GetNuisanceParameters());
00246         ph.SetWidthRangeDivisor(proposalHelperWidthRangeDivisor_);
00247         pdfProp = ph.GetProposalFunction();
00248         break;
00249     case TestP:
00250         ownedPdfProp.reset(new TestProposal(proposalHelperWidthRangeDivisor_, alwaysStepPoi_ ? poi : RooArgList()));
00251         pdfProp = ownedPdfProp.get();
00252         break;
00253   }
00254   if (proposalType_ != UniformP) {
00255       ph.SetUpdateProposalParameters(updateProposalParams_);
00256       if (proposalHelperUniformFraction_ > 0) ph.SetUniformFraction(proposalHelperUniformFraction_);
00257   }
00258 
00259   std::auto_ptr<DebugProposal> pdfDebugProp(debugProposal_ > 0 ? new DebugProposal(pdfProp, mc_s->GetPdf(), &data, debugProposal_) : 0);
00260   
00261   MCMCCalculator mc(data, *mc_s);
00262   mc.SetNumIters(iterations_); 
00263   mc.SetConfidenceLevel(cl);
00264   mc.SetNumBurnInSteps(burnInSteps_); 
00265   mc.SetProposalFunction(debugProposal_ > 0 ? *pdfDebugProp : *pdfProp);
00266   mc.SetLeftSideTailFraction(0);
00267 
00268   if (typeid(*mc_s->GetPriorPdf()) == typeid(RooUniform)) {
00269     mc.SetPriorPdf(*((RooAbsPdf *)0));
00270   }
00271 
00272   std::auto_ptr<MCMCInterval> mcInt;
00273   try {  
00274       mcInt.reset((MCMCInterval*)mc.GetInterval()); 
00275   } catch (std::length_error &ex) {
00276       mcInt.reset(0);
00277   }
00278   if (mcInt.get() == 0) return false;
00279   if (adaptiveBurnIn_) {
00280     mcInt->SetNumBurnInSteps(guessBurnInSteps(*mcInt->GetChain()));
00281   } else if (mcInt->GetChain()->Size() * burnInFraction_ > burnInSteps_) {
00282     mcInt->SetNumBurnInSteps(mcInt->GetChain()->Size() * burnInFraction_);
00283   }
00284   limit = mcInt->UpperLimit(*r);
00285 
00286   if (saveChain_ || mergeChains_) {
00287       // Copy-constructors don't work properly, so we just have to leak memory.
00288       //RooStats::MarkovChain *chain = new RooStats::MarkovChain(*mcInt->GetChain());
00289       RooStats::MarkovChain *chain = slimChain(*mc_s->GetParametersOfInterest(), *mcInt->GetChain());
00290       if (mergeChains_) chains_.Add(chain);
00291       if (saveChain_)  writeToysHere->WriteTObject(chain,  TString::Format("MarkovChain_mh%g_%u",mass_, RooRandom::integer(std::numeric_limits<UInt_t>::max() - 1)));
00292       return chain->Size();
00293   } else {
00294       return mcInt->GetChain()->Size();
00295   }
00296 }
00297 
00298 void MarkovChainMC::limitAndError(double &limit, double &limitErr, const std::vector<double> &limitsIn) const {
00299   std::vector<double> limits(limitsIn);
00300   int num = limits.size();
00301   // possibly remove outliers before computing mean
00302   if (adaptiveTruncation_ && num >= 10) {
00303       std::sort(limits.begin(), limits.end());
00304       // determine location and size of the sample
00305       double median = (num % 2 ? limits[num/2] : 0.5*(limits[num/2] + limits[num/2+1]));
00306       double iqr = limits[3*num/4] - limits[num/4];
00307       // determine range of plausible values
00308       double min = median - iqr, max = median + iqr; 
00309       int start = 0, end = num-1; 
00310       while (start < end   && limits[start] < min) ++start;
00311       while (end   > start && limits[end]   > max) --end;
00312       num = end-start+1;
00313       // compute mean and rms of accepted part
00314       limit = 0; limitErr = 0;
00315       for (int k = start; k <= end; ++k) limit += limits[k];
00316       limit /= num;
00317       for (int k = start; k <= end; k++) limitErr += (limits[k]-limit)*(limits[k]-limit);
00318       limitErr = (num > 1 ? sqrt(limitErr/(num*(num-1))) : 0);
00319       std::cout << "Result from truncated mean: " << limit << " +/- " << limitErr << std::endl;
00320 #if 0
00321       QuantileCalculator qc(limits);
00322       std::pair<double,double> qn = qc.quantileAndError(0.5, QuantileCalculator::Simple);
00323       std::pair<double,double> qs = qc.quantileAndError(0.5, QuantileCalculator::Sectioning);
00324       std::pair<double,double> qj = qc.quantileAndError(0.5, QuantileCalculator::Jacknife);
00325       std::cout << "Median of limits (simple):     " << qn.first << " +/- " << qn.second << std::endl;
00326       std::cout << "Median of limits (sectioning): " << qs.first << " +/- " << qs.second << std::endl;
00327       std::cout << "Median of limits (jacknife):   " << qj.first << " +/- " << qj.second << std::endl;
00328 #endif
00329   } else {
00330       int noutl = floor(truncatedMeanFraction_ * num);
00331       if (noutl >= 1) { 
00332           std::sort(limits.begin(), limits.end());
00333           double median = (num % 2 ? limits[num/2] : 0.5*(limits[num/2] + limits[num/2+1]));
00334           for (int k = 0; k < noutl; ++k) {
00335               // make sure outliers are all at the end
00336               if (std::abs(limits[0]-median) > std::abs(limits[num-k-1]-median)) {
00337                   std::swap(limits[0], limits[num-k-1]);
00338               }
00339           }
00340           num -= noutl;
00341       }
00342       // compute mean and rms
00343       limit = 0; limitErr = 0;
00344       for (int k = 0; k < num; k++) limit += limits[k];
00345       limit /= num;
00346       for (int k = 0; k < num; k++) limitErr += (limits[k]-limit)*(limits[k]-limit);
00347       limitErr = (num > 1 ? sqrt(limitErr/(num*(num-1))) : 0);
00348   }
00349 }
00350 
00351 RooStats::MarkovChain *MarkovChainMC::mergeChains(const RooArgSet &poi, const std::vector<double> &limits) const
00352 {
00353     std::vector<double> limitsSorted(limits); std::sort(limitsSorted.begin(), limitsSorted.end());
00354     double lmin = limitsSorted.front(), lmax = limitsSorted.back();
00355     if (limitsSorted.size() > 5) {
00356         int n = limitsSorted.size();
00357         double lmedian = limitsSorted[n/2];
00358         lmin = lmedian - 2*(+lmedian - limitsSorted[1*n/4]);
00359         lmax = lmedian + 2*(-lmedian + limitsSorted[3*n/4]);
00360     }
00361     if (chains_.GetSize() == 0) throw std::runtime_error("No chains to merge");
00362     if (verbose > 1) std::cout << "Will merge " << chains_.GetSize() << " chains." << std::endl;
00363     RooArgSet pars(poi);
00364     RooStats::MarkovChain *merged = new RooStats::MarkovChain("Merged","",pars);
00365     TIter iter(&chains_);
00366     int index = 0;
00367     for (RooStats::MarkovChain *other = (RooStats::MarkovChain *) iter.Next();
00368          other != 0;
00369          other = (RooStats::MarkovChain *) iter.Next(), ++index) {
00370         if (limits[index] < lmin || limits[index] > lmax) continue;
00371         int burninSteps = adaptiveBurnIn_ ? guessBurnInSteps(*other) : max<int>(burnInSteps_, other->Size() * burnInFraction_);
00372         if (verbose > 1) std::cout << "Adding chain of " << other->Size() << " entries, skipping the first " <<  burninSteps << "; individual limit " << limits[index] << std::endl;
00373         for (int i = burninSteps, n = other->Size(); i < n; ++i) {
00374             RooArgSet point(*other->Get(i));
00375             double nllval = other->NLL();
00376             double weight = other->Weight();
00377             merged->Add(point,nllval,weight);
00378             if (verbose > 2 && (i % 500 == 0)) std::cout << "   added " << i << "/" << other->Size() << " entries." << std::endl;
00379         }
00380     }
00381     return merged;
00382 }
00383 
00384 void MarkovChainMC::readChains(const RooArgSet &poi, std::vector<double> &limits)
00385 {
00386     double mylim, myerr;
00387     chains_.Clear();
00388     chains_.SetOwner(false);
00389     if (!readToysFromHere) throw std::logic_error("Cannot use readChains: option toysFile not specified, or input file empty");
00390     TDirectory *toyDir = readToysFromHere->GetDirectory("toys");
00391     if (!toyDir) throw std::logic_error("Cannot use readChains: empty toy dir in input file empty");
00392     TString prefix = TString::Format("MarkovChain_mh%g_",mass_);
00393     TIter next(toyDir->GetListOfKeys()); TKey *k;
00394     while ((k = (TKey *) next()) != 0) {
00395         if (TString(k->GetName()).Index(prefix) != 0) continue;
00396         RooStats::MarkovChain *toy = dynamic_cast<RooStats::MarkovChain *>(toyDir->Get(k->GetName()));
00397         if (toy == 0) continue;
00398         limitFromChain(mylim, myerr, poi, *toy);
00399         if (verbose > 1) std::cout << " limit " << mylim << " +/- " << myerr << std::endl;
00400         // vvvvv ---- begin convergence test, still being developed, not recommended yet.
00401         if (runtimedef::get("MCMC_STATIONARITY")) {
00402             if (!stationarityTest(*toy, poi, 30)) {
00403                 if (verbose > 1) std::cout << " ---> rejecting chain!" << std::endl; 
00404                 continue;
00405             }
00406         }
00407         // ^^^^^ ---- end of convergence test
00408         chains_.Add(toy);
00409         limits.push_back(mylim);
00410     }
00411     if (verbose) { std::cout << "Read " << chains_.GetSize() << " Markov Chains from input file." << std::endl; }
00412 }
00413 
00414 void 
00415 MarkovChainMC::limitFromChain(double &limit, double &limitErr, const RooArgSet &poi, RooStats::MarkovChain &chain, int burnInSteps) 
00416 {
00417     if (burnInSteps < 0) {
00418         if (adaptiveBurnIn_) burnInSteps = guessBurnInSteps(chain);
00419         else burnInSteps = max<int>(burnInSteps_, chain.Size() * burnInFraction_);
00420     }
00421 #if 1   // This is much faster and gives the same result
00422     QuantileCalculator qc(*chain.GetAsConstDataSet(), poi.first()->GetName(), burnInSteps);
00423     limit = qc.quantileAndError(cl, QuantileCalculator::Simple).first;
00424 #else
00425     MCMCInterval interval("",poi,chain);
00426     RooArgList axes(poi);
00427     interval.SetConfidenceLevel(cl);
00428     interval.SetIntervalType(MCMCInterval::kTailFraction);
00429     interval.SetLeftSideTailFraction(0);
00430     interval.SetNumBurnInSteps(burnInSteps);
00431     interval.SetAxes(axes);
00432     limit = interval.UpperLimit((RooRealVar&)*poi.first());
00433     if (mergeChains_) {
00434         // must avoid that MCMCInterval deletes the chain
00435         interval.SetChain(*(RooStats::MarkovChain *)0);
00436     }
00437 #endif
00438 }
00439 
00440 RooStats::MarkovChain *
00441 MarkovChainMC::slimChain(const RooArgSet &poi, const RooStats::MarkovChain &chain) const 
00442 {
00443     RooArgSet poilvalue(poi); // wtf they don't take a const & ??
00444     if (noSlimChain_) poilvalue.add(*chain.Get());
00445     RooStats::MarkovChain * ret = new RooStats::MarkovChain("","",poilvalue);
00446     for (int i = 0, n = chain.Size(); i < n; ++i) {
00447         RooArgSet entry(*chain.Get(i));
00448         Double_t nll = chain.NLL();
00449         Double_t weight = chain.Weight();
00450         if (i) ret->AddFast(entry, nll, weight);
00451         else   ret->Add(entry, nll, weight);
00452     }    
00453     return ret;
00454 }
00455 
00456 int 
00457 MarkovChainMC::guessBurnInSteps(const RooStats::MarkovChain &chain) const
00458 {
00459     int n = chain.Size();
00460     std::vector<double> nll(n);
00461     for (int i = 0; i < n; ++i) {
00462         nll[i] = chain.NLL(i);
00463     }
00464     // get minimum of nll
00465     double minnll = nll[0];
00466     for (int i = 0; i < n; ++i) { if (nll[i] < minnll) minnll = nll[i]; }
00467     // subtract it from all
00468     for (int i = 0; i < n; ++i) nll[i] -= minnll;
00469     // the NLL looks like 0.5 * a chi2 with nparam degrees of freedom, plus an arbitrary constant
00470     // so it should have a sigma of 0.5*sqrt(2*nparams)
00471     // anything sensible should be between minimum and minimum + 10*sigma
00472     double maxcut = 5*sqrt(2.0*modelNDF_); 
00473     // now loop backwards until you find something that's outside
00474     int start = 0;
00475     for (start = n-1; start >= 0; --start) {
00476        if (nll[start] > maxcut) break;
00477     }
00478     return start;
00479 }
00480 
00481 int 
00482 MarkovChainMC::stationarityTest(const RooStats::MarkovChain &chain, const RooArgSet &poi, int nchunks) const 
00483 {
00484     std::vector<int>    entries(nchunks, 0);
00485     std::vector<double> mean(nchunks, .0);
00486     const RooDataSet *data = chain.GetAsConstDataSet();
00487     const RooRealVar *r = dynamic_cast<const RooRealVar *>(data->get()->find(poi.first()->GetName()));
00488     int  n = data->numEntries(), chunksize = ceil(n/double(nchunks));
00489     for (int i = 0, chunk = 0; i < n; i++) {
00490         data->get(i);
00491         if (i > 0 && i % chunksize == 0) chunk++;
00492         entries[chunk]++;
00493         mean[chunk] += r->getVal();
00494     }
00495     for (int c = 0; c < nchunks; ++c) { mean[c] /= entries[c]; }
00496 
00497     std::vector<double> dists, dists25;
00498     for (int c = 0; c < nchunks; ++c) {
00499         for (int c2 = 0; c2 < nchunks; ++c2) {
00500             if (c2 != c) dists.push_back(fabs(mean[c]-mean[c2]));
00501         }
00502         std::sort(dists.begin(), dists.end());
00503         dists25.push_back(dists[ceil(0.25*nchunks)]/mean[c]);
00504         dists.clear();
00505         //printf("chunk %3d: mean  %9.5f  dist25 %9.5f abs, %9.5f real\n", c, mean[c], mean[c]*dists25.back(), dists25.back());
00506     }
00507     std::sort(dists25.begin(), dists25.end());
00508     double tolerance = 10*dists25[ceil(0.25*nchunks)];
00509     //printf("Q(25) is %9.5f\n", dists25[ceil(0.25*nchunks)]);
00510     //printf("Q(50) is %9.5f\n", dists25[ceil(0.50*nchunks)]);
00511     //printf("Tolerance set to %9.5f\n", tolerance);
00512     bool converged = true;
00513     std::vector<int> friends(nchunks, 0), foes(nchunks, 0);
00514     for (int c = 0; c < nchunks; ++c) {
00515         for (int c2 = c+1; c2 < nchunks; ++c2) {
00516             if (c2 == c) continue;
00517             if (fabs(mean[c] - mean[c2]) < tolerance*mean[c]) {
00518                 friends[c]++;
00519             } else {
00520                 foes[c]++;
00521             }
00522         }   
00523         //printf("chunk %3d: mean  %9.5f  friends %3d  foes %3d \n", c, mean[c], friends[c], foes[c]);
00524         if (friends[c] >= 2 && foes[c] > 1) {
00525             converged = false;
00526         }
00527         //fflush(stdout);
00528     }
00529     return converged;
00530 }
/data/refman/pasoursint/CMSSW_5_3_10_patch1/src/HiggsAnalysis/CombinedLimit/src/MarkovChainMC.cc