00001 #include <stdexcept>
00002 #include <cstdio>
00003 #include <unistd.h>
00004 #include <sys/types.h>
00005 #include <sys/wait.h>
00006 #include <errno.h>
00007
00008 #include "../interface/HybridNew.h"
00009 #include <TFile.h>
00010 #include <TF1.h>
00011 #include <TKey.h>
00012 #include <TLine.h>
00013 #include <TCanvas.h>
00014 #include <TGraphErrors.h>
00015 #include <TStopwatch.h>
00016 #include "RooRealVar.h"
00017 #include "RooArgSet.h"
00018 #include "RooAbsPdf.h"
00019 #include "RooFitResult.h"
00020 #include "RooRandom.h"
00021 #include "RooAddPdf.h"
00022 #include "RooConstVar.h"
00023 #include "RooMsgService.h"
00024 #include <RooStats/ModelConfig.h>
00025 #include <RooStats/FrequentistCalculator.h>
00026 #include <RooStats/HybridCalculator.h>
00027 #include <RooStats/SimpleLikelihoodRatioTestStat.h>
00028 #include <RooStats/RatioOfProfiledLikelihoodsTestStat.h>
00029 #include <RooStats/ProfileLikelihoodTestStat.h>
00030 #include <RooStats/ToyMCSampler.h>
00031 #include <RooStats/HypoTestPlot.h>
00032 #include "../interface/Combine.h"
00033 #include "../interface/CloseCoutSentry.h"
00034 #include "../interface/RooFitGlobalKillSentry.h"
00035 #include "../interface/SimplerLikelihoodRatioTestStat.h"
00036 #include "../interface/ProfiledLikelihoodRatioTestStat.h"
00037 #include "../interface/SimplerLikelihoodRatioTestStatExt.h"
00038 #include "../interface/ProfiledLikelihoodRatioTestStatExt.h"
00039 #include "../interface/BestFitSigmaTestStat.h"
00040 #include "../interface/ToyMCSamplerOpt.h"
00041 #include "../interface/utils.h"
00042 #include "../interface/ProfileLikelihood.h"
00043 #include "../interface/ProfilingTools.h"
00044
00045 using namespace RooStats;
00046
00047 HybridNew::WorkingMode HybridNew::workingMode_ = MakeLimit;
00048 unsigned int HybridNew::nToys_ = 500;
00049 double HybridNew::clsAccuracy_ = 0.005;
00050 double HybridNew::rAbsAccuracy_ = 0.1;
00051 double HybridNew::rRelAccuracy_ = 0.05;
00052 double HybridNew::interpAccuracy_ = 0.2;
00053 std::string HybridNew::rule_ = "CLs";
00054 std::string HybridNew::testStat_ = "LEP";
00055 bool HybridNew::genNuisances_ = true;
00056 bool HybridNew::genGlobalObs_ = false;
00057 bool HybridNew::fitNuisances_ = false;
00058 unsigned int HybridNew::iterations_ = 1;
00059 unsigned int HybridNew::nCpu_ = 0;
00060 unsigned int HybridNew::fork_ = 1;
00061 std::string HybridNew::rValue_ = "1.0";
00062 RooArgSet HybridNew::rValues_;
00063 bool HybridNew::CLs_ = false;
00064 bool HybridNew::saveHybridResult_ = false;
00065 bool HybridNew::readHybridResults_ = false;
00066 bool HybridNew::expectedFromGrid_ = false;
00067 bool HybridNew::clsQuantiles_ = true;
00068 float HybridNew::quantileForExpectedFromGrid_ = 0.5;
00069 bool HybridNew::fullBToys_ = false;
00070 bool HybridNew::fullGrid_ = false;
00071 bool HybridNew::saveGrid_ = false;
00072 bool HybridNew::noUpdateGrid_ = false;
00073 std::string HybridNew::gridFile_ = "";
00074 bool HybridNew::importanceSamplingNull_ = false;
00075 bool HybridNew::importanceSamplingAlt_ = false;
00076 std::string HybridNew::algo_ = "logSecant";
00077 bool HybridNew::optimizeProductPdf_ = true;
00078 bool HybridNew::optimizeTestStatistics_ = true;
00079 bool HybridNew::newToyMCSampler_ = true;
00080 bool HybridNew::rMinSet_ = false;
00081 bool HybridNew::rMaxSet_ = false;
00082 std::string HybridNew::plot_;
00083 std::string HybridNew::minimizerAlgo_ = "Minuit2";
00084 float HybridNew::minimizerTolerance_ = 1e-2;
00085 bool HybridNew::reportPVal_ = false;
00086
00087 #define EPS 1e-9
00088
00089 HybridNew::HybridNew() :
00090 LimitAlgo("HybridNew specific options") {
00091 options_.add_options()
00092 ("rule", boost::program_options::value<std::string>(&rule_)->default_value(rule_), "Rule to use: CLs, CLsplusb")
00093 ("testStat",boost::program_options::value<std::string>(&testStat_)->default_value(testStat_), "Test statistics: LEP, TEV, LHC (previously known as Atlas), Profile.")
00094 ("singlePoint", boost::program_options::value<std::string>(&rValue_)->default_value(rValue_), "Just compute CLs for the given value of the parameter of interest. In case of multiple parameters, use a syntax 'name=value,name2=value2,...'")
00095 ("onlyTestStat", "Just compute test statistics, not actual p-values (works only with --singlePoint)")
00096 ("generateNuisances", boost::program_options::value<bool>(&genNuisances_)->default_value(genNuisances_), "Generate nuisance parameters for each toy")
00097 ("generateExternalMeasurements", boost::program_options::value<bool>(&genGlobalObs_)->default_value(genGlobalObs_), "Generate external measurements for each toy, taken from the GlobalObservables of the ModelConfig")
00098 ("fitNuisances", boost::program_options::value<bool>(&fitNuisances_)->default_value(fitNuisances_), "Fit the nuisances, when not generating them.")
00099 ("searchAlgo", boost::program_options::value<std::string>(&algo_)->default_value(algo_), "Algorithm to use to search for the limit (bisection, logSecant)")
00100 ("toysH,T", boost::program_options::value<unsigned int>(&nToys_)->default_value(nToys_), "Number of Toy MC extractions to compute CLs+b, CLb and CLs")
00101 ("clsAcc", boost::program_options::value<double>(&clsAccuracy_ )->default_value(clsAccuracy_), "Absolute accuracy on CLs to reach to terminate the scan")
00102 ("rAbsAcc", boost::program_options::value<double>(&rAbsAccuracy_)->default_value(rAbsAccuracy_), "Absolute accuracy on r to reach to terminate the scan")
00103 ("rRelAcc", boost::program_options::value<double>(&rRelAccuracy_)->default_value(rRelAccuracy_), "Relative accuracy on r to reach to terminate the scan")
00104 ("interpAcc", boost::program_options::value<double>(&interpAccuracy_)->default_value(interpAccuracy_), "Minimum uncertainty from interpolation delta(x)/(max(x)-min(x))")
00105 ("iterations,i", boost::program_options::value<unsigned int>(&iterations_)->default_value(iterations_), "Number of times to throw 'toysH' toys to compute the p-values (for --singlePoint if clsAcc is set to zero disabling adaptive generation)")
00106 ("fork", boost::program_options::value<unsigned int>(&fork_)->default_value(fork_), "Fork to N processes before running the toys (set to 0 for debugging)")
00107 ("nCPU", boost::program_options::value<unsigned int>(&nCpu_)->default_value(nCpu_), "Use N CPUs with PROOF Lite (experimental!)")
00108 ("saveHybridResult", "Save result in the output file")
00109 ("readHybridResults", "Read and merge results from file (requires 'toysFile' or 'grid')")
00110 ("grid", boost::program_options::value<std::string>(&gridFile_), "Use the specified file containing a grid of SamplingDistributions for the limit (implies readHybridResults).\n For --singlePoint or --signif use --toysFile=x.root --readHybridResult instead of this.")
00111 ("expectedFromGrid", boost::program_options::value<float>(&quantileForExpectedFromGrid_)->default_value(0.5), "Use the grid to compute the expected limit for this quantile")
00112 ("signalForSignificance", boost::program_options::value<std::string>()->default_value("1"), "Use this value of the parameter of interest when generating signal toys for expected significance (same syntax as --singlePoint)")
00113 ("clsQuantiles", boost::program_options::value<bool>(&clsQuantiles_)->default_value(clsQuantiles_), "Compute correct quantiles of CLs or CLsplusb instead of assuming they're the same as CLb ones")
00114
00115
00116
00117
00118 ("optimizeTestStatistics", boost::program_options::value<bool>(&optimizeTestStatistics_)->default_value(optimizeTestStatistics_),
00119 "Use optimized test statistics if the likelihood is not extended (works for LEP and TEV test statistics).")
00120 ("optimizeProductPdf", boost::program_options::value<bool>(&optimizeProductPdf_)->default_value(optimizeProductPdf_),
00121 "Optimize the code factorizing pdfs")
00122 ("minimizerAlgo", boost::program_options::value<std::string>(&minimizerAlgo_)->default_value(minimizerAlgo_), "Choice of minimizer used for profiling (Minuit vs Minuit2)")
00123 ("minimizerTolerance", boost::program_options::value<float>(&minimizerTolerance_)->default_value(minimizerTolerance_), "Tolerance for minimizer used for profiling")
00124 ("plot", boost::program_options::value<std::string>(&plot_), "Save a plot of the result (test statistics distributions or limit scan)")
00125 ("frequentist", "Shortcut to switch to Frequentist mode (--generateNuisances=0 --generateExternalMeasurements=1 --fitNuisances=1)")
00126 ("newToyMCSampler", boost::program_options::value<bool>(&newToyMCSampler_)->default_value(newToyMCSampler_), "Use new ToyMC sampler with support for mixed binned-unbinned generation. On by default, you can turn it off if it doesn't work for your workspace.")
00127 ("fullGrid", "Evaluate p-values at all grid points, without optimitations")
00128 ("saveGrid", "Save CLs or (or FC p-value) at all grid points in the output tree. The value of 'r' is saved in the 'limit' branch, while the CLs or p-value in the 'quantileExpected' branch and the uncertainty on 'limitErr' (since there's no quantileExpectedErr)")
00129 ("noUpdateGrid", "Do not update test statistics at grid points")
00130 ("fullBToys", "Run as many B toys as S ones (default is to run 1/4 of b-only toys)")
00131 ("pvalue", "Report p-value instead of significance (when running with --significance)")
00132 ;
00133 }
00134
00135 void HybridNew::applyOptions(const boost::program_options::variables_map &vm) {
00136 rMinSet_ = vm.count("rMin")>0; rMaxSet_ = vm.count("rMax")>0;
00137 if (vm.count("expectedFromGrid") && !vm["expectedFromGrid"].defaulted()) {
00138
00139 if (quantileForExpectedFromGrid_ <= 0 || quantileForExpectedFromGrid_ >= 1.0) throw std::invalid_argument("HybridNew: the quantile for the expected limit must be between 0 and 1");
00140 expectedFromGrid_ = true;
00141 g_quantileExpected_ = quantileForExpectedFromGrid_;
00142 }
00143 if (vm.count("frequentist")) {
00144 genNuisances_ = 0; genGlobalObs_ = withSystematics; fitNuisances_ = withSystematics;
00145 if (vm["testStat"].defaulted()) testStat_ = "LHC";
00146 if (vm["toys"].as<int>() > 0 and vm.count("toysFrequentist")) {
00147 if (vm["fitNuisances"].defaulted() && withSystematics) {
00148 std::cout << "When tossing frequenst toys outside the HybridNew, the nuisances will not be refitted for each toy by default. This can be changed by specifying explicitly the fitNuisances option" << std::endl;
00149 fitNuisances_ = false;
00150 }
00151 }
00152 }
00153 if (genGlobalObs_ && genNuisances_) {
00154 std::cerr << "ALERT: generating both global observables and nuisance parameters at the same time is not validated." << std::endl;
00155 }
00156 if (!vm["singlePoint"].defaulted()) {
00157 if (doSignificance_) throw std::invalid_argument("HybridNew: Can't use --significance and --singlePoint at the same time");
00158 workingMode_ = ( vm.count("onlyTestStat") ? MakeTestStatistics : MakePValues );
00159 } else if (vm.count("onlyTestStat")) {
00160 if (doSignificance_) workingMode_ = MakeSignificanceTestStatistics;
00161 else throw std::invalid_argument("HybridNew: --onlyTestStat works only with --singlePoint or --significance");
00162 } else if (doSignificance_) {
00163 workingMode_ = MakeSignificance;
00164 rValue_ = vm["signalForSignificance"].as<std::string>();
00165 } else {
00166 workingMode_ = MakeLimit;
00167 }
00168 saveHybridResult_ = vm.count("saveHybridResult");
00169 readHybridResults_ = vm.count("readHybridResults") || vm.count("grid");
00170 if (readHybridResults_ && !(vm.count("toysFile") || vm.count("grid"))) throw std::invalid_argument("HybridNew: must have 'toysFile' or 'grid' option to have 'readHybridResults'\n");
00171 mass_ = vm["mass"].as<float>();
00172 fullGrid_ = vm.count("fullGrid");
00173 saveGrid_ = vm.count("saveGrid");
00174 fullBToys_ = vm.count("fullBToys");
00175 noUpdateGrid_ = vm.count("noUpdateGrid");
00176 reportPVal_ = vm.count("pvalue");
00177 validateOptions();
00178 }
00179
00180 void HybridNew::applyDefaultOptions() {
00181 workingMode_ = MakeLimit;
00182 validateOptions();
00183 }
00184
00185 void HybridNew::validateOptions() {
00186 if (fork_ > 1) nToys_ /= fork_;
00187 if (rule_ == "CLs") {
00188 CLs_ = true;
00189 } else if (rule_ == "CLsplusb") {
00190 CLs_ = false;
00191 } else if (rule_ == "FC") {
00192 CLs_ = false;
00193 } else {
00194 throw std::invalid_argument("HybridNew: Rule must be either 'CLs' or 'CLsplusb'");
00195 }
00196 if (testStat_ == "SimpleLikelihoodRatio" || testStat_ == "SLR" ) { testStat_ = "LEP"; }
00197 if (testStat_ == "RatioOfProfiledLikelihoods" || testStat_ == "ROPL") { testStat_ = "TEV"; }
00198 if (testStat_ == "ProfileLikelihood" || testStat_ == "PL") { testStat_ = "Profile"; }
00199 if (testStat_ == "ModifiedProfileLikelihood" || testStat_ == "MPL") { testStat_ = "LHC"; }
00200 if (testStat_ == "SignFlipProfileLikelihood" || testStat_ == "SFPL") { testStat_ = "LHCFC"; }
00201 if (testStat_ == "Atlas") { testStat_ = "LHC"; std::cout << "Note: the Atlas test statistics is now known as LHC test statistics.\n" << std::endl; }
00202 if (testStat_ != "LEP" && testStat_ != "TEV" && testStat_ != "LHC" && testStat_ != "LHCFC" && testStat_ != "Profile" && testStat_ != "MLZ") {
00203 throw std::invalid_argument("HybridNew: Test statistics should be one of 'LEP' or 'TEV' or 'LHC' (previously known as 'Atlas') or 'Profile'");
00204 }
00205 if (verbose) {
00206 if (testStat_ == "LEP") std::cout << ">>> using the Simple Likelihood Ratio test statistics (Q_LEP)" << std::endl;
00207 if (testStat_ == "TEV") std::cout << ">>> using the Ratio of Profiled Likelihoods test statistics (Q_TEV)" << std::endl;
00208 if (testStat_ == "LHC") std::cout << ">>> using the Profile Likelihood test statistics modified for upper limits (Q_LHC)" << std::endl;
00209 if (testStat_ == "LHCFC") std::cout << ">>> using the Profile Likelihood test statistics modified for upper limits and Feldman-Cousins (Q_LHCFC)" << std::endl;
00210 if (testStat_ == "Profile") std::cout << ">>> using the Profile Likelihood test statistics not modified for upper limits (Q_Profile)" << std::endl;
00211 if (testStat_ == "MLZ") std::cout << ">>> using the Maximum likelihood estimator of the signal strength as test statistics" << std::endl;
00212 }
00213 if (readHybridResults_ || workingMode_ == MakeTestStatistics || workingMode_ == MakeSignificanceTestStatistics) {
00214
00215 fitNuisances_ = false;
00216 }
00217 if (reportPVal_ && workingMode_ != MakeSignificance) throw std::invalid_argument("HybridNew: option --pvalue must go together with --significance");
00218 }
00219
00220 void HybridNew::setupPOI(RooStats::ModelConfig *mc_s) {
00221 RooArgSet POI(*mc_s->GetParametersOfInterest());
00222 if (rValue_.find("=") == std::string::npos) {
00223 if (POI.getSize() != 1) throw std::invalid_argument("Error: the argument to --singlePoint or --signalForSignificance is a single value, but there are multiple POIs");
00224 POI.snapshot(rValues_);
00225 errno = 0;
00226 ((RooRealVar*)rValues_.first())->setVal(strtod(rValue_.c_str(),NULL));
00227 if (errno != 0) std::invalid_argument("Error: the argument to --singlePoint or --signalForSignificance is not a valid number.");
00228 } else {
00229 std::string::size_type eqidx = 0, colidx = 0, colidx2;
00230 do {
00231 eqidx = rValue_.find("=", colidx);
00232 colidx2 = rValue_.find(",", colidx+1);
00233 if (eqidx == std::string::npos || (colidx2 != std::string::npos && colidx2 < eqidx)) {
00234 throw std::invalid_argument("Error: the argument to --singlePoint or --signalForSignificance is not in the forms 'value' or 'name1=value1,name2=value2,...'\n");
00235 }
00236 std::string poiName = rValue_.substr(colidx, eqidx);
00237 std::string poiVal = rValue_.substr(eqidx+1, (colidx2 == std::string::npos ? std::string::npos : colidx2 - eqidx - 1));
00238 RooAbsArg *poi = POI.find(poiName.c_str());
00239 if (poi == 0) throw std::invalid_argument("Error: unknown parameter '"+poiName+"' passed to --singlePoint or --signalForSignificance.");
00240 rValues_.addClone(*poi);
00241 errno = 0;
00242 rValues_.setRealValue(poi->GetName(), strtod(poiVal.c_str(),NULL));
00243 if (errno != 0) throw std::invalid_argument("Error: invalid value '"+poiVal+"' for parameter '"+poiName+"' passed to --singlePoint or --signalForSignificance.");
00244 } while (colidx2 != std::string::npos);
00245 if (rValues_.getSize() != POI.getSize()) {
00246 throw std::invalid_argument("Error: not all parameters of interest specified in --singlePoint or --signalForSignificance");
00247 }
00248 }
00249 }
00250
00251 bool HybridNew::run(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) {
00252 RooFitGlobalKillSentry silence(verbose <= 1 ? RooFit::WARNING : RooFit::DEBUG);
00253 ProfileLikelihood::MinimizerSentry minimizerConfig(minimizerAlgo_, minimizerTolerance_);
00254 perf_totalToysRun_ = 0;
00255 if (rValues_.getSize() == 0) setupPOI(mc_s);
00256 switch (workingMode_) {
00257 case MakeLimit: return runLimit(w, mc_s, mc_b, data, limit, limitErr, hint);
00258 case MakeSignificance: return runSignificance(w, mc_s, mc_b, data, limit, limitErr, hint);
00259 case MakePValues: return runSinglePoint(w, mc_s, mc_b, data, limit, limitErr, hint);
00260 case MakeTestStatistics:
00261 case MakeSignificanceTestStatistics:
00262 return runTestStatistics(w, mc_s, mc_b, data, limit, limitErr, hint);
00263 }
00264 assert("Shouldn't get here" == 0);
00265 }
00266
00267 bool HybridNew::runSignificance(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) {
00268 HybridNew::Setup setup;
00269 std::auto_ptr<RooStats::HybridCalculator> hc(create(w, mc_s, mc_b, data, rValues_, setup));
00270 std::auto_ptr<HypoTestResult> hcResult;
00271 if (readHybridResults_) {
00272 hcResult.reset(readToysFromFile(rValues_));
00273 } else {
00274 hcResult.reset(evalGeneric(*hc));
00275 for (unsigned int i = 1; i < iterations_; ++i) {
00276 std::auto_ptr<HypoTestResult> more(evalGeneric(*hc));
00277 hcResult->Append(more.get());
00278 if (verbose) std::cout << "\t1 - CLb = " << hcResult->CLb() << " +/- " << hcResult->CLbError() << std::endl;
00279 }
00280 }
00281 if (hcResult.get() == 0) {
00282 std::cerr << "Hypotest failed" << std::endl;
00283 return false;
00284 }
00285 if (saveHybridResult_) {
00286 TString name = TString::Format("HypoTestResult_mh%g",mass_);
00287 RooLinkedListIter it = rValues_.iterator();
00288 for (RooRealVar *rIn = (RooRealVar*) it.Next(); rIn != 0; rIn = (RooRealVar*) it.Next()) {
00289 name += Form("_%s%g", rIn->GetName(), rIn->getVal());
00290 }
00291 name += Form("_%u", RooRandom::integer(std::numeric_limits<UInt_t>::max() - 1));
00292 writeToysHere->WriteTObject(new HypoTestResult(*hcResult), name);
00293 if (verbose) std::cout << "Hybrid result saved as " << name << " in " << writeToysHere->GetFile()->GetName() << " : " << writeToysHere->GetPath() << std::endl;
00294 }
00295 if (verbose > 1) {
00296 std::cout << "Observed test statistics in data: " << hcResult->GetTestStatisticData() << std::endl;
00297 std::cout << "Background-only toys sampled: " << hcResult->GetNullDistribution()->GetSize() << std::endl;
00298 }
00299 if (expectedFromGrid_) applyExpectedQuantile(*hcResult);
00300
00301 hcResult->SetTestStatisticData(hcResult->GetTestStatisticData()+EPS);
00302 double sig = hcResult->Significance();
00303 double sigHi = RooStats::PValueToSignificance( (hcResult->CLb() - hcResult->CLbError()) ) - sig;
00304 double sigLo = RooStats::PValueToSignificance( (hcResult->CLb() + hcResult->CLbError()) ) - sig;
00305 if (reportPVal_) {
00306 limit = hcResult->NullPValue();
00307 limitErr = hcResult->NullPValueError();
00308 } else {
00309 limit = sig;
00310 limitErr = std::max(sigHi,-sigLo);
00311 }
00312 std::cout << "\n -- Hybrid New -- \n";
00313 std::cout << "Significance: " << sig << " " << sigLo << "/+" << sigHi << "\n";
00314 std::cout << "Null p-value: " << hcResult->NullPValue() << " +/- " << hcResult->NullPValueError() << "\n";
00315 return isfinite(limit);
00316 }
00317
00318 bool HybridNew::runLimit(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) {
00319 if (mc_s->GetParametersOfInterest()->getSize() != 1) throw std::logic_error("Cannot run limits in more than one dimension, for the moment.");
00320 RooRealVar *r = dynamic_cast<RooRealVar *>(mc_s->GetParametersOfInterest()->first()); r->setConstant(true);
00321 w->loadSnapshot("clean");
00322
00323 if ((hint != 0) && (*hint > r->getMin())) {
00324 r->setMax(std::min<double>(3.0 * (*hint), r->getMax()));
00325 r->setMin(std::max<double>(0.3 * (*hint), r->getMin()));
00326 }
00327
00328 typedef std::pair<double,double> CLs_t;
00329
00330 double clsTarget = 1 - cl;
00331 CLs_t clsMin(1,0), clsMax(0,0), clsMid(0,0);
00332 double rMin = r->getMin(), rMax = r->getMax();
00333 limit = 0.5*(rMax + rMin);
00334 limitErr = 0.5*(rMax - rMin);
00335 bool done = false;
00336
00337 TF1 expoFit("expoFit","[0]*exp([1]*(x-[2]))", rMin, rMax);
00338
00339 if (readHybridResults_) {
00340 if (verbose > 0) std::cout << "Search for upper limit using pre-computed grid of p-values" << std::endl;
00341
00342 if (!gridFile_.empty()) {
00343 if (grid_.empty()) {
00344 std::auto_ptr<TFile> gridFile(TFile::Open(gridFile_.c_str()));
00345 if (gridFile.get() == 0) throw std::runtime_error(("Can't open grid file "+gridFile_).c_str());
00346 TDirectory *toyDir = gridFile->GetDirectory("toys");
00347 if (!toyDir) throw std::logic_error("Cannot use readHypoTestResult: empty toy dir in input file empty");
00348 readGrid(toyDir, rMinSet_ ? rMin : -99e99, rMaxSet_ ? rMax :+99e99);
00349 }
00350 if (grid_.size() <= 1) throw std::logic_error("The grid must contain at least 2 points.");
00351 if (noUpdateGrid_) {
00352 if (testStat_ == "LHCFC" && rule_ == "FC" && (saveGrid_ || lowerLimit_)) {
00353 std::cout << "Will have to re-run points for which the test statistics was set to zero" << std::endl;
00354 updateGridDataFC(w, mc_s, mc_b, data, !fullGrid_, clsTarget);
00355 } else {
00356 std::cout << "Will use the test statistics that had already been computed" << std::endl;
00357 }
00358 } else {
00359 updateGridData(w, mc_s, mc_b, data, !fullGrid_, clsTarget);
00360 }
00361 } else throw std::logic_error("When setting a limit reading results from file, a grid file must be specified with option --grid");
00362 if (grid_.size() <= 1) throw std::logic_error("The grid must contain at least 2 points.");
00363
00364 useGrid();
00365
00366 double minDist=1e3;
00367 double nearest = 0;
00368 rMin = limitPlot_->GetX()[0];
00369 rMax = limitPlot_->GetX()[limitPlot_->GetN()-1];
00370 for (int i = 0, n = limitPlot_->GetN(); i < n; ++i) {
00371 double x = limitPlot_->GetX()[i], y = limitPlot_->GetY()[i], ey = limitPlot_->GetErrorY(i);
00372 if (verbose > 0) printf(" r %.2f: %s = %6.4f +/- %6.4f\n", x, CLs_ ? "CLs" : "CLsplusb", y, ey);
00373 if (saveGrid_) { limit = x; limitErr = ey; Combine::commitPoint(false, y); }
00374 if (y-3*max(ey,0.01) >= clsTarget) { rMin = x; clsMin = CLs_t(y,ey); }
00375 if (fabs(y-clsTarget) < minDist) { nearest = x; minDist = fabs(y-clsTarget); }
00376 rMax = x; clsMax = CLs_t(y,ey);
00377 if (y+3*max(ey,0.01) <= clsTarget && !fullGrid_) break;
00378 }
00379 limit = nearest;
00380 if (verbose > 0) std::cout << " after scan x ~ " << limit << ", bounds [ " << rMin << ", " << rMax << "]" << std::endl;
00381 limitErr = std::max(limit-rMin, rMax-limit);
00382 expoFit.SetRange(rMin,rMax);
00383
00384 if (limitErr < std::max(rAbsAccuracy_, rRelAccuracy_ * limit)) {
00385 if (verbose > 1) std::cout << " reached accuracy " << limitErr << " below " << std::max(rAbsAccuracy_, rRelAccuracy_ * limit) << std::endl;
00386 done = true;
00387 }
00388 } else {
00389 limitPlot_.reset(new TGraphErrors());
00390
00391 if (verbose > 0) std::cout << "Search for upper limit to the limit" << std::endl;
00392 for (int tries = 0; tries < 6; ++tries) {
00393 clsMax = eval(w, mc_s, mc_b, data, rMax);
00394 if (lowerLimit_) break;
00395 if (clsMax.first == 0 || clsMax.first + 3 * fabs(clsMax.second) < clsTarget ) break;
00396 rMax += rMax;
00397 if (tries == 5) {
00398 std::cerr << "Cannot set higher limit: at " << r->GetName() << " = " << rMax << " still get " << (CLs_ ? "CLs" : "CLsplusb") << " = " << clsMax.first << std::endl;
00399 return false;
00400 }
00401 }
00402 if (verbose > 0) std::cout << "Search for lower limit to the limit" << std::endl;
00403 clsMin = (CLs_ && rMin == 0 ? CLs_t(1,0) : eval(w, mc_s, mc_b, data, rMin));
00404 if (!lowerLimit_ && clsMin.first != 1 && clsMin.first - 3 * fabs(clsMin.second) < clsTarget) {
00405 if (CLs_) {
00406 rMin = 0;
00407 clsMin = CLs_t(1,0);
00408 } else {
00409 rMin = -rMax / 4;
00410 for (int tries = 0; tries < 6; ++tries) {
00411 clsMin = eval(w, mc_s, mc_b, data, rMin);
00412 if (clsMin.first == 1 || clsMin.first - 3 * fabs(clsMin.second) > clsTarget) break;
00413 rMin += rMin;
00414 if (tries == 5) {
00415 std::cerr << "Cannot set lower limit: at " << r->GetName() << " = " << rMin << " still get " << (CLs_ ? "CLs" : "CLsplusb") << " = " << clsMin.first << std::endl;
00416 return false;
00417 }
00418 }
00419 }
00420 }
00421
00422 if (verbose > 0) std::cout << "Now doing proper bracketing & bisection" << std::endl;
00423 do {
00424
00425 limit = 0.5*(rMin+rMax); limitErr = 0.5*(rMax-rMin);
00426 if (algo_ == "logSecant" && clsMax.first != 0 && clsMin.first != 0) {
00427 double logMin = log(clsMin.first), logMax = log(clsMax.first), logTarget = log(clsTarget);
00428 limit = rMin + (rMax-rMin) * (logTarget - logMin)/(logMax - logMin);
00429 if (clsMax.second != 0 && clsMin.second != 0) {
00430 limitErr = hypot((logTarget-logMax) * (clsMin.second/clsMin.first), (logTarget-logMin) * (clsMax.second/clsMax.first));
00431 limitErr *= (rMax-rMin)/((logMax-logMin)*(logMax-logMin));
00432 }
00433
00434 if (limitErr < interpAccuracy_ * (rMax-rMin)) limitErr = interpAccuracy_ * (rMax-rMin);
00435 }
00436 r->setError(limitErr);
00437
00438
00439 if (limitErr < std::max(rAbsAccuracy_, rRelAccuracy_ * limit)) {
00440 if (verbose > 1) std::cout << " reached accuracy " << limitErr << " below " << std::max(rAbsAccuracy_, rRelAccuracy_ * limit) << std::endl;
00441 done = true; break;
00442 }
00443
00444
00445 clsMid = eval(w, mc_s, mc_b, data, limit, true, clsTarget);
00446 if (clsMid.second == -1) {
00447 std::cerr << "Hypotest failed" << std::endl;
00448 return false;
00449 }
00450
00451
00452 if (fabs(clsMid.first-clsTarget) >= 2*clsMid.second) {
00453 if ((clsMid.first>clsTarget) == (clsMax.first>clsTarget)) {
00454 rMax = limit; clsMax = clsMid;
00455 } else {
00456 rMin = limit; clsMin = clsMid;
00457 }
00458 } else {
00459 if (verbose > 0) std::cout << "Trying to move the interval edges closer" << std::endl;
00460 double rMinBound = rMin, rMaxBound = rMax;
00461
00462 while (clsMin.second == 0 || fabs(rMin-limit) > std::max(rAbsAccuracy_, rRelAccuracy_ * limit)) {
00463 rMin = 0.5*(rMin+limit);
00464 clsMin = eval(w, mc_s, mc_b, data, rMin, true, clsTarget);
00465 if (fabs(clsMin.first-clsTarget) <= 2*clsMin.second) break;
00466 rMinBound = rMin;
00467 }
00468 while (clsMax.second == 0 || fabs(rMax-limit) > std::max(rAbsAccuracy_, rRelAccuracy_ * limit)) {
00469 rMax = 0.5*(rMax+limit);
00470 clsMax = eval(w, mc_s, mc_b, data, rMax, true, clsTarget);
00471 if (fabs(clsMax.first-clsTarget) <= 2*clsMax.second) break;
00472 rMaxBound = rMax;
00473 }
00474 expoFit.SetRange(rMinBound,rMaxBound);
00475 break;
00476 }
00477 } while (true);
00478
00479 }
00480
00481 if (!done) {
00482 double rMinBound, rMaxBound; expoFit.GetRange(rMinBound, rMaxBound);
00483 if (verbose) {
00484 std::cout << "\n -- HybridNew, before fit -- \n";
00485 std::cout << "Limit: " << r->GetName() << " < " << limit << " +/- " << limitErr << " [" << rMinBound << ", " << rMaxBound << "]\n";
00486 }
00487
00488 expoFit.FixParameter(0,clsTarget);
00489 double clsmaxfirst = clsMax.first;
00490 if ( clsmaxfirst == 0.0 ) clsmaxfirst = 0.005;
00491 double par1guess = log(clsmaxfirst/clsMin.first)/(rMax-rMin);
00492 expoFit.SetParameter(1,par1guess);
00493 expoFit.SetParameter(2,limit);
00494 limitErr = std::max(fabs(rMinBound-limit), fabs(rMaxBound-limit));
00495 int npoints = 0;
00496 for (int j = 0; j < limitPlot_->GetN(); ++j) {
00497 if (limitPlot_->GetX()[j] >= rMinBound && limitPlot_->GetX()[j] <= rMaxBound) npoints++;
00498 }
00499 for (int i = 0, imax = (readHybridResults_ ? 0 : 8); i <= imax; ++i, ++npoints) {
00500 limitPlot_->Sort();
00501 limitPlot_->Fit(&expoFit,(verbose <= 1 ? "QNR EX0" : "NR EX0"));
00502 if (verbose) {
00503 std::cout << "Fit to " << npoints << " points: " << expoFit.GetParameter(2) << " +/- " << expoFit.GetParError(2) << std::endl;
00504 }
00505 if ((rMinBound < expoFit.GetParameter(2)) && (expoFit.GetParameter(2) < rMaxBound) && (expoFit.GetParError(2) < 0.5*(rMaxBound-rMinBound))) {
00506
00507 limit = expoFit.GetParameter(2);
00508 limitErr = expoFit.GetParError(2);
00509 if (limitErr < std::max(rAbsAccuracy_, rRelAccuracy_ * limit)) break;
00510 }
00511
00512 double rTry = RooRandom::uniform()*(rMaxBound-rMinBound)+rMinBound;
00513 if (i != imax) eval(w, mc_s, mc_b, data, rTry, true, clsTarget);
00514 }
00515 }
00516
00517 if (!plot_.empty() && limitPlot_.get()) {
00518 TCanvas *c1 = new TCanvas("c1","c1");
00519 limitPlot_->Sort();
00520 limitPlot_->SetLineWidth(2);
00521 double xmin = r->getMin(), xmax = r->getMax();
00522 for (int j = 0; j < limitPlot_->GetN(); ++j) {
00523 if (limitPlot_->GetY()[j] > 1.4*clsTarget || limitPlot_->GetY()[j] < 0.6*clsTarget) continue;
00524 xmin = std::min(limitPlot_->GetX()[j], xmin);
00525 xmax = std::max(limitPlot_->GetX()[j], xmax);
00526 }
00527 limitPlot_->GetXaxis()->SetRangeUser(xmin,xmax);
00528 limitPlot_->GetYaxis()->SetRangeUser(0.5*clsTarget, 1.5*clsTarget);
00529 limitPlot_->Draw("AP");
00530 expoFit.Draw("SAME");
00531 TLine line(limitPlot_->GetX()[0], clsTarget, limitPlot_->GetX()[limitPlot_->GetN()-1], clsTarget);
00532 line.SetLineColor(kRed); line.SetLineWidth(2); line.Draw();
00533 line.DrawLine(limit, 0, limit, limitPlot_->GetY()[0]);
00534 line.SetLineWidth(1); line.SetLineStyle(2);
00535 line.DrawLine(limit-limitErr, 0, limit-limitErr, limitPlot_->GetY()[0]);
00536 line.DrawLine(limit+limitErr, 0, limit+limitErr, limitPlot_->GetY()[0]);
00537 c1->Print(plot_.c_str());
00538 }
00539
00540 std::cout << "\n -- Hybrid New -- \n";
00541 std::cout << "Limit: " << r->GetName() << " < " << limit << " +/- " << limitErr << " @ " << cl * 100 << "% CL\n";
00542 if (verbose > 1) std::cout << "Total toys: " << perf_totalToysRun_ << std::endl;
00543 return true;
00544 }
00545
00546 bool HybridNew::runSinglePoint(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) {
00547 std::pair<double, double> result = eval(w, mc_s, mc_b, data, rValues_, clsAccuracy_ != 0);
00548 std::cout << "\n -- Hybrid New -- \n";
00549 std::cout << (CLs_ ? "CLs = " : "CLsplusb = ") << result.first << " +/- " << result.second << std::endl;
00550 if (verbose > 1) std::cout << "Total toys: " << perf_totalToysRun_ << std::endl;
00551 limit = result.first;
00552 limitErr = result.second;
00553 return true;
00554 }
00555
00556 bool HybridNew::runTestStatistics(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr, const double *hint) {
00557 bool isProfile = (testStat_ == "LHC" || testStat_ == "LHCFC" || testStat_ == "Profile");
00558 if (readHybridResults_ && expectedFromGrid_) {
00559 std::auto_ptr<RooStats::HypoTestResult> result(readToysFromFile(rValues_));
00560 applyExpectedQuantile(*result);
00561 limit = -2 * result->GetTestStatisticData();
00562 } else {
00563 HybridNew::Setup setup;
00564 std::auto_ptr<RooStats::HybridCalculator> hc(create(w, mc_s, mc_b, data, rValues_, setup));
00565 RooArgSet nullPOI(*setup.modelConfig_bonly.GetSnapshot());
00566 if (isProfile) {
00568 nullPOI.assignValueOnly(rValues_);
00569 }
00570 limit = -2 * setup.qvar->Evaluate(data, nullPOI);
00571 }
00572 if (isProfile) limit = -limit;
00573 std::cout << "\n -- Hybrid New -- \n";
00574 std::cout << "-2 ln Q_{"<< testStat_<<"} = " << limit << std::endl;
00575 return true;
00576 }
00577
00578 std::pair<double, double> HybridNew::eval(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double rVal, bool adaptive, double clsTarget) {
00579 RooArgSet rValues;
00580 mc_s->GetParametersOfInterest()->snapshot(rValues);
00581 RooRealVar *r = dynamic_cast<RooRealVar*>(rValues.first());
00582 if (rVal > r->getMax()) r->setMax(2*rVal);
00583 r->setVal(rVal);
00584 return eval(w,mc_s,mc_b,data,rValues,adaptive,clsTarget);
00585 }
00586
00587 std::pair<double, double> HybridNew::eval(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, const RooAbsCollection & rVals, bool adaptive, double clsTarget) {
00588 if (readHybridResults_) {
00589 bool isProfile = (testStat_ == "LHC" || testStat_ == "LHCFC" || testStat_ == "Profile");
00590 std::auto_ptr<RooStats::HypoTestResult> result(readToysFromFile(rVals));
00591 std::pair<double, double> ret(-1,-1);
00592 assert(result.get() != 0 && "Null result in HybridNew::eval(Workspace,...) after readToysFromFile");
00593 if (expectedFromGrid_) {
00594 applyExpectedQuantile(*result);
00595 result->SetTestStatisticData(result->GetTestStatisticData() + (isProfile ? -EPS : EPS));
00596 } else if (!noUpdateGrid_) {
00597 Setup setup;
00598 std::auto_ptr<RooStats::HybridCalculator> hc = create(w, mc_s, mc_b, data, rVals, setup);
00599 RooArgSet nullPOI(*setup.modelConfig_bonly.GetSnapshot());
00600 if (isProfile) nullPOI.assignValueOnly(rVals);
00601 double testStat = setup.qvar->Evaluate(data, nullPOI);
00602 result->SetTestStatisticData(testStat + (isProfile ? -EPS : EPS));
00603 }
00604 ret = eval(*result, rVals);
00605 return ret;
00606 }
00607
00608 HybridNew::Setup setup;
00609 RooLinkedListIter it = rVals.iterator();
00610 for (RooRealVar *rIn = (RooRealVar*) it.Next(); rIn != 0; rIn = (RooRealVar*) it.Next()) {
00611 RooRealVar *r = dynamic_cast<RooRealVar *>(mc_s->GetParametersOfInterest()->find(rIn->GetName()));
00612 r->setVal(rIn->getVal());
00613 if (verbose) std::cout << " " << r->GetName() << " = " << rIn->getVal() << " +/- " << r->getError() << std::endl;
00614 }
00615 std::auto_ptr<RooStats::HybridCalculator> hc(create(w, mc_s, mc_b, data, rVals, setup));
00616 std::pair<double, double> ret = eval(*hc, rVals, adaptive, clsTarget);
00617
00618
00619 if (limitPlot_.get()) {
00620 limitPlot_->Set(limitPlot_->GetN()+1);
00621 limitPlot_->SetPoint(limitPlot_->GetN()-1, ((RooAbsReal*)rVals.first())->getVal(), ret.first);
00622 limitPlot_->SetPointError(limitPlot_->GetN()-1, 0, ret.second);
00623 }
00624
00625 return ret;
00626 }
00627
00628
00629
00630 std::auto_ptr<RooStats::HybridCalculator> HybridNew::create(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double rVal, HybridNew::Setup &setup) {
00631 RooArgSet rValues;
00632 mc_s->GetParametersOfInterest()->snapshot(rValues);
00633 RooRealVar *r = dynamic_cast<RooRealVar*>(rValues.first());
00634 if (rVal > r->getMax()) r->setMax(2*rVal);
00635 r->setVal(rVal);
00636 return create(w,mc_s,mc_b,data,rValues,setup);
00637 }
00638
00639 std::auto_ptr<RooStats::HybridCalculator> HybridNew::create(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, const RooAbsCollection & rVals, HybridNew::Setup &setup) {
00640 using namespace RooStats;
00641
00642 w->loadSnapshot("clean");
00643
00644
00645 RooArgSet poi(*mc_s->GetParametersOfInterest()), params(poi);
00646 RooRealVar *r = dynamic_cast<RooRealVar *>(poi.first());
00647
00648 if (poi.getSize() == 1) {
00649 double rVal = ((RooAbsReal*)rVals.first())->getVal();
00650 if (testStat_ != "MLZ") r->setMax(rVal);
00651 r->setVal(rVal);
00652 if (testStat_ == "LHC" || testStat_ == "Profile") {
00653 r->setConstant(false); r->setMin(0);
00654 if (workingMode_ == MakeSignificance || workingMode_ == MakeSignificanceTestStatistics) {
00655 r->setVal(0);
00656 r->removeMax();
00657 }
00658 } else {
00659 r->setConstant(true);
00660 }
00661 } else {
00662 if (testStat_ == "Profile") utils::setAllConstant(poi, false);
00663 else if (testStat_ == "LEP" || testStat_ == "TEV") utils::setAllConstant(poi, true);
00664 poi.assignValueOnly(rVals);
00665 }
00666
00667
00668
00669
00670 RooArgList gobsParams;
00671 RooArgList gobs;
00672 if (withSystematics && testStat_ == "LHC" && optimizeTestStatistics_) {
00673 RooArgList allnuis(*mc_s->GetNuisanceParameters());
00674 RooArgList allgobs(*mc_s->GetGlobalObservables());
00675 for (int i=0; i<allnuis.getSize(); ++i) {
00676 RooRealVar *nuis = (RooRealVar*)allnuis.at(i);
00677 if (nuis->getAttribute("globalConstrained")) {
00678 RooRealVar *glob = (RooRealVar*)allgobs.find(TString::Format("%s_In",nuis->GetName()));
00679 if (glob) {
00680 nuis->setVal(glob->getVal());
00681 nuis->setConstant();
00682 gobsParams.add(*nuis);
00683 gobs.add(*glob);
00684 }
00685 }
00686 }
00687 }
00688
00689 std::auto_ptr<RooFitResult> fitMu, fitZero;
00690 if (fitNuisances_ && mc_s->GetNuisanceParameters() && withSystematics) {
00691 TStopwatch timer;
00692 bool isExt = mc_s->GetPdf()->canBeExtended();
00693 utils::setAllConstant(poi, true);
00695 RooAbsPdf *pdfB = mc_b->GetPdf();
00696 if (poi.getSize() == 1) {
00697 r->setVal(0); pdfB = mc_s->GetPdf();
00698 }
00699 timer.Start();
00700 {
00701 CloseCoutSentry sentry(verbose < 3);
00702 fitZero.reset(pdfB->fitTo(data, RooFit::Save(1), RooFit::Minimizer("Minuit2","minimize"), RooFit::Strategy(1), RooFit::Hesse(0), RooFit::Extended(isExt), RooFit::Constrain(*mc_s->GetNuisanceParameters())));
00703 }
00704 if (verbose > 1) { std::cout << "Zero signal fit" << std::endl; fitZero->Print("V"); }
00705 if (verbose > 1) { std::cout << "Fitting of the background hypothesis done in " << timer.RealTime() << " s" << std::endl; }
00706 poi.assignValueOnly(rVals);
00707 timer.Start();
00708 {
00709 CloseCoutSentry sentry(verbose < 3);
00710 fitMu.reset(mc_s->GetPdf()->fitTo(data, RooFit::Save(1), RooFit::Minimizer("Minuit2","minimize"), RooFit::Strategy(1), RooFit::Hesse(0), RooFit::Extended(isExt), RooFit::Constrain(*mc_s->GetNuisanceParameters())));
00711 }
00712 if (verbose > 1) { std::cout << "Reference signal fit" << std::endl; fitMu->Print("V"); }
00713 if (verbose > 1) { std::cout << "Fitting of the signal-plus-background hypothesis done in " << timer.RealTime() << " s" << std::endl; }
00714 } else { fitNuisances_ = false; }
00715
00716
00717 setup.modelConfig = ModelConfig("HybridNew_mc_s", w);
00718 setup.modelConfig.SetPdf(*mc_s->GetPdf());
00719 setup.modelConfig.SetObservables(*mc_s->GetObservables());
00720 setup.modelConfig.SetParametersOfInterest(*mc_s->GetParametersOfInterest());
00721 if (withSystematics) {
00722 if (genNuisances_ && mc_s->GetNuisanceParameters()) setup.modelConfig.SetNuisanceParameters(*mc_s->GetNuisanceParameters());
00723 if (genGlobalObs_ && mc_s->GetGlobalObservables()) setup.modelConfig.SetGlobalObservables(*mc_s->GetGlobalObservables());
00724
00725 }
00726
00727 setup.modelConfig_bonly = ModelConfig("HybridNew_mc_b", w);
00728 setup.modelConfig_bonly.SetPdf(*mc_b->GetPdf());
00729 setup.modelConfig_bonly.SetObservables(*mc_b->GetObservables());
00730 setup.modelConfig_bonly.SetParametersOfInterest(*mc_b->GetParametersOfInterest());
00731 if (withSystematics) {
00732 if (genNuisances_ && mc_b->GetNuisanceParameters()) setup.modelConfig_bonly.SetNuisanceParameters(*mc_b->GetNuisanceParameters());
00733 if (genGlobalObs_ && mc_b->GetGlobalObservables()) setup.modelConfig_bonly.SetGlobalObservables(*mc_b->GetGlobalObservables());
00734 }
00735
00736 if (withSystematics && !genNuisances_) {
00737
00738
00739
00740 if (w->var("__HybridNew_fake_nuis__") == 0) {
00741 w->factory("__HybridNew_fake_nuis__[0.5,0,1]");
00742 w->factory("Uniform::__HybridNew_fake_nuisPdf__(__HybridNew_fake_nuis__)");
00743 }
00744 setup.modelConfig.SetNuisanceParameters(RooArgSet(*w->var("__HybridNew_fake_nuis__")));
00745 setup.modelConfig_bonly.SetNuisanceParameters(RooArgSet(*w->var("__HybridNew_fake_nuis__")));
00746 }
00747
00748
00749
00750 RooArgSet paramsZero;
00751 if (poi.getSize() == 1) {
00752 paramsZero.addClone(*rVals.first());
00753 paramsZero.setRealValue(rVals.first()->GetName(), 0);
00754 }
00755 if (fitNuisances_) params.add(fitMu->floatParsFinal());
00756 if (fitNuisances_) paramsZero.addClone(fitZero->floatParsFinal());
00757 setup.modelConfig.SetSnapshot(params);
00758 setup.modelConfig_bonly.SetSnapshot(paramsZero);
00759 TString paramsSnapName = TString::Format("%s_%s_snapshot", setup.modelConfig.GetName(), params.GetName());
00760 TString paramsZSnapName = TString::Format("%s__snapshot", setup.modelConfig_bonly.GetName());
00761 RooFit::MsgLevel level = RooMsgService::instance().globalKillBelow();
00762 RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR);
00763 w->defineSet(paramsSnapName, params , true);
00764 w->defineSet(paramsZSnapName, paramsZero ,true);
00765 RooMsgService::instance().setGlobalKillBelow(level);
00766
00767
00768
00769 RooAbsPdf *factorizedPdf_s = setup.modelConfig.GetPdf(), *factorizedPdf_b = setup.modelConfig_bonly.GetPdf();
00770 if (withSystematics && optimizeProductPdf_ && !genGlobalObs_) {
00771 RooArgList constraints;
00772 RooAbsPdf *factorizedPdf_s = utils::factorizePdf(*mc_s->GetObservables(), *mc_s->GetPdf(), constraints);
00773 RooAbsPdf *factorizedPdf_b = utils::factorizePdf(*mc_b->GetObservables(), *mc_b->GetPdf(), constraints);
00774 if (factorizedPdf_s != mc_s->GetPdf()) setup.cleanupList.addOwned(*factorizedPdf_s);
00775 if (factorizedPdf_b != mc_b->GetPdf()) setup.cleanupList.addOwned(*factorizedPdf_b);
00776 setup.modelConfig.SetPdf(*factorizedPdf_s);
00777 setup.modelConfig_bonly.SetPdf(*factorizedPdf_b);
00778 }
00779
00780 if (testStat_ == "LEP") {
00781
00782 if (withSystematics) {
00783 if (!fitNuisances_) {
00784 params.add(*mc_s->GetNuisanceParameters(), true);
00785 paramsZero.addClone(*mc_b->GetNuisanceParameters(), true);
00786 } else {
00787 std::cerr << "ALERT: using LEP test statistics with --fitNuisances is not validated and most likely broken" << std::endl;
00788 params.assignValueOnly(*mc_s->GetNuisanceParameters());
00789 paramsZero.assignValueOnly(*mc_s->GetNuisanceParameters());
00790 }
00791 }
00792 RooAbsPdf *pdfB = factorizedPdf_b;
00793 if (poi.getSize() == 1) pdfB = factorizedPdf_s;
00794 if (optimizeTestStatistics_) {
00795 if (!mc_s->GetPdf()->canBeExtended()) {
00796 setup.qvar.reset(new SimplerLikelihoodRatioTestStat(*pdfB,*factorizedPdf_s, paramsZero, params));
00797 } else {
00798 setup.qvar.reset(new SimplerLikelihoodRatioTestStatOpt(*mc_s->GetObservables(), *pdfB, *factorizedPdf_s, paramsZero, params, withSystematics));
00799 }
00800 } else {
00801 std::cerr << "ALERT: LEP test statistics without optimization not validated." << std::endl;
00802 RooArgSet paramsSnap; params.snapshot(paramsSnap);
00803 setup.qvar.reset(new SimpleLikelihoodRatioTestStat(*pdfB,*factorizedPdf_s));
00804 ((SimpleLikelihoodRatioTestStat&)*setup.qvar).SetNullParameters(paramsZero);
00805 ((SimpleLikelihoodRatioTestStat&)*setup.qvar).SetAltParameters(paramsSnap);
00806 }
00807 } else if (testStat_ == "TEV") {
00808 std::cerr << "ALERT: Tevatron test statistics not yet validated." << std::endl;
00809 RooAbsPdf *pdfB = factorizedPdf_b;
00810 if (poi.getSize() == 1) pdfB = factorizedPdf_s;
00811 if (optimizeTestStatistics_) {
00812 setup.qvar.reset(new ProfiledLikelihoodRatioTestStatOpt(*mc_s->GetObservables(), *pdfB, *mc_s->GetPdf(), mc_s->GetNuisanceParameters(), paramsZero, params));
00813 ((ProfiledLikelihoodRatioTestStatOpt&)*setup.qvar).setPrintLevel(verbose);
00814 } else {
00815 setup.qvar.reset(new RatioOfProfiledLikelihoodsTestStat(*mc_s->GetPdf(), *pdfB, setup.modelConfig.GetSnapshot()));
00816 ((RatioOfProfiledLikelihoodsTestStat&)*setup.qvar).SetSubtractMLE(false);
00817 }
00818 } else if (testStat_ == "LHC" || testStat_ == "LHCFC" || testStat_ == "Profile") {
00819 if (poi.getSize() != 1 && testStat_ != "Profile") {
00820 throw std::logic_error("ERROR: modified profile likelihood definitions (LHC, LHCFC) do not make sense in more than one dimension");
00821 }
00822 if (optimizeTestStatistics_) {
00823 ProfiledLikelihoodTestStatOpt::OneSidedness side = ProfiledLikelihoodTestStatOpt::oneSidedDef;
00824 if (testStat_ == "LHCFC") side = ProfiledLikelihoodTestStatOpt::signFlipDef;
00825 if (testStat_ == "Profile") side = ProfiledLikelihoodTestStatOpt::twoSidedDef;
00826 if (workingMode_ == MakeSignificance) r->setVal(0.0);
00827 setup.qvar.reset(new ProfiledLikelihoodTestStatOpt(*mc_s->GetObservables(), *mc_s->GetPdf(), mc_s->GetNuisanceParameters(), params, poi, gobsParams,gobs, verbose, side));
00828 } else {
00829 std::cerr << "ALERT: LHC test statistics without optimization not validated." << std::endl;
00830 setup.qvar.reset(new ProfileLikelihoodTestStat(*mc_s->GetPdf()));
00831 if (testStat_ == "LHC") {
00832 ((ProfileLikelihoodTestStat&)*setup.qvar).SetOneSided(true);
00833 } else if (testStat_ == "LHCFC") {
00834 throw std::invalid_argument("Test statistics LHCFC is not supported without optimization");
00835 }
00836 }
00837 } else if (testStat_ == "MLZ") {
00838 if (workingMode_ == MakeSignificance) r->setVal(0.0);
00839 setup.qvar.reset(new BestFitSigmaTestStat(*mc_s->GetObservables(), *mc_s->GetPdf(), mc_s->GetNuisanceParameters(), params, verbose));
00840 }
00841
00842 RooAbsPdf *nuisancePdf = 0;
00843 if (withSystematics && (genNuisances_ || (newToyMCSampler_ && genGlobalObs_))) {
00844 nuisancePdf = utils::makeNuisancePdf(*mc_s);
00845 if (nuisancePdf) setup.cleanupList.addOwned(*nuisancePdf);
00846 }
00847 if (newToyMCSampler_) {
00848 setup.toymcsampler.reset(new ToyMCSamplerOpt(*setup.qvar, nToys_, nuisancePdf, genNuisances_));
00849 } else {
00850 std::cerr << "ALERT: running with newToyMC=0 not validated." << std::endl;
00851 setup.toymcsampler.reset(new ToyMCSampler(*setup.qvar, nToys_));
00852 }
00853
00854 if (!mc_b->GetPdf()->canBeExtended()) setup.toymcsampler->SetNEventsPerToy(1);
00855
00856 if (nCpu_ > 0) {
00857 std::cerr << "ALERT: running with proof not validated." << std::endl;
00858 if (verbose > 1) std::cout << " Will use " << nCpu_ << " CPUs." << std::endl;
00859 setup.pc.reset(new ProofConfig(*w, nCpu_, "", kFALSE));
00860 setup.toymcsampler->SetProofConfig(setup.pc.get());
00861 }
00862
00863
00864 std::auto_ptr<HybridCalculator> hc(new HybridCalculator(data, setup.modelConfig, setup.modelConfig_bonly, setup.toymcsampler.get()));
00865 if (genNuisances_ || !genGlobalObs_) {
00866 if (withSystematics) {
00867 setup.toymcsampler->SetGlobalObservables(*setup.modelConfig.GetNuisanceParameters());
00868 (static_cast<HybridCalculator&>(*hc)).ForcePriorNuisanceNull(*nuisancePdf);
00869 (static_cast<HybridCalculator&>(*hc)).ForcePriorNuisanceAlt(*nuisancePdf);
00870 }
00871 } else if (genGlobalObs_ && !genNuisances_) {
00872 setup.toymcsampler->SetGlobalObservables(*setup.modelConfig.GetGlobalObservables());
00873 hc->ForcePriorNuisanceNull(*w->pdf("__HybridNew_fake_nuisPdf__"));
00874 hc->ForcePriorNuisanceAlt(*w->pdf("__HybridNew_fake_nuisPdf__"));
00875 }
00876
00877
00878 if (workingMode_ == MakeSignificance) {
00879
00880 hc->SetToys(nToys_, int(0.01*nToys_)+1);
00881 if (fullBToys_) {
00882 hc->SetToys(nToys_, nToys_);
00883 }
00884 } else if (!CLs_) {
00885
00886 hc->SetToys(fullBToys_ ? nToys_ : int(0.01*nToys_)+1, nToys_);
00887
00888 if (expectedFromGrid_ && (fabs(0.5-quantileForExpectedFromGrid_)>=0.4) ) {
00889 hc->SetToys(nToys_, nToys_);
00890 }
00891 } else {
00892
00893 hc->SetToys(fullBToys_ ? nToys_ : int(0.25*nToys_), nToys_);
00894
00895 if (expectedFromGrid_ && (fabs(0.5-quantileForExpectedFromGrid_)>=0.4) ) {
00896 hc->SetToys(nToys_, nToys_);
00897 }
00898 }
00899
00900 static const char * istr = "__HybridNew__importanceSamplingDensity";
00901 if(importanceSamplingNull_) {
00902 std::cerr << "ALERT: running with importance sampling not validated (and probably not working)." << std::endl;
00903 if(verbose > 1) std::cout << ">>> Enabling importance sampling for null hyp." << std::endl;
00904 if(!withSystematics) {
00905 throw std::invalid_argument("Importance sampling is not available without systematics");
00906 }
00907 RooArgSet importanceSnapshot;
00908 importanceSnapshot.addClone(poi);
00909 importanceSnapshot.addClone(*mc_s->GetNuisanceParameters());
00910 if (verbose > 2) importanceSnapshot.Print("V");
00911 hc->SetNullImportanceDensity(mc_b->GetPdf(), &importanceSnapshot);
00912 }
00913 if(importanceSamplingAlt_) {
00914 std::cerr << "ALERT: running with importance sampling not validated (and probably not working)." << std::endl;
00915 if(verbose > 1) std::cout << ">>> Enabling importance sampling for alt. hyp." << std::endl;
00916 if(!withSystematics) {
00917 throw std::invalid_argument("Importance sampling is not available without systematics");
00918 }
00919 if (w->pdf(istr) == 0) {
00920 w->factory("__oneHalf__[0.5]");
00921 RooAddPdf *sum = new RooAddPdf(istr, "fifty-fifty", *mc_s->GetPdf(), *mc_b->GetPdf(), *w->var("__oneHalf__"));
00922 w->import(*sum);
00923 }
00924 RooArgSet importanceSnapshot;
00925 importanceSnapshot.addClone(poi);
00926 importanceSnapshot.addClone(*mc_s->GetNuisanceParameters());
00927 if (verbose > 2) importanceSnapshot.Print("V");
00928 hc->SetAltImportanceDensity(w->pdf(istr), &importanceSnapshot);
00929 }
00930
00931 return hc;
00932 }
00933
00934 std::pair<double,double>
00935 HybridNew::eval(RooStats::HybridCalculator &hc, const RooAbsCollection & rVals, bool adaptive, double clsTarget) {
00936 std::auto_ptr<HypoTestResult> hcResult(evalGeneric(hc));
00937 if (expectedFromGrid_) applyExpectedQuantile(*hcResult);
00938 if (hcResult.get() == 0) {
00939 std::cerr << "Hypotest failed" << std::endl;
00940 return std::pair<double, double>(-1,-1);
00941 }
00942 if (testStat_ == "LHC" || testStat_ == "LHCFC" || testStat_ == "Profile") {
00943
00944 hcResult->SetTestStatisticData(hcResult->GetTestStatisticData()-EPS);
00945 } else {
00946 hcResult->SetTestStatisticData(hcResult->GetTestStatisticData()+EPS);
00947 hcResult->SetPValueIsRightTail(!hcResult->GetPValueIsRightTail());
00948 }
00949 std::pair<double,double> cls = eval(*hcResult, rVals);
00950 if (verbose) std::cout << (CLs_ ? "\tCLs = " : "\tCLsplusb = ") << cls.first << " +/- " << cls.second << std::endl;
00951 if (adaptive) {
00952 if (CLs_) {
00953 hc.SetToys(int(0.25*nToys_ + 1), nToys_);
00954 }
00955 else {
00956 hc.SetToys(1, nToys_);
00957 }
00958
00959 if (expectedFromGrid_ && (fabs(0.5-quantileForExpectedFromGrid_)>=0.4) ) {
00960 hc.SetToys(nToys_, nToys_);
00961 }
00962 while (cls.second >= clsAccuracy_ && (clsTarget == -1 || fabs(cls.first-clsTarget) < 3*cls.second) ) {
00963 std::auto_ptr<HypoTestResult> more(evalGeneric(hc));
00964 more->SetBackgroundAsAlt(false);
00965 if (testStat_ == "LHC" || testStat_ == "LHCFC" || testStat_ == "Profile") more->SetPValueIsRightTail(!more->GetPValueIsRightTail());
00966 hcResult->Append(more.get());
00967 if (expectedFromGrid_) applyExpectedQuantile(*hcResult);
00968 cls = eval(*hcResult, rVals);
00969 if (verbose) std::cout << (CLs_ ? "\tCLs = " : "\tCLsplusb = ") << cls.first << " +/- " << cls.second << std::endl;
00970 }
00971 } else if (iterations_ > 1) {
00972 for (unsigned int i = 1; i < iterations_; ++i) {
00973 std::auto_ptr<HypoTestResult> more(evalGeneric(hc));
00974 more->SetBackgroundAsAlt(false);
00975 if (testStat_ == "LHC" || testStat_ == "LHCFC" || testStat_ == "Profile") more->SetPValueIsRightTail(!more->GetPValueIsRightTail());
00976 hcResult->Append(more.get());
00977 if (expectedFromGrid_) applyExpectedQuantile(*hcResult);
00978 cls = eval(*hcResult, rVals);
00979 if (verbose) std::cout << (CLs_ ? "\tCLs = " : "\tCLsplusb = ") << cls.first << " +/- " << cls.second << std::endl;
00980 }
00981 }
00982
00983 if (verbose > 0) {
00984 std::cout <<
00985 "\tCLs = " << hcResult->CLs() << " +/- " << hcResult->CLsError() << "\n" <<
00986 "\tCLb = " << hcResult->CLb() << " +/- " << hcResult->CLbError() << "\n" <<
00987 "\tCLsplusb = " << hcResult->CLsplusb() << " +/- " << hcResult->CLsplusbError() << "\n" <<
00988 std::endl;
00989 }
00990
00991 perf_totalToysRun_ += (hcResult->GetAltDistribution()->GetSize() + hcResult->GetNullDistribution()->GetSize());
00992
00993 if (!plot_.empty() && workingMode_ != MakeLimit) {
00994 HypoTestPlot plot(*hcResult, 30);
00995 TCanvas *c1 = new TCanvas("c1","c1");
00996 plot.Draw();
00997 c1->Print(plot_.c_str());
00998 delete c1;
00999 }
01000 if (saveHybridResult_) {
01001 TString name = TString::Format("HypoTestResult_mh%g",mass_);
01002 RooLinkedListIter it = rVals.iterator();
01003 for (RooRealVar *rIn = (RooRealVar*) it.Next(); rIn != 0; rIn = (RooRealVar*) it.Next()) {
01004 name += Form("_%s%g", rIn->GetName(), rIn->getVal());
01005 }
01006 name += Form("_%u", RooRandom::integer(std::numeric_limits<UInt_t>::max() - 1));
01007 writeToysHere->WriteTObject(new HypoTestResult(*hcResult), name);
01008 if (verbose) std::cout << "Hybrid result saved as " << name << " in " << writeToysHere->GetFile()->GetName() << " : " << writeToysHere->GetPath() << std::endl;
01009 }
01010
01011 return cls;
01012 }
01013
01014 std::pair<double,double> HybridNew::eval(const RooStats::HypoTestResult &hcres, const RooAbsCollection & rVals)
01015 {
01016 double rVal = ((RooAbsReal*)rVals.first())->getVal();
01017 return eval(hcres,rVal);
01018 }
01019
01020 std::pair<double,double> HybridNew::eval(const RooStats::HypoTestResult &hcres, double rVal)
01021 {
01022 if (testStat_ == "LHCFC") {
01023 RooStats::SamplingDistribution * bDistribution = hcres.GetNullDistribution(), * sDistribution = hcres.GetAltDistribution();
01024 const std::vector<Double_t> & bdist = bDistribution->GetSamplingDistribution();
01025 const std::vector<Double_t> & bweight = bDistribution->GetSampleWeights();
01026 const std::vector<Double_t> & sdist = sDistribution->GetSamplingDistribution();
01027 const std::vector<Double_t> & sweight = sDistribution->GetSampleWeights();
01028 Double_t data = hcres.GetTestStatisticData();
01029 std::vector<Double_t> absbdist(bdist.size()), abssdist(sdist.size());
01030 std::vector<Double_t> absbweight(bweight), abssweight(sweight);
01031 Double_t absdata;
01032 if (rule_ == "FC") {
01033 for (int i = 0, n = absbdist.size(); i < n; ++i) absbdist[i] = fabs(bdist[i]);
01034 for (int i = 0, n = abssdist.size(); i < n; ++i) abssdist[i] = fabs(sdist[i]);
01035 absdata = fabs(data)-2*minimizerTolerance_;
01036 } else {
01037 for (int i = 0, n = absbdist.size(); i < n; ++i) absbdist[i] = max(0., bdist[i]);
01038 for (int i = 0, n = abssdist.size(); i < n; ++i) abssdist[i] = max(0., sdist[i]);
01039 absdata = max(0., data) - EPS;
01040 }
01041 if (rVal == 0) {
01042 abssdist.reserve(absbdist.size() + abssdist.size());
01043 abssdist.insert(abssdist.end(), absbdist.begin(), absbdist.end());
01044 abssweight.reserve(absbweight.size() + abssweight.size());
01045 abssweight.insert(abssweight.end(), absbweight.begin(), absbweight.end());
01046 }
01047 RooStats::HypoTestResult result;
01048 RooStats::SamplingDistribution *abssDist = new RooStats::SamplingDistribution("s","s",abssdist,abssweight);
01049 RooStats::SamplingDistribution *absbDist = new RooStats::SamplingDistribution("b","b",absbdist,absbweight);
01050 result.SetNullDistribution(absbDist);
01051 result.SetAltDistribution(abssDist);
01052 result.SetTestStatisticData(absdata);
01053 result.SetPValueIsRightTail(!result.GetPValueIsRightTail());
01054 if (CLs_) {
01055 return std::pair<double,double>(result.CLs(), result.CLsError());
01056 } else {
01057 return std::pair<double,double>(result.CLsplusb(), result.CLsplusbError());
01058 }
01059 } else {
01060 if (CLs_) {
01061 return std::pair<double,double>(hcres.CLs(), hcres.CLsError());
01062 } else {
01063 return std::pair<double,double>(hcres.CLsplusb(), hcres.CLsplusbError());
01064 }
01065 }
01066 }
01067
01068 void HybridNew::applyExpectedQuantile(RooStats::HypoTestResult &hcres) {
01069 if (expectedFromGrid_) {
01070 if (workingMode_ == MakeSignificance || workingMode_ == MakeSignificanceTestStatistics) {
01071 applySignalQuantile(hcres);
01072 } else if (clsQuantiles_) {
01073 applyClsQuantile(hcres);
01074 } else {
01075 std::vector<Double_t> btoys = hcres.GetNullDistribution()->GetSamplingDistribution();
01076 std::sort(btoys.begin(), btoys.end());
01077 Double_t testStat = btoys[std::min<int>(floor((1.-quantileForExpectedFromGrid_) * btoys.size()+0.5), btoys.size())];
01078 if (verbose > 0) std::cout << "Text statistics for " << quantileForExpectedFromGrid_ << " quantile: " << testStat << std::endl;
01079 hcres.SetTestStatisticData(testStat);
01080
01081 }
01082 }
01083 }
01084 void HybridNew::applyClsQuantile(RooStats::HypoTestResult &hcres) {
01085 RooStats::SamplingDistribution * bDistribution = hcres.GetNullDistribution(), * sDistribution = hcres.GetAltDistribution();
01086 const std::vector<Double_t> & bdist = bDistribution->GetSamplingDistribution();
01087 const std::vector<Double_t> & bweight = bDistribution->GetSampleWeights();
01088 const std::vector<Double_t> & sdist = sDistribution->GetSamplingDistribution();
01089 const std::vector<Double_t> & sweight = sDistribution->GetSampleWeights();
01090 TStopwatch timer;
01091
01093 timer.Start();
01094 std::vector<std::pair<double,double> > bcumul; bcumul.reserve(bdist.size());
01095 std::vector<std::pair<double,double> > scumul; scumul.reserve(sdist.size());
01096 double btot = 0, stot = 0;
01097 for (std::vector<Double_t>::const_iterator it = bdist.begin(), ed = bdist.end(), itw = bweight.begin(); it != ed; ++it, ++itw) {
01098 bcumul.push_back(std::pair<double,double>(*it, *itw));
01099 btot += *itw;
01100 }
01101 for (std::vector<Double_t>::const_iterator it = sdist.begin(), ed = sdist.end(), itw = sweight.begin(); it != ed; ++it, ++itw) {
01102 scumul.push_back(std::pair<double,double>(*it, *itw));
01103 stot += *itw;
01104 }
01105 double sinv = 1.0/stot, binv = 1.0/btot, runningSum;
01106
01107 std::sort(scumul.begin(), scumul.end());
01108 runningSum = 0;
01109 for (std::vector<std::pair<double,double> >::reverse_iterator it = scumul.rbegin(), ed = scumul.rend(); it != ed; ++it) {
01110 runningSum += it->second;
01111 it->second = runningSum * sinv;
01112 }
01113 std::sort(bcumul.begin(), bcumul.end());
01114 std::vector<std::pair<double,std::pair<double,double> > > xcumul; xcumul.reserve(bdist.size());
01115 runningSum = 0;
01116 std::vector<std::pair<double,double> >::const_iterator sbegin = scumul.begin(), send = scumul.end();
01117
01118 for (std::vector<std::pair<double,double> >::const_reverse_iterator it = bcumul.rbegin(), ed = bcumul.rend(); it != ed; ++it) {
01119 runningSum += it->second;
01120 std::vector<std::pair<double,double> >::const_iterator match = std::upper_bound(sbegin, send, std::pair<double,double>(it->first, 0));
01121 if (match == send) {
01122
01123 double clsb = (scumul.back().second > 0.5 ? 1.0 : 0.0), clb = runningSum*binv, cls = clsb / clb;
01124 xcumul.push_back(std::make_pair(CLs_ ? cls : clsb, *it));
01125 } else {
01126 double clsb = match->second, clb = runningSum*binv, cls = clsb / clb;
01127
01128 xcumul.push_back(std::make_pair(CLs_ ? cls : clsb, *it));
01129 }
01130 }
01131
01132 std::sort(xcumul.begin(), xcumul.end());
01133
01134 runningSum = 0; double cut = quantileForExpectedFromGrid_ * btot;
01135 for (std::vector<std::pair<double,std::pair<double,double> > >::const_iterator it = xcumul.begin(), ed = xcumul.end(); it != ed; ++it) {
01136 runningSum += it->second.second;
01137 if (runningSum >= cut) {
01138 hcres.SetTestStatisticData(it->second.first);
01139
01140 break;
01141 }
01142 }
01143
01144
01145 #if 0
01146
01147 timer.Start();
01148 std::vector<std::pair<double, double> > values(bdist.size());
01149 for (int i = 0, n = bdist.size(); i < n; ++i) {
01150 hcres.SetTestStatisticData( bdist[i] );
01151 values[i] = std::pair<double, double>(CLs_ ? hcres.CLs() : hcres.CLsplusb(), bdist[i]);
01152 }
01153 std::sort(values.begin(), values.end());
01154 int index = std::min<int>(floor((1.-quantileForExpectedFromGrid_) * values.size()+0.5), values.size());
01155 std::cout << "CLs quantile = " << values[index].first << " for test stat = " << values[index].second << std::endl;
01156 hcres.SetTestStatisticData(values[index].second);
01157 std::cout << "CLs quantile = " << (CLs_ ? hcres.CLs() : hcres.CLsplusb()) << " for test stat = " << values[index].second << std::endl;
01158 std::cout << "Computed quantiles in " << timer.RealTime() << " s" << std::endl;
01159 #endif
01160 }
01161
01162 void HybridNew::applySignalQuantile(RooStats::HypoTestResult &hcres) {
01163 std::vector<Double_t> stoys = hcres.GetAltDistribution()->GetSamplingDistribution();
01164 std::sort(stoys.begin(), stoys.end());
01165 Double_t testStat = stoys[std::min<int>(floor(quantileForExpectedFromGrid_ * stoys.size()+0.5), stoys.size())];
01166 if (verbose > 0) std::cout << "Text statistics for " << quantileForExpectedFromGrid_ << " quantile: " << testStat << std::endl;
01167 hcres.SetTestStatisticData(testStat);
01168 }
01169
01170 RooStats::HypoTestResult * HybridNew::evalGeneric(RooStats::HybridCalculator &hc, bool noFork) {
01171 if (fork_ && !noFork) return evalWithFork(hc);
01172 else {
01173 TStopwatch timer; timer.Start();
01174 RooStats::HypoTestResult * ret = hc.GetHypoTest();
01175 if (runtimedef::get("HybridNew_Timing")) std::cout << "Evaluated toys in " << timer.RealTime() << " s " << std::endl;
01176 return ret;
01177 }
01178 }
01179
01180 RooStats::HypoTestResult * HybridNew::evalWithFork(RooStats::HybridCalculator &hc) {
01181 TStopwatch timer;
01182 std::auto_ptr<RooStats::HypoTestResult> result(0);
01183 char *tmpfile = tempnam(NULL,"rstat");
01184 unsigned int ich = 0;
01185 std::vector<UInt_t> newSeeds(fork_);
01186 for (ich = 0; ich < fork_; ++ich) {
01187 newSeeds[ich] = RooRandom::integer(std::numeric_limits<UInt_t>::max()-1);
01188 if (!fork()) break;
01189 }
01190 if (ich == fork_) {
01191 int cstatus, ret;
01192 do {
01193 do { ret = waitpid(-1, &cstatus, 0); } while (ret == -1 && errno == EINTR);
01194 } while (ret != -1);
01195 if (ret == -1 && errno != ECHILD) throw std::runtime_error("Didn't wait for child");
01196 for (ich = 0; ich < fork_; ++ich) {
01197 TFile *f = TFile::Open(TString::Format("%s.%d.root", tmpfile, ich));
01198 if (f == 0) throw std::runtime_error(TString::Format("Child didn't leave output file %s.%d.root", tmpfile, ich).Data());
01199 RooStats::HypoTestResult *res = dynamic_cast<RooStats::HypoTestResult *>(f->Get("result"));
01200 if (res == 0) throw std::runtime_error(TString::Format("Child output file %s.%d.root is corrupted", tmpfile, ich).Data());
01201 if (result.get()) result->Append(res); else result.reset(dynamic_cast<RooStats::HypoTestResult *>(res->Clone()));
01202 f->Close();
01203 unlink(TString::Format("%s.%d.root", tmpfile, ich).Data());
01204 unlink(TString::Format("%s.%d.out.txt", tmpfile, ich).Data());
01205 unlink(TString::Format("%s.%d.err.txt", tmpfile, ich).Data());
01206 }
01207 } else {
01208 RooRandom::randomGenerator()->SetSeed(newSeeds[ich]);
01209 freopen(TString::Format("%s.%d.out.txt", tmpfile, ich).Data(), "w", stdout);
01210 freopen(TString::Format("%s.%d.err.txt", tmpfile, ich).Data(), "w", stderr);
01211 std::cout << " I'm child " << ich << ", seed " << newSeeds[ich] << std::endl;
01212 RooStats::HypoTestResult *hcResult = evalGeneric(hc, true);
01213 TFile *f = TFile::Open(TString::Format("%s.%d.root", tmpfile, ich), "RECREATE");
01214 f->WriteTObject(hcResult, "result");
01215 f->ls();
01216 f->Close();
01217 fflush(stdout); fflush(stderr);
01218 std::cout << "And I'm done" << std::endl;
01219 throw std::runtime_error("done");
01220
01221
01222 }
01223 free(tmpfile);
01224 if (verbose > 1) { std::cout << " Evaluation of p-values done in " << timer.RealTime() << " s" << std::endl; }
01225 return result.release();
01226 }
01227
01228 #if 0
01229
01230
01231
01232 RooStats::HypoTestResult * HybridNew::evalFrequentist(RooStats::HybridCalculator &hc) {
01233 int toysSB = (workingMode_ == MakeSignificance ? 1 : nToys_);
01234 int toysB = (workingMode_ == MakeSignificance ? nToys_ : (CLs_ ? nToys_/4+1 : 1));
01235 RooArgSet obs(*hc.GetAlternateModel()->GetObservables());
01236 RooArgSet nuis(*hc.GetAlternateModel()->GetNuisanceParameters());
01237 RooArgSet gobs(*hc.GetAlternateModel()->GetGlobalObservables());
01238 RooArgSet nullPoi(*hc.GetNullModel()->GetSnapshot());
01239 std::auto_ptr<RooAbsCollection> parS(hc.GetAlternateModel()->GetPdf()->getParameters(obs));
01240 std::auto_ptr<RooAbsCollection> parB(hc.GetNullModel()->GetPdf()->getParameters(obs));
01241 RooArgList constraintsS, constraintsB;
01242 RooAbsPdf *factorS = hc.GetAlternateModel()->GetPdf();
01243 RooAbsPdf *factorB = hc.GetNullModel()->GetPdf();
01244
01245
01246 std::auto_ptr<RooAbsPdf> nuisPdf(utils::makeNuisancePdf(const_cast<RooStats::ModelConfig&>(*hc.GetAlternateModel())));
01247 std::vector<Double_t> distSB, distB;
01248 *parS = *snapGlobalObs_;
01249 Double_t tsData = hc.GetTestStatSampler()->GetTestStatistic()->Evaluate(*realData_, nullPoi);
01250 if (verbose > 2) std::cout << "Test statistics on data: " << tsData << std::endl;
01251 for (int i = 0; i < toysSB; ++i) {
01252
01253 *parS = *hc.GetAlternateModel()->GetSnapshot();
01254
01255 if (verbose > 2) { std::cout << "Generating global obs starting from " << std::endl; parS->Print("V"); }
01256 std::auto_ptr<RooDataSet> gdata(nuisPdf->generate(gobs, 1));
01257 *parS = *gdata->get(0);
01258 if (verbose > 2) { std::cout << "Generated global obs" << std::endl; utils::printRAD(&*gdata); }
01259
01260 if (verbose > 2) { std::cout << "Generating obs starting from " << std::endl; parS->Print("V"); }
01261 std::auto_ptr<RooDataSet> data(factorS->generate(obs, RooFit::Extended()));
01262 if (verbose > 2) { std::cout << "Generated obs" << std::endl; utils::printRAD(&*data); }
01263
01264 distSB.push_back(hc.GetTestStatSampler()->GetTestStatistic()->Evaluate(*data, nullPoi));
01265
01266 }
01267 for (int i = 0; i < toysB; ++i) {
01268
01269 *parB = *hc.GetNullModel()->GetSnapshot();
01270
01271
01272 if (verbose > 2) { std::cout << "Generating global obs starting from " << std::endl; parB->Print("V"); }
01273 std::auto_ptr<RooDataSet> gdata(nuisPdf->generate(gobs, 1));
01274 *parB = *gdata->get(0);
01275 if (verbose > 2) { std::cout << "Generated global obs" << std::endl; utils::printRAD(&*gdata); }
01276
01277 if (verbose > 2) { std::cout << "Generating obs starting from " << std::endl; parB->Print("V"); }
01278 std::auto_ptr<RooDataSet> data(factorB->generate(obs, RooFit::Extended()));
01279 if (verbose > 2) { std::cout << "Generated obs" << std::endl; utils::printRAD(&*data); }
01280
01281 distB.push_back(hc.GetTestStatSampler()->GetTestStatistic()->Evaluate(*data, nullPoi));
01282
01283 }
01284
01285 RooStats::HypoTestResult *ret = new RooStats::HypoTestResult();
01286 ret->SetTestStatisticData(tsData);
01287 ret->SetAltDistribution(new RooStats::SamplingDistribution("sb","sb",distSB));
01288 ret->SetNullDistribution(new RooStats::SamplingDistribution("b","b",distB));
01289 return ret;
01290 }
01291 #endif
01292
01293 RooStats::HypoTestResult * HybridNew::readToysFromFile(const RooAbsCollection & rVals) {
01294 if (!readToysFromHere) throw std::logic_error("Cannot use readHypoTestResult: option toysFile not specified, or input file empty");
01295 TDirectory *toyDir = readToysFromHere->GetDirectory("toys");
01296 if (!toyDir) throw std::logic_error("Cannot use readHypoTestResult: empty toy dir in input file empty");
01297 if (verbose) std::cout << "Reading toys for ";
01298 TString prefix1 = TString::Format("HypoTestResult_mh%g",mass_);
01299 TString prefix2 = TString::Format("HypoTestResult");
01300 RooLinkedListIter it = rVals.iterator();
01301 for (RooRealVar *rIn = (RooRealVar*) it.Next(); rIn != 0; rIn = (RooRealVar*) it.Next()) {
01302 if (verbose) std::cout << rIn->GetName() << " = " << rIn->getVal() << " ";
01303 prefix1 += Form("_%s%g", rIn->GetName(), rIn->getVal());
01304 prefix2 += Form("_%s%g", rIn->GetName(), rIn->getVal());
01305 }
01306 if (verbose) std::cout << std::endl;
01307 std::auto_ptr<RooStats::HypoTestResult> ret;
01308 TIter next(toyDir->GetListOfKeys()); TKey *k;
01309 while ((k = (TKey *) next()) != 0) {
01310 if (TString(k->GetName()).Index(prefix1) != 0 && TString(k->GetName()).Index(prefix2) != 0) continue;
01311 RooStats::HypoTestResult *toy = dynamic_cast<RooStats::HypoTestResult *>(toyDir->Get(k->GetName()));
01312 if (toy == 0) continue;
01313 if (verbose > 1) std::cout << " - " << k->GetName() << std::endl;
01314 if (ret.get() == 0) {
01315 ret.reset(new RooStats::HypoTestResult(*toy));
01316 } else {
01317 ret->Append(toy);
01318 }
01319 }
01320
01321 if (ret.get() == 0) {
01322 std::cout << "ERROR: parameter point not found in input root file.\n";
01323 rVals.Print("V");
01324 if (verbose > 0) toyDir->ls();
01325 std::cout << "ERROR: parameter point not found in input root file" << std::endl;
01326 throw std::invalid_argument("Missing input");
01327 }
01328 if (verbose > 0) {
01329 std::cout <<
01330 "\tCLs = " << ret->CLs() << " +/- " << ret->CLsError() << "\n" <<
01331 "\tCLb = " << ret->CLb() << " +/- " << ret->CLbError() << "\n" <<
01332 "\tCLsplusb = " << ret->CLsplusb() << " +/- " << ret->CLsplusbError() << "\n" <<
01333 std::endl;
01334 if (!plot_.empty() && workingMode_ != MakeLimit) {
01335 HypoTestPlot plot(*ret, 30);
01336 TCanvas *c1 = new TCanvas("c1","c1");
01337 plot.Draw();
01338 c1->Print(plot_.c_str());
01339 delete c1;
01340 }
01341 }
01342 return ret.release();
01343 }
01344
01345 void HybridNew::readGrid(TDirectory *toyDir, double rMin, double rMax) {
01346 if (rValues_.getSize() != 1) throw std::runtime_error("Running limits with grid only works in one dimension for the moment");
01347 clearGrid();
01348
01349 TIter next(toyDir->GetListOfKeys()); TKey *k; const char *poiName = rValues_.first()->GetName();
01350 while ((k = (TKey *) next()) != 0) {
01351 TString name(k->GetName());
01352 if (name.Index("HypoTestResult_mh") == 0) {
01353 if (name.Index(TString::Format("HypoTestResult_mh%g_%s",mass_,poiName)) != 0 || name.Index("_", name.Index("_")+1) == -1) continue;
01354 name.ReplaceAll(TString::Format("HypoTestResult_mh%g_%s",mass_,poiName),"");
01355 if (name.Index("_") == -1) continue;
01356 name.Remove(name.Index("_"),name.Length());
01357 } else if (name.Index("HypoTestResult_") == 0) {
01358
01359 std::cout << "HybridNew::readGrid: HypoTestResult with non-conformant name " << name << " will be skipped" << std::endl;
01360 continue;
01361 } else continue;
01362 double rVal = atof(name.Data());
01363 if (rVal < rMin || rVal > rMax) continue;
01364 if (verbose > 2) std::cout << " Do " << k->GetName() << " -> " << name << " --> " << rVal << std::endl;
01365 RooStats::HypoTestResult *toy = dynamic_cast<RooStats::HypoTestResult *>(toyDir->Get(k->GetName()));
01366 RooStats::HypoTestResult *&merge = grid_[rVal];
01367 if (merge == 0) merge = new RooStats::HypoTestResult(*toy);
01368 else merge->Append(toy);
01369 merge->ResetBit(1);
01370 }
01371 if (verbose > 1) {
01372 std::cout << "GRID, as is." << std::endl;
01373 typedef std::map<double, RooStats::HypoTestResult *>::iterator point;
01374 for (point it = grid_.begin(), ed = grid_.end(); it != ed; ++it) {
01375 std::cout << " - " << it->first << " (CLs = " << it->second->CLs() << " +/- " << it->second->CLsError() << ")" << std::endl;
01376 }
01377 }
01378 }
01379 void HybridNew::updateGridData(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, bool smart, double clsTarget_) {
01380 typedef std::map<double, RooStats::HypoTestResult *>::iterator point;
01381 if (!smart) {
01382 for (point it = grid_.begin(), ed = grid_.end(); it != ed; ++it) {
01383 it->second->ResetBit(1);
01384 updateGridPoint(w, mc_s, mc_b, data, it);
01385 }
01386 } else {
01387 typedef std::pair<double,double> CLs_t;
01388 std::vector<point> points; points.reserve(grid_.size());
01389 std::vector<CLs_t> values; values.reserve(grid_.size());
01390 for (point it = grid_.begin(), ed = grid_.end(); it != ed; ++it) { points.push_back(it); values.push_back(CLs_t(-99, -99)); }
01391 int iMin = 0, iMax = points.size()-1;
01392 while (iMax-iMin > 3) {
01393 if (verbose > 1) std::cout << "Bisecting range [" << iMin << ", " << iMax << "]" << std::endl;
01394 int iMid = (iMin+iMax)/2;
01395 CLs_t clsMid = values[iMid] = updateGridPoint(w, mc_s, mc_b, data, points[iMid]);
01396 if (verbose > 1) std::cout << " Midpoint " << iMid << " value " << clsMid.first << " +/- " << clsMid.second << std::endl;
01397 if (clsMid.first - 3*max(clsMid.second,0.01) > clsTarget_) {
01398 if (verbose > 1) std::cout << " Replacing Min" << std::endl;
01399 iMin = iMid; continue;
01400 } else if (clsMid.first + 3*max(clsMid.second,0.01) < clsTarget_) {
01401 if (verbose > 1) std::cout << " Replacing Max" << std::endl;
01402 iMax = iMid; continue;
01403 } else {
01404 if (verbose > 1) std::cout << " Tightening Range" << std::endl;
01405 while (iMin < iMid-1) {
01406 int iLo = (iMin+iMid)/2;
01407 CLs_t clsLo = values[iLo] = updateGridPoint(w, mc_s, mc_b, data, points[iLo]);
01408 if (verbose > 1) std::cout << " Lowpoint " << iLo << " value " << clsLo.first << " +/- " << clsLo.second << std::endl;
01409 if (clsLo.first - 3*max(clsLo.second,0.01) > clsTarget_) iMin = iLo;
01410 else break;
01411 }
01412 while (iMax > iMid+1) {
01413 int iHi = (iMax+iMid)/2;
01414 CLs_t clsHi = values[iHi] = updateGridPoint(w, mc_s, mc_b, data, points[iHi]);
01415 if (verbose > 1) std::cout << " Highpoint " << iHi << " value " << clsHi.first << " +/- " << clsHi.second << std::endl;
01416 if (clsHi.first + 3*max(clsHi.second,0.01) < clsTarget_) iMax = iHi;
01417 else break;
01418 }
01419 break;
01420 }
01421 }
01422 if (verbose > 1) std::cout << "Final range [" << iMin << ", " << iMax << "]" << std::endl;
01423 for (int i = 0; i < iMin; ++i) {
01424 points[i]->second->SetBit(1);
01425 if (verbose > 1) std::cout << " Will not use point " << i << " (r " << points[i]->first << ")" << std::endl;
01426 }
01427 for (int i = iMin; i <= iMax; ++i) {
01428 points[i]->second->ResetBit(1);
01429 if (values[i].first < -2) {
01430 if (verbose > 1) std::cout << " Updaing point " << i << " (r " << points[i]->first << ")" << std::endl;
01431 updateGridPoint(w, mc_s, mc_b, data, points[i]);
01432 }
01433 else if (verbose > 1) std::cout << " Point " << i << " (r " << points[i]->first << ") was already updated during search." << std::endl;
01434 }
01435 for (int i = iMax+1, n = points.size(); i < n; ++i) {
01436 points[i]->second->SetBit(1);
01437 if (verbose > 1) std::cout << " Will not use point " << i << " (r " << points[i]->first << ")" << std::endl;
01438 }
01439 }
01440 }
01441 void HybridNew::updateGridDataFC(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, bool smart, double clsTarget_) {
01442 typedef std::map<double, RooStats::HypoTestResult *>::iterator point;
01443 std::vector<Double_t> rToUpdate; std::vector<point> pointToUpdate;
01444 for (point it = grid_.begin(), ed = grid_.end(); it != ed; ++it) {
01445 it->second->ResetBit(1);
01446 if (it->first == 0 || fabs(it->second->GetTestStatisticData()) <= 2*minimizerTolerance_) {
01447 rToUpdate.push_back(it->first);
01448 pointToUpdate.push_back(it);
01449 }
01450 }
01451 if (verbose > 0) std::cout << "A total of " << rToUpdate.size() << " points will be updated." << std::endl;
01452 Setup setup;
01453 std::auto_ptr<RooStats::HybridCalculator> hc = create(w, mc_s, mc_b, data, rToUpdate.back(), setup);
01454 RooArgSet nullPOI(*setup.modelConfig_bonly.GetSnapshot());
01455 std::vector<Double_t> qVals = ((ProfiledLikelihoodTestStatOpt&)(*setup.qvar)).Evaluate(data, nullPOI, rToUpdate);
01456 for (int i = 0, n = rToUpdate.size(); i < n; ++i) {
01457 pointToUpdate[i]->second->SetTestStatisticData(qVals[i] - EPS);
01458 }
01459 if (verbose > 0) std::cout << "All points have been updated." << std::endl;
01460 }
01461
01462 std::pair<double,double> HybridNew::updateGridPoint(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, std::map<double, RooStats::HypoTestResult *>::iterator point) {
01463 typedef std::pair<double,double> CLs_t;
01464 bool isProfile = (testStat_ == "LHC" || testStat_ == "LHCFC" || testStat_ == "Profile");
01465 if (point->first == 0 && CLs_) return std::pair<double,double>(1,0);
01466 RooArgSet poi(*mc_s->GetParametersOfInterest());
01467 RooRealVar *r = dynamic_cast<RooRealVar *>(poi.first());
01468 if (expectedFromGrid_) {
01469 applyExpectedQuantile(*point->second);
01470 point->second->SetTestStatisticData(point->second->GetTestStatisticData() + (isProfile ? EPS : EPS));
01471 } else {
01472 Setup setup;
01473 std::auto_ptr<RooStats::HybridCalculator> hc = create(w, mc_s, mc_b, data, point->first, setup);
01474 RooArgSet nullPOI(*setup.modelConfig_bonly.GetSnapshot());
01475 if (isProfile) nullPOI.setRealValue(r->GetName(), point->first);
01476 double testStat = setup.qvar->Evaluate(data, nullPOI);
01477 point->second->SetTestStatisticData(testStat + (isProfile ? EPS : EPS));
01478 }
01479 if (verbose > 1) {
01480 std::cout << "At " << r->GetName() << " = " << point->first << ":\n" <<
01481 "\tCLs = " << point->second->CLs() << " +/- " << point->second->CLsError() << "\n" <<
01482 "\tCLb = " << point->second->CLb() << " +/- " << point->second->CLbError() << "\n" <<
01483 "\tCLsplusb = " << point->second->CLsplusb() << " +/- " << point->second->CLsplusbError() << "\n" <<
01484 std::endl;
01485 }
01486
01487 return eval(*point->second, point->first);
01488 }
01489 void HybridNew::useGrid() {
01490 typedef std::pair<double,double> CLs_t;
01491 int i = 0, n = 0;
01492 limitPlot_.reset(new TGraphErrors(1));
01493 for (std::map<double, RooStats::HypoTestResult *>::iterator itg = grid_.begin(), edg = grid_.end(); itg != edg; ++itg, ++i) {
01494 if (itg->second->TestBit(1)) continue;
01495 CLs_t val(1,0);
01496 if (CLs_) {
01497 if (itg->first > 0) val = eval(*itg->second, itg->first);
01498 } else {
01499 val = eval(*itg->second, itg->first);
01500 }
01501 if (val.first == -1) continue;
01502 if (val.second == 0 && (val.first != 1 && val.first != 0)) continue;
01503 limitPlot_->Set(n+1);
01504 limitPlot_->SetPoint( n, itg->first, val.first);
01505 limitPlot_->SetPointError(n, 0, val.second);
01506 n++;
01507 }
01508 }
01509 void HybridNew::clearGrid() {
01510 for (std::map<double, RooStats::HypoTestResult *>::iterator it = grid_.begin(), ed = grid_.end(); it != ed; ++it) {
01511 delete it->second;
01512 }
01513 grid_.clear();
01514 }
01515