00001 #include "../interface/ToyMCSamplerOpt.h"
00002 #include "../interface/utils.h"
00003 #include <memory>
00004 #include <stdexcept>
00005 #include <TH1.h>
00006 #include <TH2.h>
00007 #include <TH3.h>
00008 #include <RooSimultaneous.h>
00009 #include <RooRealVar.h>
00010 #include <RooProdPdf.h>
00011 #include <RooPoisson.h>
00012 #include <RooDataHist.h>
00013 #include <RooDataSet.h>
00014 #include <RooRandom.h>
00015 #include <../interface/ProfilingTools.h>
00016
00017 ToyMCSamplerOpt::ToyMCSamplerOpt(RooStats::TestStatistic& ts, Int_t ntoys, RooAbsPdf *globalObsPdf, bool generateNuisances) :
00018 ToyMCSampler(ts, ntoys),
00019 globalObsPdf_(globalObsPdf),
00020 globalObsValues_(0), globalObsIndex_(-1),
00021 nuisValues_(0), nuisIndex_(-1),
00022 weightVar_(0)
00023 {
00024 if (!generateNuisances) fPriorNuisance = 0;
00025 }
00026
00027
00028 ToyMCSamplerOpt::ToyMCSamplerOpt(const RooStats::ToyMCSampler &base) :
00029 ToyMCSampler(base),
00030 globalObsPdf_(0),
00031 globalObsValues_(0), globalObsIndex_(-1),
00032 weightVar_(0)
00033 {
00034 }
00035
00036 ToyMCSamplerOpt::ToyMCSamplerOpt(const ToyMCSamplerOpt &other) :
00037 ToyMCSampler(other),
00038 globalObsPdf_(0),
00039 globalObsValues_(0), globalObsIndex_(-1),
00040 weightVar_(0)
00041 {
00042 }
00043
00044 ToyMCSamplerOpt::~ToyMCSamplerOpt()
00045 {
00046 delete weightVar_;
00047 for (std::map<RooAbsPdf *, toymcoptutils::SimPdfGenInfo *>::iterator it = genCache_.begin(), ed = genCache_.end(); it != ed; ++it) {
00048 delete it->second;
00049 }
00050 genCache_.clear();
00051 delete _allVars; _allVars = 0;
00052 delete globalObsValues_;
00053 }
00054
00055
00056 toymcoptutils::SinglePdfGenInfo::SinglePdfGenInfo(RooAbsPdf &pdf, const RooArgSet& observables, bool preferBinned, const RooDataSet* protoData, int forceEvents) :
00057 mode_(pdf.canBeExtended() ? (preferBinned ? Binned : Unbinned) : Counting),
00058 pdf_(&pdf),
00059 spec_(0),histoSpec_(0),keepHistoSpec_(0),weightVar_(0)
00060 {
00061 if (pdf.canBeExtended()) {
00062 if (pdf.getAttribute("forceGenBinned")) mode_ = Binned;
00063 else if (pdf.getAttribute("forceGenPoisson")) mode_ = Poisson;
00064 else if (pdf.getAttribute("forceGenUnbinned")) mode_ = Unbinned;
00065 }
00066
00067 RooArgSet *obs = pdf.getObservables(observables);
00068 observables_.add(*obs);
00069 delete obs;
00070 if (mode_ == Binned) {
00071 if (runtimedef::get("TMCSO_GenBinned")) mode_ = BinnedNoWorkaround;
00072 else if (runtimedef::get("TMCSO_GenBinnedWorkaround")) mode_ = Binned;
00073 else mode_ = Poisson;
00074 } else if (mode_ == Unbinned) {
00075
00076
00077
00078
00079 }
00080 }
00081
00082 toymcoptutils::SinglePdfGenInfo::~SinglePdfGenInfo()
00083 {
00084 delete spec_;
00085 delete weightVar_;
00086 delete histoSpec_;
00087 }
00088
00089
00090 RooAbsData *
00091 toymcoptutils::SinglePdfGenInfo::generate(const RooDataSet* protoData, int forceEvents)
00092 {
00093 assert(forceEvents == 0 && "SinglePdfGenInfo: forceEvents must be zero at least for now");
00094 RooAbsData *ret = 0;
00095 switch (mode_) {
00096 case Unbinned:
00097 if (spec_ == 0) spec_ = protoData ? pdf_->prepareMultiGen(observables_, RooFit::Extended(), RooFit::ProtoData(*protoData, true, true))
00098 : pdf_->prepareMultiGen(observables_, RooFit::Extended());
00099 if (spec_) ret = pdf_->generate(*spec_);
00100 else ret = pdf_->generate(observables_, RooFit::Extended());
00101 break;
00102 case Binned:
00103 {
00104 RooDataSet *data = pdf_->generate(observables_, RooFit::Extended());
00105 ret = new RooDataHist(data->GetName(), "", *data->get(), *data);
00106 delete data;
00107 }
00108 break;
00109 case BinnedNoWorkaround:
00110 ret = protoData ? pdf_->generateBinned(observables_, RooFit::Extended(), RooFit::ProtoData(*protoData, true, true))
00111 : pdf_->generateBinned(observables_, RooFit::Extended());
00112 break;
00113 case Poisson:
00114 ret = generateWithHisto(weightVar_, false);
00115 break;
00116 case Counting:
00117 ret = pdf_->generate(observables_, 1);
00118 break;
00119 default:
00120 throw std::logic_error("Mode not foreseen in SinglePdfGenInfo::generate");
00121 }
00122
00123
00124 return ret;
00125 }
00126
00127 RooDataSet *
00128 toymcoptutils::SinglePdfGenInfo::generateAsimov(RooRealVar *&weightVar)
00129 {
00130 static int nPA = runtimedef::get("TMCSO_PseudoAsimov");
00131 if (nPA) return generatePseudoAsimov(weightVar, nPA);
00132 return generateWithHisto(weightVar, true);
00133 }
00134
00135 RooDataSet *
00136 toymcoptutils::SinglePdfGenInfo::generatePseudoAsimov(RooRealVar *&weightVar, int nPoints)
00137 {
00138 if (mode_ == Unbinned) {
00139 double expEvents = pdf_->expectedEvents(observables_);
00140 std::auto_ptr<RooDataSet> data(pdf_->generate(observables_, nPoints));
00141 if (weightVar == 0) weightVar = new RooRealVar("_weight_","",1.0);
00142 RooArgSet obsPlusW(observables_); obsPlusW.add(*weightVar);
00143 RooDataSet *rds = new RooDataSet(data->GetName(), "", obsPlusW, weightVar->GetName());
00144 for (int i = 0; i < nPoints; ++i) {
00145 observables_ = *data->get(i);
00146 rds->add(observables_, expEvents/nPoints);
00147 }
00148 return rds;
00149 } else {
00150 return generateWithHisto(weightVar, true);
00151 }
00152 }
00153
00154
00155 RooDataSet *
00156 toymcoptutils::SinglePdfGenInfo::generateWithHisto(RooRealVar *&weightVar, bool asimov)
00157 {
00158 if (mode_ == Counting) return generateCountingAsimov();
00159 if (observables_.getSize() > 3) throw std::invalid_argument(std::string("ERROR in SinglePdfGenInfo::generateWithHisto for ") + pdf_->GetName() + ", more than 3 observable");
00160 RooArgList obs(observables_);
00161 RooRealVar *x = (RooRealVar*)obs.at(0);
00162 RooRealVar *y = obs.getSize() > 1 ? (RooRealVar*)obs.at(1) : 0;
00163 RooRealVar *z = obs.getSize() > 2 ? (RooRealVar*)obs.at(2) : 0;
00164 if (weightVar == 0) weightVar = new RooRealVar("_weight_","",1.0);
00165
00166 RooCmdArg ay = (y ? RooFit::YVar(*y) : RooCmdArg::none());
00167 RooCmdArg az = (z ? RooFit::ZVar(*z) : RooCmdArg::none());
00168
00169 if (histoSpec_ == 0) {
00170 histoSpec_ = pdf_->createHistogram("htemp", *x, ay, az);
00171 histoSpec_->SetDirectory(0);
00172 }
00173
00174 double expectedEvents = pdf_->expectedEvents(observables_);
00175 histoSpec_->Scale(expectedEvents/ histoSpec_->Integral());
00176 RooArgSet obsPlusW(obs); obsPlusW.add(*weightVar);
00177 RooDataSet *data = new RooDataSet(TString::Format("%sData", pdf_->GetName()), "", obsPlusW, weightVar->GetName());
00178 RooAbsArg::setDirtyInhibit(true);
00179 switch (obs.getSize()) {
00180 case 1:
00181 for (int i = 1, n = histoSpec_->GetNbinsX(); i <= n; ++i) {
00182 x->setVal(histoSpec_->GetXaxis()->GetBinCenter(i));
00183 data->add(observables_, asimov ? histoSpec_->GetBinContent(i) : RooRandom::randomGenerator()->Poisson(histoSpec_->GetBinContent(i)) );
00184 }
00185 break;
00186 case 2:
00187 {
00188 TH2& h2 = dynamic_cast<TH2&>(*histoSpec_);
00189 for (int ix = 1, nx = h2.GetNbinsX(); ix <= nx; ++ix) {
00190 for (int iy = 1, ny = h2.GetNbinsY(); iy <= ny; ++iy) {
00191 x->setVal(h2.GetXaxis()->GetBinCenter(ix));
00192 y->setVal(h2.GetYaxis()->GetBinCenter(iy));
00193 data->add(observables_, asimov ? h2.GetBinContent(ix,iy) : RooRandom::randomGenerator()->Poisson(h2.GetBinContent(ix,iy)) );
00194 } }
00195 }
00196 break;
00197 case 3:
00198 {
00199 TH3& h3 = dynamic_cast<TH3&>(*histoSpec_);
00200 for (int ix = 1, nx = h3.GetNbinsX(); ix <= nx; ++ix) {
00201 for (int iy = 1, ny = h3.GetNbinsY(); iy <= ny; ++iy) {
00202 for (int iz = 1, nz = h3.GetNbinsZ(); iz <= nz; ++iz) {
00203 x->setVal(h3.GetXaxis()->GetBinCenter(ix));
00204 y->setVal(h3.GetYaxis()->GetBinCenter(iy));
00205 z->setVal(h3.GetYaxis()->GetBinCenter(iz));
00206 data->add(observables_, asimov ? h3.GetBinContent(ix,iy,iz) : RooRandom::randomGenerator()->Poisson(h3.GetBinContent(ix,iy,iz)) );
00207 } } }
00208 }
00209 }
00210 RooAbsArg::setDirtyInhibit(false);
00211 if (!keepHistoSpec_) { delete histoSpec_; histoSpec_ = 0; }
00212
00213
00214 return data;
00215 }
00216
00217
00218 RooDataSet *
00219 toymcoptutils::SinglePdfGenInfo::generateCountingAsimov()
00220 {
00221 RooArgSet obs(observables_);
00222 RooProdPdf *prod = dynamic_cast<RooProdPdf *>(pdf_);
00223 RooPoisson *pois = 0;
00224 if (prod != 0) {
00225 setToExpected(*prod, observables_);
00226 } else if ((pois = dynamic_cast<RooPoisson *>(pdf_)) != 0) {
00227 setToExpected(*pois, observables_);
00228 } else throw std::logic_error("A counting model pdf must be either a RooProdPdf or a RooPoisson");
00229 RooDataSet *ret = new RooDataSet(TString::Format("%sData", pdf_->GetName()), "", obs);
00230 ret->add(obs);
00231 return ret;
00232 }
00233
00234 void
00235 toymcoptutils::SinglePdfGenInfo::setToExpected(RooProdPdf &prod, RooArgSet &obs)
00236 {
00237 std::auto_ptr<TIterator> iter(prod.pdfList().createIterator());
00238 for (RooAbsArg *a = (RooAbsArg *) iter->Next(); a != 0; a = (RooAbsArg *) iter->Next()) {
00239 if (!a->dependsOn(obs)) continue;
00240 RooPoisson *pois = 0;
00241 if ((pois = dynamic_cast<RooPoisson *>(a)) != 0) {
00242 setToExpected(*pois, obs);
00243 } else {
00244 RooProdPdf *subprod = dynamic_cast<RooProdPdf *>(a);
00245 if (subprod) setToExpected(*subprod, obs);
00246 else throw std::logic_error("Illegal term in counting model: depends on observables, but not Poisson or Product");
00247 }
00248 }
00249 }
00250
00251 void
00252 toymcoptutils::SinglePdfGenInfo::setToExpected(RooPoisson &pois, RooArgSet &obs)
00253 {
00254 RooRealVar *myobs = 0;
00255 RooAbsReal *myexp = 0;
00256 std::auto_ptr<TIterator> iter(pois.serverIterator());
00257 for (RooAbsArg *a = (RooAbsArg *) iter->Next(); a != 0; a = (RooAbsArg *) iter->Next()) {
00258 if (obs.contains(*a)) {
00259 assert(myobs == 0 && "SinglePdfGenInfo::setToExpected(RooPoisson): Two observables??");
00260 myobs = dynamic_cast<RooRealVar *>(a);
00261 assert(myobs != 0 && "SinglePdfGenInfo::setToExpected(RooPoisson): Observables is not a RooRealVar??");
00262 } else {
00263 assert(myexp == 0 && "SinglePdfGenInfo::setToExpected(RooPoisson): Two expecteds??");
00264 myexp = dynamic_cast<RooAbsReal *>(a);
00265 assert(myexp != 0 && "SinglePdfGenInfo::setToExpected(RooPoisson): Expectedis not a RooAbsReal??");
00266 }
00267 }
00268 assert(myobs != 0 && "SinglePdfGenInfo::setToExpected(RooPoisson): No observable?");
00269 assert(myexp != 0 && "SinglePdfGenInfo::setToExpected(RooPoisson): No expected?");
00270 myobs->setVal(myexp->getVal());
00271 }
00272
00273 toymcoptutils::SimPdfGenInfo::SimPdfGenInfo(RooAbsPdf &pdf, const RooArgSet& observables, bool preferBinned, const RooDataSet* protoData, int forceEvents) :
00274 pdf_(&pdf),
00275 cat_(0),
00276 observables_(observables),
00277 copyData_(true)
00278 {
00279 assert(forceEvents == 0 && "SimPdfGenInfo: forceEvents must be zero at least for now");
00280 RooSimultaneous *simPdf = dynamic_cast<RooSimultaneous *>(&pdf);
00281 if (simPdf) {
00282 cat_ = const_cast<RooAbsCategoryLValue *>(&simPdf->indexCat());
00283 int nbins = cat_->numBins((const char *)0);
00284 pdfs_.resize(nbins, 0);
00285 RooArgList dummy;
00286 for (int ic = 0; ic < nbins; ++ic) {
00287 cat_->setBin(ic);
00288 RooAbsPdf *pdfi = simPdf->getPdf(cat_->getLabel());
00289 if (pdfi == 0) throw std::logic_error(std::string("Unmapped category state: ") + cat_->getLabel());
00290 RooAbsPdf *newpdf = utils::factorizePdf(observables, *pdfi, dummy);
00291 pdfs_[ic] = new SinglePdfGenInfo(*newpdf, observables, preferBinned);
00292 if (newpdf != 0 && newpdf != pdfi) {
00293 ownedCrap_.addOwned(*newpdf);
00294 }
00295 }
00296 } else {
00297 pdfs_.push_back(new SinglePdfGenInfo(pdf, observables, preferBinned, protoData, forceEvents));
00298 }
00299 }
00300
00301 toymcoptutils::SimPdfGenInfo::~SimPdfGenInfo()
00302 {
00303 for (std::vector<SinglePdfGenInfo *>::iterator it = pdfs_.begin(), ed = pdfs_.end(); it != ed; ++it) {
00304 delete *it;
00305 }
00306 pdfs_.clear();
00307
00308 for (std::map<std::string,RooAbsData*>::iterator it = datasetPieces_.begin(), ed = datasetPieces_.end(); it != ed; ++it) {
00309 delete it->second;
00310 }
00311 datasetPieces_.clear();
00312 }
00313
00314
00315 RooAbsData *
00316 toymcoptutils::SimPdfGenInfo::generate(RooRealVar *&weightVar, const RooDataSet* protoData, int forceEvents)
00317 {
00318 RooAbsData *ret = 0;
00319 TString retName = TString::Format("%sData", pdf_->GetName());
00320 if (cat_ != 0) {
00321
00322 for (int i = 0, n = cat_->numBins((const char *)0); i < n; ++i) {
00323 if (pdfs_[i] == 0) continue;
00324 cat_->setBin(i);
00325 RooAbsData *&data = datasetPieces_[cat_->getLabel()]; delete data;
00326 assert(protoData == 0);
00327 data = pdfs_[i]->generate(protoData);
00328 if (data->isWeighted()) {
00329 if (weightVar == 0) weightVar = new RooRealVar("_weight_","",1.0);
00330 RooArgSet obs(*data->get());
00331 obs.add(*weightVar);
00332 RooDataSet *wdata = new RooDataSet(data->GetName(), "", obs, "_weight_");
00333 for (int i = 0, n = data->numEntries(); i < n; ++i) {
00334 obs = *data->get(i);
00335 if (data->weight()) wdata->add(obs, data->weight());
00336 }
00337
00338 delete data;
00339 data = wdata;
00340
00341 }
00342
00343 }
00344 if (copyData_) {
00346 RooArgSet vars(observables_), varsPlusWeight(observables_);
00347 if (weightVar) varsPlusWeight.add(*weightVar);
00348 ret = new RooDataSet(retName, "", varsPlusWeight, (weightVar ? weightVar->GetName() : 0));
00349 for (std::map<std::string,RooAbsData*>::iterator it = datasetPieces_.begin(), ed = datasetPieces_.end(); it != ed; ++it) {
00350 cat_->setLabel(it->first.c_str());
00351 for (unsigned int i = 0, n = it->second->numEntries(); i < n; ++i) {
00352 vars = *it->second->get(i);
00353 ret->add(vars, it->second->weight());
00354 }
00355 }
00356 } else {
00357
00358
00359
00360 ret = new RooDataSet(retName, "", observables_, RooFit::Index((RooCategory&)*cat_), RooFit::Link(datasetPieces_) );
00361 }
00362 } else ret = pdfs_[0]->generate(protoData, forceEvents);
00363
00364 return ret;
00365 }
00366
00367 RooAbsData *
00368 toymcoptutils::SimPdfGenInfo::generateAsimov(RooRealVar *&weightVar)
00369 {
00370 RooAbsData *ret = 0;
00371 TString retName = TString::Format("%sData", pdf_->GetName());
00372 if (cat_ != 0) {
00373
00374 for (int i = 0, n = cat_->numBins((const char *)0); i < n; ++i) {
00375 if (pdfs_[i] == 0) continue;
00376 cat_->setBin(i);
00377 RooAbsData *&data = datasetPieces_[cat_->getLabel()]; delete data;
00378 data = pdfs_[i]->generateAsimov(weightVar);
00379 }
00380 if (copyData_) {
00381 RooArgSet vars(observables_), varsPlusWeight(observables_); varsPlusWeight.add(*weightVar);
00382 ret = new RooDataSet(retName, "", varsPlusWeight, (weightVar ? weightVar->GetName() : 0));
00383 for (std::map<std::string,RooAbsData*>::iterator it = datasetPieces_.begin(), ed = datasetPieces_.end(); it != ed; ++it) {
00384 cat_->setLabel(it->first.c_str());
00385 for (unsigned int i = 0, n = it->second->numEntries(); i < n; ++i) {
00386 vars = *it->second->get(i);
00387 ret->add(vars, it->second->weight());
00388 }
00389 }
00390 } else {
00391
00392
00393
00394 ret = new RooDataSet(retName, "", observables_, RooFit::Index((RooCategory&)*cat_), RooFit::Link(datasetPieces_) );
00395 }
00396 } else ret = pdfs_[0]->generateAsimov(weightVar);
00397
00398
00399 return ret;
00400 }
00401
00402 void
00403 toymcoptutils::SimPdfGenInfo::setCacheTemplates(bool cache)
00404 {
00405 for (std::vector<SinglePdfGenInfo *>::const_iterator it = pdfs_.begin(), ed = pdfs_.end(); it != ed; ++it) {
00406 if (*it) (*it)->setCacheTemplates(cache);
00407 }
00408 }
00409
00410 void
00411 ToyMCSamplerOpt::SetPdf(RooAbsPdf& pdf)
00412 {
00413 ToyMCSampler::SetPdf(pdf);
00414 delete _allVars; _allVars = 0;
00415 delete globalObsValues_; globalObsValues_ = 0; globalObsIndex_ = -1;
00416 delete nuisValues_; nuisValues_ = 0; nuisIndex_ = -1;
00417 }
00418
00419 RooAbsData* ToyMCSamplerOpt::GenerateToyData(RooArgSet& , double& weight) const {
00420 weight = 1;
00421
00422
00423
00424 if (fObservables == NULL) {
00425 ooccoutE((TObject*)NULL,InputArguments) << "Observables not set." << endl;
00426 return 0;
00427 }
00428
00429
00430 RooArgSet saveNuis;
00431 if(fPriorNuisance && fNuisancePars && fNuisancePars->getSize() > 0) {
00432 if (nuisValues_ == 0 || nuisIndex_ == nuisValues_->numEntries()) {
00433 delete nuisValues_;
00434 nuisValues_ = fPriorNuisance->generate(*fNuisancePars, fNToys);
00435 nuisIndex_ = 0;
00436 }
00437 fNuisancePars->snapshot(saveNuis);
00438 const RooArgSet *values = nuisValues_->get(nuisIndex_++);
00439 RooArgSet pars(*fNuisancePars); pars = *values;
00440 }
00441
00442 RooArgSet observables(*fObservables);
00443 if(fGlobalObservables && fGlobalObservables->getSize()) {
00444 observables.remove(*fGlobalObservables);
00445
00446
00447 assert(globalObsPdf_);
00448 if (globalObsValues_ == 0 || globalObsIndex_ == globalObsValues_->numEntries()) {
00449 delete globalObsValues_;
00450 globalObsValues_ = (globalObsPdf_ ? globalObsPdf_ : fPdf)->generate(*fGlobalObservables, fNToys);
00451 globalObsIndex_ = 0;
00452 }
00453 const RooArgSet *values = globalObsValues_->get(globalObsIndex_++);
00454 if (!_allVars) _allVars = fPdf->getObservables(*fGlobalObservables);
00455 *_allVars = *values;
00456 }
00457
00458 RooAbsData* data = NULL;
00459
00460 if(!fImportanceDensity) {
00461
00462 data = Generate(*fPdf, observables);
00463 }else{
00464 throw std::runtime_error("No importance sampling yet");
00465
00466 RooArgSet* allVars = fPdf->getVariables();
00467 RooArgSet* allVars2 = fImportanceDensity->getVariables();
00468 allVars->add(*allVars2);
00469 const RooArgSet* saveVars = (const RooArgSet*)allVars->snapshot();
00470
00471
00472
00473
00474 int forceEvents = 0;
00475 if(fNEvents == 0) {
00476 forceEvents = (int)fPdf->expectedEvents(observables);
00477 forceEvents = RooRandom::randomGenerator()->Poisson(forceEvents);
00478 }
00479
00480
00481
00482 if(fImportanceSnapshot) *allVars = *fImportanceSnapshot;
00483
00484
00485
00486
00487 data = Generate(*fImportanceDensity, observables, NULL, forceEvents);
00488
00489 *allVars = *saveVars;
00490 delete allVars;
00491 delete allVars2;
00492 delete saveVars;
00493 }
00494
00495 if (saveNuis.getSize()) { RooArgSet pars(*fNuisancePars); pars = saveNuis; }
00496 return data;
00497 }
00498
00499
00500 RooAbsData *
00501 ToyMCSamplerOpt::Generate(RooAbsPdf& pdf, RooArgSet& observables, const RooDataSet* protoData, int forceEvents) const
00502 {
00503 if(fProtoData) {
00504 protoData = fProtoData;
00505 forceEvents = protoData->numEntries();
00506 }
00507 int events = forceEvents;
00508 if (events == 0) events = fNEvents;
00509 if (events != 0) {
00510 assert(events == 1);
00511 assert(protoData == 0);
00512 RooAbsData *ret = pdf.generate(observables, events);
00513 return ret;
00514 }
00515 toymcoptutils::SimPdfGenInfo *& info = genCache_[&pdf];
00516 if (info == 0) {
00517 info = new toymcoptutils::SimPdfGenInfo(pdf, observables, fGenerateBinned, protoData, forceEvents);
00518 info->setCopyData(false);
00519 if (!fPriorNuisance) info->setCacheTemplates(true);
00520 }
00521 return info->generate(weightVar_, protoData, forceEvents);
00522 }