CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_5_3_14/src/HiggsAnalysis/CombinedLimit/src/CachingNLL.cc

Go to the documentation of this file.
00001 #include "../interface/CachingNLL.h"
00002 #include "../interface/utils.h"
00003 #include <stdexcept>
00004 #include <RooCategory.h>
00005 #include <RooDataSet.h>
00006 #include <RooProduct.h>
00007 #include "../interface/ProfilingTools.h"
00008 
00009 //---- Uncomment this to get a '.' printed every some evals
00010 //#define TRACE_NLL_EVALS
00011 
00012 //---- Uncomment this and run with --perfCounters to get cache statistics
00013 //#define DEBUG_CACHE
00014 
00015 //---- Uncomment to enable Kahan's summation (if enabled at runtime with --X-rtd = ...
00016 // http://en.wikipedia.org/wiki/Kahan_summation_algorithm
00017 //#define ADDNLL_KAHAN_SUM
00018 #include "../interface/ProfilingTools.h"
00019 
00020 //std::map<std::string,double> cacheutils::CachingAddNLL::offsets_;
00021 bool cacheutils::CachingSimNLL::noDeepLEE_ = false;
00022 bool cacheutils::CachingSimNLL::hasError_  = false;
00023 
00024 //#define DEBUG_TRACE_POINTS
00025 #ifdef DEBUG_TRACE_POINTS
00026 namespace { 
00027     template<unsigned int>
00028     void tracePoint(const RooAbsCollection &point) {
00029         static const RooAbsCollection *lastPoint = 0;
00030         static std::vector<double> values;
00031         if (&point != lastPoint) {
00032             std::cout << "Arrived in a completely new point. " << std::endl;
00033             values.resize(point.getSize());
00034             RooLinkedListIter iter = point.iterator();
00035             for (RooAbsArg *a  = (RooAbsArg*)(iter.Next()); a != 0; a  = (RooAbsArg*)(iter.Next())) {
00036                 RooRealVar *rrv = dynamic_cast<RooRealVar *>(a); if (!rrv) continue;
00037                 values.push_back(rrv->getVal());
00038             }
00039             lastPoint = &point;
00040         } else {
00041             std::cout << "Moved: ";
00042             RooLinkedListIter iter = point.iterator();
00043             int i = 0;
00044             for (RooAbsArg *a  = (RooAbsArg*)(iter.Next()); a != 0; a  = (RooAbsArg*)(iter.Next())) {
00045                 RooRealVar *rrv = dynamic_cast<RooRealVar *>(a); if (!rrv) continue;
00046                 if (values[i] != rrv->getVal()) std::cout << a->GetName() << " " << values[i] << " => " << rrv->getVal() << "    "; 
00047                 values[i++] = rrv->getVal();
00048             }
00049             std::cout << std::endl;
00050         }
00051     }
00052 }
00053 #define TRACE_POINT2(x,i)  ::tracePoint<i>(x);
00054 #define TRACE_POINT(x)  ::tracePoint<0>(x);
00055 #define TRACE_NLL(x)    std::cout << x << std::endl;
00056 #else
00057 #define TRACE_POINT2(x,i)
00058 #define TRACE_POINT(x) 
00059 #define TRACE_NLL(x) 
00060 #endif
00061 
00062 cacheutils::ArgSetChecker::ArgSetChecker(const RooAbsCollection &set) 
00063 {
00064     std::auto_ptr<TIterator> iter(set.createIterator());
00065     for (RooAbsArg *a  = dynamic_cast<RooAbsArg *>(iter->Next()); 
00066                     a != 0; 
00067                     a  = dynamic_cast<RooAbsArg *>(iter->Next())) {
00068         RooRealVar *rrv = dynamic_cast<RooRealVar *>(a);
00069         if (rrv) { // && !rrv->isConstant()) { 
00070             vars_.push_back(rrv);
00071             vals_.push_back(rrv->getVal());
00072         }
00073     }
00074 }
00075 
00076 bool 
00077 cacheutils::ArgSetChecker::changed(bool updateIfChanged) 
00078 {
00079     std::vector<RooRealVar *>::const_iterator it = vars_.begin(), ed = vars_.end();
00080     std::vector<double>::iterator itv = vals_.begin();
00081     bool changed = false;
00082     for ( ; it != ed; ++it, ++itv) {
00083         double val = (*it)->getVal();
00084         if (val != *itv) { 
00085             //std::cerr << "var::CachingPdfable " << (*it)->GetName() << " changed: " << *itv << " -> " << val << std::endl;
00086             changed = true; 
00087             if (updateIfChanged) { *itv = val; }
00088             else break;
00089         }
00090     }
00091     return changed;
00092 }
00093 
00094 cacheutils::ValuesCache::ValuesCache(const RooAbsCollection &params, int size) :
00095     size_(1),
00096     maxSize_(size)
00097 {
00098     assert(size <= MaxItems_);
00099     items[0] = new Item(params);
00100 }
00101 cacheutils::ValuesCache::ValuesCache(const RooAbsReal &pdf, const RooArgSet &obs, int size) :
00102     size_(1),
00103     maxSize_(size)
00104 {
00105     assert(size <= MaxItems_);
00106     std::auto_ptr<RooArgSet> params(pdf.getParameters(obs));
00107     items[0] = new Item(*params);
00108 }
00109 
00110 
00111 cacheutils::ValuesCache::~ValuesCache() 
00112 {
00113     for (int i = 0; i < size_; ++i) delete items[i];
00114 }
00115 
00116 void cacheutils::ValuesCache::clear() 
00117 {
00118     for (int i = 0; i < size_; ++i) items[i]->good = false;
00119 }
00120 
00121 std::pair<std::vector<Double_t> *, bool> cacheutils::ValuesCache::get() 
00122 {
00123     int found = -1; bool good = false;
00124     for (int i = 0; i < size_; ++i) {
00125         if (items[i]->good) {
00126             // valid entry, check if fresh
00127             if (!items[i]->checker.changed()) {
00128 #ifdef DEBUG_CACHE
00129                 PerfCounter::add(i == 0 ? "ValuesCache::get hit first" : "ValuesCache::get hit other");
00130 #endif
00131                 // fresh: done! 
00132                 found = i; 
00133                 good = true; 
00134                 break;
00135             } 
00136         } else if (found == -1) {
00137             // invalid entry, can be replaced
00138             found = i;
00139 #ifdef DEBUG_CACHE
00140             PerfCounter::add("ValuesCache::get hit invalid");
00141 #endif
00142         }
00143     } 
00144     if (found == -1) {
00145         // all entries are valid but old 
00146 #ifdef DEBUG_CACHE
00147         PerfCounter::add("ValuesCache::get miss");
00148 #endif
00149         if (size_ < maxSize_) {
00150             // if I can, make a new entry
00151             items[size_] = new Item(items[0]->checker); // create a new item, copying the ArgSetChecker from the first one
00152             found = size_; 
00153             size_++;
00154         } else {
00155             // otherwise, pick the last one
00156             found = size_-1;
00157         }
00158     }
00159     // make sure new entry is the first one
00160     if (found != 0) {
00161         // remember what found is pointing to
00162         Item *f = items[found];
00163         // shift the other items down one place
00164         while (found > 0) { items[found] = items[found-1]; --found; } 
00165         // and put found on top
00166         items[found] = f;
00167     }
00168     if (!good) items[found]->checker.changed(true); // store new values in cache sentry
00169     items[found]->good = true;                      // mark this as valid entry
00170     return std::pair<std::vector<Double_t> *, bool>(&items[found]->values, good);
00171 }
00172 
00173 cacheutils::CachingPdf::CachingPdf(RooAbsReal *pdf, const RooArgSet *obs) :
00174     obs_(obs),
00175     pdfOriginal_(pdf),
00176     pdfPieces_(),
00177     pdf_(utils::fullCloneFunc(pdf, pdfPieces_)),
00178     lastData_(0),
00179     cache_(*pdf_,*obs_)
00180 {
00181 }
00182 
00183 cacheutils::CachingPdf::CachingPdf(const CachingPdf &other) :
00184     obs_(other.obs_),
00185     pdfOriginal_(other.pdfOriginal_),
00186     pdfPieces_(),
00187     pdf_(utils::fullCloneFunc(pdfOriginal_, pdfPieces_)),
00188     lastData_(0),
00189     cache_(*pdf_,*obs_)
00190 {
00191 }
00192 
00193 cacheutils::CachingPdf::~CachingPdf() 
00194 {
00195 }
00196 
00197 const std::vector<Double_t> & 
00198 cacheutils::CachingPdf::eval(const RooAbsData &data) 
00199 {
00200 #ifdef DEBUG_CACHE
00201     PerfCounter::add("CachingPdf::eval called");
00202 #endif
00203     bool newdata = (lastData_ != &data);
00204     if (newdata) {
00205         lastData_ = &data;
00206         pdf_->optimizeCacheMode(*data.get());
00207         pdf_->attachDataSet(data);
00208         const_cast<RooAbsData*>(lastData_)->setDirtyProp(false);
00209         cache_.clear();
00210     }
00211     std::pair<std::vector<Double_t> *, bool> hit = cache_.get();
00212     if (!hit.second) {
00213         realFill_(data, *hit.first);
00214     } 
00215     return *hit.first;
00216 }
00217 
00218 void
00219 cacheutils::CachingPdf::realFill_(const RooAbsData &data, std::vector<Double_t> &vals) 
00220 {
00221 #ifdef DEBUG_CACHE
00222     PerfCounter::add("CachingPdf::realFill_ called");
00223 #endif
00224     int n = data.numEntries();
00225     vals.resize(n); // should be a no-op if size is already >= n.
00226     //std::auto_ptr<RooArgSet> params(pdf_->getObservables(*obs)); // for non-smart handling of pointers
00227     std::vector<Double_t>::iterator itv = vals.begin();
00228     for (int i = 0; i < n; ++i, ++itv) {
00229         data.get(i);
00230         //*params = *data.get(i); // for non-smart handling of pointers
00231         *itv = pdf_->getVal(obs_);
00232         //std::cout << " at i = " << i << " pdf = " << *itv << std::endl;
00233         TRACE_NLL("PDF value for " << pdf_->GetName() << " is " << *itv << " at this point.") 
00234         TRACE_POINT2(*obs_,1)
00235     }
00236 }
00237 
00238 cacheutils::CachingAddNLL::CachingAddNLL(const char *name, const char *title, RooAbsPdf *pdf, RooAbsData *data) :
00239     RooAbsReal(name, title),
00240     pdf_(pdf),
00241     params_("params","parameters",this),
00242     zeroPoint_(0)
00243 {
00244     if (pdf == 0) throw std::invalid_argument(std::string("Pdf passed to ")+name+" is null");
00245     setData(*data);
00246     setup_();
00247 }
00248 
00249 cacheutils::CachingAddNLL::CachingAddNLL(const CachingAddNLL &other, const char *name) :
00250     RooAbsReal(name ? name : (TString("nll_")+other.pdf_->GetName()).Data(), ""),
00251     pdf_(other.pdf_),
00252     params_("params","parameters",this),
00253     zeroPoint_(0)
00254 {
00255     setData(*other.data_);
00256     setup_();
00257 }
00258 
00259 cacheutils::CachingAddNLL::~CachingAddNLL() 
00260 {
00261     for (int i = 0, n = integrals_.size(); i < n; ++i) delete integrals_[i];
00262     integrals_.clear();
00263 }
00264 
00265 cacheutils::CachingAddNLL *
00266 cacheutils::CachingAddNLL::clone(const char *name) const 
00267 {
00268     return new cacheutils::CachingAddNLL(*this, name);
00269 }
00270 
00271 void
00272 cacheutils::CachingAddNLL::setup_() 
00273 {
00274     const RooArgSet *obs = data_->get();
00275     for (int i = 0, n = integrals_.size(); i < n; ++i) delete integrals_[i];
00276     integrals_.clear();
00277     RooAddPdf *addpdf = 0;
00278     RooRealSumPdf *sumpdf = 0;
00279     if ((addpdf = dynamic_cast<RooAddPdf *>(pdf_)) != 0) {
00280         isRooRealSum_ = false;
00281         int npdf = addpdf->coefList().getSize();
00282         coeffs_.reserve(npdf);
00283         pdfs_.reserve(npdf);
00284         for (int i = 0; i < npdf; ++i) {
00285             RooAbsReal * coeff = dynamic_cast<RooAbsReal*>(addpdf->coefList().at(i));
00286             RooAbsPdf  * pdfi  = dynamic_cast<RooAbsPdf *>(addpdf->pdfList().at(i));
00287             coeffs_.push_back(coeff);
00288             pdfs_.push_back(CachingPdf(pdfi, obs));
00289         }
00290     } else if ((sumpdf = dynamic_cast<RooRealSumPdf *>(pdf_)) != 0) {
00291         isRooRealSum_ = true;
00292         int npdf = sumpdf->coefList().getSize();
00293         coeffs_.reserve(npdf);
00294         pdfs_.reserve(npdf);
00295         integrals_.reserve(npdf);
00296         for (int i = 0; i < npdf; ++i) {
00297             RooAbsReal * coeff = dynamic_cast<RooAbsReal*>(sumpdf->coefList().at(i));
00298             RooAbsReal * funci = dynamic_cast<RooAbsReal*>(sumpdf->funcList().at(i));
00300             if (0 && typeid(*funci) == typeid(RooProduct)) {
00301                 RooArgList obsDep, obsInd;
00302                 obsInd.add(*coeff);
00303                 utils::factorizeFunc(*obs, *funci, obsDep, obsInd);
00304                 std::cout << "Entry " << i << ": coef name " << (coeff ? coeff->GetName()   : "null") << 
00305                                               "  type " << (coeff ? coeff->ClassName() :  "n/a") << std::endl;
00306                 std::cout << "       " <<     "; func name " << (funci ? funci->GetName()   : "null") << 
00307                                               "  type " << (funci ? funci->ClassName() :  "n/a") << std::endl;
00308                 std::cout << "Terms depending on observables: " << std::endl; obsDep.Print("V");
00309                 std::cout << "Terms not depending on observables: " << std::endl; obsInd.Print("V");
00310                 if (obsInd.getSize() > 1) {
00311                     coeff = new RooProduct(TString::Format("%s_x_%s_obsIndep", coeff->GetName(), funci->GetName()), "", RooArgSet(obsInd));
00312                     addOwnedComponents(RooArgSet(*coeff));
00313                 }
00314                 if (obsDep.getSize() > 1) {
00315                     funci = new RooProduct(TString::Format("%s_obsDep", funci->GetName()), "", RooArgSet(obsInd));
00316                     addOwnedComponents(RooArgSet(*funci));
00317                 } else if (obsDep.getSize() == 1) {
00318                     funci = (RooAbsReal *) obsDep.first();
00319                 } else throw std::logic_error("No part of pdf depends on observables?");
00320             }
00321             coeffs_.push_back(coeff);
00322             pdfs_.push_back(CachingPdf(funci, obs));
00323             integrals_.push_back(funci->createIntegral(*obs));
00324         }
00325     } else {
00326         std::string errmsg = "ERROR: CachingAddNLL: Pdf ";
00327         errmsg += pdf_->GetName();
00328         errmsg += " is neither a RooAddPdf nor a RooRealSumPdf, but a ";
00329         errmsg += pdf_->ClassName();
00330         throw std::invalid_argument(errmsg);
00331     }
00332 
00333     std::auto_ptr<RooArgSet> params(pdf_->getParameters(*data_));
00334     std::auto_ptr<TIterator> iter(params->createIterator());
00335     for (RooAbsArg *a = (RooAbsArg *) iter->Next(); a != 0; a = (RooAbsArg *) iter->Next()) {
00336         RooRealVar *rrv = dynamic_cast<RooRealVar *>(a);
00337         //if (rrv != 0 && !rrv->isConstant()) params_.add(*rrv);
00338         if (rrv != 0) params_.add(*rrv);
00339     }
00340 }
00341 
00342 Double_t 
00343 cacheutils::CachingAddNLL::evaluate() const 
00344 {
00345 #ifdef DEBUG_CACHE
00346     PerfCounter::add("CachingAddNLL::evaluate called");
00347 #endif
00348     std::fill( partialSum_.begin(), partialSum_.end(), 0.0 );
00349 
00350     std::vector<RooAbsReal*>::iterator  itc = coeffs_.begin(), edc = coeffs_.end();
00351     std::vector<CachingPdf>::iterator   itp = pdfs_.begin();//,   edp = pdfs_.end();
00352     std::vector<Double_t>::const_iterator itw, bgw = weights_.begin();//,    edw = weights_.end();
00353     std::vector<Double_t>::iterator       its, bgs = partialSum_.begin(), eds = partialSum_.end();
00354     double sumCoeff = 0;
00355     //std::cout << "Performing evaluation of " << GetName() << std::endl;
00356     for ( ; itc != edc; ++itp, ++itc ) {
00357         // get coefficient
00358         Double_t coeff = (*itc)->getVal();
00359         if (isRooRealSum_) {
00360             sumCoeff += coeff * integrals_[itc - coeffs_.begin()]->getVal();
00361             //std::cout << "  coefficient = " << coeff << ", integral = " << integrals_[itc - coeffs_.begin()]->getVal() << std::endl;
00362         } else {
00363             sumCoeff += coeff;
00364         }
00365         // get vals
00366         const std::vector<Double_t> &pdfvals = itp->eval(*data_);
00367         // update running sum
00368         std::vector<Double_t>::const_iterator itv = pdfvals.begin();
00369         for (its = bgs; its != eds; ++its, ++itv) {
00370              *its += coeff * (*itv); // sum (n_i * pdf_i)
00371         }
00372     }
00373     // then get the final nll
00374     double ret = 0;
00375     for ( its = bgs, itw = bgw ; its != eds ; ++its, ++itw ) {
00376         if (*itw == 0) continue;
00377         if (!isnormal(*its) || *its <= 0) {
00378             std::cerr << "WARNING: underflow to " << *its << " in " << GetName() << std::endl; 
00379             if (!CachingSimNLL::noDeepLEE_) logEvalError("Number of events is negative or error"); else CachingSimNLL::hasError_ = true;
00380         }
00381         double thispiece = (*itw) * (*its <= 0 ? -9e9 : log( ((*its) / sumCoeff) ));
00382         #ifdef ADDNLL_KAHAN_SUM
00383         static bool do_kahan = runtimedef::get("ADDNLL_KAHAN_SUM");
00384         if (do_kahan) {
00385             double kahan_y = thispiece  - compensation;
00386             double kahan_t = ret + kahan_y;
00387             double kahan_d = (kahan_t - ret);
00388             compensation = kahan_d - kahan_y;
00389             ret  = kahan_t;
00390         } else {
00391             ret += thispiece;
00392         }
00393         #else
00394         ret += thispiece;
00395         #endif
00396     }
00397     // then flip sign
00398     ret = -ret;
00399     // std::cout << "AddNLL for " << pdf_->GetName() << ": " << ret << std::endl;
00400     // and add extended term: expected - observed*log(expected);
00401     double expectedEvents = (isRooRealSum_ ? pdf_->getNorm(data_->get()) : sumCoeff);
00402     if (expectedEvents <= 0) {
00403         if (!CachingSimNLL::noDeepLEE_) logEvalError("Expected number of events is negative"); else CachingSimNLL::hasError_ = true;
00404         expectedEvents = 1e-6;
00405     }
00406     //ret += expectedEvents - UInt_t(sumWeights_) * log(expectedEvents); // no, doesn't work with Asimov dataset
00407     ret += expectedEvents - sumWeights_ * log(expectedEvents);
00408     ret += zeroPoint_;
00409     // std::cout << "     plus extended term: " << ret << std::endl;
00410     TRACE_NLL("AddNLL for " << pdf_->GetName() << ": " << ret)
00411     return ret;
00412 }
00413 
00414 void 
00415 cacheutils::CachingAddNLL::setData(const RooAbsData &data) 
00416 {
00417     //std::cout << "Setting data for pdf " << pdf_->GetName() << std::endl;
00418     //utils::printRAD(&data);
00419     data_ = &data;
00420     setValueDirty();
00421     sumWeights_ = 0.0;
00422     weights_.resize(data.numEntries());
00423     partialSum_.resize(data.numEntries());
00424     std::vector<Double_t>::iterator itw = weights_.begin();
00425     #ifdef ADDNLL_KAHAN_SUM
00426     double compensation = 0;
00427     #endif
00428     for (int i = 0, n = data.numEntries(); i < n; ++i, ++itw) {
00429         data.get(i);
00430         *itw = data.weight();
00431         #ifdef ADDNLL_KAHAN_SUM
00432         static bool do_kahan = runtimedef::get("ADDNLL_KAHAN_SUM");
00433         if (do_kahan) {
00434             double kahan_y = *itw - compensation;
00435             double kahan_t = sumWeights_ + kahan_y;
00436             double kahan_d = (kahan_t - sumWeights_);
00437             compensation = kahan_d - kahan_y;
00438             sumWeights_  = kahan_t;
00439         } else {
00440             sumWeights_ += *itw;
00441         }
00442         #else
00443         sumWeights_ += *itw;
00444         #endif
00445     }
00446     for (std::vector<CachingPdf>::iterator itp = pdfs_.begin(), edp = pdfs_.end(); itp != edp; ++itp) {
00447         itp->setDataDirty();
00448     }
00449 }
00450 
00451 RooArgSet* 
00452 cacheutils::CachingAddNLL::getObservables(const RooArgSet* depList, Bool_t valueOnly) const 
00453 {
00454     return new RooArgSet();
00455 }
00456 
00457 RooArgSet* 
00458 cacheutils::CachingAddNLL::getParameters(const RooArgSet* depList, Bool_t stripDisconnected) const 
00459 {
00460     return new RooArgSet(params_); 
00461 }
00462 
00463 
00464 cacheutils::CachingSimNLL::CachingSimNLL(RooSimultaneous *pdf, RooAbsData *data, const RooArgSet *nuis) :
00465     pdfOriginal_(pdf),
00466     dataOriginal_(data),
00467     nuis_(nuis),
00468     params_("params","parameters",this)
00469 {
00470     setup_();
00471 }
00472 
00473 cacheutils::CachingSimNLL::CachingSimNLL(const CachingSimNLL &other, const char *name) :
00474     pdfOriginal_(other.pdfOriginal_),
00475     dataOriginal_(other.dataOriginal_),
00476     nuis_(other.nuis_),
00477     params_("params","parameters",this)
00478 {
00479     setup_();
00480 }
00481 
00482 cacheutils::CachingSimNLL *
00483 cacheutils::CachingSimNLL::clone(const char *name) const 
00484 {
00485     return new cacheutils::CachingSimNLL(*this, name);
00486 }
00487 
00488 cacheutils::CachingSimNLL::~CachingSimNLL()
00489 {
00490     for (std::vector<SimpleGaussianConstraint*>::iterator it = constrainPdfsFast_.begin(), ed = constrainPdfsFast_.end(); it != ed; ++it) {
00491         delete *it;
00492     }
00493 }
00494 
00495 void
00496 cacheutils::CachingSimNLL::setup_() 
00497 {
00498     // Allow runtime-flag to switch off logEvalErrors
00499     noDeepLEE_ = runtimedef::get("SIMNLL_NO_LEE");
00500 
00501     //RooAbsPdf *pdfclone = runtimedef::get("SIMNLL_CLONE") ? pdfOriginal_  : utils::fullClonePdf(pdfOriginal_, piecesForCloning_);
00502     RooAbsPdf *pdfclone = pdfOriginal_; // never clone
00503 
00504     //---- Instead of getting the parameters here, we get them from the individual constraint terms and single pdfs ----
00505     //---- This seems to save memory.
00506     //std::auto_ptr<RooArgSet> params(pdfclone->getParameters(*dataOriginal_));
00507     //params_.add(*params);
00508 
00509     RooArgList constraints;
00510     factorizedPdf_.reset(dynamic_cast<RooSimultaneous *>(utils::factorizePdf(*dataOriginal_->get(), *pdfclone, constraints)));
00511     
00512     RooSimultaneous *simpdf = factorizedPdf_.get();
00513     constrainPdfs_.clear(); 
00514     if (constraints.getSize()) {
00515         bool FastConstraints = runtimedef::get("SIMNLL_FASTGAUSS");
00516         //constrainPdfs_.push_back(new RooProdPdf("constraints","constraints", constraints));
00517         for (int i = 0, n = constraints.getSize(); i < n; ++i) {
00518             RooAbsPdf *pdfi = dynamic_cast<RooAbsPdf*>(constraints.at(i));
00519             if (FastConstraints && typeid(*pdfi) == typeid(RooGaussian)) {
00520                 constrainPdfsFast_.push_back(new SimpleGaussianConstraint(dynamic_cast<const RooGaussian&>(*pdfi)));
00521                 constrainZeroPointsFast_.push_back(0);
00522             } else {
00523                 constrainPdfs_.push_back(pdfi);
00524                 constrainZeroPoints_.push_back(0);
00525             }
00526             //std::cout << "Constraint pdf: " << constraints.at(i)->GetName() << std::endl;
00527             std::auto_ptr<RooArgSet> params(pdfi->getParameters(*dataOriginal_));
00528             params_.add(*params, false);
00529         }
00530     } else {
00531         std::cerr << "PDF didn't factorize!" << std::endl;
00532         std::cout << "Parameters: " << std::endl;
00533         std::auto_ptr<RooArgSet> params(pdfclone->getParameters(*dataOriginal_));
00534         params->Print("V");
00535         std::cout << "Obs: " << std::endl;
00536         dataOriginal_->get()->Print("V");
00537         factorizedPdf_.release();
00538         simpdf = dynamic_cast<RooSimultaneous *>(pdfclone);
00539     }
00540 
00541     
00542     std::auto_ptr<RooAbsCategoryLValue> catClone((RooAbsCategoryLValue*) simpdf->indexCat().Clone());
00543     pdfs_.resize(catClone->numBins(NULL), 0);
00544     //dataSets_.reset(dataOriginal_->split(pdfOriginal_->indexCat(), true));
00545     datasets_.resize(pdfs_.size(), 0);
00546     splitWithWeights(*dataOriginal_, simpdf->indexCat(), true);
00547     //std::cout << "Pdf " << simpdf->GetName() <<" is a SimPdf over category " << catClone->GetName() << ", with " << pdfs_.size() << " bins" << std::endl;
00548     for (int ib = 0, nb = pdfs_.size(); ib < nb; ++ib) {
00549         catClone->setBin(ib);
00550         RooAbsPdf *pdf = simpdf->getPdf(catClone->getLabel());
00551         if (pdf != 0) {
00552             RooAbsData *data = (RooAbsData *) datasets_[ib]; //dataSets_->FindObject(catClone->getLabel());
00553             //RooAbsData *data = (RooAbsData *) dataSets_->FindObject(catClone->getLabel());
00554             //std::cout << "   bin " << ib << " (label " << catClone->getLabel() << ") has pdf " << pdf->GetName() << " of type " << pdf->ClassName() << " and " << (data ? data->numEntries() : -1) << " dataset entries" << std::endl;
00555             if (data == 0) { throw std::logic_error("Error: no data"); }
00556             pdfs_[ib] = new CachingAddNLL(catClone->getLabel(), "", pdf, data);
00557             params_.add(pdfs_[ib]->params(), /*silent=*/true); 
00558         } else { 
00559             pdfs_[ib] = 0; 
00560             //std::cout << "   bin " << ib << " (label " << catClone->getLabel() << ") has no pdf" << std::endl;
00561         }
00562     }   
00563 
00564     setValueDirty();
00565 }
00566 
00567 Double_t 
00568 cacheutils::CachingSimNLL::evaluate() const 
00569 {
00570     TRACE_POINT(params_)
00571 #ifdef DEBUG_CACHE
00572     PerfCounter::add("CachingSimNLL::evaluate called");
00573 #endif
00574     double ret = 0;
00575     for (std::vector<CachingAddNLL*>::const_iterator it = pdfs_.begin(), ed = pdfs_.end(); it != ed; ++it) {
00576         if (*it != 0) {
00577             double nllval = (*it)->getVal();
00578             // what sanity check could I put here?
00579             ret += nllval;
00580         }
00581     }
00582     if (!constrainPdfs_.empty()) {
00584         std::vector<double>::const_iterator itz = constrainZeroPoints_.begin();
00585         for (std::vector<RooAbsPdf *>::const_iterator it = constrainPdfs_.begin(), ed = constrainPdfs_.end(); it != ed; ++it, ++itz) { 
00586             double pdfval = (*it)->getVal(nuis_);
00587             if (!isnormal(pdfval) || pdfval <= 0) {
00588                 if (!noDeepLEE_) logEvalError((std::string("Constraint pdf ")+(*it)->GetName()+" evaluated to zero, negative or error").c_str());
00589                 pdfval = 1e-9;
00590             }
00591             ret -= (log(pdfval) + *itz);
00592         }
00594         itz = constrainZeroPointsFast_.begin();
00595         for (std::vector<SimpleGaussianConstraint*>::const_iterator it = constrainPdfsFast_.begin(), ed = constrainPdfsFast_.end(); it != ed; ++it, ++itz) { 
00596             double logpdfval = (*it)->getLogValFast();
00597             ret -= (logpdfval + *itz);
00598         }
00599     }
00600 #ifdef TRACE_NLL_EVALS
00601     static unsigned long _trace_ = 0; _trace_++;
00602     if (_trace_ % 10 == 0)  { putchar('.'); fflush(stdout); }
00603     //if (_trace_ % 250 == 0) { printf("               NLL % 10.4f after %10lu evals.\n", ret, _trace_); fflush(stdout); }
00604 #endif
00605     TRACE_NLL("SimNLL for " << GetName() << ": " << ret)
00606     return ret;
00607 }
00608 
00609 void 
00610 cacheutils::CachingSimNLL::setData(const RooAbsData &data) 
00611 {
00612     dataOriginal_ = &data;
00613     //std::cout << "combined data has " << data.numEntries() << " dataset entries (sumw " << data.sumEntries() << ", weighted " << data.isWeighted() << ")" << std::endl;
00614     //utils::printRAD(&data);
00615     //dataSets_.reset(dataOriginal_->split(pdfOriginal_->indexCat(), true));
00616     splitWithWeights(*dataOriginal_, pdfOriginal_->indexCat(), true);
00617     for (int ib = 0, nb = pdfs_.size(); ib < nb; ++ib) {
00618         CachingAddNLL *canll = pdfs_[ib];
00619         if (canll == 0) continue;
00620         RooAbsData *data = datasets_[ib];
00621         //RooAbsData *data = (RooAbsData *) dataSets_->FindObject(canll->GetName());
00622         if (data == 0) { throw std::logic_error("Error: no data"); }
00623         //std::cout << "   bin " << ib << " (label " << canll->GetName() << ") has pdf " << canll->pdf()->GetName() << " of type " << canll->pdf()->ClassName() <<
00624         //             " and " << (data ? data->numEntries() : -1) << " dataset entries (sumw " << data->sumEntries() << ", weighted " << data->isWeighted() << ")" << std::endl;
00625         canll->setData(*data);
00626     }
00627 }
00628 
00629 void cacheutils::CachingSimNLL::splitWithWeights(const RooAbsData &data, const RooAbsCategory& splitCat, Bool_t createEmptyDataSets) {
00630     RooCategory *cat = dynamic_cast<RooCategory *>(data.get()->find(splitCat.GetName()));
00631     if (cat == 0) throw std::logic_error("Error: no category");
00632     int nb = cat->numBins((const char *)0), ne = data.numEntries();
00633     RooArgSet obs(*data.get()); obs.remove(*cat, true, true);
00634     RooRealVar weight("_weight_","",1);
00635     RooArgSet obsplus(obs); obsplus.add(weight);
00636     if (nb != int(datasets_.size())) throw std::logic_error("Number of categories changed"); // this can happen due to bugs in RooDataSet
00637     for (int ib = 0; ib < nb; ++ib) {
00638         if (datasets_[ib] == 0) datasets_[ib] = new RooDataSet("", "", obsplus, "_weight_");
00639         else datasets_[ib]->reset();
00640     }
00641     //utils::printRDH((RooAbsData*)&data);
00642     for (int i = 0; i < ne; ++i) {
00643         data.get(i); if (data.weight() == 0) continue;
00644         int ib = cat->getBin();
00645         //std::cout << "Event " << i << " of weight " << data.weight() << " is in bin " << ib << " label " << cat->getLabel() << std::endl;
00646         if (data.weight() > 0) datasets_[ib]->add(obs, data.weight());
00647     }
00648 }
00649 
00650 void cacheutils::CachingSimNLL::setZeroPoint() {
00651     for (std::vector<CachingAddNLL*>::const_iterator it = pdfs_.begin(), ed = pdfs_.end(); it != ed; ++it) {
00652         if (*it != 0) (*it)->setZeroPoint();
00653     }
00654     std::vector<double>::iterator itz = constrainZeroPoints_.begin();
00655     for (std::vector<RooAbsPdf *>::const_iterator it = constrainPdfs_.begin(), ed = constrainPdfs_.end(); it != ed; ++it, ++itz) {
00656         double pdfval = (*it)->getVal(nuis_);
00657         if (isnormal(pdfval) || pdfval > 0) *itz = -log(pdfval);
00658     }
00659     itz = constrainZeroPointsFast_.begin();
00660     for (std::vector<SimpleGaussianConstraint*>::const_iterator it = constrainPdfsFast_.begin(), ed = constrainPdfsFast_.end(); it != ed; ++it, ++itz) {
00661         double logpdfval = (*it)->getLogValFast();
00662         *itz = -logpdfval;
00663     }
00664     setValueDirty();
00665 }
00666 
00667 void cacheutils::CachingSimNLL::clearZeroPoint() {
00668     for (std::vector<CachingAddNLL*>::const_iterator it = pdfs_.begin(), ed = pdfs_.end(); it != ed; ++it) {
00669         if (*it != 0) (*it)->clearZeroPoint();
00670     }
00671     std::fill(constrainZeroPoints_.begin(), constrainZeroPoints_.end(), 0.0);
00672     std::fill(constrainZeroPointsFast_.begin(), constrainZeroPointsFast_.end(), 0.0);
00673     setValueDirty();
00674 }
00675 
00676 RooArgSet* 
00677 cacheutils::CachingSimNLL::getObservables(const RooArgSet* depList, Bool_t valueOnly) const 
00678 {
00679     return new RooArgSet();
00680 }
00681 
00682 RooArgSet* 
00683 cacheutils::CachingSimNLL::getParameters(const RooArgSet* depList, Bool_t stripDisconnected) const 
00684 {
00685     return new RooArgSet(params_); 
00686 }