CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_6_2_5/src/IOPool/Input/src/RootTree.cc

Go to the documentation of this file.
00001 #include "RootTree.h"
00002 #include "RootDelayedReader.h"
00003 #include "FWCore/Utilities/interface/EDMException.h"
00004 #include "FWCore/Utilities/interface/Exception.h"
00005 #include "DataFormats/Provenance/interface/BranchDescription.h"
00006 #include "InputFile.h"
00007 #include "TTree.h"
00008 #include "TTreeIndex.h"
00009 #include "TTreeCache.h"
00010 
00011 #include <iostream>
00012 
00013 namespace edm {
00014   namespace {
00015     TBranch* getAuxiliaryBranch(TTree* tree, BranchType const& branchType) {
00016       TBranch* branch = tree->GetBranch(BranchTypeToAuxiliaryBranchName(branchType).c_str());
00017       if (branch == 0) {
00018         branch = tree->GetBranch(BranchTypeToAuxBranchName(branchType).c_str());
00019       }
00020       return branch;
00021     }
00022     TBranch* getProductProvenanceBranch(TTree* tree, BranchType const& branchType) {
00023       TBranch* branch = tree->GetBranch(BranchTypeToBranchEntryInfoBranchName(branchType).c_str());
00024       return branch;
00025     }
00026   }
00027   RootTree::RootTree(boost::shared_ptr<InputFile> filePtr,
00028                      BranchType const& branchType,
00029                      unsigned int maxVirtualSize,
00030                      unsigned int cacheSize,
00031                      unsigned int learningEntries,
00032                      bool enablePrefetching) :
00033     filePtr_(filePtr),
00034     tree_(dynamic_cast<TTree*>(filePtr_.get() != 0 ? filePtr_->Get(BranchTypeToProductTreeName(branchType).c_str()) : 0)),
00035     metaTree_(dynamic_cast<TTree*>(filePtr_.get() != 0 ? filePtr_->Get(BranchTypeToMetaDataTreeName(branchType).c_str()) : 0)),
00036     branchType_(branchType),
00037     auxBranch_(tree_ ? getAuxiliaryBranch(tree_, branchType_) : 0),
00038     treeCache_(),
00039     rawTreeCache_(),
00040     triggerTreeCache_(),
00041     rawTriggerTreeCache_(),
00042     trainedSet_(),
00043     triggerSet_(),
00044     entries_(tree_ ? tree_->GetEntries() : 0),
00045     entryNumber_(-1),
00046     branchNames_(),
00047     branches_(new BranchMap),
00048     trainNow_(false),
00049     switchOverEntry_(-1),
00050     rawTriggerSwitchOverEntry_(-1),
00051     learningEntries_(learningEntries),
00052     cacheSize_(cacheSize),
00053     treeAutoFlush_(0),
00054     enablePrefetching_(enablePrefetching),
00055     enableTriggerCache_(branchType_ == InEvent),
00056     rootDelayedReader_(new RootDelayedReader(*this, filePtr)),
00057     branchEntryInfoBranch_(metaTree_ ? getProductProvenanceBranch(metaTree_, branchType_) : (tree_ ? getProductProvenanceBranch(tree_, branchType_) : 0)),
00058     infoTree_(dynamic_cast<TTree*>(filePtr_.get() != 0 ? filePtr->Get(BranchTypeToInfoTreeName(branchType).c_str()) : 0)) // backward compatibility
00059     {
00060       assert(tree_);
00061       // On merged files in older releases of ROOT, the autoFlush setting is always negative; we must guess.
00062       // TODO: On newer merged files, we should be able to get this from the cluster iterator.
00063       long treeAutoFlush = (tree_ ? tree_->GetAutoFlush() : 0);
00064       if (treeAutoFlush < 0) {
00065         // The "+1" is here to avoid divide-by-zero in degenerate cases.
00066         Long64_t averageEventSizeBytes = tree_->GetZipBytes() / (tree_->GetEntries()+1) + 1;
00067         treeAutoFlush_ = cacheSize_/averageEventSizeBytes+1;
00068       } else {
00069         treeAutoFlush_ = treeAutoFlush;
00070       }
00071       if (treeAutoFlush_ < learningEntries_) {
00072         learningEntries_ = treeAutoFlush_;
00073       }
00074       setTreeMaxVirtualSize(maxVirtualSize);
00075       setCacheSize(cacheSize);
00076       if (tree_) {
00077          Int_t branchCount = tree_->GetListOfBranches()->GetEntriesFast();
00078          trainedSet_.reserve(branchCount);
00079          triggerSet_.reserve(branchCount);
00080       }
00081   }
00082 
00083   RootTree::~RootTree() {
00084   }
00085 
00086   bool
00087   RootTree::isValid() const {
00088     if (metaTree_ == 0 || metaTree_->GetNbranches() == 0) {
00089       return tree_ != 0 && auxBranch_ != 0;
00090     }
00091     if (tree_ != 0 && auxBranch_ != 0 && metaTree_ != 0) { // backward compatibility
00092       if (branchEntryInfoBranch_ != 0 || infoTree_ != 0) return true; // backward compatibility
00093       return (entries_ == metaTree_->GetEntries() && tree_->GetNbranches() <= metaTree_->GetNbranches() + 1);  // backward compatibility
00094     } // backward compatibility
00095     return false;
00096   }
00097 
00098   DelayedReader*
00099   RootTree::rootDelayedReader() const {
00100     rootDelayedReader_->reset();
00101     return rootDelayedReader_.get();
00102   }  
00103 
00104   void
00105   RootTree::setPresence(BranchDescription const& prod, std::string const& oldBranchName) {
00106       assert(isValid());
00107       prod.init();
00108       if(tree_->GetBranch(oldBranchName.c_str()) == 0){
00109         prod.setDropped();
00110       }
00111   }
00112 
00113   void
00114   RootTree::addBranch(BranchKey const& key,
00115                       BranchDescription const& prod,
00116                       std::string const& oldBranchName) {
00117       assert(isValid());
00118       prod.init();
00119       //use the translated branch name
00120       TBranch* branch = tree_->GetBranch(oldBranchName.c_str());
00121       roottree::BranchInfo info = roottree::BranchInfo(ConstBranchDescription(prod));
00122       info.productBranch_ = 0;
00123       if (prod.present()) {
00124         info.productBranch_ = branch;
00125         //we want the new branch name for the JobReport
00126         branchNames_.push_back(prod.branchName());
00127       }
00128       TTree* provTree = (metaTree_ != 0 ? metaTree_ : tree_);
00129       info.provenanceBranch_ = provTree->GetBranch(oldBranchName.c_str());
00130       branches_->insert(std::make_pair(key, info));
00131   }
00132 
00133   void
00134   RootTree::dropBranch(std::string const& oldBranchName) {
00135       //use the translated branch name
00136       TBranch* branch = tree_->GetBranch(oldBranchName.c_str());
00137       if (branch != 0) {
00138         TObjArray* leaves = tree_->GetListOfLeaves();
00139         int entries = leaves->GetEntries();
00140         for (int i = 0; i < entries; ++i) {
00141           TLeaf* leaf = (TLeaf*)(*leaves)[i];
00142           if (leaf == 0) continue;
00143           TBranch* br = leaf->GetBranch();
00144           if (br == 0) continue;
00145           if (br->GetMother() == branch) {
00146             leaves->Remove(leaf);
00147           }
00148         }
00149         leaves->Compress();
00150         tree_->GetListOfBranches()->Remove(branch);
00151         tree_->GetListOfBranches()->Compress();
00152         delete branch;
00153       }
00154   }
00155 
00156   roottree::BranchMap const&
00157   RootTree::branches() const {return *branches_;}
00158 
00159   void
00160   RootTree::setCacheSize(unsigned int cacheSize) {
00161     cacheSize_ = cacheSize;
00162     tree_->SetCacheSize(static_cast<Long64_t>(cacheSize));
00163     treeCache_.reset(dynamic_cast<TTreeCache*>(filePtr_->GetCacheRead()));
00164     if(treeCache_) treeCache_->SetEnablePrefetching(enablePrefetching_);
00165     filePtr_->SetCacheRead(0);
00166     rawTreeCache_.reset();
00167   }
00168 
00169   void
00170   RootTree::setTreeMaxVirtualSize(int treeMaxVirtualSize) {
00171     if (treeMaxVirtualSize >= 0) tree_->SetMaxVirtualSize(static_cast<Long64_t>(treeMaxVirtualSize));
00172   }
00173 
00174   void
00175   RootTree::setEntryNumber(EntryNumber theEntryNumber) {
00176     filePtr_->SetCacheRead(treeCache_.get());
00177 
00178     // Detect a backward skip.  If the skip is sufficiently large, we roll the dice and reset the treeCache.
00179     // This will cause some amount of over-reading: we pre-fetch all the events in some prior cluster.
00180     // However, because reading one event in the cluster is supposed to be equivalent to reading all events in the cluster,
00181     // we're not incurring additional over-reading - we're just doing it more efficiently.
00182     // NOTE: Constructor guarantees treeAutoFlush_ is positive, even if TTree->GetAutoFlush() is negative.
00183     if ((theEntryNumber < static_cast<EntryNumber>(entryNumber_-treeAutoFlush_)) &&
00184         (treeCache_) && (!treeCache_->IsLearning()) && (entries_ > 0) && (switchOverEntry_ >= 0)) {
00185       treeCache_->SetEntryRange(theEntryNumber, entries_);
00186       treeCache_->FillBuffer();
00187     }
00188 
00189     entryNumber_ = theEntryNumber;
00190     tree_->LoadTree(entryNumber_);
00191     filePtr_->SetCacheRead(0);
00192     if(treeCache_ && trainNow_ && entryNumber_ >= 0) {
00193       startTraining();
00194       trainNow_ = false;
00195       trainedSet_.clear();
00196       triggerSet_.clear();
00197       rawTriggerSwitchOverEntry_ = -1;
00198     }
00199     if (treeCache_ && treeCache_->IsLearning() && switchOverEntry_ >= 0 && entryNumber_ >= switchOverEntry_) {
00200       stopTraining();
00201     }
00202   }
00203 
00204   // The actual implementation is done below; it's split in this strange
00205   // manner in order to keep a by-definition-rare code path out of the instruction cache.
00206   inline TTreeCache*
00207   RootTree::checkTriggerCache(TBranch* branch, EntryNumber entryNumber) const {
00208     if (!treeCache_->IsAsyncReading() && enableTriggerCache_ && (trainedSet_.find(branch) == trainedSet_.end())) {
00209       return checkTriggerCacheImpl(branch, entryNumber);
00210     } else {
00211       return NULL;
00212     }
00213   }
00214 
00215   // See comments in the header.  If this function is called, we already know
00216   // the trigger cache is active and it was a cache miss for the regular cache.
00217   TTreeCache*
00218   RootTree::checkTriggerCacheImpl(TBranch* branch, EntryNumber entryNumber) const {
00219     // This branch is not going to be in the cache.
00220     // Assume this is a "trigger pattern".
00221     // Always make sure the branch is added to the trigger set.
00222     if (triggerSet_.find(branch) == triggerSet_.end()) {
00223       triggerSet_.insert(branch);
00224       if (triggerTreeCache_.get()) { triggerTreeCache_->AddBranch(branch, kTRUE); }
00225     }
00226 
00227     if (rawTriggerSwitchOverEntry_ < 0) {
00228       // The trigger has never fired before.  Take everything not in the
00229       // trainedSet and load it from disk
00230 
00231       // Calculate the end of the next cluster; triggers in the next cluster
00232       // will use the triggerCache, not the rawTriggerCache.
00233       TTree::TClusterIterator clusterIter = tree_->GetClusterIterator(entryNumber);
00234       while (rawTriggerSwitchOverEntry_ < entryNumber) {
00235         rawTriggerSwitchOverEntry_ = clusterIter();
00236       }
00237 
00238       // ROOT will automatically expand the cache to fit one cluster; hence, we use
00239       // 5 MB as the cache size below
00240       tree_->SetCacheSize(static_cast<Long64_t>(5*1024*1024));
00241       rawTriggerTreeCache_.reset(dynamic_cast<TTreeCache*>(filePtr_->GetCacheRead()));
00242       if(rawTriggerTreeCache_) rawTriggerTreeCache_->SetEnablePrefetching(false);
00243       TObjArray *branches = tree_->GetListOfBranches();
00244       int branchCount = branches->GetEntriesFast();
00245 
00246       // Train the rawTriggerCache to have everything not in the regular cache.
00247       rawTriggerTreeCache_->SetLearnEntries(0);
00248       rawTriggerTreeCache_->SetEntryRange(entryNumber, rawTriggerSwitchOverEntry_);
00249       for (int i=0;i<branchCount;i++) {
00250         TBranch *tmp_branch = (TBranch*)branches->UncheckedAt(i);
00251         if (trainedSet_.find(tmp_branch) != trainedSet_.end()) {
00252           continue;
00253         }
00254         rawTriggerTreeCache_->AddBranch(tmp_branch, kTRUE);
00255       }
00256       performedSwitchOver_ = false;
00257       rawTriggerTreeCache_->StopLearningPhase();
00258       filePtr_->SetCacheRead(0);
00259 
00260       return rawTriggerTreeCache_.get();
00261     } else if (entryNumber_ < rawTriggerSwitchOverEntry_) {
00262       // The raw trigger has fired and it contents are valid.
00263       return rawTriggerTreeCache_.get();
00264     } else if (rawTriggerSwitchOverEntry_ > 0) {
00265       // The raw trigger has fired, but we are out of the cache.  Use the
00266       // triggerCache instead.
00267       if (!performedSwitchOver_) {
00268         rawTriggerTreeCache_.reset();
00269         performedSwitchOver_ = true;
00270 
00271         // Train the triggerCache
00272         tree_->SetCacheSize(static_cast<Long64_t>(5*1024*1024));
00273         triggerTreeCache_.reset(dynamic_cast<TTreeCache*>(filePtr_->GetCacheRead()));
00274         triggerTreeCache_->SetEnablePrefetching(false);
00275         triggerTreeCache_->SetLearnEntries(0);
00276         triggerTreeCache_->SetEntryRange(entryNumber, tree_->GetEntries());
00277         for(std::unordered_set<TBranch*>::const_iterator it = triggerSet_.begin(), itEnd = triggerSet_.end();
00278             it != itEnd;
00279             it++)
00280         {
00281           triggerTreeCache_->AddBranch(*it, kTRUE);
00282         }
00283         triggerTreeCache_->StopLearningPhase();
00284         filePtr_->SetCacheRead(0);
00285       }
00286       return triggerTreeCache_.get();
00287     } else if (entryNumber_ < rawTriggerSwitchOverEntry_) {
00288       // The raw trigger has fired and it contents are valid.
00289       return rawTriggerTreeCache_.get();
00290     } else if (rawTriggerSwitchOverEntry_ > 0) {
00291       // The raw trigger has fired, but we are out of the cache.  Use the
00292       // triggerCache instead.
00293       if (!performedSwitchOver_) {
00294         rawTriggerTreeCache_.reset();
00295         performedSwitchOver_ = true; 
00296         
00297         // Train the triggerCache
00298         tree_->SetCacheSize(static_cast<Long64_t>(5*1024*1024));
00299         triggerTreeCache_.reset(dynamic_cast<TTreeCache*>(filePtr_->GetCacheRead()));
00300         triggerTreeCache_->SetEnablePrefetching(false);
00301         triggerTreeCache_->SetLearnEntries(0);
00302         triggerTreeCache_->SetEntryRange(entryNumber, tree_->GetEntries());
00303         for(std::unordered_set<TBranch*>::const_iterator it = triggerSet_.begin(), itEnd = triggerSet_.end();
00304               it != itEnd;
00305               it++)
00306         { 
00307           triggerTreeCache_->AddBranch(*it, kTRUE);
00308         }
00309         triggerTreeCache_->StopLearningPhase();
00310         filePtr_->SetCacheRead(0);
00311       }
00312       return triggerTreeCache_.get();
00313     }
00314 
00315     // By construction, this case should be impossible.
00316     assert (false);
00317     return NULL;
00318   }
00319 
00320   inline TTreeCache*
00321   RootTree::selectCache(TBranch* branch, EntryNumber entryNumber) const {
00322     TTreeCache *triggerCache = NULL;
00323     if (!treeCache_) {
00324       return NULL;
00325     } else if (treeCache_->IsLearning() && rawTreeCache_) {
00326       treeCache_->AddBranch(branch, kTRUE);
00327       trainedSet_.insert(branch);
00328       return rawTreeCache_.get();
00329     } else if ((triggerCache = checkTriggerCache(branch, entryNumber))) {
00330       // A NULL return value from checkTriggerCache indicates the trigger cache case
00331       // does not apply, and we should continue below.
00332       return triggerCache;
00333     } else {
00334       // The "normal" TTreeCache case.
00335       return treeCache_.get();
00336     }
00337   }
00338 
00339   void
00340   RootTree::getEntry(TBranch* branch, EntryNumber entryNumber) const {
00341     try {
00342       TTreeCache * cache = selectCache(branch, entryNumber);
00343       filePtr_->SetCacheRead(cache);
00344       branch->GetEntry(entryNumber);
00345       filePtr_->SetCacheRead(0);
00346     } catch(cms::Exception const& e) {
00347       // We make sure the treeCache_ is detached from the file,
00348       // so that ROOT does not also delete it.
00349       filePtr_->SetCacheRead(0);
00350       Exception t(errors::FileReadError, "", e);
00351       t.addContext(std::string("Reading branch ")+branch->GetName());
00352       throw t;
00353     }
00354   }
00355 
00356   void
00357   RootTree::startTraining() {
00358     if (cacheSize_ == 0) {
00359       return;
00360     }
00361     assert(treeCache_);
00362     assert(branchType_ == InEvent);
00363     assert(!rawTreeCache_);
00364     treeCache_->SetLearnEntries(learningEntries_);
00365     tree_->SetCacheSize(static_cast<Long64_t>(cacheSize_));
00366     rawTreeCache_.reset(dynamic_cast<TTreeCache *>(filePtr_->GetCacheRead()));
00367     rawTreeCache_->SetEnablePrefetching(false);
00368     filePtr_->SetCacheRead(0);
00369     rawTreeCache_->SetLearnEntries(0);
00370     switchOverEntry_ = entryNumber_ + learningEntries_;
00371     rawTreeCache_->StartLearningPhase();
00372     rawTreeCache_->SetEntryRange(entryNumber_, switchOverEntry_);
00373     rawTreeCache_->AddBranch("*", kTRUE);
00374     rawTreeCache_->StopLearningPhase();
00375     treeCache_->StartLearningPhase();
00376     treeCache_->SetEntryRange(switchOverEntry_, tree_->GetEntries());
00377     treeCache_->AddBranch(poolNames::branchListIndexesBranchName().c_str(), kTRUE);
00378     treeCache_->AddBranch(BranchTypeToAuxiliaryBranchName(branchType_).c_str(), kTRUE);
00379     trainedSet_.clear();
00380     triggerSet_.clear();
00381     assert(treeCache_->GetTree() == tree_);
00382   }
00383 
00384   void
00385   RootTree::stopTraining() {
00386     filePtr_->SetCacheRead(treeCache_.get());
00387     treeCache_->StopLearningPhase();
00388     filePtr_->SetCacheRead(0);
00389     rawTreeCache_.reset();
00390   }
00391 
00392   void
00393   RootTree::close () {
00394     // The TFile is about to be closed, and destructed.
00395     // Just to play it safe, zero all pointers to quantities that are owned by the TFile.
00396     auxBranch_  = branchEntryInfoBranch_ = 0;
00397     tree_ = metaTree_ = infoTree_ = 0;
00398     // We own the treeCache_.
00399     // We make sure the treeCache_ is detached from the file,
00400     // so that ROOT does not also delete it.
00401     filePtr_->SetCacheRead(0);
00402     // We *must* delete the TTreeCache here because the TFilePrefetch object
00403     // references the TFile.  If TFile is closed, before the TTreeCache is
00404     // deleted, the TFilePrefetch may continue to do TFile operations, causing
00405     // deadlocks or exceptions.
00406     treeCache_.reset();
00407     rawTreeCache_.reset();
00408     triggerTreeCache_.reset();
00409     rawTriggerTreeCache_.reset();
00410     // We give up our shared ownership of the TFile itself.
00411     filePtr_.reset();
00412   }
00413 
00414   void
00415   RootTree::trainCache(char const* branchNames) {
00416     if (cacheSize_ == 0) {
00417       return;
00418     }
00419     tree_->LoadTree(0);
00420     assert(treeCache_);
00421     filePtr_->SetCacheRead(treeCache_.get());
00422     treeCache_->StartLearningPhase();
00423     treeCache_->SetEntryRange(0, tree_->GetEntries());
00424     treeCache_->AddBranch(branchNames, kTRUE);
00425     treeCache_->StopLearningPhase();
00426     assert(treeCache_->GetTree() == tree_);
00427     // We own the treeCache_.
00428     // We make sure the treeCache_ is detached from the file,
00429     // so that ROOT does not also delete it.
00430     filePtr_->SetCacheRead(0);
00431 
00432     // Must also manually add things to the trained set.
00433     TObjArray *branches = tree_->GetListOfBranches();
00434     int branchCount = branches->GetEntriesFast();
00435     for (int i=0;i<branchCount;i++) {
00436        TBranch *branch = (TBranch*)branches->UncheckedAt(i);
00437        if ((branchNames[0] == '*') || (strcmp(branchNames, branch->GetName()) == 0)) {
00438           trainedSet_.insert(branch);
00439        } 
00440     } 
00441  
00442   }
00443 
00444   namespace roottree {
00445     Int_t
00446     getEntry(TBranch* branch, EntryNumber entryNumber) {
00447       Int_t n = 0;
00448       try {
00449         n = branch->GetEntry(entryNumber);
00450       }
00451       catch(cms::Exception const& e) {
00452         throw Exception(errors::FileReadError, "", e);
00453       }
00454       return n;
00455     }
00456 
00457     Int_t
00458     getEntry(TTree* tree, EntryNumber entryNumber) {
00459       Int_t n = 0;
00460       try {
00461         n = tree->GetEntry(entryNumber);
00462       }
00463       catch(cms::Exception const& e) {
00464         throw Exception (errors::FileReadError, "", e);
00465       }
00466       return n;
00467     }
00468 
00469     std::unique_ptr<TTreeCache>
00470     trainCache(TTree* tree, InputFile& file, unsigned int cacheSize, char const* branchNames) {
00471       tree->LoadTree(0);
00472       tree->SetCacheSize(cacheSize);
00473       std::unique_ptr<TTreeCache> treeCache(dynamic_cast<TTreeCache*>(file.GetCacheRead()));
00474       if (0 != treeCache.get()) {
00475         treeCache->StartLearningPhase();
00476         treeCache->SetEntryRange(0, tree->GetEntries());
00477         treeCache->AddBranch(branchNames, kTRUE);
00478         treeCache->StopLearningPhase();
00479       }
00480       // We own the treeCache_.
00481       // We make sure the treeCache_ is detached from the file,
00482       // so that ROOT does not also delete it.
00483       file.SetCacheRead(0);
00484       return treeCache;
00485     }
00486   }
00487 }