CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_6_1_1/src/IOPool/Input/src/RootTree.cc

Go to the documentation of this file.
00001 #include "RootTree.h"
00002 #include "RootDelayedReader.h"
00003 #include "FWCore/Utilities/interface/EDMException.h"
00004 #include "FWCore/Utilities/interface/Exception.h"
00005 #include "DataFormats/Provenance/interface/BranchDescription.h"
00006 #include "InputFile.h"
00007 #include "TTree.h"
00008 #include "TTreeIndex.h"
00009 #include "TTreeCache.h"
00010 
00011 #include <iostream>
00012 
00013 namespace edm {
00014   namespace {
00015     TBranch* getAuxiliaryBranch(TTree* tree, BranchType const& branchType) {
00016       TBranch* branch = tree->GetBranch(BranchTypeToAuxiliaryBranchName(branchType).c_str());
00017       if (branch == 0) {
00018         branch = tree->GetBranch(BranchTypeToAuxBranchName(branchType).c_str());
00019       }
00020       return branch;
00021     }
00022     TBranch* getProductProvenanceBranch(TTree* tree, BranchType const& branchType) {
00023       TBranch* branch = tree->GetBranch(BranchTypeToBranchEntryInfoBranchName(branchType).c_str());
00024       return branch;
00025     }
00026   }
00027   RootTree::RootTree(boost::shared_ptr<InputFile> filePtr,
00028                      BranchType const& branchType,
00029                      unsigned int maxVirtualSize,
00030                      unsigned int cacheSize,
00031                      unsigned int learningEntries,
00032                      bool enablePrefetching) :
00033     filePtr_(filePtr),
00034     tree_(dynamic_cast<TTree*>(filePtr_.get() != 0 ? filePtr_->Get(BranchTypeToProductTreeName(branchType).c_str()) : 0)),
00035     metaTree_(dynamic_cast<TTree*>(filePtr_.get() != 0 ? filePtr_->Get(BranchTypeToMetaDataTreeName(branchType).c_str()) : 0)),
00036     branchType_(branchType),
00037     auxBranch_(tree_ ? getAuxiliaryBranch(tree_, branchType_) : 0),
00038     treeCache_(),
00039     rawTreeCache_(),
00040     triggerTreeCache_(),
00041     rawTriggerTreeCache_(),
00042     trainedSet_(),
00043     triggerSet_(),
00044     entries_(tree_ ? tree_->GetEntries() : 0),
00045     entryNumber_(-1),
00046     branchNames_(),
00047     branches_(new BranchMap),
00048     trainNow_(false),
00049     switchOverEntry_(-1),
00050     rawTriggerSwitchOverEntry_(-1),
00051     learningEntries_(learningEntries),
00052     cacheSize_(cacheSize),
00053     treeAutoFlush_(tree_ ? tree_->GetAutoFlush() : 0),
00054     enablePrefetching_(enablePrefetching),
00055     enableTriggerCache_(branchType_ == InEvent),
00056     rootDelayedReader_(new RootDelayedReader(*this, filePtr)),
00057     branchEntryInfoBranch_(metaTree_ ? getProductProvenanceBranch(metaTree_, branchType_) : (tree_ ? getProductProvenanceBranch(tree_, branchType_) : 0)),
00058     infoTree_(dynamic_cast<TTree*>(filePtr_.get() != 0 ? filePtr->Get(BranchTypeToInfoTreeName(branchType).c_str()) : 0)) // backward compatibility
00059     {
00060       assert(tree_);
00061       // On merged files in older releases of ROOT, the autoFlush setting is always negative; we must guess.
00062       // TODO: On newer merged files, we should be able to get this from the cluster iterator.
00063       if (treeAutoFlush_ < 0) {
00064         // The "+1" is here to avoid divide-by-zero in degenerate cases.
00065         Long64_t averageEventSizeBytes = tree_->GetZipBytes() / (tree_->GetEntries()+1) + 1;
00066         treeAutoFlush_ = cacheSize_/averageEventSizeBytes+1;
00067       }
00068       if (treeAutoFlush_ < learningEntries_) {
00069         learningEntries_ = treeAutoFlush_;
00070       }
00071       setTreeMaxVirtualSize(maxVirtualSize);
00072       setCacheSize(cacheSize);
00073       if (tree_) {
00074          Int_t branchCount = tree_->GetListOfBranches()->GetEntriesFast();
00075          trainedSet_.reserve(branchCount);
00076          triggerSet_.reserve(branchCount);
00077       }
00078   }
00079 
00080   RootTree::~RootTree() {
00081   }
00082 
00083   bool
00084   RootTree::isValid() const {
00085     if (metaTree_ == 0 || metaTree_->GetNbranches() == 0) {
00086       return tree_ != 0 && auxBranch_ != 0;
00087     }
00088     if (tree_ != 0 && auxBranch_ != 0 && metaTree_ != 0) { // backward compatibility
00089       if (branchEntryInfoBranch_ != 0 || infoTree_ != 0) return true; // backward compatibility
00090       return (entries_ == metaTree_->GetEntries() && tree_->GetNbranches() <= metaTree_->GetNbranches() + 1);  // backward compatibility
00091     } // backward compatibility
00092     return false;
00093   }
00094 
00095   DelayedReader*
00096   RootTree::rootDelayedReader() const {
00097     rootDelayedReader_->reset();
00098     return rootDelayedReader_.get();
00099   }  
00100 
00101   void
00102   RootTree::setPresence(BranchDescription const& prod, std::string const& oldBranchName) {
00103       assert(isValid());
00104       prod.init();
00105       if(tree_->GetBranch(oldBranchName.c_str()) == 0){
00106         prod.setDropped();
00107       }
00108   }
00109 
00110   void
00111   RootTree::addBranch(BranchKey const& key,
00112                       BranchDescription const& prod,
00113                       std::string const& oldBranchName) {
00114       assert(isValid());
00115       prod.init();
00116       //use the translated branch name
00117       TBranch* branch = tree_->GetBranch(oldBranchName.c_str());
00118       roottree::BranchInfo info = roottree::BranchInfo(ConstBranchDescription(prod));
00119       info.productBranch_ = 0;
00120       if (prod.present()) {
00121         info.productBranch_ = branch;
00122         //we want the new branch name for the JobReport
00123         branchNames_.push_back(prod.branchName());
00124       }
00125       TTree* provTree = (metaTree_ != 0 ? metaTree_ : tree_);
00126       info.provenanceBranch_ = provTree->GetBranch(oldBranchName.c_str());
00127       branches_->insert(std::make_pair(key, info));
00128   }
00129 
00130   void
00131   RootTree::dropBranch(std::string const& oldBranchName) {
00132       //use the translated branch name
00133       TBranch* branch = tree_->GetBranch(oldBranchName.c_str());
00134       if (branch != 0) {
00135         TObjArray* leaves = tree_->GetListOfLeaves();
00136         int entries = leaves->GetEntries();
00137         for (int i = 0; i < entries; ++i) {
00138           TLeaf* leaf = (TLeaf*)(*leaves)[i];
00139           if (leaf == 0) continue;
00140           TBranch* br = leaf->GetBranch();
00141           if (br == 0) continue;
00142           if (br->GetMother() == branch) {
00143             leaves->Remove(leaf);
00144           }
00145         }
00146         leaves->Compress();
00147         tree_->GetListOfBranches()->Remove(branch);
00148         tree_->GetListOfBranches()->Compress();
00149         delete branch;
00150       }
00151   }
00152 
00153   roottree::BranchMap const&
00154   RootTree::branches() const {return *branches_;}
00155 
00156   void
00157   RootTree::setCacheSize(unsigned int cacheSize) {
00158     cacheSize_ = cacheSize;
00159     tree_->SetCacheSize(static_cast<Long64_t>(cacheSize));
00160     treeCache_.reset(dynamic_cast<TTreeCache*>(filePtr_->GetCacheRead()));
00161     if(treeCache_) treeCache_->SetEnablePrefetching(enablePrefetching_);
00162     filePtr_->SetCacheRead(0);
00163     rawTreeCache_.reset();
00164   }
00165 
00166   void
00167   RootTree::setTreeMaxVirtualSize(int treeMaxVirtualSize) {
00168     if (treeMaxVirtualSize >= 0) tree_->SetMaxVirtualSize(static_cast<Long64_t>(treeMaxVirtualSize));
00169   }
00170 
00171   void
00172   RootTree::setEntryNumber(EntryNumber theEntryNumber) {
00173     filePtr_->SetCacheRead(treeCache_.get());
00174 
00175     // Detect a backward skip.  If the skip is sufficiently large, we roll the dice and reset the treeCache.
00176     // This will cause some amount of over-reading: we pre-fetch all the events in some prior cluster.
00177     // However, because reading one event in the cluster is supposed to be equivalent to reading all events in the cluster,
00178     // we're not incurring additional over-reading - we're just doing it more efficiently.
00179     // NOTE: Constructor guarantees treeAutoFlush_ is positive, even if TTree->GetAutoFlush() is negative.
00180     if ((theEntryNumber < static_cast<EntryNumber>(entryNumber_-treeAutoFlush_)) &&
00181         (treeCache_) && (!treeCache_->IsLearning()) && (entries_ > 0) && (switchOverEntry_ >= 0)) {
00182       treeCache_->SetEntryRange(theEntryNumber, entries_);
00183       treeCache_->FillBuffer();
00184     }
00185 
00186     entryNumber_ = theEntryNumber;
00187     tree_->LoadTree(entryNumber_);
00188     filePtr_->SetCacheRead(0);
00189     if(treeCache_ && trainNow_ && entryNumber_ >= 0) {
00190       startTraining();
00191       trainNow_ = false;
00192       trainedSet_.clear();
00193       triggerSet_.clear();
00194       rawTriggerSwitchOverEntry_ = -1;
00195     }
00196     if (treeCache_ && treeCache_->IsLearning() && switchOverEntry_ >= 0 && entryNumber_ >= switchOverEntry_) {
00197       stopTraining();
00198     }
00199   }
00200 
00201   // The actual implementation is done below; it's split in this strange
00202   // manner in order to keep a by-definition-rare code path out of the instruction cache.
00203   inline TTreeCache*
00204   RootTree::checkTriggerCache(TBranch* branch, EntryNumber entryNumber) const {
00205     if (!treeCache_->IsAsyncReading() && enableTriggerCache_ && (trainedSet_.find(branch) == trainedSet_.end())) {
00206       return checkTriggerCacheImpl(branch, entryNumber);
00207     } else {
00208       return NULL;
00209     }
00210   }
00211 
00212   // See comments in the header.  If this function is called, we already know
00213   // the trigger cache is active and it was a cache miss for the regular cache.
00214   TTreeCache*
00215   RootTree::checkTriggerCacheImpl(TBranch* branch, EntryNumber entryNumber) const {
00216     // This branch is not going to be in the cache.
00217     // Assume this is a "trigger pattern".
00218     // Always make sure the branch is added to the trigger set.
00219     if (triggerSet_.find(branch) == triggerSet_.end()) {
00220       triggerSet_.insert(branch);
00221       if (triggerTreeCache_.get()) { triggerTreeCache_->AddBranch(branch, kTRUE); }
00222     }
00223 
00224     if (rawTriggerSwitchOverEntry_ < 0) {
00225       // The trigger has never fired before.  Take everything not in the
00226       // trainedSet and load it from disk
00227 
00228       // Calculate the end of the next cluster; triggers in the next cluster
00229       // will use the triggerCache, not the rawTriggerCache.
00230       TTree::TClusterIterator clusterIter = tree_->GetClusterIterator(entryNumber);
00231       while (rawTriggerSwitchOverEntry_ < entryNumber) {
00232         rawTriggerSwitchOverEntry_ = clusterIter();
00233       }
00234 
00235       // ROOT will automatically expand the cache to fit one cluster; hence, we use
00236       // 5 MB as the cache size below
00237       tree_->SetCacheSize(static_cast<Long64_t>(5*1024*1024));
00238       rawTriggerTreeCache_.reset(dynamic_cast<TTreeCache*>(filePtr_->GetCacheRead()));
00239       if(rawTriggerTreeCache_) rawTriggerTreeCache_->SetEnablePrefetching(false);
00240       TObjArray *branches = tree_->GetListOfBranches();
00241       int branchCount = branches->GetEntriesFast();
00242 
00243       // Train the rawTriggerCache to have everything not in the regular cache.
00244       rawTriggerTreeCache_->SetLearnEntries(0);
00245       rawTriggerTreeCache_->SetEntryRange(entryNumber, rawTriggerSwitchOverEntry_);
00246       for (int i=0;i<branchCount;i++) {
00247         TBranch *tmp_branch = (TBranch*)branches->UncheckedAt(i);
00248         if (trainedSet_.find(tmp_branch) != trainedSet_.end()) {
00249           continue;
00250         }
00251         rawTriggerTreeCache_->AddBranch(tmp_branch, kTRUE);
00252       }
00253       performedSwitchOver_ = false;
00254       rawTriggerTreeCache_->StopLearningPhase();
00255       filePtr_->SetCacheRead(0);
00256 
00257       return rawTriggerTreeCache_.get();
00258     } else if (entryNumber_ < rawTriggerSwitchOverEntry_) {
00259       // The raw trigger has fired and it contents are valid.
00260       return rawTriggerTreeCache_.get();
00261     } else if (rawTriggerSwitchOverEntry_ > 0) {
00262       // The raw trigger has fired, but we are out of the cache.  Use the
00263       // triggerCache instead.
00264       if (!performedSwitchOver_) {
00265         rawTriggerTreeCache_.reset();
00266         performedSwitchOver_ = true;
00267 
00268         // Train the triggerCache
00269         tree_->SetCacheSize(static_cast<Long64_t>(5*1024*1024));
00270         triggerTreeCache_.reset(dynamic_cast<TTreeCache*>(filePtr_->GetCacheRead()));
00271         triggerTreeCache_->SetEnablePrefetching(false);
00272         triggerTreeCache_->SetLearnEntries(0);
00273         triggerTreeCache_->SetEntryRange(entryNumber, tree_->GetEntries());
00274         for(std::unordered_set<TBranch*>::const_iterator it = triggerSet_.begin(), itEnd = triggerSet_.end();
00275             it != itEnd;
00276             it++)
00277         {
00278           triggerTreeCache_->AddBranch(*it, kTRUE);
00279         }
00280         triggerTreeCache_->StopLearningPhase();
00281         filePtr_->SetCacheRead(0);
00282       }
00283       return triggerTreeCache_.get();
00284     } else if (entryNumber_ < rawTriggerSwitchOverEntry_) {
00285       // The raw trigger has fired and it contents are valid.
00286       return rawTriggerTreeCache_.get();
00287     } else if (rawTriggerSwitchOverEntry_ > 0) {
00288       // The raw trigger has fired, but we are out of the cache.  Use the
00289       // triggerCache instead.
00290       if (!performedSwitchOver_) {
00291         rawTriggerTreeCache_.reset();
00292         performedSwitchOver_ = true; 
00293         
00294         // Train the triggerCache
00295         tree_->SetCacheSize(static_cast<Long64_t>(5*1024*1024));
00296         triggerTreeCache_.reset(dynamic_cast<TTreeCache*>(filePtr_->GetCacheRead()));
00297         triggerTreeCache_->SetEnablePrefetching(false);
00298         triggerTreeCache_->SetLearnEntries(0);
00299         triggerTreeCache_->SetEntryRange(entryNumber, tree_->GetEntries());
00300         for(std::unordered_set<TBranch*>::const_iterator it = triggerSet_.begin(), itEnd = triggerSet_.end();
00301               it != itEnd;
00302               it++)
00303         { 
00304           triggerTreeCache_->AddBranch(*it, kTRUE);
00305         }
00306         triggerTreeCache_->StopLearningPhase();
00307         filePtr_->SetCacheRead(0);
00308       }
00309       return triggerTreeCache_.get();
00310     }
00311 
00312     // By construction, this case should be impossible.
00313     assert (false);
00314     return NULL;
00315   }
00316 
00317   inline TTreeCache*
00318   RootTree::selectCache(TBranch* branch, EntryNumber entryNumber) const {
00319     TTreeCache *triggerCache = NULL;
00320     if (!treeCache_) {
00321       return NULL;
00322     } else if (treeCache_->IsLearning() && rawTreeCache_) {
00323       treeCache_->AddBranch(branch, kTRUE);
00324       trainedSet_.insert(branch);
00325       return rawTreeCache_.get();
00326     } else if ((triggerCache = checkTriggerCache(branch, entryNumber))) {
00327       // A NULL return value from checkTriggerCache indicates the trigger cache case
00328       // does not apply, and we should continue below.
00329       return triggerCache;
00330     } else {
00331       // The "normal" TTreeCache case.
00332       return treeCache_.get();
00333     }
00334   }
00335 
00336   void
00337   RootTree::getEntry(TBranch* branch, EntryNumber entryNumber) const {
00338     try {
00339       TTreeCache * cache = selectCache(branch, entryNumber);
00340       filePtr_->SetCacheRead(cache);
00341       branch->GetEntry(entryNumber);
00342       filePtr_->SetCacheRead(0);
00343     } catch(cms::Exception const& e) {
00344       // We make sure the treeCache_ is detached from the file,
00345       // so that ROOT does not also delete it.
00346       filePtr_->SetCacheRead(0);
00347       Exception t(errors::FileReadError, "", e);
00348       t.addContext(std::string("Reading branch ")+branch->GetName());
00349       throw t;
00350     }
00351   }
00352 
00353   void
00354   RootTree::startTraining() {
00355     if (cacheSize_ == 0) {
00356       return;
00357     }
00358     assert(treeCache_);
00359     assert(branchType_ == InEvent);
00360     assert(!rawTreeCache_);
00361     treeCache_->SetLearnEntries(learningEntries_);
00362     tree_->SetCacheSize(static_cast<Long64_t>(cacheSize_));
00363     rawTreeCache_.reset(dynamic_cast<TTreeCache *>(filePtr_->GetCacheRead()));
00364     rawTreeCache_->SetEnablePrefetching(false);
00365     filePtr_->SetCacheRead(0);
00366     rawTreeCache_->SetLearnEntries(0);
00367     switchOverEntry_ = entryNumber_ + learningEntries_;
00368     rawTreeCache_->StartLearningPhase();
00369     rawTreeCache_->SetEntryRange(entryNumber_, switchOverEntry_);
00370     rawTreeCache_->AddBranch("*", kTRUE);
00371     rawTreeCache_->StopLearningPhase();
00372     treeCache_->StartLearningPhase();
00373     treeCache_->SetEntryRange(switchOverEntry_, tree_->GetEntries());
00374     treeCache_->AddBranch(poolNames::branchListIndexesBranchName().c_str(), kTRUE);
00375     treeCache_->AddBranch(BranchTypeToAuxiliaryBranchName(branchType_).c_str(), kTRUE);
00376     trainedSet_.clear();
00377     triggerSet_.clear();
00378     assert(treeCache_->GetTree() == tree_);
00379   }
00380 
00381   void
00382   RootTree::stopTraining() {
00383     filePtr_->SetCacheRead(treeCache_.get());
00384     treeCache_->StopLearningPhase();
00385     filePtr_->SetCacheRead(0);
00386     rawTreeCache_.reset();
00387   }
00388 
00389   void
00390   RootTree::close () {
00391     // The TFile is about to be closed, and destructed.
00392     // Just to play it safe, zero all pointers to quantities that are owned by the TFile.
00393     auxBranch_  = branchEntryInfoBranch_ = 0;
00394     tree_ = metaTree_ = infoTree_ = 0;
00395     // We own the treeCache_.
00396     // We make sure the treeCache_ is detached from the file,
00397     // so that ROOT does not also delete it.
00398     filePtr_->SetCacheRead(0);
00399     // We *must* delete the TTreeCache here because the TFilePrefetch object
00400     // references the TFile.  If TFile is closed, before the TTreeCache is
00401     // deleted, the TFilePrefetch may continue to do TFile operations, causing
00402     // deadlocks or exceptions.
00403     treeCache_.reset();
00404     rawTreeCache_.reset();
00405     triggerTreeCache_.reset();
00406     rawTriggerTreeCache_.reset();
00407     // We give up our shared ownership of the TFile itself.
00408     filePtr_.reset();
00409   }
00410 
00411   void
00412   RootTree::trainCache(char const* branchNames) {
00413     if (cacheSize_ == 0) {
00414       return;
00415     }
00416     tree_->LoadTree(0);
00417     assert(treeCache_);
00418     filePtr_->SetCacheRead(treeCache_.get());
00419     treeCache_->StartLearningPhase();
00420     treeCache_->SetEntryRange(0, tree_->GetEntries());
00421     treeCache_->AddBranch(branchNames, kTRUE);
00422     treeCache_->StopLearningPhase();
00423     assert(treeCache_->GetTree() == tree_);
00424     // We own the treeCache_.
00425     // We make sure the treeCache_ is detached from the file,
00426     // so that ROOT does not also delete it.
00427     filePtr_->SetCacheRead(0);
00428 
00429     // Must also manually add things to the trained set.
00430     TObjArray *branches = tree_->GetListOfBranches();
00431     int branchCount = branches->GetEntriesFast();
00432     for (int i=0;i<branchCount;i++) {
00433        TBranch *branch = (TBranch*)branches->UncheckedAt(i);
00434        if ((branchNames[0] == '*') || (strcmp(branchNames, branch->GetName()) == 0)) {
00435           trainedSet_.insert(branch);
00436        } 
00437     } 
00438  
00439   }
00440 
00441   namespace roottree {
00442     Int_t
00443     getEntry(TBranch* branch, EntryNumber entryNumber) {
00444       Int_t n = 0;
00445       try {
00446         n = branch->GetEntry(entryNumber);
00447       }
00448       catch(cms::Exception const& e) {
00449         throw Exception(errors::FileReadError, "", e);
00450       }
00451       return n;
00452     }
00453 
00454     Int_t
00455     getEntry(TTree* tree, EntryNumber entryNumber) {
00456       Int_t n = 0;
00457       try {
00458         n = tree->GetEntry(entryNumber);
00459       }
00460       catch(cms::Exception const& e) {
00461         throw Exception (errors::FileReadError, "", e);
00462       }
00463       return n;
00464     }
00465 
00466     std::unique_ptr<TTreeCache>
00467     trainCache(TTree* tree, InputFile& file, unsigned int cacheSize, char const* branchNames) {
00468       tree->LoadTree(0);
00469       tree->SetCacheSize(cacheSize);
00470       std::unique_ptr<TTreeCache> treeCache(dynamic_cast<TTreeCache*>(file.GetCacheRead()));
00471       if (0 != treeCache.get()) {
00472         treeCache->StartLearningPhase();
00473         treeCache->SetEntryRange(0, tree->GetEntries());
00474         treeCache->AddBranch(branchNames, kTRUE);
00475         treeCache->StopLearningPhase();
00476       }
00477       // We own the treeCache_.
00478       // We make sure the treeCache_ is detached from the file,
00479       // so that ROOT does not also delete it.
00480       file.SetCacheRead(0);
00481       return treeCache;
00482     }
00483   }
00484 }