CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_4_1_8_patch12/src/IOPool/Output/src/PoolOutputModule.cc

Go to the documentation of this file.
00001 #include "IOPool/Output/interface/PoolOutputModule.h"
00002 
00003 #include "FWCore/MessageLogger/interface/JobReport.h"
00004 #include "IOPool/Output/src/RootOutputFile.h"
00005 
00006 #include "FWCore/Framework/interface/LuminosityBlockPrincipal.h"
00007 #include "FWCore/Framework/interface/RunPrincipal.h"
00008 #include "FWCore/Framework/interface/FileBlock.h"
00009 #include "FWCore/ParameterSet/interface/ParameterSet.h"
00010 #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
00011 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
00012 #include "FWCore/ServiceRegistry/interface/Service.h"
00013 #include "DataFormats/Provenance/interface/BranchDescription.h"
00014 #include "FWCore/Utilities/interface/Algorithms.h"
00015 #include "FWCore/Utilities/interface/EDMException.h"
00016 #include "FWCore/Utilities/interface/TimeOfDay.h"
00017 
00018 #include "TTree.h"
00019 #include "TBranchElement.h"
00020 #include "TObjArray.h"
00021 
00022 #include <fstream>
00023 #include <iomanip>
00024 #include <sstream>
00025 
00026 namespace edm {
00027   PoolOutputModule::PoolOutputModule(ParameterSet const& pset) :
00028     OutputModule(pset),
00029     rootServiceChecker_(),
00030     auxItems_(),
00031     selectedOutputItemList_(),
00032     fileName_(pset.getUntrackedParameter<std::string>("fileName")),
00033     logicalFileName_(pset.getUntrackedParameter<std::string>("logicalFileName")),
00034     catalog_(pset.getUntrackedParameter<std::string>("catalog")),
00035     maxFileSize_(pset.getUntrackedParameter<int>("maxSize")),
00036     compressionLevel_(pset.getUntrackedParameter<int>("compressionLevel")),
00037     basketSize_(pset.getUntrackedParameter<int>("basketSize")),
00038     eventAutoFlushSize_(pset.getUntrackedParameter<int>("eventAutoFlushCompressedSize")),
00039     splitLevel_(std::min<int>(pset.getUntrackedParameter<int>("splitLevel") + 1, 99)),
00040     basketOrder_(pset.getUntrackedParameter<std::string>("sortBaskets")),
00041     treeMaxVirtualSize_(pset.getUntrackedParameter<int>("treeMaxVirtualSize")),
00042     whyNotFastClonable_(pset.getUntrackedParameter<bool>("fastCloning") ? FileBlock::CanFastClone : FileBlock::DisabledInConfigFile),
00043     dropMetaData_(DropNone),
00044     moduleLabel_(pset.getParameter<std::string>("@module_label")),
00045     initializedFromInput_(false),
00046     outputFileCount_(0),
00047     inputFileCount_(0),
00048     childIndex_(0U),
00049     numberOfDigitsInIndex_(0U),
00050     overrideInputFileSplitLevels_(pset.getUntrackedParameter<bool>("overrideInputFileSplitLevels")),
00051     rootOutputFile_(),
00052     statusFileName_() {
00053 
00054       if (pset.getUntrackedParameter<bool>("writeStatusFile")) {
00055         std::ostringstream statusfilename;
00056         statusfilename << moduleLabel_ << '_' << getpid();
00057         statusFileName_ = statusfilename.str();
00058       }
00059 
00060       std::string dropMetaData(pset.getUntrackedParameter<std::string>("dropMetaData"));
00061       if(dropMetaData.empty()) dropMetaData_ = DropNone;
00062       else if(dropMetaData == std::string("NONE")) dropMetaData_ = DropNone;
00063       else if(dropMetaData == std::string("DROPPED")) dropMetaData_ = DropDroppedPrior;
00064       else if(dropMetaData == std::string("PRIOR")) dropMetaData_ = DropPrior;
00065       else if(dropMetaData == std::string("ALL")) dropMetaData_ = DropAll;
00066       else {
00067         throw edm::Exception(errors::Configuration, "Illegal dropMetaData parameter value: ")
00068             << dropMetaData << ".\n"
00069             << "Legal values are 'NONE', 'DROPPED', 'PRIOR', and 'ALL'.\n";
00070       }
00071 
00072     if (!wantAllEvents()) {
00073       whyNotFastClonable_+= FileBlock::EventSelectionUsed;
00074     }
00075 
00076     // We don't use this next parameter, but we read it anyway because it is part
00077     // of the configuration of this module.  An external parser creates the
00078     // configuration by reading this source code.
00079     pset.getUntrackedParameterSet("dataset");
00080   }
00081 
00082   std::string const& PoolOutputModule::currentFileName() const {
00083     return rootOutputFile_->fileName();
00084   }
00085 
00086 
00087   PoolOutputModule::AuxItem::AuxItem() :
00088         basketSize_(BranchDescription::invalidBasketSize) {}
00089 
00090   PoolOutputModule::OutputItem::OutputItem() :
00091         branchDescription_(0),
00092         product_(0),
00093         splitLevel_(BranchDescription::invalidSplitLevel),
00094         basketSize_(BranchDescription::invalidBasketSize) {}
00095 
00096   PoolOutputModule::OutputItem::OutputItem(BranchDescription const* bd, int splitLevel, int basketSize) :
00097         branchDescription_(bd),
00098         product_(0),
00099         splitLevel_(splitLevel),
00100         basketSize_(basketSize) {}
00101 
00102 
00103   PoolOutputModule::OutputItem::Sorter::Sorter(TTree* tree) : treeMap_(new std::map<std::string, int>) {
00104     // Fill a map mapping branch names to an index specifying the order in the tree.
00105     if(tree != 0) {
00106       TObjArray* branches = tree->GetListOfBranches();
00107       for(int i = 0; i < branches->GetEntries(); ++i) {
00108         TBranchElement* br = (TBranchElement*)branches->At(i);
00109         treeMap_->insert(std::make_pair(std::string(br->GetName()), i));
00110       }
00111     }
00112   }
00113 
00114   bool
00115   PoolOutputModule::OutputItem::Sorter::operator()(OutputItem const& lh, OutputItem const& rh) const {
00116     // Provides a comparison for sorting branches according to the index values in treeMap_.
00117     // Branches not found are always put at the end (i.e. not found > found).
00118     if(treeMap_->empty()) return lh < rh;
00119     std::string const& lstring = lh.branchDescription_->branchName();
00120     std::string const& rstring = rh.branchDescription_->branchName();
00121     std::map<std::string, int>::const_iterator lit = treeMap_->find(lstring);
00122     std::map<std::string, int>::const_iterator rit = treeMap_->find(rstring);
00123     bool lfound = (lit != treeMap_->end());
00124     bool rfound = (rit != treeMap_->end());
00125     if(lfound && rfound) {
00126       return lit->second < rit->second;
00127     } else if(lfound) {
00128       return true;
00129     } else if(rfound) {
00130       return false;
00131     }
00132     return lh < rh;
00133   }
00134 
00135   void PoolOutputModule::fillSelectedItemList(BranchType branchType, TTree* theInputTree) {
00136 
00137     Selections const& keptVector =    keptProducts()[branchType];
00138     OutputItemList&   outputItemList = selectedOutputItemList_[branchType];
00139     AuxItem&   auxItem = auxItems_[branchType];
00140 
00141     // Fill AuxItem
00142     if (theInputTree != 0 && !overrideInputFileSplitLevels_) {
00143       TBranch* auxBranch = theInputTree->GetBranch(BranchTypeToAuxiliaryBranchName(branchType).c_str());
00144       if (auxBranch) {
00145         auxItem.basketSize_ = auxBranch->GetBasketSize();
00146       } else {
00147         auxItem.basketSize_ = basketSize_;
00148       }
00149     } else {
00150       auxItem.basketSize_ = basketSize_;
00151     }
00152 
00153     // Fill outputItemList with an entry for each branch.
00154     for(Selections::const_iterator it = keptVector.begin(), itEnd = keptVector.end(); it != itEnd; ++it) {
00155       int splitLevel = BranchDescription::invalidSplitLevel;
00156       int basketSize = BranchDescription::invalidBasketSize;
00157 
00158       BranchDescription const& prod = **it;
00159       TBranch* theBranch = ((!prod.produced() && theInputTree != 0 && !overrideInputFileSplitLevels_) ? theInputTree->GetBranch(prod.branchName().c_str()) : 0);
00160 
00161       if(theBranch != 0) {
00162         splitLevel = theBranch->GetSplitLevel();
00163         basketSize = theBranch->GetBasketSize();
00164       } else {
00165         splitLevel = (prod.splitLevel() == BranchDescription::invalidSplitLevel ? splitLevel_ : prod.splitLevel());
00166         basketSize = (prod.basketSize() == BranchDescription::invalidBasketSize ? basketSize_ : prod.basketSize());
00167       }
00168       outputItemList.push_back(OutputItem(&prod, splitLevel, basketSize));
00169     }
00170 
00171     // Sort outputItemList to allow fast copying.
00172     // The branches in outputItemList must be in the same order as in the input tree, with all new branches at the end.
00173     sort_all(outputItemList, OutputItem::Sorter(theInputTree));
00174   }
00175 
00176   void PoolOutputModule::beginInputFile(FileBlock const& fb) {
00177     if(isFileOpen()) {
00178       rootOutputFile_->beginInputFile(fb, remainingEvents());
00179     }
00180   }
00181 
00182   void PoolOutputModule::openFile(FileBlock const& fb) {
00183     if(!isFileOpen()) {
00184       doOpenFile();
00185       beginInputFile(fb);
00186     }
00187   }
00188 
00189   void PoolOutputModule::respondToOpenInputFile(FileBlock const& fb) {
00190     if(!initializedFromInput_) {
00191       for(int i = InEvent; i < NumBranchTypes; ++i) {
00192         BranchType branchType = static_cast<BranchType>(i);
00193         TTree* theInputTree = (branchType == InEvent ? fb.tree() :
00194                               (branchType == InLumi ? fb.lumiTree() :
00195                                fb.runTree()));
00196         fillSelectedItemList(branchType, theInputTree);
00197       }
00198       initializedFromInput_ = true;
00199     }
00200     ++inputFileCount_;
00201     beginInputFile(fb);
00202   }
00203 
00204   void PoolOutputModule::respondToCloseInputFile(FileBlock const& fb) {
00205     if(rootOutputFile_) rootOutputFile_->respondToCloseInputFile(fb);
00206   }
00207 
00208   void PoolOutputModule::postForkReacquireResources(unsigned int iChildIndex, unsigned int iNumberOfChildren) {
00209     childIndex_ = iChildIndex;
00210     while (iNumberOfChildren != 0) {
00211       ++numberOfDigitsInIndex_;
00212       iNumberOfChildren /= 10;
00213     }
00214     if (numberOfDigitsInIndex_ == 0) {
00215       numberOfDigitsInIndex_ = 3; // Protect against zero iNumberOfChildren
00216     }
00217   }
00218 
00219   PoolOutputModule::~PoolOutputModule() {
00220   }
00221 
00222   void PoolOutputModule::write(EventPrincipal const& e) {
00223       rootOutputFile_->writeOne(e);
00224       if (!statusFileName_.empty()) {
00225         std::ofstream statusFile(statusFileName_.c_str());
00226         statusFile << e.id() << " time: " << std::setprecision(3) << TimeOfDay() << '\n';
00227         statusFile.close();
00228       }
00229   }
00230 
00231   void PoolOutputModule::writeLuminosityBlock(LuminosityBlockPrincipal const& lb) {
00232       rootOutputFile_->writeLuminosityBlock(lb);
00233       Service<JobReport> reportSvc;
00234       reportSvc->reportLumiSection(lb.id().run(), lb.id().luminosityBlock());
00235   }
00236 
00237   void PoolOutputModule::writeRun(RunPrincipal const& r) {
00238       rootOutputFile_->writeRun(r);
00239       Service<JobReport> reportSvc;
00240       reportSvc->reportRunNumber(r.run());
00241   }
00242 
00243   // At some later date, we may move functionality from finishEndFile() to here.
00244   void PoolOutputModule::startEndFile() { }
00245 
00246 
00247   void PoolOutputModule::writeFileFormatVersion() { rootOutputFile_->writeFileFormatVersion(); }
00248   void PoolOutputModule::writeFileIdentifier() { rootOutputFile_->writeFileIdentifier(); }
00249   void PoolOutputModule::writeIndexIntoFile() { rootOutputFile_->writeIndexIntoFile(); }
00250   void PoolOutputModule::writeProcessConfigurationRegistry() { rootOutputFile_->writeProcessConfigurationRegistry(); }
00251   void PoolOutputModule::writeProcessHistoryRegistry() { rootOutputFile_->writeProcessHistoryRegistry(); }
00252   void PoolOutputModule::writeParameterSetRegistry() { rootOutputFile_->writeParameterSetRegistry(); }
00253   void PoolOutputModule::writeProductDescriptionRegistry() { rootOutputFile_->writeProductDescriptionRegistry(); }
00254   void PoolOutputModule::writeParentageRegistry() { rootOutputFile_->writeParentageRegistry(); }
00255   void PoolOutputModule::writeBranchIDListRegistry() { rootOutputFile_->writeBranchIDListRegistry(); }
00256   void PoolOutputModule::writeProductDependencies() { rootOutputFile_->writeProductDependencies(); }
00257   void PoolOutputModule::finishEndFile() { rootOutputFile_->finishEndFile(); rootOutputFile_.reset(); }
00258   bool PoolOutputModule::isFileOpen() const { return rootOutputFile_.get() != 0; }
00259   bool PoolOutputModule::shouldWeCloseFile() const { return rootOutputFile_->shouldWeCloseFile(); }
00260 
00261   void PoolOutputModule::doOpenFile() {
00262       if(inputFileCount_ == 0) {
00263         throw edm::Exception(errors::LogicError)
00264           << "Attempt to open output file before input file. "
00265           << "Please report this to the core framework developers.\n";
00266       }
00267       std::string suffix(".root");
00268       std::string::size_type offset = fileName().rfind(suffix);
00269       bool ext = (offset == fileName().size() - suffix.size());
00270       if(!ext) suffix.clear();
00271       std::string fileBase(ext ? fileName().substr(0, offset) : fileName());
00272       std::ostringstream ofilename;
00273       std::ostringstream lfilename;
00274       ofilename << fileBase;
00275       lfilename << logicalFileName();
00276       if(numberOfDigitsInIndex_) {
00277         ofilename << '_' << std::setw(numberOfDigitsInIndex_) << std::setfill('0') << childIndex_;
00278         if(!logicalFileName().empty()) {
00279           lfilename << '_' << std::setw(numberOfDigitsInIndex_) << std::setfill('0') << childIndex_;
00280         }
00281       }
00282       if(outputFileCount_) {
00283         ofilename << std::setw(3) << std::setfill('0') << outputFileCount_;
00284         if(!logicalFileName().empty()) {
00285           lfilename << std::setw(3) << std::setfill('0') << outputFileCount_;
00286         }
00287       }
00288       ofilename << suffix;
00289       rootOutputFile_.reset(new RootOutputFile(this, ofilename.str(), lfilename.str()));
00290       ++outputFileCount_;
00291   }
00292 
00293   void
00294   PoolOutputModule::fillDescriptions(ConfigurationDescriptions & descriptions) {
00295     std::string defaultString;
00296     ParameterSetDescription desc;
00297     desc.setComment("Writes runs, lumis, and events into EDM/ROOT files.");
00298     desc.addUntracked<std::string>("fileName")
00299         ->setComment("Name of output file.");
00300     desc.addUntracked<std::string>("logicalFileName", defaultString)
00301         ->setComment("Passed to job report. Otherwise unused by module.");
00302     desc.addUntracked<std::string>("catalog", defaultString)
00303         ->setComment("Passed to job report. Otherwise unused by module.");
00304     desc.addUntracked<int>("maxSize", 0x7f000000)
00305         ->setComment("Maximum output file size, in kB.\n"
00306                      "If over maximum, new output file will be started at next input file transition.");
00307     desc.addUntracked<int>("compressionLevel", 7)
00308         ->setComment("ROOT compression level of output file.");
00309     desc.addUntracked<int>("basketSize", 16384)
00310         ->setComment("Default ROOT basket size in output file.");
00311     desc.addUntracked<int>("eventAutoFlushCompressedSize",-1)->setComment("Set ROOT auto flush stored data size (in bytes) for event TTree. The value sets how large the compressed buffer is allowed to get. The uncompressed buffer can be quite a bit larger than this depending on the average compression ratio. The value of -1 just uses ROOT's default value. The value of 0 turns off this feature.");
00312     desc.addUntracked<int>("splitLevel", 99)
00313         ->setComment("Default ROOT branch split level in output file.");
00314     desc.addUntracked<std::string>("sortBaskets", std::string("sortbasketsbyoffset"))
00315         ->setComment("Legal values: 'sortbasketsbyoffset', 'sortbasketsbybranch', 'sortbasketsbyentry'.\n"
00316                      "Used by ROOT when fast copying. Affects performance.");
00317     desc.addUntracked<int>("treeMaxVirtualSize", -1)
00318         ->setComment("Size of ROOT TTree TBasket cache.  Affects performance.");
00319     desc.addUntracked<bool>("fastCloning", true)
00320         ->setComment("True:  Allow fast copying, if possible.\n"
00321                      "False: Disable fast copying.");
00322     desc.addUntracked<bool>("overrideInputFileSplitLevels", false)
00323         ->setComment("False: Use branch split levels and basket sizes from input file, if possible.\n"
00324                      "True:  Always use specified or default split levels and basket sizes.");
00325     desc.addUntracked<bool>("writeStatusFile", false)
00326         ->setComment("Write a status file. Intended for use by workflow management.");
00327     desc.addUntracked<std::string>("dropMetaData", defaultString)
00328         ->setComment("Determines handling of per product per event metadata.  Options are:\n"
00329                      "'NONE':    Keep all of it.\n"
00330                      "'DROPPED': Keep it for products produced in current process and all kept products. Drop it for dropped products produced in prior processes.\n"
00331                      "'PRIOR':   Keep it for products produced in current process. Drop it for products produced in prior processes.\n"
00332                      "'ALL':     Drop all of it.");
00333     ParameterSetDescription dataSet;
00334     dataSet.setAllowAnything();
00335     desc.addUntracked<ParameterSetDescription>("dataset", dataSet)
00336      ->setComment("PSet is only used by Data Operations and not by this module.");
00337 
00338     OutputModule::fillDescription(desc);
00339 
00340     descriptions.add("edmOutput", desc);
00341   }
00342 }