CMS 3D CMS Logo

PoolOutputModule.cc
Go to the documentation of this file.
2 
4 
24 
25 #include "TTree.h"
26 #include "TBranchElement.h"
27 #include "TObjArray.h"
28 #include "RVersion.h"
29 
30 #include <fstream>
31 #include <iomanip>
32 #include <sstream>
33 #include "boost/algorithm/string.hpp"
34 
35 namespace edm {
39  rootServiceChecker_(),
40  auxItems_(),
41  selectedOutputItemList_(),
42  fileName_(pset.getUntrackedParameter<std::string>("fileName")),
43  logicalFileName_(pset.getUntrackedParameter<std::string>("logicalFileName")),
44  catalog_(pset.getUntrackedParameter<std::string>("catalog")),
45  maxFileSize_(pset.getUntrackedParameter<int>("maxSize")),
46  compressionLevel_(pset.getUntrackedParameter<int>("compressionLevel")),
47  compressionAlgorithm_(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
48  basketSize_(pset.getUntrackedParameter<int>("basketSize")),
49  eventAutoFlushSize_(pset.getUntrackedParameter<int>("eventAutoFlushCompressedSize")),
50  splitLevel_(std::min<int>(pset.getUntrackedParameter<int>("splitLevel") + 1, 99)),
51  basketOrder_(pset.getUntrackedParameter<std::string>("sortBaskets")),
52  treeMaxVirtualSize_(pset.getUntrackedParameter<int>("treeMaxVirtualSize")),
53  whyNotFastClonable_(pset.getUntrackedParameter<bool>("fastCloning") ? FileBlock::CanFastClone
54  : FileBlock::DisabledInConfigFile),
55  dropMetaData_(DropNone),
56  moduleLabel_(pset.getParameter<std::string>("@module_label")),
57  initializedFromInput_(false),
58  outputFileCount_(0),
59  inputFileCount_(0),
60  childIndex_(0U),
61  numberOfDigitsInIndex_(0U),
62  branchParents_(),
63  branchChildren_(),
64  overrideInputFileSplitLevels_(pset.getUntrackedParameter<bool>("overrideInputFileSplitLevels")),
65  rootOutputFile_(),
66  statusFileName_() {
67  if (pset.getUntrackedParameter<bool>("writeStatusFile")) {
68  std::ostringstream statusfilename;
69  statusfilename << moduleLabel_ << '_' << getpid();
70  statusFileName_ = statusfilename.str();
71  }
72 
74  if (dropMetaData.empty())
76  else if (dropMetaData == std::string("NONE"))
78  else if (dropMetaData == std::string("DROPPED"))
80  else if (dropMetaData == std::string("PRIOR"))
82  else if (dropMetaData == std::string("ALL"))
84  else {
85  throw edm::Exception(errors::Configuration, "Illegal dropMetaData parameter value: ")
86  << dropMetaData << ".\n"
87  << "Legal values are 'NONE', 'DROPPED', 'PRIOR', and 'ALL'.\n";
88  }
89 
90  if (!wantAllEvents()) {
92  }
93 
94  auto const& specialSplit{pset.getUntrackedParameterSetVector("overrideBranchesSplitLevel")};
95 
96  specialSplitLevelForBranches_.reserve(specialSplit.size());
97  for (auto const& s : specialSplit) {
98  specialSplitLevelForBranches_.emplace_back(s.getUntrackedParameter<std::string>("branch"),
99  s.getUntrackedParameter<int>("splitLevel"));
100  }
101 
102  // We don't use this next parameter, but we read it anyway because it is part
103  // of the configuration of this module. An external parser creates the
104  // configuration by reading this source code.
105  pset.getUntrackedParameterSet("dataset");
106  }
107 
110  for (auto const& prod : reg->productList()) {
111  BranchDescription const& desc = prod.second;
112  if (desc.produced() && desc.branchType() == InEvent && !desc.isAlias()) {
113  producedBranches_.emplace_back(desc.branchID());
114  }
115  }
116  }
117 
118  std::string const& PoolOutputModule::currentFileName() const { return rootOutputFile_->fileName(); }
119 
121 
123  : branchDescription_(nullptr),
124  token_(),
125  product_(nullptr),
126  splitLevel_(BranchDescription::invalidSplitLevel),
127  basketSize_(BranchDescription::invalidBasketSize) {}
128 
130  EDGetToken const& token,
131  int splitLevel,
132  int basketSize)
133  : branchDescription_(bd), token_(token), product_(nullptr), splitLevel_(splitLevel), basketSize_(basketSize) {}
134 
136  // Fill a map mapping branch names to an index specifying the order in the tree.
137  if (tree != nullptr) {
138  TObjArray* branches = tree->GetListOfBranches();
139  for (int i = 0; i < branches->GetEntries(); ++i) {
140  TBranchElement* br = (TBranchElement*)branches->At(i);
141  treeMap_->insert(std::make_pair(std::string(br->GetName()), i));
142  }
143  }
144  }
145 
147  // Provides a comparison for sorting branches according to the index values in treeMap_.
148  // Branches not found are always put at the end (i.e. not found > found).
149  if (treeMap_->empty())
150  return lh < rh;
151  std::string const& lstring = lh.branchDescription_->branchName();
152  std::string const& rstring = rh.branchDescription_->branchName();
153  std::map<std::string, int>::const_iterator lit = treeMap_->find(lstring);
154  std::map<std::string, int>::const_iterator rit = treeMap_->find(rstring);
155  bool lfound = (lit != treeMap_->end());
156  bool rfound = (rit != treeMap_->end());
157  if (lfound && rfound) {
158  return lit->second < rit->second;
159  } else if (lfound) {
160  return true;
161  } else if (rfound) {
162  return false;
163  }
164  return lh < rh;
165  }
166 
167  inline bool PoolOutputModule::SpecialSplitLevelForBranch::match(std::string const& iBranchName) const {
168  return std::regex_match(iBranchName, branch_);
169  }
170 
171  std::regex PoolOutputModule::SpecialSplitLevelForBranch::convert(std::string const& iGlobBranchExpression) const {
172  std::string tmp(iGlobBranchExpression);
173  boost::replace_all(tmp, "*", ".*");
174  boost::replace_all(tmp, "?", ".");
175  return std::regex(tmp);
176  }
177 
179  SelectedProducts const& keptVector = keptProducts()[branchType];
181  AuxItem& auxItem = auxItems_[branchType];
182 
183  // Fill AuxItem
184  if (theInputTree != nullptr && !overrideInputFileSplitLevels_) {
185  TBranch* auxBranch = theInputTree->GetBranch(BranchTypeToAuxiliaryBranchName(branchType).c_str());
186  if (auxBranch) {
187  auxItem.basketSize_ = auxBranch->GetBasketSize();
188  } else {
189  auxItem.basketSize_ = basketSize_;
190  }
191  } else {
192  auxItem.basketSize_ = basketSize_;
193  }
194 
195  // Fill outputItemList with an entry for each branch.
196  for (auto const& kept : keptVector) {
199 
200  BranchDescription const& prod = *kept.first;
201  TBranch* theBranch = ((!prod.produced() && theInputTree != nullptr && !overrideInputFileSplitLevels_)
202  ? theInputTree->GetBranch(prod.branchName().c_str())
203  : nullptr);
204 
205  if (theBranch != nullptr) {
206  splitLevel = theBranch->GetSplitLevel();
207  basketSize = theBranch->GetBasketSize();
208  } else {
209  splitLevel = (prod.splitLevel() == BranchDescription::invalidSplitLevel ? splitLevel_ : prod.splitLevel());
210  for (auto const& b : specialSplitLevelForBranches_) {
211  if (b.match(prod.branchName())) {
212  splitLevel = b.splitLevel_;
213  }
214  }
215  basketSize = (prod.basketSize() == BranchDescription::invalidBasketSize ? basketSize_ : prod.basketSize());
216  }
217  outputItemList.emplace_back(&prod, kept.second, splitLevel, basketSize);
218  }
219 
220  // Sort outputItemList to allow fast copying.
221  // The branches in outputItemList must be in the same order as in the input tree, with all new branches at the end.
222  sort_all(outputItemList, OutputItem::Sorter(theInputTree));
223  }
224 
226  if (isFileOpen()) {
227  //Faster to read ChildrenBranches directly from input
228  // file than to build it every event
229  auto const& branchToChildMap = fb.branchChildren().childLookup();
230  for (auto const& parentToChildren : branchToChildMap) {
231  for (auto const& child : parentToChildren.second) {
232  branchChildren_.insertChild(parentToChildren.first, child);
233  }
234  }
235  rootOutputFile_->beginInputFile(fb, remainingEvents());
236  }
237  }
238 
240  if (!isFileOpen()) {
241  reallyOpenFile();
242  beginInputFile(fb);
243  }
244  }
245 
247  if (!initializedFromInput_) {
248  for (int i = InEvent; i < NumBranchTypes; ++i) {
249  BranchType branchType = static_cast<BranchType>(i);
250  TTree* theInputTree =
251  (branchType == InEvent ? fb.tree() : (branchType == InLumi ? fb.lumiTree() : fb.runTree()));
252  fillSelectedItemList(branchType, theInputTree);
253  }
254  initializedFromInput_ = true;
255  }
256  ++inputFileCount_;
257  beginInputFile(fb);
258  }
259 
261  if (rootOutputFile_)
262  rootOutputFile_->respondToCloseInputFile(fb);
263  }
264 
266  processesWithSelectedMergeableRunProducts_.assign(processes.begin(), processes.end());
267  }
268 
270 
273  rootOutputFile_->writeOne(e);
274  if (!statusFileName_.empty()) {
275  std::ofstream statusFile(statusFileName_.c_str());
276  statusFile << e.id() << " time: " << std::setprecision(3) << TimeOfDay() << '\n';
277  statusFile.close();
278  }
279  }
280 
282  rootOutputFile_->writeLuminosityBlock(lb);
283  }
284 
286 
289  branchParents_.clear();
290  startEndFile();
301  writeProductDependencies(); //branchChildren used here
303  finishEndFile();
304 
306  }
307 
308  // At some later date, we may move functionality from finishEndFile() to here.
310 
311  void PoolOutputModule::writeFileFormatVersion() { rootOutputFile_->writeFileFormatVersion(); }
312  void PoolOutputModule::writeFileIdentifier() { rootOutputFile_->writeFileIdentifier(); }
313  void PoolOutputModule::writeIndexIntoFile() { rootOutputFile_->writeIndexIntoFile(); }
315  rootOutputFile_->writeStoredMergeableRunProductMetadata();
316  }
317  void PoolOutputModule::writeProcessHistoryRegistry() { rootOutputFile_->writeProcessHistoryRegistry(); }
318  void PoolOutputModule::writeParameterSetRegistry() { rootOutputFile_->writeParameterSetRegistry(); }
319  void PoolOutputModule::writeProductDescriptionRegistry() { rootOutputFile_->writeProductDescriptionRegistry(); }
320  void PoolOutputModule::writeParentageRegistry() { rootOutputFile_->writeParentageRegistry(); }
321  void PoolOutputModule::writeBranchIDListRegistry() { rootOutputFile_->writeBranchIDListRegistry(); }
322  void PoolOutputModule::writeThinnedAssociationsHelper() { rootOutputFile_->writeThinnedAssociationsHelper(); }
323  void PoolOutputModule::writeProductDependencies() { rootOutputFile_->writeProductDependencies(); }
325  rootOutputFile_->finishEndFile();
326  rootOutputFile_ = nullptr;
327  } // propagate_const<T> has no reset() function
329  bool PoolOutputModule::isFileOpen() const { return rootOutputFile_.get() != nullptr; }
330  bool PoolOutputModule::shouldWeCloseFile() const { return rootOutputFile_->shouldWeCloseFile(); }
331 
332  std::pair<std::string, std::string> PoolOutputModule::physicalAndLogicalNameForNewFile() {
333  if (inputFileCount_ == 0) {
334  throw edm::Exception(errors::LogicError) << "Attempt to open output file before input file. "
335  << "Please report this to the core framework developers.\n";
336  }
337  std::string suffix(".root");
338  std::string::size_type offset = fileName().rfind(suffix);
339  bool ext = (offset == fileName().size() - suffix.size());
340  if (!ext)
341  suffix.clear();
342  std::string fileBase(ext ? fileName().substr(0, offset) : fileName());
343  std::ostringstream ofilename;
344  std::ostringstream lfilename;
345  ofilename << fileBase;
346  lfilename << logicalFileName();
348  ofilename << '_' << std::setw(numberOfDigitsInIndex_) << std::setfill('0') << childIndex_;
349  if (!logicalFileName().empty()) {
350  lfilename << '_' << std::setw(numberOfDigitsInIndex_) << std::setfill('0') << childIndex_;
351  }
352  }
353  if (outputFileCount_) {
354  ofilename << std::setw(3) << std::setfill('0') << outputFileCount_;
355  if (!logicalFileName().empty()) {
356  lfilename << std::setw(3) << std::setfill('0') << outputFileCount_;
357  }
358  }
359  ofilename << suffix;
361 
362  return std::make_pair(ofilename.str(), lfilename.str());
363  }
364 
367  rootOutputFile_ = std::make_unique<RootOutputFile>(
368  this,
369  names.first,
370  names.second,
371  processesWithSelectedMergeableRunProducts_); // propagate_const<T> has no reset() function
372  }
373 
375  BranchID const& branchID) {
376  ProductProvenance const* provenance = provRetriever->branchIDToProvenanceForProducedOnly(branchID);
377  if (provenance != nullptr) {
378  BranchParents::iterator it = branchParents_.find(branchID);
379  if (it == branchParents_.end()) {
380  it = branchParents_.insert(std::make_pair(branchID, std::set<ParentageID>())).first;
381  }
382  it->second.insert(provenance->parentageID());
383  }
384  }
385 
388  for (auto const& bid : producedBranches_) {
389  updateBranchParentsForOneBranch(provRetriever, bid);
390  }
392  if (helper) {
393  for (auto const& bid : subProcessParentageHelper()->producedProducts()) {
394  updateBranchParentsForOneBranch(provRetriever, bid);
395  }
396  }
397  }
398 
400  ModuleCallingContext const& iModuleCallingContext,
401  Principal const& iPrincipal) const {
402  if (DropAll != dropMetaData_) {
403  auto const* ep = dynamic_cast<EventPrincipal const*>(&iPrincipal);
404  if (ep) {
405  auto pr = ep->productProvenanceRetrieverPtr();
406  if (pr) {
407  pr->readProvenanceAsync(iTask, &iModuleCallingContext);
408  }
409  }
410  }
411  }
412 
414  for (auto const& branchParent : branchParents_) {
415  BranchID const& child = branchParent.first;
416  std::set<ParentageID> const& eIds = branchParent.second;
417  for (auto const& eId : eIds) {
418  Parentage entryDesc;
419  ParentageRegistry::instance()->getMapped(eId, entryDesc);
420  std::vector<BranchID> const& parents = entryDesc.parents();
421  for (auto const& parent : parents) {
423  }
424  }
425  }
426  }
427 
429  std::string defaultString;
430 
431  desc.setComment("Writes runs, lumis, and events into EDM/ROOT files.");
432  desc.addUntracked<std::string>("fileName")->setComment("Name of output file.");
433  desc.addUntracked<std::string>("logicalFileName", defaultString)
434  ->setComment("Passed to job report. Otherwise unused by module.");
435  desc.addUntracked<std::string>("catalog", defaultString)
436  ->setComment("Passed to job report. Otherwise unused by module.");
437  desc.addUntracked<int>("maxSize", 0x7f000000)
438  ->setComment(
439  "Maximum output file size, in kB.\n"
440  "If over maximum, new output file will be started at next input file transition.");
441  desc.addUntracked<int>("compressionLevel", 9)->setComment("ROOT compression level of output file.");
442  desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
443  ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB and LZMA");
444  desc.addUntracked<int>("basketSize", 16384)->setComment("Default ROOT basket size in output file.");
445  desc.addUntracked<int>("eventAutoFlushCompressedSize", 20 * 1024 * 1024)
446  ->setComment(
447  "Set ROOT auto flush stored data size (in bytes) for event TTree. The value sets how large the compressed "
448  "buffer is allowed to get. The uncompressed buffer can be quite a bit larger than this depending on the "
449  "average compression ratio. The value of -1 just uses ROOT's default value. The value of 0 turns off this "
450  "feature.");
451  desc.addUntracked<int>("splitLevel", 99)->setComment("Default ROOT branch split level in output file.");
452  desc.addUntracked<std::string>("sortBaskets", std::string("sortbasketsbyoffset"))
453  ->setComment(
454  "Legal values: 'sortbasketsbyoffset', 'sortbasketsbybranch', 'sortbasketsbyentry'.\n"
455  "Used by ROOT when fast copying. Affects performance.");
456  desc.addUntracked<int>("treeMaxVirtualSize", -1)
457  ->setComment("Size of ROOT TTree TBasket cache. Affects performance.");
458  desc.addUntracked<bool>("fastCloning", true)
459  ->setComment(
460  "True: Allow fast copying, if possible.\n"
461  "False: Disable fast copying.");
462  desc.addUntracked<bool>("overrideInputFileSplitLevels", false)
463  ->setComment(
464  "False: Use branch split levels and basket sizes from input file, if possible.\n"
465  "True: Always use specified or default split levels and basket sizes.");
466  desc.addUntracked<bool>("writeStatusFile", false)
467  ->setComment("Write a status file. Intended for use by workflow management.");
468  desc.addUntracked<std::string>("dropMetaData", defaultString)
469  ->setComment(
470  "Determines handling of per product per event metadata. Options are:\n"
471  "'NONE': Keep all of it.\n"
472  "'DROPPED': Keep it for products produced in current process and all kept products. Drop it for dropped "
473  "products produced in prior processes.\n"
474  "'PRIOR': Keep it for products produced in current process. Drop it for products produced in prior "
475  "processes.\n"
476  "'ALL': Drop all of it.");
477  {
478  ParameterSetDescription dataSet;
479  dataSet.setAllowAnything();
480  desc.addUntracked<ParameterSetDescription>("dataset", dataSet)
481  ->setComment("PSet is only used by Data Operations and not by this module.");
482  }
483  {
484  ParameterSetDescription specialSplit;
485  specialSplit.addUntracked<std::string>("branch")->setComment(
486  "Name of branch needing a special split level. The name can contain wildcards '*' and '?'");
487  specialSplit.addUntracked<int>("splitLevel")->setComment("The special split level for the branch");
488  desc.addVPSetUntracked("overrideBranchesSplitLevel", specialSplit, std::vector<ParameterSet>());
489  }
491  }
492 
496  descriptions.add("edmOutput", desc);
497  }
498 } // namespace edm
void openFile(FileBlock const &fb) override
virtual std::pair< std::string, std::string > physicalAndLogicalNameForNewFile()
T getUntrackedParameter(std::string const &, T const &) const
bool shouldWeCloseFile() const override
allow inheriting classes to override but still be able to call this method in the overridden version ...
std::string const & branchName() const
SubProcessParentageHelper const * subProcessParentageHelper() const
std::string const & BranchTypeToAuxiliaryBranchName(BranchType const &branchType)
Definition: BranchType.cc:116
BranchDescription const * branchDescription_
int const & basketSize() const
EventID const & id() const
TPRegexp parents
Definition: eve_filter.cc:21
BranchType const & branchType() const
Definition: helper.py:1
void write(EventForOutput const &e) override
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
edm::propagate_const< std::unique_ptr< RootOutputFile > > rootOutputFile_
static int const invalidSplitLevel
void setAllowAnything()
allow any parameter label/value pairs
std::vector< SpecialSplitLevelForBranch > specialSplitLevelForBranches_
static int const invalidBasketSize
void setProcessesWithSelectedMergeableRunProducts(std::set< std::string > const &) override
void updateBranchParents(EventForOutput const &e)
DropMetaData const & dropMetaData() const
BranchChildren branchChildren_
#define nullptr
std::vector< OutputItem > OutputItemList
std::string const & fileName() const
void insertChild(BranchID parent, BranchID child)
BranchChildren const & branchChildren() const
Definition: FileBlock.h:116
std::string const moduleLabel_
ParameterSet getUntrackedParameterSet(std::string const &name, ParameterSet const &defaultValue) const
bool int lh
Definition: SIMDVec.h:21
uint16_t size_type
virtual void doExtrasAfterCloseFile()
const std::string names[nVars_]
std::string const & logicalFileName() const
BranchType
Definition: BranchType.h:11
std::vector< std::pair< BranchDescription const *, EDGetToken > > SelectedProducts
void fillSelectedItemList(BranchType branchtype, TTree *theInputTree)
std::vector< BranchID > const & parents() const
Definition: Parentage.h:44
std::regex convert(std::string const &iGlobBranchExpression) const
PoolOutputModule(ParameterSet const &ps)
void setComment(std::string const &value)
void updateBranchParentsForOneBranch(ProductProvenanceRetriever const *provRetriever, BranchID const &branchID)
bool operator()(OutputItem const &lh, OutputItem const &rh) const
std::string const & currentFileName() const
bool getMapped(key_type const &k, value_type &result) const
OutputItemListArray selectedOutputItemList_
map_t const & childLookup() const
std::vector< BranchID > producedBranches_
void preActionBeforeRunEventAsync(WaitingTask *iTask, ModuleCallingContext const &iModuleCallingContext, Principal const &iPrincipal) const override
static void fillDescription(ParameterSetDescription &desc, std::vector< std::string > const &iDefaultOutputCommands=ProductSelectorRules::defaultSelectionStrings())
ProductProvenanceRetriever const * productProvenanceRetrieverPtr() const
void writeStoredMergeableRunProductMetadata()
SelectedProductsForBranchType const & keptProducts() const
int const & splitLevel() const
BranchID const & branchID() const
EventID const & min(EventID const &lh, EventID const &rh)
Definition: EventID.h:137
BranchParents branchParents_
unsigned int numberOfDigitsInIndex_
void writeRun(RunForOutput const &r) override
void sort_all(RandomAccessSequence &s)
wrappers for std::sort
Definition: Algorithms.h:92
bool isFileOpen() const override
TTree * lumiTree() const
Definition: FileBlock.h:104
void respondToCloseInputFile(FileBlock const &fb) override
void beginJob() override
void reallyCloseFile() override
double b
Definition: hdecay.h:120
void add(std::string const &label, ParameterSetDescription const &psetDescription)
ProductProvenance const * branchIDToProvenanceForProducedOnly(BranchID const &bid) const
ParentageID const & parentageID() const
std::vector< std::vector< double > > tmp
Definition: MVATrainer.cc:100
HLT enums.
std::shared_ptr< std::map< std::string, int > > treeMap_
VParameterSet getUntrackedParameterSetVector(std::string const &name, VParameterSet const &defaultValue) const
void beginInputFile(FileBlock const &fb)
Definition: tree.py:1
void respondToOpenInputFile(FileBlock const &fb) override
Definition: memstream.h:15
static void fillDescriptions(ConfigurationDescriptions &descriptions)
bool match(std::string const &iBranchName) const
static void fillDescription(ParameterSetDescription &desc)
static ParentageRegistry * instance()
TTree * runTree() const
Definition: FileBlock.h:106
TTree * tree() const
Definition: FileBlock.h:102
ParameterDescriptionBase * addVPSetUntracked(U const &iLabel, ParameterSetDescription const &validator, std::vector< ParameterSet > const &defaults)
std::vector< std::string > processesWithSelectedMergeableRunProducts_
def branchType(schema, name)
Definition: revisionDML.py:114
void writeLuminosityBlock(LuminosityBlockForOutput const &lb) override