CMS 3D CMS Logo

PoolOutputModule.cc
Go to the documentation of this file.
2 
4 
24 
25 #include "TTree.h"
26 #include "TBranchElement.h"
27 #include "TObjArray.h"
28 #include "RVersion.h"
29 
30 #include <fstream>
31 #include <iomanip>
32 #include <sstream>
33 
34 namespace edm {
38  rootServiceChecker_(),
39  auxItems_(),
40  selectedOutputItemList_(),
41  fileName_(pset.getUntrackedParameter<std::string>("fileName")),
42  logicalFileName_(pset.getUntrackedParameter<std::string>("logicalFileName")),
43  catalog_(pset.getUntrackedParameter<std::string>("catalog")),
44  maxFileSize_(pset.getUntrackedParameter<int>("maxSize")),
45  compressionLevel_(pset.getUntrackedParameter<int>("compressionLevel")),
46  compressionAlgorithm_(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
47  basketSize_(pset.getUntrackedParameter<int>("basketSize")),
48  eventAutoFlushSize_(pset.getUntrackedParameter<int>("eventAutoFlushCompressedSize")),
49  splitLevel_(std::min<int>(pset.getUntrackedParameter<int>("splitLevel") + 1, 99)),
50  basketOrder_(pset.getUntrackedParameter<std::string>("sortBaskets")),
51  treeMaxVirtualSize_(pset.getUntrackedParameter<int>("treeMaxVirtualSize")),
52  whyNotFastClonable_(pset.getUntrackedParameter<bool>("fastCloning") ? FileBlock::CanFastClone : FileBlock::DisabledInConfigFile),
53  dropMetaData_(DropNone),
54  moduleLabel_(pset.getParameter<std::string>("@module_label")),
55  initializedFromInput_(false),
56  outputFileCount_(0),
57  inputFileCount_(0),
58  childIndex_(0U),
59  numberOfDigitsInIndex_(0U),
60  branchParents_(),
61  branchChildren_(),
62  overrideInputFileSplitLevels_(pset.getUntrackedParameter<bool>("overrideInputFileSplitLevels")),
63  rootOutputFile_(),
64  statusFileName_() {
65 
66  if (pset.getUntrackedParameter<bool>("writeStatusFile")) {
67  std::ostringstream statusfilename;
68  statusfilename << moduleLabel_ << '_' << getpid();
69  statusFileName_ = statusfilename.str();
70  }
71 
73  if(dropMetaData.empty()) dropMetaData_ = DropNone;
74  else if(dropMetaData == std::string("NONE")) dropMetaData_ = DropNone;
75  else if(dropMetaData == std::string("DROPPED")) dropMetaData_ = DropDroppedPrior;
76  else if(dropMetaData == std::string("PRIOR")) dropMetaData_ = DropPrior;
77  else if(dropMetaData == std::string("ALL")) dropMetaData_ = DropAll;
78  else {
79  throw edm::Exception(errors::Configuration, "Illegal dropMetaData parameter value: ")
80  << dropMetaData << ".\n"
81  << "Legal values are 'NONE', 'DROPPED', 'PRIOR', and 'ALL'.\n";
82  }
83 
84  if (!wantAllEvents()) {
86  }
87 
88  // We don't use this next parameter, but we read it anyway because it is part
89  // of the configuration of this module. An external parser creates the
90  // configuration by reading this source code.
91  pset.getUntrackedParameterSet("dataset");
92  }
93 
96  for(auto const& prod : reg->productList()) {
97  BranchDescription const& desc = prod.second;
98  if (desc.produced() && desc.branchType() == InEvent && !desc.isAlias()) {
99  producedBranches_.emplace_back(desc.branchID());
100  }
101  }
102  }
103 
105  return rootOutputFile_->fileName();
106  }
107 
109  basketSize_(BranchDescription::invalidBasketSize) {}
110 
112  branchDescription_(0),
113  token_(),
114  product_(0),
115  splitLevel_(BranchDescription::invalidSplitLevel),
116  basketSize_(BranchDescription::invalidBasketSize) {}
117 
119  branchDescription_(bd),
120  token_(token),
121  product_(0),
122  splitLevel_(splitLevel),
123  basketSize_(basketSize) {}
124 
125 
127  // Fill a map mapping branch names to an index specifying the order in the tree.
128  if(tree != nullptr) {
129  TObjArray* branches = tree->GetListOfBranches();
130  for(int i = 0; i < branches->GetEntries(); ++i) {
131  TBranchElement* br = (TBranchElement*)branches->At(i);
132  treeMap_->insert(std::make_pair(std::string(br->GetName()), i));
133  }
134  }
135  }
136 
137  bool
139  // Provides a comparison for sorting branches according to the index values in treeMap_.
140  // Branches not found are always put at the end (i.e. not found > found).
141  if(treeMap_->empty()) return lh < rh;
142  std::string const& lstring = lh.branchDescription_->branchName();
143  std::string const& rstring = rh.branchDescription_->branchName();
144  std::map<std::string, int>::const_iterator lit = treeMap_->find(lstring);
145  std::map<std::string, int>::const_iterator rit = treeMap_->find(rstring);
146  bool lfound = (lit != treeMap_->end());
147  bool rfound = (rit != treeMap_->end());
148  if(lfound && rfound) {
149  return lit->second < rit->second;
150  } else if(lfound) {
151  return true;
152  } else if(rfound) {
153  return false;
154  }
155  return lh < rh;
156  }
157 
159 
160  SelectedProducts const& keptVector = keptProducts()[branchType];
162  AuxItem& auxItem = auxItems_[branchType];
163 
164  // Fill AuxItem
165  if (theInputTree != nullptr && !overrideInputFileSplitLevels_) {
166  TBranch* auxBranch = theInputTree->GetBranch(BranchTypeToAuxiliaryBranchName(branchType).c_str());
167  if (auxBranch) {
168  auxItem.basketSize_ = auxBranch->GetBasketSize();
169  } else {
170  auxItem.basketSize_ = basketSize_;
171  }
172  } else {
173  auxItem.basketSize_ = basketSize_;
174  }
175 
176  // Fill outputItemList with an entry for each branch.
177  for(auto const& kept : keptVector) {
180 
181  BranchDescription const& prod = *kept.first;
182  TBranch* theBranch = ((!prod.produced() && theInputTree != nullptr && !overrideInputFileSplitLevels_) ? theInputTree->GetBranch(prod.branchName().c_str()) : 0);
183 
184  if(theBranch != nullptr) {
185  splitLevel = theBranch->GetSplitLevel();
186  basketSize = theBranch->GetBasketSize();
187  } else {
188  splitLevel = (prod.splitLevel() == BranchDescription::invalidSplitLevel ? splitLevel_ : prod.splitLevel());
189  basketSize = (prod.basketSize() == BranchDescription::invalidBasketSize ? basketSize_ : prod.basketSize());
190  }
191  outputItemList.emplace_back(&prod, kept.second, splitLevel, basketSize);
192  }
193 
194  // Sort outputItemList to allow fast copying.
195  // The branches in outputItemList must be in the same order as in the input tree, with all new branches at the end.
196  sort_all(outputItemList, OutputItem::Sorter(theInputTree));
197  }
198 
200  if(isFileOpen()) {
201  //Faster to read ChildrenBranches directly from input
202  // file than to build it every event
203  auto const& branchToChildMap = fb.branchChildren().childLookup();
204  for (auto const& parentToChildren : branchToChildMap) {
205  for (auto const& child : parentToChildren.second) {
206  branchChildren_.insertChild(parentToChildren.first, child);
207  }
208  }
209  rootOutputFile_->beginInputFile(fb, remainingEvents());
210  }
211  }
212 
214  if(!isFileOpen()) {
215  reallyOpenFile();
216  beginInputFile(fb);
217  }
218  }
219 
221  if(!initializedFromInput_) {
222  for(int i = InEvent; i < NumBranchTypes; ++i) {
223  BranchType branchType = static_cast<BranchType>(i);
224  TTree* theInputTree = (branchType == InEvent ? fb.tree() :
225  (branchType == InLumi ? fb.lumiTree() :
226  fb.runTree()));
227  fillSelectedItemList(branchType, theInputTree);
228  }
229  initializedFromInput_ = true;
230  }
231  ++inputFileCount_;
232  beginInputFile(fb);
233  }
234 
236  if(rootOutputFile_) rootOutputFile_->respondToCloseInputFile(fb);
237  }
238 
239  void PoolOutputModule::postForkReacquireResources(unsigned int iChildIndex, unsigned int iNumberOfChildren) {
240  childIndex_ = iChildIndex;
241  while (iNumberOfChildren != 0) {
243  iNumberOfChildren /= 10;
244  }
245  if (numberOfDigitsInIndex_ == 0) {
246  numberOfDigitsInIndex_ = 3; // Protect against zero iNumberOfChildren
247  }
248  }
249 
251  }
252 
255  rootOutputFile_->writeOne(e);
256  if (!statusFileName_.empty()) {
257  std::ofstream statusFile(statusFileName_.c_str());
258  statusFile << e.id() << " time: " << std::setprecision(3) << TimeOfDay() << '\n';
259  statusFile.close();
260  }
261  }
262 
264  rootOutputFile_->writeLuminosityBlock(lb);
265  }
266 
268  rootOutputFile_->writeRun(r);
269  }
270 
273  branchParents_.clear();
274  startEndFile();
284  writeProductDependencies(); //branchChildren used here
286  finishEndFile();
287 
289  }
290 
291 
292  // At some later date, we may move functionality from finishEndFile() to here.
294 
295  void PoolOutputModule::writeFileFormatVersion() { rootOutputFile_->writeFileFormatVersion(); }
296  void PoolOutputModule::writeFileIdentifier() { rootOutputFile_->writeFileIdentifier(); }
297  void PoolOutputModule::writeIndexIntoFile() { rootOutputFile_->writeIndexIntoFile(); }
298  void PoolOutputModule::writeProcessHistoryRegistry() { rootOutputFile_->writeProcessHistoryRegistry(); }
299  void PoolOutputModule::writeParameterSetRegistry() { rootOutputFile_->writeParameterSetRegistry(); }
300  void PoolOutputModule::writeProductDescriptionRegistry() { rootOutputFile_->writeProductDescriptionRegistry(); }
301  void PoolOutputModule::writeParentageRegistry() { rootOutputFile_->writeParentageRegistry(); }
302  void PoolOutputModule::writeBranchIDListRegistry() { rootOutputFile_->writeBranchIDListRegistry(); }
303  void PoolOutputModule::writeThinnedAssociationsHelper() { rootOutputFile_->writeThinnedAssociationsHelper(); }
304  void PoolOutputModule::writeProductDependencies() { rootOutputFile_->writeProductDependencies(); }
305  void PoolOutputModule::finishEndFile() { rootOutputFile_->finishEndFile(); rootOutputFile_ = nullptr; } // propagate_const<T> has no reset() function
307  bool PoolOutputModule::isFileOpen() const { return rootOutputFile_.get() != nullptr; }
308  bool PoolOutputModule::shouldWeCloseFile() const { return rootOutputFile_->shouldWeCloseFile(); }
309 
310  std::pair<std::string, std::string>
312  if(inputFileCount_ == 0) {
314  << "Attempt to open output file before input file. "
315  << "Please report this to the core framework developers.\n";
316  }
317  std::string suffix(".root");
318  std::string::size_type offset = fileName().rfind(suffix);
319  bool ext = (offset == fileName().size() - suffix.size());
320  if(!ext) suffix.clear();
321  std::string fileBase(ext ? fileName().substr(0, offset) : fileName());
322  std::ostringstream ofilename;
323  std::ostringstream lfilename;
324  ofilename << fileBase;
325  lfilename << logicalFileName();
327  ofilename << '_' << std::setw(numberOfDigitsInIndex_) << std::setfill('0') << childIndex_;
328  if(!logicalFileName().empty()) {
329  lfilename << '_' << std::setw(numberOfDigitsInIndex_) << std::setfill('0') << childIndex_;
330  }
331  }
332  if(outputFileCount_) {
333  ofilename << std::setw(3) << std::setfill('0') << outputFileCount_;
334  if(!logicalFileName().empty()) {
335  lfilename << std::setw(3) << std::setfill('0') << outputFileCount_;
336  }
337  }
338  ofilename << suffix;
340 
341  return std::make_pair(ofilename.str(), lfilename.str());
342  }
343 
346  rootOutputFile_ = std::make_unique<RootOutputFile>(this, names.first, names.second); // propagate_const<T> has no reset() function
347  }
348 
349  void
351  ProductProvenanceRetriever const* provRetriever,
352  BranchID const& branchID) {
353 
354  ProductProvenance const* provenance = provRetriever->branchIDToProvenanceForProducedOnly(branchID);
355  if (provenance != nullptr) {
356  BranchParents::iterator it = branchParents_.find(branchID);
357  if (it == branchParents_.end()) {
358  it = branchParents_.insert(std::make_pair(branchID,
359  std::set<ParentageID>())).first;
360  }
361  it->second.insert(provenance->parentageID());
362  }
363  }
364 
365  void
367 
369  for (auto const& bid : producedBranches_) {
370  updateBranchParentsForOneBranch(provRetriever, bid);
371  }
373  if (helper) {
374  for (auto const& bid : subProcessParentageHelper()->producedProducts()) {
375  updateBranchParentsForOneBranch(provRetriever, bid);
376  }
377  }
378  }
379 
380  void
381  PoolOutputModule::preActionBeforeRunEventAsync(WaitingTask* iTask, ModuleCallingContext const& iModuleCallingContext, Principal const& iPrincipal) const {
382  if(DropAll != dropMetaData_ ) {
383  auto const* ep = dynamic_cast<EventPrincipal const*>(&iPrincipal);
384  if(ep)
385  {
386  auto pr = ep->productProvenanceRetrieverPtr();
387  if(pr) {
388  pr->readProvenanceAsync(iTask,&iModuleCallingContext);
389  }
390  }
391  }
392  }
393 
394  void
396  for(auto const& branchParent : branchParents_) {
397  BranchID const& child = branchParent.first;
398  std::set<ParentageID> const& eIds = branchParent.second;
399  for(auto const& eId : eIds) {
400  Parentage entryDesc;
401  ParentageRegistry::instance()->getMapped(eId, entryDesc);
402  std::vector<BranchID> const& parents = entryDesc.parents();
403  for(auto const& parent : parents) {
405  }
406  }
407  }
408  }
409 
410  void
412  std::string defaultString;
413 
414  desc.setComment("Writes runs, lumis, and events into EDM/ROOT files.");
415  desc.addUntracked<std::string>("fileName")
416  ->setComment("Name of output file.");
417  desc.addUntracked<std::string>("logicalFileName", defaultString)
418  ->setComment("Passed to job report. Otherwise unused by module.");
419  desc.addUntracked<std::string>("catalog", defaultString)
420  ->setComment("Passed to job report. Otherwise unused by module.");
421  desc.addUntracked<int>("maxSize", 0x7f000000)
422  ->setComment("Maximum output file size, in kB.\n"
423  "If over maximum, new output file will be started at next input file transition.");
424  desc.addUntracked<int>("compressionLevel", 7)
425  ->setComment("ROOT compression level of output file.");
426  desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
427  ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB and LZMA");
428  desc.addUntracked<int>("basketSize", 16384)
429  ->setComment("Default ROOT basket size in output file.");
430  desc.addUntracked<int>("eventAutoFlushCompressedSize",-1)->setComment("Set ROOT auto flush stored data size (in bytes) for event TTree. The value sets how large the compressed buffer is allowed to get. The uncompressed buffer can be quite a bit larger than this depending on the average compression ratio. The value of -1 just uses ROOT's default value. The value of 0 turns off this feature.");
431  desc.addUntracked<int>("splitLevel", 99)
432  ->setComment("Default ROOT branch split level in output file.");
433  desc.addUntracked<std::string>("sortBaskets", std::string("sortbasketsbyoffset"))
434  ->setComment("Legal values: 'sortbasketsbyoffset', 'sortbasketsbybranch', 'sortbasketsbyentry'.\n"
435  "Used by ROOT when fast copying. Affects performance.");
436  desc.addUntracked<int>("treeMaxVirtualSize", -1)
437  ->setComment("Size of ROOT TTree TBasket cache. Affects performance.");
438  desc.addUntracked<bool>("fastCloning", true)
439  ->setComment("True: Allow fast copying, if possible.\n"
440  "False: Disable fast copying.");
441  desc.addUntracked<bool>("overrideInputFileSplitLevels", false)
442  ->setComment("False: Use branch split levels and basket sizes from input file, if possible.\n"
443  "True: Always use specified or default split levels and basket sizes.");
444  desc.addUntracked<bool>("writeStatusFile", false)
445  ->setComment("Write a status file. Intended for use by workflow management.");
446  desc.addUntracked<std::string>("dropMetaData", defaultString)
447  ->setComment("Determines handling of per product per event metadata. Options are:\n"
448  "'NONE': Keep all of it.\n"
449  "'DROPPED': Keep it for products produced in current process and all kept products. Drop it for dropped products produced in prior processes.\n"
450  "'PRIOR': Keep it for products produced in current process. Drop it for products produced in prior processes.\n"
451  "'ALL': Drop all of it.");
452  ParameterSetDescription dataSet;
453  dataSet.setAllowAnything();
454  desc.addUntracked<ParameterSetDescription>("dataset", dataSet)
455  ->setComment("PSet is only used by Data Operations and not by this module.");
456 
458  }
459 
460  void
464  descriptions.add("edmOutput", desc);
465  }
466 }
virtual void openFile(FileBlock const &fb) override
virtual std::pair< std::string, std::string > physicalAndLogicalNameForNewFile()
T getUntrackedParameter(std::string const &, T const &) const
virtual bool shouldWeCloseFile() const override
allow inheriting classes to override but still be able to call this method in the overridden version ...
std::string const & branchName() const
SubProcessParentageHelper const * subProcessParentageHelper() const
std::string const & BranchTypeToAuxiliaryBranchName(BranchType const &branchType)
Definition: BranchType.cc:115
BranchDescription const * branchDescription_
int const & basketSize() const
EventID const & id() const
TPRegexp parents
Definition: eve_filter.cc:21
BranchType const & branchType() const
Definition: helper.py:1
virtual void write(EventForOutput const &e) override
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
static const HistoName names[]
edm::propagate_const< std::unique_ptr< RootOutputFile > > rootOutputFile_
static int const invalidSplitLevel
void setAllowAnything()
allow any parameter label/value pairs
static int const invalidBasketSize
void updateBranchParents(EventForOutput const &e)
DropMetaData const & dropMetaData() const
BranchChildren branchChildren_
std::vector< OutputItem > OutputItemList
std::string const & fileName() const
void insertChild(BranchID parent, BranchID child)
BranchChildren const & branchChildren() const
Definition: FileBlock.h:113
std::string const moduleLabel_
ParameterSet getUntrackedParameterSet(std::string const &name, ParameterSet const &defaultValue) const
bool int lh
Definition: SIMDVec.h:21
uint16_t size_type
virtual void doExtrasAfterCloseFile()
std::string const & logicalFileName() const
BranchType
Definition: BranchType.h:11
std::vector< std::pair< BranchDescription const *, EDGetToken > > SelectedProducts
void fillSelectedItemList(BranchType branchtype, TTree *theInputTree)
std::vector< BranchID > const & parents() const
Definition: Parentage.h:44
PoolOutputModule(ParameterSet const &ps)
void setComment(std::string const &value)
void updateBranchParentsForOneBranch(ProductProvenanceRetriever const *provRetriever, BranchID const &branchID)
bool operator()(OutputItem const &lh, OutputItem const &rh) const
std::string const & currentFileName() const
bool getMapped(key_type const &k, value_type &result) const
OutputItemListArray selectedOutputItemList_
map_t const & childLookup() const
std::vector< BranchID > producedBranches_
virtual void preActionBeforeRunEventAsync(WaitingTask *iTask, ModuleCallingContext const &iModuleCallingContext, Principal const &iPrincipal) const override
void readProvenanceAsync(WaitingTask *task, ModuleCallingContext const *moduleCallingContext) const
static void fillDescription(ParameterSetDescription &desc, std::vector< std::string > const &iDefaultOutputCommands=ProductSelectorRules::defaultSelectionStrings())
ProductProvenanceRetriever const * productProvenanceRetrieverPtr() const
SelectedProductsForBranchType const & keptProducts() const
int const & splitLevel() const
BranchID const & branchID() const
EventID const & min(EventID const &lh, EventID const &rh)
Definition: EventID.h:137
virtual void reallyOpenFile() override
BranchParents branchParents_
ProductProvenanceRetriever const * productProvenanceRetrieverPtr() const
unsigned int numberOfDigitsInIndex_
virtual void writeRun(RunForOutput const &r) override
void sort_all(RandomAccessSequence &s)
wrappers for std::sort
Definition: Algorithms.h:120
virtual bool isFileOpen() const override
TTree * lumiTree() const
Definition: FileBlock.h:99
virtual void respondToCloseInputFile(FileBlock const &fb) override
virtual void beginJob() override
virtual void reallyCloseFile() override
void add(std::string const &label, ParameterSetDescription const &psetDescription)
ProductProvenance const * branchIDToProvenanceForProducedOnly(BranchID const &bid) const
ParentageID const & parentageID() const
HLT enums.
virtual void postForkReacquireResources(unsigned int iChildIndex, unsigned int iNumberOfChildren) override
std::shared_ptr< std::map< std::string, int > > treeMap_
void beginInputFile(FileBlock const &fb)
Definition: tree.py:1
virtual void respondToOpenInputFile(FileBlock const &fb) override
Definition: memstream.h:15
static void fillDescriptions(ConfigurationDescriptions &descriptions)
static void fillDescription(ParameterSetDescription &desc)
static ParentageRegistry * instance()
TTree * runTree() const
Definition: FileBlock.h:101
TTree * tree() const
Definition: FileBlock.h:97
def branchType(schema, name)
Definition: revisionDML.py:112
virtual void writeLuminosityBlock(LuminosityBlockForOutput const &lb) override