test
CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
PoolOutputModule.cc
Go to the documentation of this file.
2 
4 
19 
20 #include "TTree.h"
21 #include "TBranchElement.h"
22 #include "TObjArray.h"
23 #include "RVersion.h"
24 
25 #include <fstream>
26 #include <iomanip>
27 #include <sstream>
28 
29 namespace edm {
31  edm::one::OutputModuleBase::OutputModuleBase(pset),
32  one::OutputModule<WatchInputFiles>(pset),
33  rootServiceChecker_(),
34  auxItems_(),
35  selectedOutputItemList_(),
36  fileName_(pset.getUntrackedParameter<std::string>("fileName")),
37  logicalFileName_(pset.getUntrackedParameter<std::string>("logicalFileName")),
38  catalog_(pset.getUntrackedParameter<std::string>("catalog")),
39  maxFileSize_(pset.getUntrackedParameter<int>("maxSize")),
40  compressionLevel_(pset.getUntrackedParameter<int>("compressionLevel")),
41  compressionAlgorithm_(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
42  basketSize_(pset.getUntrackedParameter<int>("basketSize")),
43  eventAutoFlushSize_(pset.getUntrackedParameter<int>("eventAutoFlushCompressedSize")),
44  splitLevel_(std::min<int>(pset.getUntrackedParameter<int>("splitLevel") + 1, 99)),
45  basketOrder_(pset.getUntrackedParameter<std::string>("sortBaskets")),
46  treeMaxVirtualSize_(pset.getUntrackedParameter<int>("treeMaxVirtualSize")),
47  whyNotFastClonable_(pset.getUntrackedParameter<bool>("fastCloning") ? FileBlock::CanFastClone : FileBlock::DisabledInConfigFile),
48  dropMetaData_(DropNone),
49  moduleLabel_(pset.getParameter<std::string>("@module_label")),
50  initializedFromInput_(false),
51  outputFileCount_(0),
52  inputFileCount_(0),
53  childIndex_(0U),
54  numberOfDigitsInIndex_(0U),
55  branchParents_(),
56  branchChildren_(),
57  overrideInputFileSplitLevels_(pset.getUntrackedParameter<bool>("overrideInputFileSplitLevels")),
58  rootOutputFile_(),
59  statusFileName_() {
60 
61  if (pset.getUntrackedParameter<bool>("writeStatusFile")) {
62  std::ostringstream statusfilename;
63  statusfilename << moduleLabel_ << '_' << getpid();
64  statusFileName_ = statusfilename.str();
65  }
66 
68  if(dropMetaData.empty()) dropMetaData_ = DropNone;
69  else if(dropMetaData == std::string("NONE")) dropMetaData_ = DropNone;
70  else if(dropMetaData == std::string("DROPPED")) dropMetaData_ = DropDroppedPrior;
71  else if(dropMetaData == std::string("PRIOR")) dropMetaData_ = DropPrior;
72  else if(dropMetaData == std::string("ALL")) dropMetaData_ = DropAll;
73  else {
74  throw edm::Exception(errors::Configuration, "Illegal dropMetaData parameter value: ")
75  << dropMetaData << ".\n"
76  << "Legal values are 'NONE', 'DROPPED', 'PRIOR', and 'ALL'.\n";
77  }
78 
79  if (!wantAllEvents()) {
81  }
82 
83  // We don't use this next parameter, but we read it anyway because it is part
84  // of the configuration of this module. An external parser creates the
85  // configuration by reading this source code.
86  pset.getUntrackedParameterSet("dataset");
87  }
88 
90  }
91 
93  return rootOutputFile_->fileName();
94  }
95 
97  basketSize_(BranchDescription::invalidBasketSize) {}
98 
100  branchDescription_(0),
101  token_(),
102  product_(0),
103  splitLevel_(BranchDescription::invalidSplitLevel),
104  basketSize_(BranchDescription::invalidBasketSize) {}
105 
107  branchDescription_(bd),
108  token_(token),
109  product_(0),
110  splitLevel_(splitLevel),
111  basketSize_(basketSize) {}
112 
113 
// Builds treeMap_, a lookup from branch name to that branch's position in the
// branch list of the given tree. A null tree leaves the map empty.
// @param tree  tree whose branch order should be recorded; may be nullptr.
114  PoolOutputModule::OutputItem::Sorter::Sorter(TTree* tree) : treeMap_(new std::map<std::string, int>) {
115  // Fill a map mapping branch names to an index specifying the order in the tree.
116  if(tree != nullptr) {
117  TObjArray* branches = tree->GetListOfBranches();
// The entry count is taken once: TObjArray::GetEntries() scans the whole array
// on every call, so keeping it in the loop condition made this loop quadratic.
118  for(int i = 0, nBranches = branches->GetEntries(); i < nBranches; ++i) {
// Only the name is read, so the entry is used through the TObject interface that
// At() already returns; no downcast to a concrete branch class is needed.
119  TObject const* br = branches->At(i);
// An empty slot is skipped rather than dereferenced; the index i of the
// remaining entries is unchanged.
120  if(br != nullptr) treeMap_->emplace(br->GetName(), i);
121  }
122  }
123  }
124 
125  bool
127  // Provides a comparison for sorting branches according to the index values in treeMap_.
128  // Branches not found are always put at the end (i.e. not found > found).
129  if(treeMap_->empty()) return lh < rh;
130  std::string const& lstring = lh.branchDescription_->branchName();
131  std::string const& rstring = rh.branchDescription_->branchName();
132  std::map<std::string, int>::const_iterator lit = treeMap_->find(lstring);
133  std::map<std::string, int>::const_iterator rit = treeMap_->find(rstring);
134  bool lfound = (lit != treeMap_->end());
135  bool rfound = (rit != treeMap_->end());
136  if(lfound && rfound) {
137  return lit->second < rit->second;
138  } else if(lfound) {
139  return true;
140  } else if(rfound) {
141  return false;
142  }
143  return lh < rh;
144  }
145 
147 
148  SelectedProducts const& keptVector = keptProducts()[branchType];
150  AuxItem& auxItem = auxItems_[branchType];
151 
152  // Fill AuxItem
153  if (theInputTree != nullptr && !overrideInputFileSplitLevels_) {
154  TBranch* auxBranch = theInputTree->GetBranch(BranchTypeToAuxiliaryBranchName(branchType).c_str());
155  if (auxBranch) {
156  auxItem.basketSize_ = auxBranch->GetBasketSize();
157  } else {
158  auxItem.basketSize_ = basketSize_;
159  }
160  } else {
161  auxItem.basketSize_ = basketSize_;
162  }
163 
164  // Fill outputItemList with an entry for each branch.
165  for(auto const& kept : keptVector) {
168 
169  BranchDescription const& prod = *kept.first;
170  TBranch* theBranch = ((!prod.produced() && theInputTree != nullptr && !overrideInputFileSplitLevels_) ? theInputTree->GetBranch(prod.branchName().c_str()) : 0);
171 
172  if(theBranch != nullptr) {
173  splitLevel = theBranch->GetSplitLevel();
174  basketSize = theBranch->GetBasketSize();
175  } else {
176  splitLevel = (prod.splitLevel() == BranchDescription::invalidSplitLevel ? splitLevel_ : prod.splitLevel());
177  basketSize = (prod.basketSize() == BranchDescription::invalidBasketSize ? basketSize_ : prod.basketSize());
178  }
179  outputItemList.emplace_back(&prod, kept.second, splitLevel, basketSize);
180  }
181 
182  // Sort outputItemList to allow fast copying.
183  // The branches in outputItemList must be in the same order as in the input tree, with all new branches at the end.
184  sort_all(outputItemList, OutputItem::Sorter(theInputTree));
185  }
186 
188  if(isFileOpen()) {
189  rootOutputFile_->beginInputFile(fb, remainingEvents());
190  }
191  }
192 
194  if(!isFileOpen()) {
195  reallyOpenFile();
196  beginInputFile(fb);
197  }
198  }
199 
201  if(!initializedFromInput_) {
202  for(int i = InEvent; i < NumBranchTypes; ++i) {
203  BranchType branchType = static_cast<BranchType>(i);
204  TTree* theInputTree = (branchType == InEvent ? fb.tree() :
205  (branchType == InLumi ? fb.lumiTree() :
206  fb.runTree()));
207  fillSelectedItemList(branchType, theInputTree);
208  }
209  initializedFromInput_ = true;
210  }
211  ++inputFileCount_;
212  beginInputFile(fb);
213  }
214 
216  if(rootOutputFile_) rootOutputFile_->respondToCloseInputFile(fb);
217  }
218 
// Stores the child index handed to this module and prepares numberOfDigitsInIndex_.
// @param iChildIndex        copied into childIndex_.
// @param iNumberOfChildren  divided by 10 until it reaches zero (local copy only).
// If numberOfDigitsInIndex_ is still 0 after the loop, it is set to 3.
219  void PoolOutputModule::postForkReacquireResources(unsigned int iChildIndex, unsigned int iNumberOfChildren) {
220  childIndex_ = iChildIndex;
221  while (iNumberOfChildren != 0) {
// NOTE(review): source line 222 is absent from this listing (the numbering jumps
// from 221 to 223); presumably it increments numberOfDigitsInIndex_ once per
// decimal digit - confirm against the real file.
223  iNumberOfChildren /= 10;
224  }
225  if (numberOfDigitsInIndex_ == 0) {
226  numberOfDigitsInIndex_ = 3; // Protect against zero iNumberOfChildren
227  }
228  }
229 
231  }
232 
235  rootOutputFile_->writeOne(e);
236  if (!statusFileName_.empty()) {
237  std::ofstream statusFile(statusFileName_.c_str());
238  statusFile << e.id() << " time: " << std::setprecision(3) << TimeOfDay() << '\n';
239  statusFile.close();
240  }
241  }
242 
244  rootOutputFile_->writeLuminosityBlock(lb);
245  }
246 
248  rootOutputFile_->writeRun(r);
249  }
250 
253  branchParents_.clear();
255  startEndFile();
266  finishEndFile();
267 
269  }
270 
271 
272  // At some later date, we may move functionality from finishEndFile() to here.
274 
// Forwarding wrappers: each function below calls the member of the same name on
// rootOutputFile_ and does nothing else. None of them tests rootOutputFile_ for
// null before dereferencing it.
// NOTE(review): presumably these are only invoked while an output file is open - confirm against the callers.
275  void PoolOutputModule::writeFileFormatVersion() { rootOutputFile_->writeFileFormatVersion(); }
276  void PoolOutputModule::writeFileIdentifier() { rootOutputFile_->writeFileIdentifier(); }
277  void PoolOutputModule::writeIndexIntoFile() { rootOutputFile_->writeIndexIntoFile(); }
278  void PoolOutputModule::writeProcessHistoryRegistry() { rootOutputFile_->writeProcessHistoryRegistry(); }
279  void PoolOutputModule::writeParameterSetRegistry() { rootOutputFile_->writeParameterSetRegistry(); }
280  void PoolOutputModule::writeProductDescriptionRegistry() { rootOutputFile_->writeProductDescriptionRegistry(); }
281  void PoolOutputModule::writeParentageRegistry() { rootOutputFile_->writeParentageRegistry(); }
282  void PoolOutputModule::writeBranchIDListRegistry() { rootOutputFile_->writeBranchIDListRegistry(); }
283  void PoolOutputModule::writeThinnedAssociationsHelper() { rootOutputFile_->writeThinnedAssociationsHelper(); }
284  void PoolOutputModule::writeProductDependencies() { rootOutputFile_->writeProductDependencies(); }
// Calls finishEndFile() on the current output file, then assigns nullptr to
// rootOutputFile_ so the module no longer refers to it. rootOutputFile_ is
// dereferenced without a null check.
285  void PoolOutputModule::finishEndFile() { rootOutputFile_->finishEndFile(); rootOutputFile_ = nullptr; } // propagate_const<T> has no reset() function
// @return true when rootOutputFile_ currently holds a non-null pointer, false otherwise.
287  bool PoolOutputModule::isFileOpen() const { return rootOutputFile_.get() != nullptr; }
// @return whatever rootOutputFile_->shouldWeCloseFile() reports.
// rootOutputFile_ is dereferenced without a null check.
// NOTE(review): presumably callers guarantee a file is open first - confirm.
288  bool PoolOutputModule::shouldWeCloseFile() const { return rootOutputFile_->shouldWeCloseFile(); }
289 
290  std::pair<std::string, std::string>
292  if(inputFileCount_ == 0) {
294  << "Attempt to open output file before input file. "
295  << "Please report this to the core framework developers.\n";
296  }
297  std::string suffix(".root");
298  std::string::size_type offset = fileName().rfind(suffix);
299  bool ext = (offset == fileName().size() - suffix.size());
300  if(!ext) suffix.clear();
301  std::string fileBase(ext ? fileName().substr(0, offset) : fileName());
302  std::ostringstream ofilename;
303  std::ostringstream lfilename;
304  ofilename << fileBase;
305  lfilename << logicalFileName();
307  ofilename << '_' << std::setw(numberOfDigitsInIndex_) << std::setfill('0') << childIndex_;
308  if(!logicalFileName().empty()) {
309  lfilename << '_' << std::setw(numberOfDigitsInIndex_) << std::setfill('0') << childIndex_;
310  }
311  }
312  if(outputFileCount_) {
313  ofilename << std::setw(3) << std::setfill('0') << outputFileCount_;
314  if(!logicalFileName().empty()) {
315  lfilename << std::setw(3) << std::setfill('0') << outputFileCount_;
316  }
317  }
318  ofilename << suffix;
320 
321  return std::make_pair(ofilename.str(), lfilename.str());
322  }
323 
326  rootOutputFile_ = std::make_unique<RootOutputFile>(this, names.first, names.second); // propagate_const<T> has no reset() function
327  }
328 
329  void
333  for(auto const& product : products) {
334  BranchDescription const& bd = *product.first;
335  BranchID const& bid = bd.branchID();
336  ProductProvenance const* provenance = provRetriever->branchIDToProvenance(bid);
337  if(provenance != nullptr) {
338  BranchParents::iterator it = branchParents_.find(bid);
339  if(it == branchParents_.end()) {
340  it = branchParents_.insert(std::make_pair(bid, std::set<ParentageID>())).first;
341  }
342  it->second.insert(provenance->parentageID());
344  }
345  }
346  }
347 
348  void
350  for(auto const& branchParent : branchParents_) {
351  BranchID const& child = branchParent.first;
352  std::set<ParentageID> const& eIds = branchParent.second;
353  for(auto const& eId : eIds) {
354  Parentage entryDesc;
355  ParentageRegistry::instance()->getMapped(eId, entryDesc);
356  std::vector<BranchID> const& parents = entryDesc.parents();
357  for(auto const& parent : parents) {
359  }
360  }
361  }
362  }
363 
364  void
366  std::string defaultString;
367 
368  desc.setComment("Writes runs, lumis, and events into EDM/ROOT files.");
369  desc.addUntracked<std::string>("fileName")
370  ->setComment("Name of output file.");
371  desc.addUntracked<std::string>("logicalFileName", defaultString)
372  ->setComment("Passed to job report. Otherwise unused by module.");
373  desc.addUntracked<std::string>("catalog", defaultString)
374  ->setComment("Passed to job report. Otherwise unused by module.");
375  desc.addUntracked<int>("maxSize", 0x7f000000)
376  ->setComment("Maximum output file size, in kB.\n"
377  "If over maximum, new output file will be started at next input file transition.");
378  desc.addUntracked<int>("compressionLevel", 7)
379  ->setComment("ROOT compression level of output file.");
380  desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
381  ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB and LZMA");
382  desc.addUntracked<int>("basketSize", 16384)
383  ->setComment("Default ROOT basket size in output file.");
384  desc.addUntracked<int>("eventAutoFlushCompressedSize",-1)->setComment("Set ROOT auto flush stored data size (in bytes) for event TTree. The value sets how large the compressed buffer is allowed to get. The uncompressed buffer can be quite a bit larger than this depending on the average compression ratio. The value of -1 just uses ROOT's default value. The value of 0 turns off this feature.");
385  desc.addUntracked<int>("splitLevel", 99)
386  ->setComment("Default ROOT branch split level in output file.");
387  desc.addUntracked<std::string>("sortBaskets", std::string("sortbasketsbyoffset"))
388  ->setComment("Legal values: 'sortbasketsbyoffset', 'sortbasketsbybranch', 'sortbasketsbyentry'.\n"
389  "Used by ROOT when fast copying. Affects performance.");
390  desc.addUntracked<int>("treeMaxVirtualSize", -1)
391  ->setComment("Size of ROOT TTree TBasket cache. Affects performance.");
392  desc.addUntracked<bool>("fastCloning", true)
393  ->setComment("True: Allow fast copying, if possible.\n"
394  "False: Disable fast copying.");
395  desc.addUntracked<bool>("overrideInputFileSplitLevels", false)
396  ->setComment("False: Use branch split levels and basket sizes from input file, if possible.\n"
397  "True: Always use specified or default split levels and basket sizes.");
398  desc.addUntracked<bool>("writeStatusFile", false)
399  ->setComment("Write a status file. Intended for use by workflow management.");
400  desc.addUntracked<std::string>("dropMetaData", defaultString)
401  ->setComment("Determines handling of per product per event metadata. Options are:\n"
402  "'NONE': Keep all of it.\n"
403  "'DROPPED': Keep it for products produced in current process and all kept products. Drop it for dropped products produced in prior processes.\n"
404  "'PRIOR': Keep it for products produced in current process. Drop it for products produced in prior processes.\n"
405  "'ALL': Drop all of it.");
406  ParameterSetDescription dataSet;
407  dataSet.setAllowAnything();
408  desc.addUntracked<ParameterSetDescription>("dataset", dataSet)
409  ->setComment("PSet is only used by Data Operations and not by this module.");
410 
412  }
413 
414  void
418  descriptions.add("edmOutput", desc);
419  }
420 }
virtual void openFile(FileBlock const &fb) override
virtual std::pair< std::string, std::string > physicalAndLogicalNameForNewFile()
T getUntrackedParameter(std::string const &, T const &) const
int i
Definition: DBlmapReader.cc:9
std::string const & branchName() const
std::string const & BranchTypeToAuxiliaryBranchName(BranchType const &branchType)
Definition: BranchType.cc:115
BranchDescription const * branchDescription_
int const & basketSize() const
EventID const & id() const
TPRegexp parents
Definition: eve_filter.cc:21
virtual void write(EventForOutput const &e) override
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
static const HistoName names[]
edm::propagate_const< std::unique_ptr< RootOutputFile > > rootOutputFile_
static int const invalidSplitLevel
void setAllowAnything()
allow any parameter label/value pairs
static int const invalidBasketSize
void updateBranchParents(EventForOutput const &e)
DropMetaData const & dropMetaData() const
BranchChildren branchChildren_
std::vector< OutputItem > OutputItemList
std::string const & fileName() const
void insertEmpty(BranchID parent)
void insertChild(BranchID parent, BranchID child)
std::string const moduleLabel_
ParameterSet getUntrackedParameterSet(std::string const &name, ParameterSet const &defaultValue) const
bool int lh
Definition: SIMDVec.h:21
uint16_t size_type
virtual void doExtrasAfterCloseFile()
std::string const & logicalFileName() const
BranchType
Definition: BranchType.h:11
std::vector< std::pair< BranchDescription const *, EDGetToken > > SelectedProducts
void fillSelectedItemList(BranchType branchtype, TTree *theInputTree)
std::vector< BranchID > const & parents() const
Definition: Parentage.h:44
PoolOutputModule(ParameterSet const &ps)
void setComment(std::string const &value)
bool operator()(OutputItem const &lh, OutputItem const &rh) const
std::string const & currentFileName() const
bool getMapped(key_type const &k, value_type &result) const
OutputItemListArray selectedOutputItemList_
ESProducts< T, S > products(const T &i1, const S &i2)
Definition: ESProducts.h:189
static void fillDescription(ParameterSetDescription &desc, std::vector< std::string > const &iDefaultOutputCommands=ProductSelectorRules::defaultSelectionStrings())
ProductProvenanceRetriever const * productProvenanceRetrieverPtr() const
SelectedProductsForBranchType const & keptProducts() const
int const & splitLevel() const
BranchID const & branchID() const
EventID const & min(EventID const &lh, EventID const &rh)
Definition: EventID.h:137
virtual void reallyOpenFile() override
BranchParents branchParents_
unsigned int numberOfDigitsInIndex_
virtual void writeRun(RunForOutput const &r) override
void sort_all(RandomAccessSequence &s)
wrappers for std::sort
Definition: Algorithms.h:120
TTree * lumiTree() const
Definition: FileBlock.h:99
virtual bool shouldWeCloseFile() const override
allow inheriting classes to override but still be able to call this method in the overridden version ...
virtual void respondToCloseInputFile(FileBlock const &fb) override
virtual bool isFileOpen() const override
virtual void beginJob() override
virtual void reallyCloseFile() override
void add(std::string const &label, ParameterSetDescription const &psetDescription)
ParentageID const & parentageID() const
virtual void postForkReacquireResources(unsigned int iChildIndex, unsigned int iNumberOfChildren) override
std::shared_ptr< std::map< std::string, int > > treeMap_
ProductProvenance const * branchIDToProvenance(BranchID const &bid) const
void beginInputFile(FileBlock const &fb)
volatile std::atomic< bool > shutdown_flag false
virtual void respondToOpenInputFile(FileBlock const &fb) override
static void fillDescriptions(ConfigurationDescriptions &descriptions)
static void fillDescription(ParameterSetDescription &desc)
moduleLabel_(iConfig.getParameter< string >("@module_label"))
static ParentageRegistry * instance()
TTree * runTree() const
Definition: FileBlock.h:101
TTree * tree() const
Definition: FileBlock.h:97
virtual void writeLuminosityBlock(LuminosityBlockForOutput const &lb) override