CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
PoolOutputModule.cc
Go to the documentation of this file.
2 
4 
20 
21 #include "TTree.h"
22 #include "TBranchElement.h"
23 #include "TObjArray.h"
24 #include "RVersion.h"
25 
26 #include <fstream>
27 #include <iomanip>
28 #include <sstream>
29 
30 namespace edm {
32  edm::one::OutputModuleBase::OutputModuleBase(pset),
33  one::OutputModule<WatchInputFiles>(pset),
34  rootServiceChecker_(),
35  auxItems_(),
36  selectedOutputItemList_(),
37  fileName_(pset.getUntrackedParameter<std::string>("fileName")),
38  logicalFileName_(pset.getUntrackedParameter<std::string>("logicalFileName")),
39  catalog_(pset.getUntrackedParameter<std::string>("catalog")),
40  maxFileSize_(pset.getUntrackedParameter<int>("maxSize")),
41  compressionLevel_(pset.getUntrackedParameter<int>("compressionLevel")),
42 #if ROOT_VERSION_CODE >= ROOT_VERSION(5,30,0)
43  compressionAlgorithm_(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
44 #else
45  compressionAlgorithm_("ZLIB"),
46 #endif
47  basketSize_(pset.getUntrackedParameter<int>("basketSize")),
48  eventAutoFlushSize_(pset.getUntrackedParameter<int>("eventAutoFlushCompressedSize")),
49  splitLevel_(std::min<int>(pset.getUntrackedParameter<int>("splitLevel") + 1, 99)),
50  basketOrder_(pset.getUntrackedParameter<std::string>("sortBaskets")),
51  treeMaxVirtualSize_(pset.getUntrackedParameter<int>("treeMaxVirtualSize")),
52  whyNotFastClonable_(pset.getUntrackedParameter<bool>("fastCloning") ? FileBlock::CanFastClone : FileBlock::DisabledInConfigFile),
53  dropMetaData_(DropNone),
54  moduleLabel_(pset.getParameter<std::string>("@module_label")),
55  initializedFromInput_(false),
56  outputFileCount_(0),
57  inputFileCount_(0),
58  childIndex_(0U),
59  numberOfDigitsInIndex_(0U),
60  branchParents_(),
61  branchChildren_(),
62  overrideInputFileSplitLevels_(pset.getUntrackedParameter<bool>("overrideInputFileSplitLevels")),
63  rootOutputFile_(),
64  statusFileName_() {
65 
66  if (pset.getUntrackedParameter<bool>("writeStatusFile")) {
67  std::ostringstream statusfilename;
68  statusfilename << moduleLabel_ << '_' << getpid();
69  statusFileName_ = statusfilename.str();
70  }
71 
73  if(dropMetaData.empty()) dropMetaData_ = DropNone;
74  else if(dropMetaData == std::string("NONE")) dropMetaData_ = DropNone;
75  else if(dropMetaData == std::string("DROPPED")) dropMetaData_ = DropDroppedPrior;
76  else if(dropMetaData == std::string("PRIOR")) dropMetaData_ = DropPrior;
77  else if(dropMetaData == std::string("ALL")) dropMetaData_ = DropAll;
78  else {
79  throw edm::Exception(errors::Configuration, "Illegal dropMetaData parameter value: ")
80  << dropMetaData << ".\n"
81  << "Legal values are 'NONE', 'DROPPED', 'PRIOR', and 'ALL'.\n";
82  }
83 
84  if (!wantAllEvents()) {
86  }
87 
88  // We don't use this next parameter, but we read it anyway because it is part
89  // of the configuration of this module. An external parser creates the
90  // configuration by reading this source code.
91  pset.getUntrackedParameterSet("dataset");
92  }
93 
95  for(int i = InEvent; i < NumBranchTypes; ++i) {
96  BranchType branchType = static_cast<BranchType>(i);
97  SelectedProducts const& keptVector = keptProducts()[branchType];
98  for(auto const& prod : keptVector) {
99  checkClassDictionaries(TypeID(prod->wrappedType().typeInfo()), false);
100  }
101  }
102  }
103 
105  return rootOutputFile_->fileName();
106  }
107 
109  basketSize_(BranchDescription::invalidBasketSize) {}
110 
112  branchDescription_(0),
113  product_(0),
114  splitLevel_(BranchDescription::invalidSplitLevel),
115  basketSize_(BranchDescription::invalidBasketSize) {}
116 
118  branchDescription_(bd),
119  product_(0),
120  splitLevel_(splitLevel),
121  basketSize_(basketSize) {}
122 
123 
124  PoolOutputModule::OutputItem::Sorter::Sorter(TTree* tree) : treeMap_(new std::map<std::string, int>) {
125  // Fill a map mapping branch names to an index specifying the order in the tree.
126  if(tree != 0) {
127  TObjArray* branches = tree->GetListOfBranches();
128  for(int i = 0; i < branches->GetEntries(); ++i) {
129  TBranchElement* br = (TBranchElement*)branches->At(i);
130  treeMap_->insert(std::make_pair(std::string(br->GetName()), i));
131  }
132  }
133  }
134 
135  bool
137  // Provides a comparison for sorting branches according to the index values in treeMap_.
138  // Branches not found are always put at the end (i.e. not found > found).
139  if(treeMap_->empty()) return lh < rh;
140  std::string const& lstring = lh.branchDescription_->branchName();
141  std::string const& rstring = rh.branchDescription_->branchName();
142  std::map<std::string, int>::const_iterator lit = treeMap_->find(lstring);
143  std::map<std::string, int>::const_iterator rit = treeMap_->find(rstring);
144  bool lfound = (lit != treeMap_->end());
145  bool rfound = (rit != treeMap_->end());
146  if(lfound && rfound) {
147  return lit->second < rit->second;
148  } else if(lfound) {
149  return true;
150  } else if(rfound) {
151  return false;
152  }
153  return lh < rh;
154  }
155 
157 
158  SelectedProducts const& keptVector = keptProducts()[branchType];
160  AuxItem& auxItem = auxItems_[branchType];
161 
162  // Fill AuxItem
163  if (theInputTree != 0 && !overrideInputFileSplitLevels_) {
164  TBranch* auxBranch = theInputTree->GetBranch(BranchTypeToAuxiliaryBranchName(branchType).c_str());
165  if (auxBranch) {
166  auxItem.basketSize_ = auxBranch->GetBasketSize();
167  } else {
168  auxItem.basketSize_ = basketSize_;
169  }
170  } else {
171  auxItem.basketSize_ = basketSize_;
172  }
173 
174  // Fill outputItemList with an entry for each branch.
175  for(SelectedProducts::const_iterator it = keptVector.begin(), itEnd = keptVector.end(); it != itEnd; ++it) {
178 
179  BranchDescription const& prod = **it;
180  TBranch* theBranch = ((!prod.produced() && theInputTree != 0 && !overrideInputFileSplitLevels_) ? theInputTree->GetBranch(prod.branchName().c_str()) : 0);
181 
182  if(theBranch != 0) {
183  splitLevel = theBranch->GetSplitLevel();
184  basketSize = theBranch->GetBasketSize();
185  } else {
186  splitLevel = (prod.splitLevel() == BranchDescription::invalidSplitLevel ? splitLevel_ : prod.splitLevel());
187  basketSize = (prod.basketSize() == BranchDescription::invalidBasketSize ? basketSize_ : prod.basketSize());
188  }
189  outputItemList.emplace_back(&prod, splitLevel, basketSize);
190  }
191 
192  // Sort outputItemList to allow fast copying.
193  // The branches in outputItemList must be in the same order as in the input tree, with all new branches at the end.
194  sort_all(outputItemList, OutputItem::Sorter(theInputTree));
195  }
196 
198  if(isFileOpen()) {
199  rootOutputFile_->beginInputFile(fb, remainingEvents());
200  }
201  }
202 
204  if(!isFileOpen()) {
205  reallyOpenFile();
206  beginInputFile(fb);
207  }
208  }
209 
211  if(!initializedFromInput_) {
212  for(int i = InEvent; i < NumBranchTypes; ++i) {
213  BranchType branchType = static_cast<BranchType>(i);
214  TTree* theInputTree = (branchType == InEvent ? fb.tree() :
215  (branchType == InLumi ? fb.lumiTree() :
216  fb.runTree()));
217  fillSelectedItemList(branchType, theInputTree);
218  }
219  initializedFromInput_ = true;
220  }
221  ++inputFileCount_;
222  beginInputFile(fb);
223  }
224 
226  if(rootOutputFile_) rootOutputFile_->respondToCloseInputFile(fb);
227  }
228 
229  void PoolOutputModule::postForkReacquireResources(unsigned int iChildIndex, unsigned int iNumberOfChildren) {
230  childIndex_ = iChildIndex;
231  while (iNumberOfChildren != 0) {
233  iNumberOfChildren /= 10;
234  }
235  if (numberOfDigitsInIndex_ == 0) {
236  numberOfDigitsInIndex_ = 3; // Protect against zero iNumberOfChildren
237  }
238  }
239 
241  }
242 
245  rootOutputFile_->writeOne(e, mcc);
246  if (!statusFileName_.empty()) {
247  std::ofstream statusFile(statusFileName_.c_str());
248  statusFile << e.id() << " time: " << std::setprecision(3) << TimeOfDay() << '\n';
249  statusFile.close();
250  }
251  }
252 
254  rootOutputFile_->writeLuminosityBlock(lb, mcc);
255  }
256 
258  rootOutputFile_->writeRun(r, mcc);
259  }
260 
263  branchParents_.clear();
265  startEndFile();
276  finishEndFile();
277 
279  }
280 
281 
282  // At some later date, we may move functionality from finishEndFile() to here.
284 
285  void PoolOutputModule::writeFileFormatVersion() { rootOutputFile_->writeFileFormatVersion(); }
286  void PoolOutputModule::writeFileIdentifier() { rootOutputFile_->writeFileIdentifier(); }
287  void PoolOutputModule::writeIndexIntoFile() { rootOutputFile_->writeIndexIntoFile(); }
288  void PoolOutputModule::writeProcessHistoryRegistry() { rootOutputFile_->writeProcessHistoryRegistry(); }
289  void PoolOutputModule::writeParameterSetRegistry() { rootOutputFile_->writeParameterSetRegistry(); }
290  void PoolOutputModule::writeProductDescriptionRegistry() { rootOutputFile_->writeProductDescriptionRegistry(); }
291  void PoolOutputModule::writeParentageRegistry() { rootOutputFile_->writeParentageRegistry(); }
292  void PoolOutputModule::writeBranchIDListRegistry() { rootOutputFile_->writeBranchIDListRegistry(); }
293  void PoolOutputModule::writeThinnedAssociationsHelper() { rootOutputFile_->writeThinnedAssociationsHelper(); }
294  void PoolOutputModule::writeProductDependencies() { rootOutputFile_->writeProductDependencies(); }
297  bool PoolOutputModule::isFileOpen() const { return rootOutputFile_.get() != 0; }
298  bool PoolOutputModule::shouldWeCloseFile() const { return rootOutputFile_->shouldWeCloseFile(); }
299 
300  std::pair<std::string, std::string>
302  if(inputFileCount_ == 0) {
304  << "Attempt to open output file before input file. "
305  << "Please report this to the core framework developers.\n";
306  }
307  std::string suffix(".root");
308  std::string::size_type offset = fileName().rfind(suffix);
309  bool ext = (offset == fileName().size() - suffix.size());
310  if(!ext) suffix.clear();
311  std::string fileBase(ext ? fileName().substr(0, offset) : fileName());
312  std::ostringstream ofilename;
313  std::ostringstream lfilename;
314  ofilename << fileBase;
315  lfilename << logicalFileName();
317  ofilename << '_' << std::setw(numberOfDigitsInIndex_) << std::setfill('0') << childIndex_;
318  if(!logicalFileName().empty()) {
319  lfilename << '_' << std::setw(numberOfDigitsInIndex_) << std::setfill('0') << childIndex_;
320  }
321  }
322  if(outputFileCount_) {
323  ofilename << std::setw(3) << std::setfill('0') << outputFileCount_;
324  if(!logicalFileName().empty()) {
325  lfilename << std::setw(3) << std::setfill('0') << outputFileCount_;
326  }
327  }
328  ofilename << suffix;
330 
331  return std::make_pair(ofilename.str(), lfilename.str());
332  }
333 
336  rootOutputFile_.reset( new RootOutputFile(this, names.first, names.second));
337  }
338 
339  void
341  for(EventPrincipal::const_iterator i = ep.begin(), iEnd = ep.end(); i != iEnd; ++i) {
342  if((*i) && (*i)->productProvenancePtr() != 0) {
343  BranchID const& bid = (*i)->branchDescription().branchID();
344  BranchParents::iterator it = branchParents_.find(bid);
345  if(it == branchParents_.end()) {
346  it = branchParents_.insert(std::make_pair(bid, std::set<ParentageID>())).first;
347  }
348  it->second.insert((*i)->productProvenancePtr()->parentageID());
350  }
351  }
352  }
353 
354  void
356  for(BranchParents::const_iterator i = branchParents_.begin(), iEnd = branchParents_.end();
357  i != iEnd; ++i) {
358  BranchID const& child = i->first;
359  std::set<ParentageID> const& eIds = i->second;
360  for(std::set<ParentageID>::const_iterator it = eIds.begin(), itEnd = eIds.end();
361  it != itEnd; ++it) {
362  Parentage entryDesc;
363  ParentageRegistry::instance()->getMapped(*it, entryDesc);
364  std::vector<BranchID> const& parents = entryDesc.parents();
365  for(std::vector<BranchID>::const_iterator j = parents.begin(), jEnd = parents.end();
366  j != jEnd; ++j) {
367  branchChildren_.insertChild(*j, child);
368  }
369  }
370  }
371  }
372 
373  void
375  std::string defaultString;
376 
377  desc.setComment("Writes runs, lumis, and events into EDM/ROOT files.");
378  desc.addUntracked<std::string>("fileName")
379  ->setComment("Name of output file.");
380  desc.addUntracked<std::string>("logicalFileName", defaultString)
381  ->setComment("Passed to job report. Otherwise unused by module.");
382  desc.addUntracked<std::string>("catalog", defaultString)
383  ->setComment("Passed to job report. Otherwise unused by module.");
384  desc.addUntracked<int>("maxSize", 0x7f000000)
385  ->setComment("Maximum output file size, in kB.\n"
386  "If over maximum, new output file will be started at next input file transition.");
387  desc.addUntracked<int>("compressionLevel", 7)
388  ->setComment("ROOT compression level of output file.");
389 #if ROOT_VERSION_CODE >= ROOT_VERSION(5,30,0)
390  desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
391  ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB and LZMA");
392 #endif
393  desc.addUntracked<int>("basketSize", 16384)
394  ->setComment("Default ROOT basket size in output file.");
395  desc.addUntracked<int>("eventAutoFlushCompressedSize",-1)->setComment("Set ROOT auto flush stored data size (in bytes) for event TTree. The value sets how large the compressed buffer is allowed to get. The uncompressed buffer can be quite a bit larger than this depending on the average compression ratio. The value of -1 just uses ROOT's default value. The value of 0 turns off this feature.");
396  desc.addUntracked<int>("splitLevel", 99)
397  ->setComment("Default ROOT branch split level in output file.");
398  desc.addUntracked<std::string>("sortBaskets", std::string("sortbasketsbyoffset"))
399  ->setComment("Legal values: 'sortbasketsbyoffset', 'sortbasketsbybranch', 'sortbasketsbyentry'.\n"
400  "Used by ROOT when fast copying. Affects performance.");
401  desc.addUntracked<int>("treeMaxVirtualSize", -1)
402  ->setComment("Size of ROOT TTree TBasket cache. Affects performance.");
403  desc.addUntracked<bool>("fastCloning", true)
404  ->setComment("True: Allow fast copying, if possible.\n"
405  "False: Disable fast copying.");
406  desc.addUntracked<bool>("overrideInputFileSplitLevels", false)
407  ->setComment("False: Use branch split levels and basket sizes from input file, if possible.\n"
408  "True: Always use specified or default split levels and basket sizes.");
409  desc.addUntracked<bool>("writeStatusFile", false)
410  ->setComment("Write a status file. Intended for use by workflow management.");
411  desc.addUntracked<std::string>("dropMetaData", defaultString)
412  ->setComment("Determines handling of per product per event metadata. Options are:\n"
413  "'NONE': Keep all of it.\n"
414  "'DROPPED': Keep it for products produced in current process and all kept products. Drop it for dropped products produced in prior processes.\n"
415  "'PRIOR': Keep it for products produced in current process. Drop it for products produced in prior processes.\n"
416  "'ALL': Drop all of it.");
417  ParameterSetDescription dataSet;
418  dataSet.setAllowAnything();
419  desc.addUntracked<ParameterSetDescription>("dataset", dataSet)
420  ->setComment("PSet is only used by Data Operations and not by this module.");
421 
423  }
424 
425  void
429  descriptions.add("edmOutput", desc);
430  }
431 }
virtual void openFile(FileBlock const &fb) override
virtual std::pair< std::string, std::string > physicalAndLogicalNameForNewFile()
T getUntrackedParameter(std::string const &, T const &) const
int i
Definition: DBlmapReader.cc:9
std::string const & branchName() const
std::string const & BranchTypeToAuxiliaryBranchName(BranchType const &branchType)
Definition: BranchType.cc:115
virtual void writeRun(RunPrincipal const &r, ModuleCallingContext const *) override
BranchDescription const * branchDescription_
int const & basketSize() const
TPRegexp parents
Definition: eve_filter.cc:21
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
static const HistoName names[]
static int const invalidSplitLevel
const_iterator end() const
Definition: Principal.h:163
void setAllowAnything()
allow any parameter label/value pairs
static int const invalidBasketSize
void checkClassDictionaries(TypeID const &type, bool recursive=true)
DropMetaData const & dropMetaData() const
BranchChildren branchChildren_
EventID const & id() const
std::vector< OutputItem > OutputItemList
std::string const & fileName() const
void insertEmpty(BranchID parent)
void insertChild(BranchID parent, BranchID child)
std::string const moduleLabel_
ParameterSet getUntrackedParameterSet(std::string const &name, ParameterSet const &defaultValue) const
bool int lh
Definition: SIMDVec.h:21
uint16_t size_type
virtual void doExtrasAfterCloseFile()
std::string const & logicalFileName() const
BranchType
Definition: BranchType.h:11
else
Definition: XrdSource.cc:104
void fillSelectedItemList(BranchType branchtype, TTree *theInputTree)
std::vector< BranchID > const & parents() const
Definition: Parentage.h:37
PoolOutputModule(ParameterSet const &ps)
void setComment(std::string const &value)
bool operator()(OutputItem const &lh, OutputItem const &rh) const
std::string const & currentFileName() const
bool getMapped(key_type const &k, value_type &result) const
OutputItemListArray selectedOutputItemList_
int j
Definition: DBlmapReader.cc:9
void updateBranchParents(EventPrincipal const &ep)
SelectedProductsForBranchType const & keptProducts() const
int const & splitLevel() const
EventID const & min(EventID const &lh, EventID const &rh)
Definition: EventID.h:137
virtual void reallyOpenFile() override
BranchParents branchParents_
std::unique_ptr< RootOutputFile > rootOutputFile_
unsigned int numberOfDigitsInIndex_
void sort_all(RandomAccessSequence &s)
wrappers for std::sort
Definition: Algorithms.h:120
TTree * lumiTree() const
Definition: FileBlock.h:99
virtual bool shouldWeCloseFile() const override
allow inheriting classes to override but still be able to call this method in the overridden version ...
virtual void respondToCloseInputFile(FileBlock const &fb) override
virtual bool isFileOpen() const override
virtual void write(EventPrincipal const &e, ModuleCallingContext const *) override
const_iterator begin() const
Definition: Principal.h:162
virtual void beginJob() override
virtual void reallyCloseFile() override
void add(std::string const &label, ParameterSetDescription const &psetDescription)
virtual void writeLuminosityBlock(LuminosityBlockPrincipal const &lb, ModuleCallingContext const *) override
static void fillDescription(ParameterSetDescription &desc)
virtual void postForkReacquireResources(unsigned int iChildIndex, unsigned int iNumberOfChildren) override
std::shared_ptr< std::map< std::string, int > > treeMap_
boost::filter_iterator< FilledProductPtr, ProductHolderCollection::const_iterator > const_iterator
Definition: Principal.h:57
std::vector< BranchDescription const * > SelectedProducts
void beginInputFile(FileBlock const &fb)
volatile std::atomic< bool > shutdown_flag false
virtual void respondToOpenInputFile(FileBlock const &fb) override
if(conf.exists("allCellsPositionCalc"))
static void fillDescriptions(ConfigurationDescriptions &descriptions)
static void fillDescription(ParameterSetDescription &desc)
moduleLabel_(iConfig.getParameter< string >("@module_label"))
static ParentageRegistry * instance()
TTree * runTree() const
Definition: FileBlock.h:101
TTree * tree() const
Definition: FileBlock.h:97