CMS 3D CMS Logo

RootOutputFile.cc
Go to the documentation of this file.
1 
3 
5 
43 
44 #include "TTree.h"
45 #include "TFile.h"
46 #include "TClass.h"
47 #include "Rtypes.h"
48 #include "RVersion.h"
49 
50 #include "Compression.h"
51 
52 #include <algorithm>
53 #include <iomanip>
54 #include <sstream>
55 
56 namespace edm {
57 
58  namespace {
59  bool sorterForJobReportHash(BranchDescription const* lh, BranchDescription const* rh) {
60  return lh->fullClassName() < rh->fullClassName() ? true
61  : lh->fullClassName() > rh->fullClassName() ? false
62  : lh->moduleLabel() < rh->moduleLabel() ? true
63  : lh->moduleLabel() > rh->moduleLabel() ? false
64  : lh->productInstanceName() < rh->productInstanceName() ? true
65  : lh->productInstanceName() > rh->productInstanceName() ? false
66  : lh->processName() < rh->processName() ? true
67  : false;
68  }
69 
70  TFile* openTFile(char const* name, int compressionLevel) {
71  TFile* file = TFile::Open(name, "recreate", "", compressionLevel);
72  std::exception_ptr e = edm::threadLocalException::getException();
73  if (e != std::exception_ptr()) {
74  edm::threadLocalException::setException(std::exception_ptr());
75  std::rethrow_exception(e);
76  }
77  return file;
78  }
79  } // namespace
80 
82  std::string const& fileName,
83  std::string const& logicalFileName,
84  std::vector<std::string> const& processesWithSelectedMergeableRunProducts,
85  std::string const& overrideGUID)
86  : file_(fileName),
87  logicalFile_(logicalFileName),
88  reportToken_(0),
89  om_(om),
90  whyNotFastClonable_(om_->whyNotFastClonable()),
91  canFastCloneAux_(false),
92  filePtr_(openTFile(file_.c_str(), om_->compressionLevel())),
93  fid_(),
94  eventEntryNumber_(0LL),
95  lumiEntryNumber_(0LL),
96  runEntryNumber_(0LL),
97  indexIntoFile_(),
98  storedMergeableRunProductMetadata_(processesWithSelectedMergeableRunProducts),
99  nEventsInLumi_(0),
100  metaDataTree_(nullptr),
101  parameterSetsTree_(nullptr),
102  parentageTree_(nullptr),
103  lumiAux_(),
104  runAux_(),
105  pEventAux_(nullptr),
106  pLumiAux_(&lumiAux_),
107  pRunAux_(&runAux_),
108  eventEntryInfoVector_(),
109  pEventEntryInfoVector_(&eventEntryInfoVector_),
110  pBranchListIndexes_(nullptr),
111  pEventSelectionIDs_(nullptr),
112  eventTree_(filePtr(), InEvent, om_->splitLevel(), om_->treeMaxVirtualSize()),
113  lumiTree_(filePtr(), InLumi, om_->splitLevel(), om_->treeMaxVirtualSize()),
114  runTree_(filePtr(), InRun, om_->splitLevel(), om_->treeMaxVirtualSize()),
115  dataTypeReported_(false),
116  processHistoryRegistry_(),
117  parentageIDs_(),
118  branchesWithStoredHistory_(),
119  wrapperBaseTClass_(TClass::GetClass("edm::WrapperBase")) {
120  std::vector<std::string> const& processesWithProcessBlockProducts =
121  om_->outputProcessBlockHelper().processesWithProcessBlockProducts();
122  for (auto const& processName : processesWithProcessBlockProducts) {
123  processBlockTrees_.emplace_back(std::make_unique<RootOutputTree>(
124  filePtr(), InProcess, om_->splitLevel(), om_->treeMaxVirtualSize(), processName));
125  }
126 
127  if (om_->compressionAlgorithm() == std::string("ZLIB")) {
128  filePtr_->SetCompressionAlgorithm(ROOT::kZLIB);
129  } else if (om_->compressionAlgorithm() == std::string("LZMA")) {
130  filePtr_->SetCompressionAlgorithm(ROOT::kLZMA);
131  } else if (om_->compressionAlgorithm() == std::string("ZSTD")) {
132  filePtr_->SetCompressionAlgorithm(ROOT::kZSTD);
133  } else if (om_->compressionAlgorithm() == std::string("LZ4")) {
134  filePtr_->SetCompressionAlgorithm(ROOT::kLZ4);
135  } else {
137  << "PoolOutputModule configured with unknown compression algorithm '" << om_->compressionAlgorithm() << "'\n"
138  << "Allowed compression algorithms are ZLIB, LZMA, LZ4, and ZSTD\n";
139  }
140  if (-1 != om->eventAutoFlushSize()) {
142  }
143  if (om_->compactEventAuxiliary()) {
145  BranchTypeToAuxiliaryBranchName(InEvent), pEventAux_, om_->auxItems()[InEvent].basketSize_, false);
146  eventTree_.tree()->SetBranchStatus(BranchTypeToAuxiliaryBranchName(InEvent).c_str(),
147  false); // see writeEventAuxiliary
148  } else {
150  BranchTypeToAuxiliaryBranchName(InEvent), pEventAux_, om_->auxItems()[InEvent].basketSize_);
151  }
152 
155  om_->auxItems()[InEvent].basketSize_);
157  poolNames::eventSelectionsBranchName(), pEventSelectionIDs_, om_->auxItems()[InEvent].basketSize_, false);
160 
161  if (om_->outputProcessBlockHelper().productsFromInputKept()) {
164  om_->auxItems()[InEvent].basketSize_);
165  }
166 
168  BranchTypeToAuxiliaryBranchName(InLumi), pLumiAux_, om_->auxItems()[InLumi].basketSize_);
169 
171  BranchTypeToAuxiliaryBranchName(InRun), pRunAux_, om_->auxItems()[InRun].basketSize_);
172 
173  treePointers_.emplace_back(&eventTree_);
174  treePointers_.emplace_back(&lumiTree_);
175  treePointers_.emplace_back(&runTree_);
176  for (auto& processBlockTree : processBlockTrees_) {
177  treePointers_.emplace_back(processBlockTree.get());
178  }
179 
180  for (unsigned int i = 0; i < treePointers_.size(); ++i) {
181  RootOutputTree* theTree = treePointers_[i];
182  for (auto& item : om_->selectedOutputItemList()[i]) {
183  item.setProduct(nullptr);
184  BranchDescription const& desc = *item.branchDescription();
185  theTree->addBranch(desc.branchName(),
186  desc.wrappedName(),
187  item.product(),
188  item.splitLevel(),
189  item.basketSize(),
190  item.branchDescription()->produced());
191  //make sure we always store product registry info for all branches we create
192  branchesWithStoredHistory_.insert(item.branchID());
193  }
194  }
195  // Don't split metadata tree or event description tree
199 
200  if (overrideGUID.empty()) {
202  } else {
203  if (not isValidGlobalIdentifier(overrideGUID)) {
205  << "GUID to be used for output file is not valid (is '" << overrideGUID << "')";
206  }
207  fid_ = FileID(overrideGUID);
208  }
209 
210  // For the Job Report, get a vector of branch names in the "Events" tree.
211  // Also create a hash of all the branch names in the "Events" tree
212  // in a deterministic order, except use the full class name instead of the friendly class name.
213  // To avoid extra string copies, we create a vector of pointers into the product registry,
214  // and use a custom comparison operator for sorting.
215  std::vector<std::string> branchNames;
216  std::vector<BranchDescription const*> branches;
217  branchNames.reserve(om_->selectedOutputItemList()[InEvent].size());
218  branches.reserve(om->selectedOutputItemList()[InEvent].size());
219  for (auto const& item : om_->selectedOutputItemList()[InEvent]) {
220  branchNames.push_back(item.branchDescription()->branchName());
221  branches.push_back(item.branchDescription());
222  }
223  // Now sort the branches for the hash.
224  sort_all(branches, sorterForJobReportHash);
225  // Now, make a concatenated string.
226  std::ostringstream oss;
227  char const underscore = '_';
228  for (auto const& branch : branches) {
229  BranchDescription const& bd = *branch;
230  oss << bd.fullClassName() << underscore << bd.moduleLabel() << underscore << bd.productInstanceName()
231  << underscore << bd.processName() << underscore;
232  }
233  std::string stringrep = oss.str();
234  cms::Digest md5alg(stringrep);
235 
236  // Register the output file with the JobReport service
237  // and get back the token for it.
238  std::string moduleName = "PoolOutputModule";
239  Service<JobReport> reportSvc;
240  reportToken_ = reportSvc->outputFileOpened(file_,
241  logicalFile_, // PFN and LFN
242  om_->catalog(), // catalog
243  moduleName, // module class name
244  om_->moduleLabel(), // module label
245  fid_.fid(), // file id (guid)
246  std::string(), // data type (not yet known, so string is empty).
247  md5alg.digest().toString(), // branch hash
248  branchNames); // branch names being written
249  }
250 
251  namespace {
// Build and log a single message listing every reason fast copying of ifileName
// into ofileName is disabled.
//   whyNotFastClonable - bit mask of FileBlock reason flags (taken by value; bits are cleared locally)
//   ifileName          - input file name, used only in the message text
//   ofileName          - output file name, used only in the message text
// Each recognised flag appends one line to the message and is cleared from the mask.
// The message is sent to LogWarning unless at least one of the flags that resets
// isWarning was present, in which case it is sent to LogInfo.
252  void maybeIssueWarning(int whyNotFastClonable, std::string const& ifileName, std::string const& ofileName) {
253  // No message if fast cloning was deliberately disabled, or if there are no events to copy anyway.
// NOTE(review): the condition guarding this early return (listing lines 254-255) is not present in this copy.
256  return;
257  }
258 
259  // There will be a message stating every reason that fast cloning was not possible.
260  // If at least one of the reasons was because of something the user explicitly specified (e.g. event selection, skipping events),
261  // or if the input file was in an old format, the message will be informational. Otherwise, the message will be a warning.
262  bool isWarning = true;
263  std::ostringstream message;
264  message << "Fast copying of file " << ifileName << " to file " << ofileName << " is disabled because:\n";
265  if ((whyNotFastClonable & FileBlock::HasSecondaryFileSequence) != 0) {
266  message << "a SecondaryFileSequence was specified.\n";
267  whyNotFastClonable &= ~(FileBlock::HasSecondaryFileSequence);
268  isWarning = false;
269  }
270  if ((whyNotFastClonable & FileBlock::FileTooOld) != 0) {
271  message << "the input file is in an old format.\n";
272  whyNotFastClonable &= ~(FileBlock::FileTooOld);
273  isWarning = false;
274  }
// EventsToBeSorted and RunOrLumiNotContiguous leave isWarning unchanged.
275  if ((whyNotFastClonable & FileBlock::EventsToBeSorted) != 0) {
276  message << "events need to be sorted.\n";
277  whyNotFastClonable &= ~(FileBlock::EventsToBeSorted);
278  }
279  if ((whyNotFastClonable & FileBlock::RunOrLumiNotContiguous) != 0) {
280  message << "a run or a lumi is not contiguous in the input file.\n";
281  whyNotFastClonable &= ~(FileBlock::RunOrLumiNotContiguous);
282  }
283  if ((whyNotFastClonable & FileBlock::EventsOrLumisSelectedByID) != 0) {
284  message << "events or lumis were selected or skipped by ID.\n";
285  whyNotFastClonable &= ~(FileBlock::EventsOrLumisSelectedByID);
286  isWarning = false;
287  }
288  if ((whyNotFastClonable & FileBlock::InitialEventsSkipped) != 0) {
289  message << "initial events, lumis or runs were skipped.\n";
290  whyNotFastClonable &= ~(FileBlock::InitialEventsSkipped);
291  isWarning = false;
292  }
// DuplicateEventsRemoved leaves isWarning unchanged.
293  if ((whyNotFastClonable & FileBlock::DuplicateEventsRemoved) != 0) {
294  message << "some events were skipped because of duplicate checking.\n";
295  whyNotFastClonable &= ~(FileBlock::DuplicateEventsRemoved);
296  }
297  if ((whyNotFastClonable & FileBlock::MaxEventsTooSmall) != 0) {
298  message << "some events were not copied because of maxEvents limit.\n";
299  whyNotFastClonable &= ~(FileBlock::MaxEventsTooSmall);
300  isWarning = false;
301  }
302  if ((whyNotFastClonable & FileBlock::MaxLumisTooSmall) != 0) {
303  message << "some events were not copied because of maxLumis limit.\n";
304  whyNotFastClonable &= ~(FileBlock::MaxLumisTooSmall);
305  isWarning = false;
306  }
307  if ((whyNotFastClonable & FileBlock::ParallelProcesses) != 0) {
308  message << "parallel processing was specified.\n";
309  whyNotFastClonable &= ~(FileBlock::ParallelProcesses);
310  isWarning = false;
311  }
312  if ((whyNotFastClonable & FileBlock::EventSelectionUsed) != 0) {
313  message << "an EventSelector was specified.\n";
314  whyNotFastClonable &= ~(FileBlock::EventSelectionUsed);
315  isWarning = false;
316  }
317  if ((whyNotFastClonable & FileBlock::OutputMaxEventsTooSmall) != 0) {
318  message << "some events were not copied because of maxEvents output limit.\n";
319  whyNotFastClonable &= ~(FileBlock::OutputMaxEventsTooSmall);
320  isWarning = false;
321  }
// SplitLevelMismatch and BranchMismatch leave isWarning unchanged.
322  if ((whyNotFastClonable & FileBlock::SplitLevelMismatch) != 0) {
323  message << "the split level or basket size of a branch or branches was modified.\n";
324  whyNotFastClonable &= ~(FileBlock::SplitLevelMismatch);
325  }
326  if ((whyNotFastClonable & FileBlock::BranchMismatch) != 0) {
327  message << "The format of a data product has changed.\n";
328  whyNotFastClonable &= ~(FileBlock::BranchMismatch);
329  }
// Every flag handled above has been cleared from the local mask; the assert fails
// if any bit remains, i.e. if a reason was set that this function does not report.
330  assert(whyNotFastClonable == FileBlock::CanFastClone);
331  if (isWarning) {
332  LogWarning("FastCloningDisabled") << message.str();
333  } else {
334  LogInfo("FastCloningDisabled") << message.str();
335  }
336  }
337  } // namespace
338 
// Per-input-file setup for this output file.
//   fb              - description of the input file being opened; fb.tree() may be null
//   remainingEvents - compared against the input tree's entry count and used as the
//                     starting value of the local `reserve` near the end of the function
// The function resets whyNotFastClonable_ from the output module and canFastCloneAux_ to false,
// examines the input tree when there is one, reports the fast-copy status to the JobReport
// service, and calls maybeIssueWarning when the input has a tree but fast cloning is not possible.
// NOTE(review): several statements of this function are absent from this copy of the listing
// (the embedded line numbers have gaps), so the comments here describe only what is visible.
339  void RootOutputFile::beginInputFile(FileBlock const& fb, int remainingEvents) {
340  // Reset per input file information
341  whyNotFastClonable_ = om_->whyNotFastClonable();
342  canFastCloneAux_ = false;
343 
344  if (fb.tree() != nullptr) {
346 
347  if (remainingEvents >= 0 && remainingEvents < fb.tree()->GetEntries()) {
349  }
350 
352  if (!match) {
353  if (om_->overrideInputFileSplitLevels()) {
354  // We may be fast copying. We must disable fast copying if the split levels
355  // or basket sizes do not match.
357  } else {
358  // We are using the input split levels and basket sizes from the first input file
359  // for copied output branches. In this case, we throw an exception if any branches
360  // have different split levels or basket sizes in a subsequent input file.
361  // If the mismatch is in the first file, there is a bug somewhere, so we assert.
362  assert(om_->inputFileCount() > 1);
// NOTE(review): file_ is also what is passed as the output file name to maybeIssueWarning below,
// yet the message calls it the "input file"; fb.fileName() may have been intended — confirm.
363  throw Exception(errors::MismatchedInputFiles, "RootOutputFile::beginInputFile()")
364  << "Merge failure because input file " << file_ << " has different ROOT split levels or basket sizes\n"
365  << "than previous files. To allow merging in spite of this, use the configuration parameter\n"
366  << "overrideInputFileSplitLevels=cms.untracked.bool(True)\n"
367  << "in every PoolOutputModule.\n";
368  }
369  }
370 
371  // Since this check can be time consuming, we do it only if we would otherwise fast clone.
373  if (!eventTree_.checkIfFastClonable(fb.tree())) {
375  }
376  }
377 
378  // reasons for whyNotFastClonable that are also inconsistent with a merge job
379  constexpr auto setSubBranchBasketConditions =
383 
384  if (om_->inputFileCount() == 1) {
385  if (om_->mergeJob()) {
386  // for merge jobs always forward the compression mode
387  auto infile = fb.tree()->GetCurrentFile();
388  if (infile != nullptr) {
389  filePtr_->SetCompressionSettings(infile->GetCompressionSettings());
390  }
391  }
392 
393  // if we aren't fast cloning, and the reason why is consistent with a
394  // merge job or is only because of parallel processes, then forward all
395  // the sub-branch basket sizes
397  ((om_->mergeJob() && (whyNotFastClonable_ & setSubBranchBasketConditions) == 0) ||
400  }
401  }
402 
403  // We now check if we can fast copy the auxiliary branches.
404  // We can do so only if we can otherwise fast copy,
405  // the input file has the current format (these branches are in the Events Tree),
406  // there are no newly dropped or produced products,
407  // no metadata has been dropped,
408  // ID's have not been modified,
409  // and the branch list indexes do not need modification.
410 
411  // Note: Fast copy of the EventProductProvenance branch is unsafe
412  // unless we can enforce that the parentage information for a fully copied
413  // output file will be the same as for the input file, with nothing dropped.
414  // This has never been enforced, and, with the EDAlias feature, it may no longer
415  // work by accident.
416  // So, for now, we do not enable fast cloning of the non-product branches.
417  /*
418  Service<ConstProductRegistry> reg;
419  canFastCloneAux_ = (whyNotFastClonable_ == FileBlock::CanFastClone) &&
420  fb.fileFormatVersion().noMetaDataTrees() &&
421  !om_->hasNewlyDroppedBranch()[InEvent] &&
422  !fb.hasNewlyDroppedBranch()[InEvent] &&
423  om_->dropMetaData() == PoolOutputModule::DropNone &&
424  !reg->anyProductProduced() &&
425  !fb.modifiedIDs() &&
426  fb.branchListIndexesUnchanged();
427  */
428 
429  // Report the fast copying status.
430  Service<JobReport> reportSvc;
431  reportSvc->reportFastCopyingStatus(reportToken_, fb.fileName(), whyNotFastClonable_ == FileBlock::CanFastClone);
432  } else {
434  }
435 
438 
439  // Possibly issue warning or informational message if we haven't fast cloned.
440  if (fb.tree() != nullptr && whyNotFastClonable_ != FileBlock::CanFastClone) {
441  maybeIssueWarning(whyNotFastClonable_, fb.fileName(), file_);
442  }
443 
444  if (om_->compactEventAuxiliary() &&
// `reserve` starts at remainingEvents; with an input tree it becomes the smaller of the two
// when remainingEvents is positive, otherwise the tree's entry count.
447  long long int reserve = remainingEvents;
448  if (fb.tree() != nullptr) {
449  reserve = reserve > 0 ? std::min(fb.tree()->GetEntries(), reserve) : fb.tree()->GetEntries();
450  }
451  if (reserve > 0) {
453  }
454  }
455  }
456 
458  // We can't do setEntries() on the event tree if the EventAuxiliary branch is empty & disabled
459  if (not om_->compactEventAuxiliary()) {
461  }
464  }
465 
467  unsigned int const oneK = 1024;
468  Long64_t size = filePtr_->GetSize() / oneK;
469  return (size >= om_->maxFileSize());
470  }
471 
473  // Auxiliary branch
474  pEventAux_ = &e.eventAuxiliary();
475 
476  // Because getting the data may cause an exception to be thrown we want to do that
477  // first before writing anything to the file about this event
478  // NOTE: pEventAux_, pBranchListIndexes_, pEventSelectionIDs_, and pEventEntryInfoVector_
479  // must be set before calling fillBranches since they get written out in that routine.
480  assert(pEventAux_->processHistoryID() == e.processHistoryID());
481  pBranchListIndexes_ = &e.branchListIndexes();
482  pEventToProcessBlockIndexes_ = &e.eventToProcessBlockIndexes();
483 
484  // Note: The EventSelectionIDVector should have a one to one correspondence with the processes in the process history.
485  // Therefore, a new entry should be added if and only if the current process has been added to the process history,
486  // which is done if and only if there is a produced product.
488  EventSelectionIDVector esids = e.eventSelectionIDs();
489  if (reg->anyProductProduced() || !om_->wantAllEvents()) {
490  esids.push_back(om_->selectorConfig());
491  }
492  pEventSelectionIDs_ = &esids;
493  ProductProvenanceRetriever const* provRetriever = e.productProvenanceRetrieverPtr();
494  assert(provRetriever);
495  unsigned int ttreeIndex = InEvent;
496  fillBranches(InEvent, e, ttreeIndex, pEventEntryInfoVector_, provRetriever);
497 
498  // Add the dataType to the job report if it hasn't already been done
499  if (!dataTypeReported_) {
500  Service<JobReport> reportSvc;
501  std::string dataType("MC");
502  if (pEventAux_->isRealData())
503  dataType = "Data";
504  reportSvc->reportDataType(reportToken_, dataType);
505  dataTypeReported_ = true;
506  }
507 
508  // Store the process history.
510  // Store the reduced ID in the IndexIntoFile
511  ProcessHistoryID reducedPHID = processHistoryRegistry_.reducedProcessHistoryID(e.processHistoryID());
512  // Add event to index
516 
517  if (om_->compactEventAuxiliary()) {
519  }
520 
521  // Report event written
522  Service<JobReport> reportSvc;
523  reportSvc->eventWrittenToFile(reportToken_, e.id().run(), e.id().event());
524  ++nEventsInLumi_;
525  }
526 
528  // Auxiliary branch
529  // NOTE: lumiAux_ must be filled before calling fillBranches since it gets written out in that routine.
531  // Use the updated process historyID
533  // Store the process history.
535  // Store the reduced ID in the IndexIntoFile
537  // Add lumi to index.
540  unsigned int ttreeIndex = InLumi;
541  fillBranches(InLumi, lb, ttreeIndex);
542  lumiTree_.optimizeBaskets(10ULL * 1024 * 1024);
543 
544  Service<JobReport> reportSvc;
545  reportSvc->reportLumiSection(reportToken_, lb.id().run(), lb.id().luminosityBlock(), nEventsInLumi_);
546  nEventsInLumi_ = 0;
547  }
548 
550  // Auxiliary branch
551  // NOTE: runAux_ must be filled before calling fillBranches since it gets written out in that routine.
552  runAux_ = r.runAuxiliary();
553  // Use the updated process historyID
554  runAux_.setProcessHistoryID(r.processHistoryID());
555  // Store the process history.
557  // Store the reduced ID in the IndexIntoFile
558  ProcessHistoryID reducedPHID = processHistoryRegistry_.reducedProcessHistoryID(r.processHistoryID());
559  // Add run to index.
560  indexIntoFile_.addEntry(reducedPHID, runAux_.run(), 0U, 0U, runEntryNumber_);
561  r.mergeableRunProductMetadata()->addEntryToStoredMetadata(storedMergeableRunProductMetadata_);
562  ++runEntryNumber_;
563  unsigned int ttreeIndex = InRun;
564  fillBranches(InRun, r, ttreeIndex);
565  runTree_.optimizeBaskets(10ULL * 1024 * 1024);
566 
567  Service<JobReport> reportSvc;
568  reportSvc->reportRunNumber(reportToken_, r.run());
569  }
570 
572  std::string const& processName = pb.processName();
573  std::vector<std::string> const& processesWithProcessBlockProducts =
574  om_->outputProcessBlockHelper().processesWithProcessBlockProducts();
575  std::vector<std::string>::const_iterator it =
576  std::find(processesWithProcessBlockProducts.cbegin(), processesWithProcessBlockProducts.cend(), processName);
577  if (it == processesWithProcessBlockProducts.cend()) {
578  return;
579  }
580  unsigned int ttreeIndex = InProcess + std::distance(processesWithProcessBlockProducts.cbegin(), it);
581  fillBranches(InProcess, pb, ttreeIndex);
582  treePointers_[ttreeIndex]->optimizeBaskets(10ULL * 1024 * 1024);
583  }
584 
586  Parentage const* desc(nullptr);
587 
588  if (!parentageTree_->Branch(poolNames::parentageBranchName().c_str(), &desc, om_->basketSize(), 0))
589  throw Exception(errors::FatalRootError) << "Failed to create a branch for Parentages in the output file";
590 
592 
593  std::vector<ParentageID> orderedIDs(parentageIDs_.size());
594  for (auto const& parentageID : parentageIDs_) {
595  orderedIDs[parentageID.second] = parentageID.first;
596  }
597  //now put them into the TTree in the correct order
598  for (auto const& orderedID : orderedIDs) {
599  desc = ptReg.getMapped(orderedID);
600  //NOTE: some old format files have missing Parentage info
601  // so a null value of desc can't be fatal.
602  // Root will default construct an object in that case.
603  parentageTree_->Fill();
604  }
605  }
606 
608  FileFormatVersion fileFormatVersion(getFileFormatVersion());
609  FileFormatVersion* pFileFmtVsn = &fileFormatVersion;
610  TBranch* b =
611  metaDataTree_->Branch(poolNames::fileFormatVersionBranchName().c_str(), &pFileFmtVsn, om_->basketSize(), 0);
612  assert(b);
613  b->Fill();
614  }
615 
617  FileID* fidPtr = &fid_;
618  TBranch* b = metaDataTree_->Branch(poolNames::fileIdentifierBranchName().c_str(), &fidPtr, om_->basketSize(), 0);
619  assert(b);
620  b->Fill();
621  }
622 
626  ex << "The number of entries in at least one output TBranch whose entries\n"
627  "were copied from the input does not match the number of events\n"
628  "recorded in IndexIntoFile. This might (or might not) indicate a\n"
629  "problem related to fast copy.";
630  ex.addContext("Calling RootOutputFile::writeIndexIntoFile");
631  throw ex;
632  }
634  IndexIntoFile* iifPtr = &indexIntoFile_;
635  TBranch* b = metaDataTree_->Branch(poolNames::indexIntoFileBranchName().c_str(), &iifPtr, om_->basketSize(), 0);
636  assert(b);
637  b->Fill();
638  }
639 
643  TBranch* b =
644  metaDataTree_->Branch(poolNames::mergeableRunProductMetadataBranchName().c_str(), &ptr, om_->basketSize(), 0);
645  assert(b);
646  b->Fill();
647  }
648 
651  }
652 
654  BranchIDLists const* p = om_->branchIDLists();
655  TBranch* b = metaDataTree_->Branch(poolNames::branchIDListBranchName().c_str(), &p, om_->basketSize(), 0);
656  assert(b);
657  b->Fill();
658  }
659 
661  ThinnedAssociationsHelper const* p = om_->thinnedAssociationsHelper();
662  TBranch* b =
663  metaDataTree_->Branch(poolNames::thinnedAssociationsHelperBranchName().c_str(), &p, om_->basketSize(), 0);
664  assert(b);
665  b->Fill();
666  }
667 
670  }
671 
673  // Make a local copy of the ProductRegistry, removing any transient or pruned products.
674  using ProductList = ProductRegistry::ProductList;
676  ProductRegistry pReg(reg->productList());
677  ProductList& pList = const_cast<ProductList&>(pReg.productList());
678  for (auto const& prod : pList) {
679  if (prod.second.branchID() != prod.second.originalBranchID()) {
680  if (branchesWithStoredHistory_.find(prod.second.branchID()) != branchesWithStoredHistory_.end()) {
681  branchesWithStoredHistory_.insert(prod.second.originalBranchID());
682  }
683  }
684  }
685  std::set<BranchID>::iterator end = branchesWithStoredHistory_.end();
686  for (ProductList::iterator it = pList.begin(); it != pList.end();) {
687  if (branchesWithStoredHistory_.find(it->second.branchID()) == end) {
688  // avoid invalidating iterator on deletion
689  ProductList::iterator itCopy = it;
690  ++it;
691  pList.erase(itCopy);
692 
693  } else {
694  ++it;
695  }
696  }
697 
698  ProductRegistry* ppReg = &pReg;
699  TBranch* b = metaDataTree_->Branch(poolNames::productDescriptionBranchName().c_str(), &ppReg, om_->basketSize(), 0);
700  assert(b);
701  b->Fill();
702  }
704  BranchChildren& pDeps = const_cast<BranchChildren&>(om_->branchChildren());
705  BranchChildren* ppDeps = &pDeps;
706  TBranch* b =
707  metaDataTree_->Branch(poolNames::productDependenciesBranchName().c_str(), &ppDeps, om_->basketSize(), 0);
708  assert(b);
709  b->Fill();
710  }
711 
712  // For duplicate removal and to determine if fast cloning is possible, the input
713  // module by default reads the entire EventAuxiliary branch when it opens the
714  // input files. If EventAuxiliary is written in the usual way, this results
715  // in many small reads scattered throughout the file, which can have very poor
716  // performance characteristics on some filesystems. As a workaround, we save
717  // EventAuxiliary and write it at the end of the file.
718 
720  constexpr std::size_t maxEaBasketSize = 4 * 1024 * 1024;
721 
722  if (om_->compactEventAuxiliary()) {
723  auto tree = eventTree_.tree();
724  auto const& bname = BranchTypeToAuxiliaryBranchName(InEvent).c_str();
725 
726  tree->SetBranchStatus(bname, true);
727  auto basketsize =
728  std::min(maxEaBasketSize,
729  compactEventAuxiliary_.size() * (sizeof(EventAuxiliary) + 26)); // 26 is an empirical fudge factor
730  tree->SetBasketSize(bname, basketsize);
731  auto b = tree->GetBranch(bname);
732 
733  assert(b);
734 
735  LogDebug("writeEventAuxiliary") << "EventAuxiliary ratio extras/GUIDs/all = "
738 
739  for (auto const& aux : compactEventAuxiliary_) {
740  const auto ea = aux.eventAuxiliary();
741  pEventAux_ = &ea;
742  // Fill EventAuxiliary branch
743  b->Fill();
744  }
746  }
747  }
748 
750  if (!om_->outputProcessBlockHelper().processesWithProcessBlockProducts().empty()) {
751  StoredProcessBlockHelper storedProcessBlockHelper(
752  om_->outputProcessBlockHelper().processesWithProcessBlockProducts());
753  om_->outputProcessBlockHelper().fillCacheIndices(storedProcessBlockHelper);
754 
755  StoredProcessBlockHelper* pStoredProcessBlockHelper = &storedProcessBlockHelper;
756  TBranch* b = metaDataTree_->Branch(
757  poolNames::processBlockHelperBranchName().c_str(), &pStoredProcessBlockHelper, om_->basketSize(), 0);
758  assert(b);
759  b->Fill();
760  }
761  }
762 
764  std::string_view status = "beginning";
765  std::string_view value = "";
766  try {
767  metaDataTree_->SetEntries(-1);
768  status = "writeTTree() for metadata";
770  status = "writeTTree() for ParameterSets";
772 
773  status = "writeTTree() for parentage";
775 
776  // Create branch aliases for all the branches in the
777  // events/lumis/runs/processblock trees. The loop is over
778  // all types of data products.
779  status = "writeTree() for ";
780  for (unsigned int i = 0; i < treePointers_.size(); ++i) {
782  BranchType branchType = InProcess;
783  if (i < InProcess) {
784  branchType = static_cast<BranchType>(i);
785  } else {
786  processName = om_->outputProcessBlockHelper().processesWithProcessBlockProducts()[i - InProcess];
787  }
788  setBranchAliases(treePointers_[i]->tree(), om_->keptProducts()[branchType], processName);
789  value = treePointers_[i]->tree()->GetName();
790  treePointers_[i]->writeTree();
791  }
792 
793  // close the file -- mfp
794  // Just to play it safe, zero all pointers to objects in the TFile to be closed.
795  status = "closing TTrees";
796  value = "";
797  metaDataTree_ = parentageTree_ = nullptr;
798  for (auto& treePointer : treePointers_) {
799  treePointer->close();
800  treePointer = nullptr;
801  }
802  status = "closing TFile";
803  filePtr_->Close();
804  filePtr_ = nullptr; // propagate_const<T> has no reset() function
805 
806  // report that file has been closed
807  status = "reporting to JobReport";
808  Service<JobReport> reportSvc;
809  reportSvc->outputFileClosed(reportToken_);
810  } catch (cms::Exception& e) {
811  e.addContext("Calling RootOutputFile::finishEndFile() while closing " + file_);
812  e.addAdditionalInfo("While calling " + std::string(status) + std::string(value));
813  throw;
814  }
815  }
816 
818  SelectedProducts const& branches,
819  std::string const& processName) const {
820  if (tree && tree->GetNbranches() != 0) {
821  for (auto const& selection : branches) {
822  BranchDescription const& pd = *selection.first;
823  if (pd.branchType() == InProcess && processName != pd.processName()) {
824  continue;
825  }
826  std::string const& full = pd.branchName() + "obj";
827  if (pd.branchAliases().empty()) {
828  std::string const& alias = (pd.productInstanceName().empty() ? pd.moduleLabel() : pd.productInstanceName());
829  tree->SetAlias(alias.c_str(), full.c_str());
830  } else {
831  for (auto const& alias : pd.branchAliases()) {
832  tree->SetAlias(alias.c_str(), full.c_str());
833  }
834  }
835  }
836  }
837  }
838 
840  ProductProvenanceRetriever const* iMapper,
841  bool produced,
842  std::set<BranchID> const& iProducedIDs,
843  std::set<StoredProductProvenance>& oToFill) {
844  assert(om_->dropMetaData() != PoolOutputModule::DropAll);
845  assert(produced || om_->dropMetaData() != PoolOutputModule::DropPrior);
846  if (om_->dropMetaData() == PoolOutputModule::DropDroppedPrior && !produced)
847  return;
848  std::vector<BranchID> const& parentIDs = iGetParents.parentage().parents();
849  for (auto const& parentID : parentIDs) {
850  branchesWithStoredHistory_.insert(parentID);
851  ProductProvenance const* info = iMapper->branchIDToProvenance(parentID);
852  if (info) {
853  if (om_->dropMetaData() == PoolOutputModule::DropNone ||
854  (iProducedIDs.end() != iProducedIDs.find(info->branchID()))) {
855  if (insertProductProvenance(*info, oToFill)) {
856  //haven't seen this one yet
857  insertAncestors(*info, iMapper, produced, iProducedIDs, oToFill);
858  }
859  }
860  }
861  }
862  }
863 
// Write one entry into the output tree at treePointers_[ttreeIndex].
//   branchType              - only InEvent enables the fast-cloning shortcut and the
//                             collection of produced BranchIDs
//   occurrence              - source of the data products (queried with getByToken)
//   ttreeIndex              - index into both om_->selectedOutputItemList() and treePointers_
//   productProvenanceVecPtr - destination for the stored provenance; when null, or when
//                             metadata is configured as DropAll, no provenance is kept
//   provRetriever           - used to look up provenance by original BranchID when the
//                             product handle did not supply it, and to walk ancestors
// For every selected output item the BranchID is added to branchesWithStoredHistory_.
864  void RootOutputFile::fillBranches(BranchType const& branchType,
865  OccurrenceForOutput const& occurrence,
866  unsigned int ttreeIndex,
867  StoredProductProvenanceVector* productProvenanceVecPtr,
868  ProductProvenanceRetriever const* provRetriever) {
// Owns the placeholder wrappers created below so the raw pointers handed to
// item.setProduct() remain valid until after fillTree() has run.
869  std::vector<std::unique_ptr<WrapperBase> > dummies;
870 
871  OutputItemList& items = om_->selectedOutputItemList()[ttreeIndex];
872 
873  bool const doProvenance =
874  (productProvenanceVecPtr != nullptr) && (om_->dropMetaData() != PoolOutputModule::DropAll);
875  bool const keepProvenanceForPrior = doProvenance && om_->dropMetaData() != PoolOutputModule::DropPrior;
876 
877  bool const fastCloning = (branchType == InEvent) && (whyNotFastClonable_ == FileBlock::CanFastClone);
878  std::set<StoredProductProvenance> provenanceToKeep;
879  //
880  //If we are dropping some of the meta data we need to know
881  // which BranchIDs were produced in this process because
882  // we may be storing meta data for only those products
883  // We do this only for event products.
884  std::set<BranchID> producedBranches;
885  if (doProvenance && branchType == InEvent && om_->dropMetaData() != PoolOutputModule::DropNone) {
// NOTE(review): the declaration of `preg` (listing line 886) is not present in this copy.
887  for (auto bd : preg->allBranchDescriptions()) {
888  if (bd->produced() && bd->branchType() == InEvent) {
889  producedBranches.insert(bd->branchID());
890  }
891  }
892  }
893 
894  // Loop over EDProduct branches, possibly fill the provenance, and write the branch.
895  for (auto& item : items) {
896  BranchID const& id = item.branchDescription()->branchID();
897  branchesWithStoredHistory_.insert(id);
898 
899  bool produced = item.branchDescription()->produced();
// The product is fetched unless it was not produced here, fast cloning is active,
// and the tree does not report the branch as uncloned.
900  bool getProd =
901  (produced || !fastCloning || treePointers_[ttreeIndex]->uncloned(item.branchDescription()->branchName()));
902  bool keepProvenance = doProvenance && (produced || keepProvenanceForPrior);
903 
904  WrapperBase const* product = nullptr;
905  ProductProvenance const* productProvenance = nullptr;
906  if (getProd) {
907  BasicHandle result = occurrence.getByToken(item.token(), item.branchDescription()->unwrappedTypeID());
908  product = result.wrapper();
909  if (result.isValid() && keepProvenance) {
910  productProvenance = result.provenance()->productProvenance();
911  }
912  if (product == nullptr) {
913  // No product with this ID is in the event.
914  // Add a null product.
915  TClass* cp = item.branchDescription()->wrappedType().getClass();
916  assert(cp != nullptr);
917  int offset = cp->GetBaseClassOffset(wrapperBaseTClass_);
918  void* p = cp->New();
919  std::unique_ptr<WrapperBase> dummy = getWrapperBasePtr(p, offset);
920  product = dummy.get();
921  dummies.emplace_back(std::move(dummy));
922  }
923  item.setProduct(product);
924  }
// Fall back to the retriever, keyed by the original BranchID, when the handle gave no provenance.
925  if (keepProvenance && productProvenance == nullptr) {
926  productProvenance = provRetriever->branchIDToProvenance(item.branchDescription()->originalBranchID());
927  }
928  if (productProvenance) {
929  insertProductProvenance(*productProvenance, provenanceToKeep);
930  insertAncestors(*productProvenance, provRetriever, produced, producedBranches, provenanceToKeep);
931  }
932  }
933 
// The provenance vector is populated only for the duration of fillTree() and emptied afterwards.
934  if (doProvenance)
935  productProvenanceVecPtr->assign(provenanceToKeep.begin(), provenanceToKeep.end());
936  treePointers_[ttreeIndex]->fillTree();
937  if (doProvenance)
938  productProvenanceVecPtr->clear();
939  }
940 
942  std::set<edm::StoredProductProvenance>& oToInsert) {
943  StoredProductProvenance toStore;
944  toStore.branchID_ = iProv.branchID().id();
945  std::set<edm::StoredProductProvenance>::iterator itFound = oToInsert.find(toStore);
946  if (itFound == oToInsert.end()) {
947  //get the index to the ParentageID or insert a new value if not already present
948  std::pair<std::map<edm::ParentageID, unsigned int>::iterator, bool> i =
949  parentageIDs_.insert(std::make_pair(iProv.parentageID(), static_cast<unsigned int>(parentageIDs_.size())));
950  toStore.parentageIDIndex_ = i.first->second;
951  if (toStore.parentageIDIndex_ >= parentageIDs_.size()) {
953  << "RootOutputFile::insertProductProvenance\n"
954  << "The parentage ID index value " << toStore.parentageIDIndex_
955  << " is out of bounds. The maximum value is currently " << parentageIDs_.size() - 1 << ".\n"
956  << "This should never happen.\n"
957  << "Please report this to the framework developers.";
958  }
959 
960  oToInsert.insert(toStore);
961  return true;
962  }
963  return false;
964  }
965 } // namespace edm
PoolOutputModule::OutputItemList OutputItemList
std::string const & metaDataTreeName()
Definition: BranchType.cc:159
void beginInputFile(FileBlock const &fb, int remainingEvents)
LuminosityBlockNumber_t luminosityBlock() const
LuminosityBlockAuxiliary lumiAux_
std::string const & fid() const
Definition: FileID.h:19
static const TGPicture * info(bool iBackgroundIsBlack)
std::string const & fileIdentifierBranchName()
Definition: BranchType.cc:192
ParentageID const & parentageID() const
std::string const & mergeableRunProductMetadataBranchName()
Definition: BranchType.cc:201
std::vector< BranchIDList > BranchIDLists
Definition: BranchIDList.h:19
std::string const & processBlockHelperBranchName()
Definition: BranchType.cc:204
EventNumber_t event() const
void writeProcessHistoryRegistry()
bool checkEntriesInReadBranches(Long64_t expectedNumberOfEntries) const
void push_back(const EventAuxiliary &ea)
void setSubBranchBasketSizes(TTree *inputTree) const
void setBranchAliases(TTree *tree, SelectedProducts const &branches, std::string const &processName) const
LuminosityBlockNumber_t luminosityBlock() const
std::string const & fileName() const
Definition: FileBlock.h:130
RootOutputFile(PoolOutputModule *om, std::string const &fileName, std::string const &logicalFileName, std::vector< std::string > const &processesWithSelectedMergeableRunProducts, std::string const &overrideGUID)
BranchType const & branchType() const
void insertAncestors(ProductProvenance const &iGetParents, ProductProvenanceRetriever const *iMapper, bool produced, std::set< BranchID > const &producedBranches, std::set< StoredProductProvenance > &oToFill)
void fillParameterSetBranch(TTree *parameterSetsTree, int basketSize)
std::map< BranchKey, BranchDescription > ProductList
bool registerProcessHistory(ProcessHistory const &processHistory)
selection
main part
Definition: corrVsCorr.py:100
std::shared_ptr< TFile const > filePtr() const
std::map< ParentageID, unsigned int > parentageIDs_
void fillProcessHistoryBranch(TTree *metaDataTree, int basketSize, ProcessHistoryRegistry const &processHistoryRegistry)
std::string const & fullClassName() const
edm::propagate_const< TTree * > metaDataTree_
std::string const & processName() const
unsigned long nEventsInLumi_
ProcessHistoryRegistry processHistoryRegistry_
std::vector< BranchID > const & parents() const
Definition: Parentage.h:44
std::string const & productDependenciesBranchName()
Definition: BranchType.cc:165
bool int lh
Definition: SIMDVec.h:27
void writeLuminosityBlock(LuminosityBlockForOutput const &lb)
void setAutoFlush(Long64_t size)
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:19
BasicHandle getByToken(EDGetToken token, TypeID const &typeID) const
assert(be >=bs)
std::string const & fileFormatVersionBranchName()
Definition: BranchType.cc:189
BranchType
Definition: BranchType.h:11
std::set< BranchID > branchesWithStoredHistory_
std::vector< edm::propagate_const< RootOutputTree * > > treePointers_
std::vector< EventSelectionID > EventSelectionIDVector
std::string const & productDescriptionBranchName()
Definition: BranchType.cc:162
void sortVector_Run_Or_Lumi_Entries()
RunNumber_t run() const
Definition: RunAuxiliary.h:30
std::string const & branchIDListBranchName()
Definition: BranchType.cc:183
std::vector< std::pair< BranchDescription const *, EDGetToken > > SelectedProducts
constexpr element_type const * get() const
bool getMapped(key_type const &k, value_type &result) const
bool checkSplitLevelsAndBasketSizes(TTree *inputTree) const
void writeThinnedAssociationsHelper()
RootOutputTree eventTree_
std::vector< BranchListIndex > BranchListIndexes
void addBranch(std::string const &branchName, std::string const &className, void const *&pProd, int splitLevel, int basketSize, bool produced)
edm::propagate_const< PoolOutputModule * > om_
MD5Result digest()
Definition: Digest.cc:171
TTree * tree() const
Definition: FileBlock.h:117
std::string const & branchName() const
IndexIntoFile::EntryNumber_t eventEntryNumber_
LuminosityBlockAuxiliary const * pLumiAux_
std::string const & productInstanceName() const
unsigned int id() const
Definition: BranchID.h:21
edm::propagate_const< std::shared_ptr< TFile > > filePtr_
Definition: GenABIO.cc:168
std::string const & parameterSetsTreeName()
Definition: BranchType.cc:216
LuminosityBlockNumber_t luminosityBlock() const
void writeOne(EventForOutput const &e)
std::vector< OutputItemList > const & selectedOutputItemList() const
bool isRealData() const
std::string const & BranchTypeToProductProvenanceBranchName(BranchType const &BranchType)
Definition: BranchType.cc:139
int getFileFormatVersion()
ProcessHistoryID const & reducedProcessHistoryID(ProcessHistoryID const &fullID) const
std::string logicalFile_
void fillBranches(BranchType const &branchType, OccurrenceForOutput const &occurrence, unsigned int ttreeIndex, StoredProductProvenanceVector *productProvenanceVecPtr=nullptr, ProductProvenanceRetriever const *provRetriever=nullptr)
ProductProvenance const * branchIDToProvenance(BranchID const &bid) const
std::string const & parentageBranchName()
Definition: BranchType.cc:156
void setProcessHistoryID(ProcessHistoryID const &phid)
std::string const & processName() const
Definition: value.py:1
std::string const & eventToProcessBlockIndexesBranchName()
Definition: BranchType.cc:214
CompactEventAuxiliaryVector compactEventAuxiliary_
void addEntry(ProcessHistoryID const &processHistoryID, RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, EntryNumber_t entry)
edm::propagate_const< TClass * > wrapperBaseTClass_
static TTree * makeTTree(TFile *filePtr, std::string const &name, int splitLevel)
ProcessHistoryID const & processHistoryID() const
RunNumber_t run() const
EventSelectionIDVector const * pEventSelectionIDs_
ProcessHistoryID const & processHistoryID() const
std::string createGlobalIdentifier(bool binary=false)
Log< level::Info, false > LogInfo
edm::propagate_const< TTree * > parameterSetsTree_
StoredMergeableRunProductMetadata storedMergeableRunProductMetadata_
void writeStoredMergeableRunProductMetadata()
BranchID const & branchID() const
std::string const & eventSelectionsBranchName()
Definition: BranchType.cc:210
void sort_all(RandomAccessSequence &s)
wrappers for std::sort
Definition: Algorithms.h:92
edm::propagate_const< StoredProductProvenanceVector * > pEventEntryInfoVector_
RootOutputTree runTree_
std::string const & parentageTreeName()
Definition: BranchType.cc:154
double b
Definition: hdecay.h:120
LuminosityBlockID const & id() const
void addContext(std::string const &context)
Definition: Exception.cc:169
LuminosityBlockAuxiliary const & luminosityBlockAuxiliary() const
edm::propagate_const< TTree * > parentageTree_
bool insertProductProvenance(const ProductProvenance &, std::set< StoredProductProvenance > &oToInsert)
void setException(std::exception_ptr e)
void addAuxiliary(std::string const &branchName, T const *&pAux, int bufSize, bool allowCloning=true)
std::vector< StoredProductProvenance > StoredProductProvenanceVector
void respondToCloseInputFile(FileBlock const &fb)
IndexIntoFile indexIntoFile_
IndexIntoFile::EntryNumber_t runEntryNumber_
bool isValidGlobalIdentifier(std::string const &guid)
virtual ProcessHistory const & processHistory() const
static void writeTTree(TTree *tree)
TTree const * tree() const
std::string moduleName(StableProvenance const &provenance, ProcessHistory const &history)
Definition: Provenance.cc:27
EventAuxiliary const * pEventAux_
StoredProductProvenanceVector const * pEventEntryInfoVector() const
HLT enums.
RunAuxiliary runAux_
std::exception_ptr getException()
void writeProductDescriptionRegistry()
int eventAutoFlushSize() const
void writeProcessBlock(ProcessBlockForOutput const &)
std::unique_ptr< WrapperBase > getWrapperBasePtr(void *p, int offset)
std::string const & branchListIndexesBranchName()
Definition: BranchType.cc:212
void setProcessHistoryID(ProcessHistoryID const &phid)
Definition: RunAuxiliary.h:25
Definition: tree.py:1
BranchListIndexes const * pBranchListIndexes_
Log< level::Warning, false > LogWarning
void maybeFastCloneTree(bool canFastClone, bool canFastCloneAux, TTree *tree, std::string const &option)
int whyNotFastClonable() const
Definition: FileBlock.h:128
std::string const & indexIntoFileBranchName()
Definition: BranchType.cc:198
std::string const & BranchTypeToAuxiliaryBranchName(BranchType const &branchType)
Definition: BranchType.cc:116
std::vector< edm::propagate_const< std::unique_ptr< RootOutputTree > > > processBlockTrees_
branchNames
Definition: haddnano.py:54
static ParentageRegistry * instance()
bool shouldWeCloseFile() const
IndexIntoFile::EntryNumber_t lumiEntryNumber_
RootOutputTree lumiTree_
void optimizeBaskets(ULong64_t size)
void writeRun(RunForOutput const &r)
std::string const & moduleLabel() const
std::string toString() const
Definition: Digest.cc:95
RunAuxiliary const * pRunAux_
bool checkIfFastClonable(TTree *inputTree) const
def move(src, dest)
Definition: eostools.py:511
JobReport::Token reportToken_
RunNumber_t run() const
std::set< std::string > const & branchAliases() const
EventToProcessBlockIndexes const * pEventToProcessBlockIndexes_
std::string const & thinnedAssociationsHelperBranchName()
Definition: BranchType.cc:186
#define LogDebug(id)
std::string match(BranchDescription const &a, BranchDescription const &b, std::string const &fileName)
Parentage const & parentage() const