CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
RootOutputFile.cc
Go to the documentation of this file.
1 
3 
5 
43 
44 #include "TTree.h"
45 #include "TFile.h"
46 #include "TClass.h"
47 #include "Rtypes.h"
48 #include "RVersion.h"
49 
50 #include "Compression.h"
51 
52 #include <algorithm>
53 #include <iomanip>
54 #include <sstream>
55 
56 namespace edm {
57 
58  namespace {
59  bool sorterForJobReportHash(BranchDescription const* lh, BranchDescription const* rh) {
60  return lh->fullClassName() < rh->fullClassName() ? true
61  : lh->fullClassName() > rh->fullClassName() ? false
62  : lh->moduleLabel() < rh->moduleLabel() ? true
63  : lh->moduleLabel() > rh->moduleLabel() ? false
64  : lh->productInstanceName() < rh->productInstanceName() ? true
65  : lh->productInstanceName() > rh->productInstanceName() ? false
66  : lh->processName() < rh->processName() ? true
67  : false;
68  }
69 
70  TFile* openTFile(char const* name, int compressionLevel) {
71  TFile* file = TFile::Open(name, "recreate", "", compressionLevel);
72  std::exception_ptr e = edm::threadLocalException::getException();
73  if (e != std::exception_ptr()) {
74  edm::threadLocalException::setException(std::exception_ptr());
75  std::rethrow_exception(e);
76  }
77  return file;
78  }
79  } // namespace
80 
82  std::string const& fileName,
83  std::string const& logicalFileName,
84  std::vector<std::string> const& processesWithSelectedMergeableRunProducts)
85  : file_(fileName),
86  logicalFile_(logicalFileName),
87  reportToken_(0),
88  om_(om),
89  whyNotFastClonable_(om_->whyNotFastClonable()),
90  canFastCloneAux_(false),
91  filePtr_(openTFile(file_.c_str(), om_->compressionLevel())),
92  fid_(),
93  eventEntryNumber_(0LL),
94  lumiEntryNumber_(0LL),
95  runEntryNumber_(0LL),
96  indexIntoFile_(),
97  storedMergeableRunProductMetadata_(processesWithSelectedMergeableRunProducts),
98  nEventsInLumi_(0),
99  metaDataTree_(nullptr),
100  parameterSetsTree_(nullptr),
101  parentageTree_(nullptr),
102  lumiAux_(),
103  runAux_(),
104  pEventAux_(nullptr),
105  pLumiAux_(&lumiAux_),
106  pRunAux_(&runAux_),
107  eventEntryInfoVector_(),
108  pEventEntryInfoVector_(&eventEntryInfoVector_),
109  pBranchListIndexes_(nullptr),
110  pEventSelectionIDs_(nullptr),
111  eventTree_(filePtr(), InEvent, om_->splitLevel(), om_->treeMaxVirtualSize()),
112  lumiTree_(filePtr(), InLumi, om_->splitLevel(), om_->treeMaxVirtualSize()),
113  runTree_(filePtr(), InRun, om_->splitLevel(), om_->treeMaxVirtualSize()),
114  dataTypeReported_(false),
115  processHistoryRegistry_(),
116  parentageIDs_(),
117  branchesWithStoredHistory_(),
118  wrapperBaseTClass_(TClass::GetClass("edm::WrapperBase")) {
119  std::vector<std::string> const& processesWithProcessBlockProducts =
120  om_->outputProcessBlockHelper().processesWithProcessBlockProducts();
121  for (auto const& processName : processesWithProcessBlockProducts) {
122  processBlockTrees_.emplace_back(std::make_unique<RootOutputTree>(
123  filePtr(), InProcess, om_->splitLevel(), om_->treeMaxVirtualSize(), processName));
124  }
125 
126  if (om_->compressionAlgorithm() == std::string("ZLIB")) {
127  filePtr_->SetCompressionAlgorithm(ROOT::kZLIB);
128  } else if (om_->compressionAlgorithm() == std::string("LZMA")) {
129  filePtr_->SetCompressionAlgorithm(ROOT::kLZMA);
130  } else if (om_->compressionAlgorithm() == std::string("ZSTD")) {
131  filePtr_->SetCompressionAlgorithm(ROOT::kZSTD);
132  } else if (om_->compressionAlgorithm() == std::string("LZ4")) {
133  filePtr_->SetCompressionAlgorithm(ROOT::kLZ4);
134  } else {
136  << "PoolOutputModule configured with unknown compression algorithm '" << om_->compressionAlgorithm() << "'\n"
137  << "Allowed compression algorithms are ZLIB, LZMA, LZ4, and ZSTD\n";
138  }
139  if (-1 != om->eventAutoFlushSize()) {
141  }
142  if (om_->compactEventAuxiliary()) {
144  BranchTypeToAuxiliaryBranchName(InEvent), pEventAux_, om_->auxItems()[InEvent].basketSize_, false);
145  eventTree_.tree()->SetBranchStatus(BranchTypeToAuxiliaryBranchName(InEvent).c_str(),
146  false); // see writeEventAuxiliary
147  } else {
149  BranchTypeToAuxiliaryBranchName(InEvent), pEventAux_, om_->auxItems()[InEvent].basketSize_);
150  }
151 
154  om_->auxItems()[InEvent].basketSize_);
156  poolNames::eventSelectionsBranchName(), pEventSelectionIDs_, om_->auxItems()[InEvent].basketSize_, false);
159 
160  if (om_->outputProcessBlockHelper().productsFromInputKept()) {
163  om_->auxItems()[InEvent].basketSize_);
164  }
165 
167  BranchTypeToAuxiliaryBranchName(InLumi), pLumiAux_, om_->auxItems()[InLumi].basketSize_);
168 
170  BranchTypeToAuxiliaryBranchName(InRun), pRunAux_, om_->auxItems()[InRun].basketSize_);
171 
172  treePointers_.emplace_back(&eventTree_);
173  treePointers_.emplace_back(&lumiTree_);
174  treePointers_.emplace_back(&runTree_);
175  for (auto& processBlockTree : processBlockTrees_) {
176  treePointers_.emplace_back(processBlockTree.get());
177  }
178 
179  for (unsigned int i = 0; i < treePointers_.size(); ++i) {
180  RootOutputTree* theTree = treePointers_[i];
181  for (auto& item : om_->selectedOutputItemList()[i]) {
182  item.setProduct(nullptr);
183  BranchDescription const& desc = *item.branchDescription();
184  theTree->addBranch(desc.branchName(),
185  desc.wrappedName(),
186  item.product(),
187  item.splitLevel(),
188  item.basketSize(),
189  item.branchDescription()->produced());
190  //make sure we always store product registry info for all branches we create
191  branchesWithStoredHistory_.insert(item.branchID());
192  }
193  }
194  // Don't split metadata tree or event description tree
198 
200 
201  // For the Job Report, get a vector of branch names in the "Events" tree.
202  // Also create a hash of all the branch names in the "Events" tree
203  // in a deterministic order, except use the full class name instead of the friendly class name.
204  // To avoid extra string copies, we create a vector of pointers into the product registry,
205  // and use a custom comparison operator for sorting.
206  std::vector<std::string> branchNames;
207  std::vector<BranchDescription const*> branches;
208  branchNames.reserve(om_->selectedOutputItemList()[InEvent].size());
209  branches.reserve(om->selectedOutputItemList()[InEvent].size());
210  for (auto const& item : om_->selectedOutputItemList()[InEvent]) {
211  branchNames.push_back(item.branchDescription()->branchName());
212  branches.push_back(item.branchDescription());
213  }
214  // Now sort the branches for the hash.
215  sort_all(branches, sorterForJobReportHash);
216  // Now, make a concatenated string.
217  std::ostringstream oss;
218  char const underscore = '_';
219  for (auto const& branch : branches) {
220  BranchDescription const& bd = *branch;
221  oss << bd.fullClassName() << underscore << bd.moduleLabel() << underscore << bd.productInstanceName()
222  << underscore << bd.processName() << underscore;
223  }
224  std::string stringrep = oss.str();
225  cms::Digest md5alg(stringrep);
226 
227  // Register the output file with the JobReport service
228  // and get back the token for it.
229  std::string moduleName = "PoolOutputModule";
230  Service<JobReport> reportSvc;
231  reportToken_ = reportSvc->outputFileOpened(file_,
232  logicalFile_, // PFN and LFN
233  om_->catalog(), // catalog
234  moduleName, // module class name
235  om_->moduleLabel(), // module label
236  fid_.fid(), // file id (guid)
237  std::string(), // data type (not yet known, so string is empty).
238  md5alg.digest().toString(), // branch hash
239  branchNames); // branch names being written
240  }
241 
242  namespace {
// Logs a single message explaining why fast copying from ifileName to
// ofileName is disabled.
//
// whyNotFastClonable is handled as a bit mask of FileBlock reason flags. Each
// flag tested below appends one line to the message and is then cleared from
// the (by-value) mask. Reasons that set isWarning = false downgrade the final
// message from LogWarning to LogInfo; if none of those is present the message
// is emitted as a warning.
//
// NOTE(review): listing lines 245-246 (the condition that guards the early
// return just below) are missing from this view; presumably they test the
// "deliberately disabled / nothing to copy" flags described by the comment -
// confirm against the original file.
243  void maybeIssueWarning(int whyNotFastClonable, std::string const& ifileName, std::string const& ofileName) {
244  // No message if fast cloning was deliberately disabled, or if there are no events to copy anyway.
247  return;
248  }
249 
250  // There will be a message stating every reason that fast cloning was not possible.
251  // If one or more of the reasons was something the user explicitly specified (e.g. event selection, skipping events),
252  // or if the input file was in an old format, the message will be informational. Otherwise, the message will be a warning.
253  bool isWarning = true;
254  std::ostringstream message;
255  message << "Fast copying of file " << ifileName << " to file " << ofileName << " is disabled because:\n";
256  if ((whyNotFastClonable & FileBlock::HasSecondaryFileSequence) != 0) {
257  message << "a SecondaryFileSequence was specified.\n";
258  whyNotFastClonable &= ~(FileBlock::HasSecondaryFileSequence);
259  isWarning = false;
260  }
261  if ((whyNotFastClonable & FileBlock::FileTooOld) != 0) {
262  message << "the input file is in an old format.\n";
263  whyNotFastClonable &= ~(FileBlock::FileTooOld);
264  isWarning = false;
265  }
// Reason reported without changing the severity (isWarning is left as is).
266  if ((whyNotFastClonable & FileBlock::EventsToBeSorted) != 0) {
267  message << "events need to be sorted.\n";
268  whyNotFastClonable &= ~(FileBlock::EventsToBeSorted);
269  }
// Reason reported without changing the severity (isWarning is left as is).
270  if ((whyNotFastClonable & FileBlock::RunOrLumiNotContiguous) != 0) {
271  message << "a run or a lumi is not contiguous in the input file.\n";
272  whyNotFastClonable &= ~(FileBlock::RunOrLumiNotContiguous);
273  }
274  if ((whyNotFastClonable & FileBlock::EventsOrLumisSelectedByID) != 0) {
275  message << "events or lumis were selected or skipped by ID.\n";
276  whyNotFastClonable &= ~(FileBlock::EventsOrLumisSelectedByID);
277  isWarning = false;
278  }
279  if ((whyNotFastClonable & FileBlock::InitialEventsSkipped) != 0) {
280  message << "initial events, lumis or runs were skipped.\n";
281  whyNotFastClonable &= ~(FileBlock::InitialEventsSkipped);
282  isWarning = false;
283  }
// Reason reported without changing the severity (isWarning is left as is).
284  if ((whyNotFastClonable & FileBlock::DuplicateEventsRemoved) != 0) {
285  message << "some events were skipped because of duplicate checking.\n";
286  whyNotFastClonable &= ~(FileBlock::DuplicateEventsRemoved);
287  }
288  if ((whyNotFastClonable & FileBlock::MaxEventsTooSmall) != 0) {
289  message << "some events were not copied because of maxEvents limit.\n";
290  whyNotFastClonable &= ~(FileBlock::MaxEventsTooSmall);
291  isWarning = false;
292  }
293  if ((whyNotFastClonable & FileBlock::MaxLumisTooSmall) != 0) {
294  message << "some events were not copied because of maxLumis limit.\n";
295  whyNotFastClonable &= ~(FileBlock::MaxLumisTooSmall);
296  isWarning = false;
297  }
298  if ((whyNotFastClonable & FileBlock::ParallelProcesses) != 0) {
299  message << "parallel processing was specified.\n";
300  whyNotFastClonable &= ~(FileBlock::ParallelProcesses);
301  isWarning = false;
302  }
303  if ((whyNotFastClonable & FileBlock::EventSelectionUsed) != 0) {
304  message << "an EventSelector was specified.\n";
305  whyNotFastClonable &= ~(FileBlock::EventSelectionUsed);
306  isWarning = false;
307  }
308  if ((whyNotFastClonable & FileBlock::OutputMaxEventsTooSmall) != 0) {
309  message << "some events were not copied because of maxEvents output limit.\n";
310  whyNotFastClonable &= ~(FileBlock::OutputMaxEventsTooSmall);
311  isWarning = false;
312  }
// Reason reported without changing the severity (isWarning is left as is).
313  if ((whyNotFastClonable & FileBlock::SplitLevelMismatch) != 0) {
314  message << "the split level or basket size of a branch or branches was modified.\n";
315  whyNotFastClonable &= ~(FileBlock::SplitLevelMismatch);
316  }
// Reason reported without changing the severity (isWarning is left as is).
317  if ((whyNotFastClonable & FileBlock::BranchMismatch) != 0) {
318  message << "The format of a data product has changed.\n";
319  whyNotFastClonable &= ~(FileBlock::BranchMismatch);
320  }
// Every reason bit is expected to have been reported and cleared above, so the
// remaining mask must equal FileBlock::CanFastClone; a leftover bit would mean
// a reason this function does not know how to describe.
321  assert(whyNotFastClonable == FileBlock::CanFastClone);
322  if (isWarning) {
323  LogWarning("FastCloningDisabled") << message.str();
324  } else {
325  LogInfo("FastCloningDisabled") << message.str();
326  }
327  }
328  } // namespace
329 
330  void RootOutputFile::beginInputFile(FileBlock const& fb, int remainingEvents) {
331  // Reset per input file information
332  whyNotFastClonable_ = om_->whyNotFastClonable();
333  canFastCloneAux_ = false;
334 
335  if (fb.tree() != nullptr) {
337 
338  if (remainingEvents >= 0 && remainingEvents < fb.tree()->GetEntries()) {
340  }
341 
343  if (!match) {
344  if (om_->overrideInputFileSplitLevels()) {
345  // We may be fast copying. We must disable fast copying if the split levels
346  // or basket sizes do not match.
348  } else {
349  // We are using the input split levels and basket sizes from the first input file
350  // for copied output branches. In this case, we throw an exception if any branches
351  // have different split levels or basket sizes in a subsequent input file.
352  // If the mismatch is in the first file, there is a bug somewhere, so we assert.
353  assert(om_->inputFileCount() > 1);
354  throw Exception(errors::MismatchedInputFiles, "RootOutputFile::beginInputFile()")
355  << "Merge failure because input file " << file_ << " has different ROOT split levels or basket sizes\n"
356  << "than previous files. To allow merging in spite of this, use the configuration parameter\n"
357  << "overrideInputFileSplitLevels=cms.untracked.bool(True)\n"
358  << "in every PoolOutputModule.\n";
359  }
360  }
361 
362  // Since this check can be time consuming, we do it only if we would otherwise fast clone.
364  if (!eventTree_.checkIfFastClonable(fb.tree())) {
366  }
367  }
368 
369  // reasons for whyNotFastClonable that are also inconsistent with a merge job
370  constexpr auto setSubBranchBasketConditions =
371  FileBlock::EventsOrLumisSelectedByID | FileBlock::InitialEventsSkipped | FileBlock::MaxEventsTooSmall |
372  FileBlock::MaxLumisTooSmall | FileBlock::EventSelectionUsed | FileBlock::OutputMaxEventsTooSmall |
373  FileBlock::SplitLevelMismatch | FileBlock::BranchMismatch;
374 
375  if (om_->inputFileCount() == 1) {
376  if (om_->mergeJob()) {
377  // for merge jobs always forward the compression mode
378  auto infile = fb.tree()->GetCurrentFile();
379  if (infile != nullptr) {
380  filePtr_->SetCompressionSettings(infile->GetCompressionSettings());
381  }
382  }
383 
384  // if we aren't fast cloning, and the reason why is consistent with a
385  // merge job or is only because of parallel processes, then forward all
386  // the sub-branch basket sizes
388  ((om_->mergeJob() && (whyNotFastClonable_ & setSubBranchBasketConditions) == 0) ||
391  }
392  }
393 
394  // We now check if we can fast copy the auxiliary branches.
395  // We can do so only if we can otherwise fast copy,
396  // the input file has the current format (these branches are in the Events Tree),
397  // there are no newly dropped or produced products,
398  // no metadata has been dropped,
399  // ID's have not been modified,
400  // and the branch list indexes do not need modification.
401 
402  // Note: Fast copy of the EventProductProvenance branch is unsafe
403  // unless we can enforce that the parentage information for a fully copied
404  // output file will be the same as for the input file, with nothing dropped.
405  // This has never been enforced, and, with the EDAlias feature, it may no longer
406  // work by accident.
407  // So, for now, we do not enable fast cloning of the non-product branches.
408  /*
409  Service<ConstProductRegistry> reg;
410  canFastCloneAux_ = (whyNotFastClonable_ == FileBlock::CanFastClone) &&
411  fb.fileFormatVersion().noMetaDataTrees() &&
412  !om_->hasNewlyDroppedBranch()[InEvent] &&
413  !fb.hasNewlyDroppedBranch()[InEvent] &&
414  om_->dropMetaData() == PoolOutputModule::DropNone &&
415  !reg->anyProductProduced() &&
416  !fb.modifiedIDs() &&
417  fb.branchListIndexesUnchanged();
418  */
419 
420  // Report the fast copying status.
421  Service<JobReport> reportSvc;
422  reportSvc->reportFastCopyingStatus(reportToken_, fb.fileName(), whyNotFastClonable_ == FileBlock::CanFastClone);
423  } else {
425  }
426 
429 
430  // Possibly issue warning or informational message if we haven't fast cloned.
431  if (fb.tree() != nullptr && whyNotFastClonable_ != FileBlock::CanFastClone) {
432  maybeIssueWarning(whyNotFastClonable_, fb.fileName(), file_);
433  }
434 
435  if (om_->compactEventAuxiliary() &&
436  (whyNotFastClonable_ & (FileBlock::EventsOrLumisSelectedByID | FileBlock::InitialEventsSkipped |
438  long long int reserve = remainingEvents;
439  if (fb.tree() != nullptr) {
440  reserve = reserve > 0 ? std::min(fb.tree()->GetEntries(), reserve) : fb.tree()->GetEntries();
441  }
442  if (reserve > 0) {
444  }
445  }
446  }
447 
449  // We can't do setEntries() on the event tree if the EventAuxiliary branch is empty & disabled
450  if (not om_->compactEventAuxiliary()) {
452  }
455  }
456 
458  unsigned int const oneK = 1024;
459  Long64_t size = filePtr_->GetSize() / oneK;
460  return (size >= om_->maxFileSize());
461  }
462 
464  // Auxiliary branch
465  pEventAux_ = &e.eventAuxiliary();
466 
467  // Because getting the data may cause an exception to be thrown we want to do that
468  // first before writing anything to the file about this event
469  // NOTE: pEventAux_, pBranchListIndexes_, pEventSelectionIDs_, and pEventEntryInfoVector_
470  // must be set before calling fillBranches since they get written out in that routine.
474 
475  // Note: The EventSelectionIDVector should have a one to one correspondence with the processes in the process history.
476  // Therefore, a new entry should be added if and only if the current process has been added to the process history,
477  // which is done if and only if there is a produced product.
480  if (reg->anyProductProduced() || !om_->wantAllEvents()) {
481  esids.push_back(om_->selectorConfig());
482  }
483  pEventSelectionIDs_ = &esids;
485  assert(provRetriever);
486  unsigned int ttreeIndex = InEvent;
487  fillBranches(InEvent, e, ttreeIndex, pEventEntryInfoVector_, provRetriever);
488 
489  // Add the dataType to the job report if it hasn't already been done
490  if (!dataTypeReported_) {
491  Service<JobReport> reportSvc;
492  std::string dataType("MC");
493  if (pEventAux_->isRealData())
494  dataType = "Data";
495  reportSvc->reportDataType(reportToken_, dataType);
496  dataTypeReported_ = true;
497  }
498 
499  // Store the process history.
501  // Store the reduced ID in the IndexIntoFile
503  // Add event to index
507 
508  if (om_->compactEventAuxiliary()) {
510  }
511 
512  // Report event written
513  Service<JobReport> reportSvc;
514  reportSvc->eventWrittenToFile(reportToken_, e.id().run(), e.id().event());
515  ++nEventsInLumi_;
516  }
517 
519  // Auxiliary branch
520  // NOTE: lumiAux_ must be filled before calling fillBranches since it gets written out in that routine.
522  // Use the updated process historyID
524  // Store the process history.
526  // Store the reduced ID in the IndexIntoFile
528  // Add lumi to index.
531  unsigned int ttreeIndex = InLumi;
532  fillBranches(InLumi, lb, ttreeIndex);
533  lumiTree_.optimizeBaskets(10ULL * 1024 * 1024);
534 
535  Service<JobReport> reportSvc;
536  reportSvc->reportLumiSection(reportToken_, lb.id().run(), lb.id().luminosityBlock(), nEventsInLumi_);
537  nEventsInLumi_ = 0;
538  }
539 
541  // Auxiliary branch
542  // NOTE: runAux_ must be filled before calling fillBranches since it gets written out in that routine.
543  runAux_ = r.runAuxiliary();
544  // Use the updated process historyID
546  // Store the process history.
548  // Store the reduced ID in the IndexIntoFile
550  // Add run to index.
551  indexIntoFile_.addEntry(reducedPHID, runAux_.run(), 0U, 0U, runEntryNumber_);
553  ++runEntryNumber_;
554  unsigned int ttreeIndex = InRun;
555  fillBranches(InRun, r, ttreeIndex);
556  runTree_.optimizeBaskets(10ULL * 1024 * 1024);
557 
558  Service<JobReport> reportSvc;
559  reportSvc->reportRunNumber(reportToken_, r.run());
560  }
561 
563  std::string const& processName = pb.processName();
564  std::vector<std::string> const& processesWithProcessBlockProducts =
565  om_->outputProcessBlockHelper().processesWithProcessBlockProducts();
566  std::vector<std::string>::const_iterator it =
567  std::find(processesWithProcessBlockProducts.cbegin(), processesWithProcessBlockProducts.cend(), processName);
568  if (it == processesWithProcessBlockProducts.cend()) {
569  return;
570  }
571  unsigned int ttreeIndex = InProcess + std::distance(processesWithProcessBlockProducts.cbegin(), it);
572  fillBranches(InProcess, pb, ttreeIndex);
573  treePointers_[ttreeIndex]->optimizeBaskets(10ULL * 1024 * 1024);
574  }
575 
577  Parentage const* desc(nullptr);
578 
579  if (!parentageTree_->Branch(poolNames::parentageBranchName().c_str(), &desc, om_->basketSize(), 0))
580  throw Exception(errors::FatalRootError) << "Failed to create a branch for Parentages in the output file";
581 
583 
584  std::vector<ParentageID> orderedIDs(parentageIDs_.size());
585  for (auto const& parentageID : parentageIDs_) {
586  orderedIDs[parentageID.second] = parentageID.first;
587  }
588  //now put them into the TTree in the correct order
589  for (auto const& orderedID : orderedIDs) {
590  desc = ptReg.getMapped(orderedID);
591  //NOTE: some old format files have missing Parentage info
592  // so a null value of desc can't be fatal.
593  // Root will default construct an object in that case.
594  parentageTree_->Fill();
595  }
596  }
597 
599  FileFormatVersion fileFormatVersion(getFileFormatVersion());
600  FileFormatVersion* pFileFmtVsn = &fileFormatVersion;
601  TBranch* b =
602  metaDataTree_->Branch(poolNames::fileFormatVersionBranchName().c_str(), &pFileFmtVsn, om_->basketSize(), 0);
603  assert(b);
604  b->Fill();
605  }
606 
608  FileID* fidPtr = &fid_;
609  TBranch* b = metaDataTree_->Branch(poolNames::fileIdentifierBranchName().c_str(), &fidPtr, om_->basketSize(), 0);
610  assert(b);
611  b->Fill();
612  }
613 
617  ex << "The number of entries in at least one output TBranch whose entries\n"
618  "were copied from the input does not match the number of events\n"
619  "recorded in IndexIntoFile. This might (or might not) indicate a\n"
620  "problem related to fast copy.";
621  ex.addContext("Calling RootOutputFile::writeIndexIntoFile");
622  throw ex;
623  }
625  IndexIntoFile* iifPtr = &indexIntoFile_;
626  TBranch* b = metaDataTree_->Branch(poolNames::indexIntoFileBranchName().c_str(), &iifPtr, om_->basketSize(), 0);
627  assert(b);
628  b->Fill();
629  }
630 
634  TBranch* b =
635  metaDataTree_->Branch(poolNames::mergeableRunProductMetadataBranchName().c_str(), &ptr, om_->basketSize(), 0);
636  assert(b);
637  b->Fill();
638  }
639 
642  }
643 
645  BranchIDLists const* p = om_->branchIDLists();
646  TBranch* b = metaDataTree_->Branch(poolNames::branchIDListBranchName().c_str(), &p, om_->basketSize(), 0);
647  assert(b);
648  b->Fill();
649  }
650 
652  ThinnedAssociationsHelper const* p = om_->thinnedAssociationsHelper();
653  TBranch* b =
654  metaDataTree_->Branch(poolNames::thinnedAssociationsHelperBranchName().c_str(), &p, om_->basketSize(), 0);
655  assert(b);
656  b->Fill();
657  }
658 
661  }
662 
664  // Make a local copy of the ProductRegistry, removing any transient or pruned products.
665  using ProductList = ProductRegistry::ProductList;
667  ProductRegistry pReg(reg->productList());
668  ProductList& pList = const_cast<ProductList&>(pReg.productList());
669  for (auto const& prod : pList) {
670  if (prod.second.branchID() != prod.second.originalBranchID()) {
671  if (branchesWithStoredHistory_.find(prod.second.branchID()) != branchesWithStoredHistory_.end()) {
672  branchesWithStoredHistory_.insert(prod.second.originalBranchID());
673  }
674  }
675  }
676  std::set<BranchID>::iterator end = branchesWithStoredHistory_.end();
677  for (ProductList::iterator it = pList.begin(); it != pList.end();) {
678  if (branchesWithStoredHistory_.find(it->second.branchID()) == end) {
679  // avoid invalidating iterator on deletion
680  ProductList::iterator itCopy = it;
681  ++it;
682  pList.erase(itCopy);
683 
684  } else {
685  ++it;
686  }
687  }
688 
689  ProductRegistry* ppReg = &pReg;
690  TBranch* b = metaDataTree_->Branch(poolNames::productDescriptionBranchName().c_str(), &ppReg, om_->basketSize(), 0);
691  assert(b);
692  b->Fill();
693  }
695  BranchChildren& pDeps = const_cast<BranchChildren&>(om_->branchChildren());
696  BranchChildren* ppDeps = &pDeps;
697  TBranch* b =
698  metaDataTree_->Branch(poolNames::productDependenciesBranchName().c_str(), &ppDeps, om_->basketSize(), 0);
699  assert(b);
700  b->Fill();
701  }
702 
703  // For duplicate removal and to determine if fast cloning is possible, the input
704  // module by default reads the entire EventAuxiliary branch when it opens the
705  // input files. If EventAuxiliary is written in the usual way, this results
706  // in many small reads scattered throughout the file, which can have very poor
707  // performance characteristics on some filesystems. As a workaround, we save
708  // EventAuxiliary and write it at the end of the file.
709 
711  constexpr std::size_t maxEaBasketSize = 4 * 1024 * 1024;
712 
713  if (om_->compactEventAuxiliary()) {
714  auto tree = eventTree_.tree();
715  auto const& bname = BranchTypeToAuxiliaryBranchName(InEvent).c_str();
716 
717  tree->SetBranchStatus(bname, true);
718  auto basketsize =
719  std::min(maxEaBasketSize,
720  compactEventAuxiliary_.size() * (sizeof(EventAuxiliary) + 26)); // 26 is an empirical fudge factor
721  tree->SetBasketSize(bname, basketsize);
722  auto b = tree->GetBranch(bname);
723 
724  assert(b);
725 
726  LogDebug("writeEventAuxiliary") << "EventAuxiliary ratio extras/GUIDs/all = "
729 
730  for (auto const& aux : compactEventAuxiliary_) {
731  const auto ea = aux.eventAuxiliary();
732  pEventAux_ = &ea;
733  // Fill EventAuxiliary branch
734  b->Fill();
735  }
737  }
738  }
739 
741  if (!om_->outputProcessBlockHelper().processesWithProcessBlockProducts().empty()) {
742  StoredProcessBlockHelper storedProcessBlockHelper(
743  om_->outputProcessBlockHelper().processesWithProcessBlockProducts());
744  om_->outputProcessBlockHelper().fillCacheIndices(storedProcessBlockHelper);
745 
746  StoredProcessBlockHelper* pStoredProcessBlockHelper = &storedProcessBlockHelper;
747  TBranch* b = metaDataTree_->Branch(
748  poolNames::processBlockHelperBranchName().c_str(), &pStoredProcessBlockHelper, om_->basketSize(), 0);
749  assert(b);
750  b->Fill();
751  }
752  }
753 
755  metaDataTree_->SetEntries(-1);
758 
760 
761  // Create branch aliases for all the branches in the
762  // events/lumis/runs/processblock trees. The loop is over
763  // all types of data products.
764  for (unsigned int i = 0; i < treePointers_.size(); ++i) {
766  BranchType branchType = InProcess;
767  if (i < InProcess) {
768  branchType = static_cast<BranchType>(i);
769  } else {
770  processName = om_->outputProcessBlockHelper().processesWithProcessBlockProducts()[i - InProcess];
771  }
772  setBranchAliases(treePointers_[i]->tree(), om_->keptProducts()[branchType], processName);
773  treePointers_[i]->writeTree();
774  }
775 
776  // close the file -- mfp
777  // Just to play it safe, zero all pointers to objects in the TFile to be closed.
778  metaDataTree_ = parentageTree_ = nullptr;
779  for (auto& treePointer : treePointers_) {
780  treePointer->close();
781  treePointer = nullptr;
782  }
783  filePtr_->Close();
784  filePtr_ = nullptr; // propagate_const<T> has no reset() function
785 
786  // report that file has been closed
787  Service<JobReport> reportSvc;
788  reportSvc->outputFileClosed(reportToken_);
789  }
790 
792  SelectedProducts const& branches,
793  std::string const& processName) const {
794  if (tree && tree->GetNbranches() != 0) {
795  for (auto const& selection : branches) {
796  BranchDescription const& pd = *selection.first;
797  if (pd.branchType() == InProcess && processName != pd.processName()) {
798  continue;
799  }
800  std::string const& full = pd.branchName() + "obj";
801  if (pd.branchAliases().empty()) {
802  std::string const& alias = (pd.productInstanceName().empty() ? pd.moduleLabel() : pd.productInstanceName());
803  tree->SetAlias(alias.c_str(), full.c_str());
804  } else {
805  for (auto const& alias : pd.branchAliases()) {
806  tree->SetAlias(alias.c_str(), full.c_str());
807  }
808  }
809  }
810  }
811  }
812 
814  ProductProvenanceRetriever const* iMapper,
815  bool produced,
816  std::set<BranchID> const& iProducedIDs,
817  std::set<StoredProductProvenance>& oToFill) {
818  assert(om_->dropMetaData() != PoolOutputModule::DropAll);
819  assert(produced || om_->dropMetaData() != PoolOutputModule::DropPrior);
820  if (om_->dropMetaData() == PoolOutputModule::DropDroppedPrior && !produced)
821  return;
822  std::vector<BranchID> const& parentIDs = iGetParents.parentage().parents();
823  for (auto const& parentID : parentIDs) {
824  branchesWithStoredHistory_.insert(parentID);
825  ProductProvenance const* info = iMapper->branchIDToProvenance(parentID);
826  if (info) {
827  if (om_->dropMetaData() == PoolOutputModule::DropNone ||
828  (iProducedIDs.end() != iProducedIDs.find(info->branchID()))) {
829  if (insertProductProvenance(*info, oToFill)) {
830  //haven't seen this one yet
831  insertAncestors(*info, iMapper, produced, iProducedIDs, oToFill);
832  }
833  }
834  }
835  }
836  }
837 
// Fills one entry of the output tree treePointers_[ttreeIndex] from `occurrence`.
//
// For every item in om_->selectedOutputItemList()[ttreeIndex] this records the
// item's BranchID in branchesWithStoredHistory_, fetches the product when it is
// needed (see getProd below), and, when provenance is being kept, collects the
// product's provenance and that of its ancestors. The collected provenance is
// placed in *productProvenanceVecPtr only for the duration of the fillTree()
// call and cleared again afterwards.
//
// branchType              kind of data being written; fast cloning is only
//                         considered when it is InEvent.
// occurrence              source of the products (queried via getByToken).
// ttreeIndex              index into treePointers_ and selectedOutputItemList().
// productProvenanceVecPtr destination for stored provenance; a null pointer
//                         disables all provenance handling in this call.
// provRetriever           used to look up provenance by BranchID; it is only
//                         dereferenced when provenance is being kept.
//
// NOTE(review): listing line 860 (which presumably declares `preg`, used in the
// loop over allBranchDescriptions()) is missing from this view - confirm
// against the original file.
838  void RootOutputFile::fillBranches(BranchType const& branchType,
839  OccurrenceForOutput const& occurrence,
840  unsigned int ttreeIndex,
841  StoredProductProvenanceVector* productProvenanceVecPtr,
842  ProductProvenanceRetriever const* provRetriever) {
// Owns the placeholder products created below so that they stay alive until
// this function returns, i.e. until after fillTree() has run.
843  std::vector<std::unique_ptr<WrapperBase> > dummies;
844 
845  OutputItemList& items = om_->selectedOutputItemList()[ttreeIndex];
846 
847  bool const doProvenance =
848  (productProvenanceVecPtr != nullptr) && (om_->dropMetaData() != PoolOutputModule::DropAll);
849  bool const keepProvenanceForPrior = doProvenance && om_->dropMetaData() != PoolOutputModule::DropPrior;
850 
851  bool const fastCloning = (branchType == InEvent) && (whyNotFastClonable_ == FileBlock::CanFastClone);
// A std::set is used so that each StoredProductProvenance is kept only once.
852  std::set<StoredProductProvenance> provenanceToKeep;
853  //
854  //If we are dropping some of the meta data we need to know
855  // which BranchIDs were produced in this process because
856  // we may be storing meta data for only those products
857  // We do this only for event products.
858  std::set<BranchID> producedBranches;
859  if (doProvenance && branchType == InEvent && om_->dropMetaData() != PoolOutputModule::DropNone) {
861  for (auto bd : preg->allBranchDescriptions()) {
862  if (bd->produced() && bd->branchType() == InEvent) {
863  producedBranches.insert(bd->branchID());
864  }
865  }
866  }
867 
868  // Loop over EDProduct branches, possibly fill the provenance, and write the branch.
869  for (auto& item : items) {
870  BranchID const& id = item.branchDescription()->branchID();
871  branchesWithStoredHistory_.insert(id);
872 
873  bool produced = item.branchDescription()->produced();
// The product is fetched unless we are fast cloning a branch that was not
// produced in this process and that the tree does not report as uncloned.
874  bool getProd =
875  (produced || !fastCloning || treePointers_[ttreeIndex]->uncloned(item.branchDescription()->branchName()));
876  bool keepProvenance = doProvenance && (produced || keepProvenanceForPrior);
877 
878  WrapperBase const* product = nullptr;
879  ProductProvenance const* productProvenance = nullptr;
880  if (getProd) {
881  BasicHandle result = occurrence.getByToken(item.token(), item.branchDescription()->unwrappedTypeID());
882  product = result.wrapper();
883  if (result.isValid() && keepProvenance) {
884  productProvenance = result.provenance()->productProvenance();
885  }
886  if (product == nullptr) {
887  // No product with this ID is in the event.
888  // Add a null product.
889  TClass* cp = item.branchDescription()->wrappedType().getClass();
890  assert(cp != nullptr);
// The offset of the WrapperBase base within the wrapped class is passed on to
// getWrapperBasePtr together with the newly created object.
891  int offset = cp->GetBaseClassOffset(wrapperBaseTClass_);
892  void* p = cp->New();
893  std::unique_ptr<WrapperBase> dummy = getWrapperBasePtr(p, offset);
894  product = dummy.get();
895  dummies.emplace_back(std::move(dummy));
896  }
897  item.setProduct(product);
898  }
// Fall back to the retriever when the handle did not supply provenance
// (product not fetched, handle invalid, or no provenance attached).
899  if (keepProvenance && productProvenance == nullptr) {
900  productProvenance = provRetriever->branchIDToProvenance(item.branchDescription()->originalBranchID());
901  }
902  if (productProvenance) {
903  insertProductProvenance(*productProvenance, provenanceToKeep);
904  insertAncestors(*productProvenance, provRetriever, produced, producedBranches, provenanceToKeep);
905  }
906  }
907 
// The provenance vector is populated just before fillTree() and emptied right
// after it, so it only holds data while the tree entry is being filled.
908  if (doProvenance)
909  productProvenanceVecPtr->assign(provenanceToKeep.begin(), provenanceToKeep.end());
910  treePointers_[ttreeIndex]->fillTree();
911  if (doProvenance)
912  productProvenanceVecPtr->clear();
913  }
914 
916  std::set<edm::StoredProductProvenance>& oToInsert) {
917  StoredProductProvenance toStore;
918  toStore.branchID_ = iProv.branchID().id();
919  std::set<edm::StoredProductProvenance>::iterator itFound = oToInsert.find(toStore);
920  if (itFound == oToInsert.end()) {
921  //get the index to the ParentageID or insert a new value if not already present
922  std::pair<std::map<edm::ParentageID, unsigned int>::iterator, bool> i =
923  parentageIDs_.insert(std::make_pair(iProv.parentageID(), static_cast<unsigned int>(parentageIDs_.size())));
924  toStore.parentageIDIndex_ = i.first->second;
925  if (toStore.parentageIDIndex_ >= parentageIDs_.size()) {
927  << "RootOutputFile::insertProductProvenance\n"
928  << "The parentage ID index value " << toStore.parentageIDIndex_
929  << " is out of bounds. The maximum value is currently " << parentageIDs_.size() - 1 << ".\n"
930  << "This should never happen.\n"
931  << "Please report this to the framework hypernews forum 'hn-cms-edmFramework@cern.ch'.\n";
932  }
933 
934  oToInsert.insert(toStore);
935  return true;
936  }
937  return false;
938  }
939 } // namespace edm
RunNumber_t run() const
Definition: EventID.h:38
PoolOutputModule::OutputItemList OutputItemList
EventNumber_t event() const
Definition: EventID.h:40
std::string const & branchName() const
bool isRealData() const
void beginInputFile(FileBlock const &fb, int remainingEvents)
BranchID const & branchID() const
LuminosityBlockAuxiliary lumiAux_
std::string const & BranchTypeToAuxiliaryBranchName(BranchType const &branchType)
Definition: BranchType.cc:116
EventID const & id() const
static const TGPicture * info(bool iBackgroundIsBlack)
std::string const & processName() const
BranchType const & branchType() const
std::string const & parentageTreeName()
Definition: BranchType.cc:154
std::vector< BranchIDList > BranchIDLists
Definition: BranchIDList.h:19
int eventAutoFlushSize() const
EventAuxiliary const & eventAuxiliary() const
void writeProcessHistoryRegistry()
void push_back(const EventAuxiliary &ea)
EventToProcessBlockIndexes const & eventToProcessBlockIndexes() const
void addEntryToStoredMetadata(StoredMergeableRunProductMetadata &) const
void insertAncestors(ProductProvenance const &iGetParents, ProductProvenanceRetriever const *iMapper, bool produced, std::set< BranchID > const &producedBranches, std::set< StoredProductProvenance > &oToFill)
void fillParameterSetBranch(TTree *parameterSetsTree, int basketSize)
std::map< BranchKey, BranchDescription > ProductList
bool checkSplitLevelsAndBasketSizes(TTree *inputTree) const
bool registerProcessHistory(ProcessHistory const &processHistory)
selection
main part
Definition: corrVsCorr.py:100
TTree const * tree() const
std::map< ParentageID, unsigned int > parentageIDs_
void fillProcessHistoryBranch(TTree *metaDataTree, int basketSize, ProcessHistoryRegistry const &processHistoryRegistry)
int whyNotFastClonable() const
Definition: FileBlock.h:128
RunNumber_t run() const
std::string const & fileFormatVersionBranchName()
Definition: BranchType.cc:189
edm::propagate_const< TTree * > metaDataTree_
unsigned long nEventsInLumi_
ProcessHistoryRegistry processHistoryRegistry_
ProductProvenance const * branchIDToProvenance(BranchID const &bid) const
std::string const & processName() const
ProductProvenance const * productProvenance() const
Definition: Provenance.cc:24
std::string const & eventSelectionsBranchName()
Definition: BranchType.cc:210
bool int lh
Definition: SIMDVec.h:20
void writeLuminosityBlock(LuminosityBlockForOutput const &lb)
void setAutoFlush(Long64_t size)
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:19
assert(be >=bs)
bool shouldWeCloseFile() const
unsigned int id() const
Definition: BranchID.h:21
constexpr Matriplex::idx_t LL
Definition: Matrix.h:45
LuminosityBlockAuxiliary const & luminosityBlockAuxiliary() const
bool checkIfFastClonable(TTree *inputTree) const
BranchType
Definition: BranchType.h:11
std::set< BranchID > branchesWithStoredHistory_
std::vector< edm::propagate_const< RootOutputTree * > > treePointers_
std::vector< EventSelectionID > EventSelectionIDVector
void sortVector_Run_Or_Lumi_Entries()
LuminosityBlockNumber_t luminosityBlock() const
std::string const & parameterSetsTreeName()
Definition: BranchType.cc:216
tuple result
Definition: mps_fire.py:311
RootOutputFile(PoolOutputModule *om, std::string const &fileName, std::string const &logicalFileName, std::vector< std::string > const &processesWithSelectedMergeableRunProducts)
std::vector< std::pair< BranchDescription const *, EDGetToken > > SelectedProducts
std::vector< BranchID > const & parents() const
Definition: Parentage.h:44
void writeThinnedAssociationsHelper()
std::shared_ptr< TFile const > filePtr() const
RootOutputTree eventTree_
std::vector< BranchListIndex > BranchListIndexes
std::vector< OutputItemList > const & selectedOutputItemList() const
bool getMapped(key_type const &k, value_type &result) const
void addBranch(std::string const &branchName, std::string const &className, void const *&pProd, int splitLevel, int basketSize, bool produced)
edm::propagate_const< PoolOutputModule * > om_
virtual ProcessHistory const & processHistory() const
MD5Result digest()
Definition: Digest.cc:171
std::string const & moduleLabel() const
IndexIntoFile::EntryNumber_t eventEntryNumber_
std::string const & productInstanceName() const
std::string const & mergeableRunProductMetadataBranchName()
Definition: BranchType.cc:201
LuminosityBlockAuxiliary const * pLumiAux_
std::string const & indexIntoFileBranchName()
Definition: BranchType.cc:198
edm::propagate_const< std::shared_ptr< TFile > > filePtr_
Definition: GenABIO.cc:168
def move
Definition: eostools.py:511
void writeOne(EventForOutput const &e)
ProductProvenanceRetriever const * productProvenanceRetrieverPtr() const
RunNumber_t run() const
int getFileFormatVersion()
EventSelectionIDVector const & eventSelectionIDs() const
std::string logicalFile_
bool checkEntriesInReadBranches(Long64_t expectedNumberOfEntries) const
void fillBranches(BranchType const &branchType, OccurrenceForOutput const &occurrence, unsigned int ttreeIndex, StoredProductProvenanceVector *productProvenanceVecPtr=nullptr, ProductProvenanceRetriever const *provRetriever=nullptr)
ProcessHistoryID const & reducedProcessHistoryID(ProcessHistoryID const &fullID) const
void setProcessHistoryID(ProcessHistoryID const &phid)
std::string const & metaDataTreeName()
Definition: BranchType.cc:159
CompactEventAuxiliaryVector compactEventAuxiliary_
LuminosityBlockNumber_t luminosityBlock() const
void addEntry(ProcessHistoryID const &processHistoryID, RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, EntryNumber_t entry)
edm::propagate_const< TClass * > wrapperBaseTClass_
bool isValid() const noexcept(true)
Definition: BasicHandle.h:69
std::string const & eventToProcessBlockIndexesBranchName()
Definition: BranchType.cc:214
static TTree * makeTTree(TFile *filePtr, std::string const &name, int splitLevel)
ProcessHistoryID const & processHistoryID() const
std::string const & fullClassName() const
EventSelectionIDVector const * pEventSelectionIDs_
void setSubBranchBasketSizes(TTree *inputTree) const
std::string createGlobalIdentifier(bool binary=false)
Log< level::Info, false > LogInfo
edm::propagate_const< TTree * > parameterSetsTree_
StoredMergeableRunProductMetadata storedMergeableRunProductMetadata_
void writeStoredMergeableRunProductMetadata()
RunAuxiliary const & runAuxiliary() const
Definition: RunForOutput.h:54
void sort_all(RandomAccessSequence &s)
wrappers for std::sort
Definition: Algorithms.h:92
edm::propagate_const< StoredProductProvenanceVector * > pEventEntryInfoVector_
WrapperBase const * wrapper() const noexcept(true)
Definition: BasicHandle.h:73
RootOutputTree runTree_
std::string toString() const
Definition: Digest.cc:95
std::string const & parentageBranchName()
Definition: BranchType.cc:156
double b
Definition: hdecay.h:118
LuminosityBlockNumber_t luminosityBlock() const
void addContext(std::string const &context)
Definition: Exception.cc:165
edm::propagate_const< TTree * > parentageTree_
std::set< std::string > const & branchAliases() const
bool insertProductProvenance(const ProductProvenance &, std::set< StoredProductProvenance > &oToInsert)
void setException(std::exception_ptr e)
void addAuxiliary(std::string const &branchName, T const *&pAux, int bufSize, bool allowCloning=true)
std::vector< StoredProductProvenance > StoredProductProvenanceVector
void respondToCloseInputFile(FileBlock const &fb)
IndexIntoFile indexIntoFile_
BasicHandle getByToken(EDGetToken token, TypeID const &typeID) const
constexpr element_type const * get() const
std::string const & productDescriptionBranchName()
Definition: BranchType.cc:162
IndexIntoFile::EntryNumber_t runEntryNumber_
ParentageID const & parentageID() const
static void writeTTree(TTree *tree)
ProcessHistoryID const & processHistoryID() const
std::string moduleName(StableProvenance const &provenance, ProcessHistory const &history)
Definition: Provenance.cc:27
EventAuxiliary const * pEventAux_
RunAuxiliary runAux_
std::exception_ptr getException()
string end
Definition: dataset.py:937
void writeProductDescriptionRegistry()
std::string const & BranchTypeToProductProvenanceBranchName(BranchType const &BranchType)
Definition: BranchType.cc:139
std::string const & productDependenciesBranchName()
Definition: BranchType.cc:165
StoredProductProvenanceVector const * pEventEntryInfoVector() const
std::string const & thinnedAssociationsHelperBranchName()
Definition: BranchType.cc:186
BranchListIndexes const & branchListIndexes() const
void writeProcessBlock(ProcessBlockForOutput const &)
std::unique_ptr< WrapperBase > getWrapperBasePtr(void *p, int offset)
std::string const & branchIDListBranchName()
Definition: BranchType.cc:183
void setProcessHistoryID(ProcessHistoryID const &phid)
Definition: RunAuxiliary.h:26
std::string const & branchListIndexesBranchName()
Definition: BranchType.cc:212
Parentage const & parentage() const
BranchListIndexes const * pBranchListIndexes_
Log< level::Warning, false > LogWarning
tuple branchNames
Definition: haddnano.py:54
void maybeFastCloneTree(bool canFastClone, bool canFastCloneAux, TTree *tree, std::string const &option)
LuminosityBlockID const & id() const
preg
Definition: Schedule.cc:687
std::vector< edm::propagate_const< std::unique_ptr< RootOutputTree > > > processBlockTrees_
static ParentageRegistry * instance()
IndexIntoFile::EntryNumber_t lumiEntryNumber_
RootOutputTree lumiTree_
Provenance const * provenance() const noexcept(true)
Definition: BasicHandle.h:75
void optimizeBaskets(ULong64_t size)
void writeRun(RunForOutput const &r)
TTree * tree() const
Definition: FileBlock.h:117
std::string const & fileIdentifierBranchName()
Definition: BranchType.cc:192
MergeableRunProductMetadata const * mergeableRunProductMetadata() const
Definition: RunForOutput.h:59
RunAuxiliary const * pRunAux_
std::string const & processBlockHelperBranchName()
Definition: BranchType.cc:204
std::string const & wrappedName() const
tuple size
Write out results.
EventNumber_t event() const
RunNumber_t run() const
Definition: RunAuxiliary.h:31
void setBranchAliases(TTree *tree, SelectedProducts const &branches, std::string const &processName) const
RunNumber_t run() const
Definition: RunForOutput.h:56
JobReport::Token reportToken_
EventToProcessBlockIndexes const * pEventToProcessBlockIndexes_
std::string const & fileName() const
Definition: FileBlock.h:130
#define LogDebug(id)
std::string match(BranchDescription const &a, BranchDescription const &b, std::string const &fileName)