CMS 3D CMS Logo

NanoAODOutputModule.cc
Go to the documentation of this file.
1 // -*- C++ -*-
2 //
3 // Package: PhysicsTools/NanoAODOutput
4 // Class : NanoAODOutputModule
5 //
6 // Implementation:
7 // [Notes on implementation]
8 //
9 // Original Author: Christopher Jones
10 // Created: Mon, 07 Aug 2017 14:21:41 GMT
11 //
12 
13 // system include files
14 #include <algorithm>
15 #include <memory>
16 
17 #include "Compression.h"
18 #include "TFile.h"
19 #include "TObjString.h"
20 #include "TROOT.h"
21 #include "TTree.h"
22 #include <string>
23 
24 // user include files
46 
47 #include <iostream>
48 
49 #include "oneapi/tbb/task_arena.h"
50 
52 public:
54  ~NanoAODOutputModule() override;
55 
56  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
57 
58 private:
59  void write(edm::EventForOutput const& e) override;
61  void writeRun(edm::RunForOutput const&) override;
62  bool isFileOpen() const override;
63  void openFile(edm::FileBlock const&) override;
64  void reallyCloseFile() override;
65 
72  bool m_fakeName; //crab workaround, remove after crab is fixed
76  std::unique_ptr<TFile> m_file;
78 
79  static constexpr int m_firstFlush{1000};
80 
82  public:
// Registers the per-event identification branches ("run", "luminosityBlock",
// "event", "bunchCrossing", "orbitNumber") on `tree`, binding each branch to
// the corresponding member of this object so that a later fill() followed by
// TTree::Fill() stores the current values.
// The leaf descriptors use "/i" for every branch except "event", which uses
// "/l"; presumably these are ROOT's 32-bit and 64-bit unsigned codes, matching
// the visible UInt_t m_run and ULong64_t m_event members — confirm for the
// members whose declarations are not visible here.
83  void branch(TTree& tree) {
84  tree.Branch("run", &m_run, "run/i");
85  tree.Branch("luminosityBlock", &m_luminosityBlock, "luminosityBlock/i");
86  tree.Branch("event", &m_event, "event/l");
87  tree.Branch("bunchCrossing", &m_bunchCrossing, "bunchCrossing/i");
88  tree.Branch("orbitNumber", &m_orbitNumber, "orbitNumber/i");
89  }
// Copies the identification of the current event out of `aux` into the
// members that branch() bound to the tree: run, luminosity block and event
// number come from aux.id(); bunch crossing and orbit number come from `aux`
// directly. Nothing is written to the tree here.
90  void fill(const edm::EventAuxiliary& aux) {
91  m_run = aux.id().run();
92  m_luminosityBlock = aux.id().luminosityBlock();
93  m_event = aux.id().event();
94  m_bunchCrossing = aux.bunchCrossing();
95  m_orbitNumber = aux.orbitNumber();
96  }
97 
98  private:
99  UInt_t m_run;
101  ULong64_t m_event;
105 
107  public:
// Registers the "run" and "luminosityBlock" branches on `tree`, binding them
// to this object's m_run and m_luminosityBlock members. Both use the "/i"
// leaf descriptor.
108  void branch(TTree& tree) {
109  tree.Branch("run", &m_run, "run/i");
110  tree.Branch("luminosityBlock", &m_luminosityBlock, "luminosityBlock/i");
111  }
// Copies the run number and the luminosity-block identifier from `id` into
// the members that branch() bound to the tree.
// NOTE(review): the event-level fill() reads the lumi number through
// luminosityBlock(), whereas this one stores id.value(). Confirm that value()
// yields the plain luminosity-block number, or that any narrowing on
// assignment to m_luminosityBlock (declaration not visible here) is intended.
112  void fill(const edm::LuminosityBlockID& id) {
113  m_run = id.run();
114  m_luminosityBlock = id.value();
115  }
116 
117  private:
118  UInt_t m_run;
121 
123  public:
// Registers the single "run" branch on `tree`, bound to m_run ("/i" leaf).
124  void branch(TTree& tree) { tree.Branch("run", &m_run, "run/i"); }
// Stores the run number of `id` in m_run, the member bound to the "run" branch.
125  void fill(const edm::RunID& id) { m_run = id.run(); }
126 
127  private:
128  UInt_t m_run;
130 
131  std::vector<TableOutputBranches> m_tables;
132  std::vector<TriggerOutputBranches> m_triggers;
133  bool m_triggers_areSorted = false;
134  std::vector<EventStringOutputBranches> m_evstrings;
135 
136  std::vector<SummaryTableOutputBranches> m_runTables;
137  std::vector<SummaryTableOutputBranches> m_lumiTables;
138  std::vector<LumiOutputBranches> m_lumiTables2;
139  std::vector<TableOutputBranches> m_runFlatTables;
140 
141  std::vector<std::pair<std::string, edm::EDGetToken>> m_nanoMetadata;
142 };
143 
144 //
145 // constants, enums and typedefs
146 //
147 
148 //
149 // static data member definitions
150 //
151 
152 //
153 // constructors and destructor
154 //
157  edm::one::OutputModule<>(pset),
158  m_fileName(pset.getUntrackedParameter<std::string>("fileName")),
159  m_logicalFileName(pset.getUntrackedParameter<std::string>("logicalFileName")),
160  m_compressionLevel(pset.getUntrackedParameter<int>("compressionLevel")),
161  m_compressionAlgorithm(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
162  m_writeProvenance(pset.getUntrackedParameter<bool>("saveProvenance", true)),
163  m_fakeName(pset.getUntrackedParameter<bool>("fakeNameForCrab", false)),
164  m_autoFlush(pset.getUntrackedParameter<int>("autoFlush", -10000000)),
165  m_processHistoryRegistry() {}
166 
168 
170  //Get data from 'e' and write it to the file
172  jr->eventWrittenToFile(m_jrToken, iEvent.id().run(), iEvent.id().event());
173 
174  if (m_autoFlush) {
175  int64_t events = m_tree->GetEntriesFast();
176  if (events == m_firstFlush) {
177  m_tree->FlushBaskets();
178  float maxMemory;
179  if (m_autoFlush > 0) {
180  // Estimate the memory we'll be using at the first full flush by
181  // linearly scaling the number of events.
182  float percentClusterDone = m_firstFlush / static_cast<float>(m_autoFlush);
183  maxMemory = static_cast<float>(m_tree->GetTotBytes()) / percentClusterDone;
184  } else if (m_tree->GetZipBytes() == 0) {
185  maxMemory = 100 * 1024 * 1024; // Degenerate case of no information in the tree; arbitrary value
186  } else {
187  // Estimate the memory we'll be using by scaling the current compression ratio.
188  float cxnRatio = m_tree->GetTotBytes() / static_cast<float>(m_tree->GetZipBytes());
189  maxMemory = -m_autoFlush * cxnRatio;
190  float percentBytesDone = -m_tree->GetZipBytes() / static_cast<float>(m_autoFlush);
191  m_autoFlush = m_firstFlush / percentBytesDone;
192  }
193  //std::cout << "OptimizeBaskets: total bytes " << m_tree->GetTotBytes() << std::endl;
194  //std::cout << "OptimizeBaskets: zip bytes " << m_tree->GetZipBytes() << std::endl;
195  //std::cout << "OptimizeBaskets: autoFlush " << m_autoFlush << std::endl;
196  //std::cout << "OptimizeBaskets: maxMemory " << static_cast<uint32_t>(maxMemory) << std::endl;
197  //m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "d");
198  m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "");
199  }
201  m_tree->FlushBaskets();
202  m_eventsSinceFlush = 0;
203  }
205  }
206 
207  m_commonBranches.fill(iEvent.eventAuxiliary());
208  // fill all tables, starting from main tables and then doing extension tables
209  for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
210  for (auto& t : m_tables)
211  t.fill(iEvent, *m_tree, extensions);
212  }
213  if (!m_triggers_areSorted) { // sort triggers/flags in inverse processHistory order, to save without any special label the most recent ones
214  std::vector<std::string> pnames;
215  for (auto& p : iEvent.processHistory())
216  pnames.push_back(p.processName());
218  return ((std::find(pnames.begin(), pnames.end(), a.processName()) - pnames.begin()) >
219  (std::find(pnames.begin(), pnames.end(), b.processName()) - pnames.begin()));
220  });
221  m_triggers_areSorted = true;
222  }
223  // fill triggers
224  for (auto& t : m_triggers)
225  t.fill(iEvent, *m_tree);
226  // fill event branches
227  for (auto& t : m_evstrings)
228  t.fill(iEvent, *m_tree);
229  tbb::this_task_arena::isolate([&] { m_tree->Fill(); });
230 
232 }
233 
236  jr->reportLumiSection(m_jrToken, iLumi.id().run(), iLumi.id().value());
237 
239 
240  for (auto& t : m_lumiTables)
241  t.fill(iLumi, *m_lumiTree);
242 
243  for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
244  for (auto& t : m_lumiTables2)
245  t.fill(iLumi, *m_lumiTree, extensions);
246  }
247 
248  tbb::this_task_arena::isolate([&] { m_lumiTree->Fill(); });
249 
251 }
252 
255  jr->reportRunNumber(m_jrToken, iRun.id().run());
256 
257  m_commonRunBranches.fill(iRun.id());
258 
259  for (auto& t : m_runTables)
260  t.fill(iRun, *m_runTree);
261 
262  for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
263  for (auto& t : m_runFlatTables)
264  t.fill(iRun, *m_runTree, extensions);
265  }
266 
268  for (const auto& p : m_nanoMetadata) {
269  iRun.getByToken(p.second, hstring);
270  TObjString* tos = dynamic_cast<TObjString*>(m_file->Get(p.first.c_str()));
271  if (tos) {
272  if (hstring->str() != tos->GetString())
273  throw cms::Exception("LogicError", "Inconsistent nanoMetadata " + p.first + " (" + hstring->str() + ")");
274  } else {
275  auto ostr = std::make_unique<TObjString>(hstring->str().c_str());
276  m_file->WriteTObject(ostr.release(), p.first.c_str());
277  }
278  }
279 
280  tbb::this_task_arena::isolate([&] { m_runTree->Fill(); });
281 
283 }
284 
// Reports whether an output file is currently held: true exactly when m_file
// owns a TFile, false when the pointer is empty.
285 bool NanoAODOutputModule::isFileOpen() const { return m_file != nullptr; }
286 
288  m_file = std::make_unique<TFile>(m_fileName.c_str(), "RECREATE", "", m_compressionLevel);
290  cms::Digest branchHash;
291  m_jrToken = jr->outputFileOpened(m_fileName,
293  std::string(),
294  m_fakeName ? "PoolOutputModule" : "NanoAODOutputModule",
297  std::string(),
298  branchHash.digest().toString(),
299  std::vector<std::string>());
300 
301  if (m_compressionAlgorithm == std::string("ZLIB")) {
302  m_file->SetCompressionAlgorithm(ROOT::kZLIB);
303  } else if (m_compressionAlgorithm == std::string("LZMA")) {
304  m_file->SetCompressionAlgorithm(ROOT::kLZMA);
305  } else if (m_compressionAlgorithm == std::string("ZSTD")) {
306  m_file->SetCompressionAlgorithm(ROOT::kZSTD);
307  } else if (m_compressionAlgorithm == std::string("LZ4")) {
308  m_file->SetCompressionAlgorithm(ROOT::kLZ4);
309  } else {
310  throw cms::Exception("Configuration")
311  << "NanoAODOutputModule configured with unknown compression algorithm '" << m_compressionAlgorithm << "'\n"
312  << "Allowed compression algorithms are ZLIB, LZMA, ZSTD, and LZ4\n";
313  }
314  /* Setup file structure here */
315  m_tables.clear();
316  m_triggers.clear();
317  m_triggers_areSorted = false;
318  m_evstrings.clear();
319  m_runTables.clear();
320  m_lumiTables.clear();
321  m_lumiTables2.clear();
322  m_runFlatTables.clear();
323  const auto& keeps = keptProducts();
324  for (const auto& keep : keeps[edm::InEvent]) {
325  if (keep.first->className() == "nanoaod::FlatTable")
326  m_tables.emplace_back(keep.first, keep.second);
327  else if (keep.first->className() == "edm::TriggerResults") {
328  m_triggers.emplace_back(keep.first, keep.second);
329  } else if (keep.first->className() == "std::basic_string<char,std::char_traits<char> >" &&
330  keep.first->productInstanceName() == "genModel") { // friendlyClassName == "String"
331  m_evstrings.emplace_back(keep.first, keep.second, true); // update only at lumiBlock transitions
332  } else
333  throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className());
334  }
335 
336  for (const auto& keep : keeps[edm::InLumi]) {
337  if (keep.first->className() == "nanoaod::MergeableCounterTable")
338  m_lumiTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
339  else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
340  m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
341  else if (keep.first->className() == "nanoaod::FlatTable")
342  m_lumiTables2.push_back(LumiOutputBranches(keep.first, keep.second));
343  else
344  throw cms::Exception(
345  "Configuration",
346  "NanoAODOutputModule cannot handle class " + keep.first->className() + " in LuminosityBlock branch");
347  }
348 
349  for (const auto& keep : keeps[edm::InRun]) {
350  if (keep.first->className() == "nanoaod::MergeableCounterTable")
351  m_runTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
352  else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
353  m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
354  else if (keep.first->className() == "nanoaod::FlatTable")
355  m_runFlatTables.emplace_back(keep.first, keep.second);
356  else
357  throw cms::Exception("Configuration",
358  "NanoAODOutputModule cannot handle class " + keep.first->className() + " in Run branch");
359  }
360 
361  // create the trees
362  m_tree = std::make_unique<TTree>("Events", "Events");
363  m_tree->SetAutoSave(0);
364  m_tree->SetAutoFlush(0);
366 
367  m_lumiTree = std::make_unique<TTree>("LuminosityBlocks", "LuminosityBlocks");
368  m_lumiTree->SetAutoSave(0);
370 
371  m_runTree = std::make_unique<TTree>("Runs", "Runs");
372  m_runTree->SetAutoSave(0);
374 
375  if (m_writeProvenance) {
376  m_metaDataTree = std::make_unique<TTree>(edm::poolNames::metaDataTreeName().c_str(), "Job metadata");
377  m_metaDataTree->SetAutoSave(0);
378  m_parameterSetsTree = std::make_unique<TTree>(edm::poolNames::parameterSetsTreeName().c_str(), "Parameter sets");
379  m_parameterSetsTree->SetAutoSave(0);
380  }
381 }
383  if (m_writeProvenance) {
384  int basketSize = 16384; // fixme configurable?
387  if (m_metaDataTree->GetNbranches() != 0) {
388  m_metaDataTree->SetEntries(-1);
389  }
390  if (m_parameterSetsTree->GetNbranches() != 0) {
391  m_parameterSetsTree->SetEntries(-1);
392  }
393  }
394  m_file->Write();
395  m_file->Close();
396  m_file.reset();
397  m_tree.release(); // apparently root has ownership
398  m_lumiTree.release(); //
399  m_runTree.release(); //
400  m_metaDataTree.release(); //
401  m_parameterSetsTree.release(); //
404 }
405 
408 
409  desc.addUntracked<std::string>("fileName");
410  desc.addUntracked<std::string>("logicalFileName", "");
411 
412  desc.addUntracked<int>("compressionLevel", 9)->setComment("ROOT compression level of output file.");
// Help text lists every algorithm name that openFile() accepts when it maps
// this string onto a ROOT compression algorithm (ZLIB, LZMA, ZSTD, LZ4); any
// other value makes openFile() throw a "Configuration" exception.
413  desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
414  ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB, LZMA, ZSTD, and LZ4");
415  desc.addUntracked<bool>("saveProvenance", true)
416  ->setComment("Save process provenance information, e.g. for edmProvDump");
// When true, the job report records this module under the name
// "PoolOutputModule" instead of "NanoAODOutputModule" (see the m_fakeName use
// in openFile()). Help text corrected: the tool in question is crab.
417  desc.addUntracked<bool>("fakeNameForCrab", false)
418  ->setComment(
419  "Change the OutputModule name in the fwk job report to fake PoolOutputModule. This is needed to run on crab "
420  "(and publish) till crab is fixed");
421  desc.addUntracked<int>("autoFlush", -10000000)->setComment("Autoflush parameter for ROOT file");
422 
423  //replace with whatever you want to get from the EDM by default
424  const std::vector<std::string> keep = {"drop *",
425  "keep nanoaodFlatTable_*Table_*_*",
426  "keep edmTriggerResults_*_*_*",
427  "keep String_*_genModel_*",
428  "keep nanoaodMergeableCounterTable_*Table_*_*",
429  "keep nanoaodUniqueString_nanoMetadata_*_*"};
431 
432  //Used by Workflow management for their own meta data
434  dataSet.setAllowAnything();
435  desc.addUntracked<edm::ParameterSetDescription>("dataset", dataSet)
436  ->setComment("PSet is only used by Data Operations and not by this module.");
437 
439  branchSet.setAllowAnything();
440  desc.add<edm::ParameterSetDescription>("branches", branchSet);
441 
442  descriptions.addDefault(desc);
443 }
444 
std::unique_ptr< TTree > m_runTree
std::vector< TableOutputBranches > m_runFlatTables
std::string const & metaDataTreeName()
Definition: BranchType.cc:159
std::unique_ptr< TTree > m_lumiTree
std::vector< SummaryTableOutputBranches > m_lumiTables
std::unique_ptr< TTree > m_tree
void setAllowAnything()
allow any parameter label/value pairs
void fillParameterSetBranch(TTree *parameterSetsTree, int basketSize)
bool registerProcessHistory(ProcessHistory const &processHistory)
void fillProcessHistoryBranch(TTree *metaDataTree, int basketSize, ProcessHistoryRegistry const &processHistoryRegistry)
NanoAODOutputModule(edm::ParameterSet const &pset)
bool isFileOpen() const override
void fill(const edm::LuminosityBlockID &id)
void reportRunNumber(JobReport::Token token, unsigned int run)
Definition: JobReport.cc:505
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:19
BasicHandle getByToken(EDGetToken token, TypeID const &typeID) const
ModuleDescription const & description() const
std::unique_ptr< TTree > m_parameterSetsTree
void writeRun(edm::RunForOutput const &) override
class NanoAODOutputModule::CommonLumiBranches m_commonLumiBranches
std::unique_ptr< TTree > m_metaDataTree
std::unique_ptr< TFile > m_file
int iEvent
Definition: GenABIO.cc:224
void addDefault(ParameterSetDescription const &psetDescription)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
MD5Result digest()
Definition: Digest.cc:171
std::vector< EventStringOutputBranches > m_evstrings
std::vector< SummaryTableOutputBranches > m_runTables
std::string const & parameterSetsTreeName()
Definition: BranchType.cc:216
std::vector< TableOutputBranches > m_tables
static constexpr int m_firstFlush
std::vector< TriggerOutputBranches > m_triggers
std::vector< std::pair< std::string, edm::EDGetToken > > m_nanoMetadata
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
std::vector< LumiOutputBranches > m_lumiTables2
std::size_t Token
Definition: JobReport.h:106
void openFile(edm::FileBlock const &) override
std::string createGlobalIdentifier(bool binary=false)
void reallyCloseFile() override
SelectedProductsForBranchType const & keptProducts() const
void eventWrittenToFile(Token fileToken, RunNumber_t run, EventNumber_t event)
Definition: JobReport.cc:462
double b
Definition: hdecay.h:120
const std::string & str() const
Definition: UniqueString.h:12
virtual ProcessHistory const & processHistory() const
RunID const & id() const
Definition: RunForOutput.h:55
HLT enums.
double a
Definition: hdecay.h:121
void outputFileClosed(Token fileToken)
Definition: JobReport.cc:467
void write(edm::EventForOutput const &e) override
class NanoAODOutputModule::CommonRunBranches m_commonRunBranches
void fill(const edm::EventAuxiliary &aux)
edm::JobReport::Token m_jrToken
class NanoAODOutputModule::CommonEventBranches m_commonBranches
Definition: tree.py:1
edm::ProcessHistoryRegistry m_processHistoryRegistry
static void fillDescription(ParameterSetDescription &desc, std::vector< std::string > const &iDefaultOutputCommands=ProductSelectorRules::defaultSelectionStrings())
std::string toString() const
Definition: Digest.cc:95
int events
std::string m_compressionAlgorithm
RunNumber_t run() const
Definition: RunID.h:26
void reportLumiSection(JobReport::Token token, unsigned int run, unsigned int lumiSectId, unsigned long nEvents=0)
Definition: JobReport.cc:494
void writeLuminosityBlock(edm::LuminosityBlockForOutput const &) override