CMS 3D CMS Logo

NanoAODOutputModule.cc
Go to the documentation of this file.
1 // -*- C++ -*-
2 //
3 // Package: PhysicsTools/NanoAODOutput
4 // Class : NanoAODOutputModule
5 //
6 // Implementation:
7 // [Notes on implementation]
8 //
9 // Original Author: Christopher Jones
10 // Created: Mon, 07 Aug 2017 14:21:41 GMT
11 //
12 
13 // system include files
14 #include <string>
15 #include "TFile.h"
16 #include "TTree.h"
17 #include "TROOT.h"
18 #include "TObjString.h"
19 #include "Compression.h"
20 
21 // user include files
43 
44 #include <iostream>
45 
47 public:
49  ~NanoAODOutputModule() override;
50 
51  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
52 
53 private:
54  void write(edm::EventForOutput const& e) override;
56  void writeRun(edm::RunForOutput const&) override;
57  bool isFileOpen() const override;
58  void openFile(edm::FileBlock const&) override;
59  void reallyCloseFile() override;
60 
67  bool m_fakeName; //crab workaround, remove after crab is fixed
71  std::unique_ptr<TFile> m_file;
73 
74  static constexpr int m_firstFlush{1000};
75 
77  public:
78  void branch(TTree& tree) {
79  tree.Branch("run", &m_run, "run/i");
80  tree.Branch("luminosityBlock", &m_luminosityBlock, "luminosityBlock/i");
81  tree.Branch("event", &m_event, "event/l");
82  }
83  void fill(const edm::EventID& id) {
84  m_run = id.run();
85  m_luminosityBlock = id.luminosityBlock();
86  m_event = id.event();
87  }
88 
89  private:
90  UInt_t m_run;
92  ULong64_t m_event;
94 
96  public:
97  void branch(TTree& tree) {
98  tree.Branch("run", &m_run, "run/i");
99  tree.Branch("luminosityBlock", &m_luminosityBlock, "luminosityBlock/i");
100  }
101  void fill(const edm::LuminosityBlockID& id) {
102  m_run = id.run();
103  m_luminosityBlock = id.value();
104  }
105 
106  private:
107  UInt_t m_run;
110 
112  public:
113  void branch(TTree& tree) { tree.Branch("run", &m_run, "run/i"); }
114  void fill(const edm::RunID& id) { m_run = id.run(); }
115 
116  private:
117  UInt_t m_run;
119 
120  std::vector<TableOutputBranches> m_tables;
121  std::vector<TriggerOutputBranches> m_triggers;
122  std::vector<EventStringOutputBranches> m_evstrings;
123 
124  std::vector<SummaryTableOutputBranches> m_runTables;
125 
126  std::vector<std::pair<std::string, edm::EDGetToken>> m_nanoMetadata;
127 };
128 
129 //
130 // constants, enums and typedefs
131 //
132 
133 //
134 // static data member definitions
135 //
136 
137 //
138 // constructors and destructor
139 //
142  edm::one::OutputModule<>(pset),
143  m_fileName(pset.getUntrackedParameter<std::string>("fileName")),
144  m_logicalFileName(pset.getUntrackedParameter<std::string>("logicalFileName")),
145  m_compressionLevel(pset.getUntrackedParameter<int>("compressionLevel")),
146  m_compressionAlgorithm(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
147  m_writeProvenance(pset.getUntrackedParameter<bool>("saveProvenance", true)),
148  m_fakeName(pset.getUntrackedParameter<bool>("fakeNameForCrab", false)),
149  m_autoFlush(pset.getUntrackedParameter<int>("autoFlush", -10000000)),
151 
153 
155  //Get data from 'e' and write it to the file
157  jr->eventWrittenToFile(m_jrToken, iEvent.id().run(), iEvent.id().event());
158 
159  if (m_autoFlush) {
160  int64_t events = m_tree->GetEntriesFast();
161  if (events == m_firstFlush) {
162  m_tree->FlushBaskets();
163  float maxMemory;
164  if (m_autoFlush > 0) {
165  // Estimate the memory we'll be using at the first full flush by
166  // linearly scaling the number of events.
167  float percentClusterDone = m_firstFlush / static_cast<float>(m_autoFlush);
168  maxMemory = static_cast<float>(m_tree->GetTotBytes()) / percentClusterDone;
169  } else if (m_tree->GetZipBytes() == 0) {
170  maxMemory = 100 * 1024 * 1024; // Degenerate case of no information in the tree; arbitrary value
171  } else {
172  // Estimate the memory we'll be using by scaling the current compression ratio.
173  float cxnRatio = m_tree->GetTotBytes() / static_cast<float>(m_tree->GetZipBytes());
174  maxMemory = -m_autoFlush * cxnRatio;
175  float percentBytesDone = -m_tree->GetZipBytes() / static_cast<float>(m_autoFlush);
176  m_autoFlush = m_firstFlush / percentBytesDone;
177  }
178  //std::cout << "OptimizeBaskets: total bytes " << m_tree->GetTotBytes() << std::endl;
179  //std::cout << "OptimizeBaskets: zip bytes " << m_tree->GetZipBytes() << std::endl;
180  //std::cout << "OptimizeBaskets: autoFlush " << m_autoFlush << std::endl;
181  //std::cout << "OptimizeBaskets: maxMemory " << static_cast<uint32_t>(maxMemory) << std::endl;
182  //m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "d");
183  m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "");
184  }
186  m_tree->FlushBaskets();
187  m_eventsSinceFlush = 0;
188  }
190  }
191 
192  m_commonBranches.fill(iEvent.id());
193  // fill all tables, starting from main tables and then doing extension tables
194  for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
195  for (auto& t : m_tables)
196  t.fill(iEvent, *m_tree, extensions);
197  }
198  // fill triggers
199  for (auto& t : m_triggers)
200  t.fill(iEvent, *m_tree);
201  // fill event branches
202  for (auto& t : m_evstrings)
203  t.fill(iEvent, *m_tree);
204  m_tree->Fill();
205 
207 }
208 
211  jr->reportLumiSection(m_jrToken, iLumi.id().run(), iLumi.id().value());
212 
213  m_commonLumiBranches.fill(iLumi.id());
214  m_lumiTree->Fill();
215 
217 }
218 
221  jr->reportRunNumber(m_jrToken, iRun.id().run());
222 
223  m_commonRunBranches.fill(iRun.id());
224 
225  for (auto& t : m_runTables)
226  t.fill(iRun, *m_runTree);
227 
229  for (const auto& p : m_nanoMetadata) {
230  iRun.getByToken(p.second, hstring);
231  TObjString* tos = dynamic_cast<TObjString*>(m_file->Get(p.first.c_str()));
232  if (tos) {
233  if (hstring->str() != tos->GetString())
234  throw cms::Exception("LogicError", "Inconsistent nanoMetadata " + p.first + " (" + hstring->str() + ")");
235  } else {
236  auto ostr = std::make_unique<TObjString>(hstring->str().c_str());
237  m_file->WriteTObject(ostr.release(), p.first.c_str());
238  }
239  }
240 
241  m_runTree->Fill();
242 
244 }
245 
246 bool NanoAODOutputModule::isFileOpen() const { return nullptr != m_file.get(); }
247 
249  m_file = std::make_unique<TFile>(m_fileName.c_str(), "RECREATE", "", m_compressionLevel);
251  cms::Digest branchHash;
252  m_jrToken = jr->outputFileOpened(m_fileName,
254  std::string(),
255  m_fakeName ? "PoolOutputModule" : "NanoAODOutputModule",
258  std::string(),
259  branchHash.digest().toString(),
260  std::vector<std::string>());
261 
262  if (m_compressionAlgorithm == std::string("ZLIB")) {
263  m_file->SetCompressionAlgorithm(ROOT::kZLIB);
264  } else if (m_compressionAlgorithm == std::string("LZMA")) {
265  m_file->SetCompressionAlgorithm(ROOT::kLZMA);
266  } else {
267  throw cms::Exception("Configuration")
268  << "NanoAODOutputModule configured with unknown compression algorithm '" << m_compressionAlgorithm << "'\n"
269  << "Allowed compression algorithms are ZLIB and LZMA\n";
270  }
271  /* Setup file structure here */
272  m_tables.clear();
273  m_triggers.clear();
274  m_evstrings.clear();
275  m_runTables.clear();
276  const auto& keeps = keptProducts();
277  for (const auto& keep : keeps[edm::InEvent]) {
278  if (keep.first->className() == "nanoaod::FlatTable")
279  m_tables.emplace_back(keep.first, keep.second);
280  else if (keep.first->className() == "edm::TriggerResults") {
281  m_triggers.emplace_back(keep.first, keep.second);
282  } else if (keep.first->className() == "std::basic_string<char,std::char_traits<char> >" &&
283  keep.first->productInstanceName() == "genModel") { // friendlyClassName == "String"
284  m_evstrings.emplace_back(keep.first, keep.second, true); // update only at lumiBlock transitions
285  } else
286  throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className());
287  }
288 
289  for (const auto& keep : keeps[edm::InRun]) {
290  if (keep.first->className() == "nanoaod::MergeableCounterTable")
291  m_runTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
292  else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
293  m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
294  else
295  throw cms::Exception("Configuration",
296  "NanoAODOutputModule cannot handle class " + keep.first->className() + " in Run branch");
297  }
298 
299  // create the trees
300  m_tree.reset(new TTree("Events", "Events"));
301  m_tree->SetAutoSave(0);
302  m_tree->SetAutoFlush(0);
304 
305  m_lumiTree.reset(new TTree("LuminosityBlocks", "LuminosityBlocks"));
306  m_lumiTree->SetAutoSave(0);
308 
309  m_runTree.reset(new TTree("Runs", "Runs"));
310  m_runTree->SetAutoSave(0);
312 
313  if (m_writeProvenance) {
314  m_metaDataTree.reset(new TTree(edm::poolNames::metaDataTreeName().c_str(), "Job metadata"));
315  m_metaDataTree->SetAutoSave(0);
316  m_parameterSetsTree.reset(new TTree(edm::poolNames::parameterSetsTreeName().c_str(), "Parameter sets"));
317  m_parameterSetsTree->SetAutoSave(0);
318  }
319 }
321  if (m_writeProvenance) {
322  int basketSize = 16384; // fixme configurable?
325  if (m_metaDataTree->GetNbranches() != 0) {
326  m_metaDataTree->SetEntries(-1);
327  }
328  if (m_parameterSetsTree->GetNbranches() != 0) {
329  m_parameterSetsTree->SetEntries(-1);
330  }
331  }
332  m_file->Write();
333  m_file->Close();
334  m_file.reset();
335  m_tree.release(); // apparently root has ownership
336  m_lumiTree.release(); //
337  m_runTree.release(); //
338  m_metaDataTree.release(); //
339  m_parameterSetsTree.release(); //
342 }
343 
346 
347  desc.addUntracked<std::string>("fileName");
348  desc.addUntracked<std::string>("logicalFileName", "");
349 
350  desc.addUntracked<int>("compressionLevel", 9)->setComment("ROOT compression level of output file.");
351  desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
352  ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB and LZMA");
353  desc.addUntracked<bool>("saveProvenance", true)
354  ->setComment("Save process provenance information, e.g. for edmProvDump");
355  desc.addUntracked<bool>("fakeNameForCrab", false)
356  ->setComment(
357  "Change the OutputModule name in the fwk job report to fake PoolOutputModule. This is needed to run on cran "
358  "(and publish) till crab is fixed");
359  desc.addUntracked<int>("autoFlush", -10000000)->setComment("Autoflush parameter for ROOT file");
360 
361  //replace with whatever you want to get from the EDM by default
362  const std::vector<std::string> keep = {"drop *",
363  "keep nanoaodFlatTable_*Table_*_*",
364  "keep edmTriggerResults_*_*_*",
365  "keep String_*_genModel_*",
366  "keep nanoaodMergeableCounterTable_*Table_*_*",
367  "keep nanoaodUniqueString_nanoMetadata_*_*"};
369 
370  //Used by Workflow management for their own meta data
372  dataSet.setAllowAnything();
374  ->setComment("PSet is only used by Data Operations and not by this module.");
375 
377  branchSet.setAllowAnything();
378  desc.add<edm::ParameterSetDescription>("branches", branchSet);
379 
380  descriptions.addDefault(desc);
381 }
382 
RunNumber_t run() const
Definition: EventID.h:38
std::unique_ptr< TTree > m_runTree
EventNumber_t event() const
Definition: EventID.h:40
virtual ProcessHistory const & processHistory() const
std::unique_ptr< TTree > m_lumiTree
EventID const & id() const
ModuleDescription const & description() const
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
BasicHandle getByToken(EDGetToken token, TypeID const &typeID) const
bool isFileOpen() const override
RunNumber_t run() const
Definition: RunID.h:36
std::unique_ptr< TTree > m_tree
void setAllowAnything()
allow any parameter label/value pairs
RunID const & id() const
Definition: RunForOutput.h:49
void fillParameterSetBranch(TTree *parameterSetsTree, int basketSize)
bool registerProcessHistory(ProcessHistory const &processHistory)
void fillProcessHistoryBranch(TTree *metaDataTree, int basketSize, ProcessHistoryRegistry const &processHistoryRegistry)
NanoAODOutputModule(edm::ParameterSet const &pset)
void fill(const edm::LuminosityBlockID &id)
MD5Result digest() const
Definition: Digest.cc:171
void reportRunNumber(JobReport::Token token, unsigned int run)
Definition: JobReport.cc:469
std::string const & parameterSetsTreeName()
Definition: BranchType.cc:214
std::unique_ptr< TTree > m_parameterSetsTree
void writeRun(edm::RunForOutput const &) override
class NanoAODOutputModule::CommonLumiBranches m_commonLumiBranches
std::unique_ptr< TTree > m_metaDataTree
const int keep
std::unique_ptr< TFile > m_file
int iEvent
Definition: GenABIO.cc:224
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
void addDefault(ParameterSetDescription const &psetDescription)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
std::vector< EventStringOutputBranches > m_evstrings
static void fillDescription(ParameterSetDescription &desc, std::vector< std::string > const &iDefaultOutputCommands=ProductSelectorRules::defaultSelectionStrings())
std::vector< SummaryTableOutputBranches > m_runTables
std::vector< TableOutputBranches > m_tables
std::vector< TriggerOutputBranches > m_triggers
RunNumber_t run() const
uint64_t value() const
SelectedProductsForBranchType const & keptProducts() const
std::vector< std::pair< std::string, edm::EDGetToken > > m_nanoMetadata
std::string const & metaDataTreeName()
Definition: BranchType.cc:162
ParameterDescriptionBase * add(U const &iLabel, T const &value)
std::size_t Token
Definition: JobReport.h:106
void openFile(edm::FileBlock const &) override
void reallyCloseFile() override
void eventWrittenToFile(Token fileToken, RunNumber_t run, EventNumber_t event)
Definition: JobReport.cc:428
std::string toString() const
Definition: Digest.cc:95
HLT enums.
void outputFileClosed(Token fileToken)
Definition: JobReport.cc:433
void write(edm::EventForOutput const &e) override
class NanoAODOutputModule::CommonRunBranches m_commonRunBranches
edm::JobReport::Token m_jrToken
class NanoAODOutputModule::CommonEventBranches m_commonBranches
const std::string & str() const
Definition: UniqueString.h:12
Definition: tree.py:1
edm::ProcessHistoryRegistry m_processHistoryRegistry
LuminosityBlockID const & id() const
OutputModule(edm::ParameterSet const &iPSet)
Definition: OutputModule.h:32
std::string m_compressionAlgorithm
#define constexpr
std::string createGlobalIdentifier()
void reportLumiSection(JobReport::Token token, unsigned int run, unsigned int lumiSectId, unsigned long nEvents=0)
Definition: JobReport.cc:458
void writeLuminosityBlock(edm::LuminosityBlockForOutput const &) override