CMS 3D CMS Logo

NanoAODOutputModule.cc
Go to the documentation of this file.
1 // -*- C++ -*-
2 //
3 // Package: PhysicsTools/NanoAODOutput
4 // Class : NanoAODOutputModule
5 //
6 // Implementation:
7 // [Notes on implementation]
8 //
9 // Original Author: Christopher Jones
10 // Created: Mon, 07 Aug 2017 14:21:41 GMT
11 //
12 
13 // system include files
14 #include <string>
15 #include "TFile.h"
16 #include "TTree.h"
17 #include "TROOT.h"
18 #include "TObjString.h"
19 #include "Compression.h"
20 
21 // user include files
43 
44 #include <iostream>
45 
47 public:
49  ~NanoAODOutputModule() override;
50 
     // Declares the configuration parameters accepted by this module and registers them as the default description.
51  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
52 
53 private:
     // Per-event hook: fills the event-level branches and appends one entry to the "Events" tree.
54  void write(edm::EventForOutput const& e) override;
     // Per-run hook: fills the run-level branches, stores the nanoMetadata strings and appends one entry to the "Runs" tree.
56  void writeRun(edm::RunForOutput const&) override;
     // True while m_file holds a TFile.
57  bool isFileOpen() const override;
     // Creates the output TFile, sorts the kept products into branch writers and creates the trees.
58  void openFile(edm::FileBlock const&) override;
     // Writes and closes the output file, then drops the tree and file pointers.
59  void reallyCloseFile() override;
60 
     // crab workaround (remove after crab is fixed): when true, openFile() reports this module to the
     // job report as "PoolOutputModule" instead of "NanoAODOutputModule".
67  bool m_fakeName; //crab workaround, remove after crab is fixed
     // Output ROOT file; created in openFile() and reset in reallyCloseFile().
71  std::unique_ptr<TFile> m_file;
73 
     // Entry count of the event tree at which write() does its one-off FlushBaskets()/OptimizeBaskets() pass.
74  static constexpr int m_firstFlush{1000};
75 
77  public:
     // Creates the "run", "luminosityBlock" and "event" branches on `tree`, each bound to the matching
     // member below. In ROOT leaflist notation "/i" is a 32-bit unsigned and "/l" a 64-bit unsigned
     // integer, which matches the UInt_t / ULong64_t members.
78  void branch(TTree &tree) {
79  tree.Branch("run", & m_run, "run/i");
80  tree.Branch("luminosityBlock", & m_luminosityBlock, "luminosityBlock/i");
81  tree.Branch("event", & m_event, "event/l");
82  }
     // Copies run, luminosity block and event number out of `id` into the branch buffers.
     // Nothing is written to the tree here; the caller is expected to call TTree::Fill afterwards.
83  void fill(const edm::EventID & id) {
84  m_run = id.run(); m_luminosityBlock = id.luminosityBlock(); m_event = id.event();
85  }
86  private:
     // Branch buffers: their addresses are handed to ROOT in branch(), so this object must not move afterwards.
87  UInt_t m_run; UInt_t m_luminosityBlock; ULong64_t m_event;
89 
91  public:
     // Creates the "run" and "luminosityBlock" branches on `tree`, each bound to the matching member below.
92  void branch(TTree &tree) {
93  tree.Branch("run", & m_run, "run/i");
94  tree.Branch("luminosityBlock", & m_luminosityBlock, "luminosityBlock/i");
95  }
     // Copies the identifiers out of `id` into the branch buffers; does not fill the tree itself.
96  void fill(const edm::LuminosityBlockID & id) {
97  m_run = id.run();
     // NOTE(review): value() appears in this page's cross-reference list with a 64-bit return type, while
     // m_luminosityBlock is a UInt_t, so this assignment narrows. Confirm that the low 32 bits are the
     // luminosity-block number, or use an accessor that returns that number directly.
98  m_luminosityBlock = id.value();
99  }
100  private:
     // Branch buffers: their addresses are handed to ROOT in branch().
101  UInt_t m_run; UInt_t m_luminosityBlock;
103 
105  public:
     // Creates the "run" branch on `tree`, bound to m_run ("/i" is ROOT's 32-bit unsigned leaf type).
106  void branch(TTree &tree) {
107  tree.Branch("run", & m_run, "run/i");
108  }
     // Copies the run number out of `id` into the branch buffer; does not fill the tree itself.
109  void fill(const edm::RunID & id) {
110  m_run = id.run();
111  }
112  private:
     // Branch buffer: its address is handed to ROOT in branch().
113  UInt_t m_run;
115 
116 
     // Branch writers rebuilt by openFile() from the kept event products:
     // nanoaod::FlatTable -> m_tables, edm::TriggerResults -> m_triggers, "genModel" strings -> m_evstrings.
117  std::vector<TableOutputBranches> m_tables;
118  std::vector<TriggerOutputBranches> m_triggers;
119  std::vector<EventStringOutputBranches> m_evstrings;
120 
     // Writers for kept run products of class nanoaod::MergeableCounterTable; filled once per run in writeRun().
121  std::vector<SummaryTableOutputBranches> m_runTables;
122 
     // (product instance name, token) pairs for nanoaod::UniqueString run products produced by the
     // "nanoMetadata" module; writeRun() stores each string in the file under the instance name.
123  std::vector<std::pair<std::string,edm::EDGetToken>> m_nanoMetadata;
124 
125 };
126 
127 
128 //
129 // constants, enums and typedefs
130 //
131 
132 //
133 // static data member definitions
134 //
135 
136 //
137 // constructors and destructor
138 //
// Constructor initializer list: every setting is read from the module's untracked parameters.
// "fileName", "logicalFileName", "compressionLevel" and "compressionAlgorithm" are read without an
// inline fallback; "saveProvenance", "fakeNameForCrab" and "autoFlush" supply one here.
// NOTE(review): the signature line and listing line 149 are missing, so this initializer list is incomplete.
141  edm::one::OutputModule<>(pset),
142  m_fileName(pset.getUntrackedParameter<std::string>("fileName")),
143  m_logicalFileName(pset.getUntrackedParameter<std::string>("logicalFileName")),
144  m_compressionLevel(pset.getUntrackedParameter<int>("compressionLevel")),
145  m_compressionAlgorithm(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
146  m_writeProvenance(pset.getUntrackedParameter<bool>("saveProvenance", true)),
147  m_fakeName(pset.getUntrackedParameter<bool>("fakeNameForCrab", false)),
148  m_autoFlush(pset.getUntrackedParameter<int>("autoFlush", -10000000)),
150 {
151 }
152 
// Destructor body (signature line missing from the listing): nothing to do explicitly.
154 {
155 }
156 
// Per-event hook: reports the event to the job report, runs the basket flush/optimisation logic,
// fills every event-level branch writer and appends one entry to the event tree.
// NOTE(review): the signature line and listing lines 160, 189, 193 and 207 are missing here, so the
// declaration of `jr` and the opening of the second flush condition are not visible.
157 void
159  //Get data from the event ('iEvent') and write it to the file
161  jr->eventWrittenToFile(m_jrToken, iEvent.id().run(), iEvent.id().event());
162 
     // A zero autoFlush disables all of the manual flush handling below.
163  if (m_autoFlush) {
164  int64_t events = m_tree->GetEntriesFast();
     // One-off pass once the tree holds exactly m_firstFlush entries: flush, estimate a memory budget,
     // and let ROOT resize the baskets to fit it.
165  if (events == m_firstFlush) {
166  m_tree->FlushBaskets();
     // Assigned on every path of the if/else chain below.
167  float maxMemory;
     // Positive autoFlush is used as a number of events.
168  if (m_autoFlush > 0) {
169  // Estimate the memory we'll be using at the first full flush by
170  // linearly scaling the number of events.
     // Despite the name this is a fraction (m_firstFlush / autoFlush), not a percentage.
171  float percentClusterDone = m_firstFlush / static_cast<float>(m_autoFlush);
172  maxMemory = static_cast<float>(m_tree->GetTotBytes()) / percentClusterDone;
173  } else if (m_tree->GetZipBytes() == 0) {
174  maxMemory = 100*1024*1024; // Degenerate case of no information in the tree; arbitrary value
175  } else {
     // Negative autoFlush is used as a budget of compressed bytes (-m_autoFlush).
176  // Estimate the memory we'll be using by scaling the current compression ratio.
177  float cxnRatio = m_tree->GetTotBytes() / static_cast<float>(m_tree->GetZipBytes());
178  maxMemory = -m_autoFlush * cxnRatio;
179  float percentBytesDone = -m_tree->GetZipBytes() / static_cast<float>(m_autoFlush);
     // Convert the byte budget into an event count; from here on m_autoFlush is positive.
180  m_autoFlush = m_firstFlush / percentBytesDone;
181  }
182  //std::cout << "OptimizeBaskets: total bytes " << m_tree->GetTotBytes() << std::endl;
183  //std::cout << "OptimizeBaskets: zip bytes " << m_tree->GetZipBytes() << std::endl;
184  //std::cout << "OptimizeBaskets: autoFlush " << m_autoFlush << std::endl;
185  //std::cout << "OptimizeBaskets: maxMemory " << static_cast<uint32_t>(maxMemory) << std::endl;
186  //m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "d");
187  m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "");
188  }
     // NOTE(review): the condition guarding this flush (listing line 189) is not shown.
190  m_tree->FlushBaskets();
191  m_eventsSinceFlush = 0;
192  }
194  }
195 
196  m_commonBranches.fill(iEvent.id());
197  // fill all tables, starting from main tables and then doing extension tables
     // Two passes over the same list: extensions == 0 first, then extensions == 1.
198  for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
199  for (auto & t : m_tables) t.fill(iEvent,*m_tree,extensions);
200  }
201  // fill triggers
202  for (auto & t : m_triggers) t.fill(iEvent,*m_tree);
203  // fill event branches
204  for (auto & t : m_evstrings) t.fill(iEvent,*m_tree);
     // All branch buffers are set; commit them as one tree entry.
205  m_tree->Fill();
206 
208 }
209 
// Per-luminosity-block hook: reports the block to the job report, fills the lumi-level branch
// buffers and appends one entry to the lumi tree.
// NOTE(review): the signature line and the declaration of `jr` are missing from this listing, as is line 218.
210 void
     // NOTE(review): id().value() is listed with a 64-bit return type while reportLumiSection takes an
     // `unsigned int lumiSectId`; confirm the narrowing yields the luminosity-block number.
213  jr->reportLumiSection(m_jrToken, iLumi.id().run(), iLumi.id().value());
214 
215  m_commonLumiBranches.fill(iLumi.id());
216  m_lumiTree->Fill();
217 
219 }
220 
// Per-run hook: reports the run to the job report, fills the run-level branch buffers and the
// summary tables, stores each nanoMetadata string in the file (or checks it against the copy
// already stored), then appends one entry to the run tree.
// Throws cms::Exception("LogicError") when a metadata string differs from the one already in the file.
// NOTE(review): the signature line and the declarations of `jr` and `hstring` are missing from this
// listing, as is line 244.
221 void
224  jr->reportRunNumber(m_jrToken, iRun.id().run());
225 
226  m_commonRunBranches.fill(iRun.id());
227 
228  for (auto & t : m_runTables) t.fill(iRun,*m_runTree);
229 
231  for (const auto & p : m_nanoMetadata) {
232  iRun.getByToken(p.second, hstring);
     // Look for a string already stored under this name by an earlier run.
233  TObjString *tos = dynamic_cast<TObjString *>(m_file->Get(p.first.c_str()));
234  if (tos) {
235  if (hstring->str() != tos->GetString()) throw cms::Exception("LogicError", "Inconsistent nanoMetadata " + p.first + " (" + hstring->str() +")");
236  } else {
237  auto ostr = std::make_unique<TObjString>(hstring->str().c_str());
     // WriteTObject only serialises the object into the file; it does not take ownership.
     // Pass a borrowed pointer so `ostr` still frees the TObjString (release() leaked it).
238  m_file->WriteTObject(ostr.get(), p.first.c_str());
239  }
240  }
241 
242  m_runTree->Fill();
243 
245 }
246 
// Reports whether an output file is currently held, i.e. whether m_file is non-null.
247 bool
249  return static_cast<bool>(m_file);
250 }
251 
252 void
254  m_file = std::make_unique<TFile>(m_fileName.c_str(),"RECREATE","",m_compressionLevel);
256  cms::Digest branchHash;
257  m_jrToken = jr->outputFileOpened(m_fileName,
259  std::string(),
260  m_fakeName?"PoolOutputModule":"NanoAODOutputModule",
261  description().moduleLabel(),
263  std::string(),
264  branchHash.digest().toString(),
265  std::vector<std::string>()
266  );
267 
268  if (m_compressionAlgorithm == std::string("ZLIB")) {
269  m_file->SetCompressionAlgorithm(ROOT::kZLIB);
270  } else if (m_compressionAlgorithm == std::string("LZMA")) {
271  m_file->SetCompressionAlgorithm(ROOT::kLZMA);
272  } else {
273  throw cms::Exception("Configuration") << "NanoAODOutputModule configured with unknown compression algorithm '" << m_compressionAlgorithm << "'\n"
274  << "Allowed compression algorithms are ZLIB and LZMA\n";
275  }
276  /* Setup file structure here */
277  m_tables.clear();
278  m_triggers.clear();
279  m_evstrings.clear();
280  m_runTables.clear();
281  const auto & keeps = keptProducts();
282  for (const auto & keep : keeps[edm::InEvent]) {
283  if(keep.first->className() == "nanoaod::FlatTable" )
284  m_tables.emplace_back(keep.first, keep.second);
285  else if(keep.first->className() == "edm::TriggerResults" )
286  {
287  m_triggers.emplace_back(keep.first, keep.second);
288  }
289  else if(keep.first->className() == "std::basic_string<char,std::char_traits<char> >" && keep.first->productInstanceName()=="genModel") { // friendlyClassName == "String"
290  m_evstrings.emplace_back(keep.first, keep.second, true); // update only at lumiBlock transitions
291  }
292  else throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className());
293  }
294 
295  for (const auto & keep : keeps[edm::InRun]) {
296  if(keep.first->className() == "nanoaod::MergeableCounterTable" )
297  m_runTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
298  else if(keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
299  m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
300  else throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className() + " in Run branch");
301  }
302 
303 
304  // create the trees
305  m_tree.reset(new TTree("Events","Events"));
306  m_tree->SetAutoSave(0);
307  m_tree->SetAutoFlush(0);
309 
310  m_lumiTree.reset(new TTree("LuminosityBlocks","LuminosityBlocks"));
311  m_lumiTree->SetAutoSave(0);
313 
314  m_runTree.reset(new TTree("Runs","Runs"));
315  m_runTree->SetAutoSave(0);
317 
318  if (m_writeProvenance) {
319  m_metaDataTree.reset(new TTree(edm::poolNames::metaDataTreeName().c_str(),"Job metadata"));
320  m_metaDataTree->SetAutoSave(0);
321  m_parameterSetsTree.reset(new TTree(edm::poolNames::parameterSetsTreeName().c_str(),"Parameter sets"));
322  m_parameterSetsTree->SetAutoSave(0);
323  }
324 }
// Finalises the provenance trees (when enabled), writes and closes the output file, then drops the
// file and tree pointers.
// NOTE(review): the signature line and listing lines 326, 329, 330, 346 and 347 are missing. The
// cross-reference list names fillProcessHistoryBranch/fillParameterSetBranch, which presumably use
// basketSize on the missing lines; confirm against the full source.
325 void
327  if (m_writeProvenance) {
328  int basketSize = 16384; // fixme configurable?
     // SetEntries(-1) asks ROOT to derive the entry count from the branches instead of Fill() calls;
     // it is skipped when the tree has no branches.
331  if (m_metaDataTree->GetNbranches() != 0) {
332  m_metaDataTree->SetEntries(-1);
333  }
334  if (m_parameterSetsTree->GetNbranches() != 0) {
335  m_parameterSetsTree->SetEntries(-1);
336  }
337  }
338  m_file->Write();
339  m_file->Close();
340  m_file.reset();
     // release() rather than reset(): the pointers are dropped without deleting the trees, on the
     // stated assumption that ROOT owned them.
341  m_tree.release(); // apparently root has ownership
342  m_lumiTree.release(); //
343  m_runTree.release(); //
     // NOTE(review): these two are only created when m_writeProvenance is true; release() on an empty
     // unique_ptr is a no-op.
344  m_metaDataTree.release(); //
345  m_parameterSetsTree.release(); //
348 }
349 
// Declares the configuration parameters this module accepts, with their defaults and help text, and
// registers the result as the default description in `descriptions`.
// NOTE(review): the signature line and listing lines 352, 370, 373 and 378 are missing, so the
// declarations of `desc`, `dataSet` and `branchSet` and the use of `keep` are not visible.
350 void
353 
     // "fileName" is the only parameter declared without a default.
354  desc.addUntracked<std::string>("fileName");
355  desc.addUntracked<std::string>("logicalFileName","");
356 
357  desc.addUntracked<int>("compressionLevel", 9)
358  ->setComment("ROOT compression level of output file.");
359  desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
360  ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB and LZMA");
361  desc.addUntracked<bool>("saveProvenance", true)
362  ->setComment("Save process provenance information, e.g. for edmProvDump");
363  desc.addUntracked<bool>("fakeNameForCrab", false)
364  ->setComment("Change the OutputModule name in the fwk job report to fake PoolOutputModule. This is needed to run on crab (and publish) till crab is fixed");
365  desc.addUntracked<int>("autoFlush", -10000000)
366  ->setComment("Autoflush parameter for ROOT file");
367 
368  //replace with whatever you want to get from the EDM by default
     // Default selection: drop everything, then keep only the product types openFile() can write.
369  const std::vector<std::string> keep = {"drop *", "keep nanoaodFlatTable_*Table_*_*", "keep edmTriggerResults_*_*_*", "keep String_*_genModel_*", "keep nanoaodMergeableCounterTable_*Table_*_*", "keep nanoaodUniqueString_nanoMetadata_*_*"};
371 
372  //Used by Workflow management for their own meta data
374  dataSet.setAllowAnything();
375  desc.addUntracked<edm::ParameterSetDescription>("dataset", dataSet)
376  ->setComment("PSet is only used by Data Operations and not by this module.");
377 
     // "branches" is a tracked PSet that accepts any content.
379  branchSet.setAllowAnything();
380  desc.add<edm::ParameterSetDescription>("branches", branchSet);
381 
382 
383 
384  descriptions.addDefault(desc);
385 
386 }
387 
RunNumber_t run() const
Definition: EventID.h:39
std::unique_ptr< TTree > m_runTree
EventNumber_t event() const
Definition: EventID.h:41
virtual ProcessHistory const & processHistory() const
std::unique_ptr< TTree > m_lumiTree
EventID const & id() const
boost::uint64_t value() const
ModuleDescription const & description() const
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
BasicHandle getByToken(EDGetToken token, TypeID const &typeID) const
bool isFileOpen() const override
RunNumber_t run() const
Definition: RunID.h:39
std::unique_ptr< TTree > m_tree
void setAllowAnything()
allow any parameter label/value pairs
RunID const & id() const
Definition: RunForOutput.h:49
void fillParameterSetBranch(TTree *parameterSetsTree, int basketSize)
bool registerProcessHistory(ProcessHistory const &processHistory)
void fillProcessHistoryBranch(TTree *metaDataTree, int basketSize, ProcessHistoryRegistry const &processHistoryRegistry)
NanoAODOutputModule(edm::ParameterSet const &pset)
void fill(const edm::LuminosityBlockID &id)
MD5Result digest() const
Definition: Digest.cc:171
void reportRunNumber(JobReport::Token token, unsigned int run)
Definition: JobReport.cc:469
std::vector< std::pair< std::string, edm::EDGetToken > > m_nanoMetadata
std::string const & parameterSetsTreeName()
Definition: BranchType.cc:257
std::unique_ptr< TTree > m_parameterSetsTree
void writeRun(edm::RunForOutput const &) override
class NanoAODOutputModule::CommonLumiBranches m_commonLumiBranches
std::unique_ptr< TTree > m_metaDataTree
const int keep
std::unique_ptr< TFile > m_file
int iEvent
Definition: GenABIO.cc:224
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
void addDefault(ParameterSetDescription const &psetDescription)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
std::vector< EventStringOutputBranches > m_evstrings
static void fillDescription(ParameterSetDescription &desc, std::vector< std::string > const &iDefaultOutputCommands=ProductSelectorRules::defaultSelectionStrings())
std::vector< SummaryTableOutputBranches > m_runTables
std::vector< TableOutputBranches > m_tables
std::vector< TriggerOutputBranches > m_triggers
RunNumber_t run() const
SelectedProductsForBranchType const & keptProducts() const
std::string const & metaDataTreeName()
Definition: BranchType.cc:169
ParameterDescriptionBase * add(U const &iLabel, T const &value)
std::size_t Token
Definition: JobReport.h:106
void openFile(edm::FileBlock const &) override
void reallyCloseFile() override
void eventWrittenToFile(Token fileToken, RunNumber_t run, EventNumber_t event)
Definition: JobReport.cc:428
std::string toString() const
Definition: Digest.cc:95
HLT enums.
void outputFileClosed(Token fileToken)
Definition: JobReport.cc:433
void write(edm::EventForOutput const &e) override
class NanoAODOutputModule::CommonRunBranches m_commonRunBranches
edm::JobReport::Token m_jrToken
class NanoAODOutputModule::CommonEventBranches m_commonBranches
const std::string & str() const
Definition: UniqueString.h:12
Definition: tree.py:1
edm::ProcessHistoryRegistry m_processHistoryRegistry
LuminosityBlockID const & id() const
OutputModule(edm::ParameterSet const &iPSet)
Definition: OutputModule.h:32
std::string m_compressionAlgorithm
#define constexpr
std::string createGlobalIdentifier()
void reportLumiSection(JobReport::Token token, unsigned int run, unsigned int lumiSectId, unsigned long nEvents=0)
Definition: JobReport.cc:458
void writeLuminosityBlock(edm::LuminosityBlockForOutput const &) override