CMS 3D CMS Logo

NanoAODOutputModule.cc
Go to the documentation of this file.
1 // -*- C++ -*-
2 //
3 // Package: PhysicsTools/NanoAODOutput
4 // Class : NanoAODOutputModule
5 //
6 // Implementation:
7 // [Notes on implementation]
8 //
9 // Original Author: Christopher Jones
10 // Created: Mon, 07 Aug 2017 14:21:41 GMT
11 //
12 
13 // system include files
14 #include <algorithm>
15 #include <string>
16 #include "TFile.h"
17 #include "TTree.h"
18 #include "TROOT.h"
19 #include "TObjString.h"
20 #include "Compression.h"
21 
22 // user include files
44 
45 #include <iostream>
46 
48 public:
50  ~NanoAODOutputModule() override;
51 
52  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
53 
54 private:
55  void write(edm::EventForOutput const& e) override;
57  void writeRun(edm::RunForOutput const&) override;
58  bool isFileOpen() const override;
59  void openFile(edm::FileBlock const&) override;
60  void reallyCloseFile() override;
61 
68  bool m_fakeName; //crab workaround, remove after crab is fixed
72  std::unique_ptr<TFile> m_file;
74 
75  static constexpr int m_firstFlush{1000};
76 
78  public:
79  void branch(TTree &tree) {
80  tree.Branch("run", & m_run, "run/i");
81  tree.Branch("luminosityBlock", & m_luminosityBlock, "luminosityBlock/i");
82  tree.Branch("event", & m_event, "event/l");
83  }
84  void fill(const edm::EventID & id) {
85  m_run = id.run(); m_luminosityBlock = id.luminosityBlock(); m_event = id.event();
86  }
87  private:
88  UInt_t m_run; UInt_t m_luminosityBlock; ULong64_t m_event;
90 
92  public:
93  void branch(TTree &tree) {
94  tree.Branch("run", & m_run, "run/i");
95  tree.Branch("luminosityBlock", & m_luminosityBlock, "luminosityBlock/i");
96  }
97  void fill(const edm::LuminosityBlockID & id) {
98  m_run = id.run();
99  m_luminosityBlock = id.value();
100  }
101  private:
102  UInt_t m_run; UInt_t m_luminosityBlock;
104 
106  public:
107  void branch(TTree &tree) {
108  tree.Branch("run", & m_run, "run/i");
109  }
110  void fill(const edm::RunID & id) {
111  m_run = id.run();
112  }
113  private:
114  UInt_t m_run;
116 
117 
118  std::vector<TableOutputBranches> m_tables;
119  std::vector<TriggerOutputBranches> m_triggers;
120  bool m_triggers_areSorted = false;
121  std::vector<EventStringOutputBranches> m_evstrings;
122 
123  std::vector<SummaryTableOutputBranches> m_runTables;
124 
125  std::vector<std::pair<std::string,edm::EDGetToken>> m_nanoMetadata;
126 
127 };
128 
129 
130 //
131 // constants, enums and typedefs
132 //
133 
134 //
135 // static data member definitions
136 //
137 
138 //
139 // constructors and destructor
140 //
// Member-initializer list of the constructor. NOTE(review): the signature line and one
// initializer line are absent from this listing; presumably the signature is
// NanoAODOutputModule(edm::ParameterSet const& pset) -- confirm against the real file.
// Every setting is read from pset as an untracked parameter. Only saveProvenance,
// fakeNameForCrab and autoFlush pass a fallback value to getUntrackedParameter here;
// the remaining lookups pass none.
143  edm::one::OutputModule<>(pset),
144  m_fileName(pset.getUntrackedParameter<std::string>("fileName")),
145  m_logicalFileName(pset.getUntrackedParameter<std::string>("logicalFileName")),
146  m_compressionLevel(pset.getUntrackedParameter<int>("compressionLevel")),
147  m_compressionAlgorithm(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
148  m_writeProvenance(pset.getUntrackedParameter<bool>("saveProvenance", true)),
149  m_fakeName(pset.getUntrackedParameter<bool>("fakeNameForCrab", false)),
150  m_autoFlush(pset.getUntrackedParameter<int>("autoFlush", -10000000)),
// The constructor body is empty: all work happens in the initializer list above.
152 {
153 }
154 
// Empty body; nothing is released explicitly here. NOTE(review): the signature line is
// absent from this listing; by position this is presumably the destructor -- confirm.
156 {
157 }
158 
// Per-event output step. NOTE(review): the signature line is absent from this listing;
// the body uses iEvent, so this is presumably write(edm::EventForOutput const& iEvent).
// Visible order of work: report the event to the job report, optionally tune basket
// flushing, fill the common id branches, the tables, the triggers and the event strings,
// then call m_tree->Fill().
159 void
161  //Get data from 'e' and write it to the file
// NOTE(review): jr is declared on a line that is missing from this listing.
163  jr->eventWrittenToFile(m_jrToken, iEvent.id().run(), iEvent.id().event());
164 
// A zero m_autoFlush skips all of the flush handling below.
165  if (m_autoFlush) {
166  int64_t events = m_tree->GetEntriesFast();
// One-time basket tuning, performed when the tree holds exactly m_firstFlush entries.
167  if (events == m_firstFlush) {
168  m_tree->FlushBaskets();
169  float maxMemory;
// A positive m_autoFlush is used as an event count; a negative one is negated and used
// as a byte budget (see the arithmetic in the branches below).
170  if (m_autoFlush > 0) {
171  // Estimate the memory we'll be using at the first full flush by
172  // linearly scaling the number of events.
173  float percentClusterDone = m_firstFlush / static_cast<float>(m_autoFlush);
174  maxMemory = static_cast<float>(m_tree->GetTotBytes()) / percentClusterDone;
175  } else if (m_tree->GetZipBytes() == 0) {
176  maxMemory = 100*1024*1024; // Degenerate case of no information in the tree; arbitrary value
177  } else {
178  // Estimate the memory we'll be using by scaling the current compression ratio.
179  float cxnRatio = m_tree->GetTotBytes() / static_cast<float>(m_tree->GetZipBytes());
180  maxMemory = -m_autoFlush * cxnRatio;
181  float percentBytesDone = -m_tree->GetZipBytes() / static_cast<float>(m_autoFlush);
// m_autoFlush is overwritten here: the negative byte budget becomes a positive event
// count, extrapolated from the fraction of the budget used after m_firstFlush events.
182  m_autoFlush = m_firstFlush / percentBytesDone;
183  }
184  //std::cout << "OptimizeBaskets: total bytes " << m_tree->GetTotBytes() << std::endl;
185  //std::cout << "OptimizeBaskets: zip bytes " << m_tree->GetZipBytes() << std::endl;
186  //std::cout << "OptimizeBaskets: autoFlush " << m_autoFlush << std::endl;
187  //std::cout << "OptimizeBaskets: maxMemory " << static_cast<uint32_t>(maxMemory) << std::endl;
188  //m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "d");
// NOTE(review): maxMemory is a float converted to uint32_t; an estimate of 2^32 bytes or
// more would not fit -- confirm the expected range.
189  m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "");
190  }
// NOTE(review): the condition guarding the flush below, and a statement after it, are on
// lines missing from this listing.
192  m_tree->FlushBaskets();
193  m_eventsSinceFlush = 0;
194  }
196  }
197 
198  m_commonBranches.fill(iEvent.id());
199  // fill all tables, starting from main tables and then doing extension tables
200  for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
201  for (auto & t : m_tables) t.fill(iEvent,*m_tree,extensions);
202  }
// One-time ordering of m_triggers by the index of each process name in this event's
// process history, larger index first. A name that is not found gets index
// pnames.size() (std::find returns end) and therefore sorts ahead of all found names.
// NOTE(review): the lambda captures pnames by value, which copies the vector into the
// closure; capturing by reference would avoid that copy.
203  if (!m_triggers_areSorted) { // sort triggers/flags in inverse processHistory order, to save without any special label the most recent ones
204  std::vector<std::string> pnames;
205  for (auto& p : iEvent.processHistory())
206  pnames.push_back(p.processName());
207  std::sort(m_triggers.begin(), m_triggers.end(), [pnames](TriggerOutputBranches& a, TriggerOutputBranches& b) {
208  return ((std::find(pnames.begin(), pnames.end(), a.processName()) - pnames.begin()) >
209  (std::find(pnames.begin(), pnames.end(), b.processName()) - pnames.begin()));
210  });
211  m_triggers_areSorted = true;
212  }
213  // fill triggers
214  for (auto & t : m_triggers) t.fill(iEvent,*m_tree);
215  // fill event branches
216  for (auto & t : m_evstrings) t.fill(iEvent,*m_tree);
217  m_tree->Fill();
218 
220 }
221 
// Per-luminosity-block output step: reports the lumi section to the job report, copies
// the id into the common lumi branches and adds one entry to m_lumiTree.
// NOTE(review): the signature line and the declaration of jr are absent from this
// listing; the body uses iLumi, so this is presumably
// writeLuminosityBlock(edm::LuminosityBlockForOutput const& iLumi) -- confirm.
222 void
// NOTE(review): id().value() is passed where the job-report tooltip in this listing shows
// an unsigned int lumiSectId parameter, while value() is shown returning a 64-bit integer.
// Presumably value() is a combined run/lumi quantity and the narrowing keeps the lumi
// part -- confirm, and consider passing the luminosity-block number explicitly.
225  jr->reportLumiSection(m_jrToken, iLumi.id().run(), iLumi.id().value());
226 
227  m_commonLumiBranches.fill(iLumi.id());
228  m_lumiTree->Fill();
229 
231 }
232 
// Per-run output step: reports the run number, fills the common run branches and the run
// tables, stores or cross-checks the nanoMetadata strings in the file, then adds one
// entry to m_runTree.
// NOTE(review): the signature line and the declarations of jr and hstring are absent
// from this listing; the body uses iRun, so this is presumably
// writeRun(edm::RunForOutput const& iRun) -- confirm.
233 void
236  jr->reportRunNumber(m_jrToken, iRun.id().run());
237 
238  m_commonRunBranches.fill(iRun.id());
239 
240  for (auto & t : m_runTables) t.fill(iRun,*m_runTree);
241 
// For each (key, token) pair: fetch the string product, then look for an object already
// stored in the file under the same key.
243  for (const auto & p : m_nanoMetadata) {
// NOTE(review): hstring is dereferenced below without a visible validity check -- confirm
// that getByToken cannot leave it invalid here.
244  iRun.getByToken(p.second, hstring);
245  TObjString *tos = dynamic_cast<TObjString *>(m_file->Get(p.first.c_str()));
246  if (tos) {
// An existing entry must match the current value exactly; a mismatch is a hard error.
247  if (hstring->str() != tos->GetString()) throw cms::Exception("LogicError", "Inconsistent nanoMetadata " + p.first + " (" + hstring->str() +")");
248  } else {
// First occurrence of this key: write the value into the file.
// NOTE(review): the pointer is released from the unique_ptr and never deleted in this
// function -- confirm whether WriteTObject takes ownership; if it does not, this leaks
// one small object per key.
249  auto ostr = std::make_unique<TObjString>(hstring->str().c_str());
250  m_file->WriteTObject(ostr.release(), p.first.c_str());
251  }
252  }
253 
254  m_runTree->Fill();
255 
257 }
258 
// Returns true exactly when m_file currently holds a TFile object.
// NOTE(review): the signature line is absent from this listing; by return type and
// position this is presumably isFileOpen() const -- confirm.
259 bool
261  return nullptr != m_file.get();
262 }
263 
// Opens the output ROOT file and rebuilds all per-file state.
// Visible steps: create the TFile in RECREATE mode with the configured compression level,
// register the file with the job report, select the compression algorithm, rebuild the
// branch-helper lists from keptProducts(), and create the output trees.
// Throws cms::Exception for an unknown compression algorithm or an unsupported kept
// product class.
// NOTE(review): the signature line, the declaration of jr and several argument and
// statement lines are absent from this listing; presumably this is
// openFile(edm::FileBlock const&) -- confirm.
264 void
266  m_file = std::make_unique<TFile>(m_fileName.c_str(),"RECREATE","",m_compressionLevel);
// branchHash is default-constructed and never updated in the visible code, so the digest
// passed below is that of an empty Digest.
268  cms::Digest branchHash;
269  m_jrToken = jr->outputFileOpened(m_fileName,
271  std::string(),
// With fakeNameForCrab set, the module reports itself under the PoolOutputModule name.
272  m_fakeName?"PoolOutputModule":"NanoAODOutputModule",
273  description().moduleLabel(),
275  std::string(),
276  branchHash.digest().toString(),
277  std::vector<std::string>()
278  );
279 
// Only the exact strings ZLIB and LZMA are accepted; anything else is a configuration error.
280  if (m_compressionAlgorithm == std::string("ZLIB")) {
281  m_file->SetCompressionAlgorithm(ROOT::kZLIB);
282  } else if (m_compressionAlgorithm == std::string("LZMA")) {
283  m_file->SetCompressionAlgorithm(ROOT::kLZMA);
284  } else {
285  throw cms::Exception("Configuration") << "NanoAODOutputModule configured with unknown compression algorithm '" << m_compressionAlgorithm << "'\n"
286  << "Allowed compression algorithms are ZLIB and LZMA\n";
287  }
288  /* Setup file structure here */
// NOTE(review): m_nanoMetadata is appended to further down but, unlike the lists cleared
// here, is not cleared; a second call to this function would add duplicate entries.
289  m_tables.clear();
290  m_triggers.clear();
291  m_triggers_areSorted = false;
292  m_evstrings.clear();
293  m_runTables.clear();
294  const auto & keeps = keptProducts();
// Event products are dispatched on their class name; any other class is rejected.
295  for (const auto & keep : keeps[edm::InEvent]) {
296  if(keep.first->className() == "nanoaod::FlatTable" )
297  m_tables.emplace_back(keep.first, keep.second);
298  else if(keep.first->className() == "edm::TriggerResults" )
299  {
300  m_triggers.emplace_back(keep.first, keep.second);
301  }
302  else if(keep.first->className() == "std::basic_string<char,std::char_traits<char> >" && keep.first->productInstanceName()=="genModel") { // friendlyClassName == "String"
303  m_evstrings.emplace_back(keep.first, keep.second, true); // update only at lumiBlock transitions
304  }
305  else throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className());
306  }
307 
// Run products: counter tables become run-table helpers; UniqueString products from the
// nanoMetadata module label are recorded as (instance name, token) pairs.
308  for (const auto & keep : keeps[edm::InRun]) {
309  if(keep.first->className() == "nanoaod::MergeableCounterTable" )
310  m_runTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
311  else if(keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
312  m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
313  else throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className() + " in Run branch");
314  }
315 
316 
317  // create the trees
// NOTE(review): one line after each tree setup below is missing from this listing.
318  m_tree.reset(new TTree("Events","Events"));
319  m_tree->SetAutoSave(0);
320  m_tree->SetAutoFlush(0);
322 
323  m_lumiTree.reset(new TTree("LuminosityBlocks","LuminosityBlocks"));
324  m_lumiTree->SetAutoSave(0);
326 
327  m_runTree.reset(new TTree("Runs","Runs"));
328  m_runTree->SetAutoSave(0);
330 
// The two provenance trees are created only when saveProvenance is enabled.
331  if (m_writeProvenance) {
332  m_metaDataTree.reset(new TTree(edm::poolNames::metaDataTreeName().c_str(),"Job metadata"));
333  m_metaDataTree->SetAutoSave(0);
334  m_parameterSetsTree.reset(new TTree(edm::poolNames::parameterSetsTreeName().c_str(),"Parameter sets"));
335  m_parameterSetsTree->SetAutoSave(0);
336  }
337 }
// Finalizes and closes the output file: adjusts the provenance trees when enabled, writes
// and closes the TFile, destroys it, then gives up ownership of every tree pointer
// without deleting the trees.
// NOTE(review): the signature line, the statements that use basketSize and the lines
// just before the closing brace are absent from this listing; presumably this is
// reallyCloseFile() -- confirm.
338 void
340  if (m_writeProvenance) {
341  int basketSize = 16384; // fixme configurable?
// SetEntries(-1) is applied only to provenance trees that have at least one branch.
344  if (m_metaDataTree->GetNbranches() != 0) {
345  m_metaDataTree->SetEntries(-1);
346  }
347  if (m_parameterSetsTree->GetNbranches() != 0) {
348  m_parameterSetsTree->SetEntries(-1);
349  }
350  }
351  m_file->Write();
352  m_file->Close();
353  m_file.reset();
// release() rather than reset(): the pointers are dropped without deleting the trees,
// on the stated assumption that ROOT owns them.
354  m_tree.release(); // apparently root has ownership
355  m_lumiTree.release(); //
356  m_runTree.release(); //
357  m_metaDataTree.release(); //
358  m_parameterSetsTree.release(); //
361 }
362 
// Declares the configuration parameters accepted by this module and registers the
// description as the default one.
// NOTE(review): the signature line and the declarations of desc, dataSet and branchSet
// are absent from this listing; presumably this is
// fillDescriptions(edm::ConfigurationDescriptions& descriptions) -- confirm.
363 void
366 
// fileName is declared without a default value; logicalFileName defaults to an empty string.
367  desc.addUntracked<std::string>("fileName");
368  desc.addUntracked<std::string>("logicalFileName","");
369 
370  desc.addUntracked<int>("compressionLevel", 9)
371  ->setComment("ROOT compression level of output file.");
372  desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
373  ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB and LZMA");
374  desc.addUntracked<bool>("saveProvenance", true)
375  ->setComment("Save process provenance information, e.g. for edmProvDump");
// NOTE(review): the comment string below says cran where crab is presumably meant; it is
// a runtime string and is left unchanged here.
376  desc.addUntracked<bool>("fakeNameForCrab", false)
377  ->setComment("Change the OutputModule name in the fwk job report to fake PoolOutputModule. This is needed to run on cran (and publish) till crab is fixed");
378  desc.addUntracked<int>("autoFlush", -10000000)
379  ->setComment("Autoflush parameter for ROOT file");
380 
// Default keep/drop rules. NOTE(review): the statement that consumes keep is on a line
// missing from this listing.
381  //replace with whatever you want to get from the EDM by default
382  const std::vector<std::string> keep = {"drop *", "keep nanoaodFlatTable_*Table_*_*", "keep edmTriggerResults_*_*_*", "keep String_*_genModel_*", "keep nanoaodMergeableCounterTable_*Table_*_*", "keep nanoaodUniqueString_nanoMetadata_*_*"};
384 
385  //Used by Workflow management for their own meta data
// Both nested parameter sets accept arbitrary contents; dataset is untracked, branches is tracked.
387  dataSet.setAllowAnything();
388  desc.addUntracked<edm::ParameterSetDescription>("dataset", dataSet)
389  ->setComment("PSet is only used by Data Operations and not by this module.");
390 
392  branchSet.setAllowAnything();
393  desc.add<edm::ParameterSetDescription>("branches", branchSet);
394 
395 
396 
397  descriptions.addDefault(desc);
398 
399 }
400 
RunNumber_t run() const
Definition: EventID.h:39
std::unique_ptr< TTree > m_runTree
EventNumber_t event() const
Definition: EventID.h:41
virtual ProcessHistory const & processHistory() const
std::unique_ptr< TTree > m_lumiTree
EventID const & id() const
boost::uint64_t value() const
ModuleDescription const & description() const
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
BasicHandle getByToken(EDGetToken token, TypeID const &typeID) const
bool isFileOpen() const override
RunNumber_t run() const
Definition: RunID.h:39
std::unique_ptr< TTree > m_tree
void setAllowAnything()
allow any parameter label/value pairs
RunID const & id() const
Definition: RunForOutput.h:49
void fillParameterSetBranch(TTree *parameterSetsTree, int basketSize)
bool registerProcessHistory(ProcessHistory const &processHistory)
void fillProcessHistoryBranch(TTree *metaDataTree, int basketSize, ProcessHistoryRegistry const &processHistoryRegistry)
NanoAODOutputModule(edm::ParameterSet const &pset)
void fill(const edm::LuminosityBlockID &id)
MD5Result digest() const
Definition: Digest.cc:171
void reportRunNumber(JobReport::Token token, unsigned int run)
Definition: JobReport.cc:469
std::vector< std::pair< std::string, edm::EDGetToken > > m_nanoMetadata
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:20
std::string const & parameterSetsTreeName()
Definition: BranchType.cc:257
std::unique_ptr< TTree > m_parameterSetsTree
void writeRun(edm::RunForOutput const &) override
class NanoAODOutputModule::CommonLumiBranches m_commonLumiBranches
std::unique_ptr< TTree > m_metaDataTree
const int keep
std::unique_ptr< TFile > m_file
int iEvent
Definition: GenABIO.cc:224
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
void addDefault(ParameterSetDescription const &psetDescription)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
std::vector< EventStringOutputBranches > m_evstrings
static void fillDescription(ParameterSetDescription &desc, std::vector< std::string > const &iDefaultOutputCommands=ProductSelectorRules::defaultSelectionStrings())
std::vector< SummaryTableOutputBranches > m_runTables
std::vector< TableOutputBranches > m_tables
std::vector< TriggerOutputBranches > m_triggers
RunNumber_t run() const
SelectedProductsForBranchType const & keptProducts() const
std::string const & metaDataTreeName()
Definition: BranchType.cc:169
ParameterDescriptionBase * add(U const &iLabel, T const &value)
std::size_t Token
Definition: JobReport.h:106
void openFile(edm::FileBlock const &) override
void reallyCloseFile() override
void eventWrittenToFile(Token fileToken, RunNumber_t run, EventNumber_t event)
Definition: JobReport.cc:428
std::string toString() const
Definition: Digest.cc:95
double b
Definition: hdecay.h:120
HLT enums.
double a
Definition: hdecay.h:121
void outputFileClosed(Token fileToken)
Definition: JobReport.cc:433
void write(edm::EventForOutput const &e) override
class NanoAODOutputModule::CommonRunBranches m_commonRunBranches
edm::JobReport::Token m_jrToken
class NanoAODOutputModule::CommonEventBranches m_commonBranches
const std::string & str() const
Definition: UniqueString.h:12
Definition: tree.py:1
edm::ProcessHistoryRegistry m_processHistoryRegistry
LuminosityBlockID const & id() const
OutputModule(edm::ParameterSet const &iPSet)
Definition: OutputModule.h:32
std::string m_compressionAlgorithm
#define constexpr
std::string createGlobalIdentifier()
void reportLumiSection(JobReport::Token token, unsigned int run, unsigned int lumiSectId, unsigned long nEvents=0)
Definition: JobReport.cc:458
void writeLuminosityBlock(edm::LuminosityBlockForOutput const &) override