CMS 3D CMS Logo

NanoAODOutputModule.cc
Go to the documentation of this file.
1 // -*- C++ -*-
2 //
3 // Package: PhysicsTools/NanoAODOutput
4 // Class : NanoAODOutputModule
5 //
6 // Implementation:
7 // [Notes on implementation]
8 //
9 // Original Author: Christopher Jones
10 // Created: Mon, 07 Aug 2017 14:21:41 GMT
11 //
12 
13 // system include files
14 #include <algorithm>
15 #include <string>
16 #include "TFile.h"
17 #include "TTree.h"
18 #include "TROOT.h"
19 #include "TObjString.h"
20 #include "Compression.h"
21 
22 // user include files
44 
45 #include <iostream>
46 
// Body of the NanoAODOutputModule class declaration; the opening "class ..." line is not present in this listing.
48 public:
50  ~NanoAODOutputModule() override;
51 
52  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
53 
54 private:
    // Output-module hooks overridden by this class (listing line 56 is missing from this extract).
55  void write(edm::EventForOutput const& e) override;
57  void writeRun(edm::RunForOutput const&) override;
58  bool isFileOpen() const override;
59  void openFile(edm::FileBlock const&) override;
60  void reallyCloseFile() override;
61 
68  bool m_fakeName; //crab workaround, remove after crab is fixed
72  std::unique_ptr<TFile> m_file; // output ROOT file: created in openFile(), reset in reallyCloseFile()
74 
75  static constexpr int m_firstFlush{1000}; // Events-tree entry count at which write() performs its one-time FlushBaskets + OptimizeBaskets
76 
    // Helper holding the buffers behind the per-event "run", "luminosityBlock" and "event" branches.
    // Its class-header and closing lines are missing from this listing.
78  public:
79  void branch(TTree &tree) { // attach the three identifier branches of `tree` to the member buffers
80  tree.Branch("run", & m_run, "run/i");
81  tree.Branch("luminosityBlock", & m_luminosityBlock, "luminosityBlock/i");
82  tree.Branch("event", & m_event, "event/l");
83  }
84  void fill(const edm::EventID & id) { // copy the identifiers of `id` into the branch buffers
85  m_run = id.run(); m_luminosityBlock = id.luminosityBlock(); m_event = id.event();
86  }
87  private:
88  UInt_t m_run; UInt_t m_luminosityBlock; ULong64_t m_event;
90 
    // Helper holding the buffers behind the per-lumi "run" and "luminosityBlock" branches.
    // Its class-header and closing lines are missing from this listing.
92  public:
93  void branch(TTree &tree) { // attach the two identifier branches of `tree` to the member buffers
94  tree.Branch("run", & m_run, "run/i");
95  tree.Branch("luminosityBlock", & m_luminosityBlock, "luminosityBlock/i");
96  }
97  void fill(const edm::LuminosityBlockID & id) { // copy the identifiers of `id` into the branch buffers
98  m_run = id.run();
    // NOTE(review): value() is stored into a 32-bit UInt_t here, while the event helper above calls
    // luminosityBlock(); confirm value() is not a wider packed id that is being silently truncated.
99  m_luminosityBlock = id.value();
100  }
101  private:
102  UInt_t m_run; UInt_t m_luminosityBlock;
104 
    // Helper holding the buffer behind the per-run "run" branch.
    // Its class-header and closing lines are missing from this listing.
106  public:
107  void branch(TTree &tree) { // attach the "run" branch of `tree` to the member buffer
108  tree.Branch("run", & m_run, "run/i");
109  }
110  void fill(const edm::RunID & id) { // copy the run number of `id` into the branch buffer
111  m_run = id.run();
112  }
113  private:
114  UInt_t m_run;
116 
117 
    // Per-product writers; all of the vectors below are populated in openFile() from the kept products.
118  std::vector<TableOutputBranches> m_tables;
119  std::vector<TriggerOutputBranches> m_triggers;
120  bool m_triggers_areSorted = false; // cleared in openFile(), set in write() after m_triggers has been ordered
121  std::vector<EventStringOutputBranches> m_evstrings;
122 
123  std::vector<SummaryTableOutputBranches> m_runTables;
124  std::vector<SummaryTableOutputBranches> m_lumiTables;
125 
126  std::vector<std::pair<std::string,edm::EDGetToken>> m_nanoMetadata; // (object name, token) pairs; writeRun() stores each string in the file as a TObjString
127 
128 };
129 
130 
131 //
132 // constants, enums and typedefs
133 //
134 
135 //
136 // static data member definitions
137 //
138 
139 //
140 // constructors and destructor
141 //
// Constructor initializer list and empty body; the signature line and listing line 152 are missing
// from this extract. Every setting shown is read from an untracked parameter of `pset`.
144  edm::one::OutputModule<>(pset),
145  m_fileName(pset.getUntrackedParameter<std::string>("fileName")),
146  m_logicalFileName(pset.getUntrackedParameter<std::string>("logicalFileName")),
147  m_compressionLevel(pset.getUntrackedParameter<int>("compressionLevel")),
148  m_compressionAlgorithm(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
149  m_writeProvenance(pset.getUntrackedParameter<bool>("saveProvenance", true)),
150  m_fakeName(pset.getUntrackedParameter<bool>("fakeNameForCrab", false)),
    // write() treats a positive autoFlush as an entry count and a negative one as a (negated) zipped-byte target.
151  m_autoFlush(pset.getUntrackedParameter<int>("autoFlush", -10000000)),
153 {
154 }
155 
// Empty function body; presumably the destructor, whose signature line (listing line 156) is missing
// from this extract - TODO confirm against the full file.
157 {
158 }
159 
// Per-event write: reports the event to the job report, applies the manual basket-flush policy,
// fills every table/trigger/string writer and appends one entry to the Events tree.
// The signature line and listing lines 163, 192, 196, 208 and 220 are missing from this extract.
160 void
162  //Get data from 'e' and write it to the file
    // NOTE(review): `jr` is used here but its declaration is on a line missing from this extract.
164  jr->eventWrittenToFile(m_jrToken, iEvent.id().run(), iEvent.id().event());
165 
166  if (m_autoFlush) { // a value of 0 skips all manual flushing
167  int64_t events = m_tree->GetEntriesFast();
168  if (events == m_firstFlush) { // one-time basket optimisation once m_firstFlush entries exist
169  m_tree->FlushBaskets();
170  float maxMemory;
171  if (m_autoFlush > 0) {
172  // Estimate the memory we'll be using at the first full flush by
173  // linearly scaling the number of events.
174  float percentClusterDone = m_firstFlush / static_cast<float>(m_autoFlush);
175  maxMemory = static_cast<float>(m_tree->GetTotBytes()) / percentClusterDone;
176  } else if (m_tree->GetZipBytes() == 0) {
177  maxMemory = 100*1024*1024; // Degenerate case of no information in the tree; arbitrary value
178  } else {
179  // Estimate the memory we'll be using by scaling the current compression ratio.
180  float cxnRatio = m_tree->GetTotBytes() / static_cast<float>(m_tree->GetZipBytes());
181  maxMemory = -m_autoFlush * cxnRatio;
182  float percentBytesDone = -m_tree->GetZipBytes() / static_cast<float>(m_autoFlush);
    // The negative (byte-based) setting is replaced by a positive entry count from here on.
183  m_autoFlush = m_firstFlush / percentBytesDone;
184  }
185  //std::cout << "OptimizeBaskets: total bytes " << m_tree->GetTotBytes() << std::endl;
186  //std::cout << "OptimizeBaskets: zip bytes " << m_tree->GetZipBytes() << std::endl;
187  //std::cout << "OptimizeBaskets: autoFlush " << m_autoFlush << std::endl;
188  //std::cout << "OptimizeBaskets: maxMemory " << static_cast<uint32_t>(maxMemory) << std::endl;
189  //m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "d");
190  m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "");
191  }
    // NOTE(review): the condition guarding the flush below (listing line 192) is missing from this extract.
193  m_tree->FlushBaskets();
194  m_eventsSinceFlush = 0;
195  }
197  }
198 
199  m_commonBranches.fill(iEvent.id());
200  // fill all tables, starting from main tables and then doing extension tables
201  for (unsigned int extensions = 0; extensions <= 1; ++extensions) { // pass 0, then pass 1
202  for (auto & t : m_tables) t.fill(iEvent,*m_tree,extensions);
203  }
204  if (!m_triggers_areSorted) { // sort triggers/flags in inverse processHistory order, to save without any special label the most recent ones
205  std::vector<std::string> pnames;
206  for (auto& p : iEvent.processHistory())
207  pnames.push_back(p.processName());
    // NOTE(review): the call these comparator lines belong to (listing line 208) is missing from this extract.
    // The comparator ranks `a` before `b` when a's process name sits at a later index in pnames.
209  return ((std::find(pnames.begin(), pnames.end(), a.processName()) - pnames.begin()) >
210  (std::find(pnames.begin(), pnames.end(), b.processName()) - pnames.begin()));
211  });
212  m_triggers_areSorted = true;
213  }
214  // fill triggers
215  for (auto & t : m_triggers) t.fill(iEvent,*m_tree);
216  // fill event branches
217  for (auto & t : m_evstrings) t.fill(iEvent,*m_tree);
218  m_tree->Fill();
219 
221 }
222 
// Per-lumi write: reports the lumi section to the job report, fills the lumi identifier branches and
// every lumi summary table, then appends one entry to the LuminosityBlocks tree.
// The signature line and listing lines 225 and 233 are missing from this extract.
223 void
    // NOTE(review): id().value() is passed as the lumi-section argument; confirm it is the plain lumi
    // number and not a wider packed id that gets narrowed by the callee's parameter type.
226  jr->reportLumiSection(m_jrToken, iLumi.id().run(), iLumi.id().value());
227 
228  m_commonLumiBranches.fill(iLumi.id());
229  for (auto & t : m_lumiTables) t.fill(iLumi,*m_lumiTree);
230 
231  m_lumiTree->Fill();
232 
234 }
235 
// Per-run write: reports the run to the job report, fills the run branch and every run summary table,
// stores or cross-checks each nanoMetadata string in the file, then appends one entry to the Runs tree.
// The signature line and listing lines 238, 245 and 259 are missing from this extract.
236 void
239  jr->reportRunNumber(m_jrToken, iRun.id().run());
240 
241  m_commonRunBranches.fill(iRun.id());
242 
243  for (auto & t : m_runTables) t.fill(iRun,*m_runTree);
244 
    // NOTE(review): `hstring` is declared on a line missing from this extract.
246  for (const auto & p : m_nanoMetadata) {
247  iRun.getByToken(p.second, hstring);
248  TObjString *tos = dynamic_cast<TObjString *>(m_file->Get(p.first.c_str()));
249  if (tos) { // an object with this name is already in the file: it must carry the same string
250  if (hstring->str() != tos->GetString()) throw cms::Exception("LogicError", "Inconsistent nanoMetadata " + p.first + " (" + hstring->str() +")");
251  } else { // first occurrence: write the string into the file under the metadata name
252  auto ostr = std::make_unique<TObjString>(hstring->str().c_str());
    // NOTE(review): release() gives up ownership of the TObjString; confirm WriteTObject takes it over,
    // otherwise the object is leaked.
253  m_file->WriteTObject(ostr.release(), p.first.c_str());
254  }
255  }
256 
257  m_runTree->Fill();
258 
260 }
261 
// Returns true while m_file holds a TFile (the signature line is missing from this extract).
262 bool
264  return nullptr != m_file.get();
265 }
266 
// Opens the output file: creates the TFile, registers it with the job report, selects the compression
// algorithm, rebuilds the per-product writer lists from the kept products and creates the output trees.
// Throws cms::Exception("Configuration") for an unknown compression algorithm or an unsupported product class.
// The signature line and listing lines 270, 273, 277, 332, 336 and 340 are missing from this extract.
267 void
269  m_file = std::make_unique<TFile>(m_fileName.c_str(),"RECREATE","",m_compressionLevel);
271  cms::Digest branchHash;
272  m_jrToken = jr->outputFileOpened(m_fileName,
274  std::string(),
275  m_fakeName?"PoolOutputModule":"NanoAODOutputModule",
276  description().moduleLabel(),
278  std::string(),
279  branchHash.digest().toString(),
280  std::vector<std::string>()
281  );
282 
283  if (m_compressionAlgorithm == std::string("ZLIB")) {
284  m_file->SetCompressionAlgorithm(ROOT::kZLIB);
285  } else if (m_compressionAlgorithm == std::string("LZMA")) {
286  m_file->SetCompressionAlgorithm(ROOT::kLZMA);
287  } else {
288  throw cms::Exception("Configuration") << "NanoAODOutputModule configured with unknown compression algorithm '" << m_compressionAlgorithm << "'\n"
289  << "Allowed compression algorithms are ZLIB and LZMA\n";
290  }
291  /* Setup file structure here */
    // NOTE(review): m_nanoMetadata is appended to further down but is not cleared here with the other
    // containers; confirm that accumulating entries across successive files is intended.
292  m_tables.clear();
293  m_triggers.clear();
294  m_triggers_areSorted = false;
295  m_evstrings.clear();
296  m_runTables.clear();
297  m_lumiTables.clear();
298  const auto & keeps = keptProducts();
    // Event products: flat tables, trigger results and the "genModel" string are supported; anything else throws.
299  for (const auto & keep : keeps[edm::InEvent]) {
300  if(keep.first->className() == "nanoaod::FlatTable" )
301  m_tables.emplace_back(keep.first, keep.second);
302  else if(keep.first->className() == "edm::TriggerResults" )
303  {
304  m_triggers.emplace_back(keep.first, keep.second);
305  }
306  else if(keep.first->className() == "std::basic_string<char,std::char_traits<char> >" && keep.first->productInstanceName()=="genModel") { // friendlyClassName == "String"
307  m_evstrings.emplace_back(keep.first, keep.second, true); // update only at lumiBlock transitions
308  }
309  else throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className());
310  }
311 
    // Run products: mergeable counter tables and nanoMetadata unique strings are supported; anything else throws.
312  for (const auto & keep : keeps[edm::InRun]) {
313  if(keep.first->className() == "nanoaod::MergeableCounterTable" )
314  m_runTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
315  else if(keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
316  m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
317  else throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className() + " in Run branch");
318  }
319 
    // Lumi products: same two classes as for runs.
    // NOTE(review): lumi-level nanoMetadata tokens land in the same m_nanoMetadata list that writeRun()
    // reads through the run object; confirm that is intended.
320  for (const auto& keep : keeps[edm::InLumi]) {
321  if (keep.first->className() == "nanoaod::MergeableCounterTable")
322  m_lumiTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
323  else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
324  m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
325  else throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className() + " in LuminosityBlock branch");
326  }
327 
328  // create the trees
329  m_tree.reset(new TTree("Events","Events"));
330  m_tree->SetAutoSave(0);
331  m_tree->SetAutoFlush(0); // ROOT's automatic flushing is turned off; write() calls FlushBaskets itself
333 
334  m_lumiTree.reset(new TTree("LuminosityBlocks","LuminosityBlocks"));
335  m_lumiTree->SetAutoSave(0);
337 
338  m_runTree.reset(new TTree("Runs","Runs"));
339  m_runTree->SetAutoSave(0);
341 
    // The two provenance trees exist only when saveProvenance is enabled.
342  if (m_writeProvenance) {
343  m_metaDataTree.reset(new TTree(edm::poolNames::metaDataTreeName().c_str(),"Job metadata"));
344  m_metaDataTree->SetAutoSave(0);
345  m_parameterSetsTree.reset(new TTree(edm::poolNames::parameterSetsTreeName().c_str(),"Parameter sets"));
346  m_parameterSetsTree->SetAutoSave(0);
347  }
348 }
// Closes the output file: finalises the provenance trees when enabled, writes and closes the TFile,
// then drops every tree pointer without deleting the trees.
// The signature line and listing lines 353, 354, 370 and 371 are missing from this extract.
349 void
351  if (m_writeProvenance) {
    // NOTE(review): basketSize is not used on any visible line; presumably it feeds the calls on the
    // two lines missing here - TODO confirm.
352  int basketSize = 16384; // fixme configurable?
355  if (m_metaDataTree->GetNbranches() != 0) {
356  m_metaDataTree->SetEntries(-1);
357  }
358  if (m_parameterSetsTree->GetNbranches() != 0) {
359  m_parameterSetsTree->SetEntries(-1);
360  }
361  }
362  m_file->Write();
363  m_file->Close();
364  m_file.reset(); // from here on isFileOpen() returns false
    // release() rather than reset(): the trees must not be deleted through these unique_ptrs.
365  m_tree.release(); // apparently root has ownership
366  m_lumiTree.release(); //
367  m_runTree.release(); //
368  m_metaDataTree.release(); //
369  m_parameterSetsTree.release(); //
372 }
373 
// Declares the configuration parameters of this module and registers the description as the default.
// The signature line and listing lines 376, 394, 397 and 402 are missing from this extract.
// The defaults declared here match the ones the constructor passes to getUntrackedParameter
// for saveProvenance, fakeNameForCrab and autoFlush.
374 void
377 
378  desc.addUntracked<std::string>("fileName");
379  desc.addUntracked<std::string>("logicalFileName","");
380 
381  desc.addUntracked<int>("compressionLevel", 9)
382  ->setComment("ROOT compression level of output file.");
383  desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
384  ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB and LZMA");
385  desc.addUntracked<bool>("saveProvenance", true)
386  ->setComment("Save process provenance information, e.g. for edmProvDump");
387  desc.addUntracked<bool>("fakeNameForCrab", false)
388  ->setComment("Change the OutputModule name in the fwk job report to fake PoolOutputModule. This is needed to run on crab (and publish) till crab is fixed");
389  desc.addUntracked<int>("autoFlush", -10000000)
390  ->setComment("Autoflush parameter for ROOT file");
391 
392  //replace with whatever you want to get from the EDM by default
    // Default product selection: drop everything, then keep only the product classes openFile() accepts.
    // NOTE(review): `keep` is not used on any visible line; presumably it is passed on the line missing
    // after it - TODO confirm.
393  const std::vector<std::string> keep = {"drop *", "keep nanoaodFlatTable_*Table_*_*", "keep edmTriggerResults_*_*_*", "keep String_*_genModel_*", "keep nanoaodMergeableCounterTable_*Table_*_*", "keep nanoaodUniqueString_nanoMetadata_*_*"};
395 
396  //Used by Workflow management for their own meta data
    // NOTE(review): `dataSet` and `branchSet` are declared on lines missing from this extract.
398  dataSet.setAllowAnything();
399  desc.addUntracked<edm::ParameterSetDescription>("dataset", dataSet)
400  ->setComment("PSet is only used by Data Operations and not by this module.");
401 
403  branchSet.setAllowAnything();
404  desc.add<edm::ParameterSetDescription>("branches", branchSet);
405 
406 
407 
408  descriptions.addDefault(desc);
409 
410 }
411 
RunNumber_t run() const
Definition: EventID.h:39
std::unique_ptr< TTree > m_runTree
EventNumber_t event() const
Definition: EventID.h:41
virtual ProcessHistory const & processHistory() const
std::unique_ptr< TTree > m_lumiTree
EventID const & id() const
boost::uint64_t value() const
ModuleDescription const & description() const
std::vector< SummaryTableOutputBranches > m_lumiTables
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
BasicHandle getByToken(EDGetToken token, TypeID const &typeID) const
bool isFileOpen() const override
RunNumber_t run() const
Definition: RunID.h:39
std::unique_ptr< TTree > m_tree
void setAllowAnything()
allow any parameter label/value pairs
RunID const & id() const
Definition: RunForOutput.h:49
void fillParameterSetBranch(TTree *parameterSetsTree, int basketSize)
bool registerProcessHistory(ProcessHistory const &processHistory)
void fillProcessHistoryBranch(TTree *metaDataTree, int basketSize, ProcessHistoryRegistry const &processHistoryRegistry)
NanoAODOutputModule(edm::ParameterSet const &pset)
void fill(const edm::LuminosityBlockID &id)
MD5Result digest() const
Definition: Digest.cc:171
void reportRunNumber(JobReport::Token token, unsigned int run)
Definition: JobReport.cc:469
std::vector< std::pair< std::string, edm::EDGetToken > > m_nanoMetadata
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:20
std::string const & parameterSetsTreeName()
Definition: BranchType.cc:257
std::unique_ptr< TTree > m_parameterSetsTree
void writeRun(edm::RunForOutput const &) override
class NanoAODOutputModule::CommonLumiBranches m_commonLumiBranches
std::unique_ptr< TTree > m_metaDataTree
const int keep
std::unique_ptr< TFile > m_file
int iEvent
Definition: GenABIO.cc:224
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
void addDefault(ParameterSetDescription const &psetDescription)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
std::vector< EventStringOutputBranches > m_evstrings
static void fillDescription(ParameterSetDescription &desc, std::vector< std::string > const &iDefaultOutputCommands=ProductSelectorRules::defaultSelectionStrings())
std::vector< SummaryTableOutputBranches > m_runTables
std::vector< TableOutputBranches > m_tables
std::vector< TriggerOutputBranches > m_triggers
RunNumber_t run() const
SelectedProductsForBranchType const & keptProducts() const
std::string const & metaDataTreeName()
Definition: BranchType.cc:169
ParameterDescriptionBase * add(U const &iLabel, T const &value)
std::size_t Token
Definition: JobReport.h:106
void openFile(edm::FileBlock const &) override
void reallyCloseFile() override
void eventWrittenToFile(Token fileToken, RunNumber_t run, EventNumber_t event)
Definition: JobReport.cc:428
std::string toString() const
Definition: Digest.cc:95
double b
Definition: hdecay.h:120
HLT enums.
double a
Definition: hdecay.h:121
void outputFileClosed(Token fileToken)
Definition: JobReport.cc:433
void write(edm::EventForOutput const &e) override
class NanoAODOutputModule::CommonRunBranches m_commonRunBranches
edm::JobReport::Token m_jrToken
class NanoAODOutputModule::CommonEventBranches m_commonBranches
const std::string & str() const
Definition: UniqueString.h:12
Definition: tree.py:1
edm::ProcessHistoryRegistry m_processHistoryRegistry
LuminosityBlockID const & id() const
OutputModule(edm::ParameterSet const &iPSet)
Definition: OutputModule.h:32
std::string m_compressionAlgorithm
#define constexpr
std::string createGlobalIdentifier()
void reportLumiSection(JobReport::Token token, unsigned int run, unsigned int lumiSectId, unsigned long nEvents=0)
Definition: JobReport.cc:458
void writeLuminosityBlock(edm::LuminosityBlockForOutput const &) override