CMS 3D CMS Logo

NanoAODOutputModule.cc
Go to the documentation of this file.
1 // -*- C++ -*-
2 //
3 // Package: PhysicsTools/NanoAODOutput
4 // Class : NanoAODOutputModule
5 //
6 // Implementation:
7 // [Notes on implementation]
8 //
9 // Original Author: Christopher Jones
10 // Created: Mon, 07 Aug 2017 14:21:41 GMT
11 //
12 
13 // system include files
14 #include <string>
15 #include "TFile.h"
16 #include "TTree.h"
17 #include "TROOT.h"
18 #include "TObjString.h"
19 #include "Compression.h"
20 
21 // user include files
42 
43 #include <iostream>
44 
46 public:
48  ~NanoAODOutputModule() override;
49 
50  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
51 
52 private:
53  void write(edm::EventForOutput const& e) override;
55  void writeRun(edm::RunForOutput const&) override;
56  bool isFileOpen() const override;
57  void openFile(edm::FileBlock const&) override;
58  void reallyCloseFile() override;
59 
66  bool m_fakeName; //crab workaround, remove after crab is fixed
70  std::unique_ptr<TFile> m_file;
72 
73  static constexpr int m_firstFlush{1000};
74 
76  public:
// Bind the three event-identifier branches of `tree` ("run",
// "luminosityBlock", "event") to this object's member buffers. The leaf
// lists "/i" and "/l" pair with the UInt_t and ULong64_t members declared
// further down in the same class.
77  void branch(TTree &tree) {
78  tree.Branch("run", & m_run, "run/i");
79  tree.Branch("luminosityBlock", & m_luminosityBlock, "luminosityBlock/i");
80  tree.Branch("event", & m_event, "event/l");
81  }
// Copy the run, luminosity-block and event numbers out of `id` into the
// member buffers that branch() registered with the tree. Nothing is written
// to the tree here; this only updates the buffers.
82  void fill(const edm::EventID & id) {
83  m_run = id.run(); m_luminosityBlock = id.luminosityBlock(); m_event = id.event();
84  }
85  private:
86  UInt_t m_run; UInt_t m_luminosityBlock; ULong64_t m_event;
88 
90  public:
// Bind the "run" and "luminosityBlock" branches of `tree` to this object's
// UInt_t member buffers (leaf type "/i").
91  void branch(TTree &tree) {
92  tree.Branch("run", & m_run, "run/i");
93  tree.Branch("luminosityBlock", & m_luminosityBlock, "luminosityBlock/i");
94  }
// Copy the run number and the luminosity-block identifier from `id` into
// the member buffers registered by branch().
// NOTE(review): the cross-reference section of this listing shows a
// `boost::uint64_t value() const`; if that is LuminosityBlockID::value(),
// the assignment below narrows a 64-bit value into the UInt_t buffer.
// Confirm that the retained low 32 bits are the luminosity-block number
// (the EventID overload uses id.luminosityBlock() instead).
95  void fill(const edm::LuminosityBlockID & id) {
96  m_run = id.run();
97  m_luminosityBlock = id.value();
98  }
99  private:
100  UInt_t m_run; UInt_t m_luminosityBlock;
102 
104  public:
// Bind the "run" branch of `tree` to the UInt_t member buffer m_run
// (leaf type "/i").
105  void branch(TTree &tree) {
106  tree.Branch("run", & m_run, "run/i");
107  }
// Copy the run number from `id` into the buffer registered by branch().
108  void fill(const edm::RunID & id) {
109  m_run = id.run();
110  }
111  private:
112  UInt_t m_run;
114 
115 
116  std::vector<TableOutputBranches> m_tables;
117  std::vector<TriggerOutputBranches> m_triggers;
118 
119  std::vector<SummaryTableOutputBranches> m_runTables;
120 
121  std::vector<std::pair<std::string,edm::EDGetToken>> m_nanoMetadata;
122 
123 };
124 
125 
126 //
127 // constants, enums and typedefs
128 //
129 
130 //
131 // static data member definitions
132 //
133 
134 //
135 // constructors and destructor
136 //
// Member-initializer list and (empty) body of the constructor; the signature
// line is not present in this listing. Every setting is read from `pset` as
// an untracked parameter:
//   - fileName, logicalFileName, compressionLevel and compressionAlgorithm
//     are read without a fallback value here;
//   - saveProvenance (true), fakeNameForCrab (false) and autoFlush
//     (-10000000) carry the fallback shown in the call.
// The listing skips a line after the m_autoFlush initializer, so the final
// entry of the list is not visible.
139  edm::one::OutputModule<>(pset),
140  m_fileName(pset.getUntrackedParameter<std::string>("fileName")),
141  m_logicalFileName(pset.getUntrackedParameter<std::string>("logicalFileName")),
142  m_compressionLevel(pset.getUntrackedParameter<int>("compressionLevel")),
143  m_compressionAlgorithm(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
144  m_writeProvenance(pset.getUntrackedParameter<bool>("saveProvenance", true)),
145  m_fakeName(pset.getUntrackedParameter<bool>("fakeNameForCrab", false)),
146  m_autoFlush(pset.getUntrackedParameter<int>("autoFlush", -10000000)),
148 {
149 }
150 
// Empty function body. Its signature line is missing from this listing;
// given its position right after the constructor it is presumably the
// destructor declared in the class (TODO confirm against the real file).
152 {
153 }
154 
// Per-event output step. The signature line and a few statements (listing
// lines 156, 158, 187, 191 and 203) are missing from this listing, so the
// braces below do not balance as shown.
//
// Visible behaviour, in order:
//   1. report the event (run, event number) to the job report;
//   2. if m_autoFlush is non-zero, handle basket flushing / optimisation;
//   3. fill the common event branches, every table (main pass, then
//      extension pass) and every trigger block;
//   4. append one entry to m_tree.
155 void
157  //Get data from 'e' and write it to the file
159  jr->eventWrittenToFile(m_jrToken, iEvent.id().run(), iEvent.id().event());
160 
161  if (m_autoFlush) {
162  int64_t events = m_tree->GetEntriesFast();
// One-time tuning, performed when the tree holds exactly m_firstFlush
// entries: flush, estimate a memory budget, then let ROOT resize baskets.
163  if (events == m_firstFlush) {
164  m_tree->FlushBaskets();
165  float maxMemory;
// Positive m_autoFlush is used as a number of entries per flush.
166  if (m_autoFlush > 0) {
167  // Estimate the memory we'll be using at the first full flush by
168  // linearly scaling the number of events.
169  float percentClusterDone = m_firstFlush / static_cast<float>(m_autoFlush);
170  maxMemory = static_cast<float>(m_tree->GetTotBytes()) / percentClusterDone;
171  } else if (m_tree->GetZipBytes() == 0) {
172  maxMemory = 100*1024*1024; // Degenerate case of no information in the tree; arbitrary value
173  } else {
174  // Estimate the memory we'll be using by scaling the current compression ratio.
// Negative m_autoFlush is used as a (negated) compressed-byte budget: it is
// scaled by the compression ratio to get maxMemory, and then replaced by
// the number of entries expected to fill that budget. The float result is
// truncated when stored back into m_autoFlush.
175  float cxnRatio = m_tree->GetTotBytes() / static_cast<float>(m_tree->GetZipBytes());
176  maxMemory = -m_autoFlush * cxnRatio;
177  float percentBytesDone = -m_tree->GetZipBytes() / static_cast<float>(m_autoFlush);
178  m_autoFlush = m_firstFlush / percentBytesDone;
179  }
180  //std::cout << "OptimizeBaskets: total bytes " << m_tree->GetTotBytes() << std::endl;
181  //std::cout << "OptimizeBaskets: zip bytes " << m_tree->GetZipBytes() << std::endl;
182  //std::cout << "OptimizeBaskets: autoFlush " << m_autoFlush << std::endl;
183  //std::cout << "OptimizeBaskets: maxMemory " << static_cast<uint32_t>(maxMemory) << std::endl;
184  //m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "d");
185  m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "");
186  }
// The statement that opens the next block (listing line 187) is not shown;
// the block flushes the baskets and resets the events-since-flush counter.
188  m_tree->FlushBaskets();
189  m_eventsSinceFlush = 0;
190  }
192  }
193 
// Update the run/lumi/event buffers before the tables are filled.
194  m_commonBranches.fill(iEvent.id());
195  // fill all tables, starting from main tables and then doing extension tables
196  for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
197  for (auto & t : m_tables) t.fill(iEvent,*m_tree,extensions);
198  }
199  // fill triggers
200  for (auto & t : m_triggers) t.fill(iEvent,*m_tree);
201  m_tree->Fill();
202 
204 }
205 
// Per-luminosity-block output step (signature line and listing lines 207,
// 208 and 214 are missing here). Reports the lumi section to the job
// report, refreshes the run/lumi buffers and appends one entry to
// m_lumiTree.
// NOTE(review): iLumi.id().value() is passed where reportLumiSection takes
// an `unsigned int lumiSectId` (see the cross-reference section); confirm
// the implied narrowing yields the luminosity-block number.
206 void
209  jr->reportLumiSection(m_jrToken, iLumi.id().run(), iLumi.id().value());
210 
211  m_commonLumiBranches.fill(iLumi.id());
212  m_lumiTree->Fill();
213 
215 }
216 
// Per-run output step (signature line and the declarations on listing lines
// 218, 219, 226 and 240 are missing from this listing).
//
// Visible behaviour, in order:
//   1. report the run number to the job report;
//   2. refresh the run buffer and fill every run-level summary table;
//   3. for each nanoMetadata string: if an object of that name is already
//      in the output file, require it to equal the current string (throws
//      cms::Exception "LogicError" otherwise); if not, write it;
//   4. append one entry to m_runTree.
217 void
220  jr->reportRunNumber(m_jrToken, iRun.id().run());
221 
222  m_commonRunBranches.fill(iRun.id());
223 
224  for (auto & t : m_runTables) t.fill(iRun,*m_runTree);
225 
227  for (const auto & p : m_nanoMetadata) {
228  iRun.getByToken(p.second, hstring);
229  TObjString *tos = dynamic_cast<TObjString *>(m_file->Get(p.first.c_str()));
230  if (tos) {
231  if (hstring->str() != tos->GetString()) throw cms::Exception("LogicError", "Inconsistent nanoMetadata " + p.first + " (" + hstring->str() +")");
232  } else {
233  auto ostr = std::make_unique<TObjString>(hstring->str().c_str());
// WriteTObject only serialises the object into the file; it does not take
// ownership. Pass a borrowed pointer so the unique_ptr frees the temporary
// at the end of this scope (release() here leaked it).
234  m_file->WriteTObject(ostr.get(), p.first.c_str());
235  }
236  }
237 
238  m_runTree->Fill();
239 
241 }
242 
// Reports whether an output file is currently held: true exactly when
// m_file owns a TFile (signature line missing from this listing).
243 bool
245  return nullptr != m_file.get();
246 }
247 
248 void
250  m_file = std::make_unique<TFile>(m_fileName.c_str(),"RECREATE","",m_compressionLevel);
252  cms::Digest branchHash;
253  m_jrToken = jr->outputFileOpened(m_fileName,
255  std::string(),
256  m_fakeName?"PoolOutputModule":"NanoAODOutputModule",
257  description().moduleLabel(),
259  std::string(),
260  branchHash.digest().toString(),
261  std::vector<std::string>()
262  );
263 
264  if (m_compressionAlgorithm == std::string("ZLIB")) {
265  m_file->SetCompressionAlgorithm(ROOT::kZLIB);
266  } else if (m_compressionAlgorithm == std::string("LZMA")) {
267  m_file->SetCompressionAlgorithm(ROOT::kLZMA);
268  } else {
269  throw cms::Exception("Configuration") << "NanoAODOutputModule configured with unknown compression algorithm '" << m_compressionAlgorithm << "'\n"
270  << "Allowed compression algorithms are ZLIB and LZMA\n";
271  }
272  /* Setup file structure here */
273  m_tables.clear();
274  m_triggers.clear();
275  m_runTables.clear();
276  const auto & keeps = keptProducts();
277  for (const auto & keep : keeps[edm::InEvent]) {
278  if(keep.first->className() == "nanoaod::FlatTable" )
279  m_tables.emplace_back(keep.first, keep.second);
280  else if(keep.first->className() == "edm::TriggerResults" )
281  {
282  m_triggers.emplace_back(keep.first, keep.second);
283  }
284  else throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className());
285  }
286 
287  for (const auto & keep : keeps[edm::InRun]) {
288  if(keep.first->className() == "nanoaod::MergeableCounterTable" )
289  m_runTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
290  else if(keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
291  m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
292  else throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className() + " in Run branch");
293  }
294 
295 
296  // create the trees
297  m_tree.reset(new TTree("Events","Events"));
298  m_tree->SetAutoSave(0);
299  m_tree->SetAutoFlush(0);
301 
302  m_lumiTree.reset(new TTree("LuminosityBlocks","LuminosityBlocks"));
303  m_lumiTree->SetAutoSave(0);
305 
306  m_runTree.reset(new TTree("Runs","Runs"));
307  m_runTree->SetAutoSave(0);
309 
310  if (m_writeProvenance) {
311  m_metaDataTree.reset(new TTree(edm::poolNames::metaDataTreeName().c_str(),"Job metadata"));
312  m_metaDataTree->SetAutoSave(0);
313  m_parameterSetsTree.reset(new TTree(edm::poolNames::parameterSetsTreeName().c_str(),"Parameter sets"));
314  m_parameterSetsTree->SetAutoSave(0);
315  }
316 }
// Finalises and closes the output file. The signature line and listing
// lines 318, 321, 322, 338 and 339 are missing from this listing.
//
// Visible behaviour, in order:
//   1. when provenance is enabled, call SetEntries(-1) on the metadata and
//      parameter-set trees if they have any branch;
//   2. write and close the file, then drop the TFile;
//   3. release (not delete) every tree pointer.
317 void
319  if (m_writeProvenance) {
// basketSize is declared here, but the statements that consume it fall on
// listing lines that are not shown.
320  int basketSize = 16384; // fixme configurable?
323  if (m_metaDataTree->GetNbranches() != 0) {
324  m_metaDataTree->SetEntries(-1);
325  }
326  if (m_parameterSetsTree->GetNbranches() != 0) {
327  m_parameterSetsTree->SetEntries(-1);
328  }
329  }
330  m_file->Write();
331  m_file->Close();
332  m_file.reset();
// release() gives up unique_ptr ownership without deleting the tree; per
// the original remark, the trees are assumed to be owned by ROOT.
333  m_tree.release(); // apparently root has ownership
334  m_lumiTree.release(); //
335  m_runTree.release(); //
336  m_metaDataTree.release(); //
337  m_parameterSetsTree.release(); //
340 }
341 
// Declares the configuration parameters accepted by this module and
// registers the result as the default description. The signature line and
// the declarations of `desc`, `dataSet` and `branchSet` (listing lines 343,
// 344, 362, 365 and 370) are missing from this listing.
//
// Untracked parameters declared here:
//   fileName (no default), logicalFileName (""), compressionLevel (9),
//   compressionAlgorithm ("ZLIB"), saveProvenance (true),
//   fakeNameForCrab (false), autoFlush (-10000000), dataset (free-form PSet).
// Tracked parameter: branches (free-form PSet).
342 void
345 
346  desc.addUntracked<std::string>("fileName");
347  desc.addUntracked<std::string>("logicalFileName","");
348 
349  desc.addUntracked<int>("compressionLevel", 9)
350  ->setComment("ROOT compression level of output file.");
351  desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
352  ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB and LZMA");
353  desc.addUntracked<bool>("saveProvenance", true)
354  ->setComment("Save process provenance information, e.g. for edmProvDump");
355  desc.addUntracked<bool>("fakeNameForCrab", false)
356  ->setComment("Change the OutputModule name in the fwk job report to fake PoolOutputModule. This is needed to run on crab (and publish) till crab is fixed");
357  desc.addUntracked<int>("autoFlush", -10000000)
358  ->setComment("Autoflush parameter for ROOT file");
359 
360  //replace with whatever you want to get from the EDM by default
// Default product selection: drop everything, then keep flat tables,
// trigger results, mergeable counter tables and the nanoMetadata strings.
361  const std::vector<std::string> keep = {"drop *", "keep nanoaodFlatTable_*Table_*_*", "keep edmTriggerResults_*_*_*", "keep nanoaodMergeableCounterTable_*Table_*_*", "keep nanoaodUniqueString_nanoMetadata_*_*"};
363 
364  //Used by Workflow management for their own meta data
366  dataSet.setAllowAnything();
367  desc.addUntracked<edm::ParameterSetDescription>("dataset", dataSet)
368  ->setComment("PSet is only used by Data Operations and not by this module.");
369 
371  branchSet.setAllowAnything();
372  desc.add<edm::ParameterSetDescription>("branches", branchSet);
373 
374 
375 
376  descriptions.addDefault(desc);
377 
378 }
379 
RunNumber_t run() const
Definition: EventID.h:39
std::unique_ptr< TTree > m_runTree
EventNumber_t event() const
Definition: EventID.h:41
virtual ProcessHistory const & processHistory() const
std::unique_ptr< TTree > m_lumiTree
EventID const & id() const
boost::uint64_t value() const
ModuleDescription const & description() const
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
bool isFileOpen() const override
RunNumber_t run() const
Definition: RunID.h:39
std::unique_ptr< TTree > m_tree
void setAllowAnything()
allow any parameter label/value pairs
RunID const & id() const
Definition: RunForOutput.h:45
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:17
void fillParameterSetBranch(TTree *parameterSetsTree, int basketSize)
bool registerProcessHistory(ProcessHistory const &processHistory)
void fillProcessHistoryBranch(TTree *metaDataTree, int basketSize, ProcessHistoryRegistry const &processHistoryRegistry)
NanoAODOutputModule(edm::ParameterSet const &pset)
void fill(const edm::LuminosityBlockID &id)
MD5Result digest() const
Definition: Digest.cc:194
void reportRunNumber(JobReport::Token token, unsigned int run)
Definition: JobReport.cc:497
std::vector< std::pair< std::string, edm::EDGetToken > > m_nanoMetadata
#define constexpr
std::string const & parameterSetsTreeName()
Definition: BranchType.cc:251
std::unique_ptr< TTree > m_parameterSetsTree
void writeRun(edm::RunForOutput const &) override
class NanoAODOutputModule::CommonLumiBranches m_commonLumiBranches
std::unique_ptr< TTree > m_metaDataTree
const int keep
std::unique_ptr< TFile > m_file
bool getByToken(EDGetToken token, TypeID const &typeID, BasicHandle &result) const
int iEvent
Definition: GenABIO.cc:230
void addDefault(ParameterSetDescription const &psetDescription)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
static void fillDescription(ParameterSetDescription &desc, std::vector< std::string > const &iDefaultOutputCommands=ProductSelectorRules::defaultSelectionStrings())
std::vector< SummaryTableOutputBranches > m_runTables
std::vector< TableOutputBranches > m_tables
std::vector< TriggerOutputBranches > m_triggers
RunNumber_t run() const
SelectedProductsForBranchType const & keptProducts() const
std::string const & metaDataTreeName()
Definition: BranchType.cc:168
ParameterDescriptionBase * add(U const &iLabel, T const &value)
std::size_t Token
Definition: JobReport.h:107
void openFile(edm::FileBlock const &) override
void reallyCloseFile() override
void eventWrittenToFile(Token fileToken, RunNumber_t run, EventNumber_t event)
Definition: JobReport.cc:455
std::string toString() const
Definition: Digest.cc:87
HLT enums.
void outputFileClosed(Token fileToken)
Definition: JobReport.cc:461
void write(edm::EventForOutput const &e) override
class NanoAODOutputModule::CommonRunBranches m_commonRunBranches
edm::JobReport::Token m_jrToken
class NanoAODOutputModule::CommonEventBranches m_commonBranches
const std::string & str() const
Definition: UniqueString.h:12
Definition: tree.py:1
edm::ProcessHistoryRegistry m_processHistoryRegistry
LuminosityBlockID const & id() const
OutputModule(edm::ParameterSet const &iPSet)
Definition: OutputModule.h:35
std::string m_compressionAlgorithm
std::string createGlobalIdentifier()
void reportLumiSection(JobReport::Token token, unsigned int run, unsigned int lumiSectId, unsigned long nEvents=0)
Definition: JobReport.cc:487
void writeLuminosityBlock(edm::LuminosityBlockForOutput const &) override