CMS 3D CMS Logo

NanoAODRNTupleOutputModule.cc
Go to the documentation of this file.
1 // -*- C++ -*-
2 //
3 // Package: PhysicsTools/NanoAODOutput
4 // Class : NanoAODRNTupleOutputModule
5 //
6 // Implementation:
7 // [Notes on implementation]
8 //
9 // Original Author: Max Orok
10 // Created: Wed, 13 Jan 2021 14:21:41 GMT
11 //
12 
13 #include <cstdint>
14 #include <string>
15 
16 #include <ROOT/RNTuple.hxx>
17 #include <ROOT/RNTupleModel.hxx>
18 #include <ROOT/RNTupleOptions.hxx>
19 #include <ROOT/RPageStorageFile.hxx>
20 using ROOT::Experimental::RNTupleModel;
21 using ROOT::Experimental::RNTupleWriteOptions;
22 using ROOT::Experimental::RNTupleWriter;
23 using ROOT::Experimental::Detail::RPageSinkFile;
24 
25 #include "TObjString.h"
26 
39 
40 #include "NanoAODRNTuples.h"
41 
43 public:
45  ~NanoAODRNTupleOutputModule() override;
46 
47  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
48 
49 private:
50  void openFile(edm::FileBlock const&) override;
51  bool isFileOpen() const override;
52  void write(edm::EventForOutput const& e) override;
54  void writeRun(edm::RunForOutput const&) override;
55  void reallyCloseFile() override;
56  void writeProvenance();
57 
59 
67 
68  std::unique_ptr<TFile> m_file;
69  std::unique_ptr<RNTupleWriter> m_ntuple;
71  std::vector<TriggerOutputFields> m_triggers;
73 
75  public:
76  void createFields(RNTupleModel& model) {
77  model.AddField<UInt_t>("run", &m_run);
78  model.AddField<UInt_t>("luminosityBlock", &m_luminosityBlock);
79  model.AddField<std::uint64_t>("event", &m_event);
80  }
81  void fill(const edm::EventID& id) {
82  m_run = id.run();
83  m_luminosityBlock = id.luminosityBlock();
84  m_event = id.event();
85  }
86 
87  private:
88  UInt_t m_run;
92 
95 
96  std::vector<std::pair<std::string, edm::EDGetToken>> m_nanoMetadata;
97 };
98 
101  edm::one::OutputModule<>(pset),
102  m_fileName(pset.getUntrackedParameter<std::string>("fileName")),
103  m_logicalFileName(pset.getUntrackedParameter<std::string>("logicalFileName")),
104  m_compressionAlgorithm(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
105  m_compressionLevel(pset.getUntrackedParameter<int>("compressionLevel")),
106  m_writeProvenance(pset.getUntrackedParameter<bool>("saveProvenance", true)),
107  m_processHistoryRegistry() {}
108 
110 
113  jr->reportLumiSection(m_jrToken, iLumi.id().run(), iLumi.id().value());
114  m_lumi.fill(iLumi.id(), *m_file);
116 }
117 
120  jr->reportRunNumber(m_jrToken, iRun.id().run());
121 
122  m_run.fill(iRun, *m_file);
123 
125  for (const auto& p : m_nanoMetadata) {
126  iRun.getByToken(p.second, hstring);
127  TObjString* tos = dynamic_cast<TObjString*>(m_file->Get(p.first.c_str()));
128  if (tos && hstring->str() != tos->GetString()) {
129  throw cms::Exception("LogicError", "Inconsistent nanoMetadata " + p.first + " (" + hstring->str() + ")");
130  } else {
131  auto ostr = std::make_unique<TObjString>(hstring->str().c_str());
132  m_file->WriteTObject(ostr.release(), p.first.c_str());
133  }
134  }
136 }
137 
138 bool NanoAODRNTupleOutputModule::isFileOpen() const { return nullptr != m_ntuple.get(); }
139 
141  m_file = std::make_unique<TFile>(m_fileName.c_str(), "RECREATE", "", m_compressionLevel);
143  cms::Digest branchHash;
144  m_jrToken = jr->outputFileOpened(m_fileName,
146  std::string(),
147  // TODO check if needed
148  //m_fakeName ? "PoolOutputModule" : "NanoAODOutputModule",
149  "NanoAODRNTupleOutputModule",
152  std::string(),
153  branchHash.digest().toString(),
154  std::vector<std::string>());
155 
156  if (m_compressionAlgorithm == "ZLIB") {
157  m_file->SetCompressionAlgorithm(ROOT::kZLIB);
158  } else if (m_compressionAlgorithm == "LZMA") {
159  m_file->SetCompressionAlgorithm(ROOT::kLZMA);
160  } else {
161  throw cms::Exception("Configuration")
162  << "NanoAODOutputModule configured with unknown compression algorithm '" << m_compressionAlgorithm << "'\n"
163  << "Allowed compression algorithms are ZLIB and LZMA\n";
164  }
165 
166  const auto& keeps = keptProducts();
167  for (const auto& keep : keeps[edm::InRun]) {
168  if (keep.first->className() == "nanoaod::MergeableCounterTable") {
169  m_run.registerToken(keep.second);
170  } else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata") {
171  m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
172  } else {
173  throw cms::Exception(
174  "Configuration",
175  "NanoAODRNTupleOutputModule cannot handle class " + keep.first->className() + " in Run branch");
176  }
177  }
178 }
179 
181  // set up RNTuple schema
182  auto model = RNTupleModel::Create();
184 
185  const auto& keeps = keptProducts();
186  for (const auto& keep : keeps[edm::InEvent]) {
187  if (keep.first->className() == "nanoaod::FlatTable") {
189  const auto& token = keep.second;
190  iEvent.getByToken(token, handle);
192  } else if (keep.first->className() == "edm::TriggerResults") {
193  m_triggers.emplace_back(TriggerOutputFields(keep.first->processName(), keep.second));
194  } else if (keep.first->className() == "std::basic_string<char,std::char_traits<char> >" &&
195  keep.first->productInstanceName() == "genModel") {
197  } else {
198  throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className());
199  }
200  }
202  for (auto& trigger : m_triggers) {
203  trigger.createFields(iEvent, *model);
204  }
206  // TODO use Append
207  RNTupleWriteOptions options;
208  options.SetCompression(m_file->GetCompressionSettings());
209  m_ntuple =
210  std::make_unique<RNTupleWriter>(std::move(model), std::make_unique<RPageSinkFile>("Events", *m_file, options));
211 }
212 
214  if (!m_ntuple) {
216  }
217 
219  jr->eventWrittenToFile(m_jrToken, iEvent.id().run(), iEvent.id().event());
220 
221  m_commonFields.fill(iEvent.id());
223  for (auto& trigger : m_triggers) {
224  trigger.fill(iEvent);
225  }
227  m_ntuple->Fill();
229 }
230 
232  if (m_writeProvenance) {
233  writeProvenance();
234  }
235  // write ntuple to disk by calling the RNTupleWriter destructor
236  m_ntuple.reset();
239  m_file->Write();
240  m_file->Close();
241 
244 }
245 
247  PSetNTuple pntuple;
249  pntuple.finalizeWrite();
250 
251  MetadataNTuple mdntuple;
253  mdntuple.finalizeWrite();
254 }
255 
258 
259  desc.addUntracked<std::string>("fileName");
260  desc.addUntracked<std::string>("logicalFileName", "");
261  desc.addUntracked<int>("compressionLevel", 9)->setComment("ROOT compression level of output file.");
262  desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
263  ->setComment(
264  "Algorithm used to "
265  "compress data in the ROOT output file, allowed values are ZLIB and LZMA");
266  desc.addUntracked<bool>("saveProvenance", true)
267  ->setComment("Save process provenance information, e.g. for edmProvDump");
268  const std::vector<std::string> keep = {"drop *",
269  "keep nanoaodFlatTable_*Table_*_*",
270  "keep edmTriggerResults_*_*_*",
271  "keep String_*_genModel_*",
272  "keep nanoaodMergeableCounterTable_*Table_*_*",
273  "keep nanoaodUniqueString_nanoMetadata_*_*"};
275 
276  //Used by Workflow management for their own meta data
278  dataSet.setAllowAnything();
279  desc.addUntracked<edm::ParameterSetDescription>("dataset", dataSet)
280  ->setComment("PSet is only used by Data Operations and not by this module.");
281 
283  branchSet.setAllowAnything();
284  desc.add<edm::ParameterSetDescription>("branches", branchSet);
285 
286  descriptions.addDefault(desc);
287 }
288 
void finalizeWrite()
void fill(const edm::LuminosityBlockID &id, TFile &file)
void setAllowAnything()
allow any parameter label/value pairs
bool registerProcessHistory(ProcessHistory const &processHistory)
void initializeNTuple(edm::EventForOutput const &e)
std::vector< std::pair< std::string, edm::EDGetToken > > m_nanoMetadata
void registerToken(const edm::EDGetToken &token)
void reportRunNumber(JobReport::Token token, unsigned int run)
Definition: JobReport.cc:505
BasicHandle getByToken(EDGetToken token, TypeID const &typeID) const
ModuleDescription const & description() const
int iEvent
Definition: GenABIO.cc:224
NanoAODRNTupleOutputModule(edm::ParameterSet const &pset)
void addDefault(ParameterSetDescription const &psetDescription)
MD5Result digest()
Definition: Digest.cc:171
void finalizeWrite()
class NanoAODRNTupleOutputModule::CommonEventFields m_commonFields
void fill(const edm::ProcessHistoryRegistry &procHist, TFile &file)
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
void fill(const edm::EventForOutput &event)
void writeRun(edm::RunForOutput const &) override
void fill(edm::pset::Registry *pset, TFile &file)
RunNumber_t run() const
std::size_t Token
Definition: JobReport.h:106
void openFile(edm::FileBlock const &) override
std::string createGlobalIdentifier(bool binary=false)
SelectedProductsForBranchType const & keptProducts() const
unsigned long long uint64_t
Definition: Time.h:13
std::unique_ptr< RNTupleWriter > m_ntuple
void eventWrittenToFile(Token fileToken, RunNumber_t run, EventNumber_t event)
Definition: JobReport.cc:462
std::vector< TriggerOutputFields > m_triggers
void createFields(RNTupleModel &model)
LuminosityBlockID const & id() const
const std::string & str() const
Definition: UniqueString.h:12
void finalizeWrite()
void add(const edm::EDGetToken &table_token, const nanoaod::FlatTable &table)
virtual ProcessHistory const & processHistory() const
void writeLuminosityBlock(edm::LuminosityBlockForOutput const &) override
RunID const & id() const
Definition: RunForOutput.h:55
HLT enums.
void outputFileClosed(Token fileToken)
Definition: JobReport.cc:467
edm::ProcessHistoryRegistry m_processHistoryRegistry
void fill(const edm::EventForOutput &iEvent)
void registerToken(const edm::EDGetToken &token)
void fill(const edm::RunForOutput &iRun, TFile &file)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
static void fillDescription(ParameterSetDescription &desc, std::vector< std::string > const &iDefaultOutputCommands=ProductSelectorRules::defaultSelectionStrings())
std::string toString() const
Definition: Digest.cc:95
void createFields(const edm::EventForOutput &event, RNTupleModel &eventModel)
def move(src, dest)
Definition: eostools.py:511
static Registry * instance()
Definition: Registry.cc:12
RunNumber_t run() const
Definition: RunID.h:26
void write(edm::EventForOutput const &e) override
void reportLumiSection(JobReport::Token token, unsigned int run, unsigned int lumiSectId, unsigned long nEvents=0)
Definition: JobReport.cc:494