CMS 3D CMS Logo

GenWeightsTableProducer.cc
Go to the documentation of this file.
15 #include "boost/algorithm/string.hpp"
16 
17 #include <vector>
18 #include <unordered_map>
19 #include <iostream>
20 #include <regex>
21 
22 namespace {
24  struct Counter {
25  Counter() :
26  num(0), sumw(0), sumw2(0), sumPDF(), sumScale(), sumRwgt(), sumNamed(), sumPS() {}
27 
28  // the counters
29  long long num;
30  long double sumw;
31  long double sumw2;
32  std::vector<long double> sumPDF, sumScale, sumRwgt, sumNamed, sumPS;
33 
34  void clear() {
35  num = 0; sumw = 0; sumw2 = 0;
36  sumPDF.clear(); sumScale.clear(); sumRwgt.clear(); sumNamed.clear(), sumPS.clear();
37  }
38 
39  // inc the counters
40  void incGenOnly(double w) {
41  num++; sumw += w; sumw2 += (w*w);
42  }
43 
44  void incPSOnly(double w0, const std::vector<double> & wPS) {
45  if (!wPS.empty()) {
46  if (sumPS.empty()) sumPS.resize(wPS.size(), 0);
47  for (unsigned int i = 0, n = wPS.size(); i < n; ++i) sumPS[i] += (w0 * wPS[i]);
48  }
49  }
50 
51  void incLHE(double w0, const std::vector<double> & wScale, const std::vector<double> & wPDF, const std::vector<double> & wRwgt, const std::vector<double> & wNamed, const std::vector<double> & wPS) {
52  // add up weights
53  incGenOnly(w0);
54  // then add up variations
55  if (!wScale.empty()) {
56  if (sumScale.empty()) sumScale.resize(wScale.size(), 0);
57  for (unsigned int i = 0, n = wScale.size(); i < n; ++i) sumScale[i] += (w0 * wScale[i]);
58  }
59  if (!wPDF.empty()) {
60  if (sumPDF.empty()) sumPDF.resize(wPDF.size(), 0);
61  for (unsigned int i = 0, n = wPDF.size(); i < n; ++i) sumPDF[i] += (w0 * wPDF[i]);
62  }
63  if (!wRwgt.empty()) {
64  if (sumRwgt.empty()) sumRwgt.resize(wRwgt.size(), 0);
65  for (unsigned int i = 0, n = wRwgt.size(); i < n; ++i) sumRwgt[i] += (w0 * wRwgt[i]);
66  }
67  if (!wNamed.empty()) {
68  if (sumNamed.empty()) sumNamed.resize(wNamed.size(), 0);
69  for (unsigned int i = 0, n = wNamed.size(); i < n; ++i) sumNamed[i] += (w0 * wNamed[i]);
70  }
71  incPSOnly(w0, wPS);
72  }
73 
74  void merge(const Counter & other) {
75  num += other.num; sumw += other.sumw; sumw2 += other.sumw2;
76  if (sumScale.empty() && !other.sumScale.empty()) sumScale.resize(other.sumScale.size(),0);
77  if (sumPDF.empty() && !other.sumPDF.empty()) sumPDF.resize(other.sumPDF.size(),0);
78  if (sumRwgt.empty() && !other.sumRwgt.empty()) sumRwgt.resize(other.sumRwgt.size(),0);
79  if (sumNamed.empty() && !other.sumNamed.empty()) sumNamed.resize(other.sumNamed.size(),0);
80  if (sumPS.empty() && !other.sumPS.empty()) sumPS.resize(other.sumPS.size(),0);
81  if (!other.sumScale.empty()) for (unsigned int i = 0, n = sumScale.size(); i < n; ++i) sumScale[i] += other.sumScale[i];
82  if (!other.sumPDF.empty()) for (unsigned int i = 0, n = sumPDF.size(); i < n; ++i) sumPDF[i] += other.sumPDF[i];
83  if (!other.sumRwgt.empty()) for (unsigned int i = 0, n = sumRwgt.size(); i < n; ++i) sumRwgt[i] += other.sumRwgt[i];
84  if (!other.sumNamed.empty()) for (unsigned int i = 0, n = sumNamed.size(); i < n; ++i) sumNamed[i] += other.sumNamed[i];
85  if (!other.sumPS.empty()) for (unsigned int i = 0, n = sumPS.size(); i < n; ++i) sumPS[i] += other.sumPS[i];
86  }
87  };
88 
89  struct CounterMap {
90  std::map<std::string, Counter> countermap;
91  Counter* active_el = nullptr;
92  std::string active_label = "";
93  void merge(const CounterMap & other) {
94  for (const auto &y : other.countermap) countermap[y.first].merge(y.second);
95  active_el = nullptr;
96  }
97  void clear() {for (auto x : countermap) x.second.clear();}
98  void setLabel(std::string label) { active_el = &(countermap[label]); active_label = label;}
99  void checkLabelSet() { if (!active_el) throw cms::Exception("LogicError", "Called CounterMap::get() before setting the active label\n"); }
100  Counter* get() { checkLabelSet(); return active_el; }
101  std::string& getLabel() { checkLabelSet(); return active_label; }
102  };
103 
105  struct DynamicWeightChoice {
106  // choice of LHE weights
107  // ---- scale ----
108  std::vector<std::string> scaleWeightIDs;
109  std::string scaleWeightsDoc;
110  // ---- pdf ----
111  std::vector<std::string> pdfWeightIDs;
112  std::string pdfWeightsDoc;
113  // ---- rwgt ----
114  std::vector<std::string> rwgtIDs;
115  std::string rwgtWeightDoc;
116  };
117 
118  float stof_fortrancomp(const std::string &str) {
119  std::string::size_type match = str.find("d");
120  if (match != std::string::npos) {
121  std::string pre = str.substr(0,match);
122  std::string post = str.substr(match+1);
123  return std::stof(pre) * std::pow(10.0f, std::stof(post));
124  } else {
125  return std::stof(str);
126  }
127  }
129  struct ScaleVarWeight {
130  std::string wid, label;
131  std::pair<float,float> scales;
132  ScaleVarWeight(const std::string & id, const std::string & text, const std::string & muR, const std::string & muF) :
133  wid(id), label(text), scales(stof_fortrancomp(muR), stof_fortrancomp(muF)) {}
134  bool operator<(const ScaleVarWeight & other) { return (scales == other.scales ? wid < other.wid : scales < other.scales); }
135  };
136  struct PDFSetWeights {
137  std::vector<std::string> wids;
138  std::pair<unsigned int,unsigned int> lhaIDs;
139  PDFSetWeights(const std::string & wid, unsigned int lhaID) : wids(1,wid), lhaIDs(lhaID,lhaID) {}
140  bool operator<(const PDFSetWeights & other) const { return lhaIDs < other.lhaIDs; }
141  void add(const std::string & wid, unsigned int lhaID) {
142  wids.push_back(wid);
143  lhaIDs.second = lhaID;
144  }
145  bool maybe_add(const std::string & wid, unsigned int lhaID) {
146  if (lhaID == lhaIDs.second+1) {
147  lhaIDs.second++;
148  wids.push_back(wid);
149  return true;
150  } else {
151  return false;
152  }
153  }
154  };
155 }
156 
157 class GenWeightsTableProducer : public edm::global::EDProducer<edm::StreamCache<CounterMap>, edm::RunCache<DynamicWeightChoice>, edm::RunSummaryCache<CounterMap>, edm::EndRunProducer> {
158  public:
160  genTag_(consumes<GenEventInfoProduct>(params.getParameter<edm::InputTag>("genEvent"))),
161  lheLabel_(params.getParameter<std::vector<edm::InputTag>>("lheInfo")),
162  lheTag_(edm::vector_transform(lheLabel_, [this](const edm::InputTag & tag) { return mayConsume<LHEEventProduct>(tag); })),
163  lheRunTag_(edm::vector_transform(lheLabel_, [this](const edm::InputTag & tag) { return mayConsume<LHERunInfoProduct, edm::InRun>(tag); })),
164  genLumiInfoHeadTag_(mayConsume<GenLumiInfoHeader,edm::InLumi>(params.getParameter<edm::InputTag>("genLumiInfoHeader"))),
165  namedWeightIDs_(params.getParameter<std::vector<std::string>>("namedWeightIDs")),
166  namedWeightLabels_(params.getParameter<std::vector<std::string>>("namedWeightLabels")),
167  lheWeightPrecision_(params.getParameter<int32_t>("lheWeightPrecision")),
168  maxPdfWeights_(params.getParameter<uint32_t>("maxPdfWeights")),
169  debug_(params.getUntrackedParameter<bool>("debug",false)), debugRun_(debug_.load()),
170  hasIssuedWarning_(false)
171  {
172  produces<nanoaod::FlatTable>();
173  produces<std::string>("genModel");
174  produces<nanoaod::FlatTable>("LHEScale");
175  produces<nanoaod::FlatTable>("LHEPdf");
176  produces<nanoaod::FlatTable>("LHEReweighting");
177  produces<nanoaod::FlatTable>("LHENamed");
178  produces<nanoaod::FlatTable>("PS");
179  produces<nanoaod::MergeableCounterTable,edm::Transition::EndRun>();
180  if (namedWeightIDs_.size() != namedWeightLabels_.size()) {
181  throw cms::Exception("Configuration", "Size mismatch between namedWeightIDs & namedWeightLabels");
182  }
183  for (const edm::ParameterSet & pdfps : params.getParameter<std::vector<edm::ParameterSet>>("preferredPDFs")) {
184  const std::string & name = pdfps.getParameter<std::string>("name");
185  uint32_t lhaid = pdfps.getParameter<uint32_t>("lhaid");
186  preferredPDFLHAIDs_.push_back(lhaid);
187  lhaNameToID_[name] = lhaid;
188  lhaNameToID_[name+".LHgrid"] = lhaid;
189  }
190  }
191 
193 
194  void produce(edm::StreamID id, edm::Event& iEvent, const edm::EventSetup& iSetup) const override {
195  // get my counter for weights
196  Counter * counter = streamCache(id)->get();
197 
198  // generator information (always available)
200  iEvent.getByToken(genTag_, genInfo);
201  double weight = genInfo->weight();
202 
203  // table for gen info, always available
204  auto out = std::make_unique<nanoaod::FlatTable>(1, "genWeight", true);
205  out->setDoc("generator weight");
206  out->addColumnValue<float>("", weight, "generator weight", nanoaod::FlatTable::FloatColumn);
207  iEvent.put(std::move(out));
208 
209  std::string model_label = streamCache(id)->getLabel();
210  auto outM = std::make_unique<std::string>((!model_label.empty()) ? std::string("GenModel_") + model_label : "");
211  iEvent.put(std::move(outM),"genModel");
212 
213  // tables for LHE weights, may not be filled
214  std::unique_ptr<nanoaod::FlatTable> lheScaleTab, lhePdfTab, lheRwgtTab, lheNamedTab;
215  std::unique_ptr<nanoaod::FlatTable> genPSTab;
216 
218  for (const auto & lheTag: lheTag_) {
219  iEvent.getByToken(lheTag, lheInfo);
220  if (lheInfo.isValid()) {
221  break;
222  }
223  }
224  if (lheInfo.isValid()) {
225  // get the dynamic choice of weights
226  const DynamicWeightChoice * weightChoice = runCache(iEvent.getRun().index());
227  // go fill tables
228  fillLHEWeightTables(counter, weightChoice, weight, *lheInfo, *genInfo, lheScaleTab, lhePdfTab, lheRwgtTab, lheNamedTab, genPSTab);
229  } else {
230  // Still try to add the PS weights
231  fillOnlyPSWeightTable(counter, weight, *genInfo, genPSTab);
232  // make dummy values
233  lheScaleTab.reset(new nanoaod::FlatTable(1, "LHEScaleWeights", true));
234  lhePdfTab.reset(new nanoaod::FlatTable(1, "LHEPdfWeights", true));
235  lheRwgtTab.reset(new nanoaod::FlatTable(1, "LHEReweightingWeights", true));
236  lheNamedTab.reset(new nanoaod::FlatTable(1, "LHENamedWeights", true));
237  if (!hasIssuedWarning_.exchange(true)) {
238  edm::LogWarning("LHETablesProducer") << "No LHEEventProduct, so there will be no LHE Tables\n";
239  }
240  }
241 
242  iEvent.put(std::move(lheScaleTab), "LHEScale");
243  iEvent.put(std::move(lhePdfTab), "LHEPdf");
244  iEvent.put(std::move(lheRwgtTab), "LHEReweighting");
245  iEvent.put(std::move(lheNamedTab), "LHENamed");
246  iEvent.put(std::move(genPSTab), "PS");
247  }
248 
250  Counter * counter,
251  const DynamicWeightChoice * weightChoice,
252  double genWeight,
253  const LHEEventProduct & lheProd,
254  const GenEventInfoProduct & genProd,
255  std::unique_ptr<nanoaod::FlatTable> & outScale,
256  std::unique_ptr<nanoaod::FlatTable> & outPdf,
257  std::unique_ptr<nanoaod::FlatTable> & outRwgt,
258  std::unique_ptr<nanoaod::FlatTable> & outNamed,
259  std::unique_ptr<nanoaod::FlatTable> & outPS ) const
260  {
261  bool lheDebug = debug_.exchange(false); // make sure only the first thread dumps out this (even if may still be mixed up with other output, but nevermind)
262 
263  const std::vector<std::string> & scaleWeightIDs = weightChoice->scaleWeightIDs;
264  const std::vector<std::string> & pdfWeightIDs = weightChoice->pdfWeightIDs;
265  const std::vector<std::string> & rwgtWeightIDs = weightChoice->rwgtIDs;
266 
267  double w0 = lheProd.originalXWGTUP();
268 
269  std::vector<double> wScale(scaleWeightIDs.size(), 1), wPDF(pdfWeightIDs.size(), 1), wRwgt(rwgtWeightIDs.size(), 1), wNamed(namedWeightIDs_.size(), 1);
270  for (auto & weight : lheProd.weights()) {
271  if (lheDebug) printf("Weight %+9.5f rel %+9.5f for id %s\n", weight.wgt, weight.wgt/w0, weight.id.c_str());
272  // now we do it slowly, can be optimized
273  auto mScale = std::find(scaleWeightIDs.begin(), scaleWeightIDs.end(), weight.id);
274  if (mScale != scaleWeightIDs.end()) wScale[mScale-scaleWeightIDs.begin()] = weight.wgt/w0;
275 
276  auto mPDF = std::find(pdfWeightIDs.begin(), pdfWeightIDs.end(), weight.id);
277  if (mPDF != pdfWeightIDs.end()) wPDF[mPDF-pdfWeightIDs.begin()] = weight.wgt/w0;
278 
279  auto mRwgt = std::find(rwgtWeightIDs.begin(), rwgtWeightIDs.end(), weight.id);
280  if (mRwgt != rwgtWeightIDs.end()) wRwgt[mRwgt-rwgtWeightIDs.begin()] = weight.wgt/w0;
281 
282  auto mNamed = std::find(namedWeightIDs_.begin(), namedWeightIDs_.end(), weight.id);
283  if (mNamed != namedWeightIDs_.end()) wNamed[mNamed-namedWeightIDs_.begin()] = weight.wgt/w0;
284  }
285 
286  int vectorSize = (genProd.weights().size() == 14 || genProd.weights().size() == 46) ? 4 : 1;
287  std::vector<double> wPS(vectorSize, 1);
288  if (vectorSize > 1 ) {
289  for (unsigned int i=6; i<10; i++){
290  wPS[i-6] = (genProd.weights()[i])/w0;
291  }
292  }
293  outPS.reset(new nanoaod::FlatTable(wPS.size(), "PSWeight", false));
294  outPS->addColumn<float>("", wPS, vectorSize > 1 ? "PS weights (w_var / w_nominal); [0] is ISR=0.5 FSR=1; [1] is ISR=1 FSR=0.5; [2] is ISR=2 FSR=1; [3] is ISR=1 FSR=2 " : "dummy PS weight (1.0) ", nanoaod::FlatTable::FloatColumn, lheWeightPrecision_);
295 
296  outScale.reset(new nanoaod::FlatTable(wScale.size(), "LHEScaleWeight", false));
297  outScale->addColumn<float>("", wScale, weightChoice->scaleWeightsDoc, nanoaod::FlatTable::FloatColumn, lheWeightPrecision_);
298 
299  outPdf.reset(new nanoaod::FlatTable(wPDF.size(), "LHEPdfWeight", false));
300  outPdf->addColumn<float>("", wPDF, weightChoice->pdfWeightsDoc, nanoaod::FlatTable::FloatColumn, lheWeightPrecision_);
301 
302  outRwgt.reset(new nanoaod::FlatTable(wRwgt.size(), "LHEReweightingWeight", false));
303  outRwgt->addColumn<float>("", wRwgt, weightChoice->rwgtWeightDoc, nanoaod::FlatTable::FloatColumn, lheWeightPrecision_);
304 
305  outNamed.reset(new nanoaod::FlatTable(1, "LHEWeight", true));
306  outNamed->addColumnValue<float>("originalXWGTUP", lheProd.originalXWGTUP(), "Nominal event weight in the LHE file", nanoaod::FlatTable::FloatColumn);
307  for (unsigned int i = 0, n = wNamed.size(); i < n; ++i) {
308  outNamed->addColumnValue<float>(namedWeightLabels_[i], wNamed[i], "LHE weight for id "+namedWeightIDs_[i]+", relative to nominal", nanoaod::FlatTable::FloatColumn, lheWeightPrecision_);
309  }
310 
311  counter->incLHE(genWeight, wScale, wPDF, wRwgt, wNamed, wPS);
312  }
313 
315  Counter * counter,
316  double genWeight,
317  const GenEventInfoProduct & genProd,
318  std::unique_ptr<nanoaod::FlatTable> & outPS ) const
319  {
320  int vectorSize = (genProd.weights().size() == 14 || genProd.weights().size() == 46) ? 4 : 1;
321 
322  std::vector<double> wPS(vectorSize, 1);
323  if (vectorSize > 1 ){
324  for (unsigned int i=6; i<10; i++){
325  wPS[i-6] = (genProd.weights()[i])/genWeight;
326  }
327  }
328 
329  outPS.reset(new nanoaod::FlatTable(wPS.size(), "PSWeight", false));
330  outPS->addColumn<float>("", wPS, vectorSize > 1 ? "PS weights (w_var / w_nominal); [0] is ISR=0.5 FSR=1; [1] is ISR=1 FSR=0.5; [2] is ISR=2 FSR=1; [3] is ISR=1 FSR=2 " : "dummy PS weight (1.0) " , nanoaod::FlatTable::FloatColumn, lheWeightPrecision_);
331 
332  counter->incGenOnly(genWeight);
333  counter->incPSOnly(genWeight,wPS);
334  }
335 
336 
337 
338  // create an empty counter
339  std::shared_ptr<DynamicWeightChoice> globalBeginRun(edm::Run const& iRun, edm::EventSetup const&) const override {
341 
342  bool lheDebug = debugRun_.exchange(false); // make sure only the first thread dumps out this (even if may still be mixed up with other output, but nevermind)
343  auto weightChoice = std::make_shared<DynamicWeightChoice>();
344 
345  // getByToken throws since we're not in the endRun (see https://github.com/cms-sw/cmssw/pull/18499)
346  //if (iRun.getByToken(lheRunTag_, lheInfo)) {
347  for (const auto & lheLabel: lheLabel_) {
348  iRun.getByLabel(lheLabel, lheInfo);
349  if (lheInfo.isValid()) {
350  break;
351  }
352  }
353  if (lheInfo.isValid()) {
354  std::vector<ScaleVarWeight> scaleVariationIDs;
355  std::vector<PDFSetWeights> pdfSetWeightIDs;
356  std::vector<std::string> lheReweighingIDs;
357 
358  std::regex weightgroupmg26x("<weightgroup\\s+(?:name|type)=\"(.*)\"\\s+combine=\"(.*)\"\\s*>");
359  std::regex weightgroup("<weightgroup\\s+combine=\"(.*)\"\\s+(?:name|type)=\"(.*)\"\\s*>");
360  std::regex weightgroupRwgt("<weightgroup\\s+(?:name|type)=\"(.*)\"\\s*>");
361  std::regex endweightgroup("</weightgroup>");
362  std::regex scalewmg26x("<weight\\s+(?:.*\\s+)?id=\"(\\d+)\"\\s*(?:lhapdf=\\d+|dyn=\\s*-?\\d+)?\\s*((?:[mM][uU][rR]|renscfact)=\"(\\S+)\"\\s+(?:[mM][uU][Ff]|facscfact)=\"(\\S+)\")(\\s+.*)?</weight>");
363  std::regex scalew("<weight\\s+(?:.*\\s+)?id=\"(\\d+)\">\\s*(?:lhapdf=\\d+|dyn=\\s*-?\\d+)?\\s*((?:mu[rR]|renscfact)=(\\S+)\\s+(?:mu[Ff]|facscfact)=(\\S+)(\\s+.*)?)</weight>");
364  std::regex pdfw("<weight\\s+id=\"(\\d+)\">\\s*(?:PDF set|lhapdf|PDF|pdfset)\\s*=\\s*(\\d+)\\s*(?:\\s.*)?</weight>");
365  std::regex pdfwOld("<weight\\s+(?:.*\\s+)?id=\"(\\d+)\">\\s*Member \\s*(\\d+)\\s*(?:.*)</weight>");
366  std::regex pdfwmg26x("<weight\\s+id=\"(\\d+)\"\\s*MUR=\"(?:\\S+)\"\\s*MUF=\"(?:\\S+)\"\\s*(?:PDF set|lhapdf|PDF|pdfset)\\s*=\\s*\"(\\d+)\"\\s*>\\s*(?:PDF=(\\d+)\\s*MemberID=(\\d+))?\\s*(?:\\s.*)?</weight>");
367  std::regex rwgt("<weight\\s+id=\"(.+)\">(.+)?(</weight>)?");
368  std::smatch groups;
369  for (auto iter=lheInfo->headers_begin(), end = lheInfo->headers_end(); iter != end; ++iter) {
370  if (iter->tag() != "initrwgt") {
371  if (lheDebug) std::cout << "Skipping LHE header with tag" << iter->tag() << std::endl;
372  continue;
373  }
374  if (lheDebug) std::cout << "Found LHE header with tag" << iter->tag() << std::endl;
375  std::vector<std::string> lines = iter->lines();
376  bool missed_weightgroup=false; //Needed because in some of the samples ( produced with MG26X ) a small part of the header info is ordered incorrectly
377  bool ismg26x=false;
378  for (unsigned int iLine = 0, nLines = lines.size(); iLine < nLines; ++iLine) { //First start looping through the lines to see which weightgroup pattern is matched
379  boost::replace_all(lines[iLine],"&lt;", "<");
380  boost::replace_all(lines[iLine],"&gt;", ">");
381  if(std::regex_search(lines[iLine],groups,weightgroupmg26x)){
382  ismg26x=true;
383  }
384  }
385  for (unsigned int iLine = 0, nLines = lines.size(); iLine < nLines; ++iLine) {
386  if (lheDebug) std::cout << lines[iLine];
387  if (std::regex_search(lines[iLine], groups, ismg26x ? weightgroupmg26x : weightgroup) ) {
388  std::string groupname = groups.str(2);
389  if (ismg26x) groupname = groups.str(1);
390  if (lheDebug) std::cout << ">>> Looks like the beginning of a weight group for '" << groupname << "'" << std::endl;
391  if (groupname.find("scale_variation") == 0 || groupname == "Central scale variation") {
392  if (lheDebug) std::cout << ">>> Looks like scale variation for theory uncertainties" << std::endl;
393  for ( ++iLine; iLine < nLines; ++iLine) {
394  if (lheDebug) std::cout << " " << lines[iLine];
395  if (std::regex_search(lines[iLine], groups, ismg26x ? scalewmg26x : scalew)) {
396  if (lheDebug) std::cout << " >>> Scale weight " << groups[1].str() << " for " << groups[3].str() << " , " << groups[4].str() << " , " << groups[5].str() << std::endl;
397  scaleVariationIDs.emplace_back(groups.str(1), groups.str(2), groups.str(3), groups.str(4));
398  } else if (std::regex_search(lines[iLine], endweightgroup)) {
399  if (lheDebug) std::cout << ">>> Looks like the end of a weight group" << std::endl;
400  if (!missed_weightgroup){
401  break;
402  } else missed_weightgroup=false;
403  } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) {
404  if (lheDebug) std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end of the group." << std::endl;
405  if (ismg26x) missed_weightgroup=true;
406  --iLine; // rewind by one, and go back to the outer loop
407  break;
408  }
409  }
410  } else if (groupname == "PDF_variation" || groupname.find("PDF_variation ") == 0) {
411  if (lheDebug) std::cout << ">>> Looks like a new-style block of PDF weights for one or more pdfs" << std::endl;
412  for ( ++iLine; iLine < nLines; ++iLine) {
413  if (lheDebug) std::cout << " " << lines[iLine];
414  if (std::regex_search(lines[iLine], groups, pdfw)) {
415  unsigned int lhaID = std::stoi(groups.str(2));
416  if (lheDebug) std::cout << " >>> PDF weight " << groups.str(1) << " for " << groups.str(2) << " = " << lhaID << std::endl;
417  if (pdfSetWeightIDs.empty() || ! pdfSetWeightIDs.back().maybe_add(groups.str(1),lhaID)) {
418  pdfSetWeightIDs.emplace_back(groups.str(1),lhaID);
419  }
420  } else if (std::regex_search(lines[iLine], endweightgroup)) {
421  if (lheDebug) std::cout << ">>> Looks like the end of a weight group" << std::endl;
422  if (!missed_weightgroup){
423  break;
424  } else missed_weightgroup=false;
425  } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) {
426  if (lheDebug) std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end of the group." << std::endl;
427  if (ismg26x) missed_weightgroup=true;
428  --iLine; // rewind by one, and go back to the outer loop
429  break;
430  }
431  }
432  } else if (groupname == "PDF_variation1" || groupname == "PDF_variation2") {
433  if (lheDebug) std::cout << ">>> Looks like a new-style block of PDF weights for multiple pdfs" << std::endl;
434  unsigned int lastid = 0;
435  for ( ++iLine; iLine < nLines; ++iLine) {
436  if (lheDebug) std::cout << " " << lines[iLine];
437  if (std::regex_search(lines[iLine], groups, pdfw)) {
438  unsigned int id = std::stoi(groups.str(1));
439  unsigned int lhaID = std::stoi(groups.str(2));
440  if (lheDebug) std::cout << " >>> PDF weight " << groups.str(1) << " for " << groups.str(2) << " = " << lhaID << std::endl;
441  if (id != (lastid+1) || pdfSetWeightIDs.empty()) {
442  pdfSetWeightIDs.emplace_back(groups.str(1),lhaID);
443  } else {
444  pdfSetWeightIDs.back().add(groups.str(1),lhaID);
445  }
446  lastid = id;
447  } else if (std::regex_search(lines[iLine], endweightgroup)) {
448  if (lheDebug) std::cout << ">>> Looks like the end of a weight group" << std::endl;
449  if(!missed_weightgroup) {
450  break;
451  } else missed_weightgroup=false;
452  } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) {
453  if (lheDebug) std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end of the group." << std::endl;
454  if (ismg26x) missed_weightgroup=true;
455  --iLine; // rewind by one, and go back to the outer loop
456  break;
457  }
458  }
459  } else if (lhaNameToID_.find(groupname) != lhaNameToID_.end()) {
460  if (lheDebug) std::cout << ">>> Looks like an old-style PDF weight for an individual pdf" << std::endl;
461  unsigned int firstLhaID = lhaNameToID_.find(groupname)->second;
462  bool first = true;
463  for ( ++iLine; iLine < nLines; ++iLine) {
464  if (lheDebug) std::cout << " " << lines[iLine];
465  if (std::regex_search(lines[iLine], groups, ismg26x ? pdfwmg26x : pdfwOld)) {
466  unsigned int member = 0;
467  if (ismg26x==0){
468  member = std::stoi(groups.str(2));
469  } else {
470  if (!groups.str(4).empty()){
471  member = std::stoi(groups.str(4));
472  }
473  }
474  unsigned int lhaID = member+firstLhaID;
475  if (lheDebug) std::cout << " >>> PDF weight " << groups.str(1) << " for " << member << " = " << lhaID << std::endl;
476  //if (member == 0) continue; // let's keep also the central value for now
477  if (first) {
478  pdfSetWeightIDs.emplace_back(groups.str(1),lhaID);
479  first = false;
480  } else {
481  pdfSetWeightIDs.back().add(groups.str(1),lhaID);
482  }
483  } else if (std::regex_search(lines[iLine], endweightgroup)) {
484  if (lheDebug) std::cout << ">>> Looks like the end of a weight group" << std::endl;
485  if (!missed_weightgroup) {
486  break;
487  } else missed_weightgroup=false;
488  } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) {
489  if (lheDebug) std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end of the group." << std::endl;
490  if (ismg26x) missed_weightgroup=true;
491  --iLine; // rewind by one, and go back to the outer loop
492  break;
493  }
494  }
495  } else {
496  for ( ++iLine; iLine < nLines; ++iLine) {
497  if (lheDebug) std::cout << " " << lines[iLine];
498  if (std::regex_search(lines[iLine], groups, endweightgroup)) {
499  if (lheDebug) std::cout << ">>> Looks like the end of a weight group" << std::endl;
500  if (!missed_weightgroup){
501  break;
502  } else missed_weightgroup=false;
503  } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) {
504  if (lheDebug) std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end of the group." << std::endl;
505  if (ismg26x) missed_weightgroup=true;
506  --iLine; // rewind by one, and go back to the outer loop
507  break;
508  }
509  }
510  }
511  } else if(std::regex_search(lines[iLine], groups, weightgroupRwgt) ) {
512  std::string groupname = groups.str(1);
513  if (groupname == "mg_reweighting") {
514  if (lheDebug) std::cout << ">>> Looks like a LHE weights for reweighting" << std::endl;
515  for ( ++iLine; iLine < nLines; ++iLine) {
516  if (lheDebug) std::cout << " " << lines[iLine];
517  if (std::regex_search(lines[iLine], groups, rwgt)) {
518  std::string rwgtID = groups.str(1);
519  if (lheDebug) std::cout << " >>> LHE reweighting weight: " << rwgtID << std::endl;
520  if (std::find(lheReweighingIDs.begin(), lheReweighingIDs.end(), rwgtID) == lheReweighingIDs.end()) {
521  // we're only interested in the beggining of the block
522  lheReweighingIDs.emplace_back(rwgtID);
523  }
524  } else if (std::regex_search(lines[iLine], endweightgroup)) {
525  if (lheDebug) std::cout << ">>> Looks like the end of a weight group" << std::endl;
526  if (!missed_weightgroup){
527  break;
528  } else missed_weightgroup=false;
529  } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) {
530  if (lheDebug) std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end of the group." << std::endl;
531  if (ismg26x) missed_weightgroup=true;
532  --iLine; // rewind by one, and go back to the outer loop
533  break;
534  }
535  }
536  }
537  }
538  }
539  //std::cout << "============= END [ " << iter->tag() << " ] ============ \n\n" << std::endl;
540 
541  // ----- SCALE VARIATIONS -----
542  std::sort(scaleVariationIDs.begin(), scaleVariationIDs.end());
543  if (lheDebug) std::cout << "Found " << scaleVariationIDs.size() << " scale variations: " << std::endl;
544  std::stringstream scaleDoc; scaleDoc << "LHE scale variation weights (w_var / w_nominal); ";
545  for (unsigned int isw = 0, nsw = scaleVariationIDs.size(); isw < nsw; ++isw) {
546  const auto & sw = scaleVariationIDs[isw];
547  if (isw) scaleDoc << "; ";
548  scaleDoc << "[" << isw << "] is " << sw.label;
549  weightChoice->scaleWeightIDs.push_back(sw.wid);
550  if (lheDebug) printf(" id %s: scales ren = % .2f fact = % .2f text = %s\n", sw.wid.c_str(), sw.scales.first, sw.scales.second, sw.label.c_str());
551  }
552  if (!scaleVariationIDs.empty()) weightChoice->scaleWeightsDoc = scaleDoc.str();
553 
554  // ------ PDF VARIATIONS (take the preferred one) -----
555  if (lheDebug) {
556  std::cout << "Found " << pdfSetWeightIDs.size() << " PDF set errors: " << std::endl;
557  for (const auto & pw : pdfSetWeightIDs) printf("lhaIDs %6d - %6d (%3lu weights: %s, ... )\n", pw.lhaIDs.first, pw.lhaIDs.second, pw.wids.size(), pw.wids.front().c_str());
558  }
559 
560  // ------ LHE REWEIGHTING -------
561  if (lheDebug) {
562  std::cout << "Found " << lheReweighingIDs.size() << " reweighting weights" << std::endl;
563  }
564  std::copy(lheReweighingIDs.begin(), lheReweighingIDs.end(), std::back_inserter(weightChoice->rwgtIDs));
565 
566  std::stringstream pdfDoc; pdfDoc << "LHE pdf variation weights (w_var / w_nominal) for LHA IDs ";
567  bool found = false;
568  for (uint32_t lhaid : preferredPDFLHAIDs_) {
569  for (const auto & pw : pdfSetWeightIDs) {
570  if (pw.lhaIDs.first != lhaid && pw.lhaIDs.first != (lhaid+1)) continue; // sometimes the first weight is not saved if that PDF is the nominal one for the sample
571  if (pw.wids.size() == 1) continue; // only consider error sets
572  pdfDoc << pw.lhaIDs.first << " - " << pw.lhaIDs.second;
573  weightChoice->pdfWeightIDs = pw.wids;
574  if (maxPdfWeights_ < pw.wids.size()) {
575  weightChoice->pdfWeightIDs.resize(maxPdfWeights_); // drop some replicas
576  pdfDoc << ", truncated to the first " << maxPdfWeights_ << " replicas";
577  }
578  weightChoice->pdfWeightsDoc = pdfDoc.str();
579  found = true; break;
580  }
581  if (found) break;
582  }
583  }
584  }
585  return weightChoice;
586  }
587 
588 
589  // create an empty counter
590  std::unique_ptr<CounterMap> beginStream(edm::StreamID) const override {
591  return std::make_unique<CounterMap>();
592  }
593  // inizialize to zero at begin run
594  void streamBeginRun(edm::StreamID id, edm::Run const&, edm::EventSetup const&) const override {
595  streamCache(id)->clear();
596  }
597  void streamBeginLuminosityBlock(edm::StreamID id, edm::LuminosityBlock const& lumiBlock, edm::EventSetup const& eventSetup) const override {
598  auto counterMap = streamCache(id);
599  edm::Handle<GenLumiInfoHeader> genLumiInfoHead;
600  lumiBlock.getByToken(genLumiInfoHeadTag_,genLumiInfoHead);
601  if (!genLumiInfoHead.isValid()) edm::LogWarning("LHETablesProducer") << "No GenLumiInfoHeader product found, will not fill generator model string.\n";
602  counterMap->setLabel(genLumiInfoHead.isValid() ? genLumiInfoHead->configDescription() : "");
603  }
604  // create an empty counter
605  std::shared_ptr<CounterMap> globalBeginRunSummary(edm::Run const&, edm::EventSetup const&) const override {
606  return std::make_shared<CounterMap>();
607  }
608  // add this stream to the summary
609  void streamEndRunSummary(edm::StreamID id, edm::Run const&, edm::EventSetup const&, CounterMap* runCounterMap) const override {
610  runCounterMap->merge(*streamCache(id));
611  }
612  // nothing to do per se
613  void globalEndRunSummary(edm::Run const&, edm::EventSetup const&, CounterMap* runCounterMap) const override {
614  }
615  // write the total to the run
616  void globalEndRunProduce(edm::Run& iRun, edm::EventSetup const&, CounterMap const* runCounterMap) const override {
617  auto out = std::make_unique<nanoaod::MergeableCounterTable>();
618 
619  for (auto x : runCounterMap->countermap) {
620 
621  auto runCounter = &(x.second);
622  std::string label = std::string("_") + x.first;
623  std::string doclabel = (!x.first.empty()) ? (std::string(", for model label ") + x.first) : "";
624 
625  out->addInt("genEventCount"+label, "event count"+doclabel, runCounter->num);
626  out->addFloat("genEventSumw"+label, "sum of gen weights"+doclabel, runCounter->sumw);
627  out->addFloat("genEventSumw2"+label, "sum of gen (weight^2)"+doclabel, runCounter->sumw2);
628 
629  double norm = runCounter->sumw ? 1.0/runCounter->sumw : 1;
630  auto sumScales = runCounter->sumScale; for (auto & val : sumScales) val *= norm;
631  out->addVFloat("LHEScaleSumw"+label, "Sum of genEventWeight * LHEScaleWeight[i], divided by genEventSumw"+doclabel, sumScales);
632  auto sumPDFs = runCounter->sumPDF; for (auto & val : sumPDFs) val *= norm;
633  out->addVFloat("LHEPdfSumw"+label, "Sum of genEventWeight * LHEPdfWeight[i], divided by genEventSumw"+doclabel, sumPDFs);
634  if (!runCounter->sumRwgt.empty()) {
635  auto sumRwgts = runCounter->sumRwgt; for (auto & val : sumRwgts) val *= norm;
636  out->addVFloat("LHEReweightingSumw"+label, "Sum of genEventWeight * LHEReweightingWeight[i], divided by genEventSumw"+doclabel, sumRwgts);
637  }
638  if (!runCounter->sumNamed.empty()) { // it could be empty if there's no LHE info in the sample
639  for (unsigned int i = 0, n = namedWeightLabels_.size(); i < n; ++i) {
640  out->addFloat("LHESumw_"+namedWeightLabels_[i]+label, "Sum of genEventWeight * LHEWeight_"+namedWeightLabels_[i]+", divided by genEventSumw"+doclabel, runCounter->sumNamed[i] * norm);
641  }
642  }
643 
644  }
645  iRun.put(std::move(out));
646  }
647  // nothing to do here
648  void globalEndRun(edm::Run const&, edm::EventSetup const&) const override { }
649 
650  static void fillDescriptions(edm::ConfigurationDescriptions & descriptions) {
652  desc.add<edm::InputTag>("genEvent", edm::InputTag("generator"))->setComment("tag for the GenEventInfoProduct, to get the main weight");
653  desc.add<edm::InputTag>("genLumiInfoHeader", edm::InputTag("generator"))->setComment("tag for the GenLumiInfoProduct, to get the model string");
654  desc.add<std::vector<edm::InputTag>>("lheInfo", std::vector<edm::InputTag>{{"externalLHEProducer"},{"source"}})->setComment("tag(s) for the LHE information (LHEEventProduct and LHERunInfoProduct)");
655 
657  prefpdf.add<std::string>("name");
658  prefpdf.add<uint32_t>("lhaid");
659  desc.addVPSet("preferredPDFs", prefpdf, std::vector<edm::ParameterSet>())->setComment("LHA PDF Ids of the preferred PDF sets, in order of preference (the first matching one will be used)");
660  desc.add<std::vector<std::string>>("namedWeightIDs")->setComment("set of LHA weight IDs for named LHE weights");
661  desc.add<std::vector<std::string>>("namedWeightLabels")->setComment("output names for the namedWeightIDs (in the same order)");
662  desc.add<int32_t>("lheWeightPrecision")->setComment("Number of bits in the mantissa for LHE weights");
663  desc.add<uint32_t>("maxPdfWeights")->setComment("Maximum number of PDF weights to save (to crop NN replicas)");
664  desc.addOptionalUntracked<bool>("debug")->setComment("dump out all LHE information for one event");
665  descriptions.add("genWeightsTable", desc);
666  }
667 
668 
669  protected:
671  const std::vector<edm::InputTag> lheLabel_;
672  const std::vector<edm::EDGetTokenT<LHEEventProduct>> lheTag_;
673  const std::vector<edm::EDGetTokenT<LHERunInfoProduct>> lheRunTag_;
675 
676  std::vector<uint32_t> preferredPDFLHAIDs_;
677  std::unordered_map<std::string,uint32_t> lhaNameToID_;
678  std::vector<std::string> namedWeightIDs_;
679  std::vector<std::string> namedWeightLabels_;
681  unsigned int maxPdfWeights_;
682 
683  mutable std::atomic<bool> debug_, debugRun_, hasIssuedWarning_;
684 };
685 
688 
const std::vector< edm::EDGetTokenT< LHERunInfoProduct > > lheRunTag_
double originalXWGTUP() const
T getParameter(std::string const &) const
bool getByLabel(std::string const &label, Handle< PROD > &result) const
Definition: Run.h:280
void setComment(std::string const &value)
void streamBeginRun(edm::StreamID id, edm::Run const &, edm::EventSetup const &) const override
OrphanHandle< PROD > put(std::unique_ptr< PROD > product)
Put a new product.
Definition: Event.h:125
ParameterDescriptionBase * addVPSet(U const &iLabel, ParameterSetDescription const &validator, std::vector< ParameterSet > const &defaults)
std::unique_ptr< CounterMap > beginStream(edm::StreamID) const override
void streamBeginLuminosityBlock(edm::StreamID id, edm::LuminosityBlock const &lumiBlock, edm::EventSetup const &eventSetup) const override
std::unordered_map< std::string, uint32_t > lhaNameToID_
const double w
Definition: UKUtility.cc:23
def copy(args, dbName)
void globalEndRun(edm::Run const &, edm::EventSetup const &) const override
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
bool getByToken(EDGetToken token, Handle< PROD > &result) const
Definition: Event.h:517
bool getByToken(EDGetToken token, Handle< PROD > &result) const
const edm::EDGetTokenT< GenEventInfoProduct > genTag_
std::shared_ptr< DynamicWeightChoice > globalBeginRun(edm::Run const &iRun, edm::EventSetup const &) const override
Definition: weight.py:1
headers_const_iterator headers_end() const
Run const & getRun() const
Definition: Event.cc:99
double weight() const
std::atomic< bool > hasIssuedWarning_
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:20
uint16_t size_type
void fillLHEWeightTables(Counter *counter, const DynamicWeightChoice *weightChoice, double genWeight, const LHEEventProduct &lheProd, const GenEventInfoProduct &genProd, std::unique_ptr< nanoaod::FlatTable > &outScale, std::unique_ptr< nanoaod::FlatTable > &outPdf, std::unique_ptr< nanoaod::FlatTable > &outRwgt, std::unique_ptr< nanoaod::FlatTable > &outNamed, std::unique_ptr< nanoaod::FlatTable > &outPS) const
void globalEndRunSummary(edm::Run const &, edm::EventSetup const &, CounterMap *runCounterMap) const override
const std::vector< edm::InputTag > lheLabel_
const std::vector< WGT > & weights() const
char const * label
std::function< unsigned int(align::ID)> Counter
std::shared_ptr< CounterMap > globalBeginRunSummary(edm::Run const &, edm::EventSetup const &) const override
int iEvent
Definition: GenABIO.cc:224
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
headers_const_iterator headers_begin() const
void clear(CLHEP::HepGenMatrix &m)
Helper function: Reset all elements of a matrix to 0.
Definition: matutil.cc:167
RunIndex index() const
Definition: Run.cc:21
double f[11][100]
#define end
Definition: vmac.h:39
void fillOnlyPSWeightTable(Counter *counter, double genWeight, const GenEventInfoProduct &genProd, std::unique_ptr< nanoaod::FlatTable > &outPS) const
ParameterDescriptionBase * add(U const &iLabel, T const &value)
bool isValid() const
Definition: HandleBase.h:74
void add(std::map< std::string, TH1 * > &h, TH1 *hist)
auto vector_transform(std::vector< InputType > const &input, Function predicate) -> std::vector< typename std::remove_cv< typename std::remove_reference< decltype(predicate(input.front()))>::type >::type >
Definition: transform.h:11
const std::string & configDescription() const
std::vector< std::string > namedWeightLabels_
GenWeightsTableProducer(edm::ParameterSet const &params)
void put(std::unique_ptr< PROD > product)
Put a new product.
Definition: Run.h:108
void add(std::string const &label, ParameterSetDescription const &psetDescription)
const std::vector< edm::EDGetTokenT< LHEEventProduct > > lheTag_
bool operator<(DTCELinkId const &lhs, DTCELinkId const &rhs)
Definition: DTCELinkId.h:73
void streamEndRunSummary(edm::StreamID id, edm::Run const &, edm::EventSetup const &, CounterMap *runCounterMap) const override
void globalEndRunProduce(edm::Run &iRun, edm::EventSetup const &, CounterMap const *runCounterMap) const override
std::vector< uint32_t > preferredPDFLHAIDs_
HLT enums.
std::vector< double > & weights()
void produce(edm::StreamID id, edm::Event &iEvent, const edm::EventSetup &iSetup) const override
ParameterDescriptionBase * addOptionalUntracked(U const &iLabel, T const &value)
std::pair< typename Association::data_type::first_type, double > match(Reference key, Association association, bool bestMatchByMaxValue)
Generic matching function.
Definition: Utils.h:10
std::vector< std::string > namedWeightIDs_
#define str(s)
const edm::EDGetTokenT< GenLumiInfoHeader > genLumiInfoHeadTag_
Power< A, B >::type pow(const A &a, const B &b)
Definition: Power.h:40
def move(src, dest)
Definition: eostools.py:511
Definition: Run.h:45
def merge(dictlist, TELL=False)
Definition: MatrixUtil.py:194