CMS 3D CMS Logo

QGLikelihoodDBWriter.cc
Go to the documentation of this file.
1 // Author: Benedikt Hegner, Tom Cornelis
2 // Email: benedikt.hegner@cern.ch, tom.cornelis@cern.ch
3 
13 #include "TFile.h"
14 #include "TH1.h"
15 #include "TKey.h"
16 #include "TList.h"
17 #include "TVector.h"
18 #include <cstdlib>
19 #include <cstring>
20 #include <memory>
21 #include <sstream>
22 #include <string>
23 #include <vector>
24 
26 public:
28  void beginJob() override;
29  void analyze(const edm::Event&, const edm::EventSetup&) override {}
30  void endJob() override {}
31  ~QGLikelihoodDBWriter() override {}
32 
33 private:
34  bool getVectorFromFile(TFile*, std::vector<float>&, const TString&);
35  void tryToMerge(std::map<std::vector<int>, QGLikelihoodCategory>&,
36  std::map<std::vector<int>, TH1*>&,
37  std::vector<int>&,
38  int);
41 };
42 
43 // Constructor
45  inputRootFile = pSet.getParameter<std::string>("src");
46  payloadTag = pSet.getParameter<std::string>("payload");
47 }
48 
49 // Get vector from input file (includes translating TVector to std::vector)
50 bool QGLikelihoodDBWriter::getVectorFromFile(TFile* f, std::vector<float>& vector, const TString& name) {
51  TVectorT<float>* tVector = nullptr;
52  f->GetObject(name, tVector);
53  if (!tVector)
54  return false;
55  for (int i = 0; i < tVector->GetNoElements(); ++i)
56  vector.push_back((*tVector)[i]);
57  return true;
58 }
59 
60 // Transform ROOT TH1 to QGLikelihoodObject (same indexing)
63  th1->GetNbinsX(), th1->GetXaxis()->GetBinLowEdge(1), th1->GetXaxis()->GetBinUpEdge(th1->GetNbinsX()));
64  for (int ibin = 0; ibin <= th1->GetNbinsX() + 1; ++ibin)
65  histogram.setBinContent(ibin, th1->GetBinContent(ibin));
66  return histogram;
67 }
68 
69 // Try to merge bin with neighbouring bin (index = 2,3,4 for eta,pt,rho)
71  std::map<std::vector<int>, TH1*>& pdfs,
72  std::vector<int>& binNumbers,
73  int index) {
74  if (!pdfs[binNumbers])
75  return;
76  std::vector<int> neighbour = binNumbers;
77  do {
78  if (--(neighbour[index]) < 0)
79  return;
80  } while (!pdfs[neighbour]);
81  if (TString(pdfs[binNumbers]->GetTitle()) != TString(pdfs[neighbour]->GetTitle()))
82  return;
83  if (index != 4 && categories[neighbour].RhoMax != categories[binNumbers].RhoMax)
84  return;
85  if (index != 4 && categories[neighbour].RhoMin != categories[binNumbers].RhoMin)
86  return;
87  if (index != 3 && categories[neighbour].PtMax != categories[binNumbers].PtMax)
88  return;
89  if (index != 3 && categories[neighbour].PtMin != categories[binNumbers].PtMin)
90  return;
91  if (index != 2 && categories[neighbour].EtaMax != categories[binNumbers].EtaMax)
92  return;
93  if (index != 2 && categories[neighbour].EtaMin != categories[binNumbers].EtaMin)
94  return;
95 
96  if (index == 4)
97  categories[neighbour].RhoMax = categories[binNumbers].RhoMax;
98  if (index == 3)
99  categories[neighbour].PtMax = categories[binNumbers].PtMax;
100  if (index == 2)
101  categories[neighbour].EtaMax = categories[binNumbers].EtaMax;
102  pdfs.erase(binNumbers);
103  categories.erase(binNumbers);
104 }
105 
106 // Begin Job
109  payload.data.clear();
110 
111  // Get the ROOT file
112  TFile* f = TFile::Open(edm::FileInPath(inputRootFile.c_str()).fullPath().c_str());
113 
114  // The ROOT file contains the binning for each variable, needed to set up the binning grid
115  std::map<TString, std::vector<float>> gridOfBins;
116  for (auto&& binVariable : {"eta", "pt", "rho"}) {
117  if (!getVectorFromFile(f, gridOfBins[binVariable], TString(binVariable) + "Bins")) {
118  edm::LogError("NoBins") << "Missing bin information for " << binVariable << " in input file" << std::endl;
119  return;
120  }
121  }
122 
123  // Get pdf's from file and associate them to a QGLikelihoodCategory
124  // Some pdf's in the ROOT-file are copies from each other, with the same title: those are merged bins in pt and rho
125  // Here we do not store the copies, but try to extend the range of the neighbouring category (will result in less comparisons during application phase)
126  std::map<std::vector<int>, TH1*> pdfs;
127  std::map<std::vector<int>, QGLikelihoodCategory> categories;
128  for (auto&& type : {"gluon", "quark"}) {
129  // Keep numbering same as in RecoJets/JetAlgorithms/src/QGLikelihoodCalculator.cc
130  int qgIndex = strcmp(type, "gluon") == 0 ? 1 : 0;
131  for (auto&& likelihoodVar : {"mult", "ptD", "axis2"}) {
132  // Keep order same as in RecoJets/JetProducers/plugins/QGTagger.cc
133  int varIndex = (strcmp(likelihoodVar, "mult") == 0 ? 0 : (strcmp(likelihoodVar, "ptD") == 0 ? 1 : 2));
134  for (int i = 0; i < (int)gridOfBins["eta"].size() - 1; ++i) {
135  for (int j = 0; j < (int)gridOfBins["pt"].size() - 1; ++j) {
136  for (int k = 0; k < (int)gridOfBins["rho"].size() - 1; ++k) {
138  category.EtaMin = gridOfBins["eta"][i];
139  category.EtaMax = gridOfBins["eta"][i + 1];
140  category.PtMin = gridOfBins["pt"][j];
141  category.PtMax = gridOfBins["pt"][j + 1];
142  category.RhoMin = gridOfBins["rho"][k];
143  category.RhoMax = gridOfBins["rho"][k + 1];
144  category.QGIndex = qgIndex;
145  category.VarIndex = varIndex;
146 
147  TString key = TString::Format("%s/%s_%s_eta%d_pt%d_rho%d", likelihoodVar, likelihoodVar, type, i, j, k);
148  TH1* pdf = (TH1*)f->Get(key);
149  if (!pdf) {
150  edm::LogError("NoPDF") << "Could not found pdf with key " << key << " in input file" << std::endl;
151  return;
152  }
153 
154  std::vector<int> binNumbers = {qgIndex, varIndex, i, j, k};
155  pdfs[binNumbers] = pdf;
156  categories[binNumbers] = category;
157 
158  tryToMerge(categories, pdfs, binNumbers, 4);
159  }
160  for (int k = 0; k < (int)gridOfBins["rho"].size() - 1; ++k) {
161  std::vector<int> binNumbers = {qgIndex, varIndex, i, j, k};
162  tryToMerge(categories, pdfs, binNumbers, 3);
163  }
164  }
165  for (int j = 0; j < (int)gridOfBins["pt"].size() - 1; ++j) {
166  for (int k = 0; k < (int)gridOfBins["rho"].size() - 1; ++k) {
167  std::vector<int> binNumbers = {qgIndex, varIndex, i, j, k};
168  tryToMerge(categories, pdfs, binNumbers, 2);
169  }
170  }
171  }
172  }
173  }
174 
175  // Write all categories with their histograms to file
176  int i = 0;
177  for (const auto& category : categories) {
179  entry.category = category.second;
180  entry.histogram = transformToHistogramObject(pdfs[category.first]);
181  entry.mean =
182  0; // not used by the algorithm, is an old data member used in the past, but DB objects are not allowed to change
183  payload.data.push_back(entry);
184 
185  char buff[1000];
186  sprintf(buff,
187  "%6d) var=%1d\t\tqg=%1d\t\teta={%5.2f,%5.2f}\t\tpt={%8.2f,%8.2f}\t\trho={%6.2f,%8.2f}",
188  i++,
189  category.second.VarIndex,
190  category.second.QGIndex,
191  category.second.EtaMin,
192  category.second.EtaMax,
193  category.second.PtMin,
194  category.second.PtMax,
195  category.second.RhoMin,
196  category.second.RhoMax);
197  edm::LogVerbatim("HistName") << buff << std::endl;
198  }
199 
200  // Define the valid range, if no category is found within these bounds a warning will be thrown
201  payload.qgValidRange.EtaMin = gridOfBins["eta"].front();
202  payload.qgValidRange.EtaMax = gridOfBins["eta"].back();
203  payload.qgValidRange.PtMin = gridOfBins["pt"].front();
204  payload.qgValidRange.PtMax = gridOfBins["pt"].back();
205  payload.qgValidRange.RhoMin = gridOfBins["rho"].front();
206  payload.qgValidRange.RhoMax = gridOfBins["rho"].back();
207  payload.qgValidRange.QGIndex = -1;
208  payload.qgValidRange.VarIndex = -1;
209 
210  // Now write it into the DB
211  edm::LogInfo("UserOutput") << "Opening PoolDBOutputService" << std::endl;
212 
214  if (s.isAvailable()) {
215  edm::LogInfo("UserOutput") << "Setting up payload with " << payload.data.size() << " entries and tag " << payloadTag
216  << std::endl;
217  s->writeOneIOV(payload, s->beginOfTime(), payloadTag);
218  }
219  edm::LogInfo("UserOutput") << "Wrote in CondDB QGLikelihood payload label: " << payloadTag << std::endl;
220 }
221 
size
Write out results.
Log< level::Info, true > LogVerbatim
T getParameter(std::string const &) const
Definition: ParameterSet.h:303
void setBinContent(int bin, Value_t value)
Log< level::Error, false > LogError
void tryToMerge(std::map< std::vector< int >, QGLikelihoodCategory > &, std::map< std::vector< int >, TH1 *> &, std::vector< int > &, int)
QGLikelihoodDBWriter(const edm::ParameterSet &)
QGLikelihoodObject containing valid range and entries with category and histogram (mean is not used a...
QGLikelihoodObject::Histogram transformToHistogramObject(TH1 *)
double f[11][100]
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
Category structure: ranges associated with QGLikelihood histograms.
Log< level::Info, false > LogInfo
void analyze(const edm::Event &, const edm::EventSetup &) override
bool getVectorFromFile(TFile *, std::vector< float > &, const TString &)