CMS 3D CMS Logo

HLTGenValClient.cc
Go to the documentation of this file.
1 //********************************************************************************
2 //
3 // Description:
4 // DQM histogram post processor for the HLT Gen validation source module
5 // Given a folder name, this module will find histograms before and after
6 // HLT filters and produce efficiency histograms from these.
7 // The structure of this module is strongly inspired by the DQMGenericClient,
8 // replacing most user input parameters by the automatic parsing of the given directory.
9 //
10 // Author: Finn Labe, UHH, Jul. 2022
11 // Inspired by DQMGenericClient from Junghwan Goh - SungKyunKwan University
12 //********************************************************************************
13 
19 
20 #include <TH1.h>
21 #include <TH1F.h>
22 #include <TH2F.h>
23 #include <TClass.h>
24 #include <TString.h>
25 #include <TPRegexp.h>
26 #include <TDirectory.h>
27 #include <TEfficiency.h>
28 
29 #include <set>
30 #include <cmath>
31 #include <string>
32 #include <vector>
33 #include <climits>
34 #include <boost/tokenizer.hpp>
35 
37 public:
39  ~HLTGenValClient() override{};
40 
42  DQMStore::IGetter& igetter,
43  const edm::LuminosityBlock& lumiSeg,
44  const edm::EventSetup& c) override;
45  void dqmEndRun(DQMStore::IBooker&, DQMStore::IGetter&, edm::Run const&, edm::EventSetup const&) override;
47 
48  struct EfficOption {
51  };
52 
54  DQMStore::IGetter& igetter,
55  const std::string& dirName,
56  const std::string& efficMEName,
57  const std::string& efficMETitle,
58  const std::string& numeratorMEName,
59  const std::string& denominatorMEName);
60 
61 private:
62  TPRegexp metacharacters_;
63  TPRegexp nonPerlWildcard_;
64  unsigned int verbose_;
69 
71  std::vector<std::string> subDirs_;
73 
74  std::vector<EfficOption> efficOptions_;
75 
76  const std::string separator_ = "__";
77 
79 
81  DQMStore::IGetter& igetter,
83  std::set<std::string>* myList,
84  const TString& pattern);
85 
86  void genericEff(TH1* denom, TH1* numer, MonitorElement* efficiencyHist);
87 };
88 
90  : metacharacters_("[\\^\\$\\.\\*\\+\\?\\|\\(\\)\\{\\}\\[\\]]"), nonPerlWildcard_("\\w\\*|^\\*") {
91  boost::escaped_list_separator<char> commonEscapes("\\", " \t", "\'");
92 
93  verbose_ = pset.getUntrackedParameter<unsigned int>("verbose", 0);
94  runOnEndLumi_ = pset.getUntrackedParameter<bool>("runOnEndLumi", false);
95  runOnEndJob_ = pset.getUntrackedParameter<bool>("runOnEndJob", true);
96  makeGlobalEffPlot_ = pset.getUntrackedParameter<bool>("makeGlobalEffienciesPlot", true);
97 
98  outputFileName_ = pset.getUntrackedParameter<std::string>("outputFileName", "");
99  subDirs_ = pset.getUntrackedParameter<std::vector<std::string>>("subDirs");
100 
101  isWildcardUsed_ = false;
102 }
103 
105  DQMStore::IGetter& igetter,
106  const edm::LuminosityBlock& lumiSeg,
107  const edm::EventSetup& c) {
108  if (runOnEndLumi_) {
109  makeAllPlots(ibooker, igetter);
110  }
111 }
112 
114  DQMStore::IGetter& igetter,
115  edm::Run const&,
116  edm::EventSetup const&) {
117  // Create new MEs in endRun, even though we are requested to do it in endJob.
118  // This gives the QTests a chance to run, before summaries are created in
119  // endJob. The negative side effect is that we cannot run the GenericClient
120  // for plots produced in Harvesting, but that seems rather rare.
121  //
122 // It is important that this is still safe in the presence of multiple runs,
123  // first because in multi-run harvesting, we accumulate statistics over all
124  // runs and have full statistics at the endRun of the last run, and second,
125  // because we set the efficiencyFlag so any further aggregation should produce
126  // correct results. Also, all operations should be idempotent; running them
127  // more than once does no harm.
128 
130 
131  if (runOnEndJob_) {
132  makeAllPlots(ibooker, igetter);
133  }
134 
135  if (!outputFileName_.empty())
137 }
138 
139 // the main method that creates the plots
141  // Process wildcard in the sub-directory
142  std::set<std::string> subDirSet;
143  for (auto& subDir : subDirs_) {
144  if (subDir[subDir.size() - 1] == '/')
145  subDir.erase(subDir.size() - 1);
146 
147  if (TString(subDir).Contains(metacharacters_)) {
148  isWildcardUsed_ = true;
149 
150  const std::string::size_type shiftPos = subDir.rfind('/');
151  const std::string searchPath = subDir.substr(0, shiftPos);
152  const std::string pattern = subDir.substr(shiftPos + 1, subDir.length());
153 
154  findAllSubdirectories(ibooker, igetter, searchPath, &subDirSet, pattern);
155 
156  } else {
157  subDirSet.insert(subDir);
158  }
159  }
160 
161  // loop through all sub-directories
162  // from the current implementation of the HLTGenValSource, we expect all histograms in a single directory
163  // however, this module is also capable of handling sub-directories, if needed
164  for (std::set<std::string>::const_iterator iSubDir = subDirSet.begin(); iSubDir != subDirSet.end(); ++iSubDir) {
165  const std::string& dirName = *iSubDir;
166 
167 // construct efficiency options automatically from systematically named histograms
168  const auto contents = igetter.getAllContents(dirName);
169  for (const auto& content : contents) {
170  // splitting the input string
171  std::string name = content->getName();
172  std::vector<std::string> seglist;
173  size_t pos = 0;
175  while ((pos = name.find(separator_)) != std::string::npos) {
176  token = name.substr(0, pos);
177  seglist.push_back(token);
178  name.erase(0, pos + separator_.length());
179  }
180  seglist.push_back(name);
181 
182  if (seglist.size() == 4 ||
183  seglist.size() ==
184  5) { // this should be the only "proper" files we want to look at. 5 means that a custom tag was set!
185  if (seglist.at(2) == "GEN")
186  continue; // this is the "before" hist, we won't create an effiency from this alone
187 
188  // if a fifth entry exists, it is expected to be the custom tag
189  std::string tag = "";
190  if (seglist.size() == 5)
191  tag = seglist.at(4);
192 
193 // first we determine whether we have the 1D or 2D case
194  if (seglist.at(3).rfind("2D", 0) == 0) {
195  // 2D case
197  opt.name = seglist.at(0) + separator_ + seglist.at(1) + separator_ + seglist.at(2) + separator_ +
198  seglist.at(3) + separator_ + "eff"; // efficiency histogram name
199  opt.title = seglist.at(0) + " " + seglist.at(1) + " " + seglist.at(2) + " " + seglist.at(3) +
200  " efficiency"; // efficiency histogram title
201  opt.numerator = content->getName(); // numerator histogram (after a filter)
202  opt.denominator = seglist.at(0) + separator_ + seglist.at(1) + separator_ + "GEN" + separator_ +
203  seglist.at(3); // denominator histogram (before all filters)
204 
205  efficOptions_.push_back(opt);
206 
207  } else {
208  // 1D case
210  opt.name = seglist.at(0) + separator_ + seglist.at(1) + separator_ + seglist.at(2) + separator_ +
211  seglist.at(3) + separator_ + "eff"; // efficiency histogram name
212  opt.title = seglist.at(0) + " " + seglist.at(1) + " " + seglist.at(2) + " " + seglist.at(3) +
213  " efficiency"; // efficiency histogram title
214  opt.numerator = content->getName(); // numerator histogram (after a filter)
215  opt.denominator = seglist.at(0) + separator_ + seglist.at(1) + separator_ + "GEN" + separator_ +
216  seglist.at(3); // denominator histogram (before all filters)
217 
218  // propagating the custom tag to the efficiency
219  if (!tag.empty()) {
220  opt.name += separator_ + tag;
221  opt.title += " " + tag;
222  opt.denominator += separator_ + tag;
223  }
224 
225  efficOptions_.push_back(opt);
226  }
227  }
228  }
229 
230  // now that we have all EfficOptions, we create the histograms
231  for (const auto& efficOption : efficOptions_) {
232  computeEfficiency(ibooker,
233  igetter,
234  dirName,
235  efficOption.name,
236  efficOption.title,
237  efficOption.numerator,
238  efficOption.denominator);
239  }
240  }
241 }
242 
243 // main method of efficiency computation, called once for each EfficOption
245  DQMStore::IGetter& igetter,
246  const std::string& dirName,
247  const std::string& efficMEName,
248  const std::string& efficMETitle,
249  const std::string& numeratorMEName,
250  const std::string& denominatorMEName) {
251  // checking if directory exists
252  if (!igetter.dirExists(dirName)) {
253  if (verbose_ >= 2 || (verbose_ == 1 && !isWildcardUsed_)) {
254  edm::LogError("HLTGenValClient") << "computeEfficiency() : "
255  << "Cannot find sub-directory " << dirName << std::endl;
256  }
257  return;
258  }
259 
260  ibooker.cd();
261 
262  // getting input MEs
263  HLTGenValClient::MonitorElement* denominatorME = igetter.get(dirName + "/" + denominatorMEName);
264  HLTGenValClient::MonitorElement* numeratorME = igetter.get(dirName + "/" + numeratorMEName);
265 
266 // checking if input MEs exist
267  if (!denominatorME) {
268  if (verbose_ >= 2 || (verbose_ == 1 && !isWildcardUsed_)) {
269  edm::LogError("HLTGenValClient") << "computeEfficiency() : "
270  << "No denominator-ME '" << denominatorMEName << "' found\n";
271  }
272  return;
273  }
274  if (!numeratorME) {
275  if (verbose_ >= 2 || (verbose_ == 1 && !isWildcardUsed_)) {
276  edm::LogError("HLTGenValClient") << "computeEfficiency() : "
277  << "No numerator-ME '" << numeratorMEName << "' found\n";
278  }
279  return;
280  }
281 
282  // Treat everything as the base class, TH1
283  TH1* hDenominator = denominatorME->getTH1();
284  TH1* hNumerator = numeratorME->getTH1();
285 
286  // check if TH1 extraction has succeeded
287  if (!hDenominator || !hNumerator) {
288  if (verbose_ >= 2 || (verbose_ == 1 && !isWildcardUsed_)) {
289  edm::LogError("HLTGenValClient") << "computeEfficiency() : "
290  << "Cannot create TH1 from ME\n";
291  }
292  return;
293  }
294 
295  // preparing efficiency output path and name
296  std::string efficDir = dirName;
297  std::string newEfficMEName = efficMEName;
298  std::string::size_type shiftPos;
299  if (std::string::npos != (shiftPos = efficMEName.rfind('/'))) {
300  efficDir += "/" + efficMEName.substr(0, shiftPos);
301  newEfficMEName.erase(0, shiftPos + 1);
302  }
303  ibooker.setCurrentFolder(efficDir);
304 
305  // creating the efficiency MonitorElement
306  HLTGenValClient::MonitorElement* efficME = nullptr;
307 
308  // We need to know what kind of TH1 we have
309  // That information is obtained from the class name of the hDenominator
310  // Then we use the appropriate booking function
311  TH1* efficHist = static_cast<TH1*>(hDenominator->Clone(newEfficMEName.c_str()));
312  efficHist->SetDirectory(nullptr);
313  efficHist->SetTitle(efficMETitle.c_str());
314  TClass* myHistClass = efficHist->IsA();
315  std::string histClassName = myHistClass->GetName();
316  if (histClassName == "TH1F") {
317  efficME = ibooker.book1D(newEfficMEName, (TH1F*)efficHist);
318  } else if (histClassName == "TH2F") {
319  efficME = ibooker.book2D(newEfficMEName, (TH2F*)efficHist);
320  } else if (histClassName == "TH3F") {
321  efficME = ibooker.book3D(newEfficMEName, (TH3F*)efficHist);
322  }
323  delete efficHist;
324 
325 // checking whether efficME was successfully created
326  if (!efficME) {
327  edm::LogError("HLTGenValClient") << "computeEfficiency() : "
328  << "Cannot book effic-ME from the DQM\n";
329  return;
330  }
331 
332  // actually calculating the efficiency and filling the ME
333  genericEff(hDenominator, hNumerator, efficME);
334  efficME->setEntries(denominatorME->getEntries());
335 
336 // Putting total efficiency in "Global efficiencies" histogram
337  if (makeGlobalEffPlot_) {
338  // getting global efficiency ME
339  HLTGenValClient::MonitorElement* globalEfficME = igetter.get(efficDir + "/globalEfficiencies");
340  if (!globalEfficME) // in case it does not exist yet, we create it
341  globalEfficME = ibooker.book1D("globalEfficiencies", "Global efficiencies", 1, 0, 1);
342  if (!globalEfficME) { // error handling in case creation failed
343  edm::LogError("HLTGenValClient") << "computeEfficiency() : "
344  << "Cannot book globalEffic-ME from the DQM\n";
345  return;
346  }
347  globalEfficME->setEfficiencyFlag();
348 
349  // extracting histogram
350  TH1F* hGlobalEffic = globalEfficME->getTH1F();
351  if (!hGlobalEffic) {
352  edm::LogError("HLTGenValClient") << "computeEfficiency() : "
353  << "Cannot create TH1F from ME, globalEfficME\n";
354  return;
355  }
356 
357  // getting total counts
358  const float nDenominatorAll = hDenominator->GetEntries();
359  const float nNumeratorAll = hNumerator->GetEntries();
360 
361  // calculating total efficiency
362  float efficAll = 0;
363  float errorAll = 0;
364  efficAll = nDenominatorAll ? nNumeratorAll / nDenominatorAll : 0;
365  errorAll = nDenominatorAll && efficAll < 1 ? sqrt(efficAll * (1 - efficAll) / nDenominatorAll) : 0;
366 
367  // Filling the histogram bin
368  const int iBin = hGlobalEffic->Fill(newEfficMEName.c_str(), 0);
369  hGlobalEffic->SetBinContent(iBin, efficAll);
370  hGlobalEffic->SetBinError(iBin, errorAll);
371  }
372 }
373 
374 // method to find all subdirectories of the given directory
375 // goal is to fill myList with paths to all subdirectories
377  DQMStore::IGetter& igetter,
379  std::set<std::string>* myList,
380  const TString& _pattern = TString("")) {
381  TString patternTmp = _pattern;
382 
383  // checking if directory exists
384  if (!igetter.dirExists(dir)) {
385  edm::LogError("HLTGenValClient") << " HLTGenValClient::findAllSubdirectories ==> Missing folder " << dir << " !!!";
386  return;
387  }
388 
389  // replacing wildcards
390  if (patternTmp != "") {
391  if (patternTmp.Contains(nonPerlWildcard_))
392  patternTmp.ReplaceAll("*", ".*");
393  TPRegexp regexp(patternTmp);
394  ibooker.cd(dir);
395  std::vector<std::string> foundDirs = igetter.getSubdirs();
396  for (const auto& iDir : foundDirs) {
397  TString dirName = iDir.substr(iDir.rfind('/') + 1, iDir.length());
398  if (dirName.Contains(regexp))
399  findAllSubdirectories(ibooker, igetter, iDir, myList);
400  }
401  } else if (igetter.dirExists(dir)) {
402  // we have found a subdirectory - adding it to the list
403  myList->insert(dir);
404 
405  // moving into the found subdirectory and recursively continue
406  ibooker.cd(dir);
407  findAllSubdirectories(ibooker, igetter, dir, myList, "*");
408 
409  } else {
410  // error handling in case found directory does not exist
411  edm::LogError("HLTGenValClient") << "Trying to find sub-directories of " << dir << " failed because " << dir
412  << " does not exist";
413  }
414  return;
415 }
416 
417 // efficiency calculation from two histograms
418 void HLTGenValClient::genericEff(TH1* denom, TH1* numer, MonitorElement* efficiencyHist) {
419  // looping over all bins. Up to three dimentions can be handled
420  // in case of less dimensions, the inner for loops are excecuted only once
421  for (int iBinX = 1; iBinX < denom->GetNbinsX() + 1; iBinX++) {
422  for (int iBinY = 1; iBinY < denom->GetNbinsY() + 1; iBinY++) {
423  for (int iBinZ = 1; iBinZ < denom->GetNbinsZ() + 1; iBinZ++) {
424  int globalBinNum = denom->GetBin(iBinX, iBinY, iBinZ);
425 
426  // getting numerator and denominator values
427  float numerVal = numer->GetBinContent(globalBinNum);
428  float denomVal = denom->GetBinContent(globalBinNum);
429 
430  // calculating effiency
431  float effVal = 0;
432  effVal = denomVal ? numerVal / denomVal : 0;
433 
434  // calculating error
435  float errVal = 0;
436  errVal = (denomVal && (effVal <= 1)) ? sqrt(effVal * (1 - effVal) / denomVal) : 0;
437 
438  // inserting value into the efficiency histogram
439  efficiencyHist->setBinContent(globalBinNum, effVal);
440  efficiencyHist->setBinError(globalBinNum, errVal);
441  efficiencyHist->setEfficiencyFlag();
442  }
443  }
444  }
445 }
446 
std::vector< std::string > subDirs_
HLTGenValClient(const edm::ParameterSet &pset)
void dqmEndLuminosityBlock(DQMStore::IBooker &ibooker, DQMStore::IGetter &igetter, const edm::LuminosityBlock &lumiSeg, const edm::EventSetup &c) override
std::vector< EfficOption > efficOptions_
TPRegexp metacharacters_
void computeEfficiency(DQMStore::IBooker &ibooker, DQMStore::IGetter &igetter, const std::string &dirName, const std::string &efficMEName, const std::string &efficMETitle, const std::string &numeratorMEName, const std::string &denominatorMEName)
const std::string separator_
void genericEff(TH1 *denom, TH1 *numer, MonitorElement *efficiencyHist)
virtual void setCurrentFolder(std::string const &fullpath)
Definition: DQMStore.cc:36
virtual void setEntries(double nentries)
set # of entries
TPRegexp nonPerlWildcard_
unsigned int verbose_
virtual bool dirExists(std::string const &path) const
Definition: DQMStore.cc:769
Log< level::Error, false > LogError
uint16_t size_type
~HLTGenValClient() override
std::string outputFileName_
void makeAllPlots(DQMStore::IBooker &, DQMStore::IGetter &)
void findAllSubdirectories(DQMStore::IBooker &ibooker, DQMStore::IGetter &igetter, std::string dir, std::set< std::string > *myList, const TString &pattern)
T sqrt(T t)
Definition: SSEVec.h:23
virtual std::vector< dqm::harvesting::MonitorElement * > getAllContents(std::string const &path) const
Definition: DQMStore.cc:641
void dqmEndRun(DQMStore::IBooker &, DQMStore::IGetter &, edm::Run const &, edm::EventSetup const &) override
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
virtual double getEntries() const
get # of entries
virtual void setBinContent(int binx, double content)
set content of bin (1-D)
virtual TH1F * getTH1F() const
MonitorElement * book2D(TString const &name, TString const &title, int nchX, double lowX, double highX, int nchY, double lowY, double highY, FUNC onbooking=NOOP())
Definition: DQMStore.h:221
virtual MonitorElement * get(std::string const &fullpath) const
Definition: DQMStore.cc:712
virtual TH1 * getTH1() const
DQM_DEPRECATED void save(std::string const &filename, std::string const &path="")
Definition: DQMStore.cc:824
virtual void setBinError(int binx, double error)
set uncertainty on content of bin (1-D)
MonitorElement * book1D(TString const &name, TString const &title, int const nchX, double const lowX, double const highX, FUNC onbooking=NOOP())
Definition: DQMStore.h:98
MonitorElement * book3D(TString const &name, TString const &title, int nchX, double lowX, double highX, int nchY, double lowY, double highY, int nchZ, double lowZ, double highZ, FUNC onbooking=NOOP())
Definition: DQMStore.h:376
void dqmEndJob(DQMStore::IBooker &, DQMStore::IGetter &) override
Definition: Run.h:45
virtual DQM_DEPRECATED std::vector< std::string > getSubdirs() const
Definition: DQMStore.cc:739