CMS 3D CMS Logo

ProcLikelihood.cc
Go to the documentation of this file.
1 // -*- C++ -*-
2 //
3 // Package: MVAComputer
4 // Class : ProcLikelihood
5 //
6 
7 // Implementation:
8 // A likelihood estimator variable processor. Reads in 0..n values for
9 // m variables and calculates the total signal/background likelihood
10 // using calibration PDFs for signal and background for each variable.
11 // The output variable is set to s/(s+b) (or log(s/b) for logOutput).
12 //
13 // Author: Christophe Saout
14 // Created: Sat Apr 24 15:18 CEST 2007
15 //
16 
17 #include <vector>
18 #include <memory>
19 #include <cmath>
20 
26 
27 using namespace PhysicsTools;
28 
29 namespace { // anonymous
30 
31  class ProcLikelihood : public VarProcessor {
32  public:
34 
35  ProcLikelihood(const char *name, const Calibration::ProcLikelihood *calib, const MVAComputer *computer);
36  ~ProcLikelihood() override {}
37 
38  void configure(ConfIterator iter, unsigned int n) override;
39  void eval(ValueIterator iter, unsigned int n) const override;
40  std::vector<double> deriv(ValueIterator iter, unsigned int n) const override;
41 
42  private:
43  struct PDF {
44  virtual ~PDF() {}
45  virtual double eval(double value) const = 0;
46  virtual double deriv(double value) const = 0;
47 
48  double norm;
49  };
50 
51  struct SplinePDF : public PDF {
52  SplinePDF(const Calibration::HistogramF *calib) : min(calib->range().min), width(calib->range().width()) {
53  std::vector<double> values(calib->values().begin() + 1, calib->values().end() - 1);
54  spline.set(values.size(), &values.front());
55  }
56 
57  double eval(double value) const override;
58  double deriv(double value) const override;
59 
60  double min, width;
61  Spline spline;
62  };
63 
64  struct HistogramPDF : public PDF {
65  HistogramPDF(const Calibration::HistogramF *calib) : histo(calib) {}
66 
67  double eval(double value) const override;
68  double deriv(double value) const override;
69 
71  };
72 
73  struct SigBkg {
74  SigBkg(const Calibration::ProcLikelihood::SigBkg &calib) {
75  if (calib.useSplines) {
76  signal = std::unique_ptr<PDF>(new SplinePDF(&calib.signal));
77  background = std::unique_ptr<PDF>(new SplinePDF(&calib.background));
78  } else {
79  signal = std::unique_ptr<PDF>(new HistogramPDF(&calib.signal));
80  background = std::unique_ptr<PDF>(new HistogramPDF(&calib.background));
81  }
82  double norm = (calib.signal.numberOfBins() + calib.background.numberOfBins()) / 2.0;
83  signal->norm = norm;
84  background->norm = norm;
85  }
86 
87  std::unique_ptr<PDF> signal;
88  std::unique_ptr<PDF> background;
89  };
90 
91  int findPDFs(ValueIterator iter,
92  unsigned int n,
93  std::vector<SigBkg>::const_iterator &begin,
94  std::vector<SigBkg>::const_iterator &end) const;
95 
96  std::vector<SigBkg> pdfs;
97  std::vector<double> bias;
98  int categoryIdx;
99  bool logOutput;
100  bool individual;
101  bool neverUndefined;
102  bool keepEmpty;
103  unsigned int nCategories;
104  };
105 
106  ProcLikelihood::Registry registry("ProcLikelihood");
107 
108  double ProcLikelihood::SplinePDF::eval(double value) const {
109  value = (value - min) / width;
110  return spline.eval(value) * norm / spline.getArea();
111  }
112 
113  double ProcLikelihood::SplinePDF::deriv(double value) const {
114  value = (value - min) / width;
115  return spline.deriv(value) * norm / spline.getArea();
116  }
117 
118  double ProcLikelihood::HistogramPDF::eval(double value) const { return histo->normalizedValue(value) * norm; }
119 
120  double ProcLikelihood::HistogramPDF::deriv(double value) const { return 0; }
121 
122  ProcLikelihood::ProcLikelihood(const char *name,
123  const Calibration::ProcLikelihood *calib,
124  const MVAComputer *computer)
125  : VarProcessor(name, calib, computer),
126  pdfs(calib->pdfs.begin(), calib->pdfs.end()),
127  bias(calib->bias),
128  categoryIdx(calib->categoryIdx),
129  logOutput(calib->logOutput),
130  individual(calib->individual),
131  neverUndefined(calib->neverUndefined),
132  keepEmpty(calib->keepEmpty),
133  nCategories(1) {}
134 
135  void ProcLikelihood::configure(ConfIterator iter, unsigned int n) {
136  if (categoryIdx >= 0) {
137  if ((int)n < categoryIdx + 1)
138  return;
139  nCategories = pdfs.size() / (n - 1);
140  if (nCategories * (n - 1) != pdfs.size())
141  return;
142  if (!bias.empty() && bias.size() != nCategories)
143  return;
144  } else if (n != pdfs.size() || bias.size() > 1)
145  return;
146 
147  int i = 0;
148  while (iter) {
149  if (categoryIdx == i++)
150  iter++(Variable::FLAG_NONE);
151  else
152  iter++(Variable::FLAG_ALL);
153  }
154 
155  if (individual) {
156  for (unsigned int i = 0; i < pdfs.size(); i += nCategories)
157  iter << (neverUndefined ? Variable::FLAG_NONE : Variable::FLAG_OPTIONAL);
158  } else
159  iter << (neverUndefined ? Variable::FLAG_NONE : Variable::FLAG_OPTIONAL);
160  }
161 
162  int ProcLikelihood::findPDFs(ValueIterator iter,
163  unsigned int n,
164  std::vector<SigBkg>::const_iterator &begin,
165  std::vector<SigBkg>::const_iterator &end) const {
166  int cat;
167  if (categoryIdx >= 0) {
168  ValueIterator iter2 = iter;
169  for (int i = 0; i < categoryIdx; i++)
170  ++iter2;
171 
172  cat = (int)*iter2;
173  if (cat < 0 || (unsigned int)cat >= nCategories)
174  return -1;
175 
176  begin = pdfs.begin() + cat * (n - 1);
177  end = begin + (n - 1);
178  } else {
179  cat = 0;
180  begin = pdfs.begin();
181  end = pdfs.end();
182  }
183 
184  return cat;
185  }
186 
187  void ProcLikelihood::eval(ValueIterator iter, unsigned int n) const {
188  std::vector<SigBkg>::const_iterator pdf, last;
189  int cat = findPDFs(iter, n, pdf, last);
190  int vars = 0;
191  long double signal = bias.empty() ? 1.0 : bias[cat];
192  long double background = 1.0;
193 
194  if (cat < 0) {
195  if (individual)
196  for (unsigned int i = 0; i < n; i++)
197  iter();
198  else
199  iter();
200  return;
201  }
202 
203  for (int i = 0; pdf != last; ++iter, i++) {
204  if (i == categoryIdx)
205  continue;
206 
207  for (double *value = iter.begin(); value < iter.end(); value++) {
208  double signalProb = std::max(0.0, pdf->signal->eval(*value));
209  double backgroundProb = std::max(0.0, pdf->background->eval(*value));
210  if (!keepEmpty && !individual && signalProb + backgroundProb < 1.0e-20)
211  continue;
212  vars++;
213 
214  if (individual) {
215  signalProb *= signal;
216  backgroundProb *= background;
217  if (logOutput) {
218  if (signalProb < 1.0e-9 && backgroundProb < 1.0e-9) {
219  if (!neverUndefined)
220  continue;
221  iter << 0.0;
222  } else if (signalProb < 1.0e-9)
223  iter << -99999.0;
224  else if (backgroundProb < 1.0e-9)
225  iter << +99999.0;
226  else
227  iter << (std::log(signalProb) - std::log(backgroundProb));
228  } else {
229  double sum = signalProb + backgroundProb;
230  if (sum > 1.0e-9)
231  iter << (signalProb / sum);
232  else if (neverUndefined)
233  iter << 0.5;
234  }
235  } else {
236  signal *= signalProb;
237  background *= backgroundProb;
238  }
239  }
240 
241  ++pdf;
242  if (individual)
243  iter();
244  }
245 
246  if (!individual) {
247  if (!vars || signal + background < std::exp(-7 * vars - 3)) {
248  if (neverUndefined)
249  iter(logOutput ? 0.0 : 0.5);
250  else
251  iter();
252  } else if (logOutput) {
253  if (signal < 1.0e-9 && background < 1.0e-9) {
254  if (neverUndefined)
255  iter(0.0);
256  else
257  iter();
258  } else if (signal < 1.0e-9)
259  iter(-99999.0);
260  else if (background < 1.0e-9)
261  iter(+99999.0);
262  else
263  iter(std::log(signal) - std::log(background));
264  } else
265  iter(signal / (signal + background));
266  }
267  }
268 
269  std::vector<double> ProcLikelihood::deriv(ValueIterator iter, unsigned int n) const {
270  std::vector<SigBkg>::const_iterator pdf, last;
271  int cat = findPDFs(iter, n, pdf, last);
272  int vars = 0;
273  long double signal = bias.empty() ? 1.0 : bias[cat];
274  long double background = 1.0;
275 
276  std::vector<double> result;
277  if (cat < 0)
278  return result;
279 
280  unsigned int size = 0;
281  for (ValueIterator iter2 = iter; iter2; ++iter2)
282  size += iter2.size();
283 
284  // The logic whether a variable is used or net depends on the
285  // evaluation, so FFS copy the whole ****
286 
287  if (!individual)
288  result.resize(size);
289 
290  unsigned int j = 0;
291  for (int i = 0; pdf != last; ++iter, i++) {
292  if (i == categoryIdx) {
293  j += iter.size();
294  continue;
295  }
296 
297  for (double *value = iter.begin(); value < iter.end(); value++, j++) {
298  double signalProb = pdf->signal->eval(*value);
299  double signalDiff = pdf->signal->deriv(*value);
300  if (signalProb < 0.0)
301  signalProb = signalDiff = 0.0;
302 
303  double backgroundProb = pdf->background->eval(*value);
304  double backgroundDiff = pdf->background->deriv(*value);
305  if (backgroundProb < 0.0)
306  backgroundProb = backgroundDiff = 0.0;
307 
308  if (!keepEmpty && !individual && signalProb + backgroundProb < 1.0e-20)
309  continue;
310  vars++;
311 
312  if (individual) {
313  signalProb *= signal;
314  signalDiff *= signal;
315  backgroundProb *= background;
316  backgroundDiff *= background;
317  if (logOutput) {
318  if (signalProb < 1.0e-9 && backgroundProb < 1.0e-9) {
319  if (!neverUndefined)
320  continue;
321  result.resize(result.size() + size);
322  } else if (signalProb < 1.0e-9 || backgroundProb < 1.0e-9)
323  result.resize(result.size() + size);
324  else {
325  result.resize(result.size() + size);
326  result[result.size() - size + j] = signalDiff / signalProb - backgroundDiff / backgroundProb;
327  }
328  } else {
329  double sum = signalProb + backgroundProb;
330  if (sum > 1.0e-9) {
331  result.resize(result.size() + size);
332  result[result.size() - size + j] =
333  (signalDiff * backgroundProb - signalProb * backgroundDiff) / (sum * sum);
334  } else if (neverUndefined)
335  result.resize(result.size() + size);
336  }
337  } else {
338  signal *= signalProb;
339  background *= backgroundProb;
340  double s = signalDiff / signalProb;
341  if (edm::isNotFinite(s))
342  s = 0.0;
343  double b = backgroundDiff / backgroundProb;
344  if (edm::isNotFinite(b))
345  b = 0.0;
346 
347  result[j] = s - b;
348  }
349  }
350 
351  ++pdf;
352  }
353 
354  if (!individual) {
355  if (!vars || signal + background < std::exp(-7 * vars - 3)) {
356  if (neverUndefined)
357  std::fill(result.begin(), result.end(), 0.0);
358  else
359  result.clear();
360  } else if (logOutput) {
361  if (signal < 1.0e-9 && background < 1.0e-9) {
362  if (neverUndefined)
363  std::fill(result.begin(), result.end(), 0.0);
364  else
365  result.clear();
366  } else if (signal < 1.0e-9 || background < 1.0e-9)
367  std::fill(result.begin(), result.end(), 0.0);
368  else {
369  // should be ok
370  }
371  } else {
372  double factor = signal * background / ((signal + background) * (signal + background));
373  for (std::vector<double>::iterator p = result.begin(); p != result.end(); ++p)
374  *p *= factor;
375  }
376  }
377 
378  return result;
379  }
380 
381 } // anonymous namespace
size
Write out results.
constexpr bool isNotFinite(T x)
Definition: isFinite.h:9
template to generate a registry singleton for a type.
def cat(path)
Definition: eostools.py:401
Main interface class to the generic discriminator computer framework.
Definition: MVAComputer.h:39
#define end
Definition: vmac.h:39
Definition: value.py:1
T min(T a, T b)
Definition: MathUtil.h:58
const std::vector< Value_t > & values() const
Definition: Histogram.h:82
double b
Definition: hdecay.h:118
#define begin
Definition: vmac.h:32
vars
Definition: DeepTauId.cc:158
A simple class for cubic splines.
Definition: Spline.h:25
Common base class for variable processors.
Definition: VarProcessor.h:36