CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
ProcLikelihood.cc
Go to the documentation of this file.
1 // -*- C++ -*-
2 //
3 // Package: MVAComputer
4 // Class : ProcLikelihood
5 //
6 
7 // Implementation:
8 // A likelihood estimator variable processor. Reads in 0..n values for
9 // m variables and calculates the total signal/background likelihood
10 // using calibration PDFs for signal and background for each variable.
11 // The output variable is set to s/(s+b) (or log(s/b) for logOutput).
12 //
13 // Author: Christophe Saout
14 // Created: Sat Apr 24 15:18 CEST 2007
15 //
16 
17 #include <vector>
18 #include <memory>
19 #include <cmath>
20 
26 
27 using namespace PhysicsTools;
28 
29 namespace { // anonymous
30 
31 class ProcLikelihood : public VarProcessor {
32  public:
33  typedef VarProcessor::Registry::Registry<ProcLikelihood,
35 
36  ProcLikelihood(const char *name,
38  const MVAComputer *computer);
39  virtual ~ProcLikelihood() {}
40 
41  virtual void configure(ConfIterator iter, unsigned int n) override;
42  virtual void eval(ValueIterator iter, unsigned int n) const override;
43  virtual std::vector<double> deriv(
44  ValueIterator iter, unsigned int n) const override;
45 
46  private:
47  struct PDF {
48  virtual ~PDF() {}
49  virtual double eval(double value) const = 0;
50  virtual double deriv(double value) const = 0;
51 
52  double norm;
53  };
54 
55  struct SplinePDF : public PDF {
56  SplinePDF(const Calibration::HistogramF *calib) :
57  min(calib->range().min),
58  width(calib->range().width())
59  {
60  std::vector<double> values(
61  calib->values().begin() + 1,
62  calib->values().end() - 1);
63  spline.set(values.size(), &values.front());
64  }
65 
66  virtual double eval(double value) const override;
67  virtual double deriv(double value) const override;
68 
69  double min, width;
70  Spline spline;
71  };
72 
73  struct HistogramPDF : public PDF {
74  HistogramPDF(const Calibration::HistogramF *calib) :
75  histo(calib) {}
76 
77  virtual double eval(double value) const override;
78  virtual double deriv(double value) const override;
79 
81  };
82 
83  struct SigBkg {
84  SigBkg(const Calibration::ProcLikelihood::SigBkg &calib)
85  {
86  if (calib.useSplines) {
87  signal = std::auto_ptr<PDF>(
88  new SplinePDF(&calib.signal));
89  background = std::auto_ptr<PDF>(
90  new SplinePDF(&calib.background));
91  } else {
92  signal = std::auto_ptr<PDF>(
93  new HistogramPDF(&calib.signal));
94  background = std::auto_ptr<PDF>(
95  new HistogramPDF(&calib.background));
96  }
97  double norm = (calib.signal.numberOfBins() +
98  calib.background.numberOfBins()) / 2.0;
99  signal->norm = norm;
100  background->norm = norm;
101  }
102 
103  std::auto_ptr<PDF> signal;
104  std::auto_ptr<PDF> background;
105  };
106 
107  int findPDFs(ValueIterator iter, unsigned int n,
108  std::vector<SigBkg>::const_iterator &begin,
109  std::vector<SigBkg>::const_iterator &end) const;
110 
111  std::vector<SigBkg> pdfs;
112  std::vector<double> bias;
113  int categoryIdx;
114  bool logOutput;
115  bool individual;
116  bool neverUndefined;
117  bool keepEmpty;
118  unsigned int nCategories;
119 };
120 
121 static ProcLikelihood::Registry registry("ProcLikelihood");
122 
123 double ProcLikelihood::SplinePDF::eval(double value) const
124 {
125  value = (value - min) / width;
126  return spline.eval(value) * norm / spline.getArea();
127 }
128 
129 double ProcLikelihood::SplinePDF::deriv(double value) const
130 {
131  value = (value - min) / width;
132  return spline.deriv(value) * norm / spline.getArea();
133 }
134 
135 double ProcLikelihood::HistogramPDF::eval(double value) const
136 {
137  return histo->normalizedValue(value) * norm;
138 }
139 
140 double ProcLikelihood::HistogramPDF::deriv(double value) const
141 {
142  return 0;
143 }
144 
145 ProcLikelihood::ProcLikelihood(const char *name,
146  const Calibration::ProcLikelihood *calib,
147  const MVAComputer *computer) :
148  VarProcessor(name, calib, computer),
149  pdfs(calib->pdfs.begin(), calib->pdfs.end()),
150  bias(calib->bias),
151  categoryIdx(calib->categoryIdx), logOutput(calib->logOutput),
152  individual(calib->individual), neverUndefined(calib->neverUndefined),
153  keepEmpty(calib->keepEmpty), nCategories(1)
154 {
155 }
156 
157 void ProcLikelihood::configure(ConfIterator iter, unsigned int n)
158 {
159  if (categoryIdx >= 0) {
160  if ((int)n < categoryIdx + 1)
161  return;
162  nCategories = pdfs.size() / (n - 1);
163  if (nCategories * (n - 1) != pdfs.size())
164  return;
165  if (!bias.empty() && bias.size() != nCategories)
166  return;
167  } else if (n != pdfs.size() || bias.size() > 1)
168  return;
169 
170  int i = 0;
171  while(iter) {
172  if (categoryIdx == i++)
173  iter++(Variable::FLAG_NONE);
174  else
175  iter++(Variable::FLAG_ALL);
176  }
177 
178  if (individual) {
179  for(unsigned int i = 0; i < pdfs.size(); i += nCategories)
180  iter << (neverUndefined ? Variable::FLAG_NONE
181  : Variable::FLAG_OPTIONAL);
182  } else
183  iter << (neverUndefined ? Variable::FLAG_NONE
184  : Variable::FLAG_OPTIONAL);
185 }
186 
187 int ProcLikelihood::findPDFs(ValueIterator iter, unsigned int n,
188  std::vector<SigBkg>::const_iterator &begin,
189  std::vector<SigBkg>::const_iterator &end) const
190 {
191  int cat;
192  if (categoryIdx >= 0) {
193  ValueIterator iter2 = iter;
194  for(int i = 0; i < categoryIdx; i++)
195  ++iter2;
196 
197  cat = (int)*iter2;
198  if (cat < 0 || (unsigned int)cat >= nCategories)
199  return -1;
200 
201  begin = pdfs.begin() + cat * (n - 1);
202  end = begin + (n - 1);
203  } else {
204  cat = 0;
205  begin = pdfs.begin();
206  end = pdfs.end();
207  }
208 
209  return cat;
210 }
211 
212 void ProcLikelihood::eval(ValueIterator iter, unsigned int n) const
213 {
214  std::vector<SigBkg>::const_iterator pdf, last;
215  int cat = findPDFs(iter, n, pdf, last);
216  int vars = 0;
217  long double signal = bias.empty() ? 1.0 : bias[cat];
218  long double background = 1.0;
219 
220  if (cat < 0) {
221  if (individual)
222  for(unsigned int i = 0; i < n; i++)
223  iter();
224  else
225  iter();
226  return;
227  }
228 
229  for(int i = 0; pdf != last; ++iter, i++) {
230  if (i == categoryIdx)
231  continue;
232 
233  for(double *value = iter.begin();
234  value < iter.end(); value++) {
235  double signalProb =
236  std::max(0.0, pdf->signal->eval(*value));
237  double backgroundProb =
238  std::max(0.0, pdf->background->eval(*value));
239  if (!keepEmpty && !individual &&
240  signalProb + backgroundProb < 1.0e-20)
241  continue;
242  vars++;
243 
244  if (individual) {
245  signalProb *= signal;
246  backgroundProb *= background;
247  if (logOutput) {
248  if (signalProb < 1.0e-9 &&
249  backgroundProb < 1.0e-9) {
250  if (!neverUndefined)
251  continue;
252  iter << 0.0;
253  } else if (signalProb < 1.0e-9)
254  iter << -99999.0;
255  else if (backgroundProb < 1.0e-9)
256  iter << +99999.0;
257  else
258  iter << (std::log(signalProb) -
259  std::log(backgroundProb));
260  } else {
261  double sum =
262  signalProb + backgroundProb;
263  if (sum > 1.0e-9)
264  iter << (signalProb / sum);
265  else if (neverUndefined)
266  iter << 0.5;
267  }
268  } else {
269  signal *= signalProb;
270  background *= backgroundProb;
271  }
272  }
273 
274  ++pdf;
275  if (individual)
276  iter();
277  }
278 
279  if (!individual) {
280  if (!vars || signal + background < std::exp(-7 * vars - 3)) {
281  if (neverUndefined)
282  iter(logOutput ? 0.0 : 0.5);
283  else
284  iter();
285  } else if (logOutput) {
286  if (signal < 1.0e-9 && background < 1.0e-9) {
287  if (neverUndefined)
288  iter(0.0);
289  else
290  iter();
291  } else if (signal < 1.0e-9)
292  iter(-99999.0);
293  else if (background < 1.0e-9)
294  iter(+99999.0);
295  else
296  iter(std::log(signal) - std::log(background));
297  } else
298  iter(signal / (signal + background));
299  }
300 }
301 
302 std::vector<double> ProcLikelihood::deriv(ValueIterator iter,
303  unsigned int n) const
304 {
305  std::vector<SigBkg>::const_iterator pdf, last;
306  int cat = findPDFs(iter, n, pdf, last);
307  int vars = 0;
308  long double signal = bias.empty() ? 1.0 : bias[cat];
309  long double background = 1.0;
310 
311  std::vector<double> result;
312  if (cat < 0)
313  return result;
314 
315  unsigned int size = 0;
316  for(ValueIterator iter2 = iter; iter2; ++iter2)
317  size += iter2.size();
318 
319  // The logic whether a variable is used or net depends on the
320  // evaluation, so FFS copy the whole ****
321 
322  if (!individual)
323  result.resize(size);
324 
325  unsigned int j = 0;
326  for(int i = 0; pdf != last; ++iter, i++) {
327  if (i == categoryIdx) {
328  j += iter.size();
329  continue;
330  }
331 
332  for(double *value = iter.begin();
333  value < iter.end(); value++, j++) {
334  double signalProb = pdf->signal->eval(*value);
335  double signalDiff = pdf->signal->deriv(*value);
336  if (signalProb < 0.0)
337  signalProb = signalDiff = 0.0;
338 
339  double backgroundProb = pdf->background->eval(*value);
340  double backgroundDiff = pdf->background->deriv(*value);
341  if (backgroundProb < 0.0)
342  backgroundProb = backgroundDiff = 0.0;
343 
344  if (!keepEmpty && !individual &&
345  signalProb + backgroundProb < 1.0e-20)
346  continue;
347  vars++;
348 
349  if (individual) {
350  signalProb *= signal;
351  signalDiff *= signal;
352  backgroundProb *= background;
353  backgroundDiff *= background;
354  if (logOutput) {
355  if (signalProb < 1.0e-9 &&
356  backgroundProb < 1.0e-9) {
357  if (!neverUndefined)
358  continue;
359  result.resize(result.size() +
360  size);
361  } else if (signalProb < 1.0e-9 ||
362  backgroundProb < 1.0e-9)
363  result.resize(result.size() +
364  size);
365  else {
366  result.resize(result.size() +
367  size);
368  result[result.size() -
369  size + j] =
370  signalDiff /
371  signalProb -
372  backgroundDiff /
373  backgroundProb;
374  }
375  } else {
376  double sum =
377  signalProb + backgroundProb;
378  if (sum > 1.0e-9) {
379  result.resize(result.size() +
380  size);
381  result[result.size() -
382  size + j] =
383  (signalDiff *
384  backgroundProb -
385  signalProb *
386  backgroundDiff) /
387  (sum * sum);
388  } else if (neverUndefined)
389  result.resize(result.size() +
390  size);
391  }
392  } else {
393  signal *= signalProb;
394  background *= backgroundProb;
395  double s = signalDiff / signalProb;
396  if (edm::isNotFinite(s))
397  s = 0.0;
398  double b = backgroundDiff / backgroundProb;
399  if (edm::isNotFinite(b))
400  b = 0.0;
401 
402  result[j] = s - b;
403  }
404  }
405 
406  ++pdf;
407  }
408 
409  if (!individual) {
410  if (!vars || signal + background < std::exp(-7 * vars - 3)) {
411  if (neverUndefined)
412  std::fill(result.begin(), result.end(), 0.0);
413  else
414  result.clear();
415  } else if (logOutput) {
416  if (signal < 1.0e-9 && background < 1.0e-9) {
417  if (neverUndefined)
418  std::fill(result.begin(),
419  result.end(), 0.0);
420  else
421  result.clear();
422  } else if (signal < 1.0e-9 ||
423  background < 1.0e-9)
424  std::fill(result.begin(), result.end(), 0.0);
425  else {
426  // should be ok
427  }
428  } else {
429  double factor = signal * background /
430  ((signal + background) *
431  (signal + background));
432  for(std::vector<double>::iterator p = result.begin();
433  p != result.end(); ++p)
434  *p *= factor;
435  }
436  }
437 
438  return result;
439 }
440 
441 } // anonymous namespace
int i
Definition: DBlmapReader.cc:9
string fill
Definition: lumiContext.py:319
template to generate a registry singleton for a type.
MVATrainerComputer * calib
Definition: MVATrainer.cc:64
bool isNotFinite(T x)
Definition: isFinite.h:10
Main interface class to the generic discriminator computer framework.
Definition: MVAComputer.h:39
def cat
Definition: eostools.py:400
tuple result
Definition: query.py:137
int j
Definition: DBlmapReader.cc:9
#define end
Definition: vmac.h:37
T min(T a, T b)
Definition: MathUtil.h:58
const std::vector< Value_t > & values() const
Definition: Histogram.h:86
double b
Definition: hdecay.h:120
#define begin
Definition: vmac.h:30
static Interceptor::Registry registry("Interceptor")
tuple size
Write out results.
A simple class for cubic splines.
Definition: Spline.h:25
Common base class for variable processors.
Definition: VarProcessor.h:36
tuple log
Definition: cmsBatch.py:341