CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
GaussianSumUtilities1D.cc
Go to the documentation of this file.
3 
4 // #include "Utilities/Timing/interface/TimingReport.h"
5 // #include "Utilities/Timing/interface/TimerStack.h"
6 
7 // #include "TROOT.h"
8 // #include "TMath.h"
9 #include "Math/ProbFuncMathCore.h"
10 #include "Math/PdfFuncMathCore.h"
11 #include "Math/QuantFuncMathCore.h"
12 
13 #include <iostream>
14 #include <cmath>
15 
16 #include <map>
17 #include <functional>
18 #include <numeric>
19 #include <algorithm>
20 
21 
22 double GaussianSumUtilities1D::pdf(unsigned int i, double x) const {
23  return weight(i)*gauss(x,mean(i),standardDeviation(i));
24 }
25 
26 
27 double
28 GaussianSumUtilities1D::quantile (const double q) const
29 {
30  return ROOT::Math::gaussian_quantile(q,1.);
31 }
32 
33 
34 // double
35 // GaussianSumUtilities1D::quantile (const double q) const
36 // {
37 // //
38 // // mean and sigma of highest weight component
39 // //
40 // int iwMax(-1);
41 // float wMax(0.);
42 // for ( unsigned int i=0; i<size(); i++ ) {
43 // if ( weight(i)>wMax ) {
44 // iwMax = i;
45 // wMax = weight(i);
46 // }
47 // }
48 // //
49 // // Find start values: begin with mean and
50 // // go towards f(x)=0 until two points on
51 // // either side are found (assumes monotonously
52 // // increasing function)
53 // //
54 // double x1(mean(iwMax));
55 // double y1(cdf(x1)-q);
56 // double dx = y1>0. ? -standardDeviation(iwMax) : standardDeviation(iwMax);
57 // double x2(x1+dx);
58 // double y2(cdf(x2)-q);
59 // while ( y1*y2>0. ) {
60 // x1 = x2;
61 // y1 = y2;
62 // x2 += dx;
63 // y2 = cdf(x2) - q;
64 // }
65 // //
66 // // now use bisection to find final value
67 // //
68 // double x(0.);
69 // while ( true ) {
70 // // use linear interpolation
71 // x = -(x2*y1-x1*y2)/(y2-y1);
72 // double y = cdf(x) - q;
73 // if ( fabs(y)<1.e-6 ) break;
74 // if ( y*y1>0. ) {
75 // y1 = y;
76 // x1 = x;
77 // }
78 // else {
79 // y2 = y;
80 // x2 = x;
81 // }
82 // }
83 // return x;
84 // }
85 
86 bool
88 {
90  return theModeStatus==Valid;
91 }
92 
95 {
97  return theMode;
98 }
99 
100 void
102 {
103 // TimerStack tstack;
104 // tstack.benchmark("GSU1D::benchmark",100000);
105 // FastTimerStackPush(tstack,"GaussianSumUtilities1D::computeMode");
107  //
108  // check for "degenerate" states (identical values with vanishing error)
109  //
110  if ( theState.variance()<1.e-14 ) {
112  theState.weight());
114  return;
115  }
116  //
117  // Use means of individual components as start values.
118  // Sort by value of pdf.
119  //
120  typedef std::multimap<double, int, std::greater<double> > StartMap;
121  StartMap xStart;
122  for ( unsigned int i=0; i<size(); i++ ) {
123  xStart.insert(std::make_pair(pdf(mean(i)),i));
124  }
125  //
126  // Now try with each start value
127  //
128  int iRes(-1); // index of start component for current estimate
129  double xRes(mean((*xStart.begin()).second)); // current estimate of mode
130  double yRes(-1.); // pdf at current estimate of mode
131 // std::pair<double,double> result(-1.,mean((*xStart.begin()).second));
132  for ( StartMap::const_iterator i=xStart.begin(); i!=xStart.end(); i++ ) {
133  //
134  // Convergence radius for a single Gaussian = 1 sigma: don't try
135  // start values within 1 sigma of the current solution
136  //
137  if ( theModeStatus==Valid &&
138  fabs(mean((*i).second)-mean(iRes))/standardDeviation(iRes)<1. ) continue;
139  //
140  // If a solution exists: drop as soon as the pdf at
141  // start value drops to < 75% of maximum (try to avoid
142  // unnecessary searches for the mode)
143  //
144  if ( theModeStatus==Valid &&
145  (*i).first/(*xStart.begin()).first<0.75 ) break;
146  //
147  // Try to find mode
148  //
149  double x;
150  double y;
151  bool valid = findMode(x,y,mean((*i).second),standardDeviation((*i).second));
152  //
153  // consider only successful searches
154  //
155  if ( valid ) { //...
156  //
157  // update result only for significant changes in pdf(solution)
158  //
159  if ( yRes<0. || (y-yRes)/(y+yRes)>1.e-10 ) {
160  iRes = (*i).second; // store index
161  theModeStatus = Valid; // update status
162  xRes = x; // current solution
163  yRes = y; // and its pdf value
164 // result = std::make_pair(y,x); // store solution and pdf(solution)
165  }
166  } //...
167  }
168  //
169  // check (existance of) solution
170  //
171  if ( theModeStatus== Valid ) {
172  //
173  // Construct single Gaussian state with
174  // mean = mode
175  // variance = local variance at mode
176  // weight such that the pdf's of the mixture and the
177  // single state are equal at the mode
178  //
179  double mode = xRes;
180  double varMode = localVariance(mode);
181  double wgtMode = pdf(mode)*sqrt(2*M_PI*varMode);
182  theMode = SingleGaussianState1D(mode,varMode,wgtMode);
183  }
184  else {
185  //
186  // mode finding failed: set solution to highest component
187  // (alternative would be global mean / variance ..?)
188  //
189  //two many log warnings to actually be useful - comment out
190  //edm::LogWarning("GaussianSumUtilities") << "1D mode calculation failed";
191 // double x = mean();
192 // double y = pdf(x);
193 // result = std::make_pair(y,x);
194 // theMode = SingleGaussianState1D(mean(),variance(),weight());
195  //
196  // look for component with highest value at mean
197  //
198  int icMax(0);
199  double ySqMax(0.);
200  int s = size();
201  for (int ic=0; ic<s; ++ic ) {
202  double w = weight(ic);
203  double ySq = w*w/variance(ic);
204  if ( ySq>ySqMax ) {
205  icMax = ic;
206  ySqMax = ySq;
207  }
208  }
210  }
211 
212 }
213 
214 bool
215 GaussianSumUtilities1D::findMode (double& xMode, double& yMode,
216  const double& xStart,
217  const double& scale) const
218 {
219  //
220  // try with Newton on (lnPdf)'(x)
221  //
222  double x1(0.);
223  double y1(0.);
224  FinderState state(size());
225  update(state,xStart);
226 
227  double xmin(xStart-1.*scale);
228  double xmax(xStart+1.*scale);
229  //
230  // preset result
231  //
232  bool result(false);
233  xMode = state.x;
234  yMode = state.y;
235  //
236  // Iterate
237  //
238  int nLoop(0);
239  while ( nLoop++<20 ) {
240  if ( nLoop>1 && state.yd2<0. &&
241  ( fabs(state.yd*scale)<1.e-10 || fabs(state.y-y1)/(state.y+y1)<1.e-14 ) ) {
242  result = true;
243  break;
244  }
245  if ( fabs(state.yd2)<std::numeric_limits<float>::min() )
247  double dx = -state.yd/state.yd2;
248  x1 = state.x;
249  y1 = state.y;
250  double x2 = x1 + dx;
251  if ( state.yd2>0. && (x2<xmin||x2>xmax) ) return false;
252  update(state,x2);
253  }
254  //
255  // result
256  //
257  if ( result ) {
258  xMode = state.x;
259  yMode = state.y;
260  }
261  return result;
262 }
263 
264 
265 void GaussianSumUtilities1D::update(FinderState & state, double x) const {
266  state.x = x;
267 
268  pdfComponents(state.x, state.pdfs);
269  state.y = pdf(state.x, state.pdfs);
270  state.yd = 0;
271  if (state.y>std::numeric_limits<double>::min()) state.yd= d1Pdf(state.x,state.pdfs)/state.y;
272  state.yd2 = -state.yd*state.yd;
273  if (state.y > std::numeric_limits<double>::min()) state.yd2 += d2Pdf(state.x,state.pdfs)/state.y;
274 }
275 
276 
277 double
279 {
280  double result(0.);
281  size_t s=size();
282  for ( unsigned int i=0; i<s; i++ )
283  result += pdf(i,x);
284  return result;
285 }
286 
287 double
288 GaussianSumUtilities1D::cdf (const double& x) const
289 {
290  double result(0.);
291  size_t s=size();
292  for ( unsigned int i=0; i<s; i++ )
293  result += weight(i)*gaussInt(x,mean(i),standardDeviation(i));
294  return result;
295 }
296 
297 double
298 GaussianSumUtilities1D::d1Pdf (const double& x) const
299 {
300  return d1Pdf(x,pdfComponents(x));
301 }
302 
303 double
304 GaussianSumUtilities1D::d2Pdf (const double& x) const
305 {
306  return d2Pdf(x,pdfComponents(x));
307 }
308 
309 double
310 GaussianSumUtilities1D::d3Pdf (const double& x) const
311 {
312  return d3Pdf(x,pdfComponents(x));
313 }
314 
315 double
316 GaussianSumUtilities1D::lnPdf (const double& x) const
317 {
318  return lnPdf(x,pdfComponents(x));
319 }
320 
321 double
322 GaussianSumUtilities1D::d1LnPdf (const double& x) const
323 {
324  return d1LnPdf(x,pdfComponents(x));
325 }
326 
327 double
328 GaussianSumUtilities1D::d2LnPdf (const double& x) const
329 {
330  return d2LnPdf(x,pdfComponents(x));
331 }
332 
333 std::vector<double>
335 {
336  std::vector<double> result;
337  result.reserve(size());
338  for ( unsigned int i=0; i<size(); i++ )
339  result.push_back(weight(i)*gauss(x,mean(i),standardDeviation(i)));
340  return result;
341 }
342 
343 namespace {
344  struct PDF {
345  PDF(double ix) : x(ix){}
346  double x;
347  double operator()(SingleGaussianState1D const & sg) const {
348  return sg.weight()*ROOT::Math::gaussian_pdf(x,sg.standardDeviation(),sg.mean());
349  }
350  };
351 }
352 void GaussianSumUtilities1D::pdfComponents (double x, std::vector<double> & result) const {
353  size_t s = size();
354  if (s!=result.size()) result.resize(s);
355  std::transform(components().begin(),components().end(),result.begin(),PDF(x));
356 }
357 /*
358 void GaussianSumUtilities1D::pdfComponents (double x, std::vector<double> & result) const {
359  size_t s = size();
360  if (s!=result.size()) result.resize(s);
361  double* __restrict__ v = &result.front();
362  SingleGaussianState1D const * __restrict__ sgv = &components().front();
363  for ( unsigned int i=0; i<s; i++ )
364  v[i]= sgv[i].weight()*gauss(x,sgv[i].mean(),sgv[i].standardDeviation());
365 // result[i]=pdf(i,x);
366 }
367 */
368 
369 double
370 GaussianSumUtilities1D::pdf (double, const std::vector<double>& pdfs)
371 {
372  return std::accumulate(pdfs.begin(),pdfs.end(),0.);
373 }
374 
375 double
376 GaussianSumUtilities1D::d1Pdf (double x, const std::vector<double>& pdfs) const
377 {
378  double result(0.);
379  size_t s=size();
380  for ( unsigned int i=0; i<s; i++ ) {
381  double dx = (x-mean(i))/standardDeviation(i);
382  result += -pdfs[i]*dx/standardDeviation(i);
383  }
384  return result;
385 }
386 
387 double
388 GaussianSumUtilities1D::d2Pdf (double x, const std::vector<double>& pdfs) const
389 {
390  double result(0.);
391  size_t s=size();
392  for ( unsigned int i=0; i<s; i++ ) {
393  double dx = (x-mean(i))/standardDeviation(i);
394  result += pdfs[i]/standardDeviation(i)/standardDeviation(i)*(dx*dx-1);
395  }
396  return result;
397 }
398 
399 double
400 GaussianSumUtilities1D::d3Pdf (double x, const std::vector<double>& pdfs) const
401 {
402  double result(0.);
403  size_t s=size();
404  for ( unsigned int i=0; i<s; i++ ) {
405  double dx = (x-mean(i))/standardDeviation(i);
407  (-dx*dx+3)*dx;
408  }
409  return result;
410 }
411 
412 double
413 GaussianSumUtilities1D::lnPdf (double x, const std::vector<double>& pdfs)
414 {
415  double f(pdf(x,pdfs));
417  if ( f>std::numeric_limits<double>::min() ) result = log(f);
418  return result;
419 }
420 
421 double
422 GaussianSumUtilities1D::d1LnPdf (double x, const std::vector<double>& pdfs) const
423 {
424 
425  double f = pdf(x,pdfs);
426  double result(d1Pdf(x,pdfs));
427  if ( f>std::numeric_limits<double>::min() ) result /= f;
428  else result = 0.;
429  return result;
430 }
431 
432 double
433 GaussianSumUtilities1D::d2LnPdf (double x, const std::vector<double>& pdfs) const
434 {
435 
436  double f = pdf(x,pdfs);
437  double df = d1LnPdf(x,pdfs);
438  double result(-df*df);
439  if ( f>std::numeric_limits<double>::min() ) result += d2Pdf(x,pdfs)/f;
440  return result;
441 }
442 
443 double
444 GaussianSumUtilities1D::gauss (double x, double mean, double sigma)
445 {
446 // const double fNorm(1./sqrt(2*M_PI));
447 // double result(0.);
448 
449 // double d((x-mean)/sigma);
450 // if ( fabs(d)<20. ) result = exp(-d*d/2.);
451 // result *= fNorm/sigma;
452 // return result;
453  return ROOT::Math::gaussian_pdf(x,sigma,mean);
454 }
455 
456 double
457 GaussianSumUtilities1D::gaussInt (double x, double mean, double sigma)
458 {
459  return ROOT::Math::normal_cdf(x,sigma,mean);
460 }
461 
462 double
464 {
465  double s0(0.);
466  double s1(0.);
467  int s = size();
468  SingleGaussianState1D const * __restrict__ sgv = &components().front();
469  for (int i=0; i<s; i++ )
470  s0 += sgv[i].weight();
471  for (int i=0; i<s; i++ )
472  s1 += sgv[i].weight()*sgv[i].mean();
473  return s1/s0;
474 }
475 
476 double
478 {
479  std::vector<double> pdfs;
480  pdfComponents(x,pdfs);
481  double result = -pdf(x,pdfs)/d2Pdf(x,pdfs);
482  // FIXME: wrong curvature seems to be non-existant but should add a proper recovery
483  if ( result<0. )
484  edm::LogWarning("GaussianSumUtilities") << "1D variance at mode < 0";
485  return result;
486 }
std::vector< double > pdfComponents(const double &) const
pdf components
int i
Definition: DBlmapReader.cc:9
double weight() const
combined weight
const double w
Definition: UKUtility.cc:23
const MultiGaussianState1D & theState
double d2Pdf(const double &) const
second derivative of the p.d.f.
double d3Pdf(const double &) const
third derivative of the p.d.f.
double mean() const
combined mean
double weight() const
combined weight
static double gauss(double, double, double)
Value of gaussian distribution.
SingleGaussianState1D theMode
bool findMode(double &mode, double &pdfAtMode, const double &xStart, const double &scale) const
unsigned int size() const
number of components
double d1Pdf(const double &) const
first derivative of the p.d.f.
double standardDeviation() const
standardDeviation
void computeMode() const
calculation of mode
U second(std::pair< T, U > const &p)
static double gaussInt(double, double, double)
Integrated value of gaussian distribution.
double mean() const
parameter vector
T sqrt(T t)
Definition: SSEVec.h:48
tuple result
Definition: query.py:137
double variance() const
combined covariance
const SingleGaussianState1D & mode() const
double quantile(const double) const
Quantile (i.e. x for a given value of the c.d.f.)
double f[11][100]
double pdf(unsigned int i, double x) const
pdf of a single component at x
#define end
Definition: vmac.h:37
T min(T a, T b)
Definition: MathUtil.h:58
#define M_PI
double variance() const
combined variance
double mean() const
combined mean
double weight() const
weight
double combinedMean() const
Mean value of combined state.
double d2LnPdf(const double &) const
second derivative of ln(pdf)
#define begin
Definition: vmac.h:30
double localVariance(double x) const
double standardDeviation(unsigned int i) const
standard deviation of a component
Definition: DDAxes.h:10
double lnPdf(const double &) const
ln(pdf)
void update(FinderState &state, double x) const
double cdf(const double &) const
value of the c.d.f.
double d1LnPdf(const double &) const
first derivative of ln(pdf)
const std::vector< SingleGaussianState1D > & components() const
components
bool modeIsValid() const
mode status
tuple log
Definition: cmsBatch.py:347