CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_6_1_2_SLHC4_patch1/src/TrackingTools/GsfTools/src/GaussianSumUtilities1D.cc

Go to the documentation of this file.
00001 #include "TrackingTools/GsfTools/interface/GaussianSumUtilities1D.h"
00002 #include "FWCore/MessageLogger/interface/MessageLogger.h"
00003 
00004 // #include "Utilities/Timing/interface/TimingReport.h"
00005 // #include "Utilities/Timing/interface/TimerStack.h"
00006 
00007 // #include "TROOT.h"
00008 // #include "TMath.h"
00009 #include "Math/ProbFuncMathCore.h"
00010 #include "Math/PdfFuncMathCore.h"
00011 #include "Math/QuantFuncMathCore.h"
00012 
00013 #include <iostream>
00014 #include <cmath>
00015 
00016 #include <map>
00017 #include <functional>
00018 #include <numeric>
00019 #include <algorithm>
00020 
00021 
00022 double GaussianSumUtilities1D::pdf(unsigned int i, double x)  const {
00023   return weight(i)*gauss(x,mean(i),standardDeviation(i));
00024 }
00025 
00026 
00027 double
00028 GaussianSumUtilities1D::quantile (const double q) const
00029 {
00030   return ROOT::Math::gaussian_quantile(q,1.);
00031 }
00032 
00033 
00034 // double
00035 // GaussianSumUtilities1D::quantile (const double q) const
00036 // {
00037 //   //
00038 //   // mean and sigma of highest weight component
00039 //   //
00040 //   int iwMax(-1);
00041 //   float wMax(0.);
00042 //   for ( unsigned int i=0; i<size(); i++ ) {
00043 //     if ( weight(i)>wMax ) {
00044 //       iwMax = i;
00045 //       wMax = weight(i);
00046 //     }
00047 //   }
00048 //   //
00049 //   // Find start values: begin with mean and
00050 //   // go towards f(x)=0 until two points on
00051 //   // either side are found (assumes monotonously
00052 //   // increasing function)
00053 //   //
00054 //   double x1(mean(iwMax));
00055 //   double y1(cdf(x1)-q);
00056 //   double dx = y1>0. ? -standardDeviation(iwMax) : standardDeviation(iwMax);
00057 //   double x2(x1+dx);
00058 //   double y2(cdf(x2)-q);
00059 //   while ( y1*y2>0. ) {
00060 //     x1 = x2;
00061 //     y1 = y2;
00062 //     x2 += dx;
00063 //     y2 = cdf(x2) - q;
00064 //   }
00065 //   //
00066 //   // now use bisection to find final value
00067 //   //
00068 //   double x(0.);
00069 //   while ( true ) {
00070 //     // use linear interpolation
00071 //     x = -(x2*y1-x1*y2)/(y2-y1);
00072 //     double y = cdf(x) - q;
00073 //     if ( fabs(y)<1.e-6 )  break;
00074 //     if ( y*y1>0. ) {
00075 //       y1 = y;
00076 //       x1 = x;
00077 //     }
00078 //     else {
00079 //       y2 = y;
00080 //       x2 = x;
00081 //     }
00082 //   }
00083 //   return x;
00084 // }
00085 
00086 bool
00087 GaussianSumUtilities1D::modeIsValid () const
00088 {
00089   if ( theModeStatus==NotComputed )  computeMode();
00090   return theModeStatus==Valid;
00091 }
00092 
00093 const SingleGaussianState1D&
00094 GaussianSumUtilities1D::mode () const
00095 {
00096   if ( theModeStatus==NotComputed )  computeMode();
00097   return theMode;
00098 }
00099 
00100 void
00101 GaussianSumUtilities1D::computeMode () const
00102 {
00103 //   TimerStack tstack;
00104 //   tstack.benchmark("GSU1D::benchmark",100000);
00105 //   FastTimerStackPush(tstack,"GaussianSumUtilities1D::computeMode");
00106   theModeStatus = NotValid;
00107   //
00108   // check for "degenerate" states (identical values with vanishing error)
00109   //
00110   if ( theState.variance()<1.e-14 ) {
00111     theMode = SingleGaussianState1D(theState.mean(),theState.variance(),
00112                                     theState.weight());
00113     theModeStatus = Valid;
00114     return;
00115   }
00116   //
00117   // Use means of individual components as start values.
00118   // Sort by value of pdf.
00119   //
00120   typedef std::multimap<double, int, std::greater<double> > StartMap;
00121   StartMap xStart;
00122   for ( unsigned int i=0; i<size(); i++ ) {
00123     xStart.insert(std::make_pair(pdf(mean(i)),i));
00124   }
00125   //
00126   // Now try with each start value
00127   //
00128   int iRes(-1);     // index of start component for current estimate
00129   double xRes(mean((*xStart.begin()).second)); // current estimate of mode
00130   double yRes(-1.); // pdf at current estimate of mode
00131 //   std::pair<double,double> result(-1.,mean((*xStart.begin()).second));
00132   for ( StartMap::const_iterator i=xStart.begin(); i!=xStart.end(); i++ ) {
00133     //
00134     // Convergence radius for a single Gaussian = 1 sigma: don't try
00135     // start values within 1 sigma of the current solution
00136     //
00137     if ( theModeStatus==Valid &&
00138          fabs(mean((*i).second)-mean(iRes))/standardDeviation(iRes)<1. )  continue;
00139     //
00140     // If a solution exists: drop as soon as the pdf at
00141     // start value drops to < 75% of maximum (try to avoid
00142     // unnecessary searches for the mode)
00143     //
00144     if ( theModeStatus==Valid && 
00145          (*i).first/(*xStart.begin()).first<0.75 )  break;
00146     //
00147     // Try to find mode
00148     //
00149     double x;
00150     double y;
00151     bool valid = findMode(x,y,mean((*i).second),standardDeviation((*i).second));
00152     //
00153     // consider only successful searches
00154     //
00155     if ( valid ) { //...
00156       //
00157       // update result only for significant changes in pdf(solution)
00158       //
00159       if ( yRes<0. || (y-yRes)/(y+yRes)>1.e-10 ) {
00160       iRes = (*i).second;               // store index
00161       theModeStatus = Valid;            // update status
00162       xRes = x;                         // current solution
00163       yRes = y;                         // and its pdf value
00164 //       result = std::make_pair(y,x);     // store solution and pdf(solution)
00165       }
00166     } //...
00167   } 
00168   //
00169   // check (existance of) solution
00170   //
00171   if ( theModeStatus== Valid ) {
00172     //
00173     // Construct single Gaussian state with 
00174     //  mean = mode
00175     //  variance = local variance at mode
00176     //  weight such that the pdf's of the mixture and the
00177     //    single state are equal at the mode
00178     //
00179     double mode = xRes;
00180     double varMode = localVariance(mode);
00181     double wgtMode = pdf(mode)*sqrt(2*M_PI*varMode);
00182     theMode = SingleGaussianState1D(mode,varMode,wgtMode);
00183   }
00184   else {
00185     //
00186     // mode finding failed: set solution to highest component
00187     //  (alternative would be global mean / variance ..?)
00188     //
00189     //two many log warnings to actually be useful - comment out
00190     //edm::LogWarning("GaussianSumUtilities") << "1D mode calculation failed";
00191 //     double x = mean();
00192 //     double y = pdf(x);
00193 //     result = std::make_pair(y,x);
00194 //     theMode = SingleGaussianState1D(mean(),variance(),weight());
00195     //
00196     // look for component with highest value at mean
00197     //
00198     int icMax(0);
00199     double ySqMax(0.);
00200     int s = size();
00201     for (int ic=0; ic<s; ++ic ) {
00202       double w = weight(ic);
00203       double ySq = w*w/variance(ic);
00204       if ( ySq>ySqMax ) {
00205         icMax = ic;
00206         ySqMax = ySq;
00207       }
00208     }
00209     theMode = SingleGaussianState1D(components()[icMax]);
00210   }
00211   
00212 }
00213 
00214 bool
00215 GaussianSumUtilities1D::findMode (double& xMode, double& yMode,
00216                                   const double& xStart,
00217                                   const double& scale) const
00218 {
00219   //
00220   // try with Newton on (lnPdf)'(x)
00221   //
00222   double x1(0.);
00223   double y1(0.);
00224   FinderState  state(size());
00225   update(state,xStart);
00226 
00227   double xmin(xStart-1.*scale);
00228   double xmax(xStart+1.*scale);
00229   //
00230   // preset result
00231   //
00232   bool result(false);
00233   xMode = state.x;
00234   yMode = state.y;
00235   //
00236   // Iterate
00237   //
00238   int nLoop(0);
00239   while ( nLoop++<20 ) {
00240     if ( nLoop>1 && state.yd2<0. &&  
00241          ( fabs(state.yd*scale)<1.e-10 || fabs(state.y-y1)/(state.y+y1)<1.e-14 ) ) {
00242       result = true;
00243       break;
00244     }
00245     if ( fabs(state.yd2)<std::numeric_limits<float>::min() )  
00246       state.yd2 = state.yd2>0. ? std::numeric_limits<float>::min() : -std::numeric_limits<float>::min();
00247     double dx = -state.yd/state.yd2;
00248     x1 = state.x;
00249     y1 = state.y;
00250     double x2 = x1 + dx;
00251     if ( state.yd2>0. && (x2<xmin||x2>xmax) )  return false;
00252     update(state,x2);
00253   }
00254   //
00255   // result
00256   //
00257   if ( result ) {
00258     xMode = state.x;
00259     yMode = state.y;
00260   }
00261   return result;
00262 }
00263 
00264 
00265 void GaussianSumUtilities1D::update(FinderState & state, double x) const {
00266   state.x = x;
00267 
00268   pdfComponents(state.x, state.pdfs);
00269   state.y = pdf(state.x, state.pdfs);
00270   state.yd = 0;
00271   if (state.y>std::numeric_limits<double>::min()) state.yd= d1Pdf(state.x,state.pdfs)/state.y;
00272   state.yd2 = -state.yd*state.yd;
00273   if (state.y > std::numeric_limits<double>::min()) state.yd2 += d2Pdf(state.x,state.pdfs)/state.y;
00274 }
00275 
00276 
00277 double
00278 GaussianSumUtilities1D::pdf (double x) const
00279 {
00280   double result(0.);
00281   size_t s=size();
00282   for ( unsigned int i=0; i<s; i++ )
00283     result += pdf(i,x);
00284   return result;
00285 }
00286 
00287 double
00288 GaussianSumUtilities1D::cdf (const double& x) const
00289 {
00290   double result(0.);
00291   size_t s=size();
00292   for ( unsigned int i=0; i<s; i++ )
00293     result += weight(i)*gaussInt(x,mean(i),standardDeviation(i));
00294   return result;
00295 }
00296 
00297 double
00298 GaussianSumUtilities1D::d1Pdf (const double& x) const
00299 {
00300   return d1Pdf(x,pdfComponents(x));
00301 }
00302 
00303 double
00304 GaussianSumUtilities1D::d2Pdf (const double& x) const
00305 {
00306   return d2Pdf(x,pdfComponents(x));
00307 }
00308 
00309 double
00310 GaussianSumUtilities1D::d3Pdf (const double& x) const
00311 {
00312   return d3Pdf(x,pdfComponents(x));
00313 }
00314 
00315 double
00316 GaussianSumUtilities1D::lnPdf (const double& x) const
00317 {
00318   return lnPdf(x,pdfComponents(x));
00319 }
00320 
00321 double
00322 GaussianSumUtilities1D::d1LnPdf (const double& x) const
00323 {
00324   return d1LnPdf(x,pdfComponents(x));
00325 }
00326 
00327 double
00328 GaussianSumUtilities1D::d2LnPdf (const double& x) const
00329 {
00330   return d2LnPdf(x,pdfComponents(x));
00331 }
00332 
00333 std::vector<double>
00334 GaussianSumUtilities1D::pdfComponents (const double& x) const
00335 {
00336   std::vector<double> result;
00337   result.reserve(size());
00338   for ( unsigned int i=0; i<size(); i++ )
00339     result.push_back(weight(i)*gauss(x,mean(i),standardDeviation(i)));
00340   return result;
00341 }
00342 
00343 namespace {
00344   struct PDF {
00345   PDF(double ix) : x(ix){}
00346     double x;
00347     double operator()(SingleGaussianState1D const & sg) const {
00348        return sg.weight()*ROOT::Math::gaussian_pdf(x,sg.standardDeviation(),sg.mean());
00349     }
00350   };
00351 }
00352 void GaussianSumUtilities1D::pdfComponents (double x, std::vector<double> & result) const {
00353   size_t s = size();
00354   if (s!=result.size()) result.resize(s);  
00355   std::transform(components().begin(),components().end(),result.begin(),PDF(x));
00356 }
00357 /*
00358 void GaussianSumUtilities1D::pdfComponents (double x, std::vector<double> & result) const {
00359    size_t s = size();
00360   if (s!=result.size()) result.resize(s);
00361   double* __restrict__ v = &result.front();
00362   SingleGaussianState1D const * __restrict__ sgv = &components().front();
00363   for ( unsigned int i=0; i<s; i++ )
00364     v[i]= sgv[i].weight()*gauss(x,sgv[i].mean(),sgv[i].standardDeviation());
00365 //    result[i]=pdf(i,x);
00366 }
00367 */
00368 
00369 double
00370 GaussianSumUtilities1D::pdf (double, const std::vector<double>& pdfs)
00371 {
00372   return std::accumulate(pdfs.begin(),pdfs.end(),0.);
00373 }
00374 
00375 double
00376 GaussianSumUtilities1D::d1Pdf (double x, const std::vector<double>& pdfs) const
00377 {
00378   double result(0.);
00379   size_t s=size();
00380   for ( unsigned int i=0; i<s; i++ ) {
00381     double dx = (x-mean(i))/standardDeviation(i);
00382     result += -pdfs[i]*dx/standardDeviation(i);
00383   }
00384   return result;
00385 }
00386 
00387 double
00388 GaussianSumUtilities1D::d2Pdf (double x, const std::vector<double>& pdfs) const
00389 {
00390   double result(0.);
00391   size_t s=size();
00392   for ( unsigned int i=0; i<s; i++ ) {
00393     double dx = (x-mean(i))/standardDeviation(i);
00394     result += pdfs[i]/standardDeviation(i)/standardDeviation(i)*(dx*dx-1);
00395   }
00396   return result;
00397 }
00398 
00399 double
00400 GaussianSumUtilities1D::d3Pdf (double x, const std::vector<double>& pdfs) const
00401 {
00402   double result(0.);
00403   size_t s=size();
00404   for ( unsigned int i=0; i<s; i++ ) {
00405     double dx = (x-mean(i))/standardDeviation(i);
00406     result += pdfs[i]/standardDeviation(i)/standardDeviation(i)/standardDeviation(i)*
00407       (-dx*dx+3)*dx;
00408   }
00409   return result;
00410 }
00411 
00412 double
00413 GaussianSumUtilities1D::lnPdf (double x, const std::vector<double>& pdfs)
00414 {
00415   double f(pdf(x,pdfs));
00416   double result(-std::numeric_limits<float>::max());
00417   if ( f>std::numeric_limits<double>::min() )  result = log(f);
00418   return result;
00419 }
00420 
00421 double
00422 GaussianSumUtilities1D::d1LnPdf (double x, const std::vector<double>& pdfs) const
00423 {
00424 
00425   double f = pdf(x,pdfs);
00426   double result(d1Pdf(x,pdfs));
00427   if ( f>std::numeric_limits<double>::min() )  result /= f;
00428   else  result = 0.;
00429   return result;
00430 }
00431 
00432 double
00433 GaussianSumUtilities1D::d2LnPdf (double x, const std::vector<double>& pdfs) const
00434 {
00435 
00436   double f = pdf(x,pdfs);
00437   double df = d1LnPdf(x,pdfs);
00438   double result(-df*df);
00439   if ( f>std::numeric_limits<double>::min() )  result += d2Pdf(x,pdfs)/f;
00440   return result;
00441 }
00442 
00443 double 
00444 GaussianSumUtilities1D::gauss (double x, double mean, double sigma) 
00445 {
00446 //   const double fNorm(1./sqrt(2*M_PI));
00447 //   double result(0.);
00448 
00449 //   double d((x-mean)/sigma);
00450 //   if ( fabs(d)<20. )  result = exp(-d*d/2.);
00451 //   result *= fNorm/sigma;
00452 //   return result;
00453   return ROOT::Math::gaussian_pdf(x,sigma,mean);
00454 }
00455 
00456 double 
00457 GaussianSumUtilities1D::gaussInt (double x, double mean, double sigma) 
00458 {
00459   return ROOT::Math::normal_cdf(x,sigma,mean);
00460 }
00461 
00462 double
00463 GaussianSumUtilities1D::combinedMean () const
00464 {
00465   double s0(0.);
00466   double s1(0.);
00467   int s = size();
00468   SingleGaussianState1D const * __restrict__ sgv = &components().front();
00469   for (int i=0; i<s; i++ )
00470     s0 += sgv[i].weight();
00471   for (int i=0; i<s; i++ )
00472     s1 += sgv[i].weight()*sgv[i].mean();
00473   return s1/s0;
00474 }
00475 
00476 double
00477 GaussianSumUtilities1D::localVariance (double x) const
00478 {
00479   std::vector<double> pdfs;
00480   pdfComponents(x,pdfs);
00481   double result = -pdf(x,pdfs)/d2Pdf(x,pdfs);
00482   // FIXME: wrong curvature seems to be non-existant but should add a proper recovery
00483   if ( result<0. )
00484     edm::LogWarning("GaussianSumUtilities") << "1D variance at mode < 0";    
00485   return result;
00486 }