CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_4_1_8_patch12/src/TrackingTools/GsfTools/src/GaussianSumUtilities1D.cc

Go to the documentation of this file.
00001 #include "TrackingTools/GsfTools/interface/GaussianSumUtilities1D.h"
00002 #include "FWCore/MessageLogger/interface/MessageLogger.h"
00003 
00004 // #include "Utilities/Timing/interface/TimingReport.h"
00005 // #include "Utilities/Timing/interface/TimerStack.h"
00006 
00007 // #include "TROOT.h"
00008 // #include "TMath.h"
00009 #include "Math/ProbFuncMathCore.h"
00010 #include "Math/PdfFuncMathCore.h"
00011 #include "Math/QuantFuncMathCore.h"
00012 
00013 #include <iostream>
00014 #include <cmath>
00015 
00016 #include <map>
00017 #include <functional>
00018 #include <numeric>
00019 #include <algorithm>
00020 
00021 
00022 double GaussianSumUtilities1D::pdf(unsigned int i, double x)  const {
00023   return weight(i)*gauss(x,mean(i),standardDeviation(i));
00024 }
00025 
00026 
00027 double
00028 GaussianSumUtilities1D::quantile (const double q) const
00029 {
00030   return ROOT::Math::gaussian_quantile(q,1.);
00031 }
00032 
00033 
00034 // double
00035 // GaussianSumUtilities1D::quantile (const double q) const
00036 // {
00037 //   //
00038 //   // mean and sigma of highest weight component
00039 //   //
00040 //   int iwMax(-1);
00041 //   float wMax(0.);
00042 //   for ( unsigned int i=0; i<size(); i++ ) {
00043 //     if ( weight(i)>wMax ) {
00044 //       iwMax = i;
00045 //       wMax = weight(i);
00046 //     }
00047 //   }
00048 //   //
00049 //   // Find start values: begin with mean and
00050 //   // go towards f(x)=0 until two points on
00051 //   // either side are found (assumes monotonously
00052 //   // increasing function)
00053 //   //
00054 //   double x1(mean(iwMax));
00055 //   double y1(cdf(x1)-q);
00056 //   double dx = y1>0. ? -standardDeviation(iwMax) : standardDeviation(iwMax);
00057 //   double x2(x1+dx);
00058 //   double y2(cdf(x2)-q);
00059 //   while ( y1*y2>0. ) {
00060 //     x1 = x2;
00061 //     y1 = y2;
00062 //     x2 += dx;
00063 //     y2 = cdf(x2) - q;
00064 //   }
00065 //   //
00066 //   // now use bisection to find final value
00067 //   //
00068 //   double x(0.);
00069 //   while ( true ) {
00070 //     // use linear interpolation
00071 //     x = -(x2*y1-x1*y2)/(y2-y1);
00072 //     double y = cdf(x) - q;
00073 //     if ( fabs(y)<1.e-6 )  break;
00074 //     if ( y*y1>0. ) {
00075 //       y1 = y;
00076 //       x1 = x;
00077 //     }
00078 //     else {
00079 //       y2 = y;
00080 //       x2 = x;
00081 //     }
00082 //   }
00083 //   return x;
00084 // }
00085 
00086 bool
00087 GaussianSumUtilities1D::modeIsValid () const
00088 {
00089   if ( theModeStatus==NotComputed )  computeMode();
00090   return theModeStatus==Valid;
00091 }
00092 
00093 const SingleGaussianState1D&
00094 GaussianSumUtilities1D::mode () const
00095 {
00096   if ( theModeStatus==NotComputed )  computeMode();
00097 //     std::cout << "Mode calculation failed!!" << std::endl;
00098   return theMode;
00099 }
00100 
00101 void
00102 GaussianSumUtilities1D::computeMode () const
00103 {
00104 //   TimerStack tstack;
00105 //   tstack.benchmark("GSU1D::benchmark",100000);
00106 //   FastTimerStackPush(tstack,"GaussianSumUtilities1D::computeMode");
00107   theModeStatus = NotValid;
00108   //
00109   // Use means of individual components as start values.
00110   // Sort by value of pdf.
00111   //
00112   typedef std::multimap<double, int, std::greater<double> > StartMap;
00113   StartMap xStart;
00114   for ( unsigned int i=0; i<size(); i++ ) {
00115     xStart.insert(std::make_pair(pdf(mean(i)),i));
00116   }
00117   //
00118   // Now try with each start value
00119   //
00120   int iRes(-1);     // index of start component for current estimate
00121   double xRes(mean((*xStart.begin()).second)); // current estimate of mode
00122   double yRes(-1.); // pdf at current estimate of mode
00123 //   std::pair<double,double> result(-1.,mean((*xStart.begin()).second));
00124   for ( StartMap::const_iterator i=xStart.begin(); i!=xStart.end(); i++ ) {
00125     //
00126     // Convergence radius for a single Gaussian = 1 sigma: don't try
00127     // start values within 1 sigma of the current solution
00128     //
00129     if ( theModeStatus==Valid &&
00130          fabs(mean((*i).second)-mean(iRes))/standardDeviation(iRes)<1. )  continue;
00131     //
00132     // If a solution exists: drop as soon as the pdf at
00133     // start value drops to < 75% of maximum (try to avoid
00134     // unnecessary searches for the mode)
00135     //
00136     if ( theModeStatus==Valid && 
00137          (*i).first/(*xStart.begin()).first<0.75 )  break;
00138     //
00139     // Try to find mode
00140     //
00141     double x;
00142     double y;
00143     bool valid = findMode(x,y,mean((*i).second),standardDeviation((*i).second));
00144     //
00145     // consider only successful searches
00146     //
00147     if ( valid ) { //...
00148       //
00149       // update result only for significant changes in pdf(solution)
00150       //
00151       if ( yRes<0. || (y-yRes)/(y+yRes)>1.e-10 ) {
00152       iRes = (*i).second;               // store index
00153       theModeStatus = Valid;            // update status
00154       xRes = x;                         // current solution
00155       yRes = y;                         // and its pdf value
00156 //       result = std::make_pair(y,x);     // store solution and pdf(solution)
00157       }
00158     } //...
00159   } 
00160   //
00161   // check (existance of) solution
00162   //
00163   if ( theModeStatus== Valid ) {
00164     //
00165     // Construct single Gaussian state with 
00166     //  mean = mode
00167     //  variance = local variance at mode
00168     //  weight such that the pdf's of the mixture and the
00169     //    single state are equal at the mode
00170     //
00171     double mode = xRes;
00172     double varMode = localVariance(mode);
00173     double wgtMode = pdf(mode)*sqrt(2*M_PI*varMode);
00174     theMode = SingleGaussianState1D(mode,varMode,wgtMode);
00175   }
00176   else {
00177     //
00178     // mode finding failed: set solution to highest component
00179     //  (alternative would be global mean / variance ..?)
00180     //
00181     //two many log warnings to actually be useful - comment out
00182     //edm::LogWarning("GaussianSumUtilities") << "1D mode calculation failed";
00183 //     double x = mean();
00184 //     double y = pdf(x);
00185 //     result = std::make_pair(y,x);
00186 //     theMode = SingleGaussianState1D(mean(),variance(),weight());
00187     //
00188     // look for component with highest value at mean
00189     //
00190     int icMax(0);
00191     double ySqMax(0.);
00192     int s = size();
00193     for (int ic=0; ic<s; ++ic ) {
00194       double w = weight(ic);
00195       double ySq = w*w/variance(ic);
00196       if ( ySq>ySqMax ) {
00197         icMax = ic;
00198         ySqMax = ySq;
00199       }
00200     }
00201     theMode = SingleGaussianState1D(components()[icMax]);
00202   }
00203   
00204 }
00205 
00206 bool
00207 GaussianSumUtilities1D::findMode (double& xMode, double& yMode,
00208                                   const double& xStart,
00209                                   const double& scale) const
00210 {
00211   //
00212   // try with Newton on (lnPdf)'(x)
00213   //
00214   double x1(0.);
00215   double y1(0.);
00216   FinderState  state(size());
00217   update(state,xStart);
00218 
00219   double xmin(xStart-1.*scale);
00220   double xmax(xStart+1.*scale);
00221   //
00222   // preset result
00223   //
00224   bool result(false);
00225   xMode = state.x;
00226   yMode = state.y;
00227   //
00228   // Iterate
00229   //
00230   int nLoop(0);
00231   while ( nLoop++<20 ) {
00232     if ( nLoop>1 && state.yd2<0. &&  
00233          ( fabs(state.yd*scale)<1.e-10 || fabs(state.y-y1)/(state.y+y1)<1.e-14 ) ) {
00234       result = true;
00235       break;
00236     }
00237     if ( fabs(state.yd2)<std::numeric_limits<float>::min() )  
00238       state.yd2 = state.yd2>0. ? std::numeric_limits<float>::min() : -std::numeric_limits<float>::min();
00239     double dx = -state.yd/state.yd2;
00240     x1 = state.x;
00241     y1 = state.y;
00242     double x2 = x1 + dx;
00243     if ( state.yd2>0. && (x2<xmin||x2>xmax) )  return false;
00244     update(state,x2);
00245   }
00246   //
00247   // result
00248   //
00249   if ( result ) {
00250     xMode = state.x;
00251     yMode = state.y;
00252   }
00253   return result;
00254 }
00255 
00256 
00257 void GaussianSumUtilities1D::update(FinderState & state, double x) const {
00258   state.x = x;
00259 
00260   pdfComponents(state.x, state.pdfs);
00261   state.y = pdf(state.x, state.pdfs);
00262   state.yd = 0;
00263   if (state.y>std::numeric_limits<double>::min()) state.yd= d1Pdf(state.x,state.pdfs)/state.y;
00264   state.yd2 = -state.yd*state.yd;
00265   if (state.y > std::numeric_limits<double>::min()) state.yd2 += d2Pdf(state.x,state.pdfs)/state.y;
00266 }
00267 
00268 
00269 double
00270 GaussianSumUtilities1D::pdf (double x) const
00271 {
00272   double result(0.);
00273   size_t s=size();
00274   for ( unsigned int i=0; i<s; i++ )
00275     result += pdf(i,x);
00276   return result;
00277 }
00278 
00279 double
00280 GaussianSumUtilities1D::cdf (const double& x) const
00281 {
00282   double result(0.);
00283   size_t s=size();
00284   for ( unsigned int i=0; i<s; i++ )
00285     result += weight(i)*gaussInt(x,mean(i),standardDeviation(i));
00286   return result;
00287 }
00288 
00289 double
00290 GaussianSumUtilities1D::d1Pdf (const double& x) const
00291 {
00292   return d1Pdf(x,pdfComponents(x));
00293 }
00294 
00295 double
00296 GaussianSumUtilities1D::d2Pdf (const double& x) const
00297 {
00298   return d2Pdf(x,pdfComponents(x));
00299 }
00300 
00301 double
00302 GaussianSumUtilities1D::d3Pdf (const double& x) const
00303 {
00304   return d3Pdf(x,pdfComponents(x));
00305 }
00306 
00307 double
00308 GaussianSumUtilities1D::lnPdf (const double& x) const
00309 {
00310   return lnPdf(x,pdfComponents(x));
00311 }
00312 
00313 double
00314 GaussianSumUtilities1D::d1LnPdf (const double& x) const
00315 {
00316   return d1LnPdf(x,pdfComponents(x));
00317 }
00318 
00319 double
00320 GaussianSumUtilities1D::d2LnPdf (const double& x) const
00321 {
00322   return d2LnPdf(x,pdfComponents(x));
00323 }
00324 
00325 std::vector<double>
00326 GaussianSumUtilities1D::pdfComponents (const double& x) const
00327 {
00328   std::vector<double> result;
00329   result.reserve(size());
00330   for ( unsigned int i=0; i<size(); i++ )
00331     result.push_back(weight(i)*gauss(x,mean(i),standardDeviation(i)));
00332   return result;
00333 }
00334 
00335 namespace {
00336   struct PDF {
00337   PDF(double ix) : x(ix){}
00338     double x;
00339     double operator()(SingleGaussianState1D const & sg) const {
00340        return sg.weight()*ROOT::Math::gaussian_pdf(x,sg.standardDeviation(),sg.mean());
00341     }
00342   };
00343 }
00344 void GaussianSumUtilities1D::pdfComponents (double x, std::vector<double> & result) const {
00345   size_t s = size();
00346   if (s!=result.size()) result.resize(s);  
00347   std::transform(components().begin(),components().end(),result.begin(),PDF(x));
00348 }
00349 /*
00350 void GaussianSumUtilities1D::pdfComponents (double x, std::vector<double> & result) const {
00351    size_t s = size();
00352   if (s!=result.size()) result.resize(s);
00353   double* __restrict__ v = &result.front();
00354   SingleGaussianState1D const * __restrict__ sgv = &components().front();
00355   for ( unsigned int i=0; i<s; i++ )
00356     v[i]= sgv[i].weight()*gauss(x,sgv[i].mean(),sgv[i].standardDeviation());
00357 //    result[i]=pdf(i,x);
00358 }
00359 */
00360 
00361 double
00362 GaussianSumUtilities1D::pdf (double, const std::vector<double>& pdfs)
00363 {
00364   return std::accumulate(pdfs.begin(),pdfs.end(),0.);
00365 }
00366 
00367 double
00368 GaussianSumUtilities1D::d1Pdf (double x, const std::vector<double>& pdfs) const
00369 {
00370   double result(0.);
00371   size_t s=size();
00372   for ( unsigned int i=0; i<s; i++ ) {
00373     double dx = (x-mean(i))/standardDeviation(i);
00374     result += -pdfs[i]*dx/standardDeviation(i);
00375   }
00376   return result;
00377 }
00378 
00379 double
00380 GaussianSumUtilities1D::d2Pdf (double x, const std::vector<double>& pdfs) const
00381 {
00382   double result(0.);
00383   size_t s=size();
00384   for ( unsigned int i=0; i<s; i++ ) {
00385     double dx = (x-mean(i))/standardDeviation(i);
00386     result += pdfs[i]/standardDeviation(i)/standardDeviation(i)*(dx*dx-1);
00387   }
00388   return result;
00389 }
00390 
00391 double
00392 GaussianSumUtilities1D::d3Pdf (double x, const std::vector<double>& pdfs) const
00393 {
00394   double result(0.);
00395   size_t s=size();
00396   for ( unsigned int i=0; i<s; i++ ) {
00397     double dx = (x-mean(i))/standardDeviation(i);
00398     result += pdfs[i]/standardDeviation(i)/standardDeviation(i)/standardDeviation(i)*
00399       (-dx*dx+3)*dx;
00400   }
00401   return result;
00402 }
00403 
00404 double
00405 GaussianSumUtilities1D::lnPdf (double x, const std::vector<double>& pdfs)
00406 {
00407   double f(pdf(x,pdfs));
00408   double result(-std::numeric_limits<float>::max());
00409   if ( f>std::numeric_limits<double>::min() )  result = log(f);
00410   return result;
00411 }
00412 
00413 double
00414 GaussianSumUtilities1D::d1LnPdf (double x, const std::vector<double>& pdfs) const
00415 {
00416 
00417   double f = pdf(x,pdfs);
00418   double result(d1Pdf(x,pdfs));
00419   if ( f>std::numeric_limits<double>::min() )  result /= f;
00420   else  result = 0.;
00421   return result;
00422 }
00423 
00424 double
00425 GaussianSumUtilities1D::d2LnPdf (double x, const std::vector<double>& pdfs) const
00426 {
00427 
00428   double f = pdf(x,pdfs);
00429   double df = d1LnPdf(x,pdfs);
00430   double result(-df*df);
00431   if ( f>std::numeric_limits<double>::min() )  result += d2Pdf(x,pdfs)/f;
00432   return result;
00433 }
00434 
00435 double 
00436 GaussianSumUtilities1D::gauss (double x, double mean, double sigma) 
00437 {
00438 //   const double fNorm(1./sqrt(2*M_PI));
00439 //   double result(0.);
00440 
00441 //   double d((x-mean)/sigma);
00442 //   if ( fabs(d)<20. )  result = exp(-d*d/2.);
00443 //   result *= fNorm/sigma;
00444 //   return result;
00445   return ROOT::Math::gaussian_pdf(x,sigma,mean);
00446 }
00447 
00448 double 
00449 GaussianSumUtilities1D::gaussInt (double x, double mean, double sigma) 
00450 {
00451   return ROOT::Math::normal_cdf(x,sigma,mean);
00452 }
00453 
00454 double
00455 GaussianSumUtilities1D::combinedMean () const
00456 {
00457   double s0(0.);
00458   double s1(0.);
00459   int s = size();
00460   SingleGaussianState1D const * __restrict__ sgv = &components().front();
00461   for (int i=0; i<s; i++ )
00462     s0 += sgv[i].weight();
00463   for (int i=0; i<s; i++ )
00464     s1 += sgv[i].weight()*sgv[i].mean();
00465   return s1/s0;
00466 }
00467 
00468 double
00469 GaussianSumUtilities1D::localVariance (double x) const
00470 {
00471   std::vector<double> pdfs;
00472   pdfComponents(x,pdfs);
00473   double result = -pdf(x,pdfs)/d2Pdf(x,pdfs);
00474   // FIXME: wrong curvature seems to be non-existant but should add a proper recovery
00475   if ( result<0. )
00476     edm::LogWarning("GaussianSumUtilities") << "1D variance at mode < 0";    
00477   return result;
00478 }