CMS 3D CMS Logo

GaussianSumUtilities1D.cc

Go to the documentation of this file.
00001 #include "TrackingTools/GsfTools/interface/GaussianSumUtilities1D.h"
00002 #include "FWCore/MessageLogger/interface/MessageLogger.h"
00003 
00004 // #include "Utilities/Timing/interface/TimingReport.h"
00005 // #include "Utilities/Timing/interface/TimerStack.h"
00006 
00007 #include "TROOT.h"
00008 #include "TMath.h"
00009 
00010 #include <iostream>
00011 #include <cmath>
00012 
00013 #include <map>
00014 #include <functional>
00015 
00016 // #define DBG_GS1D
00017 // #define DRAW_GS1D
00018 #ifdef DRAW_GS1D
00019 #include "TCanvas.h"
00020 #include "TGraph.h"
00021 #include "TPolyMarker.h"
00022 #endif
00023 
00024 double
00025 GaussianSumUtilities1D::quantile (const double q) const
00026 {
00027   //
00028   // mean and sigma of highest weight component
00029   //
00030   int iwMax(-1);
00031   float wMax(0.);
00032   for ( unsigned int i=0; i<size(); i++ ) {
00033     if ( weight(i)>wMax ) {
00034       iwMax = i;
00035       wMax = weight(i);
00036     }
00037   }
00038   //
00039   // Find start values: begin with mean and
00040   // go towards f(x)=0 until two points on
00041   // either side are found (assumes monotonously
00042   // increasing function)
00043   //
00044   double x1(mean(iwMax));
00045   double y1(cdf(x1)-q);
00046   double dx = y1>0. ? -standardDeviation(iwMax) : standardDeviation(iwMax);
00047   double x2(x1+dx);
00048   double y2(cdf(x2)-q);
00049   while ( y1*y2>0. ) {
00050     x1 = x2;
00051     y1 = y2;
00052     x2 += dx;
00053     y2 = cdf(x2) - q;
00054   }
00055   //
00056   // now use bisection to find final value
00057   //
00058   double x(0.);
00059   while ( true ) {
00060     // use linear interpolation
00061     x = -(x2*y1-x1*y2)/(y2-y1);
00062     double y = cdf(x) - q;
00063     if ( fabs(y)<1.e-6 )  break;
00064     if ( y*y1>0. ) {
00065       y1 = y;
00066       x1 = x;
00067     }
00068     else {
00069       y2 = y;
00070       x2 = x;
00071     }
00072   }
00073   return x;
00074 }
00075 
00076 bool
00077 GaussianSumUtilities1D::modeIsValid () const
00078 {
00079   if ( theModeStatus==NotComputed )  computeMode();
00080   return theModeStatus==Valid;
00081 }
00082 
00083 const SingleGaussianState1D&
00084 GaussianSumUtilities1D::mode () const
00085 {
00086   if ( theModeStatus==NotComputed )  computeMode();
00087 //     std::cout << "Mode calculation failed!!" << std::endl;
00088   return theMode;
00089 }
00090 
00091 void
00092 GaussianSumUtilities1D::computeMode () const
00093 {
00094 //   TimerStack tstack;
00095 //   tstack.benchmark("GSU1D::benchmark",100000);
00096 //   FastTimerStackPush(tstack,"GaussianSumUtilities1D::computeMode");
00097 #ifdef DRAW_GS1D
00098   {
00099   gPad->Clear();
00100   std::cout << "gPad = " << gPad << std::endl;
00101   std::multimap<double,double> drawMap;
00102   const int npDraw(1024);
00103   double xpDraw[npDraw];
00104   double ypDraw[npDraw];
00105   for ( unsigned int i=0; i<size(); i++ ) {
00106     double ave = mean(i);
00107     double sig = standardDeviation(i);
00108     for ( double xi=-3.; xi<3.; ) {
00109       double x = ave + xi*sig;
00110       drawMap.insert(std::make_pair(x,pdf(x)));
00111       xi += 0.2;
00112     }
00113   }
00114   int np(0);
00115   double xMin(FLT_MAX);
00116   double xMax(-FLT_MAX);
00117   double yMax(-FLT_MAX);
00118   for ( std::multimap<double,double>::const_iterator im=drawMap.begin();
00119         im!=drawMap.end(); ++im,++np ) {
00120     if ( np>=1024 )  break;
00121     xpDraw[np] = (*im).first;
00122     ypDraw[np] = (*im).second;
00123     if ( xMin>(*im).first )  xMin = (*im).first;
00124     if ( xMax<(*im).first )  xMax = (*im).first;
00125     if ( yMax<(*im).second )  yMax = (*im).second;
00126   }
00127 //   for ( int i=0; i<np; ++i )
00128 //     std::cout << i << " " << xpDraw[i] << " " << ypDraw[i] << std::endl;
00129   gPad->DrawFrame(xMin,0.,xMax,1.05*yMax);  
00130   TGraph* g = new TGraph(np,xpDraw,ypDraw);
00131   g->SetLineWidth(2);
00132   g->Draw("C");
00133   TGraph* gc = new TGraph();
00134   gc->SetLineStyle(2);
00135   int np2(0);
00136   double xpDraw2[1024];
00137   double ypDraw2[1024];
00138   for ( unsigned int i=0; i<size(); i++ ) {
00139     double ave = mean(i);
00140     double sig = standardDeviation(i);
00141     SingleGaussianState1D sgs(ave,variance(i),weight(i));
00142     std::vector<SingleGaussianState1D> sgsv(1,sgs);
00143     MultiGaussianState1D mgs(sgsv);
00144     GaussianSumUtilities1D gsu(mgs);
00145     np2 = 0;
00146     for ( double xi=-3.; xi<3.; ) {
00147       double x = ave + xi*sig;
00148       xpDraw2[np2] = x;
00149       ypDraw2[np2] = gsu.pdf(x);
00150       ++np2;
00151       xi += 0.2;
00152     }
00153 //    for ( int i=0; i<np2; ++i )
00154 //      std::cout << i << " " << xpDraw2[i] << " " << ypDraw2[i] << std::endl;
00155 //     std::cout << "np2 = " << np2 
00156 //       << " ave = " << ave 
00157 //       << " sig = " << sig 
00158 //       << " weight = " << weight(i)
00159 //       << " pdf = " << gsu.pdf(ave) << std::endl;
00160     if ( np2>0 )  gc->DrawGraph(np2,xpDraw2,ypDraw2);
00161 //     break;
00162   }
00163   gPad->Modified();
00164   gPad->Update();
00165   }
00166 #endif
00167   theModeStatus = NotValid;
00168   //
00169   // Use means of individual components as start values.
00170   // Sort by value of pdf.
00171   //
00172   typedef std::multimap<double, int, std::greater<double> > StartMap;
00173   StartMap xStart;
00174   for ( unsigned int i=0; i<size(); i++ ) {
00175     xStart.insert(std::make_pair(pdf(mean(i)),i));
00176   }
00177   //
00178   // Now try with each start value
00179   //
00180 #ifdef DRAW_GS1D
00181   int ind(0);
00182 #endif
00183   int iRes(-1);     // index of start component for current estimate
00184   double xRes(mean((*xStart.begin()).second)); // current estimate of mode
00185   double yRes(-1.); // pdf at current estimate of mode
00186 //   std::pair<double,double> result(-1.,mean((*xStart.begin()).second));
00187   for ( StartMap::const_iterator i=xStart.begin(); i!=xStart.end(); i++ ) {
00188 #ifdef DRAW_GS1D
00189     double xp[2];
00190     double yp[2];
00191     TPolyMarker* g = new TPolyMarker();
00192     g->SetMarkerStyle(20+ind/4);
00193     g->SetMarkerColor(1+ind%4);
00194     ++ind;
00195 #endif
00196     //
00197     // Convergence radius for a single Gaussian = 1 sigma: don't try
00198     // start values within 1 sigma of the current solution
00199     //
00200     if ( theModeStatus==Valid &&
00201          fabs(mean((*i).second)-mean(iRes))/standardDeviation(iRes)<1. )  continue;
00202     //
00203     // If a solution exists: drop as soon as the pdf at
00204     // start value drops to < 75% of maximum (try to avoid
00205     // unnecessary searches for the mode)
00206     //
00207     if ( theModeStatus==Valid && 
00208          (*i).first/(*xStart.begin()).first<0.75 )  break;
00209     //
00210     // Try to find mode
00211     //
00212     double x;
00213     double y;
00214     bool valid = findMode(x,y,mean((*i).second),standardDeviation((*i).second));
00215     //
00216     // consider only successful searches
00217     //
00218     if ( valid ) { //...
00219       //
00220       // update result only for significant changes in pdf(solution)
00221       //
00222       if ( yRes<0. || (y-yRes)/(y+yRes)>1.e-10 ) {
00223 #ifdef DBG_GS1D
00224       if ( iRes>=0 ) 
00225         std::cout << "dxStart = " << (xRes-mean((*i).second))/standardDeviation(iRes) << std::endl;
00226 #endif
00227       iRes = (*i).second;               // store index
00228       theModeStatus = Valid;            // update status
00229       xRes = x;                         // current solution
00230       yRes = y;                         // and its pdf value
00231 //       result = std::make_pair(y,x);     // store solution and pdf(solution)
00232       }
00233     } //...
00234 #ifdef DRAW_GS1D
00235     xp[0] = xp[1] = mean((*i).second);
00236     yp[0] = yp[1] = (*i).first;
00237     if ( valid ) {
00238       xp[1] = x;
00239       yp[1] = y;
00240     }
00241     g->DrawPolyMarker(2,xp,yp);
00242     ++ind; //...
00243 #endif
00244   } 
00245   //
00246   // check (existance of) solution
00247   //
00248   if ( theModeStatus== Valid ) {
00249 #ifdef DBG_GS1D
00250     std::cout << "Ratio of pdfs at  first / real maximum = " << iRes << " " << mean(iRes) << " "
00251          << pdf(mean(iRes))/(*xStart.begin()).first << std::endl;
00252 #endif
00253     //
00254     // Construct single Gaussian state with 
00255     //  mean = mode
00256     //  variance = local variance at mode
00257     //  weight such that the pdf's of the mixture and the
00258     //    single state are equal at the mode
00259     //
00260     double mode = xRes;
00261     double varMode = localVariance(mode);
00262     double wgtMode = pdf(mode)*sqrt(2*TMath::Pi()*varMode);
00263     theMode = SingleGaussianState1D(mode,varMode,wgtMode);
00264   }
00265   else {
00266     //
00267     // mode finding failed: set solution to highest component
00268     //  (alternative would be global mean / variance ..?)
00269     //
00270     edm::LogWarning("GaussianSumUtilities") << "1D mode calculation failed";
00271 //     double x = mean();
00272 //     double y = pdf(x);
00273 //     result = std::make_pair(y,x);
00274 //     theMode = SingleGaussianState1D(mean(),variance(),weight());
00275     //
00276     // look for component with highest value at mean
00277     //
00278     unsigned int icMax(0);
00279     double ySqMax(0.);
00280     for ( unsigned int ic=0; ic<size(); ++ic ) {
00281       double w = weight(ic);
00282       double ySq = w*w/variance(ic);
00283       if ( ic==0 || ySqMax ) {
00284         icMax = ic;
00285         ySqMax = ySq;
00286       }
00287     }
00288     theMode = SingleGaussianState1D(components()[icMax]);
00289   }
00290 #ifdef DRAW_GS1D
00291   gPad->Modified();
00292   gPad->Update();
00293 #endif
00294 #ifdef DRAW_GS1D
00295   {
00296     TGraph* gm = new TGraph();
00297     gm->SetLineColor(2);
00298     int np(0);
00299     double xp[1024];
00300     double yp[1024];
00301     double mode = theMode.mean();
00302     double var = theMode.variance();
00303     double sig = sqrt(var);
00304     SingleGaussianState1D sgs(mode,var,pdf(mode)*sqrt(2*TMath::Pi())*sig);
00305     std::vector<SingleGaussianState1D> sgsv(1,sgs);
00306     MultiGaussianState1D mgs(sgsv);
00307     GaussianSumUtilities1D gsu(mgs);
00308     for ( double xi=-3.; xi<3.; ) {
00309       double x = mode + xi*sig;
00310       xp[np] = x;
00311       yp[np] = gsu.pdf(x);
00312       ++np;
00313       xi += 0.2;
00314     }
00315     gm->DrawGraph(np,xp,yp);
00316     gPad->Modified();
00317     gPad->Update();
00318   }
00319 #endif
00320   
00321 }
00322 
00323 bool
00324 GaussianSumUtilities1D::findMode (double& xMode, double& yMode,
00325                                   const double& xStart,
00326                                   const double& scale) const
00327 {
00328   //
00329   // try with Newton on (lnPdf)'(x)
00330   //
00331   double x1(0.);
00332   double y1(0.);
00333   std::vector<double> pdfs(pdfComponents(xStart));
00334   double x2(xStart);
00335   double y2(pdf(xStart,pdfs));
00336   double yd(d1LnPdf(xStart,pdfs));
00337   double yd2(d2LnPdf(xStart,pdfs));
00338   double xmin(xStart-1.*scale);
00339   double xmax(xStart+1.*scale);
00340   //
00341   // preset result
00342   //
00343   bool result(false);
00344   xMode = x2;
00345   yMode = y2;
00346   //
00347   // Iterate
00348   //
00349   int nLoop(0);
00350   while ( nLoop++<20 ) {
00351 #ifdef DBG_GS1D
00352     std::cout << "Loop " << nLoop << " " << yd2 << " "
00353               << (yd*scale) << " " << (y2-y1)/(y2+y1) << std::endl;
00354 #endif
00355     if ( nLoop>1 && yd2<0. &&  
00356          ( fabs(yd*scale)<1.e-10 || fabs(y2-y1)/(y2+y1)<1.e-14 ) ) {
00357       result = true;
00358       break;
00359     }
00360     if ( fabs(yd2)<FLT_MIN )  yd2 = yd2>0. ? FLT_MIN : -FLT_MIN;
00361     double dx = -yd/yd2;
00362     x1 = x2;
00363     y1 = y2;
00364     x2 += dx;
00365 #ifdef DBG_GS1D
00366     std::cout << yd << " " << yd2 << " " << x2 << " " << xmin << " " << xmax << std::endl;
00367 #endif
00368     if ( yd2>0. && (x2<xmin||x2>xmax) )  return false;
00369 
00370     pdfs = pdfComponents(x2);
00371     y2 = pdf(x2,pdfs);
00372     yd = d1LnPdf(x2,pdfs);
00373     yd2 = d2LnPdf(x2,pdfs);
00374   }
00375   //
00376   // result
00377   //
00378   if ( result ) {
00379     xMode = x2;
00380     yMode = y2;
00381   }
00382 #ifdef DBG_GS1D
00383   std::cout << "Started from " << xStart << " " << pdf(xStart)
00384             << " ; ended at " << xMode << " " << yMode << " after " 
00385             << nLoop << " iterations " << result << std::endl;
00386 #endif
00387   return result;
00388 }
00389 
00390 double
00391 GaussianSumUtilities1D::pdf (const double& x) const
00392 {
00393   return pdf(x,pdfComponents(x));
00394 }
00395 
00396 double
00397 GaussianSumUtilities1D::cdf (const double& x) const
00398 {
00399   double result(0.);
00400   for ( unsigned int i=0; i<size(); i++ )
00401     result += weight(i)*gaussInt(x,mean(i),standardDeviation(i));
00402   return result;
00403 }
00404 
00405 double
00406 GaussianSumUtilities1D::d1Pdf (const double& x) const
00407 {
00408   return d1Pdf(x,pdfComponents(x));
00409 }
00410 
00411 double
00412 GaussianSumUtilities1D::d2Pdf (const double& x) const
00413 {
00414   return d2Pdf(x,pdfComponents(x));
00415 }
00416 
00417 double
00418 GaussianSumUtilities1D::d3Pdf (const double& x) const
00419 {
00420   return d3Pdf(x,pdfComponents(x));
00421 }
00422 
00423 double
00424 GaussianSumUtilities1D::lnPdf (const double& x) const
00425 {
00426   return lnPdf(x,pdfComponents(x));
00427 }
00428 
00429 double
00430 GaussianSumUtilities1D::d1LnPdf (const double& x) const
00431 {
00432   return d1LnPdf(x,pdfComponents(x));
00433 }
00434 
00435 double
00436 GaussianSumUtilities1D::d2LnPdf (const double& x) const
00437 {
00438   return d2LnPdf(x,pdfComponents(x));
00439 }
00440 
00441 std::vector<double>
00442 GaussianSumUtilities1D::pdfComponents (const double& x) const
00443 {
00444   std::vector<double> result;
00445   result.reserve(size());
00446   for ( unsigned int i=0; i<size(); i++ )
00447     result.push_back(weight(i)*gauss(x,mean(i),standardDeviation(i)));
00448   return result;
00449 }
00450 
00451 double
00452 GaussianSumUtilities1D::pdf (const double& x, const std::vector<double>& pdfs) const
00453 {
00454   double result(0.);
00455   for ( unsigned int i=0; i<size(); i++ )
00456     result += pdfs[i];
00457   return result;
00458 }
00459 
00460 double
00461 GaussianSumUtilities1D::d1Pdf (const double& x, const std::vector<double>& pdfs) const
00462 {
00463   double result(0.);
00464   for ( unsigned int i=0; i<size(); i++ ) {
00465     double dx = (x-mean(i))/standardDeviation(i);
00466     result += -pdfs[i]*dx/standardDeviation(i);
00467   }
00468   return result;
00469 }
00470 
00471 double
00472 GaussianSumUtilities1D::d2Pdf (const double& x, const std::vector<double>& pdfs) const
00473 {
00474   double result(0.);
00475   for ( unsigned int i=0; i<size(); i++ ) {
00476     double dx = (x-mean(i))/standardDeviation(i);
00477     result += pdfs[i]/standardDeviation(i)/standardDeviation(i)*(dx*dx-1);
00478   }
00479   return result;
00480 }
00481 
00482 double
00483 GaussianSumUtilities1D::d3Pdf (const double& x, const std::vector<double>& pdfs) const
00484 {
00485   double result(0.);
00486   for ( unsigned int i=0; i<size(); i++ ) {
00487     double dx = (x-mean(i))/standardDeviation(i);
00488     result += pdfs[i]/standardDeviation(i)/standardDeviation(i)/standardDeviation(i)*
00489       (-dx*dx+3)*dx;
00490   }
00491   return result;
00492 }
00493 
00494 double
00495 GaussianSumUtilities1D::lnPdf (const double& x, const std::vector<double>& pdfs) const
00496 {
00497   double f(pdf(x,pdfs));
00498   double result(-FLT_MAX);
00499   if ( result>DBL_MIN )  result = log(f);
00500   return result;
00501 }
00502 
00503 double
00504 GaussianSumUtilities1D::d1LnPdf (const double& x, const std::vector<double>& pdfs) const
00505 {
00506 
00507   double f = pdf(x,pdfs);
00508   double result(d1Pdf(x,pdfs));
00509   if ( f>DBL_MIN )  result /= f;
00510   else  result = 0.;
00511   return result;
00512 }
00513 
00514 double
00515 GaussianSumUtilities1D::d2LnPdf (const double& x, const std::vector<double>& pdfs) const
00516 {
00517 
00518   double f = pdf(x,pdfs);
00519   double df = d1LnPdf(x,pdfs);
00520   double result(-df*df);
00521   if ( f>DBL_MIN )  result += d2Pdf(x,pdfs)/f;
00522   return result;
00523 }
00524 
00525 double 
00526 GaussianSumUtilities1D::gauss (const double& x, const double& mean,
00527                                const double& sigma) const 
00528 {
00529   const double fNorm(1./sqrt(2*TMath::Pi()));
00530   double result(0.);
00531 
00532   double d((x-mean)/sigma);
00533   if ( fabs(d)<20. )  result = exp(-d*d/2.);
00534   result *= fNorm/sigma;
00535   return result;
00536 }
00537 
00538 double 
00539 GaussianSumUtilities1D::gaussInt (const double& x, const double& mean,
00540                        const double& sigma) const 
00541 {
00542   return TMath::Freq((x-mean)/sigma);
00543 }
00544 
00545 double
00546 GaussianSumUtilities1D::combinedMean () const
00547 {
00548   double s0(0.);
00549   double s1(0.);
00550   for ( unsigned int i=0; i<size(); i++ ) {
00551     s0 += weight(i);
00552     s1 += weight(i)*mean(i);
00553   }
00554   return s1/s0;
00555 }
00556 
00557 double
00558 GaussianSumUtilities1D::localVariance (const double& x) const
00559 {
00560   double result = -pdf(x)/d2Pdf(x);
00561   // FIXME: wrong curvature seems to be non-existant but should add a proper recovery
00562   if ( result<0. )
00563     edm::LogWarning("GaussianSumUtilities") << "1D variance at mode < 0";    
00564   return result;
00565 }

Generated on Tue Jun 9 17:48:19 2009 for CMSSW by  doxygen 1.5.4