CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_6/src/HiggsAnalysis/CombinedLimit/src/th1fmorph.cc

Go to the documentation of this file.
00001 #include "../interface/th1fmorph.h"
00002 #include "TROOT.h"
00003 #include "TAxis.h"
00004 #include "TArrayD.h"
00005 
00006 #include <iostream>
00007 #include <cmath>
00008 #include <set>
00009 
00010 using namespace std;
00011 
00012 template<typename TH1_t, typename Value_t>
00013 TH1_t *th1fmorph_(const char *chname, 
00014                 const char *chtitle,
00015                 TH1_t *hist1,TH1_t *hist2,
00016                 Double_t par1,Double_t par2,Double_t parinterp,
00017                 Double_t morphedhistnorm,
00018                 Int_t idebug)
00019 {
00020   //--------------------------------------------------------------------------
00021   // Author           : Alex Read 
00022   // Version 0.2 of ROOT implementation, 08.05.2011
00023   // *
00024   // *      Perform a linear interpolation between two histograms as a function
00025   // *      of the characteristic parameter of the distribution.
00026   // *
00027   // *      The algorithm is described in Read, A. L., "Linear Interpolation
00028   // *      of Histograms", NIM A 425 (1999) 357-360.
00029   // *      
00030   // *      This ROOT-based CINT implementation is based on the FORTRAN77
00031   // *      implementation used by the DELPHI experiment at LEP (d_pvmorph.f).
00032   // *      The use of double precision allows pdf's to be accurately 
00033   // *      interpolated down to something like 10**-15.
00034   // *
00035   // *      The input histograms don't have to be identical, the binning is also
00036   // *      interpolated.
00037   // *
00038   // *      Extrapolation is allowed (a warning is given) but the extrapolation 
00039   // *      is not as well-defined as the interpolation and the results should 
00040   // *      be used with great care.
00041   // *
00042   // *      Data in the underflow and overflow bins are completely ignored. 
00043   // *      They are neither interpolated nor do they contribute to the 
00044   // *      normalization of the histograms.
00045   // *
00046   // * Input arguments:
00047   // * ================
00048   // * chname, chtitle : The ROOT name and title of the interpolated histogram.
00049   // *                   Defaults for the name and title are "THF1-interpolated"
00050   // *                   and "Interpolated histogram", respectively.
00051   // *
00052   // * hist1, hist2    : The two input histograms.
00053   // *
00054   // * par1,par2       : The values of the linear parameter that characterises
00055   // *                   the histograms (e.g. a particle mass).
00056   // *
00057   // * parinterp       : The value of the linear parameter we wish to 
00058   // *                   interpolate to. 
00059   // * 
00060   // * morphedhistnorm : The normalization of the interpolated histogram 
00061   // *                   (default is 1.0).  
00062   // * 
00063   // * idebug          : Default is zero, no internal information displayed. 
00064   // *                   Values between 1 and increase the verbosity of 
00065   // *                   informational output which may prove helpful to
00066   // *                   understand errors and pathalogical results.
00067   // * 
00068   // * The routine returns a pointer (TH1_t *) to a new histogram which is
00069   // * the interpolated result.
00070   // *
00071   // *------------------------------------------------------------------------
00072   // Changes from 0.1 to 0.2:
00073   // o Treatment of empty and non-existing histograms now well-defined.
00074   // o The tricky regions of the first and last bins are improved (and
00075   //   well-tested).
00076   // *------------------------------------------------------------------------
00077 
00078   // Return right away if one of the input histograms doesn't exist.
00079   if(!hist1) {
00080     cout << "ERROR! th1morph says first input histogram doesn't exist." << endl;
00081     return(0);
00082   }
00083   if(!hist2) {
00084     cout << "ERROR! th1morph says second input histogram doesn't exist." << endl;
00085     return(0);
00086   }
00087   
00088   // Extract bin parameters of input histograms 1 and 2. 
00089   // Supports the cases of non-equidistant as well as equidistant binning
00090   // and also the case that binning of histograms 1 and 2 is different.
00091   TAxis* axis1 = hist1->GetXaxis();
00092   Int_t nb1 = axis1->GetNbins();
00093   TAxis* axis2 = hist2->GetXaxis();
00094   Int_t nb2 = axis2->GetNbins();
00095 
00096   std::set<Double_t> bedgesn_tmp;
00097   for(Int_t i = 1; i <= nb1; ++i){
00098     bedgesn_tmp.insert(axis1->GetBinLowEdge(i));
00099     bedgesn_tmp.insert(axis1->GetBinUpEdge(i));
00100   }
00101   for(Int_t i = 1; i <= nb2; ++i){
00102     bedgesn_tmp.insert(axis2->GetBinLowEdge(i));
00103     bedgesn_tmp.insert(axis2->GetBinUpEdge(i));
00104   }
00105   Int_t nbn = bedgesn_tmp.size() - 1;
00106   TArrayD bedgesn(nbn+1);
00107   Int_t idx = 0;
00108   for (std::set<Double_t>::const_iterator bedge = bedgesn_tmp.begin();
00109        bedge != bedgesn_tmp.end(); ++bedge){
00110     bedgesn[idx]=(*bedge);
00111     ++idx;
00112   }
00113   Double_t xminn = bedgesn[0];
00114   Double_t xmaxn = bedgesn[nbn];
00115 
00116   // ......The weights (wt1,wt2) are the complements of the "distances" between 
00117   //       the values of the parameters at the histograms and the desired 
00118   //       interpolation point. For example, wt1=0, wt2=1 means that the 
00119   //       interpolated histogram should be identical to input histogram 2.
00120   //       Check that they make sense. If par1=par2 then we can choose any
00121   //       valid set of wt1,wt2 so why not take the average?
00122 
00123   Double_t wt1,wt2;
00124   if (par2 != par1) {
00125     wt1 = 1. - (parinterp-par1)/(par2-par1);
00126     wt2 = 1. + (parinterp-par2)/(par2-par1);
00127   }
00128   else { 
00129     wt1 = 0.5;
00130     wt2 = 0.5;
00131   }
00132 
00133   //......Give a warning if this is an extrapolation.
00134 
00135   if (wt1 < 0 || wt1 > 1. || wt2 < 0. || wt2 > 1. || fabs(1-(wt1+wt2)) 
00136       > 1.0e-4) {
00137     cout << "Warning! th1fmorph: This is an extrapolation!! Weights are "
00138          << wt1 << " and " << wt2 << " (sum=" << wt1+wt2 << ")" << endl;
00139   }
00140   if (idebug >= 1) cout << "th1morph - Weights: " << wt1 << " " << wt2 << endl;
00141 
00142   if (idebug >= 1) cout << "New hist: " << nbn << " " << xminn << " " 
00143                         << xmaxn << endl;
00144 
00145   // Treatment for empty histograms: Return an empty histogram
00146   // with interpolated bins.
00147 
00148   if (hist1->GetSum() <= 0 || hist2->GetSum() <=0 ) {
00149     cout << "Warning! th1morph detects an empty input histogram. Empty interpolated histogram returned: " 
00150          <<endl << "         " << chname << " - " << chtitle << endl;
00151     TH1_t *morphedhist = (TH1_t *)gROOT->FindObject(chname);
00152     if (morphedhist) delete morphedhist;
00153     morphedhist = new TH1_t(chname,chtitle,nbn,xminn,xmaxn);
00154     return(morphedhist);
00155   }
00156   if (idebug >= 1) cout << "Input histogram content sums: " 
00157                         << hist1->GetSum() << " " << hist2->GetSum() << endl;
00158   // *         
00159   // *......Extract the single precision histograms into double precision arrays
00160   // *      for the interpolation computation. The offset is because sigdis(i)
00161   // *      describes edge i (there are nbins+1 of them) while dist1/2
00162   // *      describe bin i. Be careful, ROOT does not use C++ convention to
00163   // *      number bins: dist1[ibin] is content of bin ibin where ibin runs from
00164   // *      1 to nbins. We allocate some extra space for the derived distributions
00165   // *      because there may be as many as nb1+nb2+2 edges in the intermediate 
00166   // *      interpolated cdf described by xdisn[i] (position of edge i) and 
00167   // *      sigdisn[i] (cummulative probability up this edge) before we project 
00168   // *      into the final binning.
00169 
00170   Value_t *dist1=hist1->GetArray(); 
00171   Value_t *dist2=hist2->GetArray();
00172   Double_t *sigdis1 = new Double_t[1+nb1];
00173   Double_t *sigdis2 = new Double_t[1+nb2];
00174   Double_t *sigdisn = new Double_t[2+nb1+nb2];
00175   Double_t *xdisn = new Double_t[2+nb1+nb2];
00176   Double_t *sigdisf = new Double_t[nbn+1];
00177 
00178   for(Int_t i=0;i<2+nb1+nb2;i++) xdisn[i] = 0; // Start with empty edges
00179   sigdis1[0] = 0; sigdis2[0] = 0; // Start with cdf=0 at left edge
00180 
00181   for(Int_t i=1;i<nb1+1;i++) {   // Remember, bin i has edges at i-1 and 
00182     sigdis1[i] = dist1[i];       // i and i runs from 1 to nb.
00183   }
00184   for(Int_t i=1;i<nb2+1;i++) {
00185     sigdis2[i] = dist2[i];
00186   }
00187 
00188   if (idebug >= 3) {
00189     for(Int_t i=0;i<nb1+1;i++) {
00190       cout << i << " dist1" << dist1[i] << endl;
00191     }
00192     for(Int_t i=0;i<nb2+1;i++) {
00193       cout << i << " dist2" << dist2[i] << endl;
00194     }
00195   }
00196   
00197   //......Normalize the distributions to 1 to obtain pdf's and integrate 
00198   //      (sum) to obtain cdf's.
00199 
00200   Double_t total = 0;
00201   for(Int_t i=0;i<nb1+1;i++) {
00202     total += sigdis1[i];
00203   }
00204   if (idebug >=1) cout << "Total histogram 1: " <<  total << endl;
00205   for(Int_t i=1;i<nb1+1;i++) {
00206     sigdis1[i] = sigdis1[i]/total + sigdis1[i-1];
00207   }
00208   
00209   total = 0.;
00210   for(Int_t i=0;i<nb2+1;i++) {
00211     total += sigdis2[i];
00212   }
00213   if (idebug >=1) cout << "Total histogram 22: " <<  total << endl;
00214   for(Int_t i=1;i<nb2+1;i++) {
00215     sigdis2[i] = sigdis2[i]/total + sigdis2[i-1];
00216   }
00217 
00218   // *
00219   // *......We are going to step through all the edges of both input
00220   // *      cdf's ordered by increasing value of y. We start at the
00221   // *      lower edge, but first we should identify the upper ends of the
00222   // *      curves. These (ixl1, ixl2) are the first point in each cdf from 
00223   // *      above that has the same integral as the last edge.
00224   // *
00225 
00226   Int_t ix1l = nb1;
00227   Int_t ix2l = nb2;
00228   while(sigdis1[ix1l-1] >= sigdis1[ix1l]) {
00229     ix1l = ix1l - 1;
00230   }
00231   while(sigdis2[ix2l-1] >= sigdis2[ix2l]) {
00232     ix2l = ix2l - 1;
00233   }
00234 
00235   // *
00236   // *......Step up to the beginnings of the curves. These (ix1, ix2) are the
00237   // *      first non-zero points from below.
00238 
00239   Int_t ix1 = -1;
00240   do {
00241     ix1 = ix1 + 1;
00242   } while(sigdis1[ix1+1] <= sigdis1[0]);
00243 
00244   Int_t ix2 = -1;
00245   do {
00246     ix2 = ix2 + 1;
00247   } while(sigdis2[ix2+1] <= sigdis2[0]);
00248 
00249   if (idebug >= 1) {
00250     cout << "First and last edge of hist1: " << ix1 << " " << ix1l << endl;
00251     cout << "   " << sigdis1[ix1] << " " << sigdis1[ix1+1] << endl;
00252     cout << "First and last edge of hist2: " << ix2 << " " << ix2l << endl;
00253     cout << "   " << sigdis2[ix2] << " " << sigdis2[ix2+1] << endl;
00254   }
00255 
00256   //......The first interpolated point should be computed now.
00257 
00258   Int_t nx3 = 0;
00259   Double_t x1,x2,x;
00260   x1 = axis1->GetBinLowEdge(ix1+1); 
00261   x2 = axis2->GetBinLowEdge(ix2+1); 
00262   x = wt1*x1 + wt2*x2;
00263   xdisn[nx3] = x;
00264   sigdisn[nx3] = 0;
00265   if(idebug >= 1) {
00266     cout << "First interpolated point: " << xdisn[nx3] << " " 
00267          << sigdisn[nx3] << endl;
00268     cout << "                          " << x1 << " <= " << x << " <= " 
00269          << x2 << endl;
00270   }
00271 
00272   //......Loop over the remaining point in both curves. Getting the last
00273   //      points may be a bit tricky due to limited floating point 
00274   //      precision.
00275 
00276   if (idebug >= 1) {
00277     cout << "----BEFORE while with ix1=" << ix1 << ", ix1l=" << ix1l 
00278          << ", ix2=" << ix2 << ", ix2l=" << ix2l << endl;
00279   }
00280 
00281   Double_t yprev = -1; // The probability y of the previous point, it will 
00282                        //get updated and used in the loop.
00283   Double_t y = 0;
00284   while((ix1 < ix1l) | (ix2 < ix2l)) {
00285     if (idebug >= 1 ) cout << "----Top of while with ix1=" << ix1 
00286                            << ", ix1l=" << ix1l << ", ix2=" << ix2 
00287                            << ", ix2l=" << ix2l << endl;
00288 
00289     //......Increment to the next lowest point. Step up to the next
00290     //      kink in case there are several empty (flat in the integral)
00291     //      bins.
00292 
00293     Int_t i12type = -1; // Tells which input distribution we need to 
00294                         // see next point of.
00295     if ((sigdis1[ix1+1] <= sigdis2[ix2+1] || ix2 == ix2l) && ix1 < ix1l) {
00296       ix1 = ix1 + 1;
00297       while(sigdis1[ix1+1] <= sigdis1[ix1] && ix1 < ix1l) {
00298         ix1 = ix1 + 1;
00299       }
00300       i12type = 1;
00301     } else if (ix2 < ix2l) {
00302       ix2 = ix2 + 1;
00303       while(sigdis2[ix2+1] <= sigdis2[ix2] && ix2 < ix2l) {
00304         ix2 = ix2 + 1;
00305       }
00306       i12type = 2;
00307     }
00308     if (i12type == 1) {
00309       if (idebug >= 3) {
00310         cout << "Pair for i12type=1: " << sigdis2[ix2] << " " 
00311              << sigdis1[ix1] << " " << sigdis2[ix2+1] << endl;
00312       }
00313       x1 = axis1->GetBinLowEdge(ix1+1);
00314       y = sigdis1[ix1];
00315       Double_t x20 = axis2->GetBinLowEdge(ix2+1);
00316       Double_t x21 = axis2->GetBinUpEdge(ix2+1);
00317       Double_t y20 = sigdis2[ix2];
00318       Double_t y21 = sigdis2[ix2+1];
00319 
00320       //......Calculate where the cummulative probability y in distribution 1
00321       //      intersects between the 2 points from distribution 2 which 
00322       //      bracket it.
00323 
00324       if (y21 > y20) {
00325         x2 = x20 + (x21-x20)*(y-y20)/(y21-y20);
00326       } 
00327       else {
00328         x2 = x20;
00329       }
00330     } else {
00331       if (idebug >= 3) {
00332         cout << "Pair for i12type=2: " << sigdis1[ix1] << " " << sigdis2[ix2] 
00333              << " " << sigdis1[ix1+1] << endl;
00334       }
00335       x2 = axis2->GetBinLowEdge(ix2+1);
00336       y = sigdis2[ix2];
00337       Double_t x10 = axis1->GetBinLowEdge(ix1+1);
00338       Double_t x11 = axis1->GetBinUpEdge(ix1+1);
00339       Double_t y10 = sigdis1[ix1];
00340       Double_t y11 = sigdis1[ix1+1];
00341 
00342       //......Calculate where the cummulative probability y in distribution 2
00343       //      intersects between the 2 points from distribution 1 which 
00344       //      brackets it.
00345 
00346       if (y11 > y10) {
00347         x1 = x10 + (x11-x10)*(y-y10)/(y11-y10);
00348       } else {
00349         x1 = x10;
00350       }
00351     }
00352 
00353     //......Interpolate between the x's in the 2 distributions at the 
00354     //      cummulative probability y. Store the (x,y) for provisional 
00355     //      edge nx3 in (xdisn[nx3],sigdisn[nx3]). nx3 grows for each point
00356     //      we add the the arrays. Note: Should probably turn the pair into 
00357     //      a structure to make the code more object-oriented and readable.
00358 
00359     x = wt1*x1 + wt2*x2;
00360     if (y > yprev) {
00361       nx3 = nx3+1;
00362       if (idebug >= 1) {
00363         cout << " ---> y > yprev: i12type=" << i12type << ", nx3=" 
00364              << nx3 << ", x= " << x << ", y=" << y << ", yprev=" << yprev 
00365              << endl;
00366       }
00367       yprev = y;
00368       xdisn[nx3] = x;
00369       sigdisn[nx3] = y;
00370       if(idebug >= 1) {
00371         cout << "    ix1=" << ix1 << ", ix2= " << ix2 << ", i12type= " 
00372              << i12type << ", sigdis1[ix1]=" << sigdis1[ix1] << endl;
00373         cout << "        " << ", nx3=" << nx3 << ", x=" << x << ", y= " 
00374              << sigdisn[nx3] << endl;
00375       }
00376     }
00377   }
00378   if (idebug >=3) for (Int_t i=0;i<nx3;i++) {
00379     cout << " nx " << i << " " << xdisn[i] << " " << sigdisn[i] << endl;
00380   }
00381 
00382   // *......Now we loop over the edges of the bins of the interpolated
00383   // *      histogram and find out where the interpolated cdf 3
00384   // *      crosses them. This projection defines the result and will
00385   // *      be stored (after differention and renormalization) in the
00386   // *      output histogram.
00387   // *
00388   // *......We set all the bins following the final edge to the value
00389   // *      of the final edge.
00390 
00391   x = xmaxn;
00392   Int_t ix = nbn;
00393 
00394   if (idebug >= 1) cout << "------> Any final bins to set? " << x << " " 
00395                         << xdisn[nx3] << endl;
00396   while(x >= xdisn[nx3]) {
00397     sigdisf[ix] = sigdisn[nx3];
00398     if (idebug >= 2) cout << "   Setting final bins" << ix << " " << x 
00399                           << " " << sigdisf[ix] << endl;
00400     ix = ix-1;
00401     x = bedgesn[ix];
00402   }
00403   Int_t ixl = ix + 1;
00404   if (idebug >= 1) cout << " Now ixl=" << ixl << " ix=" << ix << endl;
00405 
00406   // *
00407   // *......The beginning may be empty, so we have to step up to the first
00408   // *      edge where the result is nonzero. We zero the bins which have
00409   // *      and upper (!) edge which is below the first point of the
00410   // *      cummulative distribution we are going to project to this
00411   // *      output histogram binning.
00412   // *
00413 
00414   ix = 0;
00415   x = bedgesn[ix+1];
00416   if (idebug >= 1) cout << "Start setting initial bins at x=" << x << endl;
00417   while(x <= xdisn[0]) {
00418     sigdisf[ix] = sigdisn[0];
00419     if (idebug >= 1) cout << "   Setting initial bins " << ix << " " << x 
00420                           << " " << xdisn[1] << " " << sigdisf[ix] << endl;
00421     ix = ix+1;
00422     x = bedgesn[ix+1];
00423   }
00424   Int_t ixf = ix;
00425 
00426   if (idebug >= 1)
00427     cout << "Bins left to loop over:" << ixf << "-" << ixl << endl;
00428 
00429   // *......Also the end (from y to 1.0) often comes before the last edge
00430   // *      so we have to set the following to 1.0 as well.
00431 
00432   Int_t ix3 = 0; // Problems with initial edge!!!
00433   for(ix=ixf;ix<ixl;ix++) {
00434     x = bedgesn[ix];
00435     if (x < xdisn[0]) {
00436       y = 0;
00437     } else if (x > xdisn[nx3]) {
00438       y = 1.;
00439     } else {
00440       while(xdisn[ix3+1] <= x && ix3 < 2*nbn) {
00441         ix3 = ix3 + 1;
00442       }
00443       Double_t dx2=axis2->GetBinWidth(axis2->FindBin(x));
00444       if (xdisn[ix3+1]-x > 1.1*dx2) { // Empty bin treatment
00445         y = sigdisn[ix3+1];
00446       }
00447       else if (xdisn[ix3+1] > xdisn[ix3]) { // Normal bins
00448         y = sigdisn[ix3] + (sigdisn[ix3+1]-sigdisn[ix3])
00449           *(x-xdisn[ix3])/(xdisn[ix3+1]-xdisn[ix3]);
00450       } else {  // Is this ever used?
00451         y = 0;
00452         cout << "Warning - th1fmorph: This probably shoudn't happen! " 
00453              << endl;
00454         cout << "Warning - th1fmorph: Zero slope solving x(y)" << endl;
00455       }
00456     }
00457     sigdisf[ix] = y;
00458     if (idebug >= 3) {
00459       cout << ix << ", ix3=" << ix3 << ", xdisn=" << xdisn[ix3] << ", x=" 
00460            << x << ", next xdisn=" << xdisn[ix3+1] << endl;
00461       cout << "   cdf n=" << sigdisn[ix3] << ", y=" << y << ", next point=" 
00462            << sigdisn[ix3+1] << endl;
00463     }
00464   }
00465 
00466   //......Differentiate interpolated cdf and return renormalized result in 
00467   //      new histogram. 
00468 
00469   TH1_t *morphedhist = (TH1_t *)gROOT->FindObject(chname);
00470   if (morphedhist) delete morphedhist;
00471   morphedhist = new TH1_t(chname,chtitle,nbn,bedgesn.GetArray());
00472 
00473   for(ix=nbn-1;ix>-1;ix--) {
00474     y = sigdisf[ix+1]-sigdisf[ix];
00475     morphedhist->SetBinContent(ix+1,y*morphedhistnorm);
00476   }
00477   
00478   //......Clean up the temporary arrays we allocated.
00479 
00480   delete sigdis1; delete sigdis2; 
00481   delete sigdisn; delete xdisn; delete sigdisf;
00482 
00483   //......All done, return the result.
00484 
00485   return(morphedhist);
00486 }
00487 
00488 
00489 TH1F *th1fmorph(const char *chname, 
00490                 const char *chtitle,
00491                 TH1F *hist1,TH1F *hist2,
00492                 Double_t par1,Double_t par2,Double_t parinterp,
00493                 Double_t morphedhistnorm,
00494                 Int_t idebug)
00495 { return th1fmorph_<TH1F, Float_t>(chname, chtitle, hist1, hist2, par1, par2, parinterp, morphedhistnorm, idebug); }
00496 
00497 TH1D *th1fmorph(const char *chname, 
00498                 const char *chtitle,
00499                 TH1D *hist1,TH1D *hist2,
00500                 Double_t par1,Double_t par2,Double_t parinterp,
00501                 Double_t morphedhistnorm,
00502                 Int_t idebug)
00503 { return th1fmorph_<TH1D, Double_t>(chname, chtitle, hist1, hist2, par1, par2, parinterp, morphedhistnorm, idebug); }