CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_2_7_hltpatch1/src/PhysicsTools/Utilities/src/SideBandSubtraction.cc

Go to the documentation of this file.
00001 
00002 // Author: David Bjergaard
00003 // 
00004 // Library for Generic Sideband Subtraction Methods.
00005 //
00006 // This library is designed to be a tool which serves two purposes.
00007 // Its primary purpose is to provide a generic framework for doing
00008 // sideband subtraction.  It will also plug into current tag and probe
00009 // modules to prevent code duplication and redundancy.
00010 //
00012 
00013 #include "PhysicsTools/Utilities/interface/SideBandSubtraction.h"
00014 // System includes
00015 #include <iostream>
00016 #include <cstdlib>
00017 #include <sstream>
00018 
00019 // ROOT includes
00020 #include <TCanvas.h>
00021 #include <TFile.h>
00022 #include <TF1.h>
00023 #include <TH1F.h>
00024 #include <TString.h>
00025 #include <TKey.h>
00026 #include <TClass.h>
00027 
00028 // RooFit includes
00029 #include <RooFitResult.h>
00030 #include <RooRealVar.h>
00031 #include <RooAbsPdf.h>
00032 #include <RooDataSet.h>
00033 #include <RooPlot.h>
00034 
00035 using namespace RooFit;
00036 using std::cout;
00037 using std::cerr;
00038 using std::endl;
00039 using std::string;
00040 using std::vector;
00041 
00042 template <class T>
00043 inline std::string stringify(const T& t)
00044 {
00045   std::ostringstream o;
00046   if (!(o << t))
00047     return "err";
00048   return o.str();
00049 } 
00050 Double_t SideBandSubtract::getYield(std::vector<SbsRegion> Regions, RooAbsPdf *PDF)
00051 {
00052   if(PDF==NULL || SeparationVariable==NULL)
00053     return 0.0;
00054 
00055   Double_t yield=0;
00056   RooAbsReal* intPDF;
00057   for(unsigned int i=0; i < Regions.size(); i++)
00058     {
00059       if(verbose)
00060         cout<<"Integrating over Range: "<<Regions[i].RegionName<<" from "<<Regions[i].min<<" to "<<Regions[i].max<<endl;
00061       intPDF = PDF->createIntegral(*SeparationVariable, Range(Regions[i].RegionName.c_str()));
00062       yield += intPDF->getVal();
00063       if(verbose)
00064         cout<<"Current yield: "<<yield<<endl;
00065     }
00066   return yield;
00067 }
00068 static void setHistOptions(TH1F* histo, string name, string title, string axis_label)
00069 {
00070 
00071   histo->SetName(name.c_str());
00072   histo->SetTitle(title.c_str());
00073   if(axis_label == "GeV/c^2")
00074     axis_label = "Mass (" + axis_label + ")";
00075   if(axis_label == "GeV/c")
00076     axis_label = "Momentum (" + axis_label + ")";
00077   histo->GetXaxis()->SetTitle(axis_label.c_str());
00078 }
00079 int SideBandSubtract::doSubtraction(RooRealVar* variable, Double_t stsratio,Int_t index) //stsratio -> signal to sideband ratio
00080 {
00081   if(Data==NULL || SeparationVariable==NULL)
00082     {
00083       cerr << "ERROR: Data or SeparationVariable is NULL returning now!\n";
00084       return -1;
00085     }
00086   TH1F* SideBandHist = (TH1F*)BaseHistos[index]->Clone();
00087   setHistOptions(SideBandHist,(string)variable->GetName()+"Sideband",(string)SideBandHist->GetTitle() + " Sideband",(string)variable->getUnit());
00088 
00089   TH1F* SignalHist = (TH1F*)BaseHistos[index]->Clone();
00090   setHistOptions(SignalHist,(string)variable->GetName()+"SignalHist",(string)SignalHist->GetTitle() + " Raw Signal",(string)variable->getUnit());
00091 
00092   //Begin a loop over the data to fill our histograms. I should figure
00093   //out how to do this in one shot to avoid a loop
00094   //O(N_vars*N_events)...
00095 
00096   TIterator* iter = (TIterator*) Data->get()->createIterator();
00097   RooAbsArg *var=NULL;
00098   RooRealVar *sep_var=NULL;
00099   while((var = (RooAbsArg*)iter->Next()))
00100     {
00101       if((string)var->GetName()==(string)SeparationVariable->GetName())
00102         {
00103           sep_var = (RooRealVar*)var;
00104           break;
00105         }
00106     }
00107   for(int i=0; i < Data->numEntries(); i++)
00108     {
00109       Data->get(i);
00110       Double_t value = variable->getVal();
00111       Double_t cutval = sep_var->getVal();
00112 
00113       for(unsigned int j=0; j < SideBandRegions.size(); j++) //UGLY!  Find better solution!
00114         {
00115           if(cutval > SideBandRegions[j].min && cutval < SideBandRegions[j].max)
00116             SideBandHist->Fill(value);
00117         }
00118       for(unsigned int j=0; j < SignalRegions.size(); j++)
00119         {
00120           if(cutval > SignalRegions[j].min && cutval < SignalRegions[j].max)
00121             SignalHist->Fill(value);
00122         }
00123     }
00124   //Save pre-subtracted histo
00125   SignalHist->Sumw2(); SideBandHist->Sumw2(); 
00126   //SignalHist->SetDirectory(0); SideBandHist->SetDirectory(0);
00127   RawHistos.push_back(*SignalHist);
00128 
00129   SignalHist->Add(SideBandHist, -stsratio);
00130 
00131   SignalHist->SetTitle(((string)BaseHistos[index]->GetTitle() + " SBS Signal").c_str());
00132   //Save subtracted histo
00133   SBSHistos.push_back(*SignalHist);
00134   //Save Sideband histo
00135   SideBandHistos.push_back(*SideBandHist);
00136 
00137   if(SideBandHist) delete SideBandHist;
00138   return 0;
00139 }
00140 static void print_histo(TH1F* plot, string outname)
00141 {
00142   TCanvas genericCanvas;
00143   plot->Draw("E1P0");
00144   outname = outname + ".eps";
00145   genericCanvas.SaveAs(outname.c_str());
00146   outname.replace(outname.size()-3,3,"gif");
00147   genericCanvas.SaveAs(outname.c_str());
00148 }
00149 void SideBandSubtract::printResults(string prefix)
00150 {//handles *all* printing
00151   //spool over vectors of histograms and print them, then print
00152   //separation variable plots and the results text file.
00153   if(SeparationVariable==NULL)
00154     {
00155       cerr << "ERROR: printResults, SeparationVariable is NULL!\n";
00156       return;
00157     }
00158   string filename; //output file name
00159   for(unsigned int i=0; i < RawHistos.size(); ++i)
00160     {
00161       filename=prefix + "Raw_" + (string)RawHistos[i].GetName();
00162       print_histo(&RawHistos[i], filename);
00163     }
00164   for(unsigned int i=0; i < SBSHistos.size(); ++i)
00165     {
00166       filename=prefix + "SBS_" + (string)RawHistos[i].GetName();
00167       print_histo(&SBSHistos[i], filename);
00168     }
00169   if(verbose)
00170     {
00171       for(unsigned int i=0; i < SideBandHistos.size(); ++i)
00172         {
00173           filename=prefix + "SideBand_" + (string)RawHistos[i].GetName();
00174           print_histo(&SideBandHistos[i], filename);
00175         }
00176     }
00177 
00178   string outname = prefix + (string)SeparationVariable->GetName() + "_fitted.eps";
00179   if(Data!=NULL && ModelPDF!=NULL)
00180     {
00181       RooPlot *SepVarFrame = SeparationVariable->frame();
00182       Data->plotOn(SepVarFrame);
00183       ModelPDF->plotOn(SepVarFrame);
00184       TCanvas Canvas;
00185       SepVarFrame->Draw();
00186       Canvas.SaveAs(outname.c_str());
00187       outname.replace(outname.size()-3,3,"gif");
00188       Canvas.SaveAs(outname.c_str());
00189     }
00190   else
00191     cerr <<"ERROR: printResults, Data or ModelPDF is NULL!\n";
00192 
00193   string result_outname = prefix + "_fit_results.txt";
00194   ofstream output(result_outname.c_str(),ios::out);
00195   if(!output)
00196     {
00197       cout <<"ERROR: Could not open file for writing!\n";
00198       return;
00199     }
00200 
00201   if(fit_result!=NULL)
00202     {
00203 #if ROOT_VERSION_CODE <= ROOT_VERSION(5,19,0)
00204       fit_result->printToStream(output,Verbose);
00205 #else
00206       fit_result->printMultiline(output,kTRUE);
00207 #endif
00208     }
00209 }
00210 
00211 
00212 void SideBandSubtract::saveResults(string outname)
00213 {
00214   //saves the ephemeral stuff to a root file for future
00215   //use/modification (ie everything printed by printResults())
00216 
00217   TFile output(outname.c_str(),"UPDATE"); //open the output file,
00218                                           //create it if it doesn't
00219                                           //exist
00220   //Since keys are only available from files on disk, we need to write
00221   //out a new file.  If the file already existed, then we opened to
00222   //update, and are writing nothing new.  
00223   output.Write();
00224   TString dirname;
00225   TIter nextkey(output.GetListOfKeys());
00226   TKey *key;
00227   TDirectory* curDir=NULL;
00228   while((key=(TKey*)nextkey.Next()))
00229     {
00230 
00231       if(key==NULL)
00232         break;
00233       TObject *obj = key->ReadObj();
00234       if(obj->IsA()->InheritsFrom("TDirectory"))
00235           dirname=obj->GetName();
00236 
00237     }
00238 
00239   if(dirname=="")
00240     {
00241       //we didn't find any directories so, we'll make a new one
00242       curDir = output.mkdir("run0","Run 0");
00243       output.cd("run0");
00244     }
00245   else
00246     {
00247       //manipulate last dir string, make a new one, and get ready to fill
00248       dirname.Remove(0,3);
00249       Int_t run_num = dirname.Atoi();
00250       run_num++;
00251       dirname = "run" + stringify(run_num);
00252       curDir = output.mkdir(dirname.Data(),("Run "+stringify(run_num)).c_str());
00253       output.cd(dirname.Data());
00254     }
00255   if(curDir==NULL)
00256     curDir = output.GetDirectory("",kTRUE,"");
00257 
00258   //these should all be the same size, but to be pedantic we'll loop
00259   //over each one individually, also, we need to associate them with
00260   //the directory because by default they "float" in memory to avoid
00261   //conflicts with other root files the user has open. If they want to
00262   //write to those files, they need to close their file, pass the name
00263   //here, and then let us work.
00264   for(unsigned int i=0; i < SideBandHistos.size(); ++i)
00265     {
00266       SideBandHistos[i].SetDirectory(curDir);
00267       SideBandHistos[i].Write();
00268     }
00269   for(unsigned int i=0; i < RawHistos.size(); ++i)
00270     {
00271       RawHistos[i].SetDirectory(curDir);
00272       RawHistos[i].Write();
00273     }
00274   for(unsigned int i=0; i < SBSHistos.size(); ++i)
00275     {
00276       SBSHistos[i].SetDirectory(curDir);
00277       SBSHistos[i].Write();
00278     }
00279   if(Data!=NULL && ModelPDF!=NULL && BackgroundPDF!=NULL && SeparationVariable!=NULL)
00280     {
00281       RooPlot *sep_varFrame = SeparationVariable->frame();
00282       Data->plotOn(sep_varFrame);
00283       ModelPDF->plotOn(sep_varFrame);
00284       BackgroundPDF->plotOn(sep_varFrame);
00285       sep_varFrame->Write();
00286     }
00287   else
00288     cerr <<"ERROR: saveResults, did not save RooPlot of data and fit\n";
00289   output.Write();
00290 
00291 }
00292 void SideBandSubtract::setDataSet(RooDataSet* newData)
00293 {
00294   if(newData!=NULL)
00295     Data=newData;
00296 }
00297 void SideBandSubtract::print_plot(RooRealVar* printVar,string outname)
00298 {
00299   if(Data==NULL || ModelPDF==NULL)
00300     {
00301       cerr << "ERROR: print_plot, Data or ModelPDF are NULL\n";
00302       return;
00303     }
00304 
00305   RooPlot *genericFrame = printVar->frame();
00306   Data->plotOn(genericFrame);
00307   ModelPDF->plotOn(genericFrame);
00308   TCanvas genericCanvas;
00309   genericFrame->Draw();
00310   outname = outname + ".eps";
00311   genericCanvas.SaveAs(outname.c_str());
00312   outname.replace(outname.size()-3,3,"gif");
00313   genericCanvas.SaveAs(outname.c_str());
00314 }
00315 SideBandSubtract::SideBandSubtract()
00316   : BackgroundPDF(0), 
00317     ModelPDF(0), 
00318     Data(0),
00319     SeparationVariable(0),
00320     verbose(0),
00321     SignalRegions(),
00322     SideBandRegions(),
00323     SideBandHistos(0),
00324     RawHistos(0),
00325     SBSHistos(0),
00326     BaseHistos(0),
00327     fit_result(0),
00328     SignalSidebandRatio(0)
00329 {
00330   // Default constructor so we can do fast subtraction
00331 }
00332 SideBandSubtract::SideBandSubtract(RooAbsPdf *model_shape, 
00333                                    RooAbsPdf *bkg_shape, 
00334                                    RooDataSet* data,
00335                                    RooRealVar* sep_var,
00336                                    vector<TH1F*> base,
00337                                    bool verb
00338                                    )
00339   : BackgroundPDF(bkg_shape), 
00340     ModelPDF(model_shape), 
00341     Data(data),
00342     SeparationVariable(sep_var),
00343     verbose(verb),
00344     SignalRegions(),
00345     SideBandRegions(),
00346     SideBandHistos(),
00347     RawHistos(),
00348     SBSHistos(),
00349     BaseHistos(base),
00350     base_histo(0),
00351     fit_result(0),
00352     SignalSidebandRatio(0)
00353 { }
00354 /*
00355 SideBandSubtract::SideBandSubtract(RooAbsPdf *model_shape, 
00356                                    RooAbsPdf *bkg_shape, 
00357                                    RooDataSet* data,
00358                                    RooRealVar* sep_var,
00359                                    bool verb
00360                                    )
00361   : BackgroundPDF(bkg_shape), 
00362     ModelPDF(model_shape), 
00363     Data(data),
00364     SeparationVariable(sep_var),
00365     verbose(verb),
00366     SignalRegions(),
00367     SideBandRegions(),
00368     SideBandHistos(),
00369     RawHistos(),
00370     SBSHistos(),
00371     BaseHistos(),
00372     base_histo(0),
00373     fit_result(0),
00374     SignalSidebandRatio(0)
00375 {
00376   // Build BaseHistos from dataset
00377   TIterator* iter = (TIterator*) Data->get()->createIterator();
00378   RooAbsArg *var=NULL;
00379   RooRealVar *variable=NULL;
00380   cout <<"Starting while loop\n";
00381   while((var = (RooAbsArg*)iter->Next()))
00382     {
00383       if((string)var->ClassName() != "RooRealVar")
00384         continue;
00385       variable = (RooRealVar*)var;
00386       //we own the data this points to so we need to delete it at the end...
00387       assert(variable!=NULL);
00388       string title = "base_"+(string)variable->GetName();
00389       base_histo = (TH1F*)Data->createHistogram(title.c_str(), *variable, Binning(variable->getBinning("default",verb,kTRUE)) );
00390       //cout <<"Made histo with name: "<<base_histo->GetName()<<endl;
00391       //base_histo->SetDirectory(0);
00392       BaseHistos.push_back(*base_histo);
00393       cout <<"Added histo to BaseHistos!\n Deleting local copy...";
00394       if(base_histo) delete base_histo;
00395       cout <<"Done!\n";
00396     }
00397 
00398 }
00399 */
00400 SideBandSubtract::~SideBandSubtract()
00401 {
00402   //**WARNING** 
00403 
00404   // We don't delete objects that we don't own (duh) so, all of our
00405   // pointers just hang out and get handled by other people :)
00406  
00407   // We DO own the BaseHistos IFF the user didn't provide us with 
00408   // them and we constructed them from the dataset... in this case 
00409   // base_histo will be non-null and we want to delete the vector 
00410   // of histograms...
00411   resetSBSProducts();
00412   /*
00413   if(base_histo!=NULL)
00414     {
00415       BaseHistos.clear();
00416     }
00417   */
00418 }
00419 void SideBandSubtract::addSignalRegion(Double_t min, Double_t max)
00420 {
00421   SbsRegion signal;
00422   signal.min=min;
00423   signal.max=max;
00424   signal.RegionName="Signal" + stringify(SignalRegions.size());
00425   if(SeparationVariable!=NULL)
00426     SeparationVariable->setRange(signal.RegionName.c_str(),signal.min,signal.max);
00427   SignalRegions.push_back(signal);
00428   return;
00429 }
00430 void SideBandSubtract::addSideBandRegion(Double_t min, Double_t max)
00431 {
00432   SbsRegion sideband;
00433   sideband.min=min;
00434   sideband.max=max;
00435   sideband.RegionName="SideBand" + stringify(SideBandRegions.size());
00436   if(SeparationVariable!=NULL)
00437     SeparationVariable->setRange(sideband.RegionName.c_str(),sideband.min,sideband.max);
00438   SideBandRegions.push_back(sideband);
00439   return;
00440 }
00441 int SideBandSubtract::doGlobalFit()
00442 {
00443   if(verbose)
00444     cout <<"Beginning SideBand Subtraction\n";
00445 
00446   if(ModelPDF!=NULL && Data!=NULL && SeparationVariable!=NULL)
00447     {
00448       fit_result = ModelPDF->fitTo(*Data,"r");
00449     }
00450   else
00451     {
00452       cerr <<"ERROR: doGobalFit, no ModelPDF, SeparationVariable or Data specified\n";
00453       return -1;
00454     }
00455 
00456   Double_t SideBandYield=getYield(SideBandRegions,BackgroundPDF);
00457   Double_t  BackgroundInSignal=getYield(SignalRegions,BackgroundPDF);
00458 
00459   SignalSidebandRatio = BackgroundInSignal/SideBandYield;
00460   if(verbose)
00461     {
00462       cout <<"Finished fitting background!\n";
00463       cout <<"Attained a Signal to Sideband ratio of: " << SignalSidebandRatio<<endl;
00464     }
00465   //I can't see a way around a double loop for each variable.  Maybe I
00466   //can get a C++/RooFit guru to save me the trouble here?
00467 
00468 
00469   //need to grab sbs objects after each global fit, because they get reset
00470   resetSBSProducts();
00471   TIterator* iter = (TIterator*) Data->get()->createIterator();
00472   RooAbsArg *variable;
00473   while((variable = (RooAbsArg*)iter->Next()))
00474     {
00475       for(unsigned int i=0; i < BaseHistos.size(); i++)
00476         {
00477           if((string)variable->GetName()!=(string)SeparationVariable->GetName() 
00478              && (string)variable->GetName()==(string)BaseHistos[i]->GetName())
00479             doSubtraction((RooRealVar*)variable,SignalSidebandRatio,i);
00480         }
00481     }
00482 
00483   //  clean up our memory...
00484   if(variable)      delete variable;
00485   if(iter)          delete iter;
00486   return 0;
00487 }
00488 void SideBandSubtract::doFastSubtraction(TH1F &Total, TH1F &Result, SbsRegion& leftRegion, SbsRegion& rightRegion)
00489 {
00490   Int_t binMin = Total.FindBin(leftRegion.max,0.0,0.0);
00491   Int_t binMax = Total.FindBin(leftRegion.min,0.0,0.0);
00492   double numLeft = Total.Integral( binMin, binMax );
00493 
00494   binMin = Total.FindBin(rightRegion.max,0.0,0.0);
00495   binMax = Total.FindBin(rightRegion.min,0.0,0.0);
00496   double numRight = Total.Integral( binMin, binMax );
00497   
00498   const unsigned int nbinsx = Total.GetNbinsX();
00499   const double x1 = (leftRegion.max + leftRegion.min)/2.0;
00500   const double x2 = (rightRegion.max + rightRegion.min)/2.0;
00501 
00502   const double y1 = numLeft/(leftRegion.max - leftRegion.min);
00503   const double y2 = numRight/(rightRegion.max - rightRegion.min);
00504     
00505   const double Slope = (y2 - y1)/(x2 - x1);
00506   const double Intercept = y1 - Slope*x1;
00507   // Evantually we want to handle more complicated functions, but for
00508   // now... just use a linear one
00509   TF1 function("sbs_function_line", "[0]*x + [1]",Total.GetMinimum(), Total.GetMaximum());
00510   for ( unsigned int binx = 1;  binx <= nbinsx; ++binx ) 
00511     {
00512       double binWidth = Total.GetBinWidth(binx);
00513       function.SetParameter(0,Slope*binWidth);
00514       function.SetParameter(1,Intercept*binWidth);
00515 
00516       double xx = Total.GetBinCenter(binx);
00517       double cu = Total.GetBinContent(binx) - function.Eval(xx);    
00518       // TODO: Propogate the  error on the parameters in function.
00519       double error1 = Total.GetBinError(binx);
00520     
00521       Result.SetBinContent(binx, cu);
00522       Result.SetBinError(binx, error1);
00523     }
00524   Result.SetEntries(Result.Integral() );
00525 }
00526 RooFitResult* SideBandSubtract::getFitResult()
00527 {
00528   return fit_result;
00529 }
00530 vector<TH1F*> SideBandSubtract::getBaseHistos()
00531 {
00532   return BaseHistos;
00533 }
00534 vector<TH1F> SideBandSubtract::getRawHistos()
00535 {
00536   return RawHistos;
00537 }
00538 vector<TH1F> SideBandSubtract::getSBSHistos()
00539 {
00540   return SBSHistos;
00541 }
00542 Double_t SideBandSubtract::getSTSRatio()
00543 {
00544   return SignalSidebandRatio;
00545 }
00546 void SideBandSubtract::resetSBSProducts()
00547 {
00548   //cout <<"Starting to reset products \n";
00549 
00550   if(!SideBandHistos.empty())
00551     SideBandHistos.clear();
00552 
00553   if(!RawHistos.empty())
00554     RawHistos.clear();
00555   if(!SBSHistos.empty())
00556     SBSHistos.clear();
00557 }