CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_6_2_5/src/PhysicsTools/Utilities/src/SideBandSubtraction.cc

Go to the documentation of this file.
00001 
00002 // Author: David Bjergaard
00003 // 
00004 // Library for Generic Sideband Subtraction Methods.
00005 //
00006 // This library is designed to be a tool which serves two purposes.
00007 // Its primary purpose is to provide a generic framework for doing
00008 // sideband subtraction.  It will also plug into current tag and probe
00009 // modules to prevent code duplication and redundancy.
00010 //
00012 
00013 #include "PhysicsTools/Utilities/interface/SideBandSubtraction.h"
00014 // System includes
00015 #include <iostream>
00016 #include <ios>
00017 #include <fstream>
00018 #include <cstdlib>
00019 #include <sstream>
00020 
00021 // ROOT includes
00022 #include <TCanvas.h>
00023 #include <TFile.h>
00024 #include <TF1.h>
00025 #include <TH1F.h>
00026 #include <TString.h>
00027 #include <TKey.h>
00028 #include <TClass.h>
00029 
00030 // RooFit includes
00031 #include <RooFitResult.h>
00032 #include <RooRealVar.h>
00033 #include <RooAbsPdf.h>
00034 #include <RooDataSet.h>
00035 #include <RooPlot.h>
00036 
00037 using namespace RooFit;
00038 using std::cout;
00039 using std::cerr;
00040 using std::endl;
00041 using std::string;
00042 using std::vector;
00043 
00044 template <class T>
00045 inline std::string stringify(const T& t)
00046 {
00047   std::ostringstream o;
00048   if (!(o << t))
00049     return "err";
00050   return o.str();
00051 } 
00052 Double_t SideBandSubtract::getYield(std::vector<SbsRegion> Regions, RooAbsPdf *PDF)
00053 {
00054   if(PDF==NULL || SeparationVariable==NULL)
00055     return 0.0;
00056 
00057   Double_t yield=0;
00058   RooAbsReal* intPDF;
00059   for(unsigned int i=0; i < Regions.size(); i++)
00060     {
00061       if(verbose)
00062         cout<<"Integrating over Range: "<<Regions[i].RegionName<<" from "<<Regions[i].min<<" to "<<Regions[i].max<<endl;
00063       intPDF = PDF->createIntegral(*SeparationVariable, Range(Regions[i].RegionName.c_str()));
00064       yield += intPDF->getVal();
00065       if(verbose)
00066         cout<<"Current yield: "<<yield<<endl;
00067     }
00068   return yield;
00069 }
00070 static void setHistOptions(TH1F* histo, string name, string title, string axis_label)
00071 {
00072 
00073   histo->SetName(name.c_str());
00074   histo->SetTitle(title.c_str());
00075   if(axis_label == "GeV/c^2")
00076     axis_label = "Mass (" + axis_label + ")";
00077   if(axis_label == "GeV/c")
00078     axis_label = "Momentum (" + axis_label + ")";
00079   histo->GetXaxis()->SetTitle(axis_label.c_str());
00080 }
00081 int SideBandSubtract::doSubtraction(RooRealVar* variable, Double_t stsratio,Int_t index) //stsratio -> signal to sideband ratio
00082 {
00083   if(Data==NULL || SeparationVariable==NULL)
00084     {
00085       cerr << "ERROR: Data or SeparationVariable is NULL returning now!\n";
00086       return -1;
00087     }
00088   TH1F* SideBandHist = (TH1F*)BaseHistos[index]->Clone();
00089   setHistOptions(SideBandHist,(string)variable->GetName()+"Sideband",(string)SideBandHist->GetTitle() + " Sideband",(string)variable->getUnit());
00090 
00091   TH1F* SignalHist = (TH1F*)BaseHistos[index]->Clone();
00092   setHistOptions(SignalHist,(string)variable->GetName()+"SignalHist",(string)SignalHist->GetTitle() + " Raw Signal",(string)variable->getUnit());
00093 
00094   //Begin a loop over the data to fill our histograms. I should figure
00095   //out how to do this in one shot to avoid a loop
00096   //O(N_vars*N_events)...
00097 
00098   TIterator* iter = (TIterator*) Data->get()->createIterator();
00099   RooAbsArg *var=NULL;
00100   RooRealVar *sep_var=NULL;
00101   while((var = (RooAbsArg*)iter->Next()))
00102     {
00103       if((string)var->GetName()==(string)SeparationVariable->GetName())
00104         {
00105           sep_var = (RooRealVar*)var;
00106           break;
00107         }
00108     }
00109   for(int i=0; i < Data->numEntries(); i++)
00110     {
00111       Data->get(i);
00112       Double_t value = variable->getVal();
00113       Double_t cutval = sep_var->getVal();
00114 
00115       for(unsigned int j=0; j < SideBandRegions.size(); j++) //UGLY!  Find better solution!
00116         {
00117           if(cutval > SideBandRegions[j].min && cutval < SideBandRegions[j].max)
00118             SideBandHist->Fill(value);
00119         }
00120       for(unsigned int j=0; j < SignalRegions.size(); j++)
00121         {
00122           if(cutval > SignalRegions[j].min && cutval < SignalRegions[j].max)
00123             SignalHist->Fill(value);
00124         }
00125     }
00126   //Save pre-subtracted histo
00127   SignalHist->Sumw2(); SideBandHist->Sumw2(); 
00128   //SignalHist->SetDirectory(0); SideBandHist->SetDirectory(0);
00129   RawHistos.push_back(*SignalHist);
00130 
00131   SignalHist->Add(SideBandHist, -stsratio);
00132 
00133   SignalHist->SetTitle(((string)BaseHistos[index]->GetTitle() + " SBS Signal").c_str());
00134   //Save subtracted histo
00135   SBSHistos.push_back(*SignalHist);
00136   //Save Sideband histo
00137   SideBandHistos.push_back(*SideBandHist);
00138 
00139   if(SideBandHist) delete SideBandHist;
00140   return 0;
00141 }
00142 static void print_histo(TH1F* plot, string outname)
00143 {
00144   TCanvas genericCanvas;
00145   plot->Draw("E1P0");
00146   outname = outname + ".eps";
00147   genericCanvas.SaveAs(outname.c_str());
00148   outname.replace(outname.size()-3,3,"gif");
00149   genericCanvas.SaveAs(outname.c_str());
00150 }
00151 void SideBandSubtract::printResults(string prefix)
00152 {//handles *all* printing
00153   //spool over vectors of histograms and print them, then print
00154   //separation variable plots and the results text file.
00155   if(SeparationVariable==NULL)
00156     {
00157       cerr << "ERROR: printResults, SeparationVariable is NULL!\n";
00158       return;
00159     }
00160   string filename; //output file name
00161   for(unsigned int i=0; i < RawHistos.size(); ++i)
00162     {
00163       filename=prefix + "Raw_" + (string)RawHistos[i].GetName();
00164       print_histo(&RawHistos[i], filename);
00165     }
00166   for(unsigned int i=0; i < SBSHistos.size(); ++i)
00167     {
00168       filename=prefix + "SBS_" + (string)RawHistos[i].GetName();
00169       print_histo(&SBSHistos[i], filename);
00170     }
00171   if(verbose)
00172     {
00173       for(unsigned int i=0; i < SideBandHistos.size(); ++i)
00174         {
00175           filename=prefix + "SideBand_" + (string)RawHistos[i].GetName();
00176           print_histo(&SideBandHistos[i], filename);
00177         }
00178     }
00179 
00180   string outname = prefix + (string)SeparationVariable->GetName() + "_fitted.eps";
00181   if(Data!=NULL && ModelPDF!=NULL)
00182     {
00183       RooPlot *SepVarFrame = SeparationVariable->frame();
00184       Data->plotOn(SepVarFrame);
00185       ModelPDF->plotOn(SepVarFrame);
00186       TCanvas Canvas;
00187       SepVarFrame->Draw();
00188       Canvas.SaveAs(outname.c_str());
00189       outname.replace(outname.size()-3,3,"gif");
00190       Canvas.SaveAs(outname.c_str());
00191     }
00192   else
00193     cerr <<"ERROR: printResults, Data or ModelPDF is NULL!\n";
00194 
00195   string result_outname = prefix + "_fit_results.txt";
00196   ofstream output(result_outname.c_str(),std::ios::out);
00197   if(!output)
00198     {
00199       cout <<"ERROR: Could not open file for writing!\n";
00200       return;
00201     }
00202 
00203   if(fit_result!=NULL)
00204     {
00205 #if ROOT_VERSION_CODE <= ROOT_VERSION(5,19,0)
00206       fit_result->printToStream(output,Verbose);
00207 #else
00208       fit_result->printMultiline(output,kTRUE);
00209 #endif
00210     }
00211 }
00212 
00213 
00214 void SideBandSubtract::saveResults(string outname)
00215 {
00216   //saves the ephemeral stuff to a root file for future
00217   //use/modification (ie everything printed by printResults())
00218 
00219   TFile output(outname.c_str(),"UPDATE"); //open the output file,
00220                                           //create it if it doesn't
00221                                           //exist
00222   //Since keys are only available from files on disk, we need to write
00223   //out a new file.  If the file already existed, then we opened to
00224   //update, and are writing nothing new.  
00225   output.Write();
00226   TString dirname;
00227   TIter nextkey(output.GetListOfKeys());
00228   TKey *key;
00229   TDirectory* curDir=NULL;
00230   while((key=(TKey*)nextkey.Next()))
00231     {
00232 
00233       if(key==NULL)
00234         break;
00235       TObject *obj = key->ReadObj();
00236       if(obj->IsA()->InheritsFrom("TDirectory"))
00237           dirname=obj->GetName();
00238 
00239     }
00240 
00241   if(dirname=="")
00242     {
00243       //we didn't find any directories so, we'll make a new one
00244       curDir = output.mkdir("run0","Run 0");
00245       output.cd("run0");
00246     }
00247   else
00248     {
00249       //manipulate last dir string, make a new one, and get ready to fill
00250       dirname.Remove(0,3);
00251       Int_t run_num = dirname.Atoi();
00252       run_num++;
00253       dirname = "run" + stringify(run_num);
00254       curDir = output.mkdir(dirname.Data(),("Run "+stringify(run_num)).c_str());
00255       output.cd(dirname.Data());
00256     }
00257   if(curDir==NULL)
00258     curDir = output.GetDirectory("",kTRUE,"");
00259 
00260   //these should all be the same size, but to be pedantic we'll loop
00261   //over each one individually, also, we need to associate them with
00262   //the directory because by default they "float" in memory to avoid
00263   //conflicts with other root files the user has open. If they want to
00264   //write to those files, they need to close their file, pass the name
00265   //here, and then let us work.
00266   for(unsigned int i=0; i < SideBandHistos.size(); ++i)
00267     {
00268       SideBandHistos[i].SetDirectory(curDir);
00269       SideBandHistos[i].Write();
00270     }
00271   for(unsigned int i=0; i < RawHistos.size(); ++i)
00272     {
00273       RawHistos[i].SetDirectory(curDir);
00274       RawHistos[i].Write();
00275     }
00276   for(unsigned int i=0; i < SBSHistos.size(); ++i)
00277     {
00278       SBSHistos[i].SetDirectory(curDir);
00279       SBSHistos[i].Write();
00280     }
00281   if(Data!=NULL && ModelPDF!=NULL && BackgroundPDF!=NULL && SeparationVariable!=NULL)
00282     {
00283       RooPlot *sep_varFrame = SeparationVariable->frame();
00284       Data->plotOn(sep_varFrame);
00285       ModelPDF->plotOn(sep_varFrame);
00286       BackgroundPDF->plotOn(sep_varFrame);
00287       sep_varFrame->Write();
00288     }
00289   else
00290     cerr <<"ERROR: saveResults, did not save RooPlot of data and fit\n";
00291   output.Write();
00292 
00293 }
00294 void SideBandSubtract::setDataSet(RooDataSet* newData)
00295 {
00296   if(newData!=NULL)
00297     Data=newData;
00298 }
00299 void SideBandSubtract::print_plot(RooRealVar* printVar,string outname)
00300 {
00301   if(Data==NULL || ModelPDF==NULL)
00302     {
00303       cerr << "ERROR: print_plot, Data or ModelPDF are NULL\n";
00304       return;
00305     }
00306 
00307   RooPlot *genericFrame = printVar->frame();
00308   Data->plotOn(genericFrame);
00309   ModelPDF->plotOn(genericFrame);
00310   TCanvas genericCanvas;
00311   genericFrame->Draw();
00312   outname = outname + ".eps";
00313   genericCanvas.SaveAs(outname.c_str());
00314   outname.replace(outname.size()-3,3,"gif");
00315   genericCanvas.SaveAs(outname.c_str());
00316 }
00317 SideBandSubtract::SideBandSubtract()
00318   : BackgroundPDF(0), 
00319     ModelPDF(0), 
00320     Data(0),
00321     SeparationVariable(0),
00322     verbose(0),
00323     SignalRegions(),
00324     SideBandRegions(),
00325     SideBandHistos(0),
00326     RawHistos(0),
00327     SBSHistos(0),
00328     BaseHistos(0),
00329     fit_result(0),
00330     SignalSidebandRatio(0)
00331 {
00332   // Default constructor so we can do fast subtraction
00333 }
00334 SideBandSubtract::SideBandSubtract(RooAbsPdf *model_shape, 
00335                                    RooAbsPdf *bkg_shape, 
00336                                    RooDataSet* data,
00337                                    RooRealVar* sep_var,
00338                                    vector<TH1F*> base,
00339                                    bool verb
00340                                    )
00341   : BackgroundPDF(bkg_shape), 
00342     ModelPDF(model_shape), 
00343     Data(data),
00344     SeparationVariable(sep_var),
00345     verbose(verb),
00346     SignalRegions(),
00347     SideBandRegions(),
00348     SideBandHistos(),
00349     RawHistos(),
00350     SBSHistos(),
00351     BaseHistos(base),
00352     base_histo(0),
00353     fit_result(0),
00354     SignalSidebandRatio(0)
00355 { }
00356 /*
00357 SideBandSubtract::SideBandSubtract(RooAbsPdf *model_shape, 
00358                                    RooAbsPdf *bkg_shape, 
00359                                    RooDataSet* data,
00360                                    RooRealVar* sep_var,
00361                                    bool verb
00362                                    )
00363   : BackgroundPDF(bkg_shape), 
00364     ModelPDF(model_shape), 
00365     Data(data),
00366     SeparationVariable(sep_var),
00367     verbose(verb),
00368     SignalRegions(),
00369     SideBandRegions(),
00370     SideBandHistos(),
00371     RawHistos(),
00372     SBSHistos(),
00373     BaseHistos(),
00374     base_histo(0),
00375     fit_result(0),
00376     SignalSidebandRatio(0)
00377 {
00378   // Build BaseHistos from dataset
00379   TIterator* iter = (TIterator*) Data->get()->createIterator();
00380   RooAbsArg *var=NULL;
00381   RooRealVar *variable=NULL;
00382   cout <<"Starting while loop\n";
00383   while((var = (RooAbsArg*)iter->Next()))
00384     {
00385       if((string)var->ClassName() != "RooRealVar")
00386         continue;
00387       variable = (RooRealVar*)var;
00388       //we own the data this points to so we need to delete it at the end...
00389       assert(variable!=NULL);
00390       string title = "base_"+(string)variable->GetName();
00391       base_histo = (TH1F*)Data->createHistogram(title.c_str(), *variable, Binning(variable->getBinning("default",verb,kTRUE)) );
00392       //cout <<"Made histo with name: "<<base_histo->GetName()<<endl;
00393       //base_histo->SetDirectory(0);
00394       BaseHistos.push_back(*base_histo);
00395       cout <<"Added histo to BaseHistos!\n Deleting local copy...";
00396       if(base_histo) delete base_histo;
00397       cout <<"Done!\n";
00398     }
00399 
00400 }
00401 */
00402 SideBandSubtract::~SideBandSubtract()
00403 {
00404   //**WARNING** 
00405 
00406   // We don't delete objects that we don't own (duh) so, all of our
00407   // pointers just hang out and get handled by other people :)
00408  
00409   // We DO own the BaseHistos IFF the user didn't provide us with 
00410   // them and we constructed them from the dataset... in this case 
00411   // base_histo will be non-null and we want to delete the vector 
00412   // of histograms...
00413   resetSBSProducts();
00414   /*
00415   if(base_histo!=NULL)
00416     {
00417       BaseHistos.clear();
00418     }
00419   */
00420 }
00421 void SideBandSubtract::addSignalRegion(Double_t min, Double_t max)
00422 {
00423   SbsRegion signal;
00424   signal.min=min;
00425   signal.max=max;
00426   signal.RegionName="Signal" + stringify(SignalRegions.size());
00427   if(SeparationVariable!=NULL)
00428     SeparationVariable->setRange(signal.RegionName.c_str(),signal.min,signal.max);
00429   SignalRegions.push_back(signal);
00430   return;
00431 }
00432 void SideBandSubtract::addSideBandRegion(Double_t min, Double_t max)
00433 {
00434   SbsRegion sideband;
00435   sideband.min=min;
00436   sideband.max=max;
00437   sideband.RegionName="SideBand" + stringify(SideBandRegions.size());
00438   if(SeparationVariable!=NULL)
00439     SeparationVariable->setRange(sideband.RegionName.c_str(),sideband.min,sideband.max);
00440   SideBandRegions.push_back(sideband);
00441   return;
00442 }
00443 int SideBandSubtract::doGlobalFit()
00444 {
00445   if(verbose)
00446     cout <<"Beginning SideBand Subtraction\n";
00447 
00448   if(ModelPDF!=NULL && Data!=NULL && SeparationVariable!=NULL)
00449     {
00450       fit_result = ModelPDF->fitTo(*Data,"r");
00451     }
00452   else
00453     {
00454       cerr <<"ERROR: doGobalFit, no ModelPDF, SeparationVariable or Data specified\n";
00455       return -1;
00456     }
00457 
00458   Double_t SideBandYield=getYield(SideBandRegions,BackgroundPDF);
00459   Double_t  BackgroundInSignal=getYield(SignalRegions,BackgroundPDF);
00460 
00461   SignalSidebandRatio = BackgroundInSignal/SideBandYield;
00462   if(verbose)
00463     {
00464       cout <<"Finished fitting background!\n";
00465       cout <<"Attained a Signal to Sideband ratio of: " << SignalSidebandRatio<<endl;
00466     }
00467   //I can't see a way around a double loop for each variable.  Maybe I
00468   //can get a C++/RooFit guru to save me the trouble here?
00469 
00470 
00471   //need to grab sbs objects after each global fit, because they get reset
00472   resetSBSProducts();
00473   TIterator* iter = (TIterator*) Data->get()->createIterator();
00474   RooAbsArg *variable;
00475   while((variable = (RooAbsArg*)iter->Next()))
00476     {
00477       for(unsigned int i=0; i < BaseHistos.size(); i++)
00478         {
00479           if((string)variable->GetName()!=(string)SeparationVariable->GetName() 
00480              && (string)variable->GetName()==(string)BaseHistos[i]->GetName())
00481             doSubtraction((RooRealVar*)variable,SignalSidebandRatio,i);
00482         }
00483     }
00484 
00485   //  clean up our memory...
00486   if(variable)      delete variable;
00487   if(iter)          delete iter;
00488   return 0;
00489 }
00490 void SideBandSubtract::doFastSubtraction(TH1F &Total, TH1F &Result, SbsRegion& leftRegion, SbsRegion& rightRegion)
00491 {
00492   Int_t binMin = Total.FindBin(leftRegion.max,0.0,0.0);
00493   Int_t binMax = Total.FindBin(leftRegion.min,0.0,0.0);
00494   double numLeft = Total.Integral( binMin, binMax );
00495 
00496   binMin = Total.FindBin(rightRegion.max,0.0,0.0);
00497   binMax = Total.FindBin(rightRegion.min,0.0,0.0);
00498   double numRight = Total.Integral( binMin, binMax );
00499   
00500   const unsigned int nbinsx = Total.GetNbinsX();
00501   const double x1 = (leftRegion.max + leftRegion.min)/2.0;
00502   const double x2 = (rightRegion.max + rightRegion.min)/2.0;
00503 
00504   const double y1 = numLeft/(leftRegion.max - leftRegion.min);
00505   const double y2 = numRight/(rightRegion.max - rightRegion.min);
00506     
00507   const double Slope = (y2 - y1)/(x2 - x1);
00508   const double Intercept = y1 - Slope*x1;
00509   // Evantually we want to handle more complicated functions, but for
00510   // now... just use a linear one
00511   TF1 function("sbs_function_line", "[0]*x + [1]",Total.GetMinimum(), Total.GetMaximum());
00512   for ( unsigned int binx = 1;  binx <= nbinsx; ++binx ) 
00513     {
00514       double binWidth = Total.GetBinWidth(binx);
00515       function.SetParameter(0,Slope*binWidth);
00516       function.SetParameter(1,Intercept*binWidth);
00517 
00518       double xx = Total.GetBinCenter(binx);
00519       double cu = Total.GetBinContent(binx) - function.Eval(xx);    
00520       // TODO: Propogate the  error on the parameters in function.
00521       double error1 = Total.GetBinError(binx);
00522     
00523       Result.SetBinContent(binx, cu);
00524       Result.SetBinError(binx, error1);
00525     }
00526   Result.SetEntries(Result.Integral() );
00527 }
00528 RooFitResult* SideBandSubtract::getFitResult()
00529 {
00530   return fit_result;
00531 }
00532 vector<TH1F*> SideBandSubtract::getBaseHistos()
00533 {
00534   return BaseHistos;
00535 }
00536 vector<TH1F> SideBandSubtract::getRawHistos()
00537 {
00538   return RawHistos;
00539 }
00540 vector<TH1F> SideBandSubtract::getSBSHistos()
00541 {
00542   return SBSHistos;
00543 }
00544 Double_t SideBandSubtract::getSTSRatio()
00545 {
00546   return SignalSidebandRatio;
00547 }
00548 void SideBandSubtract::resetSBSProducts()
00549 {
00550   //cout <<"Starting to reset products \n";
00551 
00552   if(!SideBandHistos.empty())
00553     SideBandHistos.clear();
00554 
00555   if(!RawHistos.empty())
00556     RawHistos.clear();
00557   if(!SBSHistos.empty())
00558     SBSHistos.clear();
00559 }