CMS 3D CMS Logo

SideBandSubtraction.cc
Go to the documentation of this file.
1 // Author: David Bjergaard
3 //
4 // Library for Generic Sideband Subtraction Methods.
5 //
6 // This library is designed to be a tool which serves two purposes.
7 // Its primary purpose is to provide a generic framework for doing
8 // sideband subtraction. It will also plug into current tag and probe
9 // modules to prevent code duplication and redundancy.
10 //
12 
14 // System includes
15 #include <iostream>
16 #include <ios>
17 #include <fstream>
18 #include <cstdlib>
19 #include <sstream>
20 
21 // ROOT includes
22 #include <TCanvas.h>
23 #include <TFile.h>
24 #include <TF1.h>
25 #include <TH1F.h>
26 #include <TString.h>
27 #include <TKey.h>
28 #include <TClass.h>
29 
30 // RooFit includes
31 #include <RooFitResult.h>
32 #include <RooRealVar.h>
33 #include <RooAbsPdf.h>
34 #include <RooDataSet.h>
35 #include <RooPlot.h>
36 
37 using namespace RooFit;
38 using std::cout;
39 using std::cerr;
40 using std::endl;
41 using std::string;
42 using std::vector;
43 
44 template <class T>
45 inline std::string stringify(const T& t)
46 {
47  std::ostringstream o;
48  if (!(o << t))
49  return "err";
50  return o.str();
51 }
52 Double_t SideBandSubtract::getYield(const std::vector<SbsRegion>& Regions, RooAbsPdf *PDF)
53 {
54  if(PDF==NULL || SeparationVariable==NULL)
55  return 0.0;
56 
57  Double_t yield=0;
58  RooAbsReal* intPDF;
59  for(unsigned int i=0; i < Regions.size(); i++)
60  {
61  if(verbose)
62  cout<<"Integrating over Range: "<<Regions[i].RegionName<<" from "<<Regions[i].min<<" to "<<Regions[i].max<<endl;
63  intPDF = PDF->createIntegral(*SeparationVariable, Range(Regions[i].RegionName.c_str()));
64  yield += intPDF->getVal();
65  if(verbose)
66  cout<<"Current yield: "<<yield<<endl;
67  }
68  return yield;
69 }
70 static void setHistOptions(TH1F* histo, string name, string title, string axis_label)
71 {
72 
73  histo->SetName(name.c_str());
74  histo->SetTitle(title.c_str());
75  if(axis_label == "GeV/c^2")
76  axis_label = "Mass (" + axis_label + ")";
77  if(axis_label == "GeV/c")
78  axis_label = "Momentum (" + axis_label + ")";
79  histo->GetXaxis()->SetTitle(axis_label.c_str());
80 }
81 int SideBandSubtract::doSubtraction(RooRealVar* variable, Double_t stsratio,Int_t index) //stsratio -> signal to sideband ratio
82 {
83  if(Data==NULL || SeparationVariable==NULL)
84  {
85  cerr << "ERROR: Data or SeparationVariable is NULL returning now!\n";
86  return -1;
87  }
88  TH1F* SideBandHist = (TH1F*)BaseHistos[index]->Clone();
89  setHistOptions(SideBandHist,(string)variable->GetName()+"Sideband",(string)SideBandHist->GetTitle() + " Sideband",(string)variable->getUnit());
90 
91  TH1F* SignalHist = (TH1F*)BaseHistos[index]->Clone();
92  setHistOptions(SignalHist,(string)variable->GetName()+"SignalHist",(string)SignalHist->GetTitle() + " Raw Signal",(string)variable->getUnit());
93 
94  //Begin a loop over the data to fill our histograms. I should figure
95  //out how to do this in one shot to avoid a loop
96  //O(N_vars*N_events)...
97 
98  TIterator* iter = (TIterator*) Data->get()->createIterator();
99  RooAbsArg *var=NULL;
100  RooRealVar *sep_var=NULL;
101  while((var = (RooAbsArg*)iter->Next()))
102  {
103  if((string)var->GetName()==(string)SeparationVariable->GetName())
104  {
105  sep_var = (RooRealVar*)var;
106  break;
107  }
108  }
109  for(int i=0; i < Data->numEntries(); i++)
110  {
111  Data->get(i);
112  Double_t value = variable->getVal();
113  Double_t cutval = sep_var->getVal();
114 
115  for(unsigned int j=0; j < SideBandRegions.size(); j++) //UGLY! Find better solution!
116  {
117  if(cutval > SideBandRegions[j].min && cutval < SideBandRegions[j].max)
118  SideBandHist->Fill(value);
119  }
120  for(unsigned int j=0; j < SignalRegions.size(); j++)
121  {
122  if(cutval > SignalRegions[j].min && cutval < SignalRegions[j].max)
123  SignalHist->Fill(value);
124  }
125  }
126  //Save pre-subtracted histo
127  SignalHist->Sumw2(); SideBandHist->Sumw2();
128  //SignalHist->SetDirectory(0); SideBandHist->SetDirectory(0);
129  RawHistos.push_back(*SignalHist);
130 
131  SignalHist->Add(SideBandHist, -stsratio);
132 
133  SignalHist->SetTitle(((string)BaseHistos[index]->GetTitle() + " SBS Signal").c_str());
134  //Save subtracted histo
135  SBSHistos.push_back(*SignalHist);
136  //Save Sideband histo
137  SideBandHistos.push_back(*SideBandHist);
138 
139  if(SideBandHist) delete SideBandHist;
140  return 0;
141 }
142 static void print_histo(TH1F* plot, string outname)
143 {
144  TCanvas genericCanvas;
145  plot->Draw("E1P0");
146  outname = outname + ".eps";
147  genericCanvas.SaveAs(outname.c_str());
148  outname.replace(outname.size()-3,3,"gif");
149  genericCanvas.SaveAs(outname.c_str());
150 }
152 {//handles *all* printing
153  //spool over vectors of histograms and print them, then print
154  //separation variable plots and the results text file.
155  if(SeparationVariable==NULL)
156  {
157  cerr << "ERROR: printResults, SeparationVariable is NULL!\n";
158  return;
159  }
160  string filename; //output file name
161  for(unsigned int i=0; i < RawHistos.size(); ++i)
162  {
163  filename=prefix + "Raw_" + (string)RawHistos[i].GetName();
164  print_histo(&RawHistos[i], filename);
165  }
166  for(unsigned int i=0; i < SBSHistos.size(); ++i)
167  {
168  filename=prefix + "SBS_" + (string)RawHistos[i].GetName();
169  print_histo(&SBSHistos[i], filename);
170  }
171  if(verbose)
172  {
173  for(unsigned int i=0; i < SideBandHistos.size(); ++i)
174  {
175  filename=prefix + "SideBand_" + (string)RawHistos[i].GetName();
176  print_histo(&SideBandHistos[i], filename);
177  }
178  }
179 
180  string outname = prefix + (string)SeparationVariable->GetName() + "_fitted.eps";
181  if(Data!=NULL && ModelPDF!=NULL)
182  {
183  RooPlot *SepVarFrame = SeparationVariable->frame();
184  Data->plotOn(SepVarFrame);
185  ModelPDF->plotOn(SepVarFrame);
186  TCanvas Canvas;
187  SepVarFrame->Draw();
188  Canvas.SaveAs(outname.c_str());
189  outname.replace(outname.size()-3,3,"gif");
190  Canvas.SaveAs(outname.c_str());
191  }
192  else
193  cerr <<"ERROR: printResults, Data or ModelPDF is NULL!\n";
194 
195  string result_outname = prefix + "_fit_results.txt";
196  std::ofstream output(result_outname.c_str(),std::ios::out);
197  if(!output)
198  {
199  cout <<"ERROR: Could not open file for writing!\n";
200  return;
201  }
202 
203  if(fit_result!=NULL)
204  {
205 #if ROOT_VERSION_CODE <= ROOT_VERSION(5,19,0)
206  fit_result->printToStream(output,Verbose);
207 #else
208  fit_result->printMultiline(output,kTRUE);
209 #endif
210  }
211 }
212 
213 
214 void SideBandSubtract::saveResults(string outname)
215 {
216  //saves the ephemeral stuff to a root file for future
217  //use/modification (ie everything printed by printResults())
218 
219  TFile output(outname.c_str(),"UPDATE"); //open the output file,
220  //create it if it doesn't
221  //exist
222  //Since keys are only available from files on disk, we need to write
223  //out a new file. If the file already existed, then we opened to
224  //update, and are writing nothing new.
225  output.Write();
226  TString dirname;
227  TIter nextkey(output.GetListOfKeys());
228  TKey *key;
229  TDirectory* curDir=NULL;
230  while((key=(TKey*)nextkey.Next()))
231  {
232 
233  if(key==NULL)
234  break;
235  TObject *obj = key->ReadObj();
236  if(obj->IsA()->InheritsFrom("TDirectory"))
237  dirname=obj->GetName();
238 
239  }
240 
241  if(dirname=="")
242  {
243  //we didn't find any directories so, we'll make a new one
244  curDir = output.mkdir("run0","Run 0");
245  output.cd("run0");
246  }
247  else
248  {
249  //manipulate last dir string, make a new one, and get ready to fill
250  dirname.Remove(0,3);
251  Int_t run_num = dirname.Atoi();
252  run_num++;
253  dirname = "run" + stringify(run_num);
254  curDir = output.mkdir(dirname.Data(),("Run "+stringify(run_num)).c_str());
255  output.cd(dirname.Data());
256  }
257  if(curDir==NULL)
258  curDir = output.GetDirectory("",kTRUE,"");
259 
260  //these should all be the same size, but to be pedantic we'll loop
261  //over each one individually, also, we need to associate them with
262  //the directory because by default they "float" in memory to avoid
263  //conflicts with other root files the user has open. If they want to
264  //write to those files, they need to close their file, pass the name
265  //here, and then let us work.
266  for(unsigned int i=0; i < SideBandHistos.size(); ++i)
267  {
268  SideBandHistos[i].SetDirectory(curDir);
269  SideBandHistos[i].Write();
270  }
271  for(unsigned int i=0; i < RawHistos.size(); ++i)
272  {
273  RawHistos[i].SetDirectory(curDir);
274  RawHistos[i].Write();
275  }
276  for(unsigned int i=0; i < SBSHistos.size(); ++i)
277  {
278  SBSHistos[i].SetDirectory(curDir);
279  SBSHistos[i].Write();
280  }
281  if(Data!=NULL && ModelPDF!=NULL && BackgroundPDF!=NULL && SeparationVariable!=NULL)
282  {
283  RooPlot *sep_varFrame = SeparationVariable->frame();
284  Data->plotOn(sep_varFrame);
285  ModelPDF->plotOn(sep_varFrame);
286  BackgroundPDF->plotOn(sep_varFrame);
287  sep_varFrame->Write();
288  }
289  else
290  cerr <<"ERROR: saveResults, did not save RooPlot of data and fit\n";
291  output.Write();
292 
293 }
294 void SideBandSubtract::setDataSet(RooDataSet* newData)
295 {
296  if(newData!=NULL)
297  Data=newData;
298 }
299 void SideBandSubtract::print_plot(RooRealVar* printVar,string outname)
300 {
301  if(Data==NULL || ModelPDF==NULL)
302  {
303  cerr << "ERROR: print_plot, Data or ModelPDF are NULL\n";
304  return;
305  }
306 
307  RooPlot *genericFrame = printVar->frame();
308  Data->plotOn(genericFrame);
309  ModelPDF->plotOn(genericFrame);
310  TCanvas genericCanvas;
311  genericFrame->Draw();
312  outname = outname + ".eps";
313  genericCanvas.SaveAs(outname.c_str());
314  outname.replace(outname.size()-3,3,"gif");
315  genericCanvas.SaveAs(outname.c_str());
316 }
318  : BackgroundPDF(0),
319  ModelPDF(0),
320  Data(0),
321  SeparationVariable(0),
322  verbose(0),
323  SignalRegions(),
324  SideBandRegions(),
325  SideBandHistos(0),
326  RawHistos(0),
327  SBSHistos(0),
328  BaseHistos(0),
329  fit_result(0),
330  SignalSidebandRatio(0)
331 {
332  // Default constructor so we can do fast subtraction
333 }
334 SideBandSubtract::SideBandSubtract(RooAbsPdf *model_shape,
335  RooAbsPdf *bkg_shape,
336  RooDataSet* data,
337  RooRealVar* sep_var,
338  const vector<TH1F*>& base,
339  bool verb
340  )
341  : BackgroundPDF(bkg_shape),
342  ModelPDF(model_shape),
343  Data(data),
344  SeparationVariable(sep_var),
345  verbose(verb),
346  SignalRegions(),
347  SideBandRegions(),
348  SideBandHistos(),
349  RawHistos(),
350  SBSHistos(),
351  BaseHistos(base),
352  base_histo(0),
353  fit_result(0),
355 { }
356 /*
357 SideBandSubtract::SideBandSubtract(RooAbsPdf *model_shape,
358  RooAbsPdf *bkg_shape,
359  RooDataSet* data,
360  RooRealVar* sep_var,
361  bool verb
362  )
363  : BackgroundPDF(bkg_shape),
364  ModelPDF(model_shape),
365  Data(data),
366  SeparationVariable(sep_var),
367  verbose(verb),
368  SignalRegions(),
369  SideBandRegions(),
370  SideBandHistos(),
371  RawHistos(),
372  SBSHistos(),
373  BaseHistos(),
374  base_histo(0),
375  fit_result(0),
376  SignalSidebandRatio(0)
377 {
378  // Build BaseHistos from dataset
379  TIterator* iter = (TIterator*) Data->get()->createIterator();
380  RooAbsArg *var=NULL;
381  RooRealVar *variable=NULL;
382  cout <<"Starting while loop\n";
383  while((var = (RooAbsArg*)iter->Next()))
384  {
385  if((string)var->ClassName() != "RooRealVar")
386  continue;
387  variable = (RooRealVar*)var;
388  //we own the data this points to so we need to delete it at the end...
389  assert(variable!=NULL);
390  string title = "base_"+(string)variable->GetName();
391  base_histo = (TH1F*)Data->createHistogram(title.c_str(), *variable, Binning(variable->getBinning("default",verb,kTRUE)) );
392  //cout <<"Made histo with name: "<<base_histo->GetName()<<endl;
393  //base_histo->SetDirectory(0);
394  BaseHistos.push_back(*base_histo);
395  cout <<"Added histo to BaseHistos!\n Deleting local copy...";
396  if(base_histo) delete base_histo;
397  cout <<"Done!\n";
398  }
399 
400 }
401 */
403 {
404  //**WARNING**
405 
406  // We don't delete objects that we don't own (duh) so, all of our
407  // pointers just hang out and get handled by other people :)
408 
409  // We DO own the BaseHistos IFF the user didn't provide us with
410  // them and we constructed them from the dataset... in this case
411  // base_histo will be non-null and we want to delete the vector
412  // of histograms...
414  /*
415  if(base_histo!=NULL)
416  {
417  BaseHistos.clear();
418  }
419  */
420 }
421 void SideBandSubtract::addSignalRegion(Double_t min, Double_t max)
422 {
423  SbsRegion signal;
424  signal.min=min;
425  signal.max=max;
426  signal.RegionName="Signal" + stringify(SignalRegions.size());
428  SeparationVariable->setRange(signal.RegionName.c_str(),signal.min,signal.max);
429  SignalRegions.push_back(signal);
430  return;
431 }
433 {
434  SbsRegion sideband;
435  sideband.min=min;
436  sideband.max=max;
437  sideband.RegionName="SideBand" + stringify(SideBandRegions.size());
439  SeparationVariable->setRange(sideband.RegionName.c_str(),sideband.min,sideband.max);
440  SideBandRegions.push_back(sideband);
441  return;
442 }
444 {
445  if(verbose)
446  cout <<"Beginning SideBand Subtraction\n";
447 
449  {
450  fit_result = ModelPDF->fitTo(*Data,"r");
451  }
452  else
453  {
454  cerr <<"ERROR: doGobalFit, no ModelPDF, SeparationVariable or Data specified\n";
455  return -1;
456  }
457 
458  Double_t SideBandYield=getYield(SideBandRegions,BackgroundPDF);
459  Double_t BackgroundInSignal=getYield(SignalRegions,BackgroundPDF);
460 
461  SignalSidebandRatio = BackgroundInSignal/SideBandYield;
462  if(verbose)
463  {
464  cout <<"Finished fitting background!\n";
465  cout <<"Attained a Signal to Sideband ratio of: " << SignalSidebandRatio<<endl;
466  }
467  //I can't see a way around a double loop for each variable. Maybe I
468  //can get a C++/RooFit guru to save me the trouble here?
469 
470 
471  //need to grab sbs objects after each global fit, because they get reset
473  TIterator* iter = (TIterator*) Data->get()->createIterator();
474  RooAbsArg *variable;
475  while((variable = (RooAbsArg*)iter->Next()))
476  {
477  for(unsigned int i=0; i < BaseHistos.size(); i++)
478  {
479  if((string)variable->GetName()!=(string)SeparationVariable->GetName()
480  && (string)variable->GetName()==(string)BaseHistos[i]->GetName())
482  }
483  }
484 
485  // clean up our memory...
486  if(variable) delete variable;
487  if(iter) delete iter;
488  return 0;
489 }
490 void SideBandSubtract::doFastSubtraction(TH1F &Total, TH1F &Result, SbsRegion& leftRegion, SbsRegion& rightRegion)
491 {
492  Int_t binMin = Total.FindBin(leftRegion.max,0.0,0.0);
493  Int_t binMax = Total.FindBin(leftRegion.min,0.0,0.0);
494  double numLeft = Total.Integral( binMin, binMax );
495 
496  binMin = Total.FindBin(rightRegion.max,0.0,0.0);
497  binMax = Total.FindBin(rightRegion.min,0.0,0.0);
498  double numRight = Total.Integral( binMin, binMax );
499 
500  const unsigned int nbinsx = Total.GetNbinsX();
501  const double x1 = (leftRegion.max + leftRegion.min)/2.0;
502  const double x2 = (rightRegion.max + rightRegion.min)/2.0;
503 
504  const double y1 = numLeft/(leftRegion.max - leftRegion.min);
505  const double y2 = numRight/(rightRegion.max - rightRegion.min);
506 
507  const double Slope = (y2 - y1)/(x2 - x1);
508  const double Intercept = y1 - Slope*x1;
509  // Evantually we want to handle more complicated functions, but for
510  // now... just use a linear one
511  TF1 function("sbs_function_line", "[0]*x + [1]",Total.GetMinimum(), Total.GetMaximum());
512  for ( unsigned int binx = 1; binx <= nbinsx; ++binx )
513  {
514  double binWidth = Total.GetBinWidth(binx);
515  function.SetParameter(0,Slope*binWidth);
516  function.SetParameter(1,Intercept*binWidth);
517 
518  double xx = Total.GetBinCenter(binx);
519  double cu = Total.GetBinContent(binx) - function.Eval(xx);
520  // TODO: Propogate the error on the parameters in function.
521  double error1 = Total.GetBinError(binx);
522 
523  Result.SetBinContent(binx, cu);
524  Result.SetBinError(binx, error1);
525  }
526  Result.SetEntries(Result.Integral() );
527 }
529 {
530  return fit_result;
531 }
533 {
534  return BaseHistos;
535 }
537 {
538  return RawHistos;
539 }
541 {
542  return SBSHistos;
543 }
545 {
546  return SignalSidebandRatio;
547 }
549 {
550  //cout <<"Starting to reset products \n";
551 
552  if(!SideBandHistos.empty())
553  SideBandHistos.clear();
554 
555  if(!RawHistos.empty())
556  RawHistos.clear();
557  if(!SBSHistos.empty())
558  SBSHistos.clear();
559 }
RooAbsPdf * BackgroundPDF
Double_t getYield(const std::vector< SbsRegion > &Regions, RooAbsPdf *PDF)
RooFitResult * getFitResult()
std::vector< TH1F > SideBandHistos
std::vector< TH1F > getSBSHistos()
void saveResults(std::string outname)
RooFitResult * fit_result
#define NULL
Definition: scimark2.h:8
std::vector< TH1F > getRawHistos()
static void print_histo(TH1F *plot, string outname)
std::vector< TH1F * > getBaseHistos()
int doSubtraction(RooRealVar *variable, Double_t stsratio, Int_t index)
std::string stringify(const T &t)
static void setHistOptions(TH1F *histo, string name, string title, string axis_label)
std::vector< TH1F > SBSHistos
PixelRecoRange< float > Range
RooRealVar * SeparationVariable
std::string RegionName
std::vector< SbsRegion > SideBandRegions
Definition: value.py:1
T min(T a, T b)
Definition: MathUtil.h:58
base
Make Sure CMSSW is Setup ##.
void printResults(std::string prefix="")
void addSignalRegion(Double_t min, Double_t max)
std::vector< SbsRegion > SignalRegions
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
void addSideBandRegion(Double_t min, Double_t max)
void print_plot(RooRealVar *printVar, std::string outname)
void doFastSubtraction(TH1F &Total, TH1F &Result, SbsRegion &leftRegion, SbsRegion &rightRegion)
void setDataSet(RooDataSet *newData)
long double T
std::vector< TH1F * > BaseHistos
std::vector< TH1F > RawHistos