CMS 3D CMS Logo

SideBandSubtraction.cc
Go to the documentation of this file.
1 // Author: David Bjergaard
3 //
4 // Library for Generic Sideband Subtraction Methods.
5 //
6 // This library is designed to be a tool which serves two purposes.
7 // Its primary purpose is to provide a generic framework for doing
8 // sideband subtraction. It will also plug into current tag and probe
9 // modules to prevent code duplication and redundancy.
10 //
12 
14 // System includes
15 #include <iostream>
16 #include <ios>
17 #include <fstream>
18 #include <cstdlib>
19 #include <sstream>
20 
21 // ROOT includes
22 #include <TCanvas.h>
23 #include <TFile.h>
24 #include <TF1.h>
25 #include <TH1F.h>
26 #include <TString.h>
27 #include <TKey.h>
28 #include <TClass.h>
29 
30 // RooFit includes
31 #include <RooFitResult.h>
32 #include <RooRealVar.h>
33 #include <RooAbsPdf.h>
34 #include <RooDataSet.h>
35 #include <RooPlot.h>
36 
37 using namespace RooFit;
38 using std::cerr;
39 using std::cout;
40 using std::endl;
41 using std::string;
42 using std::vector;
43 
44 template <class T>
45 inline std::string stringify(const T& t) {
46  std::ostringstream o;
47  if (!(o << t))
48  return "err";
49  return o.str();
50 }
51 Double_t SideBandSubtract::getYield(const std::vector<SbsRegion>& Regions, RooAbsPdf* PDF) {
52  if (PDF == nullptr || SeparationVariable == nullptr)
53  return 0.0;
54 
55  Double_t yield = 0;
56  RooAbsReal* intPDF;
57  for (unsigned int i = 0; i < Regions.size(); i++) {
58  if (verbose)
59  cout << "Integrating over Range: " << Regions[i].RegionName << " from " << Regions[i].min << " to "
60  << Regions[i].max << endl;
61  intPDF = PDF->createIntegral(*SeparationVariable, Range(Regions[i].RegionName.c_str()));
62  yield += intPDF->getVal();
63  if (verbose)
64  cout << "Current yield: " << yield << endl;
65  }
66  return yield;
67 }
68 static void setHistOptions(TH1F* histo, string name, string title, string axis_label) {
69  histo->SetName(name.c_str());
70  histo->SetTitle(title.c_str());
71  if (axis_label == "GeV/c^2")
72  axis_label = "Mass (" + axis_label + ")";
73  if (axis_label == "GeV/c")
74  axis_label = "Momentum (" + axis_label + ")";
75  histo->GetXaxis()->SetTitle(axis_label.c_str());
76 }
78  Double_t stsratio,
79  Int_t index) //stsratio -> signal to sideband ratio
80 {
81  if (Data == nullptr || SeparationVariable == nullptr) {
82  cerr << "ERROR: Data or SeparationVariable is NULL returning now!\n";
83  return -1;
84  }
85  TH1F* SideBandHist = (TH1F*)BaseHistos[index]->Clone();
86  setHistOptions(SideBandHist,
87  (string)variable->GetName() + "Sideband",
88  (string)SideBandHist->GetTitle() + " Sideband",
89  (string)variable->getUnit());
90 
91  TH1F* SignalHist = (TH1F*)BaseHistos[index]->Clone();
92  setHistOptions(SignalHist,
93  (string)variable->GetName() + "SignalHist",
94  (string)SignalHist->GetTitle() + " Raw Signal",
95  (string)variable->getUnit());
96 
97  //Begin a loop over the data to fill our histograms. I should figure
98  //out how to do this in one shot to avoid a loop
99  //O(N_vars*N_events)...
100 
101  TIterator* iter = (TIterator*)Data->get()->createIterator();
102  RooAbsArg* var = nullptr;
103  RooRealVar* sep_var = nullptr;
104  while ((var = (RooAbsArg*)iter->Next())) {
105  if ((string)var->GetName() == (string)SeparationVariable->GetName()) {
106  sep_var = (RooRealVar*)var;
107  break;
108  }
109  }
110  for (int i = 0; i < Data->numEntries(); i++) {
111  Data->get(i);
112  Double_t value = variable->getVal();
113  Double_t cutval = sep_var->getVal();
114 
115  for (unsigned int j = 0; j < SideBandRegions.size(); j++) //UGLY! Find better solution!
116  {
117  if (cutval > SideBandRegions[j].min && cutval < SideBandRegions[j].max)
118  SideBandHist->Fill(value);
119  }
120  for (unsigned int j = 0; j < SignalRegions.size(); j++) {
121  if (cutval > SignalRegions[j].min && cutval < SignalRegions[j].max)
122  SignalHist->Fill(value);
123  }
124  }
125  //Save pre-subtracted histo
126  SignalHist->Sumw2();
127  SideBandHist->Sumw2();
128  //SignalHist->SetDirectory(0); SideBandHist->SetDirectory(0);
129  RawHistos.push_back(*SignalHist);
130 
131  SignalHist->Add(SideBandHist, -stsratio);
132 
133  SignalHist->SetTitle(((string)BaseHistos[index]->GetTitle() + " SBS Signal").c_str());
134  //Save subtracted histo
135  SBSHistos.push_back(*SignalHist);
136  //Save Sideband histo
137  SideBandHistos.push_back(*SideBandHist);
138 
139  if (SideBandHist)
140  delete SideBandHist;
141  return 0;
142 }
143 static void print_histo(TH1F* plot, string outname) {
144  TCanvas genericCanvas;
145  plot->Draw("E1P0");
146  outname = outname + ".eps";
147  genericCanvas.SaveAs(outname.c_str());
148  outname.replace(outname.size() - 3, 3, "gif");
149  genericCanvas.SaveAs(outname.c_str());
150 }
151 void SideBandSubtract::printResults(string prefix) { //handles *all* printing
152  //spool over vectors of histograms and print them, then print
153  //separation variable plots and the results text file.
154  if (SeparationVariable == nullptr) {
155  cerr << "ERROR: printResults, SeparationVariable is NULL!\n";
156  return;
157  }
158  string filename; //output file name
159  for (unsigned int i = 0; i < RawHistos.size(); ++i) {
160  filename = prefix + "Raw_" + (string)RawHistos[i].GetName();
161  print_histo(&RawHistos[i], filename);
162  }
163  for (unsigned int i = 0; i < SBSHistos.size(); ++i) {
164  filename = prefix + "SBS_" + (string)RawHistos[i].GetName();
165  print_histo(&SBSHistos[i], filename);
166  }
167  if (verbose) {
168  for (unsigned int i = 0; i < SideBandHistos.size(); ++i) {
169  filename = prefix + "SideBand_" + (string)RawHistos[i].GetName();
170  print_histo(&SideBandHistos[i], filename);
171  }
172  }
173 
174  string outname = prefix + (string)SeparationVariable->GetName() + "_fitted.eps";
175  if (Data != nullptr && ModelPDF != nullptr) {
176  RooPlot* SepVarFrame = SeparationVariable->frame();
177  Data->plotOn(SepVarFrame);
178  ModelPDF->plotOn(SepVarFrame);
179  TCanvas Canvas;
180  SepVarFrame->Draw();
181  Canvas.SaveAs(outname.c_str());
182  outname.replace(outname.size() - 3, 3, "gif");
183  Canvas.SaveAs(outname.c_str());
184  } else
185  cerr << "ERROR: printResults, Data or ModelPDF is NULL!\n";
186 
187  string result_outname = prefix + "_fit_results.txt";
188  std::ofstream output(result_outname.c_str(), std::ios::out);
189  if (!output) {
190  cout << "ERROR: Could not open file for writing!\n";
191  return;
192  }
193 
194  if (fit_result != nullptr) {
195 #if ROOT_VERSION_CODE <= ROOT_VERSION(5, 19, 0)
196  fit_result->printToStream(output, Verbose);
197 #else
198  fit_result->printMultiline(output, kTRUE);
199 #endif
200  }
201 }
202 
204  //saves the ephemeral stuff to a root file for future
205  //use/modification (ie everything printed by printResults())
206 
207  TFile output(outname.c_str(), "UPDATE"); //open the output file,
208  //create it if it doesn't
209  //exist
210  //Since keys are only available from files on disk, we need to write
211  //out a new file. If the file already existed, then we opened to
212  //update, and are writing nothing new.
213  output.Write();
214  TString dirname;
215  TIter nextkey(output.GetListOfKeys());
216  TKey* key;
217  TDirectory* curDir = nullptr;
218  while ((key = (TKey*)nextkey.Next())) {
219  if (key == nullptr)
220  break;
221  TObject* obj = key->ReadObj();
222  if (obj->IsA()->InheritsFrom("TDirectory"))
223  dirname = obj->GetName();
224  }
225 
226  if (dirname == "") {
227  //we didn't find any directories so, we'll make a new one
228  curDir = output.mkdir("run0", "Run 0");
229  output.cd("run0");
230  } else {
231  //manipulate last dir string, make a new one, and get ready to fill
232  dirname.Remove(0, 3);
233  Int_t run_num = dirname.Atoi();
234  run_num++;
235  dirname = "run" + stringify(run_num);
236  curDir = output.mkdir(dirname.Data(), ("Run " + stringify(run_num)).c_str());
237  output.cd(dirname.Data());
238  }
239  if (curDir == nullptr)
240  curDir = output.GetDirectory("", kTRUE, "");
241 
242  //these should all be the same size, but to be pedantic we'll loop
243  //over each one individually, also, we need to associate them with
244  //the directory because by default they "float" in memory to avoid
245  //conflicts with other root files the user has open. If they want to
246  //write to those files, they need to close their file, pass the name
247  //here, and then let us work.
248  for (unsigned int i = 0; i < SideBandHistos.size(); ++i) {
249  SideBandHistos[i].SetDirectory(curDir);
250  SideBandHistos[i].Write();
251  }
252  for (unsigned int i = 0; i < RawHistos.size(); ++i) {
253  RawHistos[i].SetDirectory(curDir);
254  RawHistos[i].Write();
255  }
256  for (unsigned int i = 0; i < SBSHistos.size(); ++i) {
257  SBSHistos[i].SetDirectory(curDir);
258  SBSHistos[i].Write();
259  }
260  if (Data != nullptr && ModelPDF != nullptr && BackgroundPDF != nullptr && SeparationVariable != nullptr) {
261  RooPlot* sep_varFrame = SeparationVariable->frame();
262  Data->plotOn(sep_varFrame);
263  ModelPDF->plotOn(sep_varFrame);
264  BackgroundPDF->plotOn(sep_varFrame);
265  sep_varFrame->Write();
266  } else
267  cerr << "ERROR: saveResults, did not save RooPlot of data and fit\n";
268  output.Write();
269 }
270 void SideBandSubtract::setDataSet(RooDataSet* newData) {
271  if (newData != nullptr)
272  Data = newData;
273 }
274 void SideBandSubtract::print_plot(RooRealVar* printVar, string outname) {
275  if (Data == nullptr || ModelPDF == nullptr) {
276  cerr << "ERROR: print_plot, Data or ModelPDF are NULL\n";
277  return;
278  }
279 
280  RooPlot* genericFrame = printVar->frame();
281  Data->plotOn(genericFrame);
282  ModelPDF->plotOn(genericFrame);
283  TCanvas genericCanvas;
284  genericFrame->Draw();
285  outname = outname + ".eps";
286  genericCanvas.SaveAs(outname.c_str());
287  outname.replace(outname.size() - 3, 3, "gif");
288  genericCanvas.SaveAs(outname.c_str());
289 }
291  : BackgroundPDF(nullptr),
292  ModelPDF(nullptr),
293  Data(nullptr),
294  SeparationVariable(nullptr),
295  verbose(false),
296  SignalRegions(),
297  SideBandRegions(),
298  SideBandHistos(0),
299  RawHistos(0),
300  SBSHistos(0),
301  BaseHistos(0),
302  fit_result(nullptr),
303  SignalSidebandRatio(0) {
304  // Default constructor so we can do fast subtraction
305 }
306 SideBandSubtract::SideBandSubtract(RooAbsPdf* model_shape,
307  RooAbsPdf* bkg_shape,
308  RooDataSet* data,
309  RooRealVar* sep_var,
310  const vector<TH1F*>& base,
311  bool verb)
312  : BackgroundPDF(bkg_shape),
313  ModelPDF(model_shape),
314  Data(data),
315  SeparationVariable(sep_var),
316  verbose(verb),
317  SignalRegions(),
318  SideBandRegions(),
319  SideBandHistos(),
320  RawHistos(),
321  SBSHistos(),
322  BaseHistos(base),
323  base_histo(nullptr),
324  fit_result(nullptr),
325  SignalSidebandRatio(0) {}
326 /*
327 SideBandSubtract::SideBandSubtract(RooAbsPdf *model_shape,
328  RooAbsPdf *bkg_shape,
329  RooDataSet* data,
330  RooRealVar* sep_var,
331  bool verb
332  )
333  : BackgroundPDF(bkg_shape),
334  ModelPDF(model_shape),
335  Data(data),
336  SeparationVariable(sep_var),
337  verbose(verb),
338  SignalRegions(),
339  SideBandRegions(),
340  SideBandHistos(),
341  RawHistos(),
342  SBSHistos(),
343  BaseHistos(),
344  base_histo(0),
345  fit_result(0),
346  SignalSidebandRatio(0)
347 {
348  // Build BaseHistos from dataset
349  TIterator* iter = (TIterator*) Data->get()->createIterator();
350  RooAbsArg *var=NULL;
351  RooRealVar *variable=NULL;
352  cout <<"Starting while loop\n";
353  while((var = (RooAbsArg*)iter->Next()))
354  {
355  if((string)var->ClassName() != "RooRealVar")
356  continue;
357  variable = (RooRealVar*)var;
358  //we own the data this points to so we need to delete it at the end...
359  assert(variable!=NULL);
360  string title = "base_"+(string)variable->GetName();
361  base_histo = (TH1F*)Data->createHistogram(title.c_str(), *variable, Binning(variable->getBinning("default",verb,kTRUE)) );
362  //cout <<"Made histo with name: "<<base_histo->GetName()<<endl;
363  //base_histo->SetDirectory(0);
364  BaseHistos.push_back(*base_histo);
365  cout <<"Added histo to BaseHistos!\n Deleting local copy...";
366  if(base_histo) delete base_histo;
367  cout <<"Done!\n";
368  }
369 
370 }
371 */
373  //**WARNING**
374 
375  // We don't delete objects that we don't own (duh) so, all of our
376  // pointers just hang out and get handled by other people :)
377 
378  // We DO own the BaseHistos IFF the user didn't provide us with
379  // them and we constructed them from the dataset... in this case
380  // base_histo will be non-null and we want to delete the vector
381  // of histograms...
383  /*
384  if(base_histo!=NULL)
385  {
386  BaseHistos.clear();
387  }
388  */
389 }
390 void SideBandSubtract::addSignalRegion(Double_t min, Double_t max) {
391  SbsRegion signal;
392  signal.min = min;
393  signal.max = max;
394  signal.RegionName = "Signal" + stringify(SignalRegions.size());
395  if (SeparationVariable != nullptr)
396  SeparationVariable->setRange(signal.RegionName.c_str(), signal.min, signal.max);
397  SignalRegions.push_back(signal);
398  return;
399 }
400 void SideBandSubtract::addSideBandRegion(Double_t min, Double_t max) {
401  SbsRegion sideband;
402  sideband.min = min;
403  sideband.max = max;
404  sideband.RegionName = "SideBand" + stringify(SideBandRegions.size());
405  if (SeparationVariable != nullptr)
406  SeparationVariable->setRange(sideband.RegionName.c_str(), sideband.min, sideband.max);
407  SideBandRegions.push_back(sideband);
408  return;
409 }
411  if (verbose)
412  cout << "Beginning SideBand Subtraction\n";
413 
414  if (ModelPDF != nullptr && Data != nullptr && SeparationVariable != nullptr) {
415  fit_result = ModelPDF->fitTo(*Data);
416  } else {
417  cerr << "ERROR: doGobalFit, no ModelPDF, SeparationVariable or Data specified\n";
418  return -1;
419  }
420 
421  Double_t SideBandYield = getYield(SideBandRegions, BackgroundPDF);
422  Double_t BackgroundInSignal = getYield(SignalRegions, BackgroundPDF);
423 
424  SignalSidebandRatio = BackgroundInSignal / SideBandYield;
425  if (verbose) {
426  cout << "Finished fitting background!\n";
427  cout << "Attained a Signal to Sideband ratio of: " << SignalSidebandRatio << endl;
428  }
429  //I can't see a way around a double loop for each variable. Maybe I
430  //can get a C++/RooFit guru to save me the trouble here?
431 
432  //need to grab sbs objects after each global fit, because they get reset
434  TIterator* iter = (TIterator*)Data->get()->createIterator();
435  RooAbsArg* variable;
436  while ((variable = (RooAbsArg*)iter->Next())) {
437  for (unsigned int i = 0; i < BaseHistos.size(); i++) {
438  if ((string)variable->GetName() != (string)SeparationVariable->GetName() &&
439  (string)variable->GetName() == (string)BaseHistos[i]->GetName())
441  }
442  }
443 
444  // clean up our memory...
445  if (variable)
446  delete variable;
447  if (iter)
448  delete iter;
449  return 0;
450 }
451 void SideBandSubtract::doFastSubtraction(TH1F& Total, TH1F& Result, SbsRegion& leftRegion, SbsRegion& rightRegion) {
452  Int_t binMin = Total.FindBin(leftRegion.max, 0.0, 0.0);
453  Int_t binMax = Total.FindBin(leftRegion.min, 0.0, 0.0);
454  double numLeft = Total.Integral(binMin, binMax);
455 
456  binMin = Total.FindBin(rightRegion.max, 0.0, 0.0);
457  binMax = Total.FindBin(rightRegion.min, 0.0, 0.0);
458  double numRight = Total.Integral(binMin, binMax);
459 
460  const unsigned int nbinsx = Total.GetNbinsX();
461  const double x1 = (leftRegion.max + leftRegion.min) / 2.0;
462  const double x2 = (rightRegion.max + rightRegion.min) / 2.0;
463 
464  const double y1 = numLeft / (leftRegion.max - leftRegion.min);
465  const double y2 = numRight / (rightRegion.max - rightRegion.min);
466 
467  const double Slope = (y2 - y1) / (x2 - x1);
468  const double Intercept = y1 - Slope * x1;
469  // Evantually we want to handle more complicated functions, but for
470  // now... just use a linear one
471  TF1 function("sbs_function_line", "[0]*x + [1]", Total.GetMinimum(), Total.GetMaximum());
472  for (unsigned int binx = 1; binx <= nbinsx; ++binx) {
473  double binWidth = Total.GetBinWidth(binx);
474  function.SetParameter(0, Slope * binWidth);
475  function.SetParameter(1, Intercept * binWidth);
476 
477  double xx = Total.GetBinCenter(binx);
478  double cu = Total.GetBinContent(binx) - function.Eval(xx);
479  // TODO: Propogate the error on the parameters in function.
480  double error1 = Total.GetBinError(binx);
481 
482  Result.SetBinContent(binx, cu);
483  Result.SetBinError(binx, error1);
484  }
485  Result.SetEntries(Result.Integral());
486 }
487 RooFitResult* SideBandSubtract::getFitResult() { return fit_result; }
488 vector<TH1F*> SideBandSubtract::getBaseHistos() { return BaseHistos; }
489 vector<TH1F> SideBandSubtract::getRawHistos() { return RawHistos; }
490 vector<TH1F> SideBandSubtract::getSBSHistos() { return SBSHistos; }
493  //cout <<"Starting to reset products \n";
494 
495  if (!SideBandHistos.empty())
496  SideBandHistos.clear();
497 
498  if (!RawHistos.empty())
499  RawHistos.clear();
500  if (!SBSHistos.empty())
501  SBSHistos.clear();
502 }
RooAbsPdf * BackgroundPDF
Double_t getYield(const std::vector< SbsRegion > &Regions, RooAbsPdf *PDF)
RooFitResult * getFitResult()
std::vector< TH1F > SideBandHistos
std::vector< TH1F > getSBSHistos()
PixelRecoRange< float > Range
bool verbose
base
Main Program
Definition: newFWLiteAna.py:92
void saveResults(std::string outname)
RooFitResult * fit_result
std::vector< TH1F > getRawHistos()
static void print_histo(TH1F *plot, string outname)
std::vector< TH1F * > getBaseHistos()
int doSubtraction(RooRealVar *variable, Double_t stsratio, Int_t index)
std::string stringify(const T &t)
static void setHistOptions(TH1F *histo, string name, string title, string axis_label)
std::vector< TH1F > SBSHistos
RooRealVar * SeparationVariable
std::string RegionName
std::vector< SbsRegion > SideBandRegions
Definition: value.py:1
void printResults(std::string prefix="")
void addSignalRegion(Double_t min, Double_t max)
std::vector< SbsRegion > SignalRegions
const float binMin[32]
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
void addSideBandRegion(Double_t min, Double_t max)
void print_plot(RooRealVar *printVar, std::string outname)
void doFastSubtraction(TH1F &Total, TH1F &Result, SbsRegion &leftRegion, SbsRegion &rightRegion)
void setDataSet(RooDataSet *newData)
long double T
std::vector< TH1F * > BaseHistos
std::vector< TH1F > RawHistos