CMS 3D CMS Logo

SideBandSubtraction.cc
Go to the documentation of this file.
1 // Author: David Bjergaard
3 //
4 // Library for Generic Sideband Subtraction Methods.
5 //
6 // This library is designed to be a tool which serves two purposes.
7 // Its primary purpose is to provide a generic framework for doing
8 // sideband subtraction. It will also plug into current tag and probe
9 // modules to prevent code duplication and redundancy.
10 //
12 
14 // System includes
15 #include <iostream>
16 #include <ios>
17 #include <fstream>
18 #include <cstdlib>
19 #include <sstream>
20 
21 // ROOT includes
22 #include <TCanvas.h>
23 #include <TFile.h>
24 #include <TF1.h>
25 #include <TH1F.h>
26 #include <TString.h>
27 #include <TKey.h>
28 #include <TClass.h>
29 
30 // RooFit includes
31 #include <RooFitResult.h>
32 #include <RooRealVar.h>
33 #include <RooAbsPdf.h>
34 #include <RooDataSet.h>
35 #include <RooPlot.h>
36 
37 using namespace RooFit;
38 using std::cerr;
39 using std::cout;
40 using std::endl;
41 using std::string;
42 using std::vector;
43 
44 template <class T>
45 inline std::string stringify(const T& t) {
46  std::ostringstream o;
47  if (!(o << t))
48  return "err";
49  return o.str();
50 }
51 Double_t SideBandSubtract::getYield(const std::vector<SbsRegion>& Regions, RooAbsPdf* PDF) {
52  if (PDF == nullptr || SeparationVariable == nullptr)
53  return 0.0;
54 
55  Double_t yield = 0;
56  RooAbsReal* intPDF;
57  for (unsigned int i = 0; i < Regions.size(); i++) {
58  if (verbose)
59  cout << "Integrating over Range: " << Regions[i].RegionName << " from " << Regions[i].min << " to "
60  << Regions[i].max << endl;
61  intPDF = PDF->createIntegral(*SeparationVariable, Range(Regions[i].RegionName.c_str()));
62  yield += intPDF->getVal();
63  if (verbose)
64  cout << "Current yield: " << yield << endl;
65  }
66  return yield;
67 }
68 static void setHistOptions(TH1F* histo, string name, string title, string axis_label) {
69  histo->SetName(name.c_str());
70  histo->SetTitle(title.c_str());
71  if (axis_label == "GeV/c^2")
72  axis_label = "Mass (" + axis_label + ")";
73  if (axis_label == "GeV/c")
74  axis_label = "Momentum (" + axis_label + ")";
75  histo->GetXaxis()->SetTitle(axis_label.c_str());
76 }
78  Double_t stsratio,
79  Int_t index) //stsratio -> signal to sideband ratio
80 {
81  if (Data == nullptr || SeparationVariable == nullptr) {
82  cerr << "ERROR: Data or SeparationVariable is NULL returning now!\n";
83  return -1;
84  }
85  TH1F* SideBandHist = (TH1F*)BaseHistos[index]->Clone();
86  setHistOptions(SideBandHist,
87  (string)variable->GetName() + "Sideband",
88  (string)SideBandHist->GetTitle() + " Sideband",
89  (string)variable->getUnit());
90 
91  TH1F* SignalHist = (TH1F*)BaseHistos[index]->Clone();
92  setHistOptions(SignalHist,
93  (string)variable->GetName() + "SignalHist",
94  (string)SignalHist->GetTitle() + " Raw Signal",
95  (string)variable->getUnit());
96 
97  //Begin a loop over the data to fill our histograms. I should figure
98  //out how to do this in one shot to avoid a loop
99  //O(N_vars*N_events)...
100 
101  RooRealVar* sep_var = nullptr;
102  for (const auto& var : *Data->get()) {
103  if ((string)var->GetName() == (string)SeparationVariable->GetName()) {
104  sep_var = (RooRealVar*)var;
105  break;
106  }
107  }
108 
109  for (int i = 0; i < Data->numEntries(); i++) {
110  Data->get(i);
111  Double_t value = variable->getVal();
112  Double_t cutval = sep_var->getVal();
113 
114  for (unsigned int j = 0; j < SideBandRegions.size(); j++) //UGLY! Find better solution!
115  {
116  if (cutval > SideBandRegions[j].min && cutval < SideBandRegions[j].max)
117  SideBandHist->Fill(value);
118  }
119  for (unsigned int j = 0; j < SignalRegions.size(); j++) {
120  if (cutval > SignalRegions[j].min && cutval < SignalRegions[j].max)
121  SignalHist->Fill(value);
122  }
123  }
124  //Save pre-subtracted histo
125  SignalHist->Sumw2();
126  SideBandHist->Sumw2();
127  //SignalHist->SetDirectory(0); SideBandHist->SetDirectory(0);
128  RawHistos.push_back(*SignalHist);
129 
130  SignalHist->Add(SideBandHist, -stsratio);
131 
132  SignalHist->SetTitle(((string)BaseHistos[index]->GetTitle() + " SBS Signal").c_str());
133  //Save subtracted histo
134  SBSHistos.push_back(*SignalHist);
135  //Save Sideband histo
136  SideBandHistos.push_back(*SideBandHist);
137 
138  if (SideBandHist)
139  delete SideBandHist;
140  return 0;
141 }
142 static void print_histo(TH1F* plot, string outname) {
143  TCanvas genericCanvas;
144  plot->Draw("E1P0");
145  outname = outname + ".eps";
146  genericCanvas.SaveAs(outname.c_str());
147  outname.replace(outname.size() - 3, 3, "gif");
148  genericCanvas.SaveAs(outname.c_str());
149 }
150 void SideBandSubtract::printResults(string prefix) { //handles *all* printing
151  //spool over vectors of histograms and print them, then print
152  //separation variable plots and the results text file.
153  if (SeparationVariable == nullptr) {
154  cerr << "ERROR: printResults, SeparationVariable is NULL!\n";
155  return;
156  }
157  string filename; //output file name
158  for (unsigned int i = 0; i < RawHistos.size(); ++i) {
159  filename = prefix + "Raw_" + (string)RawHistos[i].GetName();
160  print_histo(&RawHistos[i], filename);
161  }
162  for (unsigned int i = 0; i < SBSHistos.size(); ++i) {
163  filename = prefix + "SBS_" + (string)RawHistos[i].GetName();
164  print_histo(&SBSHistos[i], filename);
165  }
166  if (verbose) {
167  for (unsigned int i = 0; i < SideBandHistos.size(); ++i) {
168  filename = prefix + "SideBand_" + (string)RawHistos[i].GetName();
169  print_histo(&SideBandHistos[i], filename);
170  }
171  }
172 
173  string outname = prefix + (string)SeparationVariable->GetName() + "_fitted.eps";
174  if (Data != nullptr && ModelPDF != nullptr) {
175  RooPlot* SepVarFrame = SeparationVariable->frame();
176  Data->plotOn(SepVarFrame);
177  ModelPDF->plotOn(SepVarFrame);
178  TCanvas Canvas;
179  SepVarFrame->Draw();
180  Canvas.SaveAs(outname.c_str());
181  outname.replace(outname.size() - 3, 3, "gif");
182  Canvas.SaveAs(outname.c_str());
183  } else
184  cerr << "ERROR: printResults, Data or ModelPDF is NULL!\n";
185 
186  string result_outname = prefix + "_fit_results.txt";
187  std::ofstream output(result_outname.c_str(), std::ios::out);
188  if (!output) {
189  cout << "ERROR: Could not open file for writing!\n";
190  return;
191  }
192 
193  if (fit_result != nullptr) {
194 #if ROOT_VERSION_CODE <= ROOT_VERSION(5, 19, 0)
195  fit_result->printToStream(output, Verbose);
196 #else
197  fit_result->printMultiline(output, kTRUE);
198 #endif
199  }
200 }
201 
203  //saves the ephemeral stuff to a root file for future
204  //use/modification (ie everything printed by printResults())
205 
206  TFile output(outname.c_str(), "UPDATE"); //open the output file,
207  //create it if it doesn't
208  //exist
209  //Since keys are only available from files on disk, we need to write
210  //out a new file. If the file already existed, then we opened to
211  //update, and are writing nothing new.
212  output.Write();
213  TString dirname;
214  TIter nextkey(output.GetListOfKeys());
215  TKey* key;
216  TDirectory* curDir = nullptr;
217  while ((key = (TKey*)nextkey.Next())) {
218  if (key == nullptr)
219  break;
220  TObject* obj = key->ReadObj();
221  if (obj->IsA()->InheritsFrom("TDirectory"))
222  dirname = obj->GetName();
223  }
224 
225  if (dirname == "") {
226  //we didn't find any directories so, we'll make a new one
227  curDir = output.mkdir("run0", "Run 0");
228  output.cd("run0");
229  } else {
230  //manipulate last dir string, make a new one, and get ready to fill
231  dirname.Remove(0, 3);
232  Int_t run_num = dirname.Atoi();
233  run_num++;
234  dirname = "run" + stringify(run_num);
235  curDir = output.mkdir(dirname.Data(), ("Run " + stringify(run_num)).c_str());
236  output.cd(dirname.Data());
237  }
238  if (curDir == nullptr)
239  curDir = output.GetDirectory("", kTRUE, "");
240 
241  //these should all be the same size, but to be pedantic we'll loop
242  //over each one individually, also, we need to associate them with
243  //the directory because by default they "float" in memory to avoid
244  //conflicts with other root files the user has open. If they want to
245  //write to those files, they need to close their file, pass the name
246  //here, and then let us work.
247  for (unsigned int i = 0; i < SideBandHistos.size(); ++i) {
248  SideBandHistos[i].SetDirectory(curDir);
249  SideBandHistos[i].Write();
250  }
251  for (unsigned int i = 0; i < RawHistos.size(); ++i) {
252  RawHistos[i].SetDirectory(curDir);
253  RawHistos[i].Write();
254  }
255  for (unsigned int i = 0; i < SBSHistos.size(); ++i) {
256  SBSHistos[i].SetDirectory(curDir);
257  SBSHistos[i].Write();
258  }
259  if (Data != nullptr && ModelPDF != nullptr && BackgroundPDF != nullptr && SeparationVariable != nullptr) {
260  RooPlot* sep_varFrame = SeparationVariable->frame();
261  Data->plotOn(sep_varFrame);
262  ModelPDF->plotOn(sep_varFrame);
263  BackgroundPDF->plotOn(sep_varFrame);
264  sep_varFrame->Write();
265  } else
266  cerr << "ERROR: saveResults, did not save RooPlot of data and fit\n";
267  output.Write();
268 }
269 void SideBandSubtract::setDataSet(RooDataSet* newData) {
270  if (newData != nullptr)
271  Data = newData;
272 }
273 void SideBandSubtract::print_plot(RooRealVar* printVar, string outname) {
274  if (Data == nullptr || ModelPDF == nullptr) {
275  cerr << "ERROR: print_plot, Data or ModelPDF are NULL\n";
276  return;
277  }
278 
279  RooPlot* genericFrame = printVar->frame();
280  Data->plotOn(genericFrame);
281  ModelPDF->plotOn(genericFrame);
282  TCanvas genericCanvas;
283  genericFrame->Draw();
284  outname = outname + ".eps";
285  genericCanvas.SaveAs(outname.c_str());
286  outname.replace(outname.size() - 3, 3, "gif");
287  genericCanvas.SaveAs(outname.c_str());
288 }
290  : BackgroundPDF(nullptr),
291  ModelPDF(nullptr),
292  Data(nullptr),
293  SeparationVariable(nullptr),
294  verbose(false),
295  SignalRegions(),
296  SideBandRegions(),
297  SideBandHistos(0),
298  RawHistos(0),
299  SBSHistos(0),
300  BaseHistos(0),
301  fit_result(nullptr),
302  SignalSidebandRatio(0) {
303  // Default constructor so we can do fast subtraction
304 }
305 SideBandSubtract::SideBandSubtract(RooAbsPdf* model_shape,
306  RooAbsPdf* bkg_shape,
307  RooDataSet* data,
308  RooRealVar* sep_var,
309  const vector<TH1F*>& base,
310  bool verb)
311  : BackgroundPDF(bkg_shape),
312  ModelPDF(model_shape),
313  Data(data),
314  SeparationVariable(sep_var),
315  verbose(verb),
316  SignalRegions(),
317  SideBandRegions(),
318  SideBandHistos(),
319  RawHistos(),
320  SBSHistos(),
321  BaseHistos(base),
322  base_histo(nullptr),
323  fit_result(nullptr),
324  SignalSidebandRatio(0) {}
325 /*
326 SideBandSubtract::SideBandSubtract(RooAbsPdf *model_shape,
327  RooAbsPdf *bkg_shape,
328  RooDataSet* data,
329  RooRealVar* sep_var,
330  bool verb
331  )
332  : BackgroundPDF(bkg_shape),
333  ModelPDF(model_shape),
334  Data(data),
335  SeparationVariable(sep_var),
336  verbose(verb),
337  SignalRegions(),
338  SideBandRegions(),
339  SideBandHistos(),
340  RawHistos(),
341  SBSHistos(),
342  BaseHistos(),
343  base_histo(0),
344  fit_result(0),
345  SignalSidebandRatio(0)
346 {
347  // Build BaseHistos from dataset
348  TIterator* iter = (TIterator*) Data->get()->createIterator();
349  RooAbsArg *var=NULL;
350  RooRealVar *variable=NULL;
351  cout <<"Starting while loop\n";
352  while((var = (RooAbsArg*)iter->Next()))
353  {
354  if((string)var->ClassName() != "RooRealVar")
355  continue;
356  variable = (RooRealVar*)var;
357  //we own the data this points to so we need to delete it at the end...
358  assert(variable!=NULL);
359  string title = "base_"+(string)variable->GetName();
360  base_histo = (TH1F*)Data->createHistogram(title.c_str(), *variable, Binning(variable->getBinning("default",verb,kTRUE)) );
361  //cout <<"Made histo with name: "<<base_histo->GetName()<<endl;
362  //base_histo->SetDirectory(0);
363  BaseHistos.push_back(*base_histo);
364  cout <<"Added histo to BaseHistos!\n Deleting local copy...";
365  if(base_histo) delete base_histo;
366  cout <<"Done!\n";
367  }
368 
369 }
370 */
372  //**WARNING**
373 
374  // We don't delete objects that we don't own (duh) so, all of our
375  // pointers just hang out and get handled by other people :)
376 
377  // We DO own the BaseHistos IFF the user didn't provide us with
378  // them and we constructed them from the dataset... in this case
379  // base_histo will be non-null and we want to delete the vector
380  // of histograms...
382  /*
383  if(base_histo!=NULL)
384  {
385  BaseHistos.clear();
386  }
387  */
388 }
389 void SideBandSubtract::addSignalRegion(Double_t min, Double_t max) {
390  SbsRegion signal;
391  signal.min = min;
392  signal.max = max;
393  signal.RegionName = "Signal" + stringify(SignalRegions.size());
394  if (SeparationVariable != nullptr)
395  SeparationVariable->setRange(signal.RegionName.c_str(), signal.min, signal.max);
396  SignalRegions.push_back(signal);
397  return;
398 }
399 void SideBandSubtract::addSideBandRegion(Double_t min, Double_t max) {
400  SbsRegion sideband;
401  sideband.min = min;
402  sideband.max = max;
403  sideband.RegionName = "SideBand" + stringify(SideBandRegions.size());
404  if (SeparationVariable != nullptr)
405  SeparationVariable->setRange(sideband.RegionName.c_str(), sideband.min, sideband.max);
406  SideBandRegions.push_back(sideband);
407  return;
408 }
410  if (verbose)
411  cout << "Beginning SideBand Subtraction\n";
412 
413  if (ModelPDF != nullptr && Data != nullptr && SeparationVariable != nullptr) {
414  fit_result = ModelPDF->fitTo(*Data);
415  } else {
416  cerr << "ERROR: doGobalFit, no ModelPDF, SeparationVariable or Data specified\n";
417  return -1;
418  }
419 
420  Double_t SideBandYield = getYield(SideBandRegions, BackgroundPDF);
421  Double_t BackgroundInSignal = getYield(SignalRegions, BackgroundPDF);
422 
423  SignalSidebandRatio = BackgroundInSignal / SideBandYield;
424  if (verbose) {
425  cout << "Finished fitting background!\n";
426  cout << "Attained a Signal to Sideband ratio of: " << SignalSidebandRatio << endl;
427  }
428  //I can't see a way around a double loop for each variable. Maybe I
429  //can get a C++/RooFit guru to save me the trouble here?
430 
431  //need to grab sbs objects after each global fit, because they get reset
433  for (const auto& variable : *Data->get()) {
434  for (unsigned int i = 0; i < BaseHistos.size(); i++) {
435  if ((string)variable->GetName() != (string)SeparationVariable->GetName() &&
436  (string)variable->GetName() == (string)BaseHistos[i]->GetName())
438  }
439  }
440 
441  return 0;
442 }
443 void SideBandSubtract::doFastSubtraction(TH1F& Total, TH1F& Result, SbsRegion& leftRegion, SbsRegion& rightRegion) {
444  Int_t binMin = Total.FindBin(leftRegion.max, 0.0, 0.0);
445  Int_t binMax = Total.FindBin(leftRegion.min, 0.0, 0.0);
446  double numLeft = Total.Integral(binMin, binMax);
447 
448  binMin = Total.FindBin(rightRegion.max, 0.0, 0.0);
449  binMax = Total.FindBin(rightRegion.min, 0.0, 0.0);
450  double numRight = Total.Integral(binMin, binMax);
451 
452  const unsigned int nbinsx = Total.GetNbinsX();
453  const double x1 = (leftRegion.max + leftRegion.min) / 2.0;
454  const double x2 = (rightRegion.max + rightRegion.min) / 2.0;
455 
456  const double y1 = numLeft / (leftRegion.max - leftRegion.min);
457  const double y2 = numRight / (rightRegion.max - rightRegion.min);
458 
459  const double Slope = (y2 - y1) / (x2 - x1);
460  const double Intercept = y1 - Slope * x1;
461  // Evantually we want to handle more complicated functions, but for
462  // now... just use a linear one
463  TF1 function("sbs_function_line", "[0]*x + [1]", Total.GetMinimum(), Total.GetMaximum());
464  for (unsigned int binx = 1; binx <= nbinsx; ++binx) {
465  double binWidth = Total.GetBinWidth(binx);
466  function.SetParameter(0, Slope * binWidth);
467  function.SetParameter(1, Intercept * binWidth);
468 
469  double xx = Total.GetBinCenter(binx);
470  double cu = Total.GetBinContent(binx) - function.Eval(xx);
471  // TODO: Propogate the error on the parameters in function.
472  double error1 = Total.GetBinError(binx);
473 
474  Result.SetBinContent(binx, cu);
475  Result.SetBinError(binx, error1);
476  }
477  Result.SetEntries(Result.Integral());
478 }
479 RooFitResult* SideBandSubtract::getFitResult() { return fit_result; }
480 vector<TH1F*> SideBandSubtract::getBaseHistos() { return BaseHistos; }
481 vector<TH1F> SideBandSubtract::getRawHistos() { return RawHistos; }
482 vector<TH1F> SideBandSubtract::getSBSHistos() { return SBSHistos; }
485  //cout <<"Starting to reset products \n";
486 
487  if (!SideBandHistos.empty())
488  SideBandHistos.clear();
489 
490  if (!RawHistos.empty())
491  RawHistos.clear();
492  if (!SBSHistos.empty())
493  SBSHistos.clear();
494 }
RooAbsPdf * BackgroundPDF
Double_t getYield(const std::vector< SbsRegion > &Regions, RooAbsPdf *PDF)
RooFitResult * getFitResult()
std::vector< TH1F > SideBandHistos
std::vector< TH1F > getSBSHistos()
PixelRecoRange< float > Range
bool verbose
base
Main Program
Definition: newFWLiteAna.py:92
void saveResults(std::string outname)
RooFitResult * fit_result
std::vector< TH1F > getRawHistos()
static void print_histo(TH1F *plot, string outname)
std::vector< TH1F * > getBaseHistos()
int doSubtraction(RooRealVar *variable, Double_t stsratio, Int_t index)
std::string stringify(const T &t)
static void setHistOptions(TH1F *histo, string name, string title, string axis_label)
std::vector< TH1F > SBSHistos
RooRealVar * SeparationVariable
std::string RegionName
std::vector< SbsRegion > SideBandRegions
key
prepare the HTCondor submission files and eventually submit them
Definition: value.py:1
void printResults(std::string prefix="")
void addSignalRegion(Double_t min, Double_t max)
std::vector< SbsRegion > SignalRegions
const float binMin[32]
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
void addSideBandRegion(Double_t min, Double_t max)
Definition: output.py:1
void print_plot(RooRealVar *printVar, std::string outname)
void doFastSubtraction(TH1F &Total, TH1F &Result, SbsRegion &leftRegion, SbsRegion &rightRegion)
void setDataSet(RooDataSet *newData)
long double T
std::vector< TH1F * > BaseHistos
std::vector< TH1F > RawHistos