CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
TagProbeFitter.cc
Go to the documentation of this file.
2 #include <stdexcept>
3 //#include "TagProbeFitter.h"
4 
5 #include "TROOT.h"
6 #include "TFile.h"
7 #include "TPad.h"
8 #include "TText.h"
9 #include "TCanvas.h"
10 #include "TH2F.h"
11 #include "TStyle.h"
12 #include "RooWorkspace.h"
13 #include "RooDataSet.h"
14 #include "RooDataHist.h"
15 #include "RooRealVar.h"
16 #include "RooFormulaVar.h"
17 #include "RooAddPdf.h"
18 #include "RooGlobalFunc.h"
19 #include "RooCategory.h"
20 #include "RooSimultaneous.h"
21 #include "RooPlot.h"
22 #include "RooFitResult.h"
23 #include "RooBinning.h"
24 #include "RooBinningCategory.h"
25 #include "RooMultiCategory.h"
26 #include "RooMappedCategory.h"
27 #include "RooThresholdCategory.h"
28 #include "Roo1DTable.h"
29 #include "RooMinuit.h"
30 #include "RooNLLVar.h"
31 #include "RooAbsDataStore.h"
32 #include "RooEfficiency.h"
33 #include "RooGaussian.h"
34 #include "RooChebychev.h"
35 #include "RooProdPdf.h"
36 #include "RooGenericPdf.h"
37 #include "RooExtendPdf.h"
38 #include "RooTrace.h"
39 #include "RooMsgService.h"
40 #include "Math/QuantFuncMathCore.h"
41 
42 using namespace RooFit;
43 
44 TagProbeFitter::TagProbeFitter(vector<string> inputFileNames, string inputDirectoryName, string inputTreeName, string outputFileName, int numCPU_, bool saveWorkspace_, bool floatShapeParameters_, std::vector<std::string> fixVars_){
45  inputTree = new TChain((inputDirectoryName+"/"+inputTreeName).c_str());
46  for(size_t i=0; i<inputFileNames.size(); i++){
47  inputTree->Add(inputFileNames[i].c_str());
48  }
49  outputFile = new TFile(outputFileName.c_str(),"recreate");
50  outputDirectory = outputFile->mkdir(inputDirectoryName.c_str());
51  numCPU = numCPU_;
52  saveWorkspace = saveWorkspace_;
53  massBins = 0; // automatic default
54  floatShapeParameters = floatShapeParameters_;
55  fixVars = fixVars_;
56  weightVar = "";
57  if(!floatShapeParameters && fixVars.empty()) std::cout << "TagProbeFitter: " << "You wnat to fix some variables but do not specify them!";
58 
59  gROOT->SetStyle("Plain");
60  gStyle->SetTitleFillColor(0);
61  gStyle->SetPalette(1);
62  gStyle->SetOptStat(0);
63  gStyle->SetPaintTextFormat(".2f");
64 
65  quiet = false;
66 
67  binnedFit = false;
68 }
69 
71  if(inputTree)
72  delete inputTree;
73  if(outputFile)
74  outputFile->Close();
75 }
76 
77 void TagProbeFitter::setQuiet(bool quiet_) {
78  quiet = quiet_;
79  if (quiet) {
80  RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR);
81  } else {
82  RooMsgService::instance().setGlobalKillBelow(RooFit::WARNING);
83  }
84 }
85 bool TagProbeFitter::addVariable(string name, string title, double low, double hi, string units){
86  variables.addClone(RooRealVar(name.c_str(), title.c_str(), low, hi, units.c_str()));
87  return true;
88 }
89 
90 bool TagProbeFitter::addCategory(string name, string title, string expression){
91  RooCategory* c = (RooCategory*) parameterParser.factory(expression.c_str());
92  if(!c)
93  return false;
94  //set the name of the category to the passed name instead of the one in the expression
95  c->SetName(name.c_str());
96  c->SetTitle(title.c_str());
97  variables.addClone(*c);
98  return true;
99 }
100 
101 bool TagProbeFitter::addExpression(string expressionName, string title, string expression, vector<string> arguments) {
102  expressionVars.push_back(make_pair(make_pair(expressionName,title), make_pair(expression,arguments)));
103  return true;
104 }
105 
106 
107 bool TagProbeFitter::addThresholdCategory(string categoryName, string title, string varName, double cutValue){
108  thresholdCategories.push_back(make_pair(make_pair(categoryName,title), make_pair(varName,cutValue)));
109  return true;
110 }
111 
112 
113 void TagProbeFitter::addPdf(string name, vector<string>& pdfCommands){
114  pdfs[name] = pdfCommands;
115 }
116 
118  massBins = bins;
119 }
120 
121 void TagProbeFitter::setWeightVar(const std::string &var) {
122  weightVar = var;
123 }
124 
125 string TagProbeFitter::calculateEfficiency(string dirName, vector<string> effCats, vector<string> effStates, vector<string>& unbinnedVariables, map<string, vector<double> >& binnedReals, map<string, std::vector<string> >& binnedCategories, vector<string>& binToPDFmap){
126  //go to home directory
127  outputDirectory->cd();
128  //make a directory corresponding to this efficiency binning
129  gDirectory->mkdir(dirName.c_str())->cd();
130 
131  RooArgSet dataVars;
132 
133  //collect unbinned variables
134  for(vector<string>::iterator v=unbinnedVariables.begin(); v!=unbinnedVariables.end(); v++){
135  dataVars.addClone(variables[v->c_str()], true);
136  if (binnedFit && (v == unbinnedVariables.begin())) {
137  ((RooRealVar&)dataVars[v->c_str()]).setBins(massBins);
138  }
139  }
140  //collect the binned variables and the corresponding bin categories
141  RooArgSet binnedVariables;
142  RooArgSet binCategories;
143  for(map<string, vector<double> >::iterator v=binnedReals.begin(); v!=binnedReals.end(); v++){
144  TString name = v->first;
145  if (variables.find(name) == 0) { cerr << "Binned variable '"<<name<<"' not found." << endl; return "Error"; }
146  binnedVariables.addClone(variables[name]);
147  ((RooRealVar&)binnedVariables[name]).setBinning( RooBinning(v->second.size()-1, &v->second[0]) );
148  binCategories.addClone( RooBinningCategory(name+"_bins", name+"_bins", (RooRealVar&)binnedVariables[name]) );
149  }
150  dataVars.addClone(binnedVariables, true);
151 
152  //collect the category variables and the corresponding mapped categories
153  RooArgSet categories;
154  RooArgSet mappedCategories;
155  for(map<string, vector<string> >::iterator v=binnedCategories.begin(); v!=binnedCategories.end(); v++){
156  TString name = v->first;
157  if (variables.find(name) == 0) { cerr << "Binned category '"<<name<<"' not found." << endl; return "Error"; }
158  categories.addClone(variables[name]);
159  mappedCategories.addClone(RooMappedCategory(name+"_bins", name+"_bins", (RooCategory&)categories[name]));
160  for(unsigned int i = 0; i<v->second.size(); i++){
161  ((RooMappedCategory&)mappedCategories[name+"_bins"]).map(v->second[i].c_str(), name+"_"+TString(v->second[i].c_str()).ReplaceAll(",","_"));
162  }
163  }
164  dataVars.addClone(categories, true);
165 
166  // add the efficiency category if it's not a dynamic one
167  for (vector<string>::const_iterator effCat = effCats.begin(); effCat != effCats.end(); ++effCat) {
168  if (variables.find(effCat->c_str()) != 0) {
169  dataVars.addClone(variables[effCat->c_str()], true);
170  }
171  }
172 
173  // add all variables used in expressions
174  for(vector<pair<pair<string,string>, pair<string, vector<string> > > >::const_iterator ev = expressionVars.begin(), eve = expressionVars.end(); ev != eve; ++ev){
175  for (vector<string>::const_iterator it = ev->second.second.begin(), ed = ev->second.second.end(); it != ed; ++it) {
176  // provided that they are real variables themselves
177  if (variables.find(it->c_str())) dataVars.addClone(variables[it->c_str()], true);
178  }
179  }
180  // add all real variables used in cuts
181  for(vector<pair<pair<string,string>, pair<string, double> > >::const_iterator tc = thresholdCategories.begin(), tce = thresholdCategories.end(); tc != tce; ++tc){
182  if (variables.find(tc->second.first.c_str())) dataVars.addClone(variables[tc->second.first.c_str()], true);
183  }
184 
185 
186  //now add the necessary mass and passing variables to make the unbinned RooDataSet
187  RooDataSet data("data", "data", inputTree,
188  dataVars,
189  /*selExpr=*/"", /*wgtVarName=*/(weightVar.empty() ? 0 : weightVar.c_str()));
190 
191  // Now add all expressions that are computed dynamically
192  for(vector<pair<pair<string,string>, pair<string, vector<string> > > >::const_iterator ev = expressionVars.begin(), eve = expressionVars.end(); ev != eve; ++ev){
193  RooArgList args;
194  for (vector<string>::const_iterator it = ev->second.second.begin(), ed = ev->second.second.end(); it != ed; ++it) {
195  args.add(dataVars[it->c_str()]);
196  }
197  RooFormulaVar expr(ev->first.first.c_str(), ev->first.second.c_str(), ev->second.first.c_str(), args);
198  RooRealVar *col = (RooRealVar *) data.addColumn(expr);
199  dataVars.addClone(*col);
200  }
201 
202  // And add all dynamic categories from thresholds
203  for(vector<pair<pair<string,string>, pair<string, double> > >::const_iterator tc = thresholdCategories.begin(), tce = thresholdCategories.end(); tc != tce; ++tc){
204  RooThresholdCategory tmp(tc->first.first.c_str(), tc->first.second.c_str(), (RooAbsReal &)dataVars[tc->second.first.c_str()], "above", 1);
205  tmp.addThreshold(tc->second.second, "below",0);
206  RooCategory *cat = (RooCategory *) data.addColumn(tmp);
207  dataVars.addClone(*cat);
208  }
209 
210 
211  //merge the bin categories to a MultiCategory for convenience
212  RooMultiCategory allCats("allCats", "allCats", RooArgSet(binCategories, mappedCategories));
213  data.addColumn(allCats);
214  string effName;
215  //setup the efficiency category
216  if (effCats.size() == 1) {
217  effName = effCats.front() + "::" + effStates.front();
218  RooMappedCategory efficiencyCategory("_efficiencyCategory_", "_efficiencyCategory_", (RooCategory&)dataVars[effCats.front().c_str()], "Failed");
219  efficiencyCategory.map(effStates.front().c_str(), "Passed");
220  data.addColumn( efficiencyCategory );
221  } else {
222  RooArgSet rooEffCats;
223  string multiState = "{";
224  for (size_t i = 0; i < effCats.size(); ++i) {
225  if (i) { multiState += ";"; effName += " && "; }
226  rooEffCats.add((RooCategory &) dataVars[effCats[i].c_str()]);
227  multiState += effStates[i];
228  effName = effCats[i] + "::" + effStates[i];
229  }
230  multiState += "}";
231  RooMultiCategory efficiencyMultiCategory("_efficiencyMultiCategory_", "_efficiencyMultiCategory_", rooEffCats);
232  RooMappedCategory efficiencyCategory("_efficiencyCategory_", "_efficiencyCategory_", efficiencyMultiCategory, "Failed");
233  efficiencyCategory.map(multiState.c_str(), "Passed");
234  data.addColumn( efficiencyCategory );
235  }
236  //setup the pdf category
237  RooMappedCategory pdfCategory("_pdfCategory_", "_pdfCategory_", allCats, (binToPDFmap.size()>0)?binToPDFmap[0].c_str():"");
238  for(unsigned int i = 1; i<binToPDFmap.size(); i+=2){
239  pdfCategory.map(binToPDFmap[i].c_str(), binToPDFmap[i+1].c_str());
240  }
241  data.addColumn( pdfCategory );
242 
243  //create the empty efficiency datasets from the binned variables
244  RooRealVar efficiency("efficiency", "Efficiency", 0, 1);
245 
246  RooDataSet fitEfficiency("fit_eff", "Efficiency from unbinned ML fit", RooArgSet(RooArgSet(binnedVariables, categories), efficiency), StoreAsymError(RooArgSet(binnedVariables, efficiency)));
247 // RooDataSet sbsEfficiency("sbs_eff", "Efficiency from side band substraction", RooArgSet(RooArgSet(binnedVariables, categories), efficiency), StoreAsymError(RooArgSet(binnedVariables, efficiency)));
248  RooDataSet cntEfficiency("cnt_eff", "Efficiency from counting", RooArgSet(RooArgSet(binnedVariables, categories), efficiency), StoreAsymError(RooArgSet(binnedVariables, efficiency)));
249 
250 
251  if(!floatShapeParameters){
252  //fitting whole dataset to get initial values for some parameters
253  RooWorkspace* w = new RooWorkspace();
254  w->import(data);
255  efficiency.setVal(0);//reset
256  efficiency.setAsymError(0,0);
257  std::cout << "ALL dataset: calling doFitEfficiency with pdf: " << pdfCategory.getLabel() << std::endl;
258  doFitEfficiency(w, pdfCategory.getLabel(), efficiency);
259  delete w;
260  }
261 
262  //loop over all bins with the help of allCats
263  TIterator* it = allCats.typeIterator();
264  for(RooCatType* t = (RooCatType*)it->Next(); t!=0; t = (RooCatType*)it->Next() ){
265  //name of the multicategory
266  TString catName = t->GetName();
267  //skip unmapped states
268  if(catName.Contains("NotMapped")) continue;
269  //create the dataset
270  RooDataSet* data_bin = (RooDataSet*) data.reduce(//SelectVars(RooArgSet(variables["mass"], variables["passing"])),
271  Cut(TString::Format("allCats==%d",t->getVal())));
272  //set the category variables by reading the first event
273  const RooArgSet* row = data_bin->get();
274 
275  //get PDF name
276  TString pdfName(((RooCategory*)row->find("_pdfCategory_"))->getLabel());
277 
278 
279  //make directory name
280  TString dirName = catName;
281  dirName.ReplaceAll("{","").ReplaceAll("}","").ReplaceAll(";","__");
282  if(pdfName.Length() > 0){
283  dirName.Append("__").Append(pdfName);
284  }
285 
286  cout<<"Fitting bin: "<<dirName<<endl;
287  //make a directory for each bin
288  gDirectory->mkdir(dirName)->cd();
289 
290 
291  //create a workspace
292  RooWorkspace* w = new RooWorkspace();
293  //import the data
294  w->import(*data_bin);
295  //save the distribution of variables
296  saveDistributionsPlot(w);
297  //do the fitting only if there is sufficient number of events
298  if(data_bin->numEntries()>0){
299  //set the values of binnedVariables to the mean value in this data bin
300  RooArgSet meanOfVariables;
301  TIterator* vit = binnedVariables.createIterator();
302  for(RooRealVar* v = (RooRealVar*)vit->Next(); v!=0; v = (RooRealVar*)vit->Next() ){
303  meanOfVariables.addClone(*v);
304  double mean = w->data("data")->mean(*v);
305  RooBinning binning((RooBinning&)v->getBinning());
306  int ind = binning.binNumber(mean);
307  RooRealVar& newVar = (RooRealVar&)meanOfVariables[v->GetName()];
308  newVar.setVal(mean);
309  newVar.setAsymError(binning.binLow(ind)-mean, binning.binHigh(ind)-mean);
310  }
311  delete vit;
312  //put an entry in the efficiency dataset
313  //note that the category values are coming from data_bin->get(0)
314  meanOfVariables.addClone(*data_bin->get(0), true);
315 
316  efficiency.setVal(0);//reset
317  efficiency.setAsymError(0,0);
318  doFitEfficiency(w, pdfName.Data(), efficiency);
319  fitEfficiency.add( RooArgSet(meanOfVariables, efficiency) );
320 
321 /* efficiency.setVal(0);//reset
322  doSBSEfficiency(w, efficiency);
323  sbsEfficiency.add( RooArgSet(meanOfVariables, efficiency) );*/
324 
325  efficiency.setVal(0);//reset
326  doCntEfficiency(w, efficiency);
327  cntEfficiency.add( RooArgSet(meanOfVariables, efficiency) );
328  }
329  //save the workspace if requested
330  if(saveWorkspace){
331  w->Write("w");
332  }
333  //clean up
334  delete data_bin;
335  delete w;
336  //get back to the initial directory
337  gDirectory->cd("..");
338  }
339 
340  //save the efficiency data
341  fitEfficiency.Write();
342  gDirectory->mkdir("fit_eff_plots")->cd();
343  saveEfficiencyPlots(fitEfficiency, effName, binnedVariables, mappedCategories);
344  gDirectory->cd("..");
345 
346 /* sbsEfficiency.Write();
347  gDirectory->mkdir("sbs_eff_plots")->cd();
348  saveEfficiencyPlots(sbsEfficiency, effCat+"::"+effState, binnedVariables, mappedCategories);
349  gDirectory->cd("..");*/
350 
351  cntEfficiency.Write();
352  gDirectory->mkdir("cnt_eff_plots")->cd();
353  saveEfficiencyPlots(cntEfficiency, effName, binnedVariables, mappedCategories);
354  gDirectory->cd("..");
355  //empty string means no error
356  return "";
357 }
358 
359 void TagProbeFitter::doFitEfficiency(RooWorkspace* w, string pdfName, RooRealVar& efficiency){
360  //if pdfName is empty skip the fit
361  if(pdfName == ""){
362  return;
363  }
364  //create the simultaneous pdf of name pdfName
365  createPdf(w, pdfs[pdfName]);
366  //set the initial values for the yields of signal and background
367  setInitialValues(w);
368  RooFitResult* res = 0;
369 
370  RooAbsData *data = w->data("data");
371  if (binnedFit) {
372  // get variables from data, which contain also other binning or expression variables
373  const RooArgSet *dataObs = data->get(0);
374  // remove everything which is not a dependency of the pdf
375  RooArgSet *obs = w->pdf("simPdf")->getObservables(dataObs);
376  RooDataHist *bdata = new RooDataHist("data_binned", "data_binned", *obs, *data);
377  w->import(*bdata);
378  data = bdata;
379  delete obs;
380  }
381 
382  double totPassing = data->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
383  double totFailing = data->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
384 
385  //******* The block of code below is to make the fit converge faster.
386  // ****** This part is OPTIONAL, i.e., off be default. User can activate this
387  // ****** by setting the following parameters: "fixVars" and "floatShapeParameters"
388  // ****** Here is the full logic:
399 
400 
401  if(!fixVars.empty()){
402  // calculate initial values for parameters user want to fix
403  if(!floatShapeParameters && fixVarValues.empty()){
404  // we want to fix these parameters for each bin.
405  // the following sequence fixes them, fits, releases and fits again
406  // to get reasonable values.
407  // ----------------------------------------------------------------------
408  // This procedure works only once with a whole dataset (without binning)
409  // ----------------------------------------------------------------------
410 
411  // fix them
412  varFixer(w,true);
413  //do fit
414  w->pdf("simPdf")->fitTo(*data, Save(true), Extended(true), NumCPU(numCPU), PrintLevel(quiet?-1:1), PrintEvalErrors(quiet?-1:1), Warnings(!quiet));
415  //release vars
416  varFixer(w,false);
417  //do fit
418  w->pdf("simPdf")->fitTo(*data, Save(true), Extended(true), NumCPU(numCPU), PrintLevel(quiet?-1:1), PrintEvalErrors(quiet?-1:1), Warnings(!quiet));
419  //save vars
420  varSaver(w);
421  // now we have a starting point. Fit will converge faster.
422  }
423 
424  // here we can use initial values if we want (this works for each bin)
425  if(!floatShapeParameters) varRestorer(w); //restore vars
426 
427 
428  // if we don't want to "floatShapeParameters" we just fix, fit,
429  // release, and fit again. No need for global fitting above.
430  //fix vars
431  varFixer(w,true);
432  //do fit
433  res = w->pdf("simPdf")->fitTo(*data, Save(true), Extended(true), NumCPU(numCPU), Minos(*w->var("efficiency")), PrintLevel(quiet?-1:1), PrintEvalErrors(quiet?-1:1), Warnings(!quiet));
434  }//if(!fixVars.empty())
435 
436  // (default = true) if we don't want to fix any parameters or want to fit each bin with all parameters floating
437  if(floatShapeParameters){
438  //release vars
439  varFixer(w,false);
440 
441  //do fit
442  res = w->pdf("simPdf")->fitTo(*data, Save(true), Extended(true), NumCPU(numCPU), Minos(*w->var("efficiency")), PrintLevel(quiet?-1:1), PrintEvalErrors(quiet?-1:1), Warnings(!quiet));
443  }
444 
445 
446 
447  // save everything
448  res->Write("fitresults");
449  w->saveSnapshot("finalState",w->components());
450  saveFitPlot(w);
451  //extract the efficiency parameter from the results
452  RooRealVar* e = (RooRealVar*) res->floatParsFinal().find("efficiency");
453  //What's wrong with this? and why don't they copy the errors!
454  //efficiency = *e;
455  efficiency.setVal(e->getVal());
456  Double_t errLo = e->getErrorLo(), errHi = e->getErrorHi();
457  if (errLo == 0 && e->getVal() < 0.5) errLo = e->getMin()-e->getVal();
458  if (errHi == 0 && e->getVal() > 0.5) errHi = e->getMax()-e->getVal();
459  efficiency.setAsymError(errLo, errHi);
460 
461  if (totPassing * totFailing == 0) {
462  RooRealVar* nS = (RooRealVar*) res->floatParsFinal().find("numSignalAll");
463  //RooRealVar* nB = (RooRealVar*) res->floatParsFinal().find(totPassing != 0 ? "numBackgroundPass" : "numBackgroundFail");
464  double cerr = ROOT::Math::beta_quantile( 1-(1.0-.68540158589942957)/2, 1, nS->getVal() );
465  /*
466  std::cout << "======================================================================================" << std::endl;
467  std::cout << "======= totPassing " << totPassing << ", totFailing " << totFailing << std::endl;
468  std::cout << "======= FIT: e " << e->getVal() << ", e Lo " << e->getErrorLo() << ", e Hi " << e->getErrorHi() << std::endl;
469  std::cout << "======= FIT:nS " << nS->getVal() << ", nS Lo " << nS->getErrorLo() << ", nS Hi " << nS->getErrorHi() << std::endl;
470  std::cout << "======= FIT:nB " << nB->getVal() << ", nB Lo " << nB->getErrorLo() << ", nB Hi " << nB->getErrorHi() << std::endl;
471  std::cout << "======= CNT: " << cerr << std::endl;
472  std::cout << "======================================================================================" << std::endl;
473  */
474  if (totPassing == 0) {
475  efficiency.setVal(0);
476  efficiency.setAsymError(0,cerr);
477  } else {
478  efficiency.setVal(1);
479  efficiency.setAsymError(-cerr,0);
480  }
481  }
482 }
483 
484 void TagProbeFitter::createPdf(RooWorkspace* w, vector<string>& pdfCommands){
485  // create the signal and background pdfs defined by the user
486  for(unsigned int i=0; i<pdfCommands.size(); i++){
487  const std::string & command = pdfCommands[i];
488  if (command.find("#import ") == 0) {
489  TDirectory *here = gDirectory;
490  w->import(command.substr(8).c_str());
491  here->cd();
492  } else {
493  TDirectory *here = gDirectory;
494  w->factory(command.c_str());
495  here->cd();
496  }
497  }
498  // setup the simultaneous extended pdf
499  w->factory("expr::numSignalPass('efficiency*numSignalAll', efficiency, numSignalAll[0.,1e10])");
500  w->factory("expr::numSignalFail('(1-efficiency)*numSignalAll', efficiency, numSignalAll)");
501  TString sPass = "signal", sFail = "signal";
502  if (w->pdf("signalPass") != 0 && w->pdf("signalFail") != 0) {
503  if (w->pdf("signal") != 0) throw std::logic_error("You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail'), not both!");
504  sPass = "signalPass"; sFail = "signalFail";
505  } else if (w->pdf("signal") != 0) {
506  if (w->pdf("signalPass") != 0 || w->pdf("signalFail") != 0) {
507  throw std::logic_error("You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail'), not both!");
508  }
509  } else {
510  throw std::logic_error("You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail')");
511  }
512  w->factory("SUM::pdfPass(numSignalPass*"+sPass+", numBackgroundPass[0.,1e10]*backgroundPass)");
513  w->factory("SUM::pdfFail(numSignalFail*"+sFail+", numBackgroundFail[0.,1e10]*backgroundFail)");
514  w->factory("SIMUL::simPdf(_efficiencyCategory_, Passed=pdfPass, Failed=pdfFail)");
515  // signalFractionInPassing is not used in the fit just to set the initial values
516  if (w->pdf("simPdf") == 0) throw std::runtime_error("Could not create simultaneous fit pdf.");
517  if(w->var("signalFractionInPassing") == 0)
518  w->factory("signalFractionInPassing[0.9]");
519 }
520 
521 void TagProbeFitter::setInitialValues(RooWorkspace* w){
522  // calculate initial values
523  double signalEfficiency = w->var("efficiency")->getVal();
524  double signalFractionInPassing = w->var("signalFractionInPassing")->getVal();
525  double totPassing = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
526  double totFailinging = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
527  double numSignalAll = totPassing*signalFractionInPassing/signalEfficiency;
528  // check if this value is inconsistent on the failing side
529  if(numSignalAll*(1-signalEfficiency) > totFailinging)
530  numSignalAll = totFailinging;
531  // now set the values
532  w->var("numSignalAll")->setVal(numSignalAll);
533  w->var("numBackgroundPass")->setVal(totPassing - numSignalAll*signalEfficiency);
534  w->var("numBackgroundFail")->setVal(totFailinging - numSignalAll*(1-signalEfficiency));
535 
536  if (totPassing == 0) {
537  w->var("efficiency")->setVal(0.0);
538  w->var("efficiency")->setAsymError(0,1);
539  w->var("efficiency")->setConstant(false);
540  w->var("numBackgroundPass")->setVal(0.0);
541  w->var("numBackgroundPass")->setConstant(true);
542  w->var("numBackgroundFail")->setConstant(false);
543  } else if (totFailinging == 0) {
544  w->var("efficiency")->setVal(1.0);
545  w->var("efficiency")->setAsymError(-1,0);
546  w->var("efficiency")->setConstant(false);
547  w->var("numBackgroundPass")->setConstant(false);
548  w->var("numBackgroundFail")->setVal(0.0);
549  w->var("numBackgroundFail")->setConstant(true);
550  } else {
551  w->var("efficiency")->setConstant(false);
552  w->var("numBackgroundPass")->setConstant(false);
553  w->var("numBackgroundFail")->setConstant(false);
554  }
555 
556  // if signal fraction is 1 then set the number of background events to 0.
557  RooRealVar* fBkgPass = w->var("numBackgroundPass");
558  if(signalFractionInPassing==1.0) { fBkgPass->setVal(0.0); fBkgPass->setConstant(true); }
559 
560  // save initial state for reference
561  w->saveSnapshot("initialState",w->components());
562 }
563 
564 void TagProbeFitter::saveFitPlot(RooWorkspace* w){
565  // save refferences for convenience
566  RooCategory& efficiencyCategory = *w->cat("_efficiencyCategory_");
567  RooAbsData* dataAll = (binnedFit ? w->data("data_binned") : w->data("data") );
568  RooAbsData* dataPass = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Passed"));
569  RooAbsData* dataFail = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Failed"));
570  RooAbsPdf& pdf = *w->pdf("simPdf");
571  RooArgSet *obs = pdf.getObservables(*dataAll);
572  RooRealVar* mass = 0;
573  TIterator* it = obs->createIterator();
574  for(RooAbsArg* v = (RooAbsArg*)it->Next(); v!=0; v = (RooAbsArg*)it->Next() ){
575  if(!v->InheritsFrom("RooRealVar")) continue;
576  mass = (RooRealVar*)v;
577  break;
578  }
579  if(!mass) return;
580  // make a 2x2 canvas
581  TCanvas canvas("fit_canvas");
582  canvas.Divide(2,2);
583  vector<RooPlot*> frames;
584  // plot the Passing Probes
585  canvas.cd(1);
586  if (massBins == 0) {
587  frames.push_back(mass->frame(Name("Passing"), Title("Passing Probes")));
588  frames.push_back(mass->frame(Name("Failing"), Title("Failing Probes")));
589  frames.push_back(mass->frame(Name("All"), Title("All Probes")));
590  } else {
591  frames.push_back(mass->frame(Name("Passing"), Title("Passing Probes"), Bins(massBins)));
592  frames.push_back(mass->frame(Name("Failing"), Title("Failing Probes"), Bins(massBins)));
593  frames.push_back(mass->frame(Name("All"), Title("All Probes"), Bins(massBins)));
594  }
595  dataPass->plotOn(frames[0]);
596  pdf.plotOn(frames[0], Slice(efficiencyCategory, "Passed"), ProjWData(*dataPass), LineColor(kGreen));
597  pdf.plotOn(frames[0], Slice(efficiencyCategory, "Passed"), ProjWData(*dataPass), LineColor(kGreen), Components("backgroundPass"), LineStyle(kDashed));
598  frames[0]->Draw();
599  // plot the Failing Probes
600  canvas.cd(2);
601  dataFail->plotOn(frames[1]);
602  pdf.plotOn(frames[1], Slice(efficiencyCategory, "Failed"), ProjWData(*dataFail), LineColor(kRed));
603  pdf.plotOn(frames[1], Slice(efficiencyCategory, "Failed"), ProjWData(*dataFail), LineColor(kRed), Components("backgroundFail"), LineStyle(kDashed));
604  frames[1]->Draw();
605  // plot the All Probes
606  canvas.cd(3);
607  dataAll->plotOn(frames.back());
608  pdf.plotOn(frames.back(), ProjWData(*dataAll), LineColor(kBlue));
609  pdf.plotOn(frames.back(), ProjWData(*dataAll), LineColor(kBlue), Components("backgroundPass,backgroundFail"), LineStyle(kDashed));
610  frames.back()->Draw();
611  // plot the Fit Results
612  canvas.cd(4);
613  frames.push_back(mass->frame(Name("Fit Results"), Title("Fit Results")));
614  pdf.paramOn(frames.back(), dataAll, "", 0, "NELU", 0.1, 0.9, 0.9);
615  // draw only the parameter box not the whole frame
616  frames.back()->findObject(Form("%s_paramBox",pdf.GetName()))->Draw();
617  //save and clean up
618  canvas.Write();
619  for (size_t i=0; i<frames.size(); i++) {
620  delete frames[i];
621  }
622  delete dataPass;
623  delete dataFail;
624 }
625 
627  // save pointers to datasets to manage memory
628  RooAbsData* dataAll = w->data("data");
629  RooAbsData* dataPass = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Passed"));
630  RooAbsData* dataFail = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Failed"));
631 
632  const RooArgSet* vars = dataAll->get();
633  vector<RooRealVar*> reals;
634  TIterator* it = vars->createIterator();
635  for(RooAbsArg* v = (RooAbsArg*)it->Next(); v!=0; v = (RooAbsArg*)it->Next() ){
636  if(!v->InheritsFrom("RooRealVar")) continue;
637  reals.push_back((RooRealVar*)v);
638  }
639  TCanvas canvas("distributions_canvas");
640  canvas.Divide(3,reals.size());
641  vector<RooPlot*> frames;
642  for(unsigned int i=0; i<reals.size(); i++){
643  // plot the Passing Probes
644  canvas.cd(3*i+1);
645  frames.push_back(reals[i]->frame(Name("Passing"), Title("Passing Probes"), Bins(100)));
646  dataPass->plotOn(frames.back(), LineColor(kGreen));
647  dataPass->statOn(frames.back());
648  frames.back()->Draw();
649  // plot the Failing Probes
650  canvas.cd(3*i+2);
651  frames.push_back(reals[i]->frame(Name("Failing"), Title("Failing Probes"), Bins(100)));
652  dataFail->plotOn(frames.back(), LineColor(kRed));
653  dataFail->statOn(frames.back());
654  frames.back()->Draw();
655  // plot the All Probes
656  canvas.cd(3*i+3);
657  frames.push_back(reals[i]->frame(Name("All"), Title("All Probes"), Bins(100)));
658  dataAll->plotOn(frames.back(), LineColor(kBlue));
659  dataAll->statOn(frames.back());
660  frames.back()->Draw();
661  }
662  canvas.Write();
663  for (size_t i=0; i<frames.size(); i++) {
664  delete frames[i];
665  }
666  delete dataPass;
667  delete dataFail;
668 }
669 
670 void TagProbeFitter::saveEfficiencyPlots(RooDataSet& eff, TString effName, RooArgSet& binnedVariables, RooArgSet& mappedCategories){
671  TIterator* v1it = binnedVariables.createIterator();
672  for(RooRealVar* v1 = (RooRealVar*)v1it->Next(); v1!=0; v1 = (RooRealVar*)v1it->Next() ){
673  RooArgSet binCategories1D;
674  TIterator* v2it = binnedVariables.createIterator();
675  for(RooRealVar* v2 = (RooRealVar*)v2it->Next(); v2!=0; v2 = (RooRealVar*)v2it->Next() ){
676  if(v2 == v1) continue;
677  binCategories1D.addClone( RooBinningCategory(TString(v2->GetName())+"_bins", TString(v2->GetName())+"_bins", *v2) );
678 
679  RooArgSet binCategories2D;
680  TIterator* v3it = binnedVariables.createIterator();
681  for(RooRealVar* v3 = (RooRealVar*)v3it->Next(); v3!=0; v3 = (RooRealVar*)v3it->Next() ){
682  if(v3 == v1 || v3 == v2) continue;
683  binCategories2D.addClone( RooBinningCategory(TString(v3->GetName())+"_bins", TString(v3->GetName())+"_bins", *v3) );
684  }
685  RooMultiCategory allCats2D("allCats2D", "allCats2D", RooArgSet(binCategories2D, mappedCategories));
686  if(allCats2D.numTypes()==0){
687  makeEfficiencyPlot2D(eff, *v1, *v2, TString::Format("%s_%s_PLOT", v1->GetName(), v2->GetName()), "", effName);
688  }else{
689  RooDataSet myEff(eff);
690  myEff.addColumn(allCats2D);
691  TIterator* catIt = allCats2D.typeIterator();
692  for(RooCatType* t = (RooCatType*)catIt->Next(); t!=0; t = (RooCatType*)catIt->Next() ){
693  TString catName = t->GetName();
694  if(catName.Contains("NotMapped")) continue;
695  catName.ReplaceAll("{","").ReplaceAll("}","").ReplaceAll(";","_&_");
696  RooDataSet* eff_bin = (RooDataSet*) myEff.reduce( Cut(TString::Format("allCats2D==%d",t->getVal())) );
697  makeEfficiencyPlot2D(*eff_bin, *v1, *v2, TString::Format("%s_%s_PLOT_%s",v1->GetName(), v2->GetName(), catName.Data()), catName, effName);
698  delete eff_bin;
699  }
700  }
701  }
702  RooMultiCategory allCats1D("allCats1D", "allCats1D", RooArgSet(binCategories1D, mappedCategories));
703  if(allCats1D.numTypes()==0){
704  makeEfficiencyPlot1D(eff, *v1, TString::Format("%s_PLOT", v1->GetName()), "", effName);
705  }else{
706  RooDataSet myEff(eff);
707  myEff.addColumn(allCats1D);
708  TIterator* catIt = allCats1D.typeIterator();
709  for(RooCatType* t = (RooCatType*)catIt->Next(); t!=0; t = (RooCatType*)catIt->Next() ){
710  TString catName = t->GetName();
711  if(catName.Contains("NotMapped")) continue;
712  catName.ReplaceAll("{","").ReplaceAll("}","").ReplaceAll(";","_&_");
713  RooDataSet* eff_bin = (RooDataSet*) myEff.reduce( Cut(TString::Format("allCats1D==%d",t->getVal())) );
714  makeEfficiencyPlot1D(*eff_bin, *v1, TString::Format("%s_PLOT_%s", v1->GetName(), catName.Data()), catName, effName);
715  delete eff_bin;
716  }
717  }
718  }
719 }
720 
721 void TagProbeFitter::makeEfficiencyPlot1D(RooDataSet& eff, RooRealVar& v, TString plotName, TString plotTitle, TString effName){
722  TCanvas canvas(plotName);
723  const RooArgSet* set = eff.get();
724  RooRealVar* e = (RooRealVar*) set->find("efficiency");
725  RooPlot* p = v.frame(Name(plotName), Title(plotTitle));
726  eff.plotOnXY(p,YVar(*e));
727  p->SetYTitle(TString("Efficiency of ")+effName);
728  p->SetAxisRange(0,1,"Y");
729  p->Draw();
730  canvas.Write();
731  delete p;
732 }
733 
734 void TagProbeFitter::makeEfficiencyPlot2D(RooDataSet& eff, RooRealVar& v1, RooRealVar& v2, TString plotName, TString plotTitle, TString effName){
735  TCanvas canvas(plotName);
736  canvas.SetRightMargin(0.15);
737  TH2F* h = new TH2F(plotName, plotName, v1.getBinning().numBins(), v1.getBinning().array(), v2.getBinning().numBins(), v2.getBinning().array());
738  const RooArgSet* set = eff.get();
739  RooRealVar* e = (RooRealVar*) set->find("efficiency");
740  RooRealVar* v1_ = (RooRealVar*) set->find(v1.GetName());
741  RooRealVar* v2_ = (RooRealVar*) set->find(v2.GetName());
742  h->SetTitle(TString::Format("%s;%s%s;%s%s;Efficiency of %s", plotTitle.Data(),
743  v1.GetTitle(), TString(v1.getUnit()).Length()==0?"":TString::Format(" (%s)", v1.getUnit()).Data(),
744  v2.GetTitle(), TString(v2.getUnit()).Length()==0?"":TString::Format(" (%s)", v2.getUnit()).Data(), effName.Data()));
745  h->SetOption("colztexte");
746  h->GetZaxis()->SetRangeUser(-0.001,1.001);
747  h->SetStats(kFALSE);
748  for(int i=0; i<eff.numEntries(); i++){
749  eff.get(i);
750  h->SetBinContent(h->FindBin(v1_->getVal(), v2_->getVal()), e->getVal());
751  h->SetBinError(h->FindBin(v1_->getVal(), v2_->getVal()), (e->getErrorHi()-e->getErrorLo())/2.);
752  }
753  h->Draw();
754  canvas.Write();
755  delete h;
756 }
757 
758 void TagProbeFitter::doSBSEfficiency(RooWorkspace* w, RooRealVar& efficiency){
759 }
760 
761 void TagProbeFitter::doCntEfficiency(RooWorkspace* w, RooRealVar& efficiency){
762  int pass = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
763  int fail = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
764  double e = (pass+fail == 0) ? 0 : pass/double(pass+fail);
765  // Use Clopper-Pearson
766  double alpha = (1.0 - .68540158589942957)/2;
767  double lo = (pass == 0) ? 0.0 : ROOT::Math::beta_quantile( alpha, pass, fail+1 );
768  double hi = (fail == 0) ? 1.0 : ROOT::Math::beta_quantile( 1-alpha, pass+1, fail );
770  //double lob, hib;
771  //Efficiency( pass, pass+fail, .68540158589942957, e, lob, hib );
772  //std::cerr << "CNT " << pass << "/" << fail << ": Clopper Pearson [" << lo << ", " << hi << "], Bayes [" << lob << ", " << hib << "]" << std::endl;
773  efficiency.setVal(e);
774  efficiency.setAsymError(lo-e, hi-e);
775 }
776 
777 void TagProbeFitter::varFixer(RooWorkspace* w, bool fix){
778  std::vector<std::string>::const_iterator it;
779  for(it=fixVars.begin(); it<fixVars.end(); it++){
780  if(w->var((*it).c_str()))
781  w->var((*it).c_str())->setAttribute("Constant",fix);
782  else{
783  std::cout << "TagProbeFitter: " << "Can't find a variable to fix: " << *it;
784  }
785  }
786 }
787 
788 void TagProbeFitter::varSaver(RooWorkspace* w){
789  if(!fixVarValues.empty()){
790  std::cout << "attempt to save variables more than once!" << std::endl;
791  return;
792  }
793  std::vector<std::string>::const_iterator it;
794  for(it=fixVars.begin(); it<fixVars.end(); it++){
795  fixVarValues.push_back(w->var((*it).c_str())->getVal());
796  }
797 
798 }
799 
800 void TagProbeFitter::varRestorer(RooWorkspace* w){
801  if(fixVarValues.size()==fixVars.size())
802  for(unsigned int i=0; i< fixVars.size(); i++){
803  std::cout << "setting variable " << fixVars[i].c_str() << std::endl;
804  w->var(fixVars[i].c_str())->setVal(fixVarValues[i]);
805  }
806  else{
807  std::cout << "fixVars and fixVarValues are not of the same size!" << std::endl;
808  }
809 }
810 
int i
Definition: DBlmapReader.cc:9
void doCntEfficiency(RooWorkspace *w, RooRealVar &efficiency)
calculate the efficiecny by counting in the dataset found in the workspace
float alpha
Definition: AMPTWrapper.h:95
bool addExpression(std::string expressionName, std::string title, std::string expression, std::vector< std::string > arguments)
adds a new category based on a cut
void makeEfficiencyPlot1D(RooDataSet &eff, RooRealVar &v, TString plotName, TString plotTitle, TString effName)
makes the 1D plot
void addPdf(std::string pdfName, std::vector< std::string > &pdfCommands)
static PFTauRenderPlugin instance
TagProbeFitter(std::vector< std::string > inputFileNames, std::string inputDirectoryName, std::string inputTreeName, std::string outputFileName, int numCPU=1, bool saveWorkspace_=false, bool floatShapeParameters=true, std::vector< std::string > fixVars_=std::vector< std::string >())
construct the fitter with the inputFileName, inputDirectoryName, inputTreeName, outputFileName and sp...
void setWeightVar(const std::string &weight)
set a variable to be used as weight for a dataset. empty string means no weights. ...
std::string calculateEfficiency(std::string dirName, std::string efficiencyCategory, std::string efficiencyState, std::vector< std::string > &unbinnedVariables, std::map< std::string, std::vector< double > > &binnedReals, std::map< std::string, std::vector< std::string > > &binnedCategories, std::vector< std::string > &binToPDFmap)
calculate the efficiency for a particular binning of the data; it saves everything in the directory &quot;...
static const int WARNING
bool addThresholdCategory(std::string categoryName, std::string title, std::string varName, double cutValue)
adds a new category based on a cut
def canvas
Definition: svgfig.py:481
void setQuiet(bool quiet_=true)
suppress most of the output from RooFit and Minuit
void varSaver(RooWorkspace *w)
store values in the vector
void varRestorer(RooWorkspace *w)
restore variables&#39;s values for fit starting point
void saveFitPlot(RooWorkspace *w)
saves the fit canvas
~TagProbeFitter()
destructor closes the files
void saveEfficiencyPlots(RooDataSet &eff, TString effName, RooArgSet &binnedVariables, RooArgSet &mappedCategories)
saves the efficiency plots
void setInitialValues(RooWorkspace *w)
sets initial values of the PDF parameters based on the data available in the workspace ...
void saveDistributionsPlot(RooWorkspace *w)
saves the distributions canvas
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4
void makeEfficiencyPlot2D(RooDataSet &eff, RooRealVar &v1, RooRealVar &v2, TString plotName, TString plotTitle, TString effName)
makes the 2D plot
void varFixer(RooWorkspace *w, bool fix)
fix or release variables selected by user
std::vector< std::vector< double > > tmp
Definition: MVATrainer.cc:100
Double_t mean(Double_t mH, TString proc)
dictionary args
TString units(TString variable, Char_t axis)
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
MultiGaussianState< N > multiState(const std::vector< MultiGaussianState< N >::Vector > &, const std::vector< MultiGaussianState< N >::Matrix > &, const std::vector< double > &)
tuple mass
Definition: scaleCards.py:27
void setBinsForMassPlots(int bins)
set number of bins to use when making the plots; 0 = automatic
void doSBSEfficiency(RooWorkspace *w, RooRealVar &efficiency)
calculate the efficiecny with side band substraction in the dataset found in the workspace ...
tuple cout
Definition: gather_cfg.py:121
void createPdf(RooWorkspace *w, std::vector< std::string > &pdfCommands)
creates the simultaneous PDF in the workspace according to the &quot;pdfCommands&quot;
void doFitEfficiency(RooWorkspace *w, std::string pdfName, RooRealVar &efficiency)
calculate the efficiecny with a simulataneous maximum likelihood fit in the dataset found in the work...
bool addCategory(std::string categoryName, std::string title, std::string expression)
adds a new category variable to the set of variables describing the data in the tree; &quot;expression&quot; is...
bool addVariable(std::string variableName, std::string title, double low, double hi, std::string units)
adds a new real variable to the set of variables describing the data in the tree
mathSSE::Vec4< T > v
static const int ERROR
T w() const
void set(const std::string &name, int value)
set the flag, with a run-time name