CMS 3D CMS Logo

BTagWeightCalculator.py
Go to the documentation of this file.
1 from __future__ import print_function
2 import ROOT
3 import numpy as np
4 
6  """
7  Calculates the jet and event correction factor as a weight based on the b-tagger shape-dependent data/mc
8  corrections.
9 
10  Currently, the recipe is only described in https://twiki.cern.ch/twiki/bin/viewauth/CMS/TTbarHbbRun2ReferenceAnalysis#Applying_CSV_weights
11 
12  In short, jet-by-jet correction factors as a function of pt, eta and CSV have been derived.
13  This code accesses the flavour of MC jets and gets the correct weight histogram
14  corresponding to the pt, eta and flavour of the jet.
15  From there, the per-jet weight is just accessed according to the value of the discriminator.
16  """
17  def __init__(self, fn_hf, fn_lf) :
18  self.pdfs = {}
19 
20  #bin edges of the heavy-flavour histograms
21  #pt>=20 && pt<30 -> bin=0
22  #pt>=30 && pt<40 -> bin=1
23  #etc
24  self.pt_bins_hf = np.array([20, 30, 40, 60, 100])
25  self.eta_bins_hf = np.array([0, 2.41])
26 
27  #bin edges of the light-flavour histograms
28  self.pt_bins_lf = np.array([20, 30, 40, 60])
29  self.eta_bins_lf = np.array([0, 0.8, 1.6, 2.41])
30 
31  #name of the default b-tagger
32  self.btag = "pfCombinedInclusiveSecondaryVertexV2BJetTags"
33  self.init(fn_hf, fn_lf)
34 
35  # systematic uncertainties for different flavour assignments
36  self.systematics_for_b = ["JESUp", "JESDown", "LFUp", "LFDown",
37  "HFStats1Up", "HFStats1Down", "HFStats2Up", "HFStats2Down"]
38  self.systematics_for_c = ["cErr1Up", "cErr1Down", "cErr2Up", "cErr2Down"]
39  self.systematics_for_l = ["JESUp", "JESDown", "HFUp", "HFDown",
40  "LFStats1Up", "LFStats1Down", "LFStats2Up", "LFStats2Down"]
41 
42  def getBin(self, bvec, val):
43  return int(bvec.searchsorted(val, side="right")) - 1
44 
45  def init(self, fn_hf, fn_lf):
46  """
47  fn_hf (string) - path to the heavy flavour weight file
48  fn_lf (string) - path to the light flavour weight file
49  """
50  print("[BTagWeightCalculator]: Initializing from files", fn_hf, fn_lf)
51 
52  #print "hf"
53  self.pdfs["hf"] = self.getHistosFromFile(fn_hf)
54  #print "lf"
55  self.pdfs["lf"] = self.getHistosFromFile(fn_lf)
56 
57  return True
58 
59  def getHistosFromFile(self, fn):
60  """
61  Initialized the lookup table for b-tag weight histograms based on jet
62  pt, eta and flavour.
63  The format of the weight file is similar to:
64  KEY: TH1D csv_ratio_Pt0_Eta0_final;1
65  KEY: TH1D csv_ratio_Pt0_Eta0_final_JESUp;1
66  KEY: TH1D csv_ratio_Pt0_Eta0_final_JESDown;1
67  KEY: TH1D csv_ratio_Pt0_Eta0_final_LFUp;1
68  KEY: TH1D csv_ratio_Pt0_Eta0_final_LFDown;1
69  KEY: TH1D csv_ratio_Pt0_Eta0_final_Stats1Up;1
70  KEY: TH1D csv_ratio_Pt0_Eta0_final_Stats1Down;1
71  KEY: TH1D csv_ratio_Pt0_Eta0_final_Stats2Up;1
72  KEY: TH1D csv_ratio_Pt0_Eta0_final_Stats2Down;1
73  KEY: TH1D c_csv_ratio_Pt0_Eta0_final;2
74  KEY: TH1D c_csv_ratio_Pt0_Eta0_final;1
75  KEY: TH1D c_csv_ratio_Pt0_Eta0_final_cErr1Up;1
76  KEY: TH1D c_csv_ratio_Pt0_Eta0_final_cErr1Down;1
77  KEY: TH1D c_csv_ratio_Pt0_Eta0_final_cErr2Up;1
78  KEY: TH1D c_csv_ratio_Pt0_Eta0_final_cErr2Down;1
79  """
80  ret = {}
81  tf = ROOT.TFile(fn)
82  if not tf or tf.IsZombie():
83  raise FileNotFoundError("Could not open file {0}".format(fn))
84  ROOT.gROOT.cd()
85  for k in tf.GetListOfKeys():
86  kn = k.GetName()
87  if not (kn.startswith("csv_ratio") or kn.startswith("c_csv_ratio") ):
88  continue
89  spl = kn.split("_")
90  is_c = 1 if kn.startswith("c_csv_ratio") else 0
91 
92  if spl[2+is_c] == "all":
93  ptbin = -1
94  etabin = -1
95  kind = "all"
96  syst = "nominal"
97  else:
98  ptbin = int(spl[2+is_c][2:])
99  etabin = int(spl[3+is_c][3:])
100  kind = spl[4+is_c]
101  if len(spl)==(6+is_c):
102  syst = spl[5+is_c]
103  else:
104  syst = "nominal"
105  ret[(is_c, ptbin, etabin, kind, syst)] = k.ReadObj().Clone()
106  return ret
107 
108  def calcJetWeight(self, jet, kind, systematic):
109  """
110  Calculates the per-jet correction factor.
111  jet: either an object with the attributes pt, eta, mcFlavour, self.btag
112  or a Heppy Jet
113  kind: string specifying the name of the corrections. Usually "final".
114  systematic: the correction systematic, e.g. "nominal", "JESUp", etc
115  """
116  #if jet is a simple class with attributes
117  if isinstance(getattr(jet, "pt"), float):
118  pt = getattr(jet, "pt")
119  aeta = abs(getattr(jet, "eta"))
120  fl = abs(getattr(jet, "hadronFlavour"))
121  csv = getattr(jet, self.btag)
122  #if jet is a heppy Jet object
123  else:
124  #print "could not get jet", e
125  pt = jet.pt()
126  aeta = abs(jet.eta())
127  fl = abs(jet.hadronFlavour())
128  csv = jet.btag(self.btag)
129  return self.calcJetWeightImpl(pt, aeta, fl, csv, kind, systematic)
130 
131  def calcJetWeightImpl(self, pt, aeta, fl, csv, kind, systematic):
132 
133  is_b = (fl == 5)
134  is_c = (fl == 4)
135  is_l = not (is_b or is_c)
136 
137  #if evaluating a weight for systematic uncertainties, make sure the jet is affected. If not, return 'nominal' weight
138  if systematic != "nominal":
139  if (is_b and systematic not in self.systematics_for_b) or (is_c and systematic not in self.systematics_for_c) or (is_l and systematic not in self.systematics_for_l):
140  systematic = "nominal"
141 
142  #needed because the TH1 names for Stats are same for HF and LF
143  if "Stats" in systematic:
144  systematic = systematic[2:]
145 
146  if is_b or is_c:
147  ptbin = self.getBin(self.pt_bins_hf, pt)
148  etabin = self.getBin(self.eta_bins_hf, aeta)
149  else:
150  ptbin = self.getBin(self.pt_bins_lf, pt)
151  etabin = self.getBin(self.eta_bins_lf, aeta)
152 
153  if ptbin < 0 or etabin < 0:
154  #print "pt or eta bin outside range", pt, aeta, ptbin, etabin
155  return 1.0
156 
157  k = (is_c, ptbin, etabin, kind, systematic)
158  hdict = self.pdfs["lf"]
159  if is_b or is_c:
160  hdict = self.pdfs["hf"]
161  h = hdict.get(k, None)
162  if not h:
163  #print "no histogram", k
164  return 1.0
165 
166  if csv > 1:
167  csv = 1
168 
169  csvbin = 1
170  csvbin = h.FindBin(csv)
171  #This is to fix csv=-10 not being accounted for in CSV SF input hists
172  if csvbin <= 0:
173  csvbin = 1
174  if csvbin > h.GetNbinsX():
175  csvbin = h.GetNbinsX()
176 
177  w = h.GetBinContent(csvbin)
178  return w
179 
180  def calcEventWeight(self, jets, kind, systematic):
181  """
182  The per-event weight is just a product of per-jet weights.
183  """
184  weights = np.array(
185  [self.calcJetWeight(jet, kind, systematic)
186  for jet in jets]
187  )
188 
189  wtot = np.prod(weights)
190  return wtot
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
def calcJetWeight(self, jet, kind, systematic)
def calcJetWeightImpl(self, pt, aeta, fl, csv, kind, systematic)
def calcEventWeight(self, jets, kind, systematic)