CMS 3D CMS Logo

checkBTagCalibrationConsistency.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 from __future__ import print_function
4 from __future__ import absolute_import
5 import itertools
6 import unittest
7 import sys
8 from . import dataLoader
9 import ROOT
10 
11 import six
12 
# Payload currently under test; run_check_data() rebinds this for every loader
# before the test case is executed.
data = None

# Switches that enable/disable groups of checks in the test case below.
check_op = True
check_sys = True
check_flavor = True

# When true, progress information is printed while checking.
verbose = False
18 
19 
def _eta_pt_discr_entries_generator(filter_keyfunc, op):
    """Walk the test points and yield the entries that cover each of them.

    The entries of the module-level ``data`` object are first reduced to those
    accepted by ``filter_keyfunc``.  For every (eta, pt) test point -- and,
    when ``op == 3``, additionally for every discriminator test point -- a
    tuple ``(eta, pt, discr, entries)`` is yielded, where ``entries`` is the
    list of selected entries whose bin strictly contains the point.  ``discr``
    is ``None`` unless ``op == 3``.

    NOTE(review): ``op == 3`` is presumably the discriminator-reshaping
    operating point -- confirm against the calibration definitions.
    """
    assert data
    selected = [entry for entry in data.entries if filter_keyfunc(entry)]

    # Use the full eta range as soon as any selected bin starts below zero,
    # otherwise the |eta| test points.
    has_negative_eta = any(entry.params.etaMin < 0. for entry in selected)
    eta_points = (data.eta_test_points if has_negative_eta
                  else data.abseta_test_points)

    for eta in eta_points:
        for pt in data.pt_test_points:
            covering = []
            for entry in selected:
                par = entry.params
                if (par.etaMin < eta < par.etaMax
                        and par.ptMin < pt < par.ptMax):
                    covering.append(entry)

            if op != 3:
                yield eta, pt, None, covering
                continue

            for discr in data.discr_test_points:
                yield eta, pt, discr, [
                    entry for entry in covering
                    if entry.params.discrMin < discr < entry.params.discrMax
                ]
40 
41 
42 class BtagCalibConsistencyChecker(unittest.TestCase):
def test_lowercase(self):
    """Require the measurement type and all systematic names to be lowercase."""
    names = [data.meas_type]
    names.extend(data.syss)
    for name in names:
        self.assertEqual(
            name, name.lower(),
            "Item is not lowercase: %s" % name
        )
49 
def test_ops_tight(self):
    """Require operating point 2 (OP_TIGHT) in ``data.ops``.

    Does nothing when the module-level ``check_op`` switch is off.
    """
    if not check_op:
        return
    self.assertIn(2, data.ops, "OP_TIGHT is missing")
53 
def test_ops_medium(self):
    """Require operating point 1 (OP_MEDIUM) in ``data.ops``.

    Does nothing when the module-level ``check_op`` switch is off.
    """
    if not check_op:
        return
    self.assertIn(1, data.ops, "OP_MEDIUM is missing")
57 
def test_ops_loose(self):
    """Require operating point 0 (OP_LOOSE) in ``data.ops``.

    Does nothing when the module-level ``check_op`` switch is off.
    """
    if not check_op:
        return
    self.assertIn(0, data.ops, "OP_LOOSE is missing")
61 
def test_flavs_b(self):
    """Require jet flavour 0 (FLAV_B) in ``data.flavs``.

    Does nothing when the module-level ``check_flavor`` switch is off.
    """
    if not check_flavor:
        return
    self.assertIn(0, data.flavs, "FLAV_B is missing")
65 
def test_flavs_c(self):
    """Require jet flavour 1 (FLAV_C) in ``data.flavs``.

    Does nothing when the module-level ``check_flavor`` switch is off.
    """
    if not check_flavor:
        return
    self.assertIn(1, data.flavs, "FLAV_C is missing")
69 
def test_flavs_udsg(self):
    """Require jet flavour 2 (FLAV_UDSG) in ``data.flavs``.

    Does nothing when the module-level ``check_flavor`` switch is off.
    """
    if not check_flavor:
        return
    self.assertIn(2, data.flavs, "FLAV_UDSG is missing")
73 
75  if check_sys:
76  self.assertIn("central", data.syss,
77  "'central' sys. uncert. is missing")
78 
80  if check_sys:
81  self.assertIn("up", data.syss, "'up' sys. uncert. is missing")
82 
84  if check_sys:
85  self.assertIn("down", data.syss, "'down' sys. uncert. is missing")
86 
88  if check_sys:
89  for syst in data.syss:
90  if syst == 'central':
91  continue
92  self.assertTrue(
93  syst.startswith("up") or syst.startswith("down"),
94  "sys. uncert name must start with 'up' or 'down' : %s"
95  % syst
96  )
97 
99  if check_sys:
100  for syst in data.syss:
101  if "up" in syst:
102  other = syst.replace("up", "down")
103  self.assertIn(other, data.syss,
104  "'%s' sys. uncert. is missing" % other)
105  elif "down" in syst:
106  other = syst.replace("down", "up")
107  self.assertIn(other, data.syss,
108  "'%s' sys. uncert. is missing" % other)
109 
111  if check_sys:
112  res = list(itertools.chain.from_iterable(
113  self._check_sys_side(op, flav)
114  for flav in data.flavs
115  for op in data.ops
116  ))
117  self.assertFalse(bool(res), "\n"+"\n".join(res))
118 
def _check_sys_side(self, op, flav):
    """Check that 'up'/'down' variations lie on the right side of 'central'.

    For the given operating point and jet flavour, every test point produced
    by ``_eta_pt_discr_entries_generator`` is visited.  Each covering entry
    gets a ``ROOT.TF1`` built from its ``formula`` (cached on the entry as
    ``tf1_func``), which is evaluated at the discriminator value when
    ``op == 3`` and at pt otherwise.

    Returns a list of human-readable problem descriptions; the list is empty
    when every variation whose name starts with 'up' is strictly larger, and
    every variation whose name starts with 'down' strictly smaller, than the
    central value.  A test point that has variations but no 'central' entry is
    reported as a problem as well.
    """
    region = "op=%d, flav=%d" % (op, flav)
    if verbose:
        print("Checking sys side correctness for", region)

    res = []
    for eta, pt, discr, entries in _eta_pt_discr_entries_generator(
        lambda e:
            e.params.operatingPoint == op and
            e.params.jetFlavor == flav,
        op
    ):
        if not entries:
            continue

        for e in entries:  # do a little monkey patching with tf1's
            if not hasattr(e, 'tf1_func'):
                e.tf1_func = ROOT.TF1("", e.formula)

        # Description of the test point, shared by all messages below.
        # `discr is not None` (rather than truthiness) keeps a discriminator
        # value of exactly 0.0 in the report.
        where = "%s eta=%f, pt=%f " % (region, eta, pt)
        if discr is not None:
            where += ", discr=%f" % discr

        sys_dict = dict((e.params.sysType, e) for e in entries)
        # Two entries with the same sysType at one point would silently
        # shadow each other in the dict.
        self.assertEqual(
            len(sys_dict), len(entries),
            "Several entries share one sysType: " + where
        )

        sys_cent = sys_dict.pop('central', None)
        if sys_cent is None:
            # Without a central entry there is nothing to compare against;
            # report it instead of failing on a None attribute access.
            res.append("No 'central' entry to compare with: " + where)
            continue

        x = discr if op == 3 else pt
        cent_val = sys_cent.tf1_func.Eval(x)  # same for every variation
        for syst, e in six.iteritems(sys_dict):
            sys_val = e.tf1_func.Eval(x)
            if syst.startswith('up') and not sys_val > cent_val:
                res.append(
                    "Up variation '%s' not larger than 'central': %s"
                    % (syst, where)
                )
            elif syst.startswith('down') and not sys_val < cent_val:
                res.append(
                    "Down variation '%s' not smaller than 'central': %s"
                    % (syst, where)
                )
    return res
158 
def test_eta_ranges(self):
    """Require every eta bin to be ordered and inside [ETA_MIN, ETA_MAX].

    The outer limits are compared with a tolerance of 1e-7.
    """
    tolerance = 1e-7
    for low_edge, high_edge in data.etas:
        self.assertLess(low_edge, high_edge)
        self.assertGreater(low_edge, data.ETA_MIN - tolerance)
        self.assertLess(high_edge, data.ETA_MAX + tolerance)
164 
def test_pt_ranges(self):
    """Require every pt bin to be ordered and inside [PT_MIN, PT_MAX].

    The outer limits are compared with a tolerance of 1e-7.
    """
    tolerance = 1e-7
    for low_edge, high_edge in data.pts:
        self.assertLess(low_edge, high_edge)
        self.assertGreater(low_edge, data.PT_MIN - tolerance)
        self.assertLess(high_edge, data.PT_MAX + tolerance)
170 
def test_discr_ranges(self):
    """Require every discriminator bin to be ordered and inside
    [DISCR_MIN, DISCR_MAX].

    The outer limits are compared with a tolerance of 1e-7.
    """
    tolerance = 1e-7
    for low_edge, high_edge in data.discrs:
        self.assertLess(low_edge, high_edge)
        self.assertGreater(low_edge, data.DISCR_MIN - tolerance)
        self.assertLess(high_edge, data.DISCR_MAX + tolerance)
176 
def test_coverage(self):
    """Require every test point to be covered exactly once.

    Collects the messages of ``_check_coverage`` for every combination of
    flavour, systematic and operating point and fails, listing all of them,
    if any were produced.
    """
    problems = []
    for flav in data.flavs:
        for syst in data.syss:
            for op in data.ops:
                problems.extend(self._check_coverage(op, syst, flav))
    self.assertFalse(bool(problems), "\n" + "\n".join(problems))
185 
def _check_coverage(self, op, syst, flav):
    """Check that each test point is covered by exactly one entry.

    Only entries matching the given operating point, systematic name and jet
    flavour are considered.  Returns a list of messages, one per test point
    that is covered by no entry or by more than one; the list is empty when
    the coverage is exact.
    """
    region = "op=%d, %s, flav=%d" % (op, syst, flav)
    if verbose:
        print("Checking coverage for", region)

    # walk over all testpoints
    res = []
    for eta, pt, discr, entries in _eta_pt_discr_entries_generator(
        lambda e:
            e.params.operatingPoint == op and
            e.params.sysType == syst and
            e.params.jetFlavor == flav,
        op
    ):
        # Test for None explicitly: a discriminator value of exactly 0.0 is
        # falsy but must still show up in the message.
        discr_txt = (", discr=%f" % discr) if discr is not None else ""
        size = len(entries)
        if size == 0:
            res.append(
                ("Region not covered: %s eta=%f, pt=%f "
                 % (region, eta, pt))
                + discr_txt
            )
        elif size > 1:
            res.append(
                ("Region covered %d times: %s eta=%f, pt=%f"
                 % (size, region, eta, pt))
                + discr_txt
            )
    return res
214 
215 
def run_check(filename, op=True, sys=True, flavor=True):
    """Load the payload(s) from ``filename`` and run the consistency checks.

    The flags are forwarded unchanged to ``run_check_data``, whose result is
    returned.  Note that the ``sys`` parameter hides the ``sys`` module inside
    this function.
    """
    return run_check_data(
        dataLoader.get_data(filename), op=op, sys=sys, flavor=flavor
    )
219 
220 
def run_check_csv(csv_data, op=True, sys=True, flavor=True):
    """Build the payload(s) from in-memory csv data and run the checks.

    The flags are forwarded unchanged to ``run_check_data``, whose result is
    returned.  Note that the ``sys`` parameter hides the ``sys`` module inside
    this function.
    """
    return run_check_data(
        dataLoader.get_data_csv(csv_data), op=op, sys=sys, flavor=flavor
    )
224 
225 
def run_check_data(data_loaders,
                   op=True, sys=True, flavor=True):
    """Run the consistency test case once per loader.

    ``op``, ``sys`` and ``flavor`` are stored in the module-level
    ``check_op``, ``check_sys`` and ``check_flavor`` switches read by the
    tests.  For each element of ``data_loaders`` the module-level ``data`` is
    rebound to it, a header is printed, and ``BtagCalibConsistencyChecker``
    is run with a ``unittest.TextTestRunner``.

    Returns a list with one bool per loader: True when that run was fully
    successful.  (The ``sys`` parameter hides the ``sys`` module inside this
    function.)
    """
    global data, check_op, check_sys, check_flavor
    check_op, check_sys, check_flavor = op, sys, flavor

    all_res = []
    for dat in data_loaders:
        data = dat
        print('\n\n')
        print('# Checking csv data for type / op / flavour:',
              data.meas_type, data.op, data.flav)
        print('='*60 + '\n')
        if verbose:
            data.print_data()
        testsuite = unittest.TestLoader().loadTestsFromTestCase(
            BtagCalibConsistencyChecker)
        res = unittest.TextTestRunner().run(testsuite)
        # wasSuccessful() also accounts for tests that ended in an error
        # (an unexpected exception), not only for assertion failures.
        all_res.append(res.wasSuccessful())
    return all_res
245 
246 
# Command-line entry point: the first argument is the csv file, the remaining
# arguments are optional switches.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Need csv data file as first argument.')
        print('Options:')
        print(' --light (do not check op, sys, flav)')
        print(' --separate-by-op')
        print(' --separate-by-flav')
        print(' --separate-all (both of the above)')
        print('Exit.')
        # sys.exit instead of the site-provided exit(), which is meant for
        # interactive sessions.
        sys.exit(-1)

    ck_op = ck_sy = ck_fl = '--light' not in sys.argv

    dataLoader.separate_by_op = '--separate-by-op' in sys.argv
    dataLoader.separate_by_flav = '--separate-by-flav' in sys.argv

    if '--separate-all' in sys.argv:
        dataLoader.separate_by_op = dataLoader.separate_by_flav = True

    # When the data is split by operating point / flavour, the corresponding
    # completeness check is switched off.
    if dataLoader.separate_by_op:
        ck_op = False
    if dataLoader.separate_by_flav:
        ck_fl = False

    verbose = True
    if not all(run_check(sys.argv[1], ck_op, ck_sy, ck_fl)):
        sys.exit(-1)
274 
def run_check(filename, op=True, sys=True, flavor=True)
bool any(const std::vector< T > &v, const T &what)
Definition: ECalSD.cc:37
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def run_check_csv(csv_data, op=True, sys=True, flavor=True)
def _eta_pt_discr_entries_generator(filter_keyfunc, op)
def get_data_csv(csv_data)
Definition: dataLoader.py:158
def run_check_data(data_loaders, op=True, sys=True, flavor=True)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def get_data(filename)
Definition: dataLoader.py:191
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run