CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
checkBTagCalibrationConsistency.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 import itertools
4 import unittest
5 import sys
6 import dataLoader
7 import ROOT
8 
9 
# Data set currently under test; run_check_data() rebinds this for every
# loader before the test suite is run.
data = None

# Switches for the optional check groups; run_check_data() overwrites them
# from its op / sys / flavor arguments.
check_flavor = check_op = check_sys = True

# When true, extra progress output is printed while checking.
verbose = False
15 
16 
def _eta_pt_discr_entries_generator(filter_keyfunc, op):
    """Yield ``(eta, pt, discr, entries)`` for every test point.

    Only entries of the module-level ``data`` for which ``filter_keyfunc``
    returns a true value are considered.  For each combination of eta and
    pt test points, the entries whose eta and pt ranges strictly contain
    the point are collected.  When ``op`` equals 3 the selection is narrowed
    further for each discriminator test point; otherwise ``discr`` is
    yielded as ``None``.  The yielded entry list may be empty.
    """
    assert data
    selected = [entry for entry in data.entries if filter_keyfunc(entry)]

    # use full or half eta range?
    has_negative_eta = any(entry.params.etaMin < 0. for entry in selected)
    if has_negative_eta:
        eta_points = data.eta_test_points
    else:
        eta_points = data.abseta_test_points

    for eta in eta_points:
        for pt in data.pt_test_points:
            in_bin = [
                entry for entry in selected
                if entry.params.etaMin < eta < entry.params.etaMax
                and entry.params.ptMin < pt < entry.params.ptMax
            ]
            if op != 3:
                yield eta, pt, None, in_bin
                continue
            for discr in data.discr_test_points:
                in_discr_bin = [
                    entry for entry in in_bin
                    if entry.params.discrMin < discr < entry.params.discrMax
                ]
                yield eta, pt, discr, in_discr_bin
45 
46 
class BtagCalibConsistencyChecker(unittest.TestCase):
    """Consistency checks applied to the module-level ``data`` object.

    The tests take no input of their own: they read the globals ``data``,
    ``check_op``, ``check_flavor``, ``check_sys`` and ``verbose``, so those
    must be set before the suite is run.  Test methods are described with
    comments rather than docstrings so that the test descriptions printed
    by unittest are not altered.
    """

    @staticmethod
    def _discr_suffix(discr):
        """Return the ``", discr=..."`` message tail, or "" if discr is None."""
        # Compare against None explicitly: a discriminator value of 0.0 is
        # a legitimate test point and must still show up in the message.
        if discr is None:
            return ""
        return ", discr=%f" % discr

    # The measurement type and every systematic name must be lowercase.
    def test_lowercase(self):
        for item in [data.meas_type] + list(data.syss):
            self.assertEqual(
                item, item.lower(),
                "Item is not lowercase: %s" % item
            )

    # Operating points 2 / 1 / 0 must be present when check_op is set.
    def test_ops_tight(self):
        if check_op:
            self.assertIn(2, data.ops, "OP_TIGHT is missing")

    def test_ops_medium(self):
        if check_op:
            self.assertIn(1, data.ops, "OP_MEDIUM is missing")

    def test_ops_loose(self):
        if check_op:
            self.assertIn(0, data.ops, "OP_LOOSE is missing")

    # Flavours 0 / 1 / 2 must be present when check_flavor is set.
    def test_flavs_b(self):
        if check_flavor:
            self.assertIn(0, data.flavs, "FLAV_B is missing")

    def test_flavs_c(self):
        if check_flavor:
            self.assertIn(1, data.flavs, "FLAV_C is missing")

    def test_flavs_udsg(self):
        if check_flavor:
            self.assertIn(2, data.flavs, "FLAV_UDSG is missing")

    # NOTE(review): the 'def' lines of the next six tests are absent from
    # the listing this file was recovered from; the method names below are
    # reconstructed and must be confirmed against the repository.
    def test_syss_central(self):
        if check_sys:
            self.assertIn("central", data.syss,
                          "'central' sys. uncert. is missing")

    # NOTE(review): reconstructed method name -- confirm.
    def test_syss_up(self):
        if check_sys:
            self.assertIn("up", data.syss, "'up' sys. uncert. is missing")

    # NOTE(review): reconstructed method name -- confirm.
    def test_syss_down(self):
        if check_sys:
            self.assertIn("down", data.syss, "'down' sys. uncert. is missing")

    # NOTE(review): reconstructed method name -- confirm.
    # Every systematic other than 'central' must start with 'up' or 'down'.
    def test_systematics_name(self):
        if check_sys:
            for syst in data.syss:
                if syst == 'central':
                    continue
                self.assertTrue(
                    syst.startswith("up") or syst.startswith("down"),
                    "sys. uncert name must start with 'up' or 'down' : %s"
                    % syst
                )

    # NOTE(review): reconstructed method name -- confirm.
    # Every 'up...' systematic needs its 'down...' partner and vice versa.
    def test_systematics_doublesidedness(self):
        if check_sys:
            for syst in data.syss:
                # Swap only the leading direction prefix.  Testing with
                # `"up" in syst` and replacing every occurrence mangled
                # names such as 'down_pileup' (-> 'down_piledown').
                if syst.startswith("up"):
                    other = "down" + syst[len("up"):]
                elif syst.startswith("down"):
                    other = "up" + syst[len("down"):]
                else:
                    # 'central', or a name the naming test already rejects
                    continue
                self.assertIn(other, data.syss,
                              "'%s' sys. uncert. is missing" % other)

    # NOTE(review): reconstructed method name -- confirm.
    # Up variations must lie above, down variations below the central value.
    def test_systematics_values_vs_centrals(self):
        if check_sys:
            res = list(itertools.chain.from_iterable(
                self._check_sys_side(op, flav)
                for flav in data.flavs
                for op in data.ops
            ))
            self.assertFalse(bool(res), "\n"+"\n".join(res))

    def _check_sys_side(self, op, flav):
        """Return messages for variations on the wrong side of 'central'.

        All entries for operating point ``op`` and jet flavour ``flav`` are
        evaluated at every test point (at ``discr`` when ``op`` is 3, at
        ``pt`` otherwise).  A message is added for each 'up...' variation
        that is not larger, and each 'down...' variation that is not
        smaller, than the central value, and for points that have
        variations but no 'central' entry.  An empty list means no problem
        was found.
        """
        region = "op=%d, flav=%d" % (op, flav)
        if verbose:
            print("Checking sys side correctness for %s" % region)

        res = []
        for eta, pt, discr, entries in _eta_pt_discr_entries_generator(
                lambda e: (e.params.operatingPoint == op and
                           e.params.jetFlavor == flav),
                op):
            if not entries:
                continue

            for e in entries:  # do a little monkey patching with tf1's
                if not hasattr(e, 'tf1_func'):
                    e.tf1_func = ROOT.TF1("", e.formula)

            sys_dict = dict((e.params.sysType, e) for e in entries)
            # two entries with the same sysType at one point collapse in
            # the dict; that is an inconsistency in the data
            assert len(sys_dict) == len(entries)
            sys_cent = sys_dict.pop('central', None)
            if not sys_dict:
                continue  # nothing to compare against the central value

            where = ("%s eta=%f, pt=%f " % (region, eta, pt)
                     + self._discr_suffix(discr))
            if sys_cent is None:
                # report the gap instead of failing on sys_cent.tf1_func
                res.append("No 'central' entry to compare with: " + where)
                continue

            x = discr if op == 3 else pt
            cent_val = sys_cent.tf1_func.Eval(x)  # same for all variations
            for syst, e in sys_dict.items():
                sys_val = e.tf1_func.Eval(x)
                if syst.startswith('up') and not sys_val > cent_val:
                    res.append(
                        "Up variation '%s' not larger than 'central': "
                        % syst + where
                    )
                elif syst.startswith('down') and not sys_val < cent_val:
                    res.append(
                        "Down variation '%s' not smaller than 'central': "
                        % syst + where
                    )
        return res

    # Each (min, max) range must be ordered and lie within the global
    # limits, with a tolerance of 1e-7.
    def test_eta_ranges(self):
        for a, b in data.etas:
            self.assertLess(a, b)
            self.assertGreater(a, data.ETA_MIN - 1e-7)
            self.assertLess(b, data.ETA_MAX + 1e-7)

    def test_pt_ranges(self):
        for a, b in data.pts:
            self.assertLess(a, b)
            self.assertGreater(a, data.PT_MIN - 1e-7)
            self.assertLess(b, data.PT_MAX + 1e-7)

    def test_discr_ranges(self):
        for a, b in data.discrs:
            self.assertLess(a, b)
            self.assertGreater(a, data.DISCR_MIN - 1e-7)
            self.assertLess(b, data.DISCR_MAX + 1e-7)

    # Every test point must be covered by exactly one entry for each
    # combination of flavour, systematic and operating point.
    def test_coverage(self):
        res = list(itertools.chain.from_iterable(
            self._check_coverage(op, syst, flav)
            for flav in data.flavs
            for syst in data.syss
            for op in data.ops
        ))
        self.assertFalse(bool(res), "\n"+"\n".join(res))

    def _check_coverage(self, op, syst, flav):
        """Return messages for test points not covered exactly once.

        Considers the entries matching operating point ``op``, systematic
        ``syst`` and jet flavour ``flav``.  A message is added for every
        test point with no matching entry and for every test point with
        more than one.  An empty list means the coverage is consistent.
        """
        region = "op=%d, %s, flav=%d" % (op, syst, flav)
        if verbose:
            print("Checking coverage for %s" % region)

        # walk over all testpoints
        res = []
        for eta, pt, discr, entries in _eta_pt_discr_entries_generator(
                lambda e: (e.params.operatingPoint == op and
                           e.params.sysType == syst and
                           e.params.jetFlavor == flav),
                op):
            size = len(entries)
            if size == 0:
                res.append(
                    ("Region not covered: %s eta=%f, pt=%f "
                     % (region, eta, pt))
                    + self._discr_suffix(discr)
                )
            elif size > 1:
                res.append(
                    ("Region covered %d times: %s eta=%f, pt=%f"
                     % (size, region, eta, pt))
                    + self._discr_suffix(discr)
                )
        return res
219 
220 
def run_check(filename, op=True, sys=True, flavor=True):
    """Load ``filename`` with ``dataLoader.get_data`` and check the result.

    The three flags are passed on unchanged to ``run_check_data``, whose
    return value is returned.
    """
    return run_check_data(dataLoader.get_data(filename), op, sys, flavor)
224 
225 
def run_check_csv(csv_data, op=True, sys=True, flavor=True):
    """Load ``csv_data`` with ``dataLoader.get_data_csv`` and check it.

    The three flags are passed on unchanged to ``run_check_data``, whose
    return value is returned.
    """
    return run_check_data(dataLoader.get_data_csv(csv_data), op, sys, flavor)
229 
230 
def run_check_data(data_loaders,
                   op=True, sys=True, flavor=True):
    """Run the consistency test suite once per loader in ``data_loaders``.

    The ``op``, ``sys`` and ``flavor`` flags are stored in the module-level
    ``check_op``, ``check_sys`` and ``check_flavor`` switches.  For every
    loader the module-level ``data`` is rebound, a header is printed and
    ``BtagCalibConsistencyChecker`` is run through a ``TextTestRunner``.

    Returns a list with one boolean per loader: True when the suite was
    successful for that loader.
    """
    global data, check_op, check_sys, check_flavor
    check_op, check_sys, check_flavor = op, sys, flavor

    all_res = []
    for dat in data_loaders:
        data = dat
        print('\n\n')
        print('# Checking csv data for type / op / flavour: %s %s %s'
              % (data.meas_type, data.op, data.flav))
        print('=' * 60 + '\n')
        if verbose:
            data.print_data()
        testsuite = unittest.TestLoader().loadTestsFromTestCase(
            BtagCalibConsistencyChecker)
        res = unittest.TextTestRunner().run(testsuite)
        # wasSuccessful() also accounts for tests that raised an unexpected
        # exception (res.errors); looking at res.failures alone reported
        # such runs as passed.
        all_res.append(res.wasSuccessful())
    return all_res
250 
251 
if __name__ == '__main__':
    # Command-line entry point: the first argument is the csv data file,
    # the remaining arguments are the option flags listed below.
    if len(sys.argv) < 2:
        print('Need csv data file as first argument.')
        print('Options:')
        print(' --light (do not check op, sys, flav)')
        print(' --separate-by-op')
        print(' --separate-by-flav')
        print(' --separate-all (both of the above)')
        print('Exit.')
        # sys.exit rather than the site-provided exit(), which is meant
        # for interactive use and is not always available
        sys.exit(-1)

    ck_op = ck_sy = ck_fl = '--light' not in sys.argv

    dataLoader.separate_by_op = '--separate-by-op' in sys.argv
    dataLoader.separate_by_flav = '--separate-by-flav' in sys.argv

    if '--separate-all' in sys.argv:
        dataLoader.separate_by_op = dataLoader.separate_by_flav = True

    # when the data is separated by op / flavour, the corresponding
    # presence check is switched off
    if dataLoader.separate_by_op:
        ck_op = False
    if dataLoader.separate_by_flav:
        ck_fl = False

    verbose = True
    if not all(run_check(sys.argv[1], ck_op, ck_sy, ck_fl)):
        sys.exit(-1)
279 
bool any(const std::vector< T > &v, const T &what)
Definition: ECalSD.cc:34
def get_data_csv
Definition: dataLoader.py:157
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def get_data
Definition: dataLoader.py:190
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list ("!*", "!HLTx*" if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL. It will reject the event if any of the triggers are PASS or EXCEPTION (this matches the behavior of "!*" before the partial wildcard feature was incorporated). Triggers which are in the READY state are completely ignored. (READY should never be returned since the trigger paths have been run