CMS 3D CMS Logo

dataLoader.py
Go to the documentation of this file.
1 import itertools
2 import ROOT
# Make sure ROOT knows the BTagEntry class.  If it does not, ask ROOT to load
# BTagCalibrationStandalone.cpp from the working directory (the trailing '+'
# in the '.L' command requests compilation of the file).
try:
    ROOT.BTagEntry
except AttributeError:
    ROOT.gROOT.ProcessLine('.L BTagCalibrationStandalone.cpp+')

# Check a second time: if BTagEntry is still missing, loading failed and the
# rest of this module cannot work.
try:
    ROOT.BTagEntry
except AttributeError:
    # Single-argument print() produces the same output on Python 2 and 3.
    print('ROOT.BTagEntry is needed! Please copy '
          'BTagCalibrationStandalone.[h|cpp] to the working directory. Exit.')
    # Same exit status as before, without relying on the interactive
    # exit() helper that is only present when the 'site' module is loaded.
    raise SystemExit(-1)

# When False, a DataLoader does not filter entries on operating point.
separate_by_op = False
# When False, a DataLoader does not filter entries on jet flavour.
separate_by_flav = False
17 
18 
class DataLoader(object):
    """Select BTagEntry objects from CSV lines and summarise what they cover.

    After construction ``entries`` holds the selected entries.  When at least
    one entry was selected, the instance also carries the sets of operating
    points, flavours, sys types and (eta / pt / discriminator) ranges found,
    the overall bounds, and sets of test points just inside / outside every
    range bound.
    """

    def __init__(self, csv_data, measurement_type, operating_point, flavour):
        """Parse ``csv_data`` and keep the entries matching the selection.

        Args:
            csv_data: iterable of CSV text lines (header already removed);
                blank lines are skipped.
            measurement_type: entries must have this measurement type.
            operating_point: required operating point; only checked when the
                module-level flag ``separate_by_op`` is true.
            flavour: required jet flavour; only checked when the module-level
                flag ``separate_by_flav`` is true.

        Raises:
            RuntimeError: if handling a line raises ``TypeError`` (the line
                could not be interpreted).

        If no entry is selected, only ``meas_type``, ``op``, ``flav`` and
        ``entries`` are set; none of the summary attributes exist.
        """
        self.meas_type = measurement_type
        self.op = operating_point
        self.flav = flavour

        # list of entries
        ens = []
        for l in csv_data:
            if not l.strip():
                continue  # skip empty lines
            try:
                e = ROOT.BTagEntry(l)
                if (e.params.measurementType == measurement_type
                        and ((not separate_by_op)
                             or e.params.operatingPoint == operating_point)
                        and ((not separate_by_flav)
                             or e.params.jetFlavor == flavour)):
                    ens.append(e)
            except TypeError:
                raise RuntimeError("Error: can not interpret line: " + l)
        self.entries = ens

        if not ens:
            return

        # Discriminator ranges are only collected from operatingPoint == 3
        # entries; select them once and reuse the selection below.
        discr_ens = [e for e in ens if e.params.operatingPoint == 3]

        # fixed data
        self.ops = set(e.params.operatingPoint for e in ens)
        self.flavs = set(e.params.jetFlavor for e in ens)
        self.syss = set(e.params.sysType for e in ens)
        self.etas = set((e.params.etaMin, e.params.etaMax) for e in ens)
        self.pts = set((e.params.ptMin, e.params.ptMax) for e in ens)
        self.discrs = set((e.params.discrMin, e.params.discrMax)
                          for e in discr_ens)

        self.ETA_MIN = -2.4
        self.ETA_MAX = 2.4
        self.PT_MIN = min(e.params.ptMin for e in ens)
        self.PT_MAX = max(e.params.ptMax for e in ens)
        if discr_ens:
            self.DISCR_MIN = min(e.params.discrMin for e in discr_ens)
            self.DISCR_MAX = max(e.params.discrMax for e in discr_ens)
        else:
            self.DISCR_MIN = 0.
            self.DISCR_MAX = 1.

        # test points for variable data (using bound +- epsilon)
        eps = 1e-4
        self.eta_test_points = self._bound_test_points(
            self.etas, self.ETA_MIN, self.ETA_MAX,
            (self.ETA_MIN + eps, self.ETA_MAX - eps), eps)
        # Same eta ranges, but only the points in (0, ETA_MAX) are kept.
        self.abseta_test_points = self._bound_test_points(
            self.etas, 0., self.ETA_MAX,
            (eps, self.ETA_MAX - eps), eps)
        self.pt_test_points = self._bound_test_points(
            self.pts, self.PT_MIN, self.PT_MAX,
            (self.PT_MIN + eps, self.PT_MAX - eps), eps)
        self.discr_test_points = self._bound_test_points(
            self.discrs, self.DISCR_MIN, self.DISCR_MAX,
            (self.DISCR_MIN + eps, self.DISCR_MAX - eps), eps)

    @staticmethod
    def _bound_test_points(ranges, lower, upper, edge_points, eps):
        """Return the set of test points around the bounds in ``ranges``.

        Every bound of every ``(min, max)`` pair in ``ranges`` is shifted by
        ``+eps`` and by ``-eps``; ``edge_points`` are added unchanged.  Only
        points strictly between ``lower`` and ``upper`` are kept; they are
        rounded to 5 decimals, and the set removes duplicates.
        """
        candidates = itertools.chain(
            (bound + shift
             for bounds in ranges
             for bound in bounds
             for shift in (eps, -eps)),
            edge_points,
        )
        # Filter on the unrounded value first, then round.
        return set(round(x, 5) for x in candidates if lower < x < upper)

    def print_data(self):
        """Print the collected summary to stdout.

        Reads the summary attributes set by ``__init__``, so it raises
        ``AttributeError`` on an instance for which no entry was selected.
        The abs(eta) test points are not printed.
        """
        # Every print() call takes exactly one argument so that the output is
        # identical under Python 2 (print statement) and Python 3.
        print("\nFound operating points:")
        print(self.ops)

        print("\nFound jet flavors:")
        print(self.flavs)

        print("\nFound sys types (need at least 'central', 'up', 'down'; "
              "also 'up_SYS'/'down_SYS' compatibility is checked):")
        print(self.syss)

        print("\nFound eta ranges: (need everything covered from %g or 0. "
              "up to %g):" % (self.ETA_MIN, self.ETA_MAX))
        print(self.etas)

        print("\nFound pt ranges: (need everything covered from %g "
              "to %g):" % (self.PT_MIN, self.PT_MAX))
        print(self.pts)

        print("\nFound discr ranges: (only needed for operatingPoint==3, "
              "covered from %g to %g):" % (self.DISCR_MIN, self.DISCR_MAX))
        print(self.discrs)

        print("\nTest points for eta (bounds +- epsilon):")
        print(self.eta_test_points)

        print("\nTest points for pt (bounds +- epsilon):")
        print(self.pt_test_points)

        print("\nTest points for discr (bounds +- epsilon):")
        print(self.discr_test_points)
        print("")
155 
156 
def get_data_csv(csv_data):
    """Build one DataLoader per selection found in ``csv_data``.

    Args:
        csv_data: iterable of CSV text lines, header already removed.

    Returns:
        A list of DataLoader objects, one per combination of measurement
        type, operating point and flavour, with the loaders that selected no
        entry removed.  Operating point and flavour are only separated when
        the module-level flags ``separate_by_op`` / ``separate_by_flav`` are
        true; otherwise the placeholder ``'all'`` is passed.
    """
    # Materialise once: the lines are scanned here and then handed to every
    # DataLoader, which would exhaust a one-shot iterable.
    csv_data = list(csv_data)

    # Only lines with exactly 11 comma-separated fields are used to discover
    # the selections.  Counting must use the same separator as the field
    # access below: a whitespace split drops lines that have no blank after
    # the commas and miscounts lines with blanks inside a field.
    rows = [fields for fields in (l.split(',') for l in csv_data)
            if len(fields) == 11]

    # grab measurement types
    meas_types = set(fields[1].strip() for fields in rows)

    # grab operating points
    ops = set(int(fields[0]) for fields in rows) if separate_by_op else ['all']

    # grab flavors
    flavs = (set(int(fields[3]) for fields in rows)
             if separate_by_flav else ['all'])

    # make loaders and filter empty ones; a list comprehension returns a
    # real list on Python 3 as well, where filter() would be lazy
    lds = [
        DataLoader(csv_data, mt, op, fl)
        for mt in meas_types
        for op in ops
        for fl in flavs
    ]
    return [d for d in lds if d.entries]
188 
189 
def get_data(filename):
    """Read a CSV file and return the data loaders built from it.

    Args:
        filename: path of the CSV file to read.

    Returns:
        The result of ``get_data_csv`` on the lines after the header, or
        ``False`` (after printing a message) when the file is empty or its
        first line does not contain ``"OperatingPoint"``.
    """
    with open(filename) as f:
        csv_data = f.readlines()
    if not (csv_data and "OperatingPoint" in csv_data[0]):
        # Single-argument print() gives the same output on Python 2 and 3.
        print("Data file does not contain typical header: %s. Exit" % filename)
        return False
    csv_data.pop(0)  # remove header
    return get_data_csv(csv_data)
def __init__(self, csv_data, measurement_type, operating_point, flavour)
Definition: dataLoader.py:20
bool any(const std::vector< T > &v, const T &what)
Definition: ECalSD.cc:37
def get_data_csv(csv_data)
Definition: dataLoader.py:157
T min(T a, T b)
Definition: MathUtil.h:58
def get_data(filename)
Definition: dataLoader.py:190
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION. A wildcarded negative criterion that matches more than one trigger in the trigger list ("!*", "!HLTx*" if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL. It will reject the event if any of the triggers are PASS or EXCEPTION (this matches the behavior of "!*" before the partial wildcard feature was incorporated). Triggers which are in the READY state are completely ignored. (READY should never be returned since the trigger paths have been run.)