CMS 3D CMS Logo

SpecificationBuilder_cfi.py
Go to the documentation of this file.
1 import FWCore.ParameterSet.Config as cms
2 from copy import deepcopy
3 
4 # this is a pure Python module to build the deeply nested PSets that describe a
5 # SummationSpecification.
6 # The C++ code assumes the form is fully correct, so you should always use this,
7 # which outputs a valid form.
8 
# These need to stay in sync with the C++ enums. TODO: Can we use Python3 enum or sth.?
# Internal specification step types. Each one is a cms.int32 so that it can be
# stored directly as the `type` parameter of a step PSet.
NO_TYPE = cms.int32(0)
GROUPBY = cms.int32(1)  # only "SUM", real histograms
EXTEND_X = cms.int32(2)  # use geometry column as coordinate axis, concatenate
EXTEND_Y = cms.int32(3)
COUNT = cms.int32(4)  # drop all values, only count entries. Atm only step1.
REDUCE = cms.int32(5)  # histogram-to-scalar operator for harvesting, atm only MEAN
SAVE = cms.int32(6)  # atm not used in execution. Marks stage1/2 switch.
# NOTE(review): value 7 is skipped; presumably it belongs to a C++-side step
# type that has no Python constant here -- confirm against the C++ enum.
USE_X = cms.int32(8)  # use arg-th fill(...) parameter for the respective axis.
USE_Y = cms.int32(9)
USE_Z = cms.int32(10)
PROFILE = cms.int32(11)  # marker for step1 to make a profile, related to REDUCE(MEAN)
22 
# Specifications are broken down into Stages, that are executed at different
# points in time (in the fill call, in per-event harvesting (counters), in
# harvesting (DQM step2)).
NO_STAGE = cms.int32(0)
FIRST = cms.int32(1)  # first grouping, before and/or after counting
STAGE1 = cms.int32(2)  # USE/EXTEND/PROFILE for step1
STAGE2 = cms.int32(3)  # REDUCE/EXTEND/GROUPBY/CUSTOM for harvesting
30 
31 # small helpers
def val(maybecms):
    """Return the plain value behind *maybecms*.

    Objects that expose a ``value`` attribute (such as the cms parameter
    wrappers used in this module) are unwrapped by calling ``value()``;
    anything else is returned unchanged.
    """
    if hasattr(maybecms, "value"):
        return maybecms.value()
    return maybecms
37 
def parent(path):
    """Return *path* with its last "/"-separated component removed.

    *path* may be a plain string or a wrapped value; it is unwrapped with
    ``val`` first. A path without any "/" has no parent and yields "".
    """
    # Everything before the last "/" is the parent; rpartition returns an
    # empty head when there is no separator at all.
    return val(path).rpartition("/")[0]
41 
# Default configuration used when a Specification is built without an explicit
# `conf`. This single object is shared by every such Specification (it is
# stored by reference), so do not change values here; pass in a PSet instead.
DefaultConf = cms.PSet(enabled = cms.bool(True))
44 
45 
# The internal Specification format is very rigid and looks much less like a
# program in the internal form:
# - There is one entry FIRST, which is a GROUPBY or COUNT and some columns
# - There is another entry FIRST, which is a GROUPBY and some columns iff
#   the one before was COUNT
# - There are some entries STAGE1
#   - There is one entry per dimension (ordered)
#   - which is either USE_* or EXTEND_*
#   - with one column, that is NOT listed in FIRST.
#   - There is optionally an entry PROFILE to make a profile.
# - There are 0-n steps STAGE2, which are one of GROUPBY, EXTEND_X
#   - The argument for GROUPBY and EXTEND_X is a subset of columns of last step
#   - SAVE is ignored
59 
class Specification(cms.PSet):
    """Chainable builder for the step list of a summation specification.

    Every builder method (groupBy, reduce, save, saveAll) returns ``self`` so
    that calls can be chained. The generated steps are collected in
    ``self.spec`` as PSets with the parameters ``type``, ``stage``,
    ``columns``, ``arg``, ``nbins``, ``xmin`` and ``xmax``.

    The builder is a small state machine (``self._state``): it starts in
    FIRST, moves to STAGE1 after the first non-"Event" groupBy and to STAGE2
    after the first save().
    """

    def __init__(self, conf = DefaultConf):
        """Create an empty specification.

        conf -- PSet stored by reference as ``self.conf``; currently only an
                additional enable flag.
        """
        super(Specification, self).__init__()
        # these are the steps passed down to C++. Will be filled later.
        self.spec = cms.VPSet()
        # this is currently only an additional enable flag. Might add topFolder or
        # range there in the future.
        self.conf = conf

        # these are only used during construction.
        self._activeColumns = set()
        self._state = FIRST

    def __deepcopy__(self, memo):
        """Copy the steps but keep sharing ``conf`` with the original.

        Only ``spec`` is copied; the construction-time state of the copy is
        whatever a fresh Specification starts with.
        """
        # override deepcopy to not copy .conf: it should remain a reference
        # w/o this it is not cleanly possible to build a per-module switch.
        t = Specification(self.conf)
        t.spec = deepcopy(self.spec, memo)
        return t

    @staticmethod
    def _makeStep(stepType, stage, columns, arg):
        """Build one step PSet with the default (unset) binning."""
        return cms.PSet(
            type = stepType,
            stage = stage,
            columns = columns,
            arg = cms.string(arg),
            nbins = cms.int32(-1), xmin = cms.int32(0), xmax = cms.int32(0)
        )

    def groupBy(self, cols, mode = "SUM"):
        """Add a grouping step; what it does depends on the current stage.

        cols -- "/"-separated column names (plain or wrapped string); empty
                items are ignored.
        mode -- "SUM" (default), "EXTEND_X" or "EXTEND_Y"; which ones are
                accepted depends on the stage, see below.

        Raises Exception if the mode is not allowed in the current stage, if
        a STAGE1 call does not drop exactly one column, or if a STAGE2 call
        drops no column.
        """
        cnames = list(filter(len, val(cols).split("/")))  # omit empty items
        newstate = self._state

        # The behaviour of groupBy depends a lot on when it happens:
        # - The first (or second, if there is per-event counting) are very special
        # - others in STAGE1 have to be EXTEND, and they will be translated into a
        #   list of exactly 3 USE/EXTEND steps (one per dimension).
        # - in STAGE2 they are just passed down to C++.

        if self._state == FIRST:
            if mode != "SUM":
                raise Exception("First grouping must be SUM")
            if "Event" in cnames:
                cnames.remove("Event")  # per-Event grouping is done automatically
                stepType = COUNT
                mode = "COUNT"
                newstate = FIRST  # a regular grouping still has to follow
            else:
                stepType = GROUPBY
                newstate = STAGE1

        if self._state == STAGE1:
            dropped = self._activeColumns.difference(cnames)
            if len(dropped) != 1:
                raise Exception("EXTEND must drop exactly one column.")

            # Re-purpose the already emitted axis step instead of adding one.
            if mode == "EXTEND_X":
                self._x.type = EXTEND_X
                self._x.columns = cms.vstring(dropped)
            elif mode == "EXTEND_Y":
                self._y.type = EXTEND_Y
                self._y.columns = cms.vstring(dropped)
            else:
                raise Exception("Only EXTEND_X or EXTEND_Y allowed here, not " + mode)

            # remove the column in the FIRST groupBy, we always re-extract in step1.
            c = list(dropped)[0]
            for s in self.spec:
                if s.stage == FIRST and s.type == GROUPBY and c in s.columns:
                    s.columns.remove(c)
            if c in self._activeColumns:
                self._activeColumns.remove(c)
            if c in self._lastColumns:
                self._lastColumns.remove(c)

            return self  # done here, no new step to add

        if self._state == STAGE2:
            if self._activeColumns.issubset(cnames):
                raise Exception("Harvesting GROUPBY must drop some columns")
            if mode == "EXTEND_X":
                stepType = EXTEND_X
            elif mode == "SUM":
                stepType = GROUPBY
            else:
                raise Exception("Currently only EXTEND_X and SUM supported in harvesting, not " + mode)

        # Remembered for later groupBy() calls and for saveAll().
        self._activeColumns = set(cnames)
        self._lastColumns = cnames
        self._lastMode = mode

        self.spec.append(self._makeStep(stepType, self._state, cms.vstring(cnames), mode))

        # In the very beginning emit standard column assignments, they will be
        # changed later (above and in save()) to reflect the EXTENDS given above.
        if newstate == STAGE1 and self._state == FIRST:
            self._x = self._makeStep(USE_X, STAGE1, cms.vstring(), "")
            self.spec.append(self._x)
            self._y = self._makeStep(USE_Y, STAGE1, cms.vstring(), "")
            self.spec.append(self._y)
            self._z = self._makeStep(USE_Z, STAGE1, cms.vstring(), "")
            self.spec.append(self._z)

        self._state = newstate

        return self

    def reduce(self, sort):
        """Add a reduction; *sort* is "MEAN" or "COUNT".

        In FIRST only "COUNT" is accepted and the first step is marked COUNT,
        so a preceding groupBy must already have added that step. In STAGE1
        "MEAN" adds a PROFILE step and anything else adds nothing. In STAGE2
        only "MEAN" is accepted and is passed through as a REDUCE step.

        Raises Exception for a disallowed *sort* in FIRST or STAGE2.
        """
        # reduce can be MEAN or COUNT. in STAGE2, just pass through.
        # in STAGE1, MEAN (anywhere) means make a PROFILE
        # COUNT can mean per-event counting or a occupancy plot, which is achieved
        # by ignoring the values passed to fill() (like dimensions=0, TODO).
        if self._state == FIRST:
            if sort != "COUNT":
                raise Exception("First statement must be groupBy.")
            self.spec[0].type = COUNT  # this is actually a noop
            # groupBy already saw the "Event" column and set up counting.

            return self

        if self._state == STAGE1:
            if sort == "MEAN":
                self.spec.append(self._makeStep(PROFILE, STAGE1, cms.vstring(), ""))
            return self

        if sort != "MEAN":
            raise Exception("Harvesting allows only reduce(MEAN) at the moment, not " + sort)

        self.spec.append(self._makeStep(REDUCE, self._state, cms.vstring(), sort))
        return self

    def save(self, nbins=-1, xmin=0, xmax=0):
        """Close the current stage and switch to STAGE2.

        When called in STAGE1, the axis steps get their final fill()
        parameter numbers and the given binning (nbins, xmin, xmax). In
        STAGE2 nothing is added.

        Raises Exception when called before any groupBy has left FIRST.
        """
        if self._state == FIRST:
            raise Exception("First statement must be groupBy.")

        if self._state == STAGE1:
            # end of STAGE1, fix the parameter assignments: axes that are still
            # USE_* consume the fill() parameters in order, starting at 1.
            # we don't know how many parameters the user wants to pass here, but the
            # HistogramManager knows. So we just add 3.
            # NOTE(review): the binning is applied to every axis, whether it is
            # USE_* or EXTEND_* -- confirm against the upstream module.
            n = 1
            for axis, useType in ((self._x, USE_X), (self._y, USE_Y), (self._z, USE_Z)):
                if axis.type == useType:
                    axis.arg = cms.string(str(n))
                    n = n + 1
                axis.nbins = cms.int32(nbins)
                axis.xmin = cms.int32(xmin)
                axis.xmax = cms.int32(xmax)

        # SAVE is implicit in step1 and ignored in harvesting, so no SAVE step
        # is emitted; only the stage switch is recorded.
        self._state = STAGE2

        return self

    def saveAll(self):
        """Save, then keep dropping the last column and saving again.

        Calls groupBy() with the mode of the most recent grouping and save()
        until only the first column is left.
        """
        # call groupBy() and save() until all columns are consumed.
        self.save()
        columns = self._lastColumns
        for i in range(len(columns) - 1, 0, -1):
            cols = columns[0:i]
            self.groupBy("/".join(cols), self._lastMode)
            self.save()
        return self

    # this is used for serialization, and for that this is just a PSet.
    def pythonTypeName(self):
        """Return the type name to serialize as: a plain cms.PSet."""
        return 'cms.PSet'
static std::string join(char **cmd)
Definition: RemoteFile.cc:19
def save(self, nbins=-1, xmin=0, xmax=0)
def remove(d, key, TELL=False)
Definition: MatrixUtil.py:223
#define str(s)