CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
combineCards.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 import re
3 from sys import argv
4 import os.path
5 from pprint import pprint
6 from optparse import OptionParser
7 parser = OptionParser()
8 parser.add_option("-s", "--stat", dest="stat", default=False, action="store_true", help="Drop all systematics")
9 parser.add_option("-S", "--force-shape", dest="shape", default=False, action="store_true", help="Treat all channels as shape analysis. Useful for mixed combinations")
10 parser.add_option("-a", "--asimov", dest="asimov", default=False, action="store_true", help="Replace observation with asimov dataset. Works only for counting experiments")
11 parser.add_option("-P", "--prefix", type="string", dest="fprefix", default="", help="Prefix this to all file names")
12 parser.add_option("--xc", "--exclude-channel", type="string", dest="channelVetos", default=[], action="append", help="Exclude channels that match this regexp; can specify multiple ones")
13 parser.add_option("--X-no-jmax", dest="noJMax", default=False, action="store_true", help="FOR DEBUG ONLY: Turn off the consistency check between jmax and number of processes.")
14 parser.add_option("--xn-file", "--exclude-nuisances-from-file", type="string", dest="nuisVetoFile", help="Exclude all the nuisances in this file")
15 
16 (options, args) = parser.parse_args()
17 options.bin = True # fake that is a binary output, so that we parse shape lines
18 options.nuisancesToExclude = []
19 options.verbose = 0
20 
21 if options.nuisVetoFile:
22  for line in open(options.nuisVetoFile,"r"):
23  options.nuisancesToExclude.append(re.compile(line.strip()))
24 
26 
27 obsline = []; obskeyline = [] ;
28 keyline = []; expline = []; systlines = {}
29 signals = []; backgrounds = []; shapeLines = []
30 paramSysts = {}; flatParamNuisances = {}
31 cmax = 5 # column width
32 for ich,fname in enumerate(args):
33  label = "ch%d" % (ich+1)
34  if "=" in fname: (label,fname) = fname.split("=")
35  fname = options.fprefix+fname
36  dirname = os.path.dirname(fname)
37  file = open(fname, "r")
38  DC = parseCard(file, options)
39  singlebin = (len(DC.bins) == 1)
40  if label == ".":
41  label=DC.bins[0] if singlebin else "";
42  elif not singlebin:
43  label += "_";
44  for b in DC.bins:
45  bout = label if singlebin else label+b
46  if isVetoed(bout,options.channelVetos): continue
47  obskeyline.append(bout)
48  for (p,e) in DC.exp[b].items(): # so that we get only self.DC.processes contributing to this bin
49  if DC.isSignal[p] == False: continue
50  #print "in DC.exp.items:b,p", b,p
51  expline.append("%.4f" % e)
52  keyline.append((bout, p, DC.isSignal[p]))
53  for (p,e) in DC.exp[b].items(): # so that we get only self.DC.processes contributing to this bin
54  if DC.isSignal[p]: continue
55  #print "in DC.exp.items:b,p", b,p
56  expline.append("%.4f" % e)
57  keyline.append((bout, p, DC.isSignal[p]))
58  # systematics
59  for (lsyst,nofloat,pdf,pdfargs,errline) in DC.systs:
60  systeffect = {}
61  if pdf == "param":
62  if paramSysts.has_key(lsyst):
63  if paramSysts[lsyst] != pdfargs: raise RuntimeError, "Parameter uncerainty %s mismatch between cards." % lsyst
64  else:
65  paramSysts[lsyst] = pdfargs
66  continue
67  for b in DC.bins:
68  bout = label if singlebin else label+b
69  if isVetoed(bout,options.channelVetos): continue
70  if not systeffect.has_key(bout): systeffect[bout] = {}
71  for p in DC.exp[b].keys(): # so that we get only self.DC.processes contributing to this bin
72  r = str(errline[b][p]);
73  if type(errline[b][p]) == list: r = "%.3f/%.3f" % (errline[b][p][0], errline[b][p][1])
74  elif type in ("lnN",'gmM'): r = "%.3f" % errline[b][p]
75  if errline[b][p] == 0: r = "-"
76  if len(r) > cmax: cmax = len(r) # get max col length, as it's more tricky do do it later with a map
77  systeffect[bout][p] = r
78  if systlines.has_key(lsyst):
79  (otherpdf, otherargs, othereffect, othernofloat) = systlines[lsyst]
80  if otherpdf != pdf:
81  if (pdf == "lnN" and otherpdf.startswith("shape")):
82  if systlines[lsyst][0][-1] != '?': systlines[lsyst][0] += '?'
83  for b,v in systeffect.items(): othereffect[b] = v;
84  elif (pdf.startswith("shape") and otherpdf == "lnN"):
85  if pdf[-1] != '?': pdf += '?'
86  systlines[lsyst][0] = pdf
87  for b,v in systeffect.items(): othereffect[b] = v;
88  elif (pdf == otherpdf+"?") or (pdf+"?" == otherpdf):
89  systlines[lsyst][0] = pdf.replace("?","")+"?"
90  for b,v in systeffect.items(): othereffect[b] = v;
91  else:
92  raise RuntimeError, "File %s defines systematic %s as using pdf %s, while a previous file defines it as using %s" % (fname,lsyst,pdf,otherpdf)
93  else:
94  if pdf == "gmN" and int(pdfargs[0]) != int(otherargs[0]):
95  raise RuntimeError, "File %s defines systematic %s as using gamma with %s events in sideband, while a previous file has %s" % (fname,lsyst,pdfargs[0],otherargs[0])
96  for b,v in systeffect.items(): othereffect[b] = v;
97  else:
98  pdfargs = [ str(x) for x in pdfargs ]
99  systlines[lsyst] = [pdf,pdfargs,systeffect,nofloat]
100  # flat params
101  for K in DC.flatParamNuisances.iterkeys():
102  flatParamNuisances[K] = True
103  # put shapes, if available
104  if len(DC.shapeMap):
105  for b in DC.bins:
106  bout = label if singlebin else label+b
107  if isVetoed(bout,options.channelVetos): continue
108  p2sMap = DC.shapeMap[b] if DC.shapeMap.has_key(b) else {}
109  p2sMapD = DC.shapeMap['*'] if DC.shapeMap.has_key('*') else {}
110  for p, x in p2sMap.items():
111  xrep = [xi.replace("$CHANNEL",b) for xi in x]
112  if xrep[0] != 'FAKE' and dirname != '': xrep[0] = dirname+"/"+xrep[0]
113  shapeLines.append((p,bout,xrep))
114  for p, x in p2sMapD.items():
115  if p2sMap.has_key(p): continue
116  xrep = [xi.replace("$CHANNEL",b) for xi in x]
117  if xrep[0] != 'FAKE' and dirname != '': xrep[0] = dirname+"/"+xrep[0]
118  shapeLines.append((p,bout,xrep))
119  elif options.shape:
120  for b in DC.bins:
121  bout = label if singlebin else label+b
122  shapeLines.append(('*',bout,['FAKE']))
123  # combine observations, but remove line if any of the datacards doesn't have it
124  if len(DC.obs) == 0:
125  obsline = None
126  elif obsline != None:
127  for b in DC.bins:
128  bout = label if singlebin else label+b
129  if isVetoed(bout,options.channelVetos): continue
130  obsline += [str(DC.obs[b])];
131 
132 bins = []
133 for (b,p,s) in keyline:
134  if b not in bins: bins.append(b)
135  if s:
136  if p not in signals: signals.append(p)
137  else:
138  if p not in backgrounds: backgrounds.append(p)
139 
140 print "Combination of", ", ".join(args)
141 print "imax %d number of bins" % len(bins)
142 print "jmax %d number of processes minus 1" % (len(signals) + len(backgrounds) - 1)
143 print "kmax %d number of nuisance parameters" % (len(systlines) + len(paramSysts))
144 print "-" * 130
145 
146 if shapeLines:
147  chmax = max([max(len(p),len(c)) for p,c,x in shapeLines]);
148  cfmt = "%-"+str(chmax)+"s ";
149  shapeLines.sort( lambda x,y : cmp(x[0],y[0]) if x[1] == y[1] else cmp(x[1],y[1]) )
150  for (process,channel,stuff) in shapeLines:
151  print "shapes", cfmt % process, cfmt % channel, ' '.join(stuff);
152  print "-" * 130
153 
154 if obsline:
155  cmax = max([cmax]+[len(l) for l in obskeyline]+[len(x) for x in obsline])
156  cfmt = "%-"+str(cmax)+"s";
157  print "bin ", " ".join([cfmt % x for x in obskeyline])
158  print "observation ", " ".join([cfmt % x for x in obsline])
159 
160 print "-" * 130
161 
162 pidline = []; signals = []; backgrounds = []
163 tmpsignals = [];
164 for (b,p,s) in keyline:
165  if s:
166  if p not in tmpsignals: tmpsignals.append(p)
167 for (b,p,s) in keyline:
168  if s:
169  if p not in signals: signals.append(p)
170  pidline.append(signals.index(p)-len(tmpsignals)+1)
171  else:
172  if p not in backgrounds: backgrounds.append(p)
173  pidline.append(1+backgrounds.index(p))
174 cmax = max([cmax]+[max(len(p),len(b)) for p,b,s in keyline]+[len(e) for e in expline])
175 hmax = max([10] + [len("%-12s[nofloat] %s %s" % (l,p,a)) for l,(p,a,e,nf) in systlines.items()])
176 cfmt = "%-"+str(cmax)+"s"; hfmt = "%-"+str(hmax)+"s ";
177 print hfmt % "bin", " ".join([cfmt % p for p,b,s in keyline])
178 print hfmt % "process", " ".join([cfmt % b for p,b,s in keyline])
179 print hfmt % "process", " ".join([cfmt % x for x in pidline])
180 print hfmt % "rate", " ".join([cfmt % x for x in expline])
181 
182 print "-" * 130
183 
184 sysnamesSorted = systlines.keys(); sysnamesSorted.sort()
185 for name in sysnamesSorted:
186  (pdf,pdfargs,effect,nofloat) = systlines[name]
187  if nofloat: name += "[nofloat]"
188  systline = []
189  for b,p,s in keyline:
190  try:
191  systline.append(effect[b][p])
192  except KeyError:
193  systline.append("-");
194  print hfmt % ("%-21s %s %s" % (name, pdf, " ".join(pdfargs))), " ".join([cfmt % x for x in systline])
195 for (pname, pargs) in paramSysts.items():
196  print "%-12s param %s" % (pname, " ".join(pargs))
197 
198 for pname in flatParamNuisances.iterkeys():
199  print "%-12s flatParam" % pname
const T & max(const T &a, const T &b)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18