CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
errorMatrix2Lands_multiChannel.py
Go to the documentation of this file.
1 import re
2 from sys import argv
3 from math import *
4 from optparse import OptionParser
5 parser = OptionParser()
6 parser.add_option("-s", "--stat", dest="stat", default=False, action="store_true")
7 parser.add_option("-S", "--signal", dest="signal", default=False, action="store_true")
8 parser.add_option("-a", "--asimov", dest="asimov", default=False, action="store_true")
9 parser.add_option("-F", "--floor", dest="floor", default=False, action="store_true")
10 parser.add_option("-o", "--optimize", dest="optimize", default=False, action="store_true", help="Remove processes or systematics that don't contribute")
11 parser.add_option("-g", "--gamma", dest="gamma", default=False, action="store_true", help="Use gammas for statistical uncertainties")
12 parser.add_option("--gmM", dest="gmM", type="string", action="append", default=[], help="Use multiplicative gamma for this systematics (can use multiple times)")
13 parser.add_option("-l", "--label", dest="label", type="string", default="hww")
14 parser.add_option("-L", "--label-syst", dest="labelSyst", default=False, action="store_true", help="Give names to systematics")
15 parser.add_option("-4", "--4th-gen", dest="sm4", default=False, action="store_true")
16 parser.add_option("-n", "--nch", dest="nch", type="int", default=1)
17 parser.add_option("-B", "--b-fluct", dest="bfluct", type="float", default=0)
18 (options, args) = parser.parse_args()
19 
20 if len(args) < 1: raise RuntimeError, "Usage: errorMatrix2Lands.py [options] errorMatrix.txt "
21 
22 file = open(args[0], "r")
23 
24 data = {}
25 
26 def iddify(x):
27  id = x
28  slang = { "[Cc]ross[ -][Ss]ection":"XS",
29  "[Tt]rigger(\s+[Ee]ff(s|ic(ienc(y|ies))?))?":"Trig",
30  "([Ii]ntegrated\s+)?[Ll]umi(nosity)":"Lumi",
31  "[Ee]ff(s|ic(ienc(y|ies))?)":"Eff",
32  "[Nn]orm(alization)":"Norm",
33  "[Rr]eco(nstruction)":"Reco",
34  "[Ss]stat(istics?)":"Stat"}
35  for s,r in slang.items(): id = re.sub(s,r,id)
36  return re.sub("[^A-Za-z0-9_]", "", re.sub("\s+|-|/","_", re.sub("^\s+|\s+$","",id)))
37 
38 header = file.readline() # skip header
39 processnames = header.split()[2:]
40 nproc = len(processnames)
41 if (options.nch > 1):
42  processnames = header.split()[1:]
43  nproc = len(processnames)/options.nch-1
44  processnames = processnames[1:(nproc+1)]
45  print processnames
46 # read yields
47 for l in file:
48  l = re.sub("--+","0",l)
49  m = re.match(r"(\d+) Yield\s+((\d+\.?\d*(E[+\-]\d+)?\s+)+)", l)
50  if not m: break
51  mh = m.group(1)
52  yields = [float(x) for x in m.group(2).split()];
53  line = []
54  if len(yields) != (options.nch * (nproc+1)):
55  raise RuntimeError, "len(yields) = %d != options.nch * (nproc + 1) = %d * (%d+1)" % (len(yields),options.nch,nproc)
56  data[mh] = { 'nch':0, 'obs':[], 'exp':[], 'processnames':[], 'nuis':[], 'nuisname':[]}
57  for i in range(options.nch):
58  start = i*(nproc+1); end = start + nproc + 1
59  data[mh]['nch'] += 1
60  data[mh]['obs'] += [yields[start]]
61  data[mh]['exp'] += yields[start+1:end]
62  data[mh]['processnames'] += processnames[:]
63 
64 # read nuisances
65 if not options.stat:
66  for l in file:
67  if l.rstrip().rstrip() == "": continue
68  l = re.sub("--+","0",l)
69  m = re.match(r"(.*?)\s+(0\s+(\d+\.?\d*(E[+\-]\d+)?\s+)+)", l)
70  if m == None: raise ValueError, "Missing line "+l
71  sysname = m.group(1)
72  syseff = [float(x) for x in m.group(2).split()]
73  if len(syseff) != (options.nch * (nproc+1)):
74  raise RuntimeError, "len(syseff) = %d != options.nch * (nproc + 1) = %d * (%d+1)" % (len(syseff),options.nch,nproc)
75  # decide which selections are affected
76  mhs = data.keys()
77  if re.match(r"\d{3}\s+.*?", sysname) and data.has_key(sysname[0:3]):
78  mhs = [ sysname[0:3] ]
79  for mh in mhs:
80  nuisline = []
81  for i in range(options.nch):
82  start = i*(nproc+1); end = start + nproc + 1
83  nuisline += syseff[start+1:end]
84  # special case: the stats line have to be expanded in N independent systematics
85  if sysname != mh+" Stats":
86  data[mh]['nuis'].append(nuisline)
87  data[mh]['nuisname'].append(sysname)
88  else:
89  for k in range(0, nproc * options.nch):
90  if nuisline[k] < 0.005: continue
91  data[mh]['nuis'].append([(x if j == k else 0) for j,x in enumerate(nuisline)])
92  data[mh]['nuisname'].append('Statistics in MC or control sample')
93 
94 if options.asimov:
95  for mh in data.keys():
96  for i in range(options.nch):
97  start = i*(nproc); end = start + nproc
98  data[mh]['obs'][i] = sum(data[mh]['exp'][start+1:end])
99  if options.bfluct != 0:
100  data[mh]['obs'][i] = max(0, data[mh]['obs'][i] + options.bfluct * sqrt(data[mh]['obs'][i]))
101  if options.signal: data[mh]['obs'][i] += data[mh]['exp'][start]
102  if options.floor: data[mh]['obs'][i] = floor(data[mh]['obs'][i])
103 
104 
105 if options.optimize:
106  for mh in data.keys():
107  # step 1: strip processes with no rate
108  hasrate = [0 for p in range(nproc)]
109  for i in range(options.nch):
110  for p in range(nproc):
111  if data[mh]['exp'][p] != 0: hasrate[p] = 1
112  for p in range(nproc):
113  if hasrate[p] == 0: print "For mH = ", mh , " process ", processnames[p], " does not contribute."
114  data[mh]['nuis'] = [ [x for i,x in enumerate(n) if hasrate[i%nproc]] for n in data[mh]['nuis'] ]
115  data[mh]['processnames'] = [ x for i,x in enumerate(data[mh]['processnames']) if hasrate[i%nproc] ]
116  data[mh]['exp'] = [ x for i,x in enumerate(data[mh]['exp']) if hasrate[i%nproc] ]
117  # step 2: strip nuisances with no non-zero value
118  data[mh]['nuisname'] = [ data[mh]['nuisname'][i] for i,n in enumerate(data[mh]['nuis']) if sum(n) > 0 ]
119  data[mh]['nuis'] = [ n for i,n in enumerate(data[mh]['nuis']) if sum(n) > 0 ]
120 
121 print "Generating datacards: "
122 for mh,D in data.items():
123  # prepare variables
124  # open file
125  filename = "%s-mH%s.txt" % (options.label,mh)
126  fout = open(filename, "w")
127  if fout == None: raise RuntimeError, "Cannot open %s for writing" % filename
128  print " - "+filename
129  nproc = len(data[mh]['exp'])/data[mh]['nch']
130  # write datacard
131  fout.write( "Limit (%s), mH = %s GeV\n" % (options.label,mh) )
132  fout.write( "date 2010.11.30\n" )
133  fout.write( "\n" )
134  fout.write( "imax %d number of channels\n" % D['nch'] )
135  fout.write( "jmax %d number of background\n" % (nproc-1) )
136  fout.write( "kmax %d number of nuisance parameters\n" % len(D['nuis']) )
137  fout.write( "---------------------------------------------------------------------------------------------------------------\n" );
138  fout.write( "Observation " + (" ".join( "%7.3f" % X for X in D['obs'])) + "\n" )
139  fout.write( "---------------------------------------------------------------------------------------------------------------\n" );
140  pad = " "*27 if options.labelSyst else "";
141  fout.write( "bin " + pad + (" ".join("%6d" % (X/nproc+1) for X in range(nproc*options.nch)) ) + "\n" )
142  fout.write( "process " + pad + (" ".join("%6.6s" % X for X in D['processnames'])) + "\n" )
143  fout.write( "process " + pad + (" ".join("%6d" % (i%nproc) for i in range(nproc*options.nch))) + "\n")
144  fout.write( "rate " + pad + (" ".join("%6.2f" % f for f in D['exp'])) + "\n")
145  fout.write( "---------------------------------------------------------------------------------------------------------------\n" );
146  used_syst_labels = {}
147  for (inuis,N) in enumerate(D['nuis']):
148  name = "%-3d" % (inuis+1); id = name
149  isStat = (D['nuisname'][inuis] == "Statistics in MC or control sample")
150  if options.labelSyst:
151  id = iddify(D['nuisname'][inuis])
152  if isStat: id = "Stat%d" % inuis
153  if id in used_syst_labels: id += "_%d" % inuis
154  used_syst_labels[id] = True
155  name = "%-30s" % id
156  if options.gamma and isStat:
157  # make some room for label
158  name = "%-22s" % id
159  # have to guess N
160  sumN = sum([1.0/(X*X) for X in N if X > 0]); ## people filling error matrix uses the naive 1/sqrt(N) errors
161  nN = sum([1 for X in N if X > 0]);
162  realN = int(floor(sumN/nN+1.5)) if nN else 1
163  alphas = [ (D['exp'][i]/(realN) if X else 0) for i,X in enumerate(N) ]
164  fout.write( name+" gmN %7d " % realN + (" ".join([("%7.5f" % X if X else " 0 ") for X in alphas])) + " " + D['nuisname'][inuis] + "\n")
165  else:
166  if id in options.gmM:
167  fout.write( name+" gmM " + (" ".join(["%6.3f" % (0.0+X) for X in N])) + " " + D['nuisname'][inuis] + "\n")
168  else:
169  fout.write( name+" lnN " + (" ".join(["%6.3f" % (1.0+X) for X in N])) + " " + D['nuisname'][inuis] + "\n")
const T & max(const T &a, const T &b)
T sqrt(T t)
Definition: SSEVec.h:46
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
double split
Definition: MVATrainer.cc:139