CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
errorMatrix2Lands.py
Go to the documentation of this file.
1 import re
2 from sys import argv
3 from optparse import OptionParser
4 parser = OptionParser()
5 parser.add_option("-s", "--stat", dest="stat", default=False, action="store_true")
6 parser.add_option("-S", "--signal", dest="signal", default=False, action="store_true")
7 parser.add_option("-a", "--asimov", dest="asimov", default=False, action="store_true")
8 parser.add_option("-o", "--optimize", dest="optimize", default=False, action="store_true")
9 parser.add_option("-l", "--label", dest="label", type="string", default="hww")
10 parser.add_option("-4", "--4th-gen", dest="sm4", default=False, action="store_true")
11 (options, args) = parser.parse_args()
12 
13 if len(args) < 1: raise RuntimeError, "Usage: errorMatrix2Lands.py [options] errorMatrix.txt "
14 
15 file = open(args[0], "r")
16 
17 data = {}
18 
19 header = file.readline() # skip header
20 processnames = header.split()[2:]
21 # read yields
22 for l in file:
23  l = l.replace("---","0")
24  m = re.match(r"(\d+) Yield\s+((\d+\.?\d*(E[+\-]\d+)?\s+)+)", l)
25  if not m: break
26  mh = m.group(1)
27  yields = [float(x) for x in m.group(2).split()];
28  if len(yields) != len(processnames)+1: raise RuntimeError, "Length of yields does not match with process names"
29  data[mh] = { 'obs':yields[0], 'exp':yields[1:], 'processnames':processnames[:], 'nuis':[] }
30 
31 # read nuisances
32 if not options.stat:
33  for l in file:
34  l = l.replace("---","0")
35  m = re.match(r"(.*?)\s+0\s+((\d+\.?\d*(E[+\-]\d+)?\s+)+)", l)
36  if m == None: raise ValueError, "Missing line "+l
37  sysname = m.group(1)
38  syseff = [float(x) for x in m.group(2).split()]
39  # decide which selections are affected
40  mhs = data.keys()
41  if re.match(r"\d{3}\s+.*?", sysname) and data.has_key(sysname[0:3]):
42  mhs = [ sysname[0:3] ]
43  for mh in mhs:
44  if len(data[mh]['exp']) != len(syseff): raise RuntimeError, "Sysline %s: len(syseff) = %d, len(exp) = %d\n" % (l,len(syseff),len(data[mh]['exp']))
45  # special case: the stats line have to be expanded in N independent systematics
46  if sysname != mh+" Stats":
47  data[mh]['nuis'].append(syseff)
48  else:
49  nproc = len(syseff)-(1 if options.sm4 else 0)
50  data[mh]['nuis'].append(syseff[0:(2 if options.sm4 else 1)] + [0] * (nproc-1))
51  for i in range(1,nproc):
52  data[mh]['nuis'].append([(x if j == i+(1 if options.sm4 else 0) else 0) for j,x in enumerate(syseff)])
53 
54 if options.optimize:
55  for mh in data.keys():
56  # step 1: strip processes with no rate
57  data[mh]['nuis'] = [ [n[i] for i,k in enumerate(data[mh]['exp']) if k > 0] for n in data[mh]['nuis'] ]
58  data[mh]['processnames'] = [ data[mh]['processnames'][i] for i,k in enumerate(data[mh]['exp']) if k > 0 ]
59  data[mh]['exp'] = [ k for k in data[mh]['exp'] if k > 0 ]
60  # step 2: strip nuisances with no non-zero value
61  data[mh]['nuis'] = [ n for n in data[mh]['nuis'] if sum(n) > 0 ]
62 
63 if options.asimov:
64  for mh in data.keys():
65  data[mh]['obs'] = sum(data[mh]['exp'][(2 if options.sm4 else 1):])
66  if options.signal: data[mh]['obs'] += data[mh]['exp'][0]
67 
68 print "Generating datacards: "
69 models = [ 'SM', '4G' ] if options.sm4 else [ 'SM' ]
70 for (isig,name) in enumerate(models):
71  for mh,D in data.items():
72  # prepare variables
73  nproc = len(D['exp'])-(1 if options.sm4 else 0) # there's 1 more column, as it has both SM and 4G
74  indices = [isig] + range(len(models),nproc+(1 if options.sm4 else 0))
75  # open file
76  filename = "%s-%s-mH%s.txt" % (options.label,name,mh)
77  fout = open(filename, "w")
78  if fout == None: raise RuntimeError, "Cannot open %s for writing" % filename
79  print " - "+filename
80  # write datacard
81  fout.write( "%s limit (%s), mH = %s GeV\n" % (name,options.label,mh) )
82  fout.write( "date 2010.11.30\n" )
83  fout.write( "\n" )
84  fout.write( "imax %d number of channels\n" % 1 )
85  fout.write( "jmax %d number of background\n" % (nproc-1) )
86  fout.write( "kmax %d number of nuisance parameters\n" % len(D['nuis']) )
87  fout.write( "Observation %f\n" % D['obs'] )
88  fout.write( "bin " + ("1 "*nproc) + "\n" )
89  fout.write( "process " + (" ".join([D['processnames'][i] for i in indices])) + "\n" )
90  fout.write( "process " + (" ".join([str(i) for i in range(nproc)])) + "\n")
91  fout.write( "rate " + str(D['exp'][isig])+ " " + (" ".join([str(f) for f in D['exp'][(2 if options.sm4 else 1):]])) + "\n")
92  for (inuis,N) in enumerate(D['nuis']):
93  fout.write( str(inuis+1) + " lnN " + (" ".join([str(1.0+N[i]) for i in indices])) + "\n")
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
double split
Definition: MVATrainer.cc:139