00001 import re
00002 from sys import argv
00003 from math import *
00004 from optparse import OptionParser
00005 parser = OptionParser()
00006 parser.add_option("-s", "--stat", dest="stat", default=False, action="store_true")
00007 parser.add_option("-S", "--signal", dest="signal", default=False, action="store_true")
00008 parser.add_option("-a", "--asimov", dest="asimov", default=False, action="store_true")
00009 parser.add_option("-F", "--floor", dest="floor", default=False, action="store_true")
00010 parser.add_option("-o", "--optimize", dest="optimize", default=False, action="store_true", help="Remove processes or systematics that don't contribute")
00011 parser.add_option("-g", "--gamma", dest="gamma", default=False, action="store_true", help="Use gammas for statistical uncertainties")
00012 parser.add_option("--gmM", dest="gmM", type="string", action="append", default=[], help="Use multiplicative gamma for this systematics (can use multiple times)")
00013 parser.add_option("-l", "--label", dest="label", type="string", default="hww")
00014 parser.add_option("-L", "--label-syst", dest="labelSyst", default=False, action="store_true", help="Give names to systematics")
00015 parser.add_option("-4", "--4th-gen", dest="sm4", default=False, action="store_true")
00016 parser.add_option("-n", "--nch", dest="nch", type="int", default=1)
00017 parser.add_option("-B", "--b-fluct", dest="bfluct", type="float", default=0)
00018 (options, args) = parser.parse_args()
00019
00020 if len(args) < 1: raise RuntimeError, "Usage: errorMatrix2Lands.py [options] errorMatrix.txt "
00021
00022 file = open(args[0], "r")
00023
00024 data = {}
00025
00026 def iddify(x):
00027 id = x
00028 slang = { "[Cc]ross[ -][Ss]ection":"XS",
00029 "[Tt]rigger(\s+[Ee]ff(s|ic(ienc(y|ies))?))?":"Trig",
00030 "([Ii]ntegrated\s+)?[Ll]umi(nosity)":"Lumi",
00031 "[Ee]ff(s|ic(ienc(y|ies))?)":"Eff",
00032 "[Nn]orm(alization)":"Norm",
00033 "[Rr]eco(nstruction)":"Reco",
00034 "[Ss]stat(istics?)":"Stat"}
00035 for s,r in slang.items(): id = re.sub(s,r,id)
00036 return re.sub("[^A-Za-z0-9_]", "", re.sub("\s+|-|/","_", re.sub("^\s+|\s+$","",id)))
00037
00038 header = file.readline()
00039 processnames = header.split()[2:]
00040 nproc = len(processnames)
00041 if (options.nch > 1):
00042 processnames = header.split()[1:]
00043 nproc = len(processnames)/options.nch-1
00044 processnames = processnames[1:(nproc+1)]
00045 print processnames
00046
00047 for l in file:
00048 l = re.sub("--+","0",l)
00049 m = re.match(r"(\d+) Yield\s+((\d+\.?\d*(E[+\-]\d+)?\s+)+)", l)
00050 if not m: break
00051 mh = m.group(1)
00052 yields = [float(x) for x in m.group(2).split()];
00053 line = []
00054 if len(yields) != (options.nch * (nproc+1)):
00055 raise RuntimeError, "len(yields) = %d != options.nch * (nproc + 1) = %d * (%d+1)" % (len(yields),options.nch,nproc)
00056 data[mh] = { 'nch':0, 'obs':[], 'exp':[], 'processnames':[], 'nuis':[], 'nuisname':[]}
00057 for i in range(options.nch):
00058 start = i*(nproc+1); end = start + nproc + 1
00059 data[mh]['nch'] += 1
00060 data[mh]['obs'] += [yields[start]]
00061 data[mh]['exp'] += yields[start+1:end]
00062 data[mh]['processnames'] += processnames[:]
00063
00064
00065 if not options.stat:
00066 for l in file:
00067 if l.rstrip().rstrip() == "": continue
00068 l = re.sub("--+","0",l)
00069 m = re.match(r"(.*?)\s+(0\s+(\d+\.?\d*(E[+\-]\d+)?\s+)+)", l)
00070 if m == None: raise ValueError, "Missing line "+l
00071 sysname = m.group(1)
00072 syseff = [float(x) for x in m.group(2).split()]
00073 if len(syseff) != (options.nch * (nproc+1)):
00074 raise RuntimeError, "len(syseff) = %d != options.nch * (nproc + 1) = %d * (%d+1)" % (len(syseff),options.nch,nproc)
00075
00076 mhs = data.keys()
00077 if re.match(r"\d{3}\s+.*?", sysname) and data.has_key(sysname[0:3]):
00078 mhs = [ sysname[0:3] ]
00079 for mh in mhs:
00080 nuisline = []
00081 for i in range(options.nch):
00082 start = i*(nproc+1); end = start + nproc + 1
00083 nuisline += syseff[start+1:end]
00084
00085 if sysname != mh+" Stats":
00086 data[mh]['nuis'].append(nuisline)
00087 data[mh]['nuisname'].append(sysname)
00088 else:
00089 for k in range(0, nproc * options.nch):
00090 if nuisline[k] < 0.005: continue
00091 data[mh]['nuis'].append([(x if j == k else 0) for j,x in enumerate(nuisline)])
00092 data[mh]['nuisname'].append('Statistics in MC or control sample')
00093
00094 if options.asimov:
00095 for mh in data.keys():
00096 for i in range(options.nch):
00097 start = i*(nproc); end = start + nproc
00098 data[mh]['obs'][i] = sum(data[mh]['exp'][start+1:end])
00099 if options.bfluct != 0:
00100 data[mh]['obs'][i] = max(0, data[mh]['obs'][i] + options.bfluct * sqrt(data[mh]['obs'][i]))
00101 if options.signal: data[mh]['obs'][i] += data[mh]['exp'][start]
00102 if options.floor: data[mh]['obs'][i] = floor(data[mh]['obs'][i])
00103
00104
00105 if options.optimize:
00106 for mh in data.keys():
00107
00108 hasrate = [0 for p in range(nproc)]
00109 for i in range(options.nch):
00110 for p in range(nproc):
00111 if data[mh]['exp'][p] != 0: hasrate[p] = 1
00112 for p in range(nproc):
00113 if hasrate[p] == 0: print "For mH = ", mh , " process ", processnames[p], " does not contribute."
00114 data[mh]['nuis'] = [ [x for i,x in enumerate(n) if hasrate[i%nproc]] for n in data[mh]['nuis'] ]
00115 data[mh]['processnames'] = [ x for i,x in enumerate(data[mh]['processnames']) if hasrate[i%nproc] ]
00116 data[mh]['exp'] = [ x for i,x in enumerate(data[mh]['exp']) if hasrate[i%nproc] ]
00117
00118 data[mh]['nuisname'] = [ data[mh]['nuisname'][i] for i,n in enumerate(data[mh]['nuis']) if sum(n) > 0 ]
00119 data[mh]['nuis'] = [ n for i,n in enumerate(data[mh]['nuis']) if sum(n) > 0 ]
00120
00121 print "Generating datacards: "
00122 for mh,D in data.items():
00123
00124
00125 filename = "%s-mH%s.txt" % (options.label,mh)
00126 fout = open(filename, "w")
00127 if fout == None: raise RuntimeError, "Cannot open %s for writing" % filename
00128 print " - "+filename
00129 nproc = len(data[mh]['exp'])/data[mh]['nch']
00130
00131 fout.write( "Limit (%s), mH = %s GeV\n" % (options.label,mh) )
00132 fout.write( "date 2010.11.30\n" )
00133 fout.write( "\n" )
00134 fout.write( "imax %d number of channels\n" % D['nch'] )
00135 fout.write( "jmax %d number of background\n" % (nproc-1) )
00136 fout.write( "kmax %d number of nuisance parameters\n" % len(D['nuis']) )
00137 fout.write( "---------------------------------------------------------------------------------------------------------------\n" );
00138 fout.write( "Observation " + (" ".join( "%7.3f" % X for X in D['obs'])) + "\n" )
00139 fout.write( "---------------------------------------------------------------------------------------------------------------\n" );
00140 pad = " "*27 if options.labelSyst else "";
00141 fout.write( "bin " + pad + (" ".join("%6d" % (X/nproc+1) for X in range(nproc*options.nch)) ) + "\n" )
00142 fout.write( "process " + pad + (" ".join("%6.6s" % X for X in D['processnames'])) + "\n" )
00143 fout.write( "process " + pad + (" ".join("%6d" % (i%nproc) for i in range(nproc*options.nch))) + "\n")
00144 fout.write( "rate " + pad + (" ".join("%6.2f" % f for f in D['exp'])) + "\n")
00145 fout.write( "---------------------------------------------------------------------------------------------------------------\n" );
00146 used_syst_labels = {}
00147 for (inuis,N) in enumerate(D['nuis']):
00148 name = "%-3d" % (inuis+1); id = name
00149 isStat = (D['nuisname'][inuis] == "Statistics in MC or control sample")
00150 if options.labelSyst:
00151 id = iddify(D['nuisname'][inuis])
00152 if isStat: id = "Stat%d" % inuis
00153 if id in used_syst_labels: id += "_%d" % inuis
00154 used_syst_labels[id] = True
00155 name = "%-30s" % id
00156 if options.gamma and isStat:
00157
00158 name = "%-22s" % id
00159
00160 sumN = sum([1.0/(X*X) for X in N if X > 0]);
00161 nN = sum([1 for X in N if X > 0]);
00162 realN = int(floor(sumN/nN+1.5)) if nN else 1
00163 alphas = [ (D['exp'][i]/(realN) if X else 0) for i,X in enumerate(N) ]
00164 fout.write( name+" gmN %7d " % realN + (" ".join([("%7.5f" % X if X else " 0 ") for X in alphas])) + " " + D['nuisname'][inuis] + "\n")
00165 else:
00166 if id in options.gmM:
00167 fout.write( name+" gmM " + (" ".join(["%6.3f" % (0.0+X) for X in N])) + " " + D['nuisname'][inuis] + "\n")
00168 else:
00169 fout.write( name+" lnN " + (" ".join(["%6.3f" % (1.0+X) for X in N])) + " " + D['nuisname'][inuis] + "\n")