CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_5_3_14/src/HiggsAnalysis/CombinedLimit/data/hww/errorMatrix2Lands.py

Go to the documentation of this file.
00001 import re
00002 from sys import argv
00003 from optparse import OptionParser
00004 parser = OptionParser()
00005 parser.add_option("-s", "--stat",     dest="stat",     default=False, action="store_true")
00006 parser.add_option("-S", "--signal",   dest="signal",   default=False, action="store_true")
00007 parser.add_option("-a", "--asimov",   dest="asimov",   default=False, action="store_true")
00008 parser.add_option("-o", "--optimize", dest="optimize", default=False, action="store_true")
00009 parser.add_option("-l", "--label",    dest="label",    type="string", default="hww")
00010 parser.add_option("-4", "--4th-gen",  dest="sm4",      default=False, action="store_true")
00011 (options, args) = parser.parse_args()
00012 
00013 if len(args) < 1: raise RuntimeError, "Usage: errorMatrix2Lands.py [options] errorMatrix.txt "
00014 
00015 file = open(args[0], "r")
00016 
00017 data = {}
00018 
00019 header = file.readline() # skip header
00020 processnames = header.split()[2:]
00021 # read yields
00022 for l in file:
00023     l = l.replace("---","0")
00024     m = re.match(r"(\d+) Yield\s+((\d+\.?\d*(E[+\-]\d+)?\s+)+)", l)
00025     if not m: break
00026     mh = m.group(1)
00027     yields = [float(x) for x in m.group(2).split()];
00028     if len(yields) != len(processnames)+1: raise RuntimeError, "Length of yields does not match with process names"
00029     data[mh] = { 'obs':yields[0], 'exp':yields[1:], 'processnames':processnames[:], 'nuis':[] }
00030 
00031 # read nuisances
00032 if not options.stat:
00033     for l in file:
00034         l = l.replace("---","0")
00035         m = re.match(r"(.*?)\s+0\s+((\d+\.?\d*(E[+\-]\d+)?\s+)+)", l)
00036         if m == None: raise ValueError, "Missing line "+l
00037         sysname = m.group(1)
00038         syseff  = [float(x) for x in m.group(2).split()]
00039         # decide which selections are affected
00040         mhs = data.keys()
00041         if re.match(r"\d{3}\s+.*?", sysname) and data.has_key(sysname[0:3]):
00042             mhs = [ sysname[0:3] ]
00043         for mh in mhs:
00044             if len(data[mh]['exp']) != len(syseff): raise RuntimeError, "Sysline %s: len(syseff) = %d, len(exp) = %d\n" % (l,len(syseff),len(data[mh]['exp']))
00045             # special case: the stats line have to be expanded in N independent systematics
00046             if sysname != mh+" Stats":
00047                 data[mh]['nuis'].append(syseff)
00048             else:   
00049                 nproc = len(syseff)-(1 if options.sm4 else 0)
00050                 data[mh]['nuis'].append(syseff[0:(2 if options.sm4 else 1)] + [0] * (nproc-1))
00051                 for i in range(1,nproc):
00052                     data[mh]['nuis'].append([(x if j == i+(1 if options.sm4 else 0) else 0) for j,x in enumerate(syseff)])
00053 
00054 if options.optimize:
00055     for mh in data.keys():
00056         # step 1: strip processes with no rate
00057         data[mh]['nuis']         = [ [n[i] for i,k in enumerate(data[mh]['exp']) if k > 0] for n in data[mh]['nuis'] ]
00058         data[mh]['processnames'] = [ data[mh]['processnames'][i] for i,k in enumerate(data[mh]['exp']) if k > 0 ]
00059         data[mh]['exp']          = [ k for k in data[mh]['exp'] if k > 0 ]
00060         # step 2: strip nuisances with no non-zero value
00061         data[mh]['nuis'] = [ n for n in data[mh]['nuis'] if sum(n) > 0 ]
00062         
00063 if options.asimov:
00064     for mh in data.keys():
00065         data[mh]['obs'] = sum(data[mh]['exp'][(2 if options.sm4 else 1):])
00066         if options.signal: data[mh]['obs'] += data[mh]['exp'][0]
00067 
00068 print "Generating datacards: " 
00069 models = [ 'SM', '4G' ] if options.sm4 else [ 'SM' ]
00070 for (isig,name) in enumerate(models):
00071     for mh,D in  data.items():
00072         # prepare variables
00073         nproc = len(D['exp'])-(1 if options.sm4 else 0) # there's 1 more column, as it has both SM and 4G
00074         indices = [isig] + range(len(models),nproc+(1 if options.sm4 else 0))
00075         # open file
00076         filename = "%s-%s-mH%s.txt" % (options.label,name,mh)
00077         fout = open(filename, "w")
00078         if fout == None: raise RuntimeError, "Cannot open %s for writing" % filename
00079         print " - "+filename
00080         # write datacard
00081         fout.write( "%s limit (%s), mH = %s GeV\n" % (name,options.label,mh) )
00082         fout.write( "date 2010.11.30\n" )
00083         fout.write( "\n" )
00084         fout.write( "imax %d number of channels\n"           % 1 )
00085         fout.write( "jmax %d number of background\n"          % (nproc-1) )
00086         fout.write( "kmax %d number of nuisance parameters\n" % len(D['nuis']) )
00087         fout.write( "Observation %f\n" % D['obs'] )
00088         fout.write( "bin " + ("1 "*nproc) + "\n" )
00089         fout.write( "process " + (" ".join([D['processnames'][i] for i in indices])) + "\n" )
00090         fout.write( "process " + (" ".join([str(i) for i in range(nproc)])) + "\n")
00091         fout.write( "rate " + str(D['exp'][isig])+ " " + (" ".join([str(f) for f in D['exp'][(2 if options.sm4 else 1):]])) + "\n")
00092         for (inuis,N) in enumerate(D['nuis']):
00093             fout.write( str(inuis+1) + " lnN " + (" ".join([str(1.0+N[i]) for i in indices])) + "\n")