CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_10/src/HiggsAnalysis/CombinedLimit/data/hww/errorMatrix2Lands_multiChannel.py

Go to the documentation of this file.
00001 import re
00002 from sys import argv
00003 from math import *
00004 from optparse import OptionParser
00005 parser = OptionParser()
00006 parser.add_option("-s", "--stat",     dest="stat",     default=False, action="store_true")
00007 parser.add_option("-S", "--signal",   dest="signal",   default=False, action="store_true")
00008 parser.add_option("-a", "--asimov",   dest="asimov",   default=False, action="store_true")
00009 parser.add_option("-F", "--floor",    dest="floor",    default=False, action="store_true")
00010 parser.add_option("-o", "--optimize", dest="optimize", default=False, action="store_true", help="Remove processes or systematics that don't contribute")
00011 parser.add_option("-g", "--gamma", dest="gamma", default=False, action="store_true", help="Use gammas for statistical uncertainties")
00012 parser.add_option("--gmM",   dest="gmM",  type="string", action="append", default=[], help="Use multiplicative gamma for this systematics (can use multiple times)")
00013 parser.add_option("-l", "--label",    dest="label",    type="string", default="hww")
00014 parser.add_option("-L", "--label-syst",    dest="labelSyst", default=False, action="store_true", help="Give names to systematics")
00015 parser.add_option("-4", "--4th-gen",  dest="sm4",      default=False, action="store_true")
00016 parser.add_option("-n", "--nch",      dest="nch",      type="int",    default=1)
00017 parser.add_option("-B", "--b-fluct",  dest="bfluct",   type="float",  default=0)
00018 (options, args) = parser.parse_args()
00019 
00020 if len(args) < 1: raise RuntimeError, "Usage: errorMatrix2Lands.py [options] errorMatrix.txt "
00021 
00022 file = open(args[0], "r")
00023 
00024 data = {} 
00025 
00026 def iddify(x):
00027     id = x
00028     slang = { "[Cc]ross[ -][Ss]ection":"XS", 
00029               "[Tt]rigger(\s+[Ee]ff(s|ic(ienc(y|ies))?))?":"Trig",
00030               "([Ii]ntegrated\s+)?[Ll]umi(nosity)":"Lumi",
00031               "[Ee]ff(s|ic(ienc(y|ies))?)":"Eff",
00032               "[Nn]orm(alization)":"Norm",
00033               "[Rr]eco(nstruction)":"Reco",
00034               "[Ss]stat(istics?)":"Stat"}
00035     for s,r in slang.items(): id = re.sub(s,r,id)
00036     return re.sub("[^A-Za-z0-9_]", "", re.sub("\s+|-|/","_", re.sub("^\s+|\s+$","",id)))
00037      
00038 header = file.readline() # skip header
00039 processnames = header.split()[2:]
00040 nproc = len(processnames)
00041 if (options.nch > 1):
00042     processnames = header.split()[1:]
00043     nproc = len(processnames)/options.nch-1
00044     processnames = processnames[1:(nproc+1)]
00045     print processnames
00046 # read yields
00047 for l in file:
00048     l = re.sub("--+","0",l)
00049     m = re.match(r"(\d+) Yield\s+((\d+\.?\d*(E[+\-]\d+)?\s+)+)", l)
00050     if not m: break
00051     mh = m.group(1)
00052     yields = [float(x) for x in m.group(2).split()];
00053     line = []
00054     if len(yields) != (options.nch * (nproc+1)): 
00055         raise RuntimeError, "len(yields) = %d != options.nch * (nproc + 1) = %d * (%d+1)" % (len(yields),options.nch,nproc)
00056     data[mh] = { 'nch':0, 'obs':[], 'exp':[], 'processnames':[], 'nuis':[], 'nuisname':[]}
00057     for i in range(options.nch):
00058         start = i*(nproc+1); end = start + nproc + 1
00059         data[mh]['nch'] += 1
00060         data[mh]['obs'] += [yields[start]]
00061         data[mh]['exp'] +=  yields[start+1:end]
00062         data[mh]['processnames'] += processnames[:]
00063 
00064 # read nuisances
00065 if not options.stat:
00066     for l in file:
00067         if l.rstrip().rstrip() == "": continue
00068         l = re.sub("--+","0",l)
00069         m = re.match(r"(.*?)\s+(0\s+(\d+\.?\d*(E[+\-]\d+)?\s+)+)", l)
00070         if m == None: raise ValueError, "Missing line "+l
00071         sysname = m.group(1)
00072         syseff  = [float(x) for x in m.group(2).split()]
00073         if len(syseff) != (options.nch * (nproc+1)): 
00074             raise RuntimeError, "len(syseff) = %d != options.nch * (nproc + 1) = %d * (%d+1)" % (len(syseff),options.nch,nproc)
00075         # decide which selections are affected
00076         mhs = data.keys()
00077         if re.match(r"\d{3}\s+.*?", sysname) and data.has_key(sysname[0:3]):
00078             mhs = [ sysname[0:3] ]
00079         for mh in mhs:
00080             nuisline = []
00081             for i in range(options.nch):
00082                 start = i*(nproc+1); end = start + nproc + 1
00083                 nuisline += syseff[start+1:end] 
00084             # special case: the stats line have to be expanded in N independent systematics
00085             if sysname != mh+" Stats":
00086                 data[mh]['nuis'].append(nuisline)
00087                 data[mh]['nuisname'].append(sysname)
00088             else:   
00089                 for k in range(0, nproc * options.nch):
00090                     if nuisline[k] < 0.005: continue
00091                     data[mh]['nuis'].append([(x if j == k else 0) for j,x in enumerate(nuisline)])
00092                     data[mh]['nuisname'].append('Statistics in MC or control sample')
00093 
00094 if options.asimov:
00095     for mh in data.keys():
00096         for i in range(options.nch):
00097             start = i*(nproc); end = start + nproc
00098             data[mh]['obs'][i] = sum(data[mh]['exp'][start+1:end])
00099             if options.bfluct != 0:
00100                 data[mh]['obs'][i] = max(0, data[mh]['obs'][i] + options.bfluct * sqrt(data[mh]['obs'][i]))
00101             if options.signal: data[mh]['obs'][i] += data[mh]['exp'][start]
00102             if options.floor:  data[mh]['obs'][i] = floor(data[mh]['obs'][i])
00103     
00104 
00105 if options.optimize:
00106     for mh in data.keys():
00107         # step 1: strip processes with no rate
00108         hasrate = [0 for p in range(nproc)]
00109         for i in range(options.nch): 
00110             for p in range(nproc):  
00111                 if data[mh]['exp'][p] != 0: hasrate[p] = 1
00112         for p in range(nproc):
00113             if hasrate[p] == 0: print "For mH = ", mh , " process ", processnames[p], " does not contribute."
00114         data[mh]['nuis']         = [ [x for i,x in enumerate(n) if hasrate[i%nproc]] for n in data[mh]['nuis'] ]
00115         data[mh]['processnames'] = [  x for i,x in enumerate(data[mh]['processnames']) if hasrate[i%nproc] ]
00116         data[mh]['exp']          = [  x for i,x in enumerate(data[mh]['exp'])          if hasrate[i%nproc] ]
00117         # step 2: strip nuisances with no non-zero value
00118         data[mh]['nuisname'] = [ data[mh]['nuisname'][i] for i,n in enumerate(data[mh]['nuis']) if sum(n) > 0 ]
00119         data[mh]['nuis']     = [ n                       for i,n in enumerate(data[mh]['nuis']) if sum(n) > 0 ]
00120         
00121 print "Generating datacards: " 
00122 for mh,D in  data.items():
00123         # prepare variables
00124         # open file
00125         filename = "%s-mH%s.txt" % (options.label,mh)
00126         fout = open(filename, "w")
00127         if fout == None: raise RuntimeError, "Cannot open %s for writing" % filename
00128         print " - "+filename
00129         nproc = len(data[mh]['exp'])/data[mh]['nch']
00130         # write datacard
00131         fout.write( "Limit (%s), mH = %s GeV\n" % (options.label,mh) )
00132         fout.write( "date 2010.11.30\n" )
00133         fout.write( "\n" )
00134         fout.write( "imax %d number of channels\n"            % D['nch'] )
00135         fout.write( "jmax %d number of background\n"          % (nproc-1) )
00136         fout.write( "kmax %d number of nuisance parameters\n" % len(D['nuis']) )
00137         fout.write( "---------------------------------------------------------------------------------------------------------------\n" );
00138         fout.write( "Observation " + (" ".join( "%7.3f" % X for X in D['obs'])) + "\n" )
00139         fout.write( "---------------------------------------------------------------------------------------------------------------\n" );
00140         pad = " "*27 if options.labelSyst else "";
00141         fout.write( "bin      " + pad + ("  ".join("%6d" % (X/nproc+1) for X in range(nproc*options.nch)) ) + "\n" )
00142         fout.write( "process  " + pad + ("  ".join("%6.6s" % X for X in D['processnames'])) + "\n" )
00143         fout.write( "process  " + pad + ("  ".join("%6d" % (i%nproc) for i in range(nproc*options.nch))) + "\n")
00144         fout.write( "rate     " + pad + ("  ".join("%6.2f" % f for f in D['exp'])) + "\n")
00145         fout.write( "---------------------------------------------------------------------------------------------------------------\n" );
00146         used_syst_labels = {}
00147         for (inuis,N) in enumerate(D['nuis']):
00148             name = "%-3d"  % (inuis+1); id = name
00149             isStat = (D['nuisname'][inuis] == "Statistics in MC or control sample")
00150             if options.labelSyst: 
00151                 id = iddify(D['nuisname'][inuis])
00152                 if isStat: id = "Stat%d" % inuis 
00153                 if id in used_syst_labels: id += "_%d" % inuis
00154                 used_syst_labels[id] = True
00155                 name = "%-30s" % id
00156             if options.gamma and isStat:
00157                 # make some room for label
00158                 name = "%-22s" % id
00159                 # have to guess N
00160                 sumN = sum([1.0/(X*X) for X in N if X > 0]); ## people filling error matrix uses the naive 1/sqrt(N) errors
00161                 nN   = sum([1         for X in N if X > 0]);
00162                 realN = int(floor(sumN/nN+1.5)) if nN else 1
00163                 alphas = [ (D['exp'][i]/(realN) if X else 0) for i,X in enumerate(N) ]
00164                 fout.write( name+"  gmN %7d " % realN  + (" ".join([("%7.5f" % X if X else "   0   ") for X in alphas])) + "     " + D['nuisname'][inuis] +  "\n")
00165             else:
00166                 if id in options.gmM:
00167                     fout.write( name+"  gmM "  + ("  ".join(["%6.3f" % (0.0+X) for X in N])) + "     " + D['nuisname'][inuis] +  "\n")
00168                 else:
00169                     fout.write( name+"  lnN "  + ("  ".join(["%6.3f" % (1.0+X) for X in N])) + "     " + D['nuisname'][inuis] +  "\n")