00001
00002 import re, os
00003 from sys import argv, stdout, stderr, exit
00004 from optparse import OptionParser
00005 from math import *
00006
00007
00008 argv.append( '-b-' )
00009 import ROOT
00010 ROOT.gROOT.SetBatch(True)
00011 argv.remove( '-b-' )
00012 parser = OptionParser(usage="usage: %prog [options] [mass1 mass2] mass3 > datacard3.txt \nrun with --help to get list of options")
00013 parser.add_option("--jets", dest="jets", type="int", default=0, help="Jet bin");
00014 parser.add_option("--xsbr", dest="xsbr", action="store_true", default=False, help="Use correct XS*BR for Higgs")
00015
00016
00017 parser.add_option("--log", dest="log", action="store_true", default=False, help="Use log-scale interpolation for yields (default is linear)")
00018 parser.add_option("--ddir", dest="ddir", type="string", default=".", help="Path to the datacards")
00019 parser.add_option("--refmasses", dest="refmasses", type="string", default="hww.masses.txt", help="File containing the reference masses between which to interpolate (relative to --options.ddir)")
00020 parser.add_option("--postfix", dest="postfix", type="string", default="", help="Postfix to add to datacard name")
00021 (options, args) = parser.parse_args()
00022 options.bin = True; options.stat = False
00023 if len(args) not in [1,3]:
00024 parser.print_usage()
00025 exit(1)
00026
00027 from HiggsAnalysis.CombinedLimit.DatacardParser import *
00028
00029 refmasses = [ int(line) for line in open(options.ddir+"/"+options.refmasses,"r") ]
00030
00031 if len(args) == 1:
00032 mass = float(args[0])
00033 mass1 = max([m for m in refmasses if m <= mass])
00034 mass2 = min([m for m in refmasses if m >= mass])
00035 else:
00036 mass1 = int(args[0])
00037 mass2 = int(args[1])
00038 mass = float(args[2])
00039
00040 if mass in refmasses and options.postfix == "": raise RuntimeError, "Will not overwrite the reference masses"
00041
00042
00043 dm1 = abs(mass1 - mass)
00044 dm2 = abs(mass2 - mass)
00045 if (dm2 < dm1) or (dm2 == dm1 and abs(mass1 - 164) < abs(mass2 - 164)):
00046 (mass1, mass2) = (mass2, mass1)
00047
00048
00049 xsbr1 = { 'ggH':1.0, 'qqH':1.0 }
00050 xsbr2 = { 'ggH':1.0, 'qqH':1.0 }
00051 xsbr = { 'ggH':1.0, 'qqH':1.0 }
00052 if options.xsbr:
00053 def file2map(x):
00054 ret = {}; headers = []
00055 for x in open(x,"r"):
00056 cols = x.split()
00057 if len(cols) < 2: continue
00058 if "mH" in x:
00059 headers = [i.strip() for i in cols[1:]]
00060 else:
00061 fields = [ float(i) for i in cols ]
00062 ret[fields[0]] = dict(zip(headers,fields[1:]))
00063 return ret
00064 path = os.environ['CMSSW_BASE']+"/src/HiggsAnalysis/CombinedLimit/data/";
00065 ggXS = file2map(path+"YR-XS-ggH.txt")
00066 qqXS = file2map(path+"YR-XS-vbfH.txt")
00067 br = file2map(path+"YR-BR3.txt")
00068
00069 for M in (450,550):
00070 ggXS[M] = dict([ (key, 0.5*(ggXS[M+10][key] + ggXS[M-10][key])) for key in ggXS[M+10].iterkeys() ])
00071 qqXS[M] = dict([ (key, 0.5*(qqXS[M+10][key] + qqXS[M-10][key])) for key in qqXS[M+10].iterkeys() ])
00072 br[M] = dict([ (key, 0.5*(br[M+10][key] + br[M-10][key])) for key in br[M+10].iterkeys() ])
00073 xsbr1['ggH'] = ggXS[mass1]['XS_pb'] * br[mass1]['H_evmv']
00074 xsbr2['ggH'] = ggXS[mass2]['XS_pb'] * br[mass2]['H_evmv']
00075 xsbr ['ggH'] = ggXS[mass ]['XS_pb'] * br[mass ]['H_evmv']
00076 xsbr1['qqH'] = qqXS[mass1]['XS_pb'] * br[mass1]['H_evmv']
00077 xsbr2['qqH'] = qqXS[mass2]['XS_pb'] * br[mass2]['H_evmv']
00078 xsbr ['qqH'] = qqXS[mass ]['XS_pb'] * br[mass ]['H_evmv']
00079
00080 print "Will interpolate %g from [%d, %d]" % (mass, mass1, mass2)
00081
00082 alpha = abs(mass2 - mass)/abs(mass2 - mass1) if mass1 != mass2 else 1.0;
00083 beta = 1 - alpha;
00084 os.system("cp %s/%d/hww_%dj.input.root %s/%g/hww_%dj%s.input.root" % (options.ddir,mass1,options.jets,options.ddir,mass,options.jets,options.postfix))
00085 ofile = ROOT.TFile( "%s/%g/hww_%dj%s.input.root" % (options.ddir,mass,options.jets,options.postfix) , "UPDATE" )
00086
00087 file1 = options.ddir+"/%d/hww_%dj_shape.txt" % (mass1, options.jets)
00088 file2 = options.ddir+"/%d/hww_%dj_shape.txt" % (mass2, options.jets)
00089 options.fileName = file1; options.mass = mass1;
00090 DC1 = parseCard(open(file1,"r"), options)
00091
00092 options.fileName = file2; options.mass = mass2;
00093 DC2 = parseCard(open(file2,"r"), options)
00094
00095
00096 if DC1.bins != DC2.bins: raise RuntimeError, "The two datacards have different bins: %s has %s, %s has %s" % (file1, DC1.bins, file2, DC2.bins)
00097 if DC1.processes != DC2.processes: raise RuntimeError, "The two datacards have different processes: %s has %s, %s has %s" % (file1, DC1.processes, file2, DC2.processes)
00098 if DC1.signals != DC2.signals: raise RuntimeError, "The two datacards have different signals: %s has %s, %s has %s" % (file1, DC1.signals, file2, DC2.signals)
00099 if DC1.isSignal != DC2.isSignal: raise RuntimeError, "The two datacards have different isSignal: %s has %s, %s has %s" % (file1, DC1.isSignal, file2, DC2.isSignal)
00100
00101 if len(DC1.bins) != 1: raise RuntimeError, "This does not work on multi-channel"
00102 obsline = [str(x) for x in DC1.obs.values()]; obskeyline = DC1.bins; cmax = 5;
00103 keyline = []; expline = []; systlines = {}; systlines2 = {}
00104 signals = []; backgrounds = []; shapeLines = [];
00105 paramSysts = {}; flatParamNuisances = {}
00106 for (name,nf,pdf,args,errline) in DC1.systs:
00107 systlines[name] = [ pdf, args, errline, nf ]
00108 for (name,nf,pdf,args,errline) in DC2.systs:
00109 systlines2[name] = [ pdf, args, errline, nf ]
00110 for b,p,sig in DC1.keyline:
00111 if p not in DC2.exp[b].keys(): raise RuntimeError, "Process %s contributes to bin %s in card %s but not in card %s" % (p, b, file1, file2)
00112 rate = DC1.exp[b][p]
00113 if p in ['ggH', 'qqH']:
00114 eff = rate/xsbr1[p]
00115 rate = eff * xsbr[p]
00116 if rate != 0:
00117 histo = ofile.Get("histo_%s" % p);
00118 histo.Scale(rate/histo.Integral())
00119 ofile.WriteTObject(histo,"histo_%s" % p,"Overwrite");
00120 keyline.append( (b, p, DC1.isSignal[p]) )
00121 expline.append( "%.4f" % rate )
00122 if False:
00123 for name in systlines.keys():
00124 errline = systlines[name][2]
00125 if b in errline:
00126 if p in errline[b]:
00127 if options.log and pdf == "gmN" and errline[b][p] != 0:
00128 errline[b][p] = exp(alpha * log(systlines[name][2][b][p]) + beta*log(systlines2[name][2][b][p]))
00129 else:
00130 errline[b][p] = alpha * systlines[name][2][b][p] + beta*systlines2[name][2][b][p]
00131 shapeLines.append( ("*", obskeyline[0], [ "hww_%dj%s.input.root" % (options.jets,options.postfix), "histo_$PROCESS" ]) )
00132 shapeLines.append( ("data_obs", obskeyline[0], [ "hww_%dj%s.input.root" % (options.jets,options.postfix), "histo_Data" ]) )
00133
00134
00135 xfile = open(options.ddir+"/%d/hww_%dj_shape%s.txt" % (mass, options.jets,options.postfix), "w")
00136 xfile.write(" ".join(["imax %d number of bins" % len(DC1.bins)])+"\n")
00137 xfile.write(" ".join(["jmax * number of processes minus 1"])+"\n")
00138 xfile.write(" ".join(["kmax * number of nuisance parameters"])+"\n")
00139 xfile.write(" ".join(["-" * 130])+"\n")
00140 if shapeLines:
00141 chmax = max([max(len(p),len(c)) for p,c,x in shapeLines]);
00142 cfmt = "%-"+str(chmax)+"s ";
00143 for (process,channel,stuff) in shapeLines:
00144 xfile.write(" ".join(["shapes", cfmt % process, cfmt % channel, ' '.join(stuff)])+"\n")
00145 xfile.write(" ".join(["-" * 130])+"\n")
00146
00147 if obsline:
00148 cmax = max([cmax]+[len(l) for l in obskeyline]+[len(x) for x in obsline])
00149 cfmt = "%-"+str(cmax)+"s";
00150 xfile.write(" ".join(["bin ", " ".join([cfmt % x for x in obskeyline])])+"\n")
00151 xfile.write(" ".join(["observation ", " ".join([cfmt % x for x in obsline])])+"\n")
00152
00153 xfile.write(" ".join(["-" * 150])+"\n")
00154
00155 pidline = []; signals = []; backgrounds = []
00156 for (b,p,s) in keyline:
00157 if s:
00158 if p not in signals: signals.append(p)
00159 pidline.append(-len(DC1.signals)+signals.index(p)+1)
00160 else:
00161 if p not in backgrounds: backgrounds.append(p)
00162 pidline.append(1+backgrounds.index(p))
00163 cmax = max([cmax]+[max(len(p),len(b)) for p,b,s in keyline]+[len(e) for e in expline])
00164 hmax = max([10] + [len("%-12s %s %s" % (l,p,a)) for l,(p,a,e,nf) in systlines.items()])
00165 cfmt = "%-"+str(cmax)+"s"; hfmt = "%-"+str(hmax)+"s ";
00166 xfile.write(" ".join([hfmt % "bin", " ".join([cfmt % p for p,b,s in keyline])])+"\n")
00167 xfile.write(" ".join([hfmt % "process", " ".join([cfmt % b for p,b,s in keyline])])+"\n")
00168 xfile.write(" ".join([hfmt % "process", " ".join([cfmt % x for x in pidline])])+"\n")
00169 xfile.write(" ".join([hfmt % "rate", " ".join([cfmt % x for x in expline])])+"\n")
00170 xfile.write(" ".join(["-" * 150])+"\n")
00171 sysnamesSorted = systlines.keys(); sysnamesSorted.sort()
00172 for name in sysnamesSorted:
00173 (pdf,pdfargs,effect,nofloat) = systlines[name]
00174 if nofloat: name += "[nofloat]"
00175 systline = []
00176 for b,p,s in keyline:
00177 try:
00178 systline.append(effect[b][p] if (effect[b][p] != 1.0 or pdf != 'lnN') else "-")
00179 except KeyError:
00180 systline.append("-");
00181 xfile.write(" ".join([hfmt % ("%-28s %s %s" % (name, pdf, " ".join(pdfargs))), " ".join([cfmt % x for x in systline])])+"\n")
00182 for (pname, pargs) in paramSysts.items():
00183 xfile.write(" ".join(["%-12s param %s" % (pname, " ".join(pargs))])+"\n")
00184
00185 for pname in flatParamNuisances.iterkeys():
00186 xfile.write(" ".join(["%-12s flatParam" % pname])+"\n")