CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_9_patch3/src/HiggsAnalysis/CombinedLimit/data/hww/interpolateCardsSimple.py

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 import re, os
00003 from sys import argv, stdout, stderr, exit
00004 from optparse import OptionParser
00005 from math import *
00006 
00007 # import ROOT with a fix to get batch mode (https://root.cern.ch/phpBB3/viewtopic.php?t=3198)
00008 argv.append( '-b-' )
00009 import ROOT
00010 ROOT.gROOT.SetBatch(True)
00011 argv.remove( '-b-' )
00012 parser = OptionParser(usage="usage: %prog [options] [mass1 mass2] mass3 > datacard3.txt \nrun with --help to get list of options")
00013 parser.add_option("--jets",  dest="jets",   type="int",    default=0,  help="Jet bin");
00014 parser.add_option("--xsbr",  dest="xsbr",   action="store_true", default=False, help="Use correct XS*BR for Higgs")
00015 #parser.add_option("--doeff", dest="doeff",  action="store_true", default=False, help="Include interpolation of efficiency")
00016 #parser.add_option("--nobg",  dest="nobg",   action="store_true", default=False, help="No interpolation of background normalizations")
00017 parser.add_option("--log",   dest="log",    action="store_true", default=False, help="Use log-scale interpolation for yields (default is linear)")
00018 parser.add_option("--ddir",  dest="ddir", type="string", default=".", help="Path to the datacards")
00019 parser.add_option("--refmasses",  dest="refmasses", type="string",  default="hww.masses.txt", help="File containing the reference masses between which to interpolate (relative to --options.ddir)")
00020 parser.add_option("--postfix",    dest="postfix",   type="string",  default="",               help="Postfix to add to datacard name")
00021 (options, args) = parser.parse_args()
00022 options.bin = True; options.stat = False
00023 if len(args) not in [1,3]:
00024     parser.print_usage()
00025     exit(1)
00026 
00027 from HiggsAnalysis.CombinedLimit.DatacardParser import *
00028 
00029 refmasses = [ int(line) for line in open(options.ddir+"/"+options.refmasses,"r") ]
00030 
00031 if len(args) == 1:
00032     mass = float(args[0])
00033     mass1 = max([m for m in refmasses if m <= mass])
00034     mass2 = min([m for m in refmasses if m >= mass])
00035 else:
00036     mass1 = int(args[0])
00037     mass2 = int(args[1])
00038     mass  = float(args[2])
00039 
00040 if mass in refmasses and options.postfix == "": raise RuntimeError, "Will not overwrite the reference masses"
00041 
00042 ## Make sure mass1 is always the closest (and pick the worse one in case of a tie)
00043 dm1 = abs(mass1 - mass)
00044 dm2 = abs(mass2 - mass)
00045 if (dm2 < dm1) or (dm2 == dm1 and abs(mass1 - 164) < abs(mass2 - 164)):
00046     (mass1, mass2) = (mass2, mass1)
00047 
00048 
00049 xsbr1 = { 'ggH':1.0, 'qqH':1.0 }
00050 xsbr2 = { 'ggH':1.0, 'qqH':1.0 }
00051 xsbr  = { 'ggH':1.0, 'qqH':1.0 }
00052 if options.xsbr:
00053     def file2map(x): 
00054         ret = {}; headers = []
00055         for x in open(x,"r"):
00056             cols = x.split()
00057             if len(cols) < 2: continue
00058             if "mH" in x: 
00059                 headers = [i.strip() for i in cols[1:]]
00060             else:
00061                 fields = [ float(i) for i in cols ]
00062                 ret[fields[0]] = dict(zip(headers,fields[1:]))
00063         return ret
00064     path = os.environ['CMSSW_BASE']+"/src/HiggsAnalysis/CombinedLimit/data/";   
00065     ggXS = file2map(path+"YR-XS-ggH.txt")
00066     qqXS = file2map(path+"YR-XS-vbfH.txt")
00067     br   = file2map(path+"YR-BR3.txt")
00068     # create points at 450, 550 by interpolation
00069     for M in (450,550):
00070         ggXS[M] = dict([ (key, 0.5*(ggXS[M+10][key] + ggXS[M-10][key])) for key in ggXS[M+10].iterkeys() ])
00071         qqXS[M] = dict([ (key, 0.5*(qqXS[M+10][key] + qqXS[M-10][key])) for key in qqXS[M+10].iterkeys() ])
00072         br[M] = dict([ (key, 0.5*(br[M+10][key] + br[M-10][key])) for key in br[M+10].iterkeys() ])
00073     xsbr1['ggH'] = ggXS[mass1]['XS_pb'] * br[mass1]['H_evmv']
00074     xsbr2['ggH'] = ggXS[mass2]['XS_pb'] * br[mass2]['H_evmv']
00075     xsbr ['ggH'] = ggXS[mass ]['XS_pb'] * br[mass ]['H_evmv']
00076     xsbr1['qqH'] = qqXS[mass1]['XS_pb'] * br[mass1]['H_evmv']
00077     xsbr2['qqH'] = qqXS[mass2]['XS_pb'] * br[mass2]['H_evmv']
00078     xsbr ['qqH'] = qqXS[mass ]['XS_pb'] * br[mass ]['H_evmv']
00079 
00080 print "Will interpolate %g from [%d, %d]" % (mass, mass1, mass2)
00081 
00082 alpha = abs(mass2 - mass)/abs(mass2 - mass1) if mass1 != mass2 else 1.0; 
00083 beta = 1 - alpha;
00084 os.system("cp %s/%d/hww_%dj.input.root  %s/%g/hww_%dj%s.input.root" % (options.ddir,mass1,options.jets,options.ddir,mass,options.jets,options.postfix))
00085 ofile  = ROOT.TFile( "%s/%g/hww_%dj%s.input.root" % (options.ddir,mass,options.jets,options.postfix) , "UPDATE" )
00086 
00087 file1 = options.ddir+"/%d/hww_%dj_shape.txt" % (mass1, options.jets)
00088 file2 = options.ddir+"/%d/hww_%dj_shape.txt" % (mass2, options.jets)
00089 options.fileName = file1; options.mass = mass1;
00090 DC1 = parseCard(open(file1,"r"), options)
00091 
00092 options.fileName = file2; options.mass = mass2;
00093 DC2 = parseCard(open(file2,"r"), options)
00094 
00095 ## Basic consistency check
00096 if DC1.bins != DC2.bins: raise RuntimeError, "The two datacards have different bins: %s has %s, %s has %s" % (file1, DC1.bins, file2, DC2.bins)
00097 if DC1.processes != DC2.processes: raise RuntimeError, "The two datacards have different processes: %s has %s, %s has %s" % (file1, DC1.processes, file2, DC2.processes)
00098 if DC1.signals   != DC2.signals:   raise RuntimeError, "The two datacards have different signals: %s has %s, %s has %s" % (file1, DC1.signals, file2, DC2.signals)
00099 if DC1.isSignal  != DC2.isSignal:  raise RuntimeError, "The two datacards have different isSignal: %s has %s, %s has %s" % (file1, DC1.isSignal, file2, DC2.isSignal)
00100 
00101 if len(DC1.bins) != 1: raise RuntimeError, "This does not work on multi-channel"
00102 obsline = [str(x) for x in DC1.obs.values()]; obskeyline = DC1.bins; cmax = 5;
00103 keyline = []; expline = []; systlines = {}; systlines2 = {}
00104 signals = []; backgrounds = []; shapeLines = []; 
00105 paramSysts = {}; flatParamNuisances = {}
00106 for (name,nf,pdf,args,errline) in DC1.systs:
00107     systlines[name] = [ pdf, args, errline, nf ]
00108 for (name,nf,pdf,args,errline) in DC2.systs:
00109     systlines2[name] = [ pdf, args, errline, nf ]
00110 for b,p,sig in DC1.keyline:
00111     if p not in DC2.exp[b].keys(): raise RuntimeError, "Process %s contributes to bin %s in card %s but not in card %s" % (p, b, file1, file2)
00112     rate = DC1.exp[b][p]
00113     if p in  ['ggH', 'qqH']:
00114         eff = rate/xsbr1[p]
00115         rate = eff * xsbr[p]
00116     if rate != 0: 
00117         histo = ofile.Get("histo_%s" % p);
00118         histo.Scale(rate/histo.Integral())
00119         ofile.WriteTObject(histo,"histo_%s" % p,"Overwrite");
00120     keyline.append( (b, p, DC1.isSignal[p]) )
00121     expline.append( "%.4f" % rate )
00122     if False:
00123         for name in systlines.keys():
00124             errline = systlines[name][2]
00125             if b in errline: 
00126                 if p in errline[b]:
00127                     if options.log and pdf == "gmN" and errline[b][p] != 0: 
00128                         errline[b][p] = exp(alpha * log(systlines[name][2][b][p]) + beta*log(systlines2[name][2][b][p]))
00129                     else:
00130                         errline[b][p] = alpha * systlines[name][2][b][p] + beta*systlines2[name][2][b][p]
00131 shapeLines.append( ("*",        obskeyline[0], [ "hww_%dj%s.input.root" % (options.jets,options.postfix),  "histo_$PROCESS" ]) )
00132 shapeLines.append( ("data_obs", obskeyline[0], [ "hww_%dj%s.input.root" % (options.jets,options.postfix),  "histo_Data"     ]) )
00133 
00134 
00135 xfile = open(options.ddir+"/%d/hww_%dj_shape%s.txt" % (mass, options.jets,options.postfix), "w")
00136 xfile.write(" ".join(["imax %d number of bins" % len(DC1.bins)])+"\n")
00137 xfile.write(" ".join(["jmax *  number of processes minus 1"])+"\n")
00138 xfile.write(" ".join(["kmax *  number of nuisance parameters"])+"\n")
00139 xfile.write(" ".join(["-" * 130])+"\n")
00140 if shapeLines:
00141     chmax = max([max(len(p),len(c)) for p,c,x in shapeLines]);
00142     cfmt = "%-"+str(chmax)+"s ";
00143     for (process,channel,stuff) in shapeLines:
00144         xfile.write(" ".join(["shapes", cfmt % process, cfmt % channel, ' '.join(stuff)])+"\n")
00145     xfile.write(" ".join(["-" * 130])+"\n")
00146 
00147 if obsline:
00148     cmax = max([cmax]+[len(l) for l in obskeyline]+[len(x) for x in obsline])
00149     cfmt = "%-"+str(cmax)+"s";
00150     xfile.write(" ".join(["bin         ", "  ".join([cfmt % x for x in obskeyline])])+"\n")
00151     xfile.write(" ".join(["observation ", "  ".join([cfmt % x for x in obsline])])+"\n")
00152 
00153 xfile.write(" ".join(["-" * 150])+"\n")
00154 
00155 pidline = []; signals = []; backgrounds = []
00156 for (b,p,s) in keyline:
00157     if s:
00158         if p not in signals: signals.append(p)
00159         pidline.append(-len(DC1.signals)+signals.index(p)+1)
00160     else:
00161         if p not in backgrounds: backgrounds.append(p)
00162         pidline.append(1+backgrounds.index(p))
00163 cmax = max([cmax]+[max(len(p),len(b)) for p,b,s in keyline]+[len(e) for e in expline])
00164 hmax = max([10] + [len("%-12s  %s %s" % (l,p,a)) for l,(p,a,e,nf) in systlines.items()])
00165 cfmt  = "%-"+str(cmax)+"s"; hfmt = "%-"+str(hmax)+"s  ";
00166 xfile.write(" ".join([hfmt % "bin",     "  ".join([cfmt % p for p,b,s in keyline])])+"\n")
00167 xfile.write(" ".join([hfmt % "process", "  ".join([cfmt % b for p,b,s in keyline])])+"\n")
00168 xfile.write(" ".join([hfmt % "process", "  ".join([cfmt % x for x in pidline])])+"\n")
00169 xfile.write(" ".join([hfmt % "rate",    "  ".join([cfmt % x for x in expline])])+"\n")
00170 xfile.write(" ".join(["-" * 150])+"\n")
00171 sysnamesSorted = systlines.keys(); sysnamesSorted.sort()
00172 for name in sysnamesSorted:
00173     (pdf,pdfargs,effect,nofloat) = systlines[name]
00174     if nofloat: name += "[nofloat]"
00175     systline = []
00176     for b,p,s in keyline:
00177         try:
00178             systline.append(effect[b][p] if (effect[b][p] != 1.0 or pdf != 'lnN') else "-")
00179         except KeyError:
00180             systline.append("-");
00181     xfile.write(" ".join([hfmt % ("%-28s   %s  %s" % (name, pdf, " ".join(pdfargs))), "  ".join([cfmt % x for x in systline])])+"\n")
00182 for (pname, pargs) in paramSysts.items():
00183     xfile.write(" ".join(["%-12s  param  %s" %  (pname, " ".join(pargs))])+"\n")
00184 
00185 for pname in flatParamNuisances.iterkeys(): 
00186     xfile.write(" ".join(["%-12s  flatParam" % pname])+"\n")