CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
interpolateCardsSimple.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 import re, os
3 from sys import argv, stdout, stderr, exit
4 from optparse import OptionParser
5 from math import *
6 
7 # import ROOT with a fix to get batch mode (http://root.cern.ch/phpBB3/viewtopic.php?t=3198)
8 argv.append( '-b-' )
9 import ROOT
10 ROOT.gROOT.SetBatch(True)
11 argv.remove( '-b-' )
12 parser = OptionParser(usage="usage: %prog [options] [mass1 mass2] mass3 > datacard3.txt \nrun with --help to get list of options")
13 parser.add_option("--jets", dest="jets", type="int", default=0, help="Jet bin");
14 parser.add_option("--xsbr", dest="xsbr", action="store_true", default=False, help="Use correct XS*BR for Higgs")
15 #parser.add_option("--doeff", dest="doeff", action="store_true", default=False, help="Include interpolation of efficiency")
16 #parser.add_option("--nobg", dest="nobg", action="store_true", default=False, help="No interpolation of background normalizations")
17 parser.add_option("--log", dest="log", action="store_true", default=False, help="Use log-scale interpolation for yields (default is linear)")
18 parser.add_option("--ddir", dest="ddir", type="string", default=".", help="Path to the datacards")
19 parser.add_option("--refmasses", dest="refmasses", type="string", default="hww.masses.txt", help="File containing the reference masses between which to interpolate (relative to --options.ddir)")
20 parser.add_option("--postfix", dest="postfix", type="string", default="", help="Postfix to add to datacard name")
21 (options, args) = parser.parse_args()
22 options.bin = True; options.stat = False
23 if len(args) not in [1,3]:
24  parser.print_usage()
25  exit(1)
26 
28 
29 refmasses = [ int(line) for line in open(options.ddir+"/"+options.refmasses,"r") ]
30 
31 if len(args) == 1:
32  mass = float(args[0])
33  mass1 = max([m for m in refmasses if m <= mass])
34  mass2 = min([m for m in refmasses if m >= mass])
35 else:
36  mass1 = int(args[0])
37  mass2 = int(args[1])
38  mass = float(args[2])
39 
40 if mass in refmasses and options.postfix == "": raise RuntimeError, "Will not overwrite the reference masses"
41 
42 ## Make sure mass1 is always the closest (and pick the worse one in case of a tie)
43 dm1 = abs(mass1 - mass)
44 dm2 = abs(mass2 - mass)
45 if (dm2 < dm1) or (dm2 == dm1 and abs(mass1 - 164) < abs(mass2 - 164)):
46  (mass1, mass2) = (mass2, mass1)
47 
48 
49 xsbr1 = { 'ggH':1.0, 'qqH':1.0 }
50 xsbr2 = { 'ggH':1.0, 'qqH':1.0 }
51 xsbr = { 'ggH':1.0, 'qqH':1.0 }
52 if options.xsbr:
53  def file2map(x):
54  ret = {}; headers = []
55  for x in open(x,"r"):
56  cols = x.split()
57  if len(cols) < 2: continue
58  if "mH" in x:
59  headers = [i.strip() for i in cols[1:]]
60  else:
61  fields = [ float(i) for i in cols ]
62  ret[fields[0]] = dict(zip(headers,fields[1:]))
63  return ret
64  path = os.environ['CMSSW_BASE']+"/src/HiggsAnalysis/CombinedLimit/data/";
65  ggXS = file2map(path+"YR-XS-ggH.txt")
66  qqXS = file2map(path+"YR-XS-vbfH.txt")
67  br = file2map(path+"YR-BR3.txt")
68  # create points at 450, 550 by interpolation
69  for M in (450,550):
70  ggXS[M] = dict([ (key, 0.5*(ggXS[M+10][key] + ggXS[M-10][key])) for key in ggXS[M+10].iterkeys() ])
71  qqXS[M] = dict([ (key, 0.5*(qqXS[M+10][key] + qqXS[M-10][key])) for key in qqXS[M+10].iterkeys() ])
72  br[M] = dict([ (key, 0.5*(br[M+10][key] + br[M-10][key])) for key in br[M+10].iterkeys() ])
73  xsbr1['ggH'] = ggXS[mass1]['XS_pb'] * br[mass1]['H_evmv']
74  xsbr2['ggH'] = ggXS[mass2]['XS_pb'] * br[mass2]['H_evmv']
75  xsbr ['ggH'] = ggXS[mass ]['XS_pb'] * br[mass ]['H_evmv']
76  xsbr1['qqH'] = qqXS[mass1]['XS_pb'] * br[mass1]['H_evmv']
77  xsbr2['qqH'] = qqXS[mass2]['XS_pb'] * br[mass2]['H_evmv']
78  xsbr ['qqH'] = qqXS[mass ]['XS_pb'] * br[mass ]['H_evmv']
79 
80 print "Will interpolate %g from [%d, %d]" % (mass, mass1, mass2)
81 
82 alpha = abs(mass2 - mass)/abs(mass2 - mass1) if mass1 != mass2 else 1.0;
83 beta = 1 - alpha;
84 os.system("cp %s/%d/hww_%dj.input.root %s/%g/hww_%dj%s.input.root" % (options.ddir,mass1,options.jets,options.ddir,mass,options.jets,options.postfix))
85 ofile = ROOT.TFile( "%s/%g/hww_%dj%s.input.root" % (options.ddir,mass,options.jets,options.postfix) , "UPDATE" )
86 
87 file1 = options.ddir+"/%d/hww_%dj_shape.txt" % (mass1, options.jets)
88 file2 = options.ddir+"/%d/hww_%dj_shape.txt" % (mass2, options.jets)
89 options.fileName = file1; options.mass = mass1;
90 DC1 = parseCard(open(file1,"r"), options)
91 
92 options.fileName = file2; options.mass = mass2;
93 DC2 = parseCard(open(file2,"r"), options)
94 
95 ## Basic consistency check
96 if DC1.bins != DC2.bins: raise RuntimeError, "The two datacards have different bins: %s has %s, %s has %s" % (file1, DC1.bins, file2, DC2.bins)
97 if DC1.processes != DC2.processes: raise RuntimeError, "The two datacards have different processes: %s has %s, %s has %s" % (file1, DC1.processes, file2, DC2.processes)
98 if DC1.signals != DC2.signals: raise RuntimeError, "The two datacards have different signals: %s has %s, %s has %s" % (file1, DC1.signals, file2, DC2.signals)
99 if DC1.isSignal != DC2.isSignal: raise RuntimeError, "The two datacards have different isSignal: %s has %s, %s has %s" % (file1, DC1.isSignal, file2, DC2.isSignal)
100 
101 if len(DC1.bins) != 1: raise RuntimeError, "This does not work on multi-channel"
102 obsline = [str(x) for x in DC1.obs.values()]; obskeyline = DC1.bins; cmax = 5;
103 keyline = []; expline = []; systlines = {}; systlines2 = {}
104 signals = []; backgrounds = []; shapeLines = [];
105 paramSysts = {}; flatParamNuisances = {}
106 for (name,nf,pdf,args,errline) in DC1.systs:
107  systlines[name] = [ pdf, args, errline, nf ]
108 for (name,nf,pdf,args,errline) in DC2.systs:
109  systlines2[name] = [ pdf, args, errline, nf ]
110 for b,p,sig in DC1.keyline:
111  if p not in DC2.exp[b].keys(): raise RuntimeError, "Process %s contributes to bin %s in card %s but not in card %s" % (p, b, file1, file2)
112  rate = DC1.exp[b][p]
113  if p in ['ggH', 'qqH']:
114  eff = rate/xsbr1[p]
115  rate = eff * xsbr[p]
116  if rate != 0:
117  histo = ofile.Get("histo_%s" % p);
118  histo.Scale(rate/histo.Integral())
119  ofile.WriteTObject(histo,"histo_%s" % p,"Overwrite");
120  keyline.append( (b, p, DC1.isSignal[p]) )
121  expline.append( "%.4f" % rate )
122  if False:
123  for name in systlines.keys():
124  errline = systlines[name][2]
125  if b in errline:
126  if p in errline[b]:
127  if options.log and pdf == "gmN" and errline[b][p] != 0:
128  errline[b][p] = exp(alpha * log(systlines[name][2][b][p]) + beta*log(systlines2[name][2][b][p]))
129  else:
130  errline[b][p] = alpha * systlines[name][2][b][p] + beta*systlines2[name][2][b][p]
131 shapeLines.append( ("*", obskeyline[0], [ "hww_%dj%s.input.root" % (options.jets,options.postfix), "histo_$PROCESS" ]) )
132 shapeLines.append( ("data_obs", obskeyline[0], [ "hww_%dj%s.input.root" % (options.jets,options.postfix), "histo_Data" ]) )
133 
134 
135 xfile = open(options.ddir+"/%d/hww_%dj_shape%s.txt" % (mass, options.jets,options.postfix), "w")
136 xfile.write(" ".join(["imax %d number of bins" % len(DC1.bins)])+"\n")
137 xfile.write(" ".join(["jmax * number of processes minus 1"])+"\n")
138 xfile.write(" ".join(["kmax * number of nuisance parameters"])+"\n")
139 xfile.write(" ".join(["-" * 130])+"\n")
140 if shapeLines:
141  chmax = max([max(len(p),len(c)) for p,c,x in shapeLines]);
142  cfmt = "%-"+str(chmax)+"s ";
143  for (process,channel,stuff) in shapeLines:
144  xfile.write(" ".join(["shapes", cfmt % process, cfmt % channel, ' '.join(stuff)])+"\n")
145  xfile.write(" ".join(["-" * 130])+"\n")
146 
147 if obsline:
148  cmax = max([cmax]+[len(l) for l in obskeyline]+[len(x) for x in obsline])
149  cfmt = "%-"+str(cmax)+"s";
150  xfile.write(" ".join(["bin ", " ".join([cfmt % x for x in obskeyline])])+"\n")
151  xfile.write(" ".join(["observation ", " ".join([cfmt % x for x in obsline])])+"\n")
152 
153 xfile.write(" ".join(["-" * 150])+"\n")
154 
155 pidline = []; signals = []; backgrounds = []
156 for (b,p,s) in keyline:
157  if s:
158  if p not in signals: signals.append(p)
159  pidline.append(-len(DC1.signals)+signals.index(p)+1)
160  else:
161  if p not in backgrounds: backgrounds.append(p)
162  pidline.append(1+backgrounds.index(p))
163 cmax = max([cmax]+[max(len(p),len(b)) for p,b,s in keyline]+[len(e) for e in expline])
164 hmax = max([10] + [len("%-12s %s %s" % (l,p,a)) for l,(p,a,e,nf) in systlines.items()])
165 cfmt = "%-"+str(cmax)+"s"; hfmt = "%-"+str(hmax)+"s ";
166 xfile.write(" ".join([hfmt % "bin", " ".join([cfmt % p for p,b,s in keyline])])+"\n")
167 xfile.write(" ".join([hfmt % "process", " ".join([cfmt % b for p,b,s in keyline])])+"\n")
168 xfile.write(" ".join([hfmt % "process", " ".join([cfmt % x for x in pidline])])+"\n")
169 xfile.write(" ".join([hfmt % "rate", " ".join([cfmt % x for x in expline])])+"\n")
170 xfile.write(" ".join(["-" * 150])+"\n")
171 sysnamesSorted = systlines.keys(); sysnamesSorted.sort()
172 for name in sysnamesSorted:
173  (pdf,pdfargs,effect,nofloat) = systlines[name]
174  if nofloat: name += "[nofloat]"
175  systline = []
176  for b,p,s in keyline:
177  try:
178  systline.append(effect[b][p] if (effect[b][p] != 1.0 or pdf != 'lnN') else "-")
179  except KeyError:
180  systline.append("-");
181  xfile.write(" ".join([hfmt % ("%-28s %s %s" % (name, pdf, " ".join(pdfargs))), " ".join([cfmt % x for x in systline])])+"\n")
182 for (pname, pargs) in paramSysts.items():
183  xfile.write(" ".join(["%-12s param %s" % (pname, " ".join(pargs))])+"\n")
184 
185 for pname in flatParamNuisances.iterkeys():
186  xfile.write(" ".join(["%-12s flatParam" % pname])+"\n")
#define abs(x)
Definition: mlp_lapack.h:159
#define min(a, b)
Definition: mlp_lapack.h:161
const T & max(const T &a, const T &b)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18