CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_5_3_13_patch3/src/HiggsAnalysis/CombinedLimit/scripts/combineCards.py

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 import re
00003 from sys import argv
00004 import os.path
00005 from pprint import pprint
00006 from optparse import OptionParser
00007 parser = OptionParser()
00008 parser.add_option("-s", "--stat",   dest="stat",          default=False, action="store_true", help="Drop all systematics")
00009 parser.add_option("-S", "--force-shape", dest="shape",    default=False, action="store_true", help="Treat all channels as shape analysis. Useful for mixed combinations") 
00010 parser.add_option("-a", "--asimov", dest="asimov",  default=False, action="store_true", help="Replace observation with asimov dataset. Works only for counting experiments")
00011 parser.add_option("-P", "--prefix", type="string", dest="fprefix", default="",  help="Prefix this to all file names")
00012 parser.add_option("--xc", "--exclude-channel", type="string", dest="channelVetos", default=[], action="append", help="Exclude channels that match this regexp; can specify multiple ones")
00013 parser.add_option("--X-no-jmax",  dest="noJMax", default=False, action="store_true", help="FOR DEBUG ONLY: Turn off the consistency check between jmax and number of processes.")
00014 parser.add_option("--xn-file", "--exclude-nuisances-from-file", type="string", dest="nuisVetoFile", help="Exclude all the nuisances in this file")
00015 
00016 (options, args) = parser.parse_args()
00017 options.bin = True # fake that is a binary output, so that we parse shape lines
00018 options.nuisancesToExclude = []
00019 options.verbose = 0
00020 
00021 if options.nuisVetoFile:
00022     for line in open(options.nuisVetoFile,"r"):
00023         options.nuisancesToExclude.append(re.compile(line.strip()))
00024 
00025 from HiggsAnalysis.CombinedLimit.DatacardParser import *
00026 
00027 obsline = []; obskeyline = [] ;
00028 keyline = []; expline = []; systlines = {}
00029 signals = []; backgrounds = []; shapeLines = []
00030 paramSysts = {}; flatParamNuisances = {}
00031 cmax = 5 # column width
00032 for ich,fname in enumerate(args):
00033     label = "ch%d" % (ich+1)
00034     if "=" in fname: (label,fname) = fname.split("=")
00035     fname = options.fprefix+fname
00036     dirname = os.path.dirname(fname)
00037     file = open(fname, "r")
00038     DC = parseCard(file, options)
00039     singlebin = (len(DC.bins) == 1)
00040     if label == ".":
00041         label=DC.bins[0] if singlebin else "";
00042     elif not singlebin: 
00043         label += "_";
00044     for b in DC.bins:
00045         bout = label if singlebin else label+b
00046         if isVetoed(bout,options.channelVetos): continue
00047         obskeyline.append(bout)
00048         for (p,e) in DC.exp[b].items(): # so that we get only self.DC.processes contributing to this bin
00049             if DC.isSignal[p] == False: continue
00050             #print "in DC.exp.items:b,p", b,p
00051             expline.append("%.4f" % e)
00052             keyline.append((bout, p, DC.isSignal[p]))
00053         for (p,e) in DC.exp[b].items(): # so that we get only self.DC.processes contributing to this bin
00054             if DC.isSignal[p]: continue
00055             #print "in DC.exp.items:b,p", b,p
00056             expline.append("%.4f" % e)
00057             keyline.append((bout, p, DC.isSignal[p]))
00058     # systematics
00059     for (lsyst,nofloat,pdf,pdfargs,errline) in DC.systs:
00060         systeffect = {}
00061         if pdf == "param":
00062             if paramSysts.has_key(lsyst):
00063                if paramSysts[lsyst] != pdfargs: raise RuntimeError, "Parameter uncerainty %s mismatch between cards." % lsyst
00064             else:
00065                 paramSysts[lsyst] = pdfargs
00066             continue
00067         for b in DC.bins:
00068             bout = label if singlebin else label+b
00069             if isVetoed(bout,options.channelVetos): continue
00070             if not systeffect.has_key(bout): systeffect[bout] = {} 
00071             for p in DC.exp[b].keys(): # so that we get only self.DC.processes contributing to this bin
00072                 r = str(errline[b][p]);
00073                 if type(errline[b][p]) == list: r = "%.3f/%.3f" % (errline[b][p][0], errline[b][p][1])
00074                 elif type in ("lnN",'gmM'): r = "%.3f" % errline[b][p]
00075                 if errline[b][p] == 0: r = "-"
00076                 if len(r) > cmax: cmax = len(r) # get max col length, as it's more tricky do do it later with a map
00077                 systeffect[bout][p] = r
00078         if systlines.has_key(lsyst):
00079             (otherpdf, otherargs, othereffect, othernofloat) = systlines[lsyst]
00080             if otherpdf != pdf: 
00081                 if (pdf == "lnN" and otherpdf.startswith("shape")):
00082                     if systlines[lsyst][0][-1] != '?': systlines[lsyst][0] += '?'
00083                     for b,v in systeffect.items(): othereffect[b] = v;
00084                 elif (pdf.startswith("shape") and otherpdf == "lnN"):
00085                     if pdf[-1] != '?': pdf += '?'
00086                     systlines[lsyst][0] = pdf
00087                     for b,v in systeffect.items(): othereffect[b] = v;
00088                 elif (pdf == otherpdf+"?") or (pdf+"?" == otherpdf):
00089                     systlines[lsyst][0] = pdf.replace("?","")+"?"
00090                     for b,v in systeffect.items(): othereffect[b] = v;
00091                 else:
00092                     raise RuntimeError, "File %s defines systematic %s as using pdf %s, while a previous file defines it as using %s" % (fname,lsyst,pdf,otherpdf)
00093             else:
00094                 if pdf == "gmN" and int(pdfargs[0]) != int(otherargs[0]): 
00095                     raise RuntimeError, "File %s defines systematic %s as using gamma with %s events in sideband, while a previous file has %s" % (fname,lsyst,pdfargs[0],otherargs[0])
00096                 for b,v in systeffect.items(): othereffect[b] = v;
00097         else:
00098             pdfargs = [ str(x) for x in pdfargs ]
00099             systlines[lsyst] = [pdf,pdfargs,systeffect,nofloat]
00100     # flat params
00101     for K in DC.flatParamNuisances.iterkeys(): 
00102         flatParamNuisances[K] = True
00103     # put shapes, if available
00104     if len(DC.shapeMap):
00105         for b in DC.bins:
00106             bout = label if singlebin else label+b
00107             if isVetoed(bout,options.channelVetos): continue
00108             p2sMap  = DC.shapeMap[b]   if DC.shapeMap.has_key(b)   else {}
00109             p2sMapD = DC.shapeMap['*'] if DC.shapeMap.has_key('*') else {}
00110             for p, x in p2sMap.items():
00111                 xrep = [xi.replace("$CHANNEL",b) for xi in x]
00112                 if xrep[0] != 'FAKE' and dirname != '': xrep[0] = dirname+"/"+xrep[0]
00113                 shapeLines.append((p,bout,xrep))
00114             for p, x in p2sMapD.items():
00115                 if p2sMap.has_key(p): continue
00116                 xrep = [xi.replace("$CHANNEL",b) for xi in x]
00117                 if xrep[0] != 'FAKE' and dirname != '': xrep[0] = dirname+"/"+xrep[0]
00118                 shapeLines.append((p,bout,xrep))
00119     elif options.shape:
00120         for b in DC.bins:
00121             bout = label if singlebin else label+b
00122             shapeLines.append(('*',bout,['FAKE']))
00123     # combine observations, but remove line if any of the datacards doesn't have it
00124     if len(DC.obs) == 0:
00125         obsline = None
00126     elif obsline != None:
00127         for b in DC.bins:
00128             bout = label if singlebin else label+b
00129             if isVetoed(bout,options.channelVetos): continue
00130             obsline += [str(DC.obs[b])]; 
00131 
00132 bins = []
00133 for (b,p,s) in keyline:
00134     if b not in bins: bins.append(b)
00135     if s:
00136         if p not in signals: signals.append(p)
00137     else:
00138         if p not in backgrounds: backgrounds.append(p)
00139 
00140 print "Combination of", ", ".join(args)
00141 print "imax %d number of bins" % len(bins)
00142 print "jmax %d number of processes minus 1" % (len(signals) + len(backgrounds) - 1)
00143 print "kmax %d number of nuisance parameters" % (len(systlines) + len(paramSysts))
00144 print "-" * 130
00145 
00146 if shapeLines:
00147     chmax = max([max(len(p),len(c)) for p,c,x in shapeLines]);
00148     cfmt = "%-"+str(chmax)+"s ";
00149     shapeLines.sort( lambda x,y : cmp(x[0],y[0]) if x[1] == y[1] else cmp(x[1],y[1]) )
00150     for (process,channel,stuff) in shapeLines:
00151         print "shapes", cfmt % process, cfmt % channel, ' '.join(stuff);
00152     print "-" * 130
00153 
00154 if obsline:
00155     cmax = max([cmax]+[len(l) for l in obskeyline]+[len(x) for x in obsline])
00156     cfmt = "%-"+str(cmax)+"s";
00157     print "bin         ", "  ".join([cfmt % x for x in obskeyline])
00158     print "observation ", "  ".join([cfmt % x for x in obsline])
00159 
00160 print "-" * 130
00161 
00162 pidline = []; signals = []; backgrounds = []
00163 tmpsignals = [];
00164 for (b,p,s) in keyline:
00165     if s:
00166         if p not in tmpsignals: tmpsignals.append(p)
00167 for (b,p,s) in keyline:
00168     if s:
00169         if p not in signals: signals.append(p)
00170         pidline.append(signals.index(p)-len(tmpsignals)+1)
00171     else:
00172         if p not in backgrounds: backgrounds.append(p)
00173         pidline.append(1+backgrounds.index(p))
00174 cmax = max([cmax]+[max(len(p),len(b)) for p,b,s in keyline]+[len(e) for e in expline])
00175 hmax = max([10] + [len("%-12s[nofloat]  %s %s" % (l,p,a)) for l,(p,a,e,nf) in systlines.items()])
00176 cfmt  = "%-"+str(cmax)+"s"; hfmt = "%-"+str(hmax)+"s  ";
00177 print hfmt % "bin",     "  ".join([cfmt % p for p,b,s in keyline])
00178 print hfmt % "process", "  ".join([cfmt % b for p,b,s in keyline])
00179 print hfmt % "process", "  ".join([cfmt % x for x in pidline])
00180 print hfmt % "rate",    "  ".join([cfmt % x for x in expline])
00181 
00182 print "-" * 130
00183 
00184 sysnamesSorted = systlines.keys(); sysnamesSorted.sort()
00185 for name in sysnamesSorted:
00186     (pdf,pdfargs,effect,nofloat) = systlines[name]
00187     if nofloat: name += "[nofloat]"
00188     systline = []
00189     for b,p,s in keyline:
00190         try:
00191             systline.append(effect[b][p])
00192         except KeyError:
00193             systline.append("-");
00194     print hfmt % ("%-21s   %s  %s" % (name, pdf, " ".join(pdfargs))), "  ".join([cfmt % x for x in systline])
00195 for (pname, pargs) in paramSysts.items():
00196     print "%-12s  param  %s" %  (pname, " ".join(pargs))
00197 
00198 for pname in flatParamNuisances.iterkeys(): 
00199     print "%-12s  flatParam" % pname