CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
ModelTools.py
Go to the documentation of this file.
1 import ROOT
2 import re, os, os.path
3 from sys import stderr, stdout
4 from math import *
5 ROOFIT_EXPR = "expr"
6 N_OBS_MAX = 100000
7 
9  """This class defines the basic stuff for a model builder, and it's an interface on top of RooWorkspace::factory or HLF files"""
10  def __init__(self,options):
11  self.options = options
12  self.out = stdout
13  if options.bin:
14  if options.out == None: options.out = re.sub(".txt$","",options.fileName)+".root"
15  options.baseDir = os.path.dirname(options.fileName)
16  ROOT.gSystem.Load("libHiggsAnalysisCombinedLimit.so")
17  self.out = ROOT.RooWorkspace("w","w");
18  self.out._import = getattr(self.out,"import") # workaround: import is a python keyword
19  self.out.dont_delete = []
20  if options.verbose == 0:
21  ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.ERROR)
22  elif options.verbose < 2:
23  ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.WARNING)
24  if os.environ.has_key('ROOFITSYS'):
25  ROOT.gSystem.AddIncludePath(" -I%s/include " % os.environ['ROOFITSYS'])
26  elif options.out != None:
27  #stderr.write("Will save workspace to HLF file %s" % options.out)
28  self.out = open(options.out, "w");
29  if not options.bin:
30  stderr.write("\nWARNING: You're not using binary mode. This is is DEPRECATED and NOT SUPPORTED anymore, and can give WRONG results.\n\n")
31  if options.cexpr:
32  global ROOFIT_EXPR;
33  ROOFIT_EXPR = "cexpr"
34  def factory_(self,X):
35  if (len(X) > 1000):
36  print "Executing factory with a string of length ",len(X)," > 1000, could trigger a bug: ",X
37  ret = self.out.factory(X);
38  if ret:
39  self.out.dont_delete.append(ret)
40  return ret
41  else:
42  print "ERROR parsing '%s'" % X
43  self.out.Print("V");
44  raise RuntimeError, "Error in factory statement"
45  def doComment(self,X):
46  if not self.options.bin: self.out.write("// "+X+"\n");
47  def doVar(self,vardef):
48  if self.options.bin: self.factory_(vardef);
49  else: self.out.write(vardef+";\n");
50  def doSet(self,name,vars):
51  if self.options.bin: self.out.defineSet(name,vars)
52  else: self.out.write("%s = set(%s);\n" % (name,vars));
53  def doObj(self,name,type,X):
54  if self.options.bin: return self.factory_("%s::%s(%s)" % (type, name, X));
55  else: self.out.write("%s = %s(%s);\n" % (name, type, X))
56 
58  """This class defines the actual methods to build a model"""
59  def __init__(self,datacard,options):
60  ModelBuilderBase.__init__(self,options)
61  self.DC = datacard
62  def setPhysics(self,physicsModel):
63  self.physics = physicsModel
64  self.physics.setModelBuilder(self)
65  def doModel(self):
66  self.doObservables()
67  self.physics.doParametersOfInterest()
68  self.physics.preProcessNuisances(self.DC.systs)
69  self.doNuisances()
70  self.doExpectedEvents()
71  self.doIndividualModels()
72  self.doCombination()
73  if self.options.bin:
74  self.doModelConfigs()
75  if self.options.verbose > 1: self.out.Print("V")
76  if self.options.verbose > 2:
77  print "Wrote GraphVizTree of model_s to ",self.options.out+".dot"
78  self.out.pdf("model_s").graphVizTree(self.options.out+".dot", "\\n")
79  def doObservables(self):
80  """create pdf_bin<X> and pdf_bin<X>_bonly for each bin"""
81  raise RuntimeError, "Not implemented in ModelBuilder"
82  def doNuisances(self):
83  if len(self.DC.systs) == 0: return
84  self.doComment(" ----- nuisances -----")
85  globalobs = []
86  for (n,nofloat,pdf,args,errline) in self.DC.systs:
87  if pdf == "lnN" or pdf.startswith("shape"):
88  r = "-4,4" if pdf == "shape" else "-7,7"
89  self.doObj("%s_Pdf" % n, "Gaussian", "%s[%s], %s_In[0,%s], 1" % (n,r,n,r));
90  globalobs.append("%s_In" % n)
91  if self.options.bin:
92  self.out.var("%s_In" % n).setConstant(True)
93  elif pdf == "gmM":
94  val = 0;
95  for c in errline.values(): #list channels
96  for v in c.values(): # list effects in each channel
97  if v != 0:
98  if val != 0 and v != val:
99  raise RuntimeError, "Error: line %s contains two different uncertainties %g, %g, which is not supported for gmM" % (n,v,val)
100  val = v;
101  if val == 0: raise RuntimeError, "Error: line %s contains all zeroes"
102  theta = val*val; kappa = 1/theta
103  self.doObj("%s_Pdf" % n, "Gamma", "%s[1,%f,%f], %s_In[%g,%g,%g], %s_scaling[%g], 0" % (n, max(0.01,1-5*val), 1+5*val, n, kappa, 1, 2*kappa+4, n, theta))
104  globalobs.append("%s_In" % n)
105  if self.options.bin: self.out.var("%s_In" % n).setConstant(True)
106  elif pdf == "gmN":
107  if False:
108  # old version, that creates a poisson with a very large range
109  self.doObj("%s_Pdf" % n, "Poisson", "%s_In[%d,0,%d], %s[0,%d], 1" % (n,args[0],2*args[0]+5,n,2*args[0]+5))
110  else:
111  # new version, that creates a poisson with a narrower range (but still +/- 7 sigmas)
112  #print "Searching for bounds for",n,"poisson with obs",args[0]
113  minExp = args[0]+1 if args[0] > 0 else 0;
114  while (ROOT.TMath.Poisson(args[0], minExp) > 1e-12) and minExp > 0:
115  #print "Poisson(%d, minExp = %f) = %g > 1e-12" % (args[0], minExp, ROOT.TMath.Poisson(args[0], minExp))
116  minExp *= 0.8;
117  maxExp = args[0]+1;
118  while (ROOT.TMath.Poisson(args[0], maxExp) > 1e-12):
119  #print "Poisson(%d, maxExp = %f) = %g > 1e-12" % (args[0], maxExp, ROOT.TMath.Poisson(args[0], maxExp))
120  maxExp *= 1.2;
121  minObs = args[0];
122  while minObs > 0 and (ROOT.TMath.Poisson(minObs, args[0]+1) > 1e-12):
123  #print "Poisson(minObs = %d, %f) = %g > 1e-12" % (minObs, args[0]+1, ROOT.TMath.Poisson(minObs, args[0]+1))
124  minObs -= (sqrt(args[0]) if args[0] > 10 else 1);
125  maxObs = args[0]+2;
126  while (ROOT.TMath.Poisson(maxObs, args[0]+1) > 1e-12):
127  #print "Poisson(maxObs = %d, %f) = %g > 1e-12" % (maxObs, args[0]+1, ROOT.TMath.Poisson(maxObs, args[0]+1))
128  maxObs += (sqrt(args[0]) if args[0] > 10 else 2);
129  self.doObj("%s_Pdf" % n, "Poisson", "%s_In[%d,%f,%f], %s[%f,%f,%f], 1" % (n,args[0],minObs,maxObs,n,args[0]+1,minExp,maxExp))
130  globalobs.append("%s_In" % n)
131  if self.options.bin: self.out.var("%s_In" % n).setConstant(True)
132  elif pdf == "trG":
133  trG_min = -7; trG_max = +7;
134  for b in errline.keys():
135  for v in errline[b].values():
136  if v > 0 and 1.0 + trG_min * v < 0: trG_min = -1.0/v;
137  if v < 0 and 1.0 + trG_max * v < 0: trG_max = -1.0/v;
138  r = "%f,%f" % (trG_min,trG_max);
139  self.doObj("%s_Pdf" % n, "Gaussian", "%s[0,%s], %s_In[0,%s], 1" % (n,r,n,r));
140  globalobs.append("%s_In" % n)
141  if self.options.bin:
142  self.out.var("%s_In" % n).setConstant(True)
143  elif pdf == "lnU":
144  self.doObj("%s_Pdf" % n, "Uniform", "%s[-1,1]" % n);
145  elif pdf == "unif":
146  self.doObj("%s_Pdf" % n, "Uniform", "%s[%f,%f]" % (n,args[0],args[1]))
147  elif pdf == "param":
148  mean = float(args[0])
149  if "/" in args[1]:
150  sigmaL,sigmaR = args[1].split("/")
151  if sigmaL[0] != "-" or sigmaR[0] != "+": raise RuntimeError, "Asymmetric parameter uncertainties should be entered as -x/+y"
152  sigmaL = sigmaL[1:]; sigmaR = sigmaR[1:]
153  if len(args) == 3: # mean, sigma, range
154  self.doVar("%s%s" % (n,args[2]))
155  else:
156  sigma = float(args[1])
157  if self.out.var(n):
158  self.out.var(n).setConstant(False)
159  self.out.var(n).setRange(mean-4*float(sigmaL),mean+4*float(sigmaR))
160  else:
161  self.doVar("%s[%g,%g]" % (n, mean-4*float(sigmaL), mean+4*float(sigmaR)))
162  self.out.var(n).setVal(mean)
163  self.doObj("%s_Pdf" % n, "BifurGauss", "%s, %s_In[%s,%g,%g], %s, %s" % (n, n, args[0], self.out.var(n).getMin(), self.out.var(n).getMax(), sigmaL, sigmaR))
164  self.out.var("%s_In" % n).setConstant(True)
165  else:
166  if len(args) == 3: # mean, sigma, range
167  self.doVar("%s%s" % (n,args[2]))
168  else:
169  sigma = float(args[1])
170  if self.out.var(n):
171  self.out.var(n).setConstant(False)
172  self.out.var(n).setRange(mean-4*sigma, mean+4*sigma)
173  else:
174  self.doVar("%s[%g,%g]" % (n, mean-4*sigma, mean+4*sigma))
175  self.out.var(n).setVal(mean)
176  self.doObj("%s_Pdf" % n, "Gaussian", "%s, %s_In[%s,%g,%g], %s" % (n, n, args[0], self.out.var(n).getMin(), self.out.var(n).getMax(), args[1]))
177  self.out.var("%s_In" % n).setConstant(True)
178  globalobs.append("%s_In" % n)
179  else: raise RuntimeError, "Unsupported pdf %s" % pdf
180  if nofloat:
181  self.out.var("%s" % n).setAttribute("globalConstrained",True)
182  if self.options.bin:
183  nuisPdfs = ROOT.RooArgList()
184  nuisVars = ROOT.RooArgSet()
185  for (n,nf,p,a,e) in self.DC.systs:
186  nuisVars.add(self.out.var(n))
187  nuisPdfs.add(self.out.pdf(n+"_Pdf"))
188  self.out.defineSet("nuisances", nuisVars)
189  self.out.nuisPdf = ROOT.RooProdPdf("nuisancePdf", "nuisancePdf", nuisPdfs)
190  self.out._import(self.out.nuisPdf)
191  self.out.nuisPdfs = nuisPdfs
192  gobsVars = ROOT.RooArgSet()
193  for g in globalobs: gobsVars.add(self.out.var(g))
194  self.out.defineSet("globalObservables", gobsVars)
195  else: # doesn't work for too many nuisances :-(
196  self.doSet("nuisances", ",".join(["%s" % n for (n,nf,p,a,e) in self.DC.systs]))
197  self.doObj("nuisancePdf", "PROD", ",".join(["%s_Pdf" % n for (n,nf,p,a,e) in self.DC.systs]))
198  self.doSet("globalObservables", ",".join(globalobs))
199  def doExpectedEvents(self):
200  self.doComment(" --- Expected events in each bin, for each process ----")
201  for b in self.DC.bins:
202  for p in self.DC.exp[b].keys(): # so that we get only self.DC.processes contributing to this bin
203  # if it's a zero background, write a zero and move on
204  if self.DC.exp[b][p] == 0:
205  self.doVar("n_exp_bin%s_proc_%s[%g]" % (b, p, self.DC.exp[b][p]))
206  continue
207  # get model-dependent scale factor
208  scale = self.physics.getYieldScale(b,p)
209  if scale == 0:
210  self.doVar("n_exp_bin%s_proc_%s[%g]" % (b, p, 0))
211  continue
212  # collect multiplicative corrections
213  nominal = self.DC.exp[b][p]
214  gamma = None; # gamma normalization (if present, DC.exp[b][p] is ignored)
215  factors = [] # RooAbsReal multiplicative factors (including gmN)
216  logNorms = [] # (kappa, RooAbsReal) lnN (or lnN)
217  alogNorms = [] # (kappaLo, kappaHi, RooAbsReal) asymm lnN
218  if scale == 1:
219  pass
220  elif type(scale) == str:
221  factors.append(scale)
222  else:
223  raise RuntimeError, "Physics model returned something which is neither a name, nor 0, nor 1."
224  for (n,nofloat,pdf,args,errline) in self.DC.systs:
225  if pdf == "param" or pdf.startswith("shape"): continue
226  if not errline[b].has_key(p): continue
227  if errline[b][p] == 0.0: continue
228  if pdf.startswith("shape") and pdf.endswith("?"): # might be a lnN in disguise
229  if not self.isShapeSystematic(b,p,n): pdf = "lnN"
230  if pdf == "lnN" and errline[b][p] == 1.0: continue
231  if pdf == "lnN" or pdf == "lnU":
232  if type(errline[b][p]) == list:
233  elow, ehigh = errline[b][p];
234  alogNorms.append((elow, ehigh, n))
235  else:
236  logNorms.append((errline[b][p], n))
237  elif pdf == "gmM":
238  factors.append(n)
239  elif pdf == "trG" or pdf == "unif":
240  myname = "n_exp_shift_bin%s_proc_%s_%s" % (b,p,n)
241  self.doObj(myname, ROOFIT_EXPR, "'1+%f*@0', %s" % (errline[b][p], n));
242  factors.append(myname)
243  elif pdf == "gmN":
244  factors.append(n)
245  if abs(errline[b][p] * args[0] - self.DC.exp[b][p]) > max(0.05 * max(self.DC.exp[b][p],1), errline[b][p]):
246  raise RuntimeError, "Values of N = %d, alpha = %g don't match with expected rate %g for systematics %s " % (
247  args[0], errline[b][p], self.DC.exp[b][p], n)
248  if gamma != None:
249  raise RuntimeError, "More than one gmN uncertainty for the same bin and process (second one is %s)" % n
250  gamma = n; nominal = errline[b][p];
251  else: raise RuntimeError, "Unsupported pdf %s" % pdf
252  # optimize constants
253  if len(factors) + len(logNorms) + len(alogNorms) == 0:
254  self.doVar("n_exp_bin%s_proc_%s[%g]" % (b, p, self.DC.exp[b][p]))
255  else:
256  #print "Process %s of bin %s depends on:\n\tlog-normals: %s\n\tasymm log-normals: %s\n\tother factors: %s\n" % (p,b,logNorms, alogNorms, factors)
257  procNorm = ROOT.ProcessNormalization("n_exp_bin%s_proc_%s" % (b,p), "", nominal)
258  for kappa, thetaName in logNorms: procNorm.addLogNormal(kappa, self.out.function(thetaName))
259  for kappaLo, kappaHi, thetaName in alogNorms: procNorm.addAsymmLogNormal(kappaLo, kappaHi, self.out.function(thetaName))
260  for factorName in factors: procNorm.addOtherFactor(self.out.function(factorName))
261  self.out._import(procNorm)
263  """create pdf_bin<X> and pdf_bin<X>_bonly for each bin"""
264  raise RuntimeError, "Not implemented in ModelBuilder"
265  def doCombination(self):
266  """create model_s and model_b pdfs"""
267  raise RuntimeError, "Not implemented in ModelBuilder"
268  def doModelConfigs(self):
269  if not self.options.bin: raise RuntimeException
270  if self.options.out == None: raise RuntimeException
271  for nuis,warn in self.DC.flatParamNuisances.iteritems():
272  if self.out.var(nuis): self.out.var(nuis).setAttribute("flatParam")
273  elif warn: stderr.write("Missing variable %s declared as flatParam\n" % nuis)
274  mc_s = ROOT.RooStats.ModelConfig("ModelConfig", self.out)
275  mc_b = ROOT.RooStats.ModelConfig("ModelConfig_bonly", self.out)
276  for (l,mc) in [ ('s',mc_s), ('b',mc_b) ]:
277  mc.SetPdf(self.out.pdf("model_"+l))
278  mc.SetParametersOfInterest(self.out.set("POI"))
279  mc.SetObservables(self.out.set("observables"))
280  if len(self.DC.systs): mc.SetNuisanceParameters(self.out.set("nuisances"))
281  if self.out.set("globalObservables"): mc.SetGlobalObservables(self.out.set("globalObservables"))
282  if self.options.verbose > 1: mc.Print("V")
283  self.out._import(mc, mc.GetName())
284  self.out.writeToFile(self.options.out)
285  def isShapeSystematic(self,channel,process,syst):
286  return False
287 
289  """ModelBuilder to make a counting experiment"""
290  def __init__(self,datacard,options):
291  ModelBuilder.__init__(self,datacard,options)
292  if datacard.hasShapes:
293  raise RuntimeError, "You're using a CountingModelBuilder for a model that has shapes"
294  def doObservables(self):
295  if len(self.DC.obs):
296  self.doComment(" ----- observables (already set to observed values) -----")
297  for b in self.DC.bins: self.doVar("n_obs_bin%s[%f,0,%d]" % (b,self.DC.obs[b],N_OBS_MAX))
298  else:
299  self.doComment(" ----- observables -----")
300  for b in self.DC.bins: self.doVar("n_obs_bin%s[0,%d]" % (b,N_OBS_MAX))
301  self.doSet("observables", ",".join(["n_obs_bin%s" % b for b in self.DC.bins]))
302  if len(self.DC.obs):
303  if self.options.bin:
304  self.out.data_obs = ROOT.RooDataSet(self.options.dataname,"observed data", self.out.set("observables"))
305  self.out.data_obs.add( self.out.set("observables") )
306  self.out._import(self.out.data_obs)
308  self.doComment(" --- Expected events in each bin, total (S+B and B) ----")
309  for b in self.DC.bins:
310  self.doObj("n_exp_bin%s_bonly" % b, "sum", ", ".join(["n_exp_bin%s_proc_%s" % (b,p) for p in self.DC.exp[b].keys() if self.DC.isSignal[p] == False]) )
311  self.doObj("n_exp_bin%s" % b, "sum", ", ".join(["n_exp_bin%s_proc_%s" % (b,p) for p in self.DC.exp[b].keys() ]) )
312  self.doObj("pdf_bin%s" % b, "Poisson", "n_obs_bin%s, n_exp_bin%s, 1" % (b,b))
313  self.doObj("pdf_bin%s_bonly" % b, "Poisson", "n_obs_bin%s, n_exp_bin%s_bonly, 1" % (b,b))
314  def doCombination(self):
315  prefix = "modelObs" if len(self.DC.systs) else "model" # if no systematics, we build directly the model
316  nbins = len(self.DC.bins)
317  if nbins > 50:
318  from math import ceil
319  nblocks = int(ceil(nbins/10.))
320  for i in range(nblocks):
321  self.doObj("%s_s_%d" % (prefix,i), "PROD", ",".join(["pdf_bin%s" % self.DC.bins[j] for j in range(10*i,min(nbins,10*i+10))]))
322  self.doObj("%s_b_%d" % (prefix,i), "PROD", ",".join(["pdf_bin%s_bonly" % self.DC.bins[j] for j in range(10*i,min(nbins,10*i+10))]))
323  self.doObj("%s_s" % prefix, "PROD", ",".join([prefix+"_s_%d" % i for i in range(nblocks)]))
324  self.doObj("%s_b" % prefix, "PROD", ",".join([prefix+"_b_%d" % i for i in range(nblocks)]))
325  else:
326  self.doObj("%s_s" % prefix, "PROD", ",".join(["pdf_bin%s" % b for b in self.DC.bins]))
327  self.doObj("%s_b" % prefix, "PROD", ",".join(["pdf_bin%s_bonly" % b for b in self.DC.bins]))
328  if len(self.DC.systs): # multiply by nuisances if needed
329  self.doObj("model_s", "PROD", "modelObs_s, nuisancePdf")
330  self.doObj("model_b", "PROD", "modelObs_b, nuisancePdf")
331 
#define abs(x)
Definition: mlp_lapack.h:159
#define min(a, b)
Definition: mlp_lapack.h:161
const T & max(const T &a, const T &b)
T sqrt(T t)
Definition: SSEVec.h:46
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
double split
Definition: MVATrainer.cc:139