00001 """
00002 Utilities for rootplot including histogram classes.
00003 """
00004
00005 __license__ = '''\
00006 Copyright (c) 2009-2010 Jeff Klukas <klukas@wisc.edu>
00007
00008 Permission is hereby granted, free of charge, to any person obtaining a copy
00009 of this software and associated documentation files (the "Software"), to deal
00010 in the Software without restriction, including without limitation the rights
00011 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00012 copies of the Software, and to permit persons to whom the Software is
00013 furnished to do so, subject to the following conditions:
00014
00015 The above copyright notice and this permission notice shall be included in
00016 all copies or substantial portions of the Software.
00017
00018 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00019 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00020 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00021 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00022 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00023 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00024 THE SOFTWARE.
00025 '''
00026
00027
00028
00029 import math
00030 import ROOT
00031 import re
00032 import copy
00033 import array
00034 import os.path
00035
00036
00037
00038 class Hist2D(object):
00039 """A container to hold the parameters from a 2D ROOT histogram."""
00040 def __init__(self, hist, label="__nolabel__", title=None,
00041 xlabel=None, ylabel=None):
00042 try:
00043 if not hist.InheritsFrom("TH2"):
00044 raise TypeError("%s does not inherit from TH2" % hist)
00045 except:
00046 raise TypeError("%s is not a ROOT object" % hist)
00047 self.rootclass = hist.ClassName()
00048 self.name = hist.GetName()
00049 self.nbinsx = nx = hist.GetNbinsX()
00050 self.nbinsy = ny = hist.GetNbinsY()
00051 self.binlabelsx = process_bin_labels([hist.GetXaxis().GetBinLabel(i)
00052 for i in range(1, nx + 1)])
00053 if self.binlabelsx:
00054 self.nbinsx = nx = self.binlabelsx.index('')
00055 self.binlabelsx = self.binlabelsx[:ny]
00056 self.binlabelsy = process_bin_labels([hist.GetYaxis().GetBinLabel(i)
00057 for i in range(1, ny + 1)])
00058 if self.binlabelsy:
00059 self.nbinsy = ny = self.binlabelsy.index('')
00060 self.binlabelsy = self.binlabelsy[:ny]
00061 self.entries = hist.GetEntries()
00062 self.content = [[hist.GetBinContent(i, j) for i in range(1, nx + 1)]
00063 for j in range(1, ny + 1)]
00064 self.xedges = [hist.GetXaxis().GetBinLowEdge(i)
00065 for i in range(1, nx + 2)]
00066 self.yedges = [hist.GetYaxis().GetBinLowEdge(i)
00067 for i in range(1, ny + 2)]
00068 self.x = [(self.xedges[i+1] + self.xedges[i])/2
00069 for i in range(nx)]
00070 self.y = [(self.yedges[i+1] + self.yedges[i])/2
00071 for i in range(ny)]
00072 self.title = title or hist.GetTitle()
00073 self.xlabel = xlabel or hist.GetXaxis().GetTitle()
00074 self.ylabel = ylabel or hist.GetYaxis().GetTitle()
00075 self.label = label
00076 def _flat_content(self):
00077 flatcontent = []
00078 for row in self.content:
00079 flatcontent += row
00080 return flatcontent
00081 def __getitem__(self, index):
00082 """Return contents of indexth bin: x.__getitem__(y) <==> x[y]"""
00083 return self._flat_content()[index]
00084 def __len__(self):
00085 """Return the number of bins: x.__len__() <==> len(x)"""
00086 return len(self._flat_content())
00087 def __iter__(self):
00088 """Iterate through bins: x.__iter__() <==> iter(x)"""
00089 return iter(self._flat_content())
00090 def TH2F(self, name=""):
00091 """Return a ROOT.TH2F object with contents of this Hist2D."""
00092 th2f = ROOT.TH2F(name, "",
00093 self.nbinsx, array.array('f', self.xedges),
00094 self.nbinsy, array.array('f', self.yedges))
00095 th2f.SetTitle("%s;%s;%s" % (self.title, self.xlabel, self.ylabel))
00096 for ix in range(self.nbinsx):
00097 for iy in range(self.nbinsy):
00098 th2f.SetBinContent(ix + 1, iy + 1, self.content[iy][ix])
00099 return th2f
00100
00101 class Hist(object):
00102 """A container to hold the parameters from a ROOT histogram."""
00103 def __init__(self, hist, label="__nolabel__",
00104 name=None, title=None, xlabel=None, ylabel=None):
00105 try:
00106 hist.GetNbinsX()
00107 self.__init_TH1(hist)
00108 except AttributeError:
00109 try:
00110 hist.GetN()
00111 self.__init_TGraph(hist)
00112 except AttributeError:
00113 raise TypeError("%s is not a 1D histogram or TGraph" % hist)
00114 self.rootclass = hist.ClassName()
00115 self.name = name or hist.GetName()
00116 self.title = title or hist.GetTitle().split(';')[0]
00117 self.xlabel = xlabel or hist.GetXaxis().GetTitle()
00118 self.ylabel = ylabel or hist.GetYaxis().GetTitle()
00119 self.label = label
00120 def __init_TH1(self, hist):
00121 self.nbins = n = hist.GetNbinsX()
00122 self.binlabels = process_bin_labels([hist.GetXaxis().GetBinLabel(i)
00123 for i in range(1, n + 1)])
00124 if self.binlabels and '' in self.binlabels:
00125
00126 self.nbins = n = self.binlabels.index('')
00127 self.binlabels = self.binlabels[:n]
00128 self.entries = hist.GetEntries()
00129 self.xedges = [hist.GetBinLowEdge(i) for i in range(1, n + 2)]
00130 self.x = [(self.xedges[i+1] + self.xedges[i])/2 for i in range(n)]
00131 self.xerr = [(self.xedges[i+1] - self.xedges[i])/2 for i in range(n)]
00132 self.xerr = [self.xerr[:], self.xerr[:]]
00133 self.width = [(self.xedges[i+1] - self.xedges[i]) for i in range(n)]
00134 self.y = [hist.GetBinContent(i) for i in range(1, n + 1)]
00135 self.yerr = [hist.GetBinError( i) for i in range(1, n + 1)]
00136 self.yerr = [self.yerr[:], self.yerr[:]]
00137 self.underflow = hist.GetBinContent(0)
00138 self.overflow = hist.GetBinContent(self.nbins + 1)
00139 def __init_TGraph(self, hist):
00140 self.nbins = n = hist.GetN()
00141 self.x, self.y = [], []
00142 x, y = ROOT.Double(0), ROOT.Double(0)
00143 for i in range(n):
00144 hist.GetPoint(i, x, y)
00145 self.x.append(copy.copy(x))
00146 self.y.append(copy.copy(y))
00147 lower = [max(0, hist.GetErrorXlow(i)) for i in xrange(n)]
00148 upper = [max(0, hist.GetErrorXhigh(i)) for i in xrange(n)]
00149 self.xerr = [lower[:], upper[:]]
00150 lower = [max(0, hist.GetErrorYlow(i)) for i in xrange(n)]
00151 upper = [max(0, hist.GetErrorYhigh(i)) for i in xrange(n)]
00152 self.yerr = [lower[:], upper[:]]
00153 self.xedges = [self.x[i] - self.xerr[0][i] for i in xrange(n)]
00154 self.xedges.append(self.x[n - 1] + self.xerr[1][n - 1])
00155 self.width = [self.xedges[i + 1] - self.xedges[i] for i in range(n)]
00156 self.underflow, self.overflow = 0, 0
00157 self.binlabels = None
00158 self.entries = n
00159 def __add__(self, b):
00160 """Return the sum of self and b: x.__add__(y) <==> x + y"""
00161 c = copy.copy(self)
00162 for i in range(len(self)):
00163 c.y[i] += b.y[i]
00164 c.yerr[i] += b.yerr[i]
00165 c.overflow += b.overflow
00166 c.underflow += b.underflow
00167 return c
00168 def __sub__(self, b):
00169 """Return the difference of self and b: x.__sub__(y) <==> x - y"""
00170 c = copy.copy(self)
00171 for i in range(len(self)):
00172 c.y[i] -= b.y[i]
00173 c.yerr[i] -= b.yerr[i]
00174 c.overflow -= b.overflow
00175 c.underflow -= b.underflow
00176 return c
00177 def __div__(self, denominator):
00178 return self.divide(denominator)
00179 def __getitem__(self, index):
00180 """Return contents of indexth bin: x.__getitem__(y) <==> x[y]"""
00181 return self.y[index]
00182 def __setitem__(self, index, value):
00183 """Set contents of indexth bin: x.__setitem__(i, y) <==> x[i]=y"""
00184 self.y[index] = value
00185 def __len__(self):
00186 """Return the number of bins: x.__len__() <==> len(x)"""
00187 return self.nbins
00188 def __iter__(self):
00189 """Iterate through bins: x.__iter__() <==> iter(x)"""
00190 return iter(self.y)
00191 def min(self, threshold=None):
00192 """Return the y-value of the bottom tip of the lowest errorbar."""
00193 vals = [(yval - yerr) for yval, yerr in zip(self.y, self.yerr[0])
00194 if (yval - yerr) > threshold]
00195 if vals:
00196 return min(vals)
00197 else:
00198 return threshold
00199 def av_xerr(self):
00200 """Return average between the upper and lower xerr."""
00201 return [(self.xerr[0][i] + self.xerr[1][i]) / 2
00202 for i in range(self.nbins)]
00203 def av_yerr(self):
00204 """Return average between the upper and lower yerr."""
00205 return [(self.yerr[0][i] + self.yerr[1][i]) / 2
00206 for i in range(self.nbins)]
00207 def scale(self, factor):
00208 """
00209 Scale contents, errors, and over/underflow by the given scale factor.
00210 """
00211 self.y = [x * factor for x in self.y]
00212 self.yerr[0] = [x * factor for x in self.yerr[0]]
00213 self.yerr[1] = [x * factor for x in self.yerr[1]]
00214 self.overflow *= factor
00215 self.underflow *= factor
00216 def delete_bin(self, index):
00217 """
00218 Delete a the contents of a bin, sliding all the other data one bin to
00219 the left. This can be useful for histograms with labeled bins.
00220 """
00221 self.nbins -= 1
00222 self.xedges.pop()
00223 self.x.pop()
00224 self.width.pop()
00225 self.y.pop(index)
00226 self.xerr[0].pop(index)
00227 self.xerr[1].pop(index)
00228 self.yerr[0].pop(index)
00229 self.yerr[1].pop(index)
00230 if self.binlabels:
00231 self.binlabels.pop(index)
00232 def TH1F(self, name=None):
00233 """Return a ROOT.TH1F object with contents of this Hist"""
00234 th1f = ROOT.TH1F(name or self.name, "", self.nbins,
00235 array.array('f', self.xedges))
00236 th1f.Sumw2()
00237 th1f.SetTitle("%s;%s;%s" % (self.title, self.xlabel, self.ylabel))
00238 for i in range(self.nbins):
00239 th1f.SetBinContent(i + 1, self.y[i])
00240 try:
00241 th1f.SetBinError(i + 1, (self.yerr[0][i] + self.yerr[1][i]) / 2)
00242 except TypeError:
00243 th1f.SetBinError(i + 1, self.yerr[i])
00244 if self.binlabels:
00245 th1f.GetXaxis().SetBinLabel(i + 1, self.binlabels[i])
00246 th1f.SetBinContent(0, self.underflow)
00247 th1f.SetBinContent(self.nbins + 2, self.overflow)
00248 th1f.SetEntries(self.entries)
00249 return th1f
00250 def TGraph(self, name=None):
00251 """Return a ROOT.TGraphAsymmErrors object with contents of this Hist"""
00252 x = array.array('f', self.x)
00253 y = array.array('f', self.y)
00254 xl = array.array('f', self.xerr[0])
00255 xh = array.array('f', self.xerr[1])
00256 yl = array.array('f', self.yerr[0])
00257 yh = array.array('f', self.yerr[1])
00258 tgraph = ROOT.TGraphAsymmErrors(self.nbins, x, y, xl, xh, yl, yh)
00259 tgraph.SetName(name or self.name)
00260 tgraph.SetTitle('%s;%s;%s' % (self.title, self.xlabel, self.ylabel))
00261 return tgraph
00262 def divide(self, denominator):
00263 """
00264 Return the simple quotient with errors added in quadrature.
00265
00266 This function is called by the division operator:
00267 hist3 = hist1.divide_wilson(hist2) <--> hist3 = hist1 / hist2
00268 """
00269 quotient = copy.deepcopy(self)
00270 num_yerr = self.av_yerr()
00271 den_yerr = denominator.av_yerr()
00272 quotient.yerr = [0. for i in range(self.nbins)]
00273 for i in range(self.nbins):
00274 if denominator[i] == 0 or self[i] == 0:
00275 quotient.y[i] = 0.
00276 else:
00277 quotient.y[i] = self[i] / denominator[i]
00278 quotient.yerr[i] = quotient[i]
00279 quotient.yerr[i] *= math.sqrt((num_yerr[i] / self[i]) ** 2 +
00280 (den_yerr[i] / denominator[i]) ** 2)
00281 if quotient.yerr[i] > quotient[i]:
00282 quotient.yerr[i] = quotient[i]
00283 quotient.yerr = [quotient.yerr, quotient.yerr]
00284 return quotient
00285 def divide_wilson(self, denominator):
00286 """Return an efficiency plot with Wilson score interval errors."""
00287 eff, upper_err, lower_err = wilson_interval(self.y, denominator.y)
00288 quotient = copy.deepcopy(self)
00289 quotient.y = eff
00290 quotient.yerr = [lower_err, upper_err]
00291 return quotient
00292
00293 class HistStack(object):
00294 """
00295 A container to hold Hist objects for plotting together.
00296
00297 When plotting, the title and the x and y labels of the last Hist added
00298 will be used unless specified otherwise in the constructor.
00299 """
00300 def __init__(self, hists=None, title=None, xlabel=None, ylabel=None):
00301 self.hists = []
00302 self.kwargs = []
00303 self.title = title
00304 self.xlabel = xlabel
00305 self.ylabel = ylabel
00306 if hists:
00307 for hist in hists:
00308 self.add(hist)
00309 def __getitem__(self, index):
00310 """Return indexth hist: x.__getitem__(y) <==> x[y]"""
00311 return self.hists[index]
00312 def __setitem__(self, index, value):
00313 """Replace indexth hist with value: x.__setitem__(i, y) <==> x[i]=y"""
00314 self.hists[index] = value
00315 def __len__(self):
00316 """Return the number of hists in the stack: x.__len__() <==> len(x)"""
00317 return len(self.hists)
00318 def __iter__(self):
00319 """Iterate through hists in the stack: x.__iter__() <==> iter(x)"""
00320 return iter(self.hists)
00321 def max(self):
00322 """Return the value of the highest bin of all hists in the stack."""
00323 maxes = [max(x) for x in self.hists]
00324 try:
00325 return max(maxes)
00326 except ValueError:
00327 return 0
00328 def stackmax(self):
00329 """Return the value of the highest bin in the addition of all hists."""
00330 try:
00331 return max([sum([h[i] for h in self.hists])
00332 for i in range(self.hists[0].nbins)])
00333 except:
00334 print [h.nbins for h in self.hists]
00335 def scale(self, factor):
00336 """Scale all Hists by factor."""
00337 for hist in self.hists:
00338 hist.scale(factor)
00339 def min(self, threshold=None):
00340 """
00341 Return the value of the lowest bin of all hists in the stack.
00342
00343 If threshold is specified, only values above the threshold will be
00344 considered.
00345 """
00346 mins = [x.min(threshold) for x in self.hists]
00347 return min(mins)
00348 def add(self, hist, **kwargs):
00349 """
00350 Add a Hist object to this stack.
00351
00352 Any additional keyword arguments will be added to just this Hist
00353 when the stack is plotted.
00354 """
00355 if "label" in kwargs:
00356 hist.label = kwargs['label']
00357 del kwargs['label']
00358 self.hists.append(hist)
00359 self.kwargs.append(kwargs)
00360
00361
00362
00363
00364 class RootFile(object):
00365 """A wrapper for TFiles, allowing easier access to methods."""
00366 def __init__(self, filename, name=None):
00367 self.filename = filename
00368 self.name = name or filename[:-5]
00369 self.file = ROOT.TFile(filename, 'read')
00370 if self.file.IsZombie():
00371 raise ValueError("Error opening %s" % filename)
00372 def cd(self, directory=''):
00373 """Make directory the current directory."""
00374 self.file.cd(directory)
00375 def get(self, object_name, path=None, type1D=Hist, type2D=Hist2D):
00376 """Return a Hist object from the given path within this file."""
00377 if not path:
00378 path = os.path.dirname(object_name)
00379 object_name = os.path.basename(object_name)
00380 try:
00381 roothist = self.file.GetDirectory(path).Get(object_name)
00382 except ReferenceError, e:
00383 raise ReferenceError(e)
00384 try:
00385 return type2D(roothist)
00386 except TypeError:
00387 return type1D(roothist)
00388
00389 def ls(directory=None):
00390 """Return a python list of ROOT object names from the given directory."""
00391 if directory == None:
00392 keys = ROOT.gDirectory.GetListOfKeys()
00393 else:
00394 keys = ROOT.gDirectory.GetDirectory(directory).GetListOfKeys()
00395 key = keys[0]
00396 names = []
00397 while key:
00398 obj = key.ReadObj()
00399 key = keys.After(key)
00400 names.append(obj.GetName())
00401 return names
00402
00403 def pwd():
00404 """Return ROOT's present working directory."""
00405 return ROOT.gDirectory.GetPath()
00406
00407 def get(object_name):
00408 """Return a Hist object with the given name."""
00409 return Hist(ROOT.gDirectory.Get(object_name))
00410
00411
00412
00413
00414 def replace(string, replacements):
00415 """
00416 Modify a string based on a list of patterns and substitutions.
00417
00418 replacements should be a list of two-entry tuples, the first entry giving
00419 a string to search for and the second entry giving the string with which
00420 to replace it. If replacements includes a pattern entry containing
00421 'use_regexp', then all patterns will be treated as regular expressions
00422 using re.sub.
00423 """
00424 if not replacements:
00425 return string
00426 if 'use_regexp' in [x for x,y in replacements]:
00427 for pattern, repl in [x for x in replacements
00428 if x[0] != 'use_regexp']:
00429 string = re.sub(pattern, repl, string)
00430 else:
00431 for pattern, repl in replacements:
00432 string = string.replace(pattern, repl)
00433 if re.match(_all_whitespace_string, string):
00434 return ""
00435 return string
00436
00437 def process_bin_labels(binlabels):
00438 has_labels = False
00439 for binlabel in binlabels:
00440 if binlabel:
00441 has_labels = True
00442 if has_labels:
00443 return binlabels
00444 else:
00445 return None
00446
00447 def wilson_interval(numerator_array, denominator_array):
00448 eff, upper_err, lower_err = [], [], []
00449 for n, d in zip(numerator_array, denominator_array):
00450 try:
00451 p = float(n) / d
00452 s = math.sqrt(p * (1 - p) / d + 1 / (4 * d * d))
00453 t = p + 1 / (2 * d)
00454 eff.append(p)
00455 upper_err.append(-p + 1/(1 + 1/d) * (t + s))
00456 lower_err.append(+p - 1/(1 + 1/d) * (t - s))
00457 except ZeroDivisionError:
00458 eff.append(0)
00459 upper_err.append(0)
00460 lower_err.append(0)
00461 return eff, upper_err, lower_err
00462
00463 def find_num_processors():
00464 import os
00465 try:
00466 num_processors = os.sysconf('SC_NPROCESSORS_ONLN')
00467 except:
00468 try:
00469 num_processors = os.environ['NUMBER_OF_PROCESSORS']
00470 except:
00471 num_processors = 1
00472 return num_processors