CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_4_1_8_patch13/src/PhysicsTools/PythonAnalysis/python/rootplot/utilities.py

Go to the documentation of this file.
00001 """
00002 Utilities for rootplot including histogram classes.
00003 """
00004 
00005 __license__ = '''\
00006 Copyright (c) 2009-2010 Jeff Klukas <klukas@wisc.edu>
00007 
00008 Permission is hereby granted, free of charge, to any person obtaining a copy
00009 of this software and associated documentation files (the "Software"), to deal
00010 in the Software without restriction, including without limitation the rights
00011 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00012 copies of the Software, and to permit persons to whom the Software is
00013 furnished to do so, subject to the following conditions:
00014 
00015 The above copyright notice and this permission notice shall be included in
00016 all copies or substantial portions of the Software.
00017 
00018 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00019 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00020 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00021 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00022 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00023 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00024 THE SOFTWARE.
00025 '''
00026 
00027 ################ Import python libraries
00028 
00029 import math
00030 import ROOT
00031 import re
00032 import copy
00033 import array
00034 import os.path
00035 
00036 ################ Define classes
00037 
00038 class Hist2D(object):
00039     """A container to hold the parameters from a 2D ROOT histogram."""
00040     def __init__(self, hist, label="__nolabel__", title=None,
00041                  xlabel=None, ylabel=None):
00042         try:
00043             if not hist.InheritsFrom("TH2"):
00044                 raise TypeError("%s does not inherit from TH2" % hist)
00045         except:
00046             raise TypeError("%s is not a ROOT object" % hist)
00047         self.rootclass = hist.ClassName()
00048         self.name = hist.GetName()
00049         self.nbinsx = nx = hist.GetNbinsX()
00050         self.nbinsy = ny = hist.GetNbinsY()
00051         self.binlabelsx = process_bin_labels([hist.GetXaxis().GetBinLabel(i)
00052                                                for i in range(1, nx + 1)])
00053         if self.binlabelsx:
00054             self.nbinsx = nx = self.binlabelsx.index('')
00055             self.binlabelsx = self.binlabelsx[:ny]
00056         self.binlabelsy = process_bin_labels([hist.GetYaxis().GetBinLabel(i)
00057                                                for i in range(1, ny + 1)])
00058         if self.binlabelsy:
00059             self.nbinsy = ny = self.binlabelsy.index('')
00060             self.binlabelsy = self.binlabelsy[:ny]
00061         self.entries = hist.GetEntries()
00062         self.content = [[hist.GetBinContent(i, j) for i in range(1, nx + 1)]
00063                         for j in range(1, ny + 1)]
00064         self.xedges = [hist.GetXaxis().GetBinLowEdge(i)
00065                              for i in range(1, nx + 2)]
00066         self.yedges = [hist.GetYaxis().GetBinLowEdge(i)
00067                              for i in range(1, ny + 2)]
00068         self.x      = [(self.xedges[i+1] + self.xedges[i])/2
00069                              for i in range(nx)]
00070         self.y      = [(self.yedges[i+1] + self.yedges[i])/2
00071                              for i in range(ny)]
00072         self.title  = title or hist.GetTitle()
00073         self.xlabel = xlabel or hist.GetXaxis().GetTitle()
00074         self.ylabel = ylabel or hist.GetYaxis().GetTitle()
00075         self.label  = label
00076     def _flat_content(self):
00077         flatcontent = []
00078         for row in self.content:
00079             flatcontent += row
00080         return flatcontent
00081     def __getitem__(self, index):
00082         """Return contents of indexth bin: x.__getitem__(y) <==> x[y]"""
00083         return self._flat_content()[index]
00084     def __len__(self):
00085         """Return the number of bins: x.__len__() <==> len(x)"""
00086         return len(self._flat_content())
00087     def __iter__(self):
00088         """Iterate through bins: x.__iter__() <==> iter(x)"""
00089         return iter(self._flat_content())
00090     def TH2F(self, name=""):
00091         """Return a ROOT.TH2F object with contents of this Hist2D."""
00092         th2f = ROOT.TH2F(name, "",
00093                          self.nbinsx, array.array('f', self.xedges),
00094                          self.nbinsy, array.array('f', self.yedges))
00095         th2f.SetTitle("%s;%s;%s" % (self.title, self.xlabel, self.ylabel))
00096         for ix in range(self.nbinsx):
00097             for iy in range(self.nbinsy):
00098                 th2f.SetBinContent(ix + 1, iy + 1, self.content[iy][ix])
00099         return th2f
00100 
00101 class Hist(object):
00102     """A container to hold the parameters from a ROOT histogram."""
00103     def __init__(self, hist, label="__nolabel__",
00104                  name=None, title=None, xlabel=None, ylabel=None):
00105         try:
00106             hist.GetNbinsX()
00107             self.__init_TH1(hist)
00108         except AttributeError:
00109             try:
00110                 hist.GetN()
00111                 self.__init_TGraph(hist)
00112             except AttributeError:
00113                 raise TypeError("%s is not a 1D histogram or TGraph" % hist)
00114         self.rootclass = hist.ClassName()
00115         self.name = name or hist.GetName()
00116         self.title  = title or hist.GetTitle().split(';')[0]
00117         self.xlabel = xlabel or hist.GetXaxis().GetTitle()
00118         self.ylabel = ylabel or hist.GetYaxis().GetTitle()
00119         self.label  = label
00120     def __init_TH1(self, hist):
00121         self.nbins = n = hist.GetNbinsX()
00122         self.binlabels = process_bin_labels([hist.GetXaxis().GetBinLabel(i)
00123                                              for i in range(1, n + 1)])
00124         if self.binlabels and '' in self.binlabels:
00125             # Get rid of extra non-labeled bins
00126             self.nbins = n = self.binlabels.index('')
00127             self.binlabels = self.binlabels[:n]
00128         self.entries = hist.GetEntries()
00129         self.xedges = [hist.GetBinLowEdge(i) for i in range(1, n + 2)]
00130         self.x      = [(self.xedges[i+1] + self.xedges[i])/2 for i in range(n)]
00131         self.xerr   = [(self.xedges[i+1] - self.xedges[i])/2 for i in range(n)]
00132         self.xerr   = [self.xerr[:], self.xerr[:]]
00133         self.width  = [(self.xedges[i+1] - self.xedges[i])   for i in range(n)]
00134         self.y      = [hist.GetBinContent(i) for i in range(1, n + 1)]
00135         self.yerr   = [hist.GetBinError(  i) for i in range(1, n + 1)]
00136         self.yerr   = [self.yerr[:], self.yerr[:]]
00137         self.underflow = hist.GetBinContent(0)
00138         self.overflow  = hist.GetBinContent(self.nbins + 1)
00139     def __init_TGraph(self, hist):
00140         self.nbins = n = hist.GetN()
00141         self.x, self.y = [], []
00142         x, y = ROOT.Double(0), ROOT.Double(0)
00143         for i in range(n):
00144             hist.GetPoint(i, x, y)
00145             self.x.append(copy.copy(x))
00146             self.y.append(copy.copy(y))
00147         lower = [max(0, hist.GetErrorXlow(i))  for i in xrange(n)]
00148         upper = [max(0, hist.GetErrorXhigh(i)) for i in xrange(n)]
00149         self.xerr = [lower[:], upper[:]]
00150         lower = [max(0, hist.GetErrorYlow(i))  for i in xrange(n)]
00151         upper = [max(0, hist.GetErrorYhigh(i)) for i in xrange(n)]
00152         self.yerr = [lower[:], upper[:]]
00153         self.xedges = [self.x[i] - self.xerr[0][i] for i in xrange(n)]
00154         self.xedges.append(self.x[n - 1] + self.xerr[1][n - 1])
00155         self.width = [self.xedges[i + 1] - self.xedges[i] for i in range(n)]
00156         self.underflow, self.overflow = 0, 0
00157         self.binlabels = None
00158         self.entries = n
00159     def __add__(self, b):
00160         """Return the sum of self and b: x.__add__(y) <==> x + y"""
00161         c = copy.copy(self)
00162         for i in range(len(self)):
00163             c.y[i] += b.y[i]
00164             c.yerr[i] += b.yerr[i]
00165         c.overflow += b.overflow
00166         c.underflow += b.underflow
00167         return c
00168     def __sub__(self, b):
00169         """Return the difference of self and b: x.__sub__(y) <==> x - y"""
00170         c = copy.copy(self)
00171         for i in range(len(self)):
00172             c.y[i] -= b.y[i]
00173             c.yerr[i] -= b.yerr[i]
00174         c.overflow -= b.overflow
00175         c.underflow -= b.underflow
00176         return c
00177     def __div__(self, denominator):
00178         return self.divide(denominator)
00179     def __getitem__(self, index):
00180         """Return contents of indexth bin: x.__getitem__(y) <==> x[y]"""
00181         return self.y[index]
00182     def __setitem__(self, index, value):
00183         """Set contents of indexth bin: x.__setitem__(i, y) <==> x[i]=y"""
00184         self.y[index] = value
00185     def __len__(self):
00186         """Return the number of bins: x.__len__() <==> len(x)"""
00187         return self.nbins
00188     def __iter__(self):
00189         """Iterate through bins: x.__iter__() <==> iter(x)"""
00190         return iter(self.y)
00191     def min(self, threshold=None):
00192         """Return the y-value of the bottom tip of the lowest errorbar."""
00193         vals = [(yval - yerr) for yval, yerr in zip(self.y, self.yerr[0])
00194                 if (yval - yerr) > threshold]
00195         if vals:
00196             return min(vals)
00197         else:
00198             return threshold
00199     def av_xerr(self):
00200         """Return average between the upper and lower xerr."""
00201         return [(self.xerr[0][i] + self.xerr[1][i]) / 2
00202                 for i in range(self.nbins)]
00203     def av_yerr(self):
00204         """Return average between the upper and lower yerr."""
00205         return [(self.yerr[0][i] + self.yerr[1][i]) / 2
00206                 for i in range(self.nbins)]
00207     def scale(self, factor):
00208         """
00209         Scale contents, errors, and over/underflow by the given scale factor.
00210         """
00211         self.y = [x * factor for x in self.y]
00212         self.yerr[0] = [x * factor for x in self.yerr[0]]
00213         self.yerr[1] = [x * factor for x in self.yerr[1]]
00214         self.overflow *= factor
00215         self.underflow *= factor
00216     def delete_bin(self, index):
00217         """
00218         Delete a the contents of a bin, sliding all the other data one bin to
00219         the left.  This can be useful for histograms with labeled bins.
00220         """
00221         self.nbins -= 1
00222         self.xedges.pop()
00223         self.x.pop()
00224         self.width.pop()
00225         self.y.pop(index)
00226         self.xerr[0].pop(index)
00227         self.xerr[1].pop(index)
00228         self.yerr[0].pop(index)
00229         self.yerr[1].pop(index)
00230         if self.binlabels:
00231             self.binlabels.pop(index)
00232     def TH1F(self, name=None):
00233         """Return a ROOT.TH1F object with contents of this Hist"""
00234         th1f = ROOT.TH1F(name or self.name, "", self.nbins,
00235                          array.array('f', self.xedges))
00236         th1f.Sumw2()
00237         th1f.SetTitle("%s;%s;%s" % (self.title, self.xlabel, self.ylabel))
00238         for i in range(self.nbins):
00239             th1f.SetBinContent(i + 1, self.y[i])
00240             try:
00241                 th1f.SetBinError(i + 1, (self.yerr[0][i] + self.yerr[1][i]) / 2)
00242             except TypeError:
00243                 th1f.SetBinError(i + 1, self.yerr[i])
00244             if self.binlabels:
00245                 th1f.GetXaxis().SetBinLabel(i + 1, self.binlabels[i])
00246         th1f.SetBinContent(0, self.underflow)
00247         th1f.SetBinContent(self.nbins + 2, self.overflow)
00248         th1f.SetEntries(self.entries)
00249         return th1f
00250     def TGraph(self, name=None):
00251         """Return a ROOT.TGraphAsymmErrors object with contents of this Hist"""
00252         x = array.array('f', self.x)
00253         y = array.array('f', self.y)
00254         xl = array.array('f', self.xerr[0])
00255         xh = array.array('f', self.xerr[1])
00256         yl = array.array('f', self.yerr[0])
00257         yh = array.array('f', self.yerr[1])
00258         tgraph = ROOT.TGraphAsymmErrors(self.nbins, x, y, xl, xh, yl, yh)
00259         tgraph.SetName(name or self.name)
00260         tgraph.SetTitle('%s;%s;%s' % (self.title, self.xlabel, self.ylabel))
00261         return tgraph
00262     def divide(self, denominator):
00263         """
00264         Return the simple quotient with errors added in quadrature.
00265 
00266         This function is called by the division operator:
00267             hist3 = hist1.divide_wilson(hist2) <--> hist3 = hist1 / hist2
00268         """
00269         quotient = copy.deepcopy(self)
00270         num_yerr = self.av_yerr()
00271         den_yerr = denominator.av_yerr()
00272         quotient.yerr = [0. for i in range(self.nbins)]
00273         for i in range(self.nbins):
00274             if denominator[i] == 0 or self[i] == 0:
00275                 quotient.y[i] = 0.
00276             else:
00277                 quotient.y[i] = self[i] / denominator[i]
00278                 quotient.yerr[i] = quotient[i]
00279                 quotient.yerr[i] *= math.sqrt((num_yerr[i] / self[i]) ** 2 +
00280                                        (den_yerr[i] / denominator[i]) ** 2)
00281             if quotient.yerr[i] > quotient[i]:
00282                 quotient.yerr[i] = quotient[i]
00283         quotient.yerr = [quotient.yerr, quotient.yerr]
00284         return quotient
00285     def divide_wilson(self, denominator):
00286         """Return an efficiency plot with Wilson score interval errors."""
00287         eff, upper_err, lower_err = wilson_interval(self.y, denominator.y)
00288         quotient = copy.deepcopy(self)
00289         quotient.y = eff
00290         quotient.yerr = [lower_err, upper_err]
00291         return quotient
00292 
00293 class HistStack(object):
00294     """
00295     A container to hold Hist objects for plotting together.
00296 
00297     When plotting, the title and the x and y labels of the last Hist added
00298     will be used unless specified otherwise in the constructor.
00299     """
00300     def __init__(self, hists=None, title=None, xlabel=None, ylabel=None):
00301         self.hists  = []
00302         self.kwargs = []
00303         self.title  = title
00304         self.xlabel = xlabel
00305         self.ylabel = ylabel
00306         if hists:
00307             for hist in hists:
00308                 self.add(hist)
00309     def __getitem__(self, index):
00310         """Return indexth hist: x.__getitem__(y) <==> x[y]"""
00311         return self.hists[index]
00312     def __setitem__(self, index, value):
00313         """Replace indexth hist with value: x.__setitem__(i, y) <==> x[i]=y"""
00314         self.hists[index] = value
00315     def __len__(self):
00316         """Return the number of hists in the stack: x.__len__() <==> len(x)"""
00317         return len(self.hists)
00318     def __iter__(self):
00319         """Iterate through hists in the stack: x.__iter__() <==> iter(x)"""
00320         return iter(self.hists)
00321     def max(self):
00322         """Return the value of the highest bin of all hists in the stack."""
00323         maxes = [max(x) for x in self.hists]
00324         try:
00325             return max(maxes)
00326         except ValueError:
00327             return 0
00328     def stackmax(self):
00329         """Return the value of the highest bin in the addition of all hists."""
00330         try:
00331             return max([sum([h[i] for h in self.hists])
00332                        for i in range(self.hists[0].nbins)])
00333         except:
00334             print [h.nbins for h in self.hists]
00335     def scale(self, factor):
00336         """Scale all Hists by factor."""
00337         for hist in self.hists:
00338             hist.scale(factor)
00339     def min(self, threshold=None):
00340         """
00341         Return the value of the lowest bin of all hists in the stack.
00342 
00343         If threshold is specified, only values above the threshold will be
00344         considered.
00345         """
00346         mins = [x.min(threshold) for x in self.hists]
00347         return min(mins)
00348     def add(self, hist, **kwargs):
00349         """
00350         Add a Hist object to this stack.
00351 
00352         Any additional keyword arguments will be added to just this Hist
00353         when the stack is plotted.
00354         """
00355         if "label" in kwargs:
00356             hist.label = kwargs['label']
00357             del kwargs['label']
00358         self.hists.append(hist)
00359         self.kwargs.append(kwargs)
00360 
00361 
00362 ################ Define functions and classes for navigating within ROOT
00363 
00364 class RootFile(object):
00365     """A wrapper for TFiles, allowing easier access to methods."""
00366     def __init__(self, filename, name=None):
00367         self.filename = filename
00368         self.name = name or filename[:-5]
00369         self.file = ROOT.TFile(filename, 'read')
00370         if self.file.IsZombie():
00371             raise ValueError("Error opening %s" % filename)
00372     def cd(self, directory=''):
00373         """Make directory the current directory."""
00374         self.file.cd(directory)
00375     def get(self, object_name, path=None, type1D=Hist, type2D=Hist2D):
00376         """Return a Hist object from the given path within this file."""
00377         if not path:
00378             path = os.path.dirname(object_name)
00379             object_name = os.path.basename(object_name)
00380         try:
00381             roothist = self.file.GetDirectory(path).Get(object_name)
00382         except ReferenceError, e:
00383             raise ReferenceError(e)
00384         try:
00385             return type2D(roothist)
00386         except TypeError:
00387             return type1D(roothist)
00388 
00389 def ls(directory=None):
00390     """Return a python list of ROOT object names from the given directory."""
00391     if directory == None:
00392         keys = ROOT.gDirectory.GetListOfKeys()
00393     else:
00394         keys = ROOT.gDirectory.GetDirectory(directory).GetListOfKeys()
00395     key = keys[0]
00396     names = []
00397     while key:
00398         obj = key.ReadObj()
00399         key = keys.After(key)
00400         names.append(obj.GetName())
00401     return names
00402 
00403 def pwd():
00404     """Return ROOT's present working directory."""
00405     return ROOT.gDirectory.GetPath()
00406 
00407 def get(object_name):
00408     """Return a Hist object with the given name."""
00409     return Hist(ROOT.gDirectory.Get(object_name))
00410 
00411 
00412 ################ Define additional helping functions
00413 
00414 def replace(string, replacements):
00415     """
00416     Modify a string based on a list of patterns and substitutions.
00417 
00418     replacements should be a list of two-entry tuples, the first entry giving
00419     a string to search for and the second entry giving the string with which
00420     to replace it.  If replacements includes a pattern entry containing
00421     'use_regexp', then all patterns will be treated as regular expressions
00422     using re.sub.
00423     """
00424     if not replacements:
00425         return string
00426     if 'use_regexp' in [x for x,y in replacements]:
00427         for pattern, repl in [x for x in replacements
00428                               if x[0] != 'use_regexp']:
00429             string = re.sub(pattern, repl, string)
00430     else:
00431         for pattern, repl in replacements:
00432             string = string.replace(pattern, repl)
00433     if re.match(_all_whitespace_string, string):
00434         return ""
00435     return string
00436 
00437 def process_bin_labels(binlabels):
00438     has_labels = False
00439     for binlabel in binlabels:
00440         if binlabel:
00441             has_labels = True
00442     if has_labels:
00443         return binlabels
00444     else:
00445         return None
00446 
00447 def wilson_interval(numerator_array, denominator_array):
00448     eff, upper_err, lower_err = [], [], []
00449     for n, d in zip(numerator_array, denominator_array):
00450         try:
00451             p = float(n) / d
00452             s = math.sqrt(p * (1 - p) / d + 1 / (4 * d * d))
00453             t = p + 1 / (2 * d)
00454             eff.append(p)
00455             upper_err.append(-p + 1/(1 + 1/d) * (t + s))
00456             lower_err.append(+p - 1/(1 + 1/d) * (t - s))
00457         except ZeroDivisionError:
00458             eff.append(0)
00459             upper_err.append(0)
00460             lower_err.append(0)
00461     return eff, upper_err, lower_err
00462 
00463 def find_num_processors():
00464     import os
00465     try:
00466         num_processors = os.sysconf('SC_NPROCESSORS_ONLN')
00467     except:
00468         try:
00469             num_processors = os.environ['NUMBER_OF_PROCESSORS']
00470         except:
00471             num_processors = 1
00472     return num_processors