CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_6_2_7/src/PhysicsTools/PythonAnalysis/python/rootplot/rootmath.py

Go to the documentation of this file.
00001 """
00002 rootmath description
00003 """
00004 
00005 __license__ = '''\
00006 Copyright (c) 2009-2010 Jeff Klukas <klukas@wisc.edu>
00007 
00008 Permission is hereby granted, free of charge, to any person obtaining a copy
00009 of this software and associated documentation files (the "Software"), to deal
00010 in the Software without restriction, including without limitation the rights
00011 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00012 copies of the Software, and to permit persons to whom the Software is
00013 furnished to do so, subject to the following conditions:
00014 
00015 The above copyright notice and this permission notice shall be included in
00016 all copies or substantial portions of the Software.
00017 
00018 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00019 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00020 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00021 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00022 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00023 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00024 THE SOFTWARE.
00025 '''
00026 
00027 
00028 ##############################################################################
00029 ######## Import python libraries #############################################
00030 
00031 import sys
00032 import shutil
00033 import math
00034 import os
00035 import re
00036 import tempfile
00037 import copy
00038 import fnmatch
00039 import argparse
00040 from os.path import join as joined
00041 from utilities import rootglob, loadROOT
00042 
00043 ROOT = loadROOT()
00044 
00045 ##############################################################################
00046 ######## Define globals ######################################################
00047 
00048 from version import __version__          # version number
00049 
00050 
00051 ##############################################################################
00052 ######## Classes #############################################################
00053 
00054 class Target(object):
00055     """Description."""
00056     def __init__(self, filename, path='', scale=1., scale_error=None):
00057         self.filename = filename
00058         self.path = path
00059         self.scale = scale
00060         self.scale_error = scale_error
00061     def __repr__(self):
00062         return "%s:%s:%f" % (self.filename, self.path, self.scale)
00063 
00064 def newadd(outfile, targets, dest_path=""):
00065     """Description."""
00066     if allsame([x.filename for x in targets]):
00067         f = ROOT.TFile(targets[0].filename, 'read')
00068         paths = [x.path for x in targets]
00069         scales = [x.scale for x in targets]
00070         scale_errors = [x.scale_error for x in targets]
00071         if f.GetDirectory(paths[0]):
00072             destdir = pathdiff2(paths)    # What does this do?
00073             for h in [os.path.basename(x) for x in
00074                       rootglob(f, paths[0] + '/*')]:
00075                 hists = [f.GetDirectory(x).Get(h) for x in paths]
00076                 if not alltrue([x and x.InheritsFrom('TH1') for x in hists]):
00077                     continue
00078                 dest = joined(destdir, h)
00079                 add(outfile, dest, hists, scales, dest_path, scale_errors=scale_errors)
00080         else:
00081             hists = [f.Get(x) for x in paths]
00082             if alltrue([x and x.InheritsFrom('TH1') for x in hists]):
00083                 dest = pathdiff2(paths)
00084                 add(outfile, dest, hists, scales, scale_errors=scale_errors)
00085     else:
00086         dict_targets = {}  # Stores paths and scales, key = filename
00087         dict_tfiles = {}   # Stores map from filenames to Root.TFile() objects
00088         for target in targets:
00089             dict_targets.setdefault(target.filename, []).append((target.path, target.scale))
00090             if (target.filename not in dict_tfiles):
00091                 # Only open root files once
00092                 dict_tfiles[target.filename] = ROOT.TFile(target.filename, 'read')
00093         # dict_targets now a dictionary, with keys the filenames, example:
00094         # {'fileA.root': [('path0',scale0), ('path1', scale1)],
00095         #  'fileB.root': [('path3', scale3)]}
00096         f = ROOT.TFile(targets[0].filename, 'read')
00097         if f.GetDirectory(targets[0].path):
00098             # Create list of histograms to get
00099             destdir = '/'               # should probably use pathdiff2 somehow
00100             histnames = [os.path.basename(x) for x in
00101                          rootglob(f, targets[0].path + '/*')]
00102             f.Close()
00103             # For each histogram name found, grab it from
00104             # every file & path
00105             for histname in histnames:
00106                 hists = []
00107                 scales = []
00108                 for filename in dict_targets:
00109                     tfile_cur = dict_tfiles[filename]
00110                     for path, scale in dict_targets[filename]:
00111                         hists.append(tfile_cur.GetDirectory(path).Get(histname))
00112                         scales.append(scale)
00113                         #print "%s:%s:%s:%f" % (filename, path, histname, scale)
00114                 if not alltrue([x and x.InheritsFrom('TH1') for x in hists]):
00115                     continue
00116                 dest = joined(destdir, histname)
00117                 add(outfile, dest, hists, scales, dest_path)
00118         else:
00119             print "Code not written yet to add histograms from multiple files"
00120             return
00121         return
00122 
00123 
00124 ##############################################################################
00125 ######## Implementation ######################################################
00126 
00127 def walk_rootfile(rootfile, path=''):
00128     #### Yield (path, folders, objects) for each directory under path.
00129     keys = rootfile.GetDirectory(path).GetListOfKeys()
00130     folders, objects = [], []
00131     for key in keys:
00132         name = key.GetName()
00133         classname = key.GetClassName()
00134         newpath = joined(path, name)
00135         dimension = 0
00136         if 'TDirectory' in classname:
00137             folders.append(name)
00138         else:
00139             objects.append(name)
00140     yield path, folders, objects
00141     for folder in folders:
00142         for x in walk_rootfile(rootfile, joined(path, folder)):
00143             yield x
00144 
00145 def allsame(iterable):
00146     for element in iterable:
00147         if element != iterable[0]:
00148             return False
00149     return True
00150 
00151 def alltrue(iterable):
00152     for element in iterable:
00153         if element != True:
00154             return False
00155     return True
00156 
00157 def pathdiff(paths, joiner):
00158     """
00159     Return the appropriate destination for an object.
00160     
00161     In all cases, the result will be placed in the deepest directory shared by
00162     all paths.  If the histogram names are the same, the result will be named
00163     based on the first directories that they do not share.  Otherwise, the 
00164     result will be named based on the names of the other histograms.
00165 
00166     >>> pathdiff(['/dirA/dirB/dirX/hist', '/dirA/dirB/dirY/hist'], '_div_')
00167     '/dirA/dirB/dirX_div_dirY'
00168     >>> pathdiff(['/dirA/hist1', '/dirA/hist2', '/dirA/hist3'], '_plus_')
00169     '/dirA/hist1_plus_hist2_plus_hist3'
00170     >>> pathdiff(['/hist1', '/dirA/hist2'], '_minus_')
00171     '/hist1_minus_hist2'
00172     """
00173     paths = [x.split('/') for x in paths]
00174     dest = '/'
00175     for i in range(len(paths[0])):
00176         if allsame([p[i] for p in paths]):
00177             dest = joined(dest, paths[0][i])
00178         else:
00179             break
00180     name = joiner.join([p[-1] for p in paths])
00181     if allsame([p[-1] for p in paths]):
00182         for i in range(len(paths[0])):
00183             if not allsame([p[i] for p in paths]):
00184                 name = joiner.join([p[i] for p in paths])
00185     return joined(dest, name)
00186 
00187 def pathdiff2(paths, joiner='__', truncate=False):
00188     """
00189     Placeholder.
00190     """
00191     paths = [x.split('/') for x in paths]
00192     commonbeginning = ''
00193     for i in range(len(paths[0])):
00194         if allsame([p[i] for p in paths]):
00195             commonbeginning = joined(commonbeginning, paths[0][i])
00196         else:
00197             break
00198     commonending = ''
00199     for i in range(-1, -1 * len(paths[0]), -1):
00200         if allsame([p[i] for p in paths]):
00201             commonending = joined(paths[0][i], commonending)
00202         else:
00203             break
00204     #return commonbeginning, commonending
00205     if truncate:
00206         return commonending
00207     else:
00208         return joined(commonbeginning, commonending)
00209 
00210 def pathdiff3(paths, joiner='__'):
00211     """
00212     Return the appropriate destination for an object.
00213     
00214     If the final objects in each path match, then the return value will be the
00215     matching part of the paths.  Otherwise, the output path will simply be those
00216     names joined together with *joiner*.  See the examples below.
00217     
00218     >>> pathdiff3(['/dirA/dirX/hist', '/dirA/dirY/hist'])
00219     '/hist'
00220     >>> pathdiff3(['/dirA/dirX/dirB/hist', '/dirA/dirY/dirB/hist'])
00221     '/dirB/hist'
00222     >>> pathdiff3(['/dirA/hist1', '/dirA/hist2', '/dirA/hist3'], '_plus_')
00223     '/hist1_plus_hist2_plus_hist3'
00224     >>> pathdiff3(['/hist1', '/dirA/hist2'], '_div_')
00225     '/hist1_div_hist2'
00226     """
00227     paths = [x.split('/') for x in paths]
00228     if allsame([x[-1] for x in paths]):
00229         dest = paths[0][-1]
00230         for i in range(-2, min([len(x) for x in paths]) * -1, -1):
00231             if allsame([p[i] for p in paths]):
00232                 dest = joined(paths[0][i], dest)
00233             else:
00234                 break
00235         return '/' + dest
00236     else:
00237         return '/' + joiner.join([x[-1] for x in paths])
00238 
00239 def operator_func(fn):
00240     def newfunc(outfile, dest, hists, scales=None, dest_path="", scale_errors=None):
00241         outfile.cd()
00242         for d in os.path.dirname(dest).split('/'):
00243             if not ROOT.gDirectory.GetDirectory(d):
00244                 ROOT.gDirectory.mkdir(d)
00245             ROOT.gDirectory.cd(d)
00246         fn(outfile, dest, hists, scales, dest_path, scale_errors)
00247     return newfunc
00248 
00249 def scale_with_error(hist, scale, scale_error=None):
00250     '''Scale a histogram by a scale factor that has an error.
00251     This takes into account the scale error to set new error bars.'''
00252     hist_new = hist.Clone()
00253     if scale_error:
00254         for i in range(hist_new.GetNbinsX()+2):
00255             hist_new.SetBinContent(i, scale)
00256             hist_new.SetBinError(i, scale_error)
00257         hist_new.Multiply(hist)
00258     else:
00259         hist_new.Scale(scale)
00260     return hist_new
00261 
00262 @operator_func
00263 def add(outfile, dest, hists, scales=None, dest_path="", scale_errors=None):
00264     if not scales:
00265         scales = [1. for i in range(len(hists))]
00266     if not scale_errors:
00267         scale_errors = [None for i in range(len(hists))]
00268     sumhist = hists[0].Clone(os.path.basename(dest))
00269     sumhist = scale_with_error(sumhist, scales[0], scale_errors[0])
00270     #sumhist.Scale(scales[0])
00271     for i in range(1,len(hists)):
00272         sumhist.Add(scale_with_error(hists[i], scales[i], scale_errors[i]))
00273         #sumhist.Add(hists[i], scales[i])
00274     if dest_path:
00275         outfile.cd()
00276         if not ROOT.gDirectory.GetDirectory(dest_path):
00277             ROOT.gDirectory.mkdir(dest_path)
00278         ROOT.gDirectory.cd(dest_path)
00279     sumhist.Write()
00280     ROOT.gDirectory.cd("/")
00281 
00282 @operator_func
00283 def subtract(outfile, dest, hists):
00284     diffhist = hists[0].Clone(os.path.basename(dest))
00285     for hist in hists[1:]:
00286         diffhist.Add(hist, -1)
00287     diffhist.Write()
00288 
00289 @operator_func
00290 def divide(outfile, dest, numer, denom):
00291     quotient = numer.Clone(os.path.basename(dest))
00292     quotient.Divide(numer, denom)
00293     quotient.Write()
00294 
00295 @operator_func
00296 def bayes_divide(outfile, dest, numer, denom):
00297     quotient = ROOT.TGraphAsymmErrors()
00298     quotient.SetName(os.path.basename(dest))
00299     quotient.BayesDivide(numer, denom)
00300     quotient.Write()
00301 
00302 def main():
00303     parser = argparse.ArgumentParser()
00304     parser.add_argument('filenames', type=str, nargs='+',
00305                        help='root files to process')
00306     parser.add_argument('--dirs', type=str, nargs='+', default=['/'],
00307                         help='target directories in the root files; paths to '
00308                         'histograms will be relative to these')
00309     parser.add_argument('--add', default=[], action='append', nargs='*',
00310                         help='a list of directories or histograms to add')
00311     parser.add_argument('--subtract', default=[], action='append', nargs='*',
00312                         help='a list of directories or histograms to subtract')
00313     parser.add_argument('--divide', default=[], action='append', nargs='*',
00314                         help='2 directories or histograms to divide')
00315     parser.add_argument('--bayes-divide', default=[], action='append', nargs='*',
00316                         help='2 directories or histograms from which to make '
00317                         'an efficiency plot')
00318     args = parser.parse_args()
00319     separators = {'add' : '_plus_',
00320                   'subtract' : '_minus_',
00321                   'divide' : '_div_',
00322                   'bayes_divide' : '_eff_'}
00323 
00324     files = [ROOT.TFile(x, 'read') for x in args.filenames]
00325     outfile = ROOT.TFile('out.root', 'recreate')
00326     dirs = []
00327     for d in args.dirs:
00328         dirs += rootglob(files[0], d)
00329 
00330     if len(files) == 1:
00331         f = files[0]
00332         for thisdir in dirs:
00333             for operation_type, separator in separators.items():
00334                 for arg_set in getattr(args, operation_type):
00335                     paths = [joined(thisdir, x) for x in arg_set]
00336                     if f.GetDirectory(paths[0]):
00337                         destdir = pathdiff(paths, separator)
00338                         for target in [os.path.basename(x) for x in
00339                                        rootglob(f, paths[0] + '/*')]:
00340                             hists = [f.GetDirectory(x).Get(target)
00341                                      for x in paths]
00342                             if not alltrue([x and x.InheritsFrom('TH1')
00343                                             for x in hists]):
00344                                 continue
00345                             dest = joined(destdir, target)
00346                             math_func = globals()[operation_type]
00347                             math_func(outfile, dest, hists)
00348                     else:
00349                         hists = [f.GetDirectory(thisdir).Get(x) for x in paths]
00350                         if not alltrue([x and x.InheritsFrom('TH1') 
00351                                         for x in hists]):
00352                             continue
00353                         dest = pathdiff(paths, separator)
00354                         math_func = globals()[operation_type]
00355                         math_func(outfile, dest, hists)
00356     else:
00357         for operation_type, separator in separators.items():
00358             arg_sets = getattr(args, operation_type)
00359             if arg_sets and arg_sets != [[]]:
00360                 raise ValueError("No arguments to --%s allowed when multiple "
00361                                  "files are specified" % operation_type)
00362             elif arg_sets:
00363                 if 'divide' in operation_type and len(files) != 2:
00364                     raise ValueError("Exactly 2 files are expected with --%s; "
00365                                      "%i given" % (operation_type, len(files)))
00366                 for path, folders, objects in walk_rootfile(files[0]):
00367                     for obj in objects:
00368                         hists = [x.GetDirectory(path).Get(obj) for x in files]
00369                         if not alltrue([x and x.InheritsFrom('TH1') 
00370                                         for x in hists]):
00371                             continue
00372                         math_func = globals()[operation_type]
00373                         math_func(outfile, joined(path, obj), hists)
00374 
00375     outfile.Close()
00376 
00377 if __name__ == '__main__':
00378     import doctest
00379     doctest.testmod()