CMS 3D CMS Logo

rootmath.py
Go to the documentation of this file.
1 """
2 rootmath description
3 """
4 from __future__ import absolute_import
5 
6 __license__ = '''\
7 Copyright (c) 2009-2010 Jeff Klukas <klukas@wisc.edu>
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 THE SOFTWARE.
26 '''
27 
28 
29 ##############################################################################
30 ######## Import python libraries #############################################
31 
32 import sys
33 import shutil
34 import math
35 import os
36 import re
37 import tempfile
38 import copy
39 import fnmatch
40 from . import argparse
41 from os.path import join as joined
42 from .utilities import rootglob, loadROOT
43 
44 ROOT = loadROOT()
45 
46 ##############################################################################
47 ######## Define globals ######################################################
48 
49 from .version import __version__ # version number
50 
51 
52 ##############################################################################
53 ######## Classes #############################################################
54 
55 class Target(object):
56  """Description."""
57  def __init__(self, filename, path='', scale=1., scale_error=None):
58  self.filename = filename
59  self.path = path
60  self.scale = scale
61  self.scale_error = scale_error
62  def __repr__(self):
63  return "%s:%s:%f" % (self.filename, self.path, self.scale)
64 
65 def newadd(outfile, targets, dest_path=""):
66  """Description."""
67  if allsame([x.filename for x in targets]):
68  f = ROOT.TFile(targets[0].filename, 'read')
69  paths = [x.path for x in targets]
70  scales = [x.scale for x in targets]
71  scale_errors = [x.scale_error for x in targets]
72  if f.GetDirectory(paths[0]):
73  destdir = pathdiff2(paths) # What does this do?
74  for h in [os.path.basename(x) for x in
75  rootglob(f, paths[0] + '/*')]:
76  hists = [f.GetDirectory(x).Get(h) for x in paths]
77  if not alltrue([x and x.InheritsFrom('TH1') for x in hists]):
78  continue
79  dest = joined(destdir, h)
80  add(outfile, dest, hists, scales, dest_path, scale_errors=scale_errors)
81  else:
82  hists = [f.Get(x) for x in paths]
83  if alltrue([x and x.InheritsFrom('TH1') for x in hists]):
84  dest = pathdiff2(paths)
85  add(outfile, dest, hists, scales, scale_errors=scale_errors)
86  else:
87  dict_targets = {} # Stores paths and scales, key = filename
88  dict_tfiles = {} # Stores map from filenames to Root.TFile() objects
89  for target in targets:
90  dict_targets.setdefault(target.filename, []).append((target.path, target.scale))
91  if (target.filename not in dict_tfiles):
92  # Only open root files once
93  dict_tfiles[target.filename] = ROOT.TFile(target.filename, 'read')
94  # dict_targets now a dictionary, with keys the filenames, example:
95  # {'fileA.root': [('path0',scale0), ('path1', scale1)],
96  # 'fileB.root': [('path3', scale3)]}
97  f = ROOT.TFile(targets[0].filename, 'read')
98  if f.GetDirectory(targets[0].path):
99  # Create list of histograms to get
100  destdir = '/' # should probably use pathdiff2 somehow
101  histnames = [os.path.basename(x) for x in
102  rootglob(f, targets[0].path + '/*')]
103  f.Close()
104  # For each histogram name found, grab it from
105  # every file & path
106  for histname in histnames:
107  hists = []
108  scales = []
109  for filename in dict_targets:
110  tfile_cur = dict_tfiles[filename]
111  for path, scale in dict_targets[filename]:
112  hists.append(tfile_cur.GetDirectory(path).Get(histname))
113  scales.append(scale)
114  #print "%s:%s:%s:%f" % (filename, path, histname, scale)
115  if not alltrue([x and x.InheritsFrom('TH1') for x in hists]):
116  continue
117  dest = joined(destdir, histname)
118  add(outfile, dest, hists, scales, dest_path)
119  else:
120  print "Code not written yet to add histograms from multiple files"
121  return
122  return
123 
124 
125 ##############################################################################
126 ######## Implementation ######################################################
127 
128 def walk_rootfile(rootfile, path=''):
129  #### Yield (path, folders, objects) for each directory under path.
130  keys = rootfile.GetDirectory(path).GetListOfKeys()
131  folders, objects = [], []
132  for key in keys:
133  name = key.GetName()
134  classname = key.GetClassName()
135  newpath = joined(path, name)
136  dimension = 0
137  if 'TDirectory' in classname:
138  folders.append(name)
139  else:
140  objects.append(name)
141  yield path, folders, objects
142  for folder in folders:
143  for x in walk_rootfile(rootfile, joined(path, folder)):
144  yield x
145 
146 def allsame(iterable):
147  for element in iterable:
148  if element != iterable[0]:
149  return False
150  return True
151 
152 def alltrue(iterable):
153  for element in iterable:
154  if element != True:
155  return False
156  return True
157 
158 def pathdiff(paths, joiner):
159  """
160  Return the appropriate destination for an object.
161 
162  In all cases, the result will be placed in the deepest directory shared by
163  all paths. If the histogram names are the same, the result will be named
164  based on the first directories that they do not share. Otherwise, the
165  result will be named based on the names of the other histograms.
166 
167  >>> pathdiff(['/dirA/dirB/dirX/hist', '/dirA/dirB/dirY/hist'], '_div_')
168  '/dirA/dirB/dirX_div_dirY'
169  >>> pathdiff(['/dirA/hist1', '/dirA/hist2', '/dirA/hist3'], '_plus_')
170  '/dirA/hist1_plus_hist2_plus_hist3'
171  >>> pathdiff(['/hist1', '/dirA/hist2'], '_minus_')
172  '/hist1_minus_hist2'
173  """
174  paths = [x.split('/') for x in paths]
175  dest = '/'
176  for i in range(len(paths[0])):
177  if allsame([p[i] for p in paths]):
178  dest = joined(dest, paths[0][i])
179  else:
180  break
181  name = joiner.join([p[-1] for p in paths])
182  if allsame([p[-1] for p in paths]):
183  for i in range(len(paths[0])):
184  if not allsame([p[i] for p in paths]):
185  name = joiner.join([p[i] for p in paths])
186  return joined(dest, name)
187 
188 def pathdiff2(paths, joiner='__', truncate=False):
189  """
190  Placeholder.
191  """
192  paths = [x.split('/') for x in paths]
193  commonbeginning = ''
194  for i in range(len(paths[0])):
195  if allsame([p[i] for p in paths]):
196  commonbeginning = joined(commonbeginning, paths[0][i])
197  else:
198  break
199  commonending = ''
200  for i in range(-1, -1 * len(paths[0]), -1):
201  if allsame([p[i] for p in paths]):
202  commonending = joined(paths[0][i], commonending)
203  else:
204  break
205  #return commonbeginning, commonending
206  if truncate:
207  return commonending
208  else:
209  return joined(commonbeginning, commonending)
210 
211 def pathdiff3(paths, joiner='__'):
212  """
213  Return the appropriate destination for an object.
214 
215  If the final objects in each path match, then the return value will be the
216  matching part of the paths. Otherwise, the output path will simply be those
217  names joined together with *joiner*. See the examples below.
218 
219  >>> pathdiff3(['/dirA/dirX/hist', '/dirA/dirY/hist'])
220  '/hist'
221  >>> pathdiff3(['/dirA/dirX/dirB/hist', '/dirA/dirY/dirB/hist'])
222  '/dirB/hist'
223  >>> pathdiff3(['/dirA/hist1', '/dirA/hist2', '/dirA/hist3'], '_plus_')
224  '/hist1_plus_hist2_plus_hist3'
225  >>> pathdiff3(['/hist1', '/dirA/hist2'], '_div_')
226  '/hist1_div_hist2'
227  """
228  paths = [x.split('/') for x in paths]
229  if allsame([x[-1] for x in paths]):
230  dest = paths[0][-1]
231  for i in range(-2, min([len(x) for x in paths]) * -1, -1):
232  if allsame([p[i] for p in paths]):
233  dest = joined(paths[0][i], dest)
234  else:
235  break
236  return '/' + dest
237  else:
238  return '/' + joiner.join([x[-1] for x in paths])
239 
241  def newfunc(outfile, dest, hists, scales=None, dest_path="", scale_errors=None):
242  outfile.cd()
243  for d in os.path.dirname(dest).split('/'):
244  if not ROOT.gDirectory.GetDirectory(d):
245  ROOT.gDirectory.mkdir(d)
246  ROOT.gDirectory.cd(d)
247  fn(outfile, dest, hists, scales, dest_path, scale_errors)
248  return newfunc
249 
250 def scale_with_error(hist, scale, scale_error=None):
251  '''Scale a histogram by a scale factor that has an error.
252  This takes into account the scale error to set new error bars.'''
253  hist_new = hist.Clone()
254  if scale_error:
255  for i in range(hist_new.GetNbinsX()+2):
256  hist_new.SetBinContent(i, scale)
257  hist_new.SetBinError(i, scale_error)
258  hist_new.Multiply(hist)
259  else:
260  hist_new.Scale(scale)
261  return hist_new
262 
263 @operator_func
264 def add(outfile, dest, hists, scales=None, dest_path="", scale_errors=None):
265  if not scales:
266  scales = [1. for i in range(len(hists))]
267  if not scale_errors:
268  scale_errors = [None for i in range(len(hists))]
269  sumhist = hists[0].Clone(os.path.basename(dest))
270  sumhist = scale_with_error(sumhist, scales[0], scale_errors[0])
271  #sumhist.Scale(scales[0])
272  for i in range(1,len(hists)):
273  sumhist.Add(scale_with_error(hists[i], scales[i], scale_errors[i]))
274  #sumhist.Add(hists[i], scales[i])
275  if dest_path:
276  outfile.cd()
277  if not ROOT.gDirectory.GetDirectory(dest_path):
278  ROOT.gDirectory.mkdir(dest_path)
279  ROOT.gDirectory.cd(dest_path)
280  sumhist.Write()
281  ROOT.gDirectory.cd("/")
282 
283 @operator_func
284 def subtract(outfile, dest, hists):
285  diffhist = hists[0].Clone(os.path.basename(dest))
286  for hist in hists[1:]:
287  diffhist.Add(hist, -1)
288  diffhist.Write()
289 
290 @operator_func
291 def divide(outfile, dest, numer, denom):
292  quotient = numer.Clone(os.path.basename(dest))
293  quotient.Divide(numer, denom)
294  quotient.Write()
295 
296 @operator_func
297 def bayes_divide(outfile, dest, numer, denom):
298  quotient = ROOT.TGraphAsymmErrors()
299  quotient.SetName(os.path.basename(dest))
300  quotient.BayesDivide(numer, denom)
301  quotient.Write()
302 
303 def main():
304  parser = argparse.ArgumentParser()
305  parser.add_argument('filenames', type=str, nargs='+',
306  help='root files to process')
307  parser.add_argument('--dirs', type=str, nargs='+', default=['/'],
308  help='target directories in the root files; paths to '
309  'histograms will be relative to these')
310  parser.add_argument('--add', default=[], action='append', nargs='*',
311  help='a list of directories or histograms to add')
312  parser.add_argument('--subtract', default=[], action='append', nargs='*',
313  help='a list of directories or histograms to subtract')
314  parser.add_argument('--divide', default=[], action='append', nargs='*',
315  help='2 directories or histograms to divide')
316  parser.add_argument('--bayes-divide', default=[], action='append', nargs='*',
317  help='2 directories or histograms from which to make '
318  'an efficiency plot')
319  args = parser.parse_args()
320  separators = {'add' : '_plus_',
321  'subtract' : '_minus_',
322  'divide' : '_div_',
323  'bayes_divide' : '_eff_'}
324 
325  files = [ROOT.TFile(x, 'read') for x in args.filenames]
326  outfile = ROOT.TFile('out.root', 'recreate')
327  dirs = []
328  for d in args.dirs:
329  dirs += rootglob(files[0], d)
330 
331  if len(files) == 1:
332  f = files[0]
333  for thisdir in dirs:
334  for operation_type, separator in separators.items():
335  for arg_set in getattr(args, operation_type):
336  paths = [joined(thisdir, x) for x in arg_set]
337  if f.GetDirectory(paths[0]):
338  destdir = pathdiff(paths, separator)
339  for target in [os.path.basename(x) for x in
340  rootglob(f, paths[0] + '/*')]:
341  hists = [f.GetDirectory(x).Get(target)
342  for x in paths]
343  if not alltrue([x and x.InheritsFrom('TH1')
344  for x in hists]):
345  continue
346  dest = joined(destdir, target)
347  math_func = globals()[operation_type]
348  math_func(outfile, dest, hists)
349  else:
350  hists = [f.GetDirectory(thisdir).Get(x) for x in paths]
351  if not alltrue([x and x.InheritsFrom('TH1')
352  for x in hists]):
353  continue
354  dest = pathdiff(paths, separator)
355  math_func = globals()[operation_type]
356  math_func(outfile, dest, hists)
357  else:
358  for operation_type, separator in separators.items():
359  arg_sets = getattr(args, operation_type)
360  if arg_sets and arg_sets != [[]]:
361  raise ValueError("No arguments to --%s allowed when multiple "
362  "files are specified" % operation_type)
363  elif arg_sets:
364  if 'divide' in operation_type and len(files) != 2:
365  raise ValueError("Exactly 2 files are expected with --%s; "
366  "%i given" % (operation_type, len(files)))
367  for path, folders, objects in walk_rootfile(files[0]):
368  for obj in objects:
369  hists = [x.GetDirectory(path).Get(obj) for x in files]
370  if not alltrue([x and x.InheritsFrom('TH1')
371  for x in hists]):
372  continue
373  math_func = globals()[operation_type]
374  math_func(outfile, joined(path, obj), hists)
375 
376  outfile.Close()
377 
378 if __name__ == '__main__':
379  import doctest
380  doctest.testmod()
def loadROOT(batch=True)
Define additional helping functions.
Definition: utilities.py:433
def pathdiff2(paths, joiner='__', truncate=False)
Definition: rootmath.py:188
def bayes_divide(outfile, dest, numer, denom)
Definition: rootmath.py:297
def pathdiff3(paths, joiner='__')
Definition: rootmath.py:211
def divide(outfile, dest, numer, denom)
Definition: rootmath.py:291
def pathdiff(paths, joiner)
Definition: rootmath.py:158
T min(T a, T b)
Definition: MathUtil.h:58
def scale_with_error(hist, scale, scale_error=None)
Definition: rootmath.py:250
def __init__(self, filename, path='', scale=1., scale_error=None)
Definition: rootmath.py:57
def subtract(outfile, dest, hists)
Definition: rootmath.py:284
def add(outfile, dest, hists, scales=None, dest_path="", scale_errors=None)
Definition: rootmath.py:264
def walk_rootfile(rootfile, path='')
Implementation ######################################################.
Definition: rootmath.py:128
Classes #############################################################.
Definition: rootmath.py:55
def rootglob(tdirectory, pathname)
Definition: utilities.py:557
def newadd(outfile, targets, dest_path="")
Definition: rootmath.py:65
def allsame(iterable)
Definition: rootmath.py:146
def alltrue(iterable)
Definition: rootmath.py:152
double split
Definition: MVATrainer.cc:139