CMS 3D CMS Logo

rootmath.py
Go to the documentation of this file.
1 """
2 rootmath description
3 """
4 from __future__ import absolute_import
5 from __future__ import print_function
6 
7 __license__ = '''\
8 Copyright (c) 2009-2010 Jeff Klukas <klukas@wisc.edu>
9 
10 Permission is hereby granted, free of charge, to any person obtaining a copy
11 of this software and associated documentation files (the "Software"), to deal
12 in the Software without restriction, including without limitation the rights
13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 copies of the Software, and to permit persons to whom the Software is
15 furnished to do so, subject to the following conditions:
16 
17 The above copyright notice and this permission notice shall be included in
18 all copies or substantial portions of the Software.
19 
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 THE SOFTWARE.
27 '''
28 
29 
30 ##############################################################################
31 ######## Import python libraries #############################################
32 
33 import sys
34 import shutil
35 import math
36 import os
37 import re
38 import tempfile
39 import copy
40 import fnmatch
41 from . import argparse
42 from os.path import join as joined
43 from .utilities import rootglob, loadROOT
44 
45 ROOT = loadROOT()
46 
47 ##############################################################################
48 ######## Define globals ######################################################
49 
50 from .version import __version__ # version number
51 
52 
53 ##############################################################################
54 ######## Classes #############################################################
55 
class Target(object):
    """One operand of a histogram operation.

    Attributes:
        filename: name of the file the operand is read from (handed to
            ``ROOT.TFile`` by ``newadd``).
        path: location of a directory or object inside that file.
        scale: multiplicative factor applied to the operand.
        scale_error: uncertainty on *scale*, or None when there is none.
    """

    def __init__(self, filename, path='', scale=1., scale_error=None):
        self.filename, self.path = filename, path
        self.scale, self.scale_error = scale, scale_error

    def __repr__(self):
        # scale_error is deliberately not part of the representation.
        shown = (self.filename, self.path, self.scale)
        return "%s:%s:%f" % shown
65 
def newadd(outfile, targets, dest_path=""):
    """Sum the histograms described by *targets* and write them to *outfile*.

    Args:
        outfile: writable ROOT file handed on to ``add``.
        targets: sequence of objects exposing ``filename``, ``path``,
            ``scale`` and ``scale_error`` (see ``Target``).
        dest_path: optional directory forwarded to ``add``; when non-empty
            ``add`` writes the sum there.

    Behaviour:
        * All targets in one file, first path is a directory: every entry
          matched by ``rootglob(f, paths[0] + '/*')`` is fetched by base name
          from each target directory; entries for which every fetched object
          is a ``TH1`` are summed and written under ``pathdiff2(paths)``.
        * All targets in one file, first path is not a directory: the paths
          are fetched as objects and summed once if all of them are ``TH1``.
        * Targets spread over several files, first path is a directory: the
          entry names found under the first target are fetched from every
          (file, path) pair and summed into ``'/'``.
        * Targets spread over several files, first path is not a directory:
          not implemented; a message is printed and nothing is written.

    Returns:
        None.
    """
    if allsame([x.filename for x in targets]):
        f = ROOT.TFile(targets[0].filename, 'read')
        paths = [x.path for x in targets]
        scales = [x.scale for x in targets]
        scale_errors = [x.scale_error for x in targets]
        if f.GetDirectory(paths[0]):
            destdir = pathdiff2(paths)
            names = [os.path.basename(x)
                     for x in rootglob(f, paths[0] + '/*')]
            for h in names:
                hists = [f.GetDirectory(x).Get(h) for x in paths]
                # Skip anything that is missing somewhere or is not a TH1.
                if not alltrue([x and x.InheritsFrom('TH1') for x in hists]):
                    continue
                dest = joined(destdir, h)
                add(outfile, dest, hists, scales, dest_path,
                    scale_errors=scale_errors)
        else:
            hists = [f.Get(x) for x in paths]
            if alltrue([x and x.InheritsFrom('TH1') for x in hists]):
                dest = pathdiff2(paths)
                # dest_path is forwarded here as in the directory case, so a
                # requested output directory is honoured for single objects.
                add(outfile, dest, hists, scales, dest_path,
                    scale_errors=scale_errors)
        return

    # Targets span several files.
    # per_file:   filename -> [(path, scale, scale_error), ...]
    # open_files: filename -> ROOT.TFile, so every file is opened only once
    per_file = {}
    open_files = {}
    for target in targets:
        per_file.setdefault(target.filename, []).append(
            (target.path, target.scale, target.scale_error))
        if target.filename not in open_files:
            open_files[target.filename] = ROOT.TFile(target.filename, 'read')

    # The first target decides which entry names are processed; reuse the
    # handle opened above rather than opening the file a second time.
    first_file = open_files[targets[0].filename]
    if not first_file.GetDirectory(targets[0].path):
        print("Code not written yet to add histograms from multiple files")
        return

    destdir = '/'  # should probably use pathdiff2 somehow
    histnames = [os.path.basename(x)
                 for x in rootglob(first_file, targets[0].path + '/*')]
    for histname in histnames:
        hists, scales, scale_errors = [], [], []
        for filename in per_file:
            tfile_cur = open_files[filename]
            for path, scale, scale_error in per_file[filename]:
                hists.append(tfile_cur.GetDirectory(path).Get(histname))
                scales.append(scale)
                # Keep the per-target scale error aligned with its histogram
                # so it reaches add() just as in the single-file case.
                scale_errors.append(scale_error)
        if not alltrue([x and x.InheritsFrom('TH1') for x in hists]):
            continue
        dest = joined(destdir, histname)
        add(outfile, dest, hists, scales, dest_path,
            scale_errors=scale_errors)
    return
124 
125 
126 ##############################################################################
127 ######## Implementation ######################################################
128 
def walk_rootfile(rootfile, path=''):
    """Yield ``(path, folders, objects)`` for *path* and every directory below.

    Args:
        rootfile: object whose ``GetDirectory(path)`` returns something with
            ``GetListOfKeys()``; each key must offer ``GetName()`` and
            ``GetClassName()``.
        path: directory to start from; ``''`` is the default starting point.

    Yields:
        Tuples ``(path, folders, objects)``. *folders* lists the names of keys
        whose class name contains ``'TDirectory'``; *objects* lists the names
        of all other keys. A directory is yielded before its sub-directories.
    """
    folders, objects = [], []
    for key in rootfile.GetDirectory(path).GetListOfKeys():
        name = key.GetName()
        if 'TDirectory' in key.GetClassName():
            folders.append(name)
        else:
            objects.append(name)
    yield path, folders, objects
    # Nested loop instead of ``yield from``: the module still supports
    # Python 2 (see the __future__ imports at the top of the file).
    for folder in folders:
        for x in walk_rootfile(rootfile, joined(path, folder)):
            yield x
146 
def allsame(iterable):
    """Return True when every element of *iterable* equals the first one.

    Works for any iterable, including generators and iterators that cannot
    be indexed. An empty iterable yields True.
    """
    iterator = iter(iterable)
    try:
        first = next(iterator)
    except StopIteration:
        return True
    for element in iterator:
        if element != first:
            return False
    return True
152 
def alltrue(iterable):
    """Return True when no element of *iterable* compares unequal to ``True``.

    The comparison is deliberately strict: values equal to ``True`` (``True``
    itself, ``1``) pass, whereas merely truthy values such as ``2`` or a
    non-empty string do not. An empty iterable yields True.
    """
    return not any(element != True for element in iterable)
158 
def pathdiff(paths, joiner):
    """
    Return the appropriate destination for an object.

    In all cases, the result will be placed in the deepest directory shared by
    all paths. If the histogram names are the same, the result will be named
    based on the first directories that they do not share. Otherwise, the
    result will be named based on the names of the other histograms.

    Paths of different depth are compared only up to the depth of the
    shortest one.

    >>> pathdiff(['/dirA/dirB/dirX/hist', '/dirA/dirB/dirY/hist'], '_div_')
    '/dirA/dirB/dirX_div_dirY'
    >>> pathdiff(['/dirA/hist1', '/dirA/hist2', '/dirA/hist3'], '_plus_')
    '/dirA/hist1_plus_hist2_plus_hist3'
    >>> pathdiff(['/hist1', '/dirA/hist2'], '_minus_')
    '/hist1_minus_hist2'
    >>> pathdiff(['/a/X/b/P/hist', '/a/Y/b/Q/hist'], '_div_')
    '/a/X_div_Y'
    """
    parts = [x.split('/') for x in paths]
    # One tuple per depth level; zip() stops at the shortest path, so ragged
    # inputs can never index past the end of a shorter path.
    levels = list(zip(*parts))

    dest = '/'
    for level in levels:
        if len(set(level)) != 1:
            break
        dest = joined(dest, level[0])

    leaves = [p[-1] for p in parts]
    name = joiner.join(leaves)
    if len(set(leaves)) == 1:
        for level in levels:
            if len(set(level)) != 1:
                name = joiner.join(level)
                # Stop at the FIRST level that differs, as documented.
                break
    return joined(dest, name)
188 
def pathdiff2(paths, joiner='__', truncate=False):
    """
    Combine the leading and trailing path components shared by all *paths*.

    Each path is split on '/'. The common beginning is built from the leading
    components that are identical in every path; the common ending is built
    from the trailing components that are identical in every path, never
    reaching the first component of the first path. Comparison stops at the
    depth of the shortest path, so paths of different depth are accepted.

    Args:
        paths: sequence of '/'-separated path strings.
        joiner: accepted for signature compatibility; not used.
        truncate: when true, return only the common ending.

    Returns:
        The common ending if *truncate* is true, otherwise the common
        beginning joined with the common ending. An empty *paths* gives ''.

    NOTE(review): because the ending is seeded with ``joined(name, '')``,
    every non-empty ending (and a beginning joined with an empty ending)
    carries a trailing separator, e.g. ``'hist/'``. That existing behaviour
    is kept unchanged here; confirm whether callers rely on it.
    """
    parts = [x.split('/') for x in paths]
    if not parts:
        return ''

    commonbeginning = ''
    # zip() stops at the shortest path, which prevents an IndexError when one
    # path is a strict prefix of another.
    for level in zip(*parts):
        if len(set(level)) != 1:
            break
        commonbeginning = joined(commonbeginning, level[0])

    commonending = ''
    shortest = min(len(p) for p in parts)
    # depth counts components from the end (1 == last component).
    deepest = min(len(parts[0]) - 1, shortest)
    for depth in range(1, deepest + 1):
        level = [p[-depth] for p in parts]
        if len(set(level)) != 1:
            break
        commonending = joined(level[0], commonending)

    if truncate:
        return commonending
    return joined(commonbeginning, commonending)
211 
def pathdiff3(paths, joiner='__'):
    """
    Work out where the result of combining *paths* should be stored.

    When every path ends in the same name, the destination is the longest
    run of trailing components common to all paths (the first component of
    the shortest path is never included). When the final names differ, the
    destination is those names glued together with *joiner*. The result
    always starts with '/'.

    >>> pathdiff3(['/dirA/dirX/hist', '/dirA/dirY/hist'])
    '/hist'
    >>> pathdiff3(['/dirA/dirX/dirB/hist', '/dirA/dirY/dirB/hist'])
    '/dirB/hist'
    >>> pathdiff3(['/dirA/hist1', '/dirA/hist2', '/dirA/hist3'], '_plus_')
    '/hist1_plus_hist2_plus_hist3'
    >>> pathdiff3(['/hist1', '/dirA/hist2'], '_div_')
    '/hist1_div_hist2'
    """
    split_paths = [p.split('/') for p in paths]
    leaves = [parts[-1] for parts in split_paths]
    if len(set(leaves)) > 1:
        # Final names differ: the result is named after all of them.
        return '/' + joiner.join(leaves)

    reference = split_paths[0]
    dest = reference[-1]
    shortest = min([len(parts) for parts in split_paths])
    depth = 2  # second-to-last component; the last one is already in dest
    while depth < shortest:
        if len(set(parts[-depth] for parts in split_paths)) > 1:
            break
        dest = joined(reference[-depth], dest)
        depth += 1
    return '/' + dest
240 
def operator_func(fn):
    """Decorator that prepares the output directory before running *fn*.

    The returned wrapper:
      1. changes into *outfile*;
      2. for every '/'-separated component of ``os.path.dirname(dest)``,
         creates the directory if ``ROOT.gDirectory.GetDirectory`` does not
         find it, then changes into it;
      3. calls ``fn(outfile, dest, hists, scales, dest_path, scale_errors)``
         with all six arguments passed positionally.

    The wrapper returns None; any return value of *fn* is discarded.
    """
    # Function-scope import so this block does not depend on the file header.
    import functools

    @functools.wraps(fn)
    def newfunc(outfile, dest, hists, scales=None, dest_path="", scale_errors=None):
        outfile.cd()
        for d in os.path.dirname(dest).split('/'):
            if not ROOT.gDirectory.GetDirectory(d):
                ROOT.gDirectory.mkdir(d)
            ROOT.gDirectory.cd(d)
        fn(outfile, dest, hists, scales, dest_path, scale_errors)
    return newfunc
250 
def scale_with_error(hist, scale, scale_error=None):
    '''Return a scaled clone of *hist*; *hist* itself is not modified.

    Without a (truthy) *scale_error* the clone is simply scaled by *scale*.
    With one, bins 0 .. GetNbinsX()+1 of the clone are first set to
    content *scale* and error *scale_error*, and the clone is then
    multiplied by *hist*, so the error handling is left to Multiply().'''
    scaled = hist.Clone()
    if not scale_error:
        scaled.Scale(scale)
        return scaled
    # The two extra bins are indices 0 and GetNbinsX()+1.
    bin_count = scaled.GetNbinsX() + 2
    for bin_index in range(bin_count):
        scaled.SetBinContent(bin_index, scale)
        scaled.SetBinError(bin_index, scale_error)
    scaled.Multiply(hist)
    return scaled
263 
@operator_func
def add(outfile, dest, hists, scales=None, dest_path="", scale_errors=None):
    """Write the scaled sum of *hists*, named ``os.path.basename(dest)``.

    Args:
        outfile: output file; only used to change directory when *dest_path*
            is given.
        dest: destination path; its base name becomes the histogram name.
        hists: histograms to sum (at least one).
        scales: one factor per histogram; 1.0 each when empty or None.
        dest_path: when non-empty, the sum is written into this directory of
            *outfile* (created if it is not found).
        scale_errors: one scale uncertainty per histogram; None each when
            empty or None.

    After writing, the current ROOT directory is reset to "/".
    """
    count = len(hists)
    if not scales:
        scales = [1.] * count
    if not scale_errors:
        scale_errors = [None] * count

    named_first = hists[0].Clone(os.path.basename(dest))
    sumhist = scale_with_error(named_first, scales[0], scale_errors[0])
    for index, hist in enumerate(hists[1:], 1):
        term = scale_with_error(hist, scales[index], scale_errors[index])
        sumhist.Add(term)

    if dest_path:
        outfile.cd()
        if not ROOT.gDirectory.GetDirectory(dest_path):
            ROOT.gDirectory.mkdir(dest_path)
        ROOT.gDirectory.cd(dest_path)
    sumhist.Write()
    ROOT.gDirectory.cd("/")
283 
@operator_func
def subtract(outfile, dest, hists, scales=None, dest_path="", scale_errors=None):
    """Write ``hists[0]`` minus all later histograms, named ``basename(dest)``.

    Args:
        outfile: output file (the decorator has already changed into the
            destination directory).
        dest: destination path; its base name becomes the histogram name.
        hists: histograms; every one after the first is subtracted from a
            clone of the first.
        scales, dest_path, scale_errors: accepted because the
            ``operator_func`` wrapper always forwards six positional
            arguments; they are currently not used by the subtraction.
    """
    diffhist = hists[0].Clone(os.path.basename(dest))
    for hist in hists[1:]:
        diffhist.Add(hist, -1)
    diffhist.Write()
290 
@operator_func
def divide(outfile, dest, numer, denom=None, dest_path="", scale_errors=None):
    """Write the quotient ``numer / denom``, named ``basename(dest)``.

    Args:
        outfile: output file (the decorator has already changed into the
            destination directory).
        dest: destination path; its base name becomes the histogram name.
        numer: numerator histogram, or - when *denom* is None - a sequence
            holding exactly ``(numerator, denominator)``. The sequence form
            is what ``main`` passes through the ``operator_func`` wrapper.
        denom: denominator histogram, or None when *numer* holds the pair.
        dest_path, scale_errors: accepted because the ``operator_func``
            wrapper always forwards six positional arguments; not used here.

    Raises:
        ValueError: if *denom* is None and *numer* does not hold exactly two
            histograms.
    """
    if denom is None:
        if len(numer) != 2:
            raise ValueError("divide expects exactly 2 histograms; "
                             "%i given" % len(numer))
        numer, denom = numer
    quotient = numer.Clone(os.path.basename(dest))
    quotient.Divide(numer, denom)
    quotient.Write()
296 
@operator_func
def bayes_divide(outfile, dest, numer, denom=None, dest_path="", scale_errors=None):
    """Write a ``TGraphAsymmErrors`` built with ``BayesDivide(numer, denom)``.

    The graph is named ``os.path.basename(dest)``.

    Args:
        outfile: output file (the decorator has already changed into the
            destination directory).
        dest: destination path; its base name becomes the graph name.
        numer: numerator histogram, or - when *denom* is None - a sequence
            holding exactly ``(numerator, denominator)``. The sequence form
            is what ``main`` passes through the ``operator_func`` wrapper.
        denom: denominator histogram, or None when *numer* holds the pair.
        dest_path, scale_errors: accepted because the ``operator_func``
            wrapper always forwards six positional arguments; not used here.

    Raises:
        ValueError: if *denom* is None and *numer* does not hold exactly two
            histograms.
    """
    if denom is None:
        if len(numer) != 2:
            raise ValueError("bayes_divide expects exactly 2 histograms; "
                             "%i given" % len(numer))
        numer, denom = numer
    quotient = ROOT.TGraphAsymmErrors()
    quotient.SetName(os.path.basename(dest))
    quotient.BayesDivide(numer, denom)
    quotient.Write()
303 
def main():
    """Command-line entry point: combine histograms and write 'out.root'.

    With a single input file, every --add/--subtract/--divide/--bayes-divide
    argument set is applied inside each directory matched by --dirs. With
    several input files, an operation flag must be given without values and
    is applied to every object of the first file that is a TH1 in all files.

    Raises:
        ValueError: in multi-file mode, when an operation flag carries values,
            or when a divide-type operation is requested for a number of
            files other than 2.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', type=str, nargs='+',
                        help='root files to process')
    parser.add_argument('--dirs', type=str, nargs='+', default=['/'],
                        help='target directories in the root files; paths to '
                        'histograms will be relative to these')
    parser.add_argument('--add', default=[], action='append', nargs='*',
                        help='a list of directories or histograms to add')
    parser.add_argument('--subtract', default=[], action='append', nargs='*',
                        help='a list of directories or histograms to subtract')
    parser.add_argument('--divide', default=[], action='append', nargs='*',
                        help='2 directories or histograms to divide')
    parser.add_argument('--bayes-divide', default=[], action='append', nargs='*',
                        help='2 directories or histograms from which to make '
                        'an efficiency plot')
    args = parser.parse_args()
    # Keys double as the attribute names on ``args`` and as the names of the
    # module-level functions looked up through globals() below; the values
    # are the joiners handed to pathdiff() when naming results.
    separators = {'add' : '_plus_',
                  'subtract' : '_minus_',
                  'divide' : '_div_',
                  'bayes_divide' : '_eff_'}

    # NOTE(review): the input files are never closed explicitly.
    files = [ROOT.TFile(x, 'read') for x in args.filenames]
    outfile = ROOT.TFile('out.root', 'recreate')
    # Expand the --dirs patterns against the first input file only.
    dirs = []
    for d in args.dirs:
        dirs += rootglob(files[0], d)

    if len(files) == 1:
        f = files[0]
        for thisdir in dirs:
            for operation_type, separator in separators.items():
                # Each use of a flag contributes one arg_set (action='append').
                for arg_set in getattr(args, operation_type):
                    # NOTE(review): a flag given without values yields an
                    # empty arg_set, so paths[0] below would raise IndexError.
                    paths = [joined(thisdir, x) for x in arg_set]
                    if f.GetDirectory(paths[0]):
                        # Operands are directories: combine same-named
                        # entries found under the first directory.
                        destdir = pathdiff(paths, separator)
                        for target in [os.path.basename(x) for x in
                                       rootglob(f, paths[0] + '/*')]:
                            hists = [f.GetDirectory(x).Get(target)
                                     for x in paths]
                            # Skip entries that are not TH1 in every operand.
                            if not alltrue([x and x.InheritsFrom('TH1')
                                            for x in hists]):
                                continue
                            dest = joined(destdir, target)
                            math_func = globals()[operation_type]
                            math_func(outfile, dest, hists)
                    else:
                        # Operands are individual objects.
                        # NOTE(review): ``paths`` already include ``thisdir``
                        # yet are fetched relative to GetDirectory(thisdir);
                        # confirm this resolves as intended when thisdir is
                        # not '/'.
                        hists = [f.GetDirectory(thisdir).Get(x) for x in paths]
                        if not alltrue([x and x.InheritsFrom('TH1')
                                        for x in hists]):
                            continue
                        dest = pathdiff(paths, separator)
                        math_func = globals()[operation_type]
                        math_func(outfile, dest, hists)
    else:
        for operation_type, separator in separators.items():
            arg_sets = getattr(args, operation_type)
            if arg_sets and arg_sets != [[]]:
                raise ValueError("No arguments to --%s allowed when multiple "
                                 "files are specified" % operation_type)
            elif arg_sets:
                # Reached only when arg_sets == [[]]: the flag was given
                # exactly once and without values.
                if 'divide' in operation_type and len(files) != 2:
                    raise ValueError("Exactly 2 files are expected with --%s; "
                                     "%i given" % (operation_type, len(files)))
                # The first file defines which paths and objects are visited.
                for path, folders, objects in walk_rootfile(files[0]):
                    for obj in objects:
                        hists = [x.GetDirectory(path).Get(obj) for x in files]
                        if not alltrue([x and x.InheritsFrom('TH1')
                                        for x in hists]):
                            continue
                        math_func = globals()[operation_type]
                        math_func(outfile, joined(path, obj), hists)

    outfile.Close()
378 
# Running this module as a script executes the doctests embedded in the
# docstrings above; it does not call main().
if __name__ == '__main__':
    import doctest
    doctest.testmod()
def loadROOT(batch=True)
Define additional helping functions.
Definition: utilities.py:434
def pathdiff2(paths, joiner='__', truncate=False)
Definition: rootmath.py:189
def bayes_divide(outfile, dest, numer, denom)
Definition: rootmath.py:298
def pathdiff3(paths, joiner='__')
Definition: rootmath.py:212
def divide(outfile, dest, numer, denom)
Definition: rootmath.py:292
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:65
def pathdiff(paths, joiner)
Definition: rootmath.py:159
T min(T a, T b)
Definition: MathUtil.h:58
def scale_with_error(hist, scale, scale_error=None)
Definition: rootmath.py:251
def __init__(self, filename, path='', scale=1., scale_error=None)
Definition: rootmath.py:58
def subtract(outfile, dest, hists)
Definition: rootmath.py:285
def add(outfile, dest, hists, scales=None, dest_path="", scale_errors=None)
Definition: rootmath.py:265
def walk_rootfile(rootfile, path='')
Implementation ######################################################.
Definition: rootmath.py:129
Classes #############################################################.
Definition: rootmath.py:56
def rootglob(tdirectory, pathname)
Definition: utilities.py:558
def newadd(outfile, targets, dest_path="")
Definition: rootmath.py:66
def allsame(iterable)
Definition: rootmath.py:147
def alltrue(iterable)
Definition: rootmath.py:153
double split
Definition: MVATrainer.cc:139