CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
rootmath.py
Go to the documentation of this file.
1 """
2 rootmath description
3 """
4 
5 __license__ = '''\
6 Copyright (c) 2009-2010 Jeff Klukas <klukas@wisc.edu>
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy
9 of this software and associated documentation files (the "Software"), to deal
10 in the Software without restriction, including without limitation the rights
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 copies of the Software, and to permit persons to whom the Software is
13 furnished to do so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice shall be included in
16 all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 THE SOFTWARE.
25 '''
26 
27 
28 ##############################################################################
29 ######## Import python libraries #############################################
30 
31 import sys
32 import shutil
33 import math
34 import os
35 import re
36 import tempfile
37 import copy
38 import fnmatch
39 import argparse
40 from os.path import join as joined
41 from utilities import rootglob, loadROOT
42 
43 ROOT = loadROOT()
44 
45 ##############################################################################
46 ######## Define globals ######################################################
47 
48 from version import __version__ # version number
49 
50 
51 ##############################################################################
52 ######## Classes #############################################################
53 
54 class Target(object):
55  """Description."""
56  def __init__(self, filename, path='', scale=1., scale_error=None):
57  self.filename = filename
58  self.path = path
59  self.scale = scale
60  self.scale_error = scale_error
61  def __repr__(self):
62  return "%s:%s:%f" % (self.filename, self.path, self.scale)
63 
64 def newadd(outfile, targets, dest_path=""):
65  """Description."""
66  if allsame([x.filename for x in targets]):
67  f = ROOT.TFile(targets[0].filename, 'read')
68  paths = [x.path for x in targets]
69  scales = [x.scale for x in targets]
70  scale_errors = [x.scale_error for x in targets]
71  if f.GetDirectory(paths[0]):
72  destdir = pathdiff2(paths) # What does this do?
73  for h in [os.path.basename(x) for x in
74  rootglob(f, paths[0] + '/*')]:
75  hists = [f.GetDirectory(x).Get(h) for x in paths]
76  if not alltrue([x and x.InheritsFrom('TH1') for x in hists]):
77  continue
78  dest = joined(destdir, h)
79  add(outfile, dest, hists, scales, dest_path, scale_errors=scale_errors)
80  else:
81  hists = [f.Get(x) for x in paths]
82  if alltrue([x and x.InheritsFrom('TH1') for x in hists]):
83  dest = pathdiff2(paths)
84  add(outfile, dest, hists, scales, scale_errors=scale_errors)
85  else:
86  dict_targets = {} # Stores paths and scales, key = filename
87  dict_tfiles = {} # Stores map from filenames to Root.TFile() objects
88  for target in targets:
89  dict_targets.setdefault(target.filename, []).append((target.path, target.scale))
90  if (target.filename not in dict_tfiles):
91  # Only open root files once
92  dict_tfiles[target.filename] = ROOT.TFile(target.filename, 'read')
93  # dict_targets now a dictionary, with keys the filenames, example:
94  # {'fileA.root': [('path0',scale0), ('path1', scale1)],
95  # 'fileB.root': [('path3', scale3)]}
96  f = ROOT.TFile(targets[0].filename, 'read')
97  if f.GetDirectory(targets[0].path):
98  # Create list of histograms to get
99  destdir = '/' # should probably use pathdiff2 somehow
100  histnames = [os.path.basename(x) for x in
101  rootglob(f, targets[0].path + '/*')]
102  f.Close()
103  # For each histogram name found, grab it from
104  # every file & path
105  for histname in histnames:
106  hists = []
107  scales = []
108  for filename in dict_targets:
109  tfile_cur = dict_tfiles[filename]
110  for path, scale in dict_targets[filename]:
111  hists.append(tfile_cur.GetDirectory(path).Get(histname))
112  scales.append(scale)
113  #print "%s:%s:%s:%f" % (filename, path, histname, scale)
114  if not alltrue([x and x.InheritsFrom('TH1') for x in hists]):
115  continue
116  dest = joined(destdir, histname)
117  add(outfile, dest, hists, scales, dest_path)
118  else:
119  print "Code not written yet to add histograms from multiple files"
120  return
121  return
122 
123 
124 ##############################################################################
125 ######## Implementation ######################################################
126 
127 def walk_rootfile(rootfile, path=''):
128  #### Yield (path, folders, objects) for each directory under path.
129  keys = rootfile.GetDirectory(path).GetListOfKeys()
130  folders, objects = [], []
131  for key in keys:
132  name = key.GetName()
133  classname = key.GetClassName()
134  newpath = joined(path, name)
135  dimension = 0
136  if 'TDirectory' in classname:
137  folders.append(name)
138  else:
139  objects.append(name)
140  yield path, folders, objects
141  for folder in folders:
142  for x in walk_rootfile(rootfile, joined(path, folder)):
143  yield x
144 
145 def allsame(iterable):
146  for element in iterable:
147  if element != iterable[0]:
148  return False
149  return True
150 
151 def alltrue(iterable):
152  for element in iterable:
153  if element != True:
154  return False
155  return True
156 
157 def pathdiff(paths, joiner):
158  """
159  Return the appropriate destination for an object.
160 
161  In all cases, the result will be placed in the deepest directory shared by
162  all paths. If the histogram names are the same, the result will be named
163  based on the first directories that they do not share. Otherwise, the
164  result will be named based on the names of the other histograms.
165 
166  >>> pathdiff(['/dirA/dirB/dirX/hist', '/dirA/dirB/dirY/hist'], '_div_')
167  '/dirA/dirB/dirX_div_dirY'
168  >>> pathdiff(['/dirA/hist1', '/dirA/hist2', '/dirA/hist3'], '_plus_')
169  '/dirA/hist1_plus_hist2_plus_hist3'
170  >>> pathdiff(['/hist1', '/dirA/hist2'], '_minus_')
171  '/hist1_minus_hist2'
172  """
173  paths = [x.split('/') for x in paths]
174  dest = '/'
175  for i in range(len(paths[0])):
176  if allsame([p[i] for p in paths]):
177  dest = joined(dest, paths[0][i])
178  else:
179  break
180  name = joiner.join([p[-1] for p in paths])
181  if allsame([p[-1] for p in paths]):
182  for i in range(len(paths[0])):
183  if not allsame([p[i] for p in paths]):
184  name = joiner.join([p[i] for p in paths])
185  return joined(dest, name)
186 
187 def pathdiff2(paths, joiner='__', truncate=False):
188  """
189  Placeholder.
190  """
191  paths = [x.split('/') for x in paths]
192  commonbeginning = ''
193  for i in range(len(paths[0])):
194  if allsame([p[i] for p in paths]):
195  commonbeginning = joined(commonbeginning, paths[0][i])
196  else:
197  break
198  commonending = ''
199  for i in range(-1, -1 * len(paths[0]), -1):
200  if allsame([p[i] for p in paths]):
201  commonending = joined(paths[0][i], commonending)
202  else:
203  break
204  #return commonbeginning, commonending
205  if truncate:
206  return commonending
207  else:
208  return joined(commonbeginning, commonending)
209 
210 def pathdiff3(paths, joiner='__'):
211  """
212  Return the appropriate destination for an object.
213 
214  If the final objects in each path match, then the return value will be the
215  matching part of the paths. Otherwise, the output path will simply be those
216  names joined together with *joiner*. See the examples below.
217 
218  >>> pathdiff3(['/dirA/dirX/hist', '/dirA/dirY/hist'])
219  '/hist'
220  >>> pathdiff3(['/dirA/dirX/dirB/hist', '/dirA/dirY/dirB/hist'])
221  '/dirB/hist'
222  >>> pathdiff3(['/dirA/hist1', '/dirA/hist2', '/dirA/hist3'], '_plus_')
223  '/hist1_plus_hist2_plus_hist3'
224  >>> pathdiff3(['/hist1', '/dirA/hist2'], '_div_')
225  '/hist1_div_hist2'
226  """
227  paths = [x.split('/') for x in paths]
228  if allsame([x[-1] for x in paths]):
229  dest = paths[0][-1]
230  for i in range(-2, min([len(x) for x in paths]) * -1, -1):
231  if allsame([p[i] for p in paths]):
232  dest = joined(paths[0][i], dest)
233  else:
234  break
235  return '/' + dest
236  else:
237  return '/' + joiner.join([x[-1] for x in paths])
238 
240  def newfunc(outfile, dest, hists, scales=None, dest_path="", scale_errors=None):
241  outfile.cd()
242  for d in os.path.dirname(dest).split('/'):
243  if not ROOT.gDirectory.GetDirectory(d):
244  ROOT.gDirectory.mkdir(d)
245  ROOT.gDirectory.cd(d)
246  fn(outfile, dest, hists, scales, dest_path, scale_errors)
247  return newfunc
248 
249 def scale_with_error(hist, scale, scale_error=None):
250  '''Scale a histogram by a scale factor that has an error.
251  This takes into account the scale error to set new error bars.'''
252  hist_new = hist.Clone()
253  if scale_error:
254  for i in range(hist_new.GetNbinsX()+2):
255  hist_new.SetBinContent(i, scale)
256  hist_new.SetBinError(i, scale_error)
257  hist_new.Multiply(hist)
258  else:
259  hist_new.Scale(scale)
260  return hist_new
261 
262 @operator_func
263 def add(outfile, dest, hists, scales=None, dest_path="", scale_errors=None):
264  if not scales:
265  scales = [1. for i in range(len(hists))]
266  if not scale_errors:
267  scale_errors = [None for i in range(len(hists))]
268  sumhist = hists[0].Clone(os.path.basename(dest))
269  sumhist = scale_with_error(sumhist, scales[0], scale_errors[0])
270  #sumhist.Scale(scales[0])
271  for i in range(1,len(hists)):
272  sumhist.Add(scale_with_error(hists[i], scales[i], scale_errors[i]))
273  #sumhist.Add(hists[i], scales[i])
274  if dest_path:
275  outfile.cd()
276  if not ROOT.gDirectory.GetDirectory(dest_path):
277  ROOT.gDirectory.mkdir(dest_path)
278  ROOT.gDirectory.cd(dest_path)
279  sumhist.Write()
280  ROOT.gDirectory.cd("/")
281 
282 @operator_func
283 def subtract(outfile, dest, hists):
284  diffhist = hists[0].Clone(os.path.basename(dest))
285  for hist in hists[1:]:
286  diffhist.Add(hist, -1)
287  diffhist.Write()
288 
289 @operator_func
290 def divide(outfile, dest, numer, denom):
291  quotient = numer.Clone(os.path.basename(dest))
292  quotient.Divide(numer, denom)
293  quotient.Write()
294 
295 @operator_func
296 def bayes_divide(outfile, dest, numer, denom):
297  quotient = ROOT.TGraphAsymmErrors()
298  quotient.SetName(os.path.basename(dest))
299  quotient.BayesDivide(numer, denom)
300  quotient.Write()
301 
302 def main():
303  parser = argparse.ArgumentParser()
304  parser.add_argument('filenames', type=str, nargs='+',
305  help='root files to process')
306  parser.add_argument('--dirs', type=str, nargs='+', default=['/'],
307  help='target directories in the root files; paths to '
308  'histograms will be relative to these')
309  parser.add_argument('--add', default=[], action='append', nargs='*',
310  help='a list of directories or histograms to add')
311  parser.add_argument('--subtract', default=[], action='append', nargs='*',
312  help='a list of directories or histograms to subtract')
313  parser.add_argument('--divide', default=[], action='append', nargs='*',
314  help='2 directories or histograms to divide')
315  parser.add_argument('--bayes-divide', default=[], action='append', nargs='*',
316  help='2 directories or histograms from which to make '
317  'an efficiency plot')
318  args = parser.parse_args()
319  separators = {'add' : '_plus_',
320  'subtract' : '_minus_',
321  'divide' : '_div_',
322  'bayes_divide' : '_eff_'}
323 
324  files = [ROOT.TFile(x, 'read') for x in args.filenames]
325  outfile = ROOT.TFile('out.root', 'recreate')
326  dirs = []
327  for d in args.dirs:
328  dirs += rootglob(files[0], d)
329 
330  if len(files) == 1:
331  f = files[0]
332  for thisdir in dirs:
333  for operation_type, separator in separators.items():
334  for arg_set in getattr(args, operation_type):
335  paths = [joined(thisdir, x) for x in arg_set]
336  if f.GetDirectory(paths[0]):
337  destdir = pathdiff(paths, separator)
338  for target in [os.path.basename(x) for x in
339  rootglob(f, paths[0] + '/*')]:
340  hists = [f.GetDirectory(x).Get(target)
341  for x in paths]
342  if not alltrue([x and x.InheritsFrom('TH1')
343  for x in hists]):
344  continue
345  dest = joined(destdir, target)
346  math_func = globals()[operation_type]
347  math_func(outfile, dest, hists)
348  else:
349  hists = [f.GetDirectory(thisdir).Get(x) for x in paths]
350  if not alltrue([x and x.InheritsFrom('TH1')
351  for x in hists]):
352  continue
353  dest = pathdiff(paths, separator)
354  math_func = globals()[operation_type]
355  math_func(outfile, dest, hists)
356  else:
357  for operation_type, separator in separators.items():
358  arg_sets = getattr(args, operation_type)
359  if arg_sets and arg_sets != [[]]:
360  raise ValueError("No arguments to --%s allowed when multiple "
361  "files are specified" % operation_type)
362  elif arg_sets:
363  if 'divide' in operation_type and len(files) != 2:
364  raise ValueError("Exactly 2 files are expected with --%s; "
365  "%i given" % (operation_type, len(files)))
366  for path, folders, objects in walk_rootfile(files[0]):
367  for obj in objects:
368  hists = [x.GetDirectory(path).Get(obj) for x in files]
369  if not alltrue([x and x.InheritsFrom('TH1')
370  for x in hists]):
371  continue
372  math_func = globals()[operation_type]
373  math_func(outfile, joined(path, obj), hists)
374 
375  outfile.Close()
376 
377 if __name__ == '__main__':
378  import doctest
379  doctest.testmod()
def walk_rootfile
Implementation ######################################################.
Definition: rootmath.py:127
def loadROOT
Define additional helping functions.
Definition: utilities.py:433
Classes #############################################################.
Definition: rootmath.py:54
list object
Definition: dbtoconf.py:77
double split
Definition: MVATrainer.cc:139