CMS 3D CMS Logo

heppy_hadd.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # Copyright (C) 2014 Colin Bernet
3 # https://github.com/cbernet/heppy/blob/master/LICENSE
4 
5 from __future__ import print_function
6 import os
7 import pprint
8 import pickle
9 import shutil
10 import six
11 
12 MAX_ARG_STRLEN = 131072  # upper bound on the length of the joined hadd command line (compared in hadd()); presumably mirrors Linux's per-argument MAX_ARG_STRLEN limit -- TODO confirm
13 
def haddPck(file, odir, idirs):
    '''Add pck files in directories idirs to a directory odir.

    All dirs in idirs must have the same subdirectory structure.
    Each pickle file is opened, and the corresponding objects are added
    (with +=) into a destination pickle in odir. The str() of the summed
    object is also written next to it, with '.pck' replaced by '.txt'.

    file  -- path of the pickle file as found under idirs[0]
    odir  -- destination directory; the target subdirectory must exist
    idirs -- input directories; idirs[0] is the prefix substituted in file

    Raises IndexError if idirs is empty, and whatever open() or pickle
    raise for unreadable or corrupt input.
    '''
    total = None
    for idir in idirs:
        fileName = file.replace(idirs[0], idir)
        # Pickles are binary data: text mode fails under Python 3.
        # NOTE: unpickling can execute arbitrary code; only merge trusted chunks.
        with open(fileName, 'rb') as pckfile:
            obj = pickle.load(pckfile)
        if total is None:
            total = obj
        else:
            try:
                total += obj
            except TypeError:
                # += not implemented for this type: keep the first object
                pass

    oFileName = file.replace(idirs[0], odir)
    # Context managers guarantee the output is flushed and closed.
    with open(oFileName, 'wb') as pckfile:
        pickle.dump(total, pckfile)
    txtFileName = oFileName.replace('.pck', '.txt')
    with open(txtFileName, 'w') as txtFile:
        txtFile.write(str(total))
        txtFile.write('\n')
41 
42 
def hadd(file, odir, idirs, appx=''):
    '''Merge one file, present in every directory of idirs, into odir.

    '.pck' files are delegated to haddPck; '.root' files are merged by
    running the external `hadd` command; any other file is ignored.

    file  -- path of the file as found under idirs[0]
    odir  -- destination directory
    idirs -- input directories; idirs[0] is the prefix substituted in file
    appx  -- suffix inserted before '.root' in the output name; used for
             the intermediate files when the command has to be split

    The command is run through the shell with os.system, so paths that
    contain spaces or shell metacharacters are not supported.
    '''
    if file.endswith('.pck'):
        try:
            haddPck(file, odir, idirs)
        except ImportError:
            # raised by unpickling when a required module is not importable;
            # such files are deliberately skipped
            pass
        return
    elif not file.endswith('.root'):
        return

    target = file.replace(idirs[0], odir)
    haddCmd = ['hadd', target.replace('.root', appx + '.root')]
    haddCmd.extend(file.replace(idirs[0], idir) for idir in idirs)
    cmd = ' '.join(haddCmd)
    print(cmd)
    # A single input cannot be split any further, so just try the command.
    if len(cmd) > MAX_ARG_STRLEN and len(idirs) > 1:
        print('Command longer than maximum unix string length; dividing into 2')
        # Floor division: a float index raises TypeError under Python 3.
        half = len(idirs) // 2
        # Suffixes extend appx so that a nested split never reuses the
        # name of the file it is about to produce as one of its inputs.
        hadd(file, odir, idirs[:half], appx + '1')
        hadd(file.replace(idirs[0], idirs[half]), odir, idirs[half:], appx + '2')
        haddCmd = [
            'hadd',
            target.replace('.root', appx + '.root'),
            target.replace('.root', appx + '1.root'),
            target.replace('.root', appx + '2.root'),
        ]
        cmd = ' '.join(haddCmd)
        print('Running merge cmd:', cmd)
        os.system(cmd)
    else:
        os.system(cmd)
72 
73 
def haddRec(odir, idirs):
    '''Recursively merge the directory trees listed in idirs into odir.

    odir is created first; if that fails, a hint is printed and the
    OSError is re-raised. The directory layout of idirs[0] is then
    mirrored under odir and hadd is called for every file found in it.
    '''
    print('adding', idirs)
    print('to', odir)

    try:
        os.mkdir(odir)
    except OSError:
        print()
        print('ERROR: directory in the way. Maybe you ran hadd already in this directory? Remove it and try again')
        print()
        raise

    reference = idirs[0]
    for root, subdirs, filenames in os.walk(reference):
        # mirror this level's subdirectories before descending into them
        for subdir in subdirs:
            source = '/'.join([root, subdir])
            os.mkdir(source.replace(reference, odir))
        for filename in filenames:
            hadd('/'.join([root, filename]), odir, idirs)
99 
def haddChunks(idir, removeDestDir, cleanUp=False, odir_cmd='./'):
    '''Group the '<name>_Chunk<n>' subdirectories of idir and merge each group.

    idir          -- directory scanned for chunk subdirectories
    removeDestDir -- if true, delete an existing destination directory first
    cleanUp       -- if true, move the processed chunk directories into a
                     freshly re-created 'Chunks' directory (relative to the
                     current working directory)
    odir_cmd      -- prefix prepended to '<idir>/<name>' for the output

    Prints a warning and returns None when no chunk directory is found.
    '''
    chunks = {}
    for entry in sorted(os.listdir(idir)):
        entryPath = '/'.join([idir, entry])
        if not os.path.isdir(entryPath):
            continue
        pieces = entry.split('_Chunk')
        if len(pieces) != 2:
            # ok, not a chunk
            continue
        chunks.setdefault(pieces[0], list()).append(entryPath)

    if not chunks:
        print('warning: no chunk found.')
        return

    for comp, cchunks in chunks.items():
        odir = odir_cmd + '/' + '/'.join([idir, comp])
        print(odir, cchunks)
        if removeDestDir and os.path.isdir(odir):
            shutil.rmtree(odir)
        haddRec(odir, cchunks)

    if cleanUp:
        chunkDir = 'Chunks'
        if os.path.isdir(chunkDir):
            shutil.rmtree(chunkDir)
        os.mkdir(chunkDir)
        print(chunks)
        for cchunks in chunks.values():
            for chunk in cchunks:
                shutil.move(chunk, chunkDir)
133 
134 
if __name__ == '__main__':
    # Command-line entry point: heppy_hadd.py [options] <source dir> [<destination>]

    import sys
    from optparse import OptionParser

    parser = OptionParser()
    parser.usage = """
    %prog <dir>
    Find chunks in dir, and run recursive hadd to group all chunks.
    For example:
    DYJets_Chunk0/, DYJets_Chunk1/ ... -> hadd -> DYJets/
    WJets_Chunk0/, WJets_Chunk1/ ... -> hadd -> WJets/
    """
    parser.add_option("-r", "--remove", dest="remove",
                      default=False, action="store_true",
                      help="remove existing destination directories.")
    parser.add_option("-c", "--clean", dest="clean",
                      default=False, action="store_true",
                      help="move chunks to Chunks/ after processing.")

    (options, args) = parser.parse_args()

    if not args:
        # Without this check, args[0] below raises a bare IndexError.
        parser.print_usage()
        sys.exit(1)
    if len(args) > 2:
        print('provide at most 2 directory as arguments: first the source, then the destination (optional)')
        sys.exit(1)

    idir = args[0]
    # The destination prefix is optional and defaults to the current directory.
    odir = args[1] if len(args) > 1 else './'

    haddChunks(idir, options.remove, options.clean, odir)
169 
join
static std::string join(char **cmd)
Definition: RemoteFile.cc:17
heppy_hadd.haddChunks
def haddChunks(idir, removeDestDir, cleanUp=False, odir_cmd='./')
Definition: heppy_hadd.py:100
str
#define str(s)
Definition: TestProcessor.cc:51
print
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:46
mps_setup.append
append
Definition: mps_setup.py:85
heppy_hadd.hadd
def hadd(file, odir, idirs, appx='')
Definition: heppy_hadd.py:43
heppy_hadd.haddPck
def haddPck(file, odir, idirs)
Definition: heppy_hadd.py:14
heppy_hadd.haddRec
def haddRec(odir, idirs)
Definition: heppy_hadd.py:74
python.rootplot.root2matplotlib.replace
def replace(string, replacements)
Definition: root2matplotlib.py:444