CMS 3D CMS Logo

heppy_hadd.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 # Copyright (C) 2014 Colin Bernet
3 # https://github.com/cbernet/heppy/blob/master/LICENSE
4 
5 from __future__ import print_function
6 import os
7 import pprint
8 import pickle
9 import shutil
10 
# Longest shell command string hadd() will pass to os.system() in one go;
# longer commands are split in two. 128 KiB -- presumably chosen to match the
# Linux per-argument limit (32 pages of 4 KiB); confirm for other platforms.
MAX_ARG_STRLEN = 128 * 1024
12 
def haddPck(file, odir, idirs):
    '''Add the pickle file `file` found in every directory of idirs into odir.

    All dirs in idirs must have the same subdirectory structure.
    `file` is the path of the pickle inside idirs[0]; the matching path in
    every other input directory is obtained by swapping the leading idirs[0]
    for that directory. Each pickle is opened and its object accumulated
    with `+=`; the result is pickled to the matching path in odir, and its
    str() is written next to it in a text file (.pck -> .txt).

    Objects that do not support `+=` (TypeError) are not accumulated: the
    object from idirs[0] is then written unchanged.

    NOTE: pickle.load executes arbitrary code from the file being read;
    only run this on directories you trust.
    '''
    total = None
    for idir in idirs:
        # count=1: only the leading directory prefix must be swapped, not a
        # later occurrence of the same string deeper in the path.
        fileName = file.replace(idirs[0], idir, 1)
        # pickles are binary data: text mode fails under Python 3
        with open(fileName, 'rb') as pckfile:
            obj = pickle.load(pckfile)
        if total is None:
            total = obj
        else:
            try:
                total += obj
            except TypeError:
                # += not implemented, nevermind
                pass

    oFileName = file.replace(idirs[0], odir, 1)
    with open(oFileName, 'wb') as pckfile:
        pickle.dump(total, pckfile)

    # Swap only the extension when there is one, so a '.pck' appearing in a
    # directory name is left alone.
    if oFileName.endswith('.pck'):
        txtFileName = oFileName[:-len('.pck')] + '.txt'
    else:
        txtFileName = oFileName.replace('.pck', '.txt')
    with open(txtFileName, 'w') as txtFile:
        txtFile.write(str(total))
        txtFile.write('\n')
40 
41 
def hadd(file, odir, idirs, appx=''):
    '''Merge `file` from every directory in idirs into odir.

    `file` is the path of the file inside idirs[0].
      * '.pck' files are delegated to haddPck (an ImportError raised while
        unpickling is ignored and the file is skipped);
      * '.root' files are merged by running the external `hadd` command
        through os.system;
      * any other file is ignored.

    appx is inserted before the '.root' extension of the output name. It is
    used internally when the command line exceeds MAX_ARG_STRLEN: the inputs
    are then split in two halves, each half is merged into an intermediate
    file (appx + '1' and appx + '2'), and the two intermediates are merged
    into the requested output. The intermediate files are left in odir.
    '''
    if file.endswith('.pck'):
        try:
            haddPck(file, odir, idirs)
        except ImportError:
            pass
        return
    elif not file.endswith('.root'):
        return

    ext = '.root'
    # count=1: swap only the leading input directory; slice off the extension
    # rather than str.replace so a '.root' in a directory name is untouched.
    outStem = file.replace(idirs[0], odir, 1)[:-len(ext)]
    target = outStem + appx + ext

    haddCmd = ['hadd', target]
    haddCmd.extend(file.replace(idirs[0], idir, 1) for idir in idirs)
    cmd = ' '.join(haddCmd)
    print(cmd)

    # A single input cannot be split any further: just try the command.
    if len(cmd) > MAX_ARG_STRLEN and len(idirs) > 1:
        print('Command longer than maximum unix string length; dividing into 2')
        half = len(idirs) // 2  # integer division: slice bounds must be ints
        # Extend appx instead of overwriting it so that, if a half has to be
        # split again, its intermediates do not collide with its own output.
        hadd(file, odir, idirs[:half], appx + '1')
        hadd(file.replace(idirs[0], idirs[half], 1), odir, idirs[half:], appx + '2')
        haddCmd = ['hadd', target,
                   outStem + appx + '1' + ext,
                   outStem + appx + '2' + ext]
        cmd = ' '.join(haddCmd)
        print('Running merge cmd:', cmd)
        os.system(cmd)
    else:
        os.system(cmd)
71 
72 
def haddRec(odir, idirs):
    '''Recursively merge the directories idirs into a new directory odir.

    odir is created (it must not exist yet), the subdirectory tree of
    idirs[0] is mirrored inside it, and hadd() is called for every file
    found under idirs[0]. All dirs in idirs are expected to share the
    layout of idirs[0].

    Raises OSError (after printing a hint) if odir cannot be created,
    typically because it already exists.
    '''
    print('adding', idirs)
    print('to', odir)

    try:
        os.mkdir(odir)
    except OSError:
        print()
        print('ERROR: directory in the way. Maybe you ran hadd already in this directory? Remove it and try again')
        print()
        raise

    src = idirs[0]
    for root, dirs, files in os.walk(src):
        for subdir in dirs:
            # count=1: os.walk roots always start with src, so only that
            # leading prefix is swapped -- a subdirectory whose name contains
            # the same string must keep its name.
            os.mkdir('/'.join([root, subdir]).replace(src, odir, 1))
        for fname in files:
            hadd('/'.join([root, fname]), odir, idirs)
98 
def haddChunks(idir, removeDestDir, cleanUp=False, odir_cmd='./'):
    '''Group the <name>_Chunk<N> subdirectories of idir and merge each group.

    Subdirectories of idir whose name contains exactly one '_Chunk' are
    grouped by the text before '_Chunk'; each group is merged with haddRec
    into odir_cmd/idir/<name>.

    removeDestDir: delete an already existing destination directory first.
    cleanUp: afterwards, (re)create a 'Chunks' directory in the current
             working directory and move all chunk directories into it.
    '''
    chunks = {}
    for entry in sorted(os.listdir(idir)):
        entryPath = '/'.join([idir, entry])
        if not os.path.isdir(entryPath):
            continue
        pieces = entry.split('_Chunk')
        if len(pieces) != 2:
            # ok, not a chunk
            continue
        chunks.setdefault(pieces[0], list()).append(entryPath)

    if not chunks:
        print('warning: no chunk found.')
        return

    for comp, cchunks in chunks.items():
        odir = odir_cmd + '/' + '/'.join([idir, comp])
        print(odir, cchunks)
        if removeDestDir and os.path.isdir(odir):
            shutil.rmtree(odir)
        haddRec(odir, cchunks)

    if not cleanUp:
        return
    chunkDir = 'Chunks'
    if os.path.isdir(chunkDir):
        shutil.rmtree(chunkDir)
    os.mkdir(chunkDir)
    print(chunks)
    for chunkList in chunks.values():
        for chunk in chunkList:
            shutil.move(chunk, chunkDir)
132 
133 
if __name__ == '__main__':
    # Command-line entry point: parse the options and run haddChunks on the
    # given source directory (and optional destination directory).

    import os
    import sys
    from optparse import OptionParser

    parser = OptionParser()
    parser.usage = """
    %prog <dir>
    Find chunks in dir, and run recursive hadd to group all chunks.
    For example:
    DYJets_Chunk0/, DYJets_Chunk1/ ... -> hadd -> DYJets/
    WJets_Chunk0/, WJets_Chunk1/ ... -> hadd -> WJets/
    """
    parser.add_option("-r","--remove", dest="remove",
                      default=False,action="store_true",
                      help="remove existing destination directories.")
    parser.add_option("-c","--clean", dest="clean",
                      default=False,action="store_true",
                      help="move chunks to Chunks/ after processing.")

    (options,args) = parser.parse_args()

    # The source directory is mandatory: without this check args[0] below
    # would fail with a bare IndexError.
    if len(args)<1:
        parser.print_usage()
        print('provide at least the source directory as argument')
        sys.exit(1)

    if len(args)>2:
        print('provide at most 2 directory as arguments: first the source, then the destination (optional)')
        sys.exit(1)

    srcdir = args[0]
    if len(args)>1:
        odir = args[1]
    else:
        odir = './'

    haddChunks(srcdir, options.remove, options.clean, odir)
168 
def hadd(file, odir, idirs, appx='')
Definition: heppy_hadd.py:42
def haddPck(file, odir, idirs)
Definition: heppy_hadd.py:13
def replace(string, replacements)
def haddRec(odir, idirs)
Definition: heppy_hadd.py:73
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def haddChunks(idir, removeDestDir, cleanUp=False, odir_cmd='./')
Definition: heppy_hadd.py:99
static std::string join(char **cmd)
Definition: RemoteFile.cc:19
#define str(s)