CMS 3D CMS Logo

heppy_hadd.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # Copyright (C) 2014 Colin Bernet
3 # https://github.com/cbernet/heppy/blob/master/LICENSE
4 
5 from __future__ import print_function
6 import os
7 import pprint
8 import pickle
9 import shutil
10 import six
11 
12 MAX_ARG_STRLEN = 131072
13 
def haddPck(file, odir, idirs):
    '''Add pck files in directories idirs to the directory odir.

    All dirs in idirs must have the same subdirectory structure.
    The pickle file found at the same relative location in every input
    directory is loaded, the objects are summed with +=, and the result is
    pickled to the corresponding location in odir. A text dump (str() of the
    sum) is written next to it, with '.pck' replaced by '.txt' in the name.

    file  -- path of the pickle file inside idirs[0]
    odir  -- destination directory; the target subdirectory must exist
    idirs -- input directories; idirs[0] is the one `file` belongs to

    Raises IndexError if idirs is empty, and whatever open() / pickle raise
    when a file is missing or unreadable.
    '''
    total = None
    for idir in idirs:
        # Only the leading occurrence is the directory prefix; do not touch
        # later occurrences of the same text further down the path.
        fileName = file.replace(idirs[0], idir, 1)
        # NOTE(security): pickle.load can execute arbitrary code; only run
        # this on chunk directories you produced yourself.
        # Pickles must be read in binary mode (mandatory on Python 3).
        with open(fileName, 'rb') as pckfile:
            obj = pickle.load(pckfile)
        if total is None:
            total = obj
        else:
            try:
                total += obj
            except TypeError:
                # += not implemented for this object: keep the first one
                pass

    oFileName = file.replace(idirs[0], odir, 1)
    with open(oFileName, 'wb') as pckfile:
        pickle.dump(total, pckfile)
    txtFileName = oFileName.replace('.pck', '.txt')
    with open(txtFileName, 'w') as txtFile:
        txtFile.write(str(total))
        txtFile.write('\n')
41 
42 
def hadd(file, odir, idirs, appx=''):
    '''Merge one file, present in every directory of idirs, into odir.

    '.pck' files are delegated to haddPck. '.root' files are merged by
    running the external `hadd` command through os.system. Any other file
    is ignored.

    file  -- path of the file inside idirs[0]
    odir  -- destination directory
    idirs -- input directories sharing the same layout
    appx  -- suffix inserted before '.root' in the output file name; used
             for the intermediate files produced when the command line has
             to be split

    If the command string is longer than MAX_ARG_STRLEN, the inputs are
    split in two halves, each half is merged into an intermediate file
    (suffixes appx+'1' and appx+'2'), and the two intermediates are then
    merged into the final target. The intermediate files are left in place.
    '''
    if file.endswith('.pck'):
        try:
            haddPck(file, odir, idirs)
        except ImportError as err:
            # Best effort: the pickle references a class that cannot be
            # imported here. Skip the file, but say so.
            print('warning: could not merge', file, ':', err)
        return
    elif not file.endswith('.root'):
        return

    # Only the leading occurrence of idirs[0] is the directory prefix.
    target = file.replace(idirs[0], odir, 1).replace('.root', appx + '.root')
    haddCmd = ['hadd', target]
    haddCmd.extend(file.replace(idirs[0], idir, 1) for idir in idirs)
    cmd = ' '.join(haddCmd)
    print(cmd)
    # A single input cannot be split any further; run the command as is.
    if len(cmd) > MAX_ARG_STRLEN and len(idirs) > 1:
        print('Command longer than maximum unix string length; dividing into 2')
        # Floor division: '/' gives a float on Python 3, which cannot be
        # used as a slice index.
        half = len(idirs) // 2
        # Extend the suffix instead of resetting it, so that nested splits
        # do not write to (or merge from) their own target file.
        hadd(file, odir, idirs[:half], appx + '1')
        hadd(file.replace(idirs[0], idirs[half], 1), odir, idirs[half:], appx + '2')
        merged = file.replace(idirs[0], odir, 1)
        haddCmd = [
            'hadd',
            target,
            merged.replace('.root', appx + '1.root'),
            merged.replace('.root', appx + '2.root'),
        ]
        cmd = ' '.join(haddCmd)
        print('Running merge cmd:', cmd)
    os.system(cmd)
72 
73 
def haddRec(odir, idirs):
    '''Recursively merge the directories idirs into a new directory odir.

    odir is created (together with any missing parent directories), the
    subdirectory tree of idirs[0] is replicated inside it, and hadd is
    called for every file found while walking idirs[0].

    odir  -- destination directory; must not exist yet
    idirs -- input directories; all must share the layout of idirs[0]

    Raises OSError if odir cannot be created, in particular if it already
    exists.
    '''
    print('adding', idirs)
    print('to', odir)

    try:
        # makedirs rather than mkdir: the parent of odir may not exist yet
        # when a separate destination is used. It still fails if odir
        # itself already exists.
        os.makedirs(odir)
    except OSError:
        # Only blame a leftover directory when something is really there.
        if os.path.exists(odir):
            print()
            print('ERROR: directory in the way. Maybe you ran hadd already in this directory? Remove it and try again')
            print()
        raise
    # os.walk is top-down, so a parent is always created before its children.
    for root, dirs, files in os.walk(idirs[0]):
        for subdir in dirs:
            src = '/'.join([root, subdir])
            # root starts with idirs[0]; replace that prefix only.
            os.mkdir(src.replace(idirs[0], odir, 1))
        for fname in files:
            hadd('/'.join([root, fname]), odir, idirs)
99 
def haddChunks(idir, removeDestDir, cleanUp=False, odir_cmd='./'):
    '''Find the chunk directories in idir and merge them per component.

    A chunk is a subdirectory of idir named '<component>_Chunk<something>'.
    All chunks of a component are merged with haddRec into
    odir_cmd + '/' + idir + '/' + <component>.

    idir          -- directory containing the chunk directories
    removeDestDir -- if true, an existing destination directory is removed
                     before merging
    cleanUp       -- if true, the chunk directories are moved into a
                     'Chunks' directory (relative to the current working
                     directory) after merging; an existing 'Chunks'
                     directory is deleted first
    odir_cmd      -- path prepended to the destination directory

    Prints a warning and returns if no chunk is found.
    '''
    chunks = {}
    for entry in sorted(os.listdir(idir)):
        filepath = '/'.join([idir, entry])
        if not os.path.isdir(filepath):
            continue
        try:
            # exactly one '_Chunk' separator is expected in the name
            prefix, _ = entry.split('_Chunk')
        except ValueError:
            # ok, not a chunk
            continue
        chunks.setdefault(prefix, []).append(filepath)
    if not chunks:
        print('warning: no chunk found.')
        return
    for comp, cchunks in chunks.items():
        odir = odir_cmd + '/' + '/'.join([idir, comp])
        print(odir, cchunks)
        if removeDestDir and os.path.isdir(odir):
            shutil.rmtree(odir)
        haddRec(odir, cchunks)
    if cleanUp:
        chunkDir = 'Chunks'
        if os.path.isdir(chunkDir):
            shutil.rmtree(chunkDir)
        os.mkdir(chunkDir)
        print(chunks)
        # distinct loop names: do not rebind the dict being iterated
        for cchunks in chunks.values():
            for chunk in cchunks:
                shutil.move(chunk, chunkDir)
133 
134 
if __name__ == '__main__':
    # Command-line entry point: heppy_hadd.py [-r] [-c] <dir> [<odir>]

    import sys
    from optparse import OptionParser

    parser = OptionParser()
    parser.usage = """
    %prog <dir>
    Find chunks in dir, and run recursive hadd to group all chunks.
    For example:
    DYJets_Chunk0/, DYJets_Chunk1/ ... -> hadd -> DYJets/
    WJets_Chunk0/, WJets_Chunk1/ ... -> hadd -> WJets/
    """
    parser.add_option("-r","--remove", dest="remove",
                      default=False,action="store_true",
                      help="remove existing destination directories.")
    parser.add_option("-c","--clean", dest="clean",
                      default=False,action="store_true",
                      help="move chunks to Chunks/ after processing.")

    (options,args) = parser.parse_args()

    # Without a source directory there is nothing to do: show the usage
    # instead of failing with an IndexError on args[0].
    if len(args)<1:
        parser.print_usage()
        print('provide the source directory as first argument')
        sys.exit(1)

    if len(args)>2:
        print('provide at most 2 directory as arguments: first the source, then the destination (optional)')
        sys.exit(1)

    idir = args[0]
    # the destination is optional and defaults to the current directory
    if len(args)>1:
        odir = args[1]
    else:
        odir='./'

    haddChunks(idir, options.remove, options.clean, odir)
169 
def hadd(file, odir, idirs, appx='')
Definition: heppy_hadd.py:43
def haddPck(file, odir, idirs)
Definition: heppy_hadd.py:14
def replace(string, replacements)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def haddRec(odir, idirs)
Definition: heppy_hadd.py:74
def haddChunks(idir, removeDestDir, cleanUp=False, odir_cmd='./')
Definition: heppy_hadd.py:100
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
#define str(s)
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION. A wildcarded negative criterion that matches more than one trigger in the trigger list ("!*", "!HLTx*" if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL. It will reject the event if any of the triggers are PASS or EXCEPTION (this matches the behavior of "!*" before the partial wildcard feature was incorporated). Triggers which are in the READY state are completely ignored. (READY should never be returned since the trigger paths have been run