CMS 3D CMS Logo

heppy_hadd.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # Copyright (C) 2014 Colin Bernet
3 # https://github.com/cbernet/heppy/blob/master/LICENSE
4 
5 import os
6 import pprint
7 import pickle
8 import shutil
9 import six
10 
11 MAX_ARG_STRLEN = 131072
12 
def haddPck(file, odir, idirs):
    '''Add the pickle files of all directories in idirs into odir.

    All dirs in idirs must have the same subdirectory structure.

    file  -- path of the pickle file inside idirs[0]; the matching file in
             every other input directory is found by substituting the
             directory name in this path.
    odir  -- destination directory; it must already contain the
             subdirectory the output file goes into.
    idirs -- list of input directories, idirs[0] being the reference one.

    Each pickle file is opened and its object is added (with +=) to a
    running total. The total is pickled to the corresponding path in odir,
    and its str() representation is written next to it in a .txt file.
    '''
    total = None
    for idir in idirs:
        fileName = file.replace(idirs[0], idir)
        # NOTE(review): pickle.load can execute arbitrary code; only run
        # this on chunk directories that you produced yourself.
        # Pickles are binary data: open in 'rb' so this also works on python 3.
        with open(fileName, 'rb') as pckfile:
            obj = pickle.load(pckfile)
        if total is None:
            total = obj
        else:
            try:
                total += obj
            except TypeError:
                # += not implemented for this type, nevermind:
                # the object of the first directory is kept as is.
                pass

    oFileName = file.replace(idirs[0], odir)
    with open(oFileName, 'wb') as pckfile:
        pickle.dump(total, pckfile)
    txtFileName = oFileName.replace('.pck', '.txt')
    with open(txtFileName, 'w') as txtFile:
        txtFile.write(str(total))
        txtFile.write('\n')
40 
41 
def hadd(file, odir, idirs, appx=''):
    '''Merge one file across all input directories into odir.

    file  -- path of the file inside idirs[0].
    odir  -- destination directory.
    idirs -- list of input directories sharing the same structure.
    appx  -- suffix inserted before '.root' in the output file name; used
             internally to name intermediate files when the merge has to
             be split.

    '.pck' files are delegated to haddPck, '.root' files are merged by
    running the external 'hadd' command through os.system, and any other
    file is ignored. Returns None in all cases.

    When the command line is longer than MAX_ARG_STRLEN, the inputs are
    split in two halves that are merged separately into intermediate files,
    which are then merged into the final output. The intermediate files are
    left in odir.
    '''
    if file.endswith('.pck'):
        try:
            haddPck(file, odir, idirs)
        except ImportError:
            # unpickling needs the classes of the pickled objects; when they
            # cannot be imported the pickle file is skipped (best effort).
            pass
        return
    elif not file.endswith('.root'):
        return

    target = file.replace(idirs[0], odir)
    haddCmd = ['hadd', target.replace('.root', appx + '.root')]
    for idir in idirs:
        haddCmd.append(file.replace(idirs[0], idir))
    cmd = ' '.join(haddCmd)
    print(cmd)
    # A single input cannot be split any further: just try to run it.
    if len(cmd) > MAX_ARG_STRLEN and len(idirs) > 1:
        print('Command longer than maximum unix string length; dividing into 2')
        # integer division, so that the result is a valid index on python 3
        half = len(idirs) // 2
        # The suffixes of the intermediate files extend appx, so that a
        # nested split never uses its own output file as an input.
        hadd(file, odir, idirs[:half], appx + '1')
        hadd(file.replace(idirs[0], idirs[half]), odir, idirs[half:], appx + '2')
        haddCmd = ['hadd',
                   target.replace('.root', appx + '.root'),
                   target.replace('.root', appx + '1.root'),
                   target.replace('.root', appx + '2.root')]
        cmd = ' '.join(haddCmd)
        print('Running merge cmd: %s' % cmd)
        os.system(cmd)
    else:
        os.system(cmd)
71 
72 
def haddRec(odir, idirs):
    '''Recursively merge the content of the directories idirs into odir.

    odir  -- destination directory; it is created here and must not exist.
    idirs -- list of input directories sharing the same structure.

    The directory tree of idirs[0] is reproduced under odir, and hadd is
    called for every file found in idirs[0].

    Raises OSError if odir cannot be created, typically because it is
    already there.
    '''
    print('adding %s' % (idirs,))
    print('to %s' % (odir,))

    try:
        os.mkdir(odir)
    except OSError:
        print('\nERROR: directory in the way. Maybe you ran hadd already in this directory? Remove it and try again\n')
        raise
    # os.walk is top-down by default: a directory is always created in odir
    # before its subdirectories and files are processed.
    for root, subdirs, fileNames in os.walk(idirs[0]):
        for subdir in subdirs:
            src = os.path.join(root, subdir)
            os.mkdir(src.replace(idirs[0], odir))
        for fileName in fileNames:
            hadd(os.path.join(root, fileName), odir, idirs)
98 
def haddChunks(idir, removeDestDir, cleanUp=False, odir_cmd='./'):
    '''Find the chunk directories in idir and merge them per component.

    idir          -- directory containing the <component>_Chunk<N> dirs.
    removeDestDir -- if true, an existing destination directory is removed
                     before merging.
    cleanUp       -- if true, the chunk directories are moved to a fresh
                     'Chunks' directory (relative to the current working
                     directory) after merging; an existing 'Chunks'
                     directory is removed first.
    odir_cmd      -- prefix of the destination; each component is merged
                     into odir_cmd + '/' + idir + '/' + component.

    Prints a warning and returns if no chunk directory is found.
    '''
    chunks = {}
    for name in sorted(os.listdir(idir)):
        filepath = '/'.join([idir, name])
        if not os.path.isdir(filepath):
            continue
        try:
            # exactly one '_Chunk' separator is expected in the name
            prefix, _ = name.split('_Chunk')
        except ValueError:
            # ok, not a chunk
            continue
        chunks.setdefault(prefix, []).append(filepath)
    if not chunks:
        print('warning: no chunk found.')
        return
    for comp, cchunks in six.iteritems(chunks):
        odir = odir_cmd + '/' + '/'.join([idir, comp])
        print('%s %s' % (odir, cchunks))
        if removeDestDir and os.path.isdir(odir):
            shutil.rmtree(odir)
        haddRec(odir, cchunks)
    if cleanUp:
        chunkDir = 'Chunks'
        if os.path.isdir(chunkDir):
            shutil.rmtree(chunkDir)
        os.mkdir(chunkDir)
        print(chunks)
        # a separate loop variable, so that the dictionary being iterated
        # is not rebound inside the loop
        for cchunks in six.itervalues(chunks):
            for chunk in cchunks:
                shutil.move(chunk, chunkDir)
132 
133 
if __name__ == '__main__':
    # Command line entry point: parse the options, then merge the chunks
    # found in the source directory.

    import sys
    from optparse import OptionParser

    parser = OptionParser()
    parser.usage = """
    %prog <dir>
    Find chunks in dir, and run recursive hadd to group all chunks.
    For example:
    DYJets_Chunk0/, DYJets_Chunk1/ ... -> hadd -> DYJets/
    WJets_Chunk0/, WJets_Chunk1/ ... -> hadd -> WJets/
    """
    parser.add_option("-r", "--remove", dest="remove",
                      default=False, action="store_true",
                      help="remove existing destination directories.")
    parser.add_option("-c", "--clean", dest="clean",
                      default=False, action="store_true",
                      help="move chunks to Chunks/ after processing.")

    (options, args) = parser.parse_args()

    # The source directory is mandatory: show the help instead of failing
    # with an IndexError on args[0].
    if not args:
        parser.print_help()
        sys.exit(1)

    if len(args) > 2:
        print('provide at most 2 directory as arguments: first the source, then the destination (optional)')
        sys.exit(1)

    dir = args[0]
    # the destination prefix is optional and defaults to the current directory
    if len(args) > 1:
        odir = args[1]
    else:
        odir = './'

    haddChunks(dir, options.remove, options.clean, odir)
168 
def hadd(file, odir, idirs, appx='')
Definition: heppy_hadd.py:42
def haddPck(file, odir, idirs)
Definition: heppy_hadd.py:13
def replace(string, replacements)
def haddRec(odir, idirs)
Definition: heppy_hadd.py:73
def haddChunks(idir, removeDestDir, cleanUp=False, odir_cmd='./')
Definition: heppy_hadd.py:99
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
#define str(s)
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list ("!*", "!HLTx*" if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL. It will reject the event if any of the triggers are PASS or EXCEPTION (this matches the behavior of "!*" before the partial wildcard feature was incorporated). Triggers which are in the READY state are completely ignored. (READY should never be returned since the trigger paths have been run