CMS 3D CMS Logo

parallelization.py
Go to the documentation of this file.
1 #! /usr/bin/python
2 
3 # This script sets up parallel jobs for the build, integrate and run
4 # step when using Herwig with the CMSSW framework.
5 # It takes a cmsRun file, adjusts the parameters in it accordingly to
6 # the options and saves them to temporary cmsRun files. For each step
7 # a different cmsRun file is created. The original file remains
8 # unaltered.
9 
10 # Possible options:
11 # -b/--build : sets the number of build jobs and starts the build step.
12 # -i/--integrate : sets the maximal number of integration jobs
13 # This option already has to be set when the build step is invoked.
14 # The integration step will be performed if this option is set,
15 # unless --nointegration is chosen.
16 # The actual number of integration jobs may be smaller. It is
17 # determined by the number of files in Herwig-scratch/Build.
18 # -r/--run : sets the number of run jobs and starts the run step.
19 # --nointegration : use this option to set up several integration jobs
20 # without actually performing them
21 # --stoprun: use this option if you want to create the cmsRun files
22 # without calling cmsRun
23 # --resumerun: no new cmsRun files for the run step will be created
24 # For this option to work 'temporary' cmsRun files complying to the
25 # naming scheme have to be available. Only files up to the number
26 # of jobs defined by --run will be considered.
27 # --keepfiles : don't remove the created temporary cmsRun files
28 # -l/--log: write the output of each shell command called in a
29 # separate log file
30 
31 # Comments in the cmsRun file in the process.generator part may confuse
32 # this script. Check the temporary cmsRun files if errors occur.
33 
34 # A parallelized run step is achieved by calling cmsRun the corresponding
35 # number of times with different seeds for Herwig. The built-in feature
36 # of Herwig won't be used.
37 
38 # Author: Dominik Beutel
39 
40 
41 import argparse
42 import sys
43 import os
44 import subprocess
45 import re
46 
47 
48 
def uint(string):
    """Argparse ``type`` callable that accepts only non-negative integers.

    ``string`` is converted with ``int()`` and the result is returned.
    A negative result raises ``argparse.ArgumentTypeError`` with the
    message ``'<string> is negative'``; input that ``int()`` rejects
    propagates as the usual ``ValueError``.
    """
    number = int(string)
    if number >= 0:
        return number
    raise argparse.ArgumentTypeError('{0} is negative'.format(string))
56 
57 
58 
def adjust_pset(cmsrunfilename, savefilename, par_list):
    """Write a copy of a cmsRun file with replaced generator parameters.

    The text between the first '(' following 'process.generator' and its
    matching ')' is treated as the argument list of the generator.  All
    occurrences of the runModeList, jobs, integrationList, maxJobs and
    seed parameters are removed from that argument list, the entries of
    par_list are appended to it, and the result is written to
    savefilename.  Everything outside the argument list is copied
    unchanged; the file cmsrunfilename itself is never modified.

    Parameters:
        cmsrunfilename: path of the cmsRun configuration to read.
        savefilename: path of the adjusted configuration to write.
        par_list: iterable of strings, each one a complete
            'name = value' argument to append.

    Raises:
        ValueError: if 'process.generator', its opening bracket or the
            matching closing bracket cannot be found.  Nothing is
            written in that case.
    """

    with open(cmsrunfilename, 'r') as readfile:
        parsestring = readfile.read()

    # Get the first opening bracket after process.generator.  Both lookups
    # are checked on their own: str.find() returns -1 on failure, and using
    # -1 as a start index would silently search from the last character.
    generator_position = parsestring.find('process.generator')
    if generator_position < 0:
        raise ValueError(
            "no 'process.generator' found in '{0}'".format(cmsrunfilename))
    begin_gen_step = parsestring.find('(', generator_position)
    if begin_gen_step < 0:
        raise ValueError(
            "no '(' after 'process.generator' in '{0}'".format(cmsrunfilename))

    # Find the matching closing bracket by counting nesting depth.
    end_gen_step = -1
    bracket_counter = 1
    for position in range(begin_gen_step + 1, len(parsestring)):
        character = parsestring[position]
        if character == '(':
            bracket_counter += 1
        elif character == ')':
            bracket_counter -= 1
            if not bracket_counter:
                end_gen_step = position
                break
    if end_gen_step < 0:
        raise ValueError(
            "unbalanced brackets after 'process.generator' in '{0}'".format(
                cmsrunfilename))

    # Get the string between the brackets.
    gen_string = parsestring[begin_gen_step + 1:end_gen_step]

    # Remove all parameters that would interfere.  Each pattern includes
    # the comma in front of the parameter so the list stays well formed.
    # The dots are escaped so that only the literal type name matches.
    interfering = (
        ('runModeList', 'string'),
        ('jobs', 'int32'),
        ('integrationList', 'string'),
        ('maxJobs', 'uint32'),
        ('seed', 'int32'),
    )
    for name, cms_type in interfering:
        pattern = (r',\s*' + name + r'\s*=\s*cms\.untracked\.' + cms_type
                   + r'\((.*?)\)')
        gen_string = re.sub(pattern, '', gen_string)

    # Every appended parameter is normally introduced by ',\n'.  That would
    # produce invalid Python if the remaining argument list is empty or
    # already ends with a trailing comma, so handle those two cases.
    separator = ',\n'
    if par_list:
        remaining = gen_string.rstrip()
        if not remaining:
            separator = ''
        elif remaining.endswith(','):
            gen_string = remaining[:-1]

    # Write the savefile with all parameters given in par_list.
    with open(savefilename, 'w') as savefile:
        savefile.write(parsestring[:begin_gen_step + 1])
        savefile.write(gen_string)
        for item in par_list:
            savefile.write(separator)
            savefile.write(item)
            separator = ',\n'
        savefile.write(parsestring[end_gen_step:])
103 
104 
105 
def cleanupandexit(filelist):
    """Delete the files in filelist and exit with status 0.

    A file that no longer exists is skipped, so that one missing entry
    neither prevents the remaining files from being removed nor turns
    the regular exit into a traceback.  Any other OSError (for example
    a permission problem) is still raised.

    Parameters:
        filelist: iterable of file names to remove.

    Raises:
        SystemExit: always, with exit code 0, after the clean-up.
    """
    import errno

    for filename in filelist:
        try:
            os.remove(filename)
        except OSError as error:
            # Only "no such file" is harmless here.
            if error.errno != errno.ENOENT:
                raise
    sys.exit(0)
111 
112 
113 
114 
115 ##################################################
116 # Get command line arguments
117 ##################################################
118 
parser = argparse.ArgumentParser()

parser.add_argument('cmsRunfile', help='filename of the cmsRun configuration')

# Job counts for the three steps; 0 (the default) leaves a step switched off.
parser.add_argument('-b', '--build',
                    type=int, choices=range(0, 11), default=0,
                    help='set the number of build jobs')
parser.add_argument('-i', '--integrate',
                    type=uint, default=0,
                    help='set the maximal number of integration jobs')
parser.add_argument('-r', '--run',
                    type=int, choices=range(0, 11), default=0,
                    help='set the number of run jobs')

# Plain on/off switches without a short form, registered in help order.
for flag, description in (
        ('--nointegration',
         'build -i integration jobs without actually integrating'),
        ('--keepfiles',
         "don't delete temporary files"),
        ('--stoprun',
         'stop after creating the cmsRun files for the run step'),
        ('--resumerun',
         "use existing 'temporary' files for the run step")):
    parser.add_argument(flag, help=description, action='store_true')

parser.add_argument('-l', '--log', action='store_true',
                    help='write the output of each process in a separate log file')

args = parser.parse_args()

# Temporary cmsRun files that are removed again during clean-up
cleanupfiles = []

# Common stem for the names of all created files: the name of the cmsRun
# file with every '.' replaced by '_'
template_name = '_'.join(args.cmsRunfile.split('.'))
138 
139 
140 
141 ##################################################
142 # Execute the different run modes
143 ##################################################
144 
145 ## Build ##
146 
147 # jobs defines number of build jobs in the cmsRun file
148 # maxJobs tells Herwig to prepare the according number
149 # of integrations
150 
if args.build != 0:
    # Set up parameters: run mode and number of build jobs.  maxJobs is
    # only passed on when a limit for the integration jobs was given.
    parameters = ['runModeList = cms.untracked.string(\'build\')']
    parameters.append('jobs = cms.untracked.int32(' + str(args.build) + ')')
    if args.integrate != 0:
        parameters.append('maxJobs = cms.untracked.uint32(' + str(args.integrate) + ')')

    # Write the temporary cmsRun file for the build step and remember it
    # for the clean-up.
    build_name = template_name + '_build.py'
    adjust_pset(args.cmsRunfile, build_name, parameters)

    cleanupfiles.append(build_name)

    # Start build job
    print('Setting up {0} build jobs.'.format(str(args.build)))
    print('Setting up a maximum of {0} integration jobs.'.format(str(args.integrate)))
    print('Calling\t\'cmsRun ' + build_name + '\'')

    if args.log:
        # build_name ends in 'py'; replacing that suffix yields '<stem>.log'.
        build_log_name = build_name[:-2] + 'log'
        print('Writing ouput to log file: ' + build_log_name)
        # The child keeps its own handle on the log file, so leaving the
        # with-block before the job has finished is safe.
        with open(build_log_name, 'w') as build_log:
            process = subprocess.Popen(['cmsRun', build_name], stdout=build_log, stderr=subprocess.STDOUT)
    else:
        # Call cmsRun directly with an argument list, like every other
        # step does.  Going through the shell with a concatenated command
        # string would break on file names containing spaces or shell
        # metacharacters.
        process = subprocess.Popen(['cmsRun', build_name])
    process.wait()

    print('--------------------')
    print('Build step finished.')
    print('--------------------')
179 
180 
181 
182 ## Integrate ##
183 
184 # Stop in case no integration is desired
if args.nointegration:
    print('--nointegration: Run will be stopped here.')
    # Honour --keepfiles on this early exit as well: with the option set
    # nothing is deleted, the script just exits.
    cleanupandexit([] if args.keepfiles else cleanupfiles)

if args.integrate != 0:
    # Determine number of integration jobs: one per entry in
    # Herwig-scratch/Build whose name starts with 'integrationJob<digits>'.
    actual_int_jobs = sum(
        1 for entry in os.listdir('Herwig-scratch/Build')
        if re.match(r'integrationJob[0-9]+', entry))

    # Stop if this number exceeds the given parameter
    if actual_int_jobs > args.integrate:
        print('Actual number of integration jobs {0} exceeds \'--integrate {1}\'.'.format(actual_int_jobs, args.integrate))
        print('Integration will not be performed.')
        # Temporary files are only removed if --keepfiles is not set.
        cleanupandexit([] if args.keepfiles else cleanupfiles)

    # Start the integration jobs
    print('Found {0} integration jobs, a maxiumum of {1} was given.'.format(actual_int_jobs, args.integrate))
    print('Starting all jobs.')
    if not args.log:
        print('--- Output may be cluttered. (Try the option -l/--log) ---')
    processes = []
    for i in range(actual_int_jobs):
        # Set up parameters: each job integrates exactly one list entry.
        parameters = ['runModeList = cms.untracked.string(\'integrate\')']
        parameters.append('integrationList = cms.untracked.string(\'' + str(i) + '\')')

        integration_name = template_name + '_integrate_' + str(i) + '.py'
        adjust_pset(args.cmsRunfile, integration_name, parameters)

        cleanupfiles.append(integration_name)

        print('Calling\t\'cmsRun ' + integration_name + '\'')
        if args.log:
            # Replace the trailing 'py' of the file name by 'log'.
            integration_log_name = integration_name[:-2] + 'log'
            print('Writing ouput to log file: ' + integration_log_name)
            # The child keeps its own handle on the log file, so the
            # with-block may end while the job is still running.
            with open(integration_log_name, 'w') as integration_log:
                processes.append(subprocess.Popen(['cmsRun', integration_name], stdout=integration_log, stderr=subprocess.STDOUT))
        else:
            processes.append(subprocess.Popen(['cmsRun', integration_name]))

    # Wait for all processes to finish
    for process in processes:
        process.wait()
    print('--------------------------')
    print('Integration step finished.')
    print('--------------------------')
230 
231 
232 
233 ## Run mode ##
234 
235 ## This part uses the parallelization of the run step provided by
236 ## Herwig. At the moment it is not usable.
237 
238 ##if args.run != 0:
239 ## parameters = ['runModeList = cms.untracked.string(\'run\')']
240 ## parameters.append('jobs = cms.untracked.int32(' + str(args.run) + ')')
241 
253 
254 ## This is the alternative for a parallelized run step. cmsRun is called
255 ## as often as given with the option -r/--run. So the total number of
256 ## generated events is the corresponding multiple of the number of events
257 ## given in the cmsRun file.
258 
259 
if args.stoprun and args.resumerun:
    print('--stoprun AND --resumerun are chosen: run step will be omitted.')
    # Honour --keepfiles on this early exit as well: with the option set
    # nothing is deleted, the script just exits.
    cleanupandexit([] if args.keepfiles else cleanupfiles)

if args.run != 0:
    # Start the run jobs
    print('Setting up {0} runs.'.format(args.run))
    if not args.log:
        print('--- Output may be cluttered. (Try the option -l/--log) ---')
    processes = []
    for i in range(args.run):
        run_name = template_name + '_run_' + str(i) + '.py'

        # Only create new files if this isn't a resumed run
        if not args.resumerun:
            # The job index doubles as the seed, so every job gets a
            # different one.
            parameters = ['runModeList = cms.untracked.string(\'run\')']
            parameters.append('seed = cms.untracked.int32(' + str(i) + ')')
            adjust_pset(args.cmsRunfile, run_name, parameters)

        # With --stoprun the files are only created: nothing is executed
        # and nothing is marked for clean-up.
        if args.stoprun:
            continue

        # Don't mark the files for cleanup if this is a resumed run
        if not args.resumerun:
            cleanupfiles.append(run_name)

        # A resumed run relies on files from an earlier call; skip the
        # ones that are missing.
        if not os.path.isfile(run_name):
            print('\'' + run_name + '\' not found. It will be skipped.')
            continue

        print('Calling\t\'cmsRun ' + run_name + '\'')
        if args.log:
            # Replace the trailing 'py' of the file name by 'log'.
            run_log_name = run_name[:-2] + 'log'
            print('Writing ouput to log file: ' + run_log_name)
            # The child keeps its own handle on the log file, so the
            # with-block may end while the job is still running.
            with open(run_log_name, 'w') as run_log:
                processes.append(subprocess.Popen(['cmsRun', run_name], stdout=run_log, stderr=subprocess.STDOUT))
        else:
            processes.append(subprocess.Popen(['cmsRun', run_name]))

    # Wait for all processes to finish
    for process in processes:
        process.wait()
    if args.stoprun:
        print('--stoprun: kept run files and stopped before calling cmsRun')
    print('------------------')
    print('Run step finished.')
    print('------------------')


# Regular end of the script: remove the temporary cmsRun files unless the
# user asked to keep them.
if not args.keepfiles:
    cleanupandexit(cleanupfiles)
def cleanupandexit(filelist)
def uint(string)
def adjust_pset(cmsrunfilename, savefilename, par_list)