
cmsBatch.py
#!/usr/bin/env python
# Colin
# batch mode for cmsRun, March 2009

from __future__ import print_function
import os, sys, imp, re, pprint, string, time, shutil, copy, pickle, math
from optparse import OptionParser

# particle flow specific
from PhysicsTools.HeppyCore.utils.batchmanager import BatchManager
import PhysicsTools.HeppyCore.utils.eostools as eostools

# cms specific
import FWCore.ParameterSet.Config as cms
from IOMC.RandomEngine.RandomServiceHelper import RandomNumberServiceHelper


def batchScriptCCIN2P3():
    '''Prepare the PBS version of the batch script, to run at CCIN2P3.'''
    script = """#!/usr/bin/env bash
#PBS -l platform=LINUX,u_sps_cmsf,M=2000MB,T=2000000
# sets the queue
#PBS -q T
#PBS -eo
#PBS -me
#PBS -V

source $HOME/.bash_profile

echo '***********************'

ulimit -v 3000000

# coming back to the submission dir to set up the environment
cd $PBS_O_WORKDIR
eval `scramv1 ru -sh`


# back to the worker
cd -

# copy job dir here
cp -r $PBS_O_WORKDIR .

# go inside
jobdir=`ls`
echo $jobdir

cd $jobdir

cat > sysinfo.sh <<EOF
#!/usr/bin/env bash
echo '************** ENVIRONMENT ****************'

env

echo
echo '************** WORKER *********************'
echo

free
cat /proc/cpuinfo

echo
echo '************** START *********************'
echo
EOF

source sysinfo.sh > sysinfo.txt

cmsRun run_cfg.py

# copy the job dir back to disk
cd -
cp -r $jobdir $PBS_O_WORKDIR
"""
    return script


def rootfiles_to_eos_script(index, remoteDir):
    remoteDir = eostools.eosToLFN(remoteDir)
    return """
for file in *.root; do
newFileName=`echo $file | sed -r -e 's/\./_{index}\./'`
fullFileName={remoteDir}/$newFileName
{eos} cp $file /eos/cms/$fullFileName
{eos} chmod 755 /eos/cms/$fullFileName
# remove only the file that was just copied, so the loop can go on to the next one
rm $file
done
""".format(index=index, remoteDir=remoteDir, eos=eostools.eos_select)


def batchScriptCERN( remoteDir, index ):
    '''prepare the LSF version of the batch script, to run on LSF'''
    script = """#!/bin/bash
# sets the queue
#BSUB -q 8nm

echo 'environment:'
echo
env
ulimit -v 3000000
echo 'copying job dir to worker'
cd $CMSSW_BASE/src
eval `scramv1 ru -sh`
cd -
cp -rf $LS_SUBCWD .
ls
cd `find . -type d | grep /`
echo 'running'
{prog} run_cfg.py
if [ $? != 0 ]; then
    echo wrong exit code! removing all root files
    rm *.root
    exit 1
fi
echo 'sending the job directory back'
""".format(prog=prog)

    if remoteDir != '':
        script += rootfiles_to_eos_script(index, remoteDir)

    script += 'cp -rf * $LS_SUBCWD\n'

    return script


def batchScriptLocal( remoteDir, index ):
    '''prepare a local version of the batch script, to run using nohup'''

    script = """#!/bin/bash
echo 'running'
{prog} run_cfg.py
if [ $? != 0 ]; then
    echo wrong exit code! removing all root files
    rm *.root
    exit 1
fi
echo 'sending the job directory back'
""".format(prog=prog)

    if remoteDir != '':
        script += rootfiles_to_eos_script(index, remoteDir)

    return script
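
# Illustration (not executed): this script is typically launched from inside a
# job directory with  nohup ./batchScript.sh &  (see the -b examples in the
# usage text below), so the job's stdout and stderr end up in nohup.out.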


class CmsBatchException( Exception ):
    '''Exception class for this script'''

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return str( self.value )
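
# CmsBatchException is caught below, around the call to
# batchManager.RunningMode(options.batch), when the requested batch command
# cannot be interpreted.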

class MyBatchManager( BatchManager ):
    '''Batch manager specific to cmsRun processes.'''

    def PrepareJobUser(self, jobDir, value ):
        '''Prepare one job. This function is called by the base class.'''

        process.source = fullSource.clone()

        # prepare the batch script
        scriptFileName = jobDir+'/batchScript.sh'
        scriptFile = open(scriptFileName,'w')
        storeDir = self.remoteOutputDir_.replace('/castor/cern.ch/cms','')
        mode = self.RunningMode(options.batch)
        if mode == 'LXPLUS':
            # the job index (value) is passed so output root files can be renamed per job
            scriptFile.write( batchScriptCERN( storeDir, value) )
        elif mode == 'LOCAL':
            scriptFile.write( batchScriptLocal( storeDir, value) )
        scriptFile.close()
        os.system('chmod +x %s' % scriptFileName)

        # prepare the cfg
        # replace the list of fileNames by a chunk of filenames:
        if generator:
            randSvc = RandomNumberServiceHelper(process.RandomNumberGeneratorService)
            randSvc.populate()
        else:
            iFileMin = (value-1)*grouping
            iFileMax = (value)*grouping
            process.source.fileNames = fullSource.fileNames[iFileMin:iFileMax]
            print(process.source)
        cfgFile = open(jobDir+'/run_cfg.py','w')
        cfgFile.write('import FWCore.ParameterSet.Config as cms\n\n')
        cfgFile.write('import os,sys\n')
        # need to import most of the config from the base directory containing all jobs
        cfgFile.write("sys.path.append('%s')\n" % os.path.dirname(jobDir) )
        cfgFile.write('from base_cfg import *\n')
        cfgFile.write('process.source = ' + process.source.dumpPython() + '\n')
        if generator:
            cfgFile.write('process.RandomNumberGeneratorService = ' + process.RandomNumberGeneratorService.dumpPython() + '\n')
        cfgFile.close()
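
# Illustration only: each generated run_cfg.py is a thin wrapper of the form
#
#   import FWCore.ParameterSet.Config as cms
#   import os,sys
#   sys.path.append('<output dir>')
#   from base_cfg import *
#   process.source = <per-job source with its chunk of input files>
#
# so only the source (and, for generator jobs, the random number seeds)
# differs between jobs; everything else comes from base_cfg.py.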


batchManager = MyBatchManager()


# record the command line for future reference
file = open('cmsBatch.txt', 'w')
file.write(' '.join(sys.argv) + "\n")
file.close()

batchManager.parser_.usage = """
%prog [options] <number of input files per job> <your_cfg.py>.

Submits a number of jobs taking your_cfg.py as a template. your_cfg.py can either read events from input files, or produce them with a generator. In the latter case, the seeds are updated for each job.

An output directory is created locally. It contains a job directory for each job, and a Logger/ directory containing information on the software you are using.
By default:
- the name of the output directory is generated automatically.
- the output root files end up in the job directories.

Each job directory contains:
- the full python configuration for this job. You can run it interactively by doing:
cmsRun run_cfg.py
- the batch script to run the job. You can submit it again by calling the batch command yourself, see the -b option.
- while running interactively: nohup.out, where the job stderr and stdout are redirected. To check the status of a job running interactively, do:
tail nohup.out
- after running:
  o the full nohup.out (your log) and your root files, in case you ran interactively
  o the LSF directory, in case you ran on LSF

Also see fwBatch.py, which is a layer on top of cmsBatch.py adapted to the organization of our samples on the CMST3.

Examples:

First do:
cd $CMSSW_BASE/src/CMGTools/Common/test

to run on your local machine:
cmsBatch.py 1 testCMGTools_cfg.py -b 'nohup ./batchScript.sh&'

to run on LSF (you must be logged in on lxplus, not on your interactive machine, so that you have access to LSF):
cmsBatch.py 1 testCMGTools_cfg.py -b 'bsub -q 8nm < ./batchScript.sh'
"""
batchManager.parser_.add_option("-p", "--program", dest="prog",
                                help="program to run on your cfg file",
                                default="cmsRun")
## batchManager.parser_.add_option("-b", "--batch", dest="batch",
##                                 help="batch command. default is: 'bsub -q 8nh < batchScript.sh'. You can also use 'nohup < ./batchScript.sh &' to run locally.",
##                                 default="bsub -q 8nh < .batchScript.sh")
batchManager.parser_.add_option("-c", "--command-args", dest="cmdargs",
                                help="command line arguments for the job",
                                default=None)
batchManager.parser_.add_option("--notagCVS", dest="tagPackages",
                                default=True, action="store_false",
                                help="do not tag the packages in CVS (by default, they are tagged)")
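
# Illustration: the options can be combined, e.g.
#   cmsBatch.py 3 your_cfg.py -c 'arg1 arg2' --notagCVS
# processes the input files in chunks of 3, passes arg1 and arg2 to the cfg
# while it is loaded, and skips the CVS tagging step.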

(options,args) = batchManager.parser_.parse_args()
batchManager.ParseOptions()

prog = options.prog
doCVSTag = options.tagPackages

if len(args)!=2:
    batchManager.parser_.print_help()
    sys.exit(1)

# testing that we run a sensible batch command. If not, exit.
runningMode = None
try:
    runningMode = batchManager.RunningMode( options.batch )
except CmsBatchException as err:
    print(err)
    sys.exit(1)

grouping = int(args[0])
nJobs = grouping
cfgFileName = args[1]

print('Loading cfg')

pycfg_params = options.cmdargs
trueArgv = sys.argv
# the cfg may inspect sys.argv, so let it see only its own name plus the -c arguments
sys.argv = [cfgFileName]
if pycfg_params:
    sys.argv.extend(pycfg_params.split(' '))
print(sys.argv)
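
# Illustration (not executed): running
#   cmsBatch.py 1 your_cfg.py -c 'opt1 opt2'
# makes the cfg see sys.argv == ['your_cfg.py', 'opt1', 'opt2'] while it loads.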


# load cfg script
handle = open(cfgFileName, 'r')
cfo = imp.load_source("pycfg", cfgFileName, handle)
process = cfo.process
handle.close()

# Restore original sys.argv
sys.argv = trueArgv


# keep track of the original source
fullSource = process.source.clone()
generator = False

try:
    process.source.fileNames
except AttributeError:
    print('No input file. This is a generator process.')
    generator = True
    # in generator mode, the first argument is interpreted as the number of jobs
    listOfValues = [i+1 for i in range( nJobs )]
else:
    print("Number of files in the source:", len(process.source.fileNames), ":")
    pprint.pprint(process.source.fileNames)
    nFiles = len(process.source.fileNames)
    # integer division, with one extra job for any remainder (and at least one job)
    nJobs = nFiles // grouping
    if (nJobs!=0 and (nFiles % grouping) > 0) or nJobs==0:
        nJobs = nJobs + 1

    print("number of jobs to be created: ", nJobs)
    # jobs are numbered from 1 to nJobs, not from 0
    listOfValues = [i+1 for i in range( nJobs )]

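# Worked example: with 7 input files and a grouping of 3, nJobs = 7 // 3 = 2,
# and the remainder (7 % 3 = 1) bumps it to 3 jobs, numbered 1 to 3: job 1
# gets files 0-2, job 2 files 3-5, job 3 file 6 (see PrepareJobUser above).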
batchManager.PrepareJobs( listOfValues )

# preparing master cfg file

cfgFile = open(batchManager.outputDir_+'/base_cfg.py','w')
cfgFile.write( process.dumpPython() + '\n')
cfgFile.close()

# used to wait 5 seconds to give castor some time;
# now on EOS this should be ok, so the wait is reduced to 1 second
waitingTime = 1
if runningMode == 'LOCAL':
    # no need to wait when running with nohup: we will never have
    # enough processes to saturate the storage system.
    waitingTime = 0
batchManager.SubmitJobs( waitingTime )


# logging

from PhysicsTools.HeppyCore.utils.logger import logger

oldPwd = os.getcwd()
os.chdir(batchManager.outputDir_)
logDir = 'Logger'
os.system( 'mkdir ' + logDir )
log = logger( logDir )

log.logCMSSW()
log.logJobs(nJobs)
#COLIN not so elegant... but tar is behaving in a strange way.
log.addFile( oldPwd + '/' + cfgFileName )

if not batchManager.options_.negate:
    if batchManager.remoteOutputDir_ != "":
        # we don't want to clobber an existing log file on castor
        #COLIN could protect the logger against that.
        log.stageOut( batchManager.remoteOutputDir_ )

os.chdir( oldPwd )