CMS 3D CMS Logo

cmsBatch.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # Colin
3 # batch mode for cmsRun, March 2009
4 
5 from __future__ import print_function
6 from builtins import range
7 import os, sys, imp, re, pprint, string, time,shutil,copy,pickle,math
8 from optparse import OptionParser
9 
10 # particle flow specific
11 from PhysicsTools.HeppyCore.utils.batchmanager import BatchManager
12 import PhysicsTools.HeppyCore.utils.eostools as eostools
13 
14 # cms specific
15 import FWCore.ParameterSet.Config as cms
16 from IOMC.RandomEngine.RandomServiceHelper import RandomNumberServiceHelper
17 
18 
def batchScriptCCIN2P3():
    '''Prepare the PBS version of the batch script, to run on CCIN2P3.

    Returns the script text as a string. The script copies the job
    directory to the worker, dumps environment/CPU info to sysinfo.txt,
    runs cmsRun on run_cfg.py and copies the job directory back.
    '''
    # NOTE(review): the scraped source showed '!/usr/bin/env bash' with a
    # missing '#'; restored the shebang so the script is runnable.
    script = """#!/usr/bin/env bash
#PBS -l platform=LINUX,u_sps_cmsf,M=2000MB,T=2000000
# sets the queue
#PBS -q T
#PBS -eo
#PBS -me
#PBS -V

source $HOME/.bash_profile

echo '***********************'

ulimit -v 3000000

# coming back to submission dir do setup the env
cd $PBS_O_WORKDIR
eval `scramv1 ru -sh`


# back to the worker
cd -

# copy job dir here
cp -r $PBS_O_WORKDIR .

# go inside
jobdir=`ls`
echo $jobdir

cd $jobdir

cat > sysinfo.sh <<EOF
#! env bash
echo '************** ENVIRONMENT ****************'

env

echo
echo '************** WORKER *********************'
echo

free
cat /proc/cpuinfo

echo
echo '************** START *********************'
echo
EOF

source sysinfo.sh > sysinfo.txt

cmsRun run_cfg.py

# copy job dir do disk
cd -
cp -r $jobdir $PBS_O_WORKDIR
"""
    return script
78 
79 
80 
def rootfiles_to_eos_script(index, remoteDir):
    '''Return a shell snippet that renames every local root file with the
    job index and stages it out to remoteDir on EOS.'''
    template = """
for file in *.root; do
newFileName=`echo $file | sed -r -e 's/\./_{index}\./'`
fullFileName={remoteDir}/$newFileName
{eos} cp $file /eos/cms/$fullFileName
{eos} chmod 755 /eos/cms/$fullFileName
rm *.root
done
"""
    # the destination must be expressed as an LFN for the eos commands
    lfnDir = eostools.eosToLFN(remoteDir)
    return template.format(index=index, remoteDir=lfnDir, eos=eostools.eos_select)
92 
93 
def batchScriptCERN( remoteDir, index ):
    '''prepare the LSF version of the batch script, to run on LSF'''
    # main payload: set up the CMSSW environment, fetch the job
    # directory from the submission area, and run the job program
    header = """#!/bin/bash
# sets the queue
#BSUB -q 8nm

echo 'environment:'
echo
env
ulimit -v 3000000
echo 'copying job dir to worker'
cd $CMSSW_BASE/src
eval `scramv1 ru -sh`
cd -
cp -rf $LS_SUBCWD .
ls
cd `find . -type d | grep /`
echo 'running'
{prog} run_cfg.py
if [ $? != 0 ]; then
    echo wrong exit code! removing all root files
    rm *.root
    exit 1
fi
echo 'sending the job directory back'
""".format(prog=prog)

    pieces = [header]
    # stage the root files out to EOS only when a remote dir was given
    if remoteDir != '':
        pieces.append(rootfiles_to_eos_script(index, remoteDir))
    # always copy the job directory back to the submission directory
    pieces.append('cp -rf * $LS_SUBCWD\n')
    return ''.join(pieces)
127 
def batchScriptLocal( remoteDir, index ):
    '''prepare a local version of the batch script, to run using nohup'''
    # locally there is no staging-in: just run the job program in place
    body = """#!/bin/bash
echo 'running'
{prog} run_cfg.py
if [ $? != 0 ]; then
    echo wrong exit code! removing all root files
    rm *.root
    exit 1
fi
echo 'sending the job directory back'
""".format(prog=prog)

    # stage the root files out to EOS only when a remote dir was given
    if remoteDir != '':
        body += rootfiles_to_eos_script(index, remoteDir)

    return body
145  return script
146 
147 
class CmsBatchException( Exception ):
    '''Exception class for this script.

    Raised (and caught in the main script below) when the requested
    batch running mode cannot be handled.
    NOTE(review): the class header line was missing from the scraped
    source; restored from the catch site `except CmsBatchException`.
    '''

    def __init__(self, value):
        # keep the payload so __str__ can render it
        self.value = value

    def __str__(self):
        return str( self.value)
157 
class MyBatchManager( BatchManager ):
    '''Batch manager specific to cmsRun processes.'''

    def PrepareJobUser(self, jobDir, value ):
        '''Prepare one job. This function is called by the base class.

        jobDir -- directory created for this job by the base class.
        value  -- 1-based job index; selects this job's slice of input
                  files and tags its output file names.

        NOTE(review): relies on the module-level globals process,
        fullSource, options, generator and grouping that the main
        script below sets up — confirm before reusing elsewhere.
        '''

        # start every job from a fresh copy of the original source
        process.source = fullSource.clone()

        #prepare the batch script
        scriptFileName = jobDir+'/batchScript.sh'
        scriptFile = open(scriptFileName,'w')
        # presumably strips the castor prefix expected by the eos helpers
        storeDir = self.remoteOutputDir_.replace('/castor/cern.ch/cms','')
        mode = self.RunningMode(options.batch)
        if mode == 'LXPLUS':
            scriptFile.write( batchScriptCERN( storeDir, value) ) #here is the call to batchScriptCERN, i need to change value
        elif mode == 'LOCAL':
            scriptFile.write( batchScriptLocal( storeDir, value) ) #same as above but for batchScriptLocal
        scriptFile.close()
        # make the batch script executable so the batch command can run it
        os.system('chmod +x %s' % scriptFileName)

        #prepare the cfg
        # replace the list of fileNames by a chunk of filenames:
        if generator:
            # generator job: no input files, just refresh the random seeds
            randSvc = RandomNumberServiceHelper(process.RandomNumberGeneratorService)
            randSvc.populate()
        else:
            # job 'value' processes files [(value-1)*grouping, value*grouping)
            iFileMin = (value-1)*grouping
            iFileMax = (value)*grouping
            process.source.fileNames = fullSource.fileNames[iFileMin:iFileMax]
            print(process.source)
        # write the per-job cfg: it pulls everything from base_cfg.py and
        # overrides only the source (and seeds for generator jobs)
        cfgFile = open(jobDir+'/run_cfg.py','w')
        cfgFile.write('import FWCore.ParameterSet.Config as cms\n\n')
        cfgFile.write('import os,sys\n')
        # need to import most of the config from the base directory containing all jobs
        cfgFile.write("sys.path.append('%s')\n" % os.path.dirname(jobDir) )
        cfgFile.write('from base_cfg import *\n')
        cfgFile.write('process.source = ' + process.source.dumpPython() + '\n')
        if generator:
            cfgFile.write('process.RandomNumberGeneratorService = ' + process.RandomNumberGeneratorService.dumpPython() + '\n')
        cfgFile.close()
198 
199 
batchManager = MyBatchManager()


# Keep a record of the exact command line used, for reproducibility.
# str.join replaces string.join, which no longer exists in Python 3
# (the file already targets py3 via print_function / builtins.range);
# the default separator of string.join was a single space, preserved here.
with open('cmsBatch.txt', 'w') as cmdFile:
    cmdFile.write(' '.join(sys.argv) + "\n")
206 
# Long usage/help text displayed by optparse for this script.
batchManager.parser_.usage = """
%prog [options] <number of input files per job> <your_cfg.py>.

Submits a number of jobs taking your_cfg.py as a template. your_cfg.py can either read events from input files, or produce them with a generator. In the later case, the seeds are of course updated for each job.

A local output directory is created locally. This directory contains a job directory for each job, and a Logger/ directory containing information on the software you are using.
By default:
- the name of the output directory is created automatically.
- the output root files end up in the job directories.

Each job directory contains:
- the full python configuration for this job. You can run it interactively by doing:
cmsRun run_cfg.py
- the batch script to run the job. You can submit it again by calling the batch command yourself, see the -b option.
- while running interactively: nohup.out, where the job stderr and stdout are redirected. To check the status of a job running interactively, do:
tail nohup.out
- after running:
 o the full nohup.out (your log) and your root files, in case you ran interactively
 o the LSF directory, in case you ran on LSF

Also see fwBatch.py, which is a layer on top of cmsBatch.py adapted to the organization of our samples on the CMST3.

Examples:

First do:
cd $CMSSW_BASE/src/CMGTools/Common/test

to run on your local machine:
cmsBatch.py 1 testCMGTools_cfg.py -b 'nohup ./batchScript.sh&'

to run on LSF (you must be logged on lxplus, not on your interactive machine, so that you have access to LSF)
cmsBatch.py 1 testCMGTools_cfg.py -b 'bsub -q 8nm < ./batchScript.sh'
"""
# executable used to process each job cfg (default: cmsRun)
batchManager.parser_.add_option("-p", "--program", dest="prog",
                                help="program to run on your cfg file",
                                default="cmsRun")
## batchManager.parser_.add_option("-b", "--batch", dest="batch",
##                                 help="batch command. default is: 'bsub -q 8nh < batchScript.sh'. You can also use 'nohup < ./batchScript.sh &' to run locally.",
##                                 default="bsub -q 8nh < .batchScript.sh")
# extra command line arguments forwarded to the cfg via sys.argv
batchManager.parser_.add_option("-c", "--command-args", dest="cmdargs",
                                help="command line arguments for the job",
                                default=None)
# store_false: passing --notagCVS disables the CVS tagging step
batchManager.parser_.add_option("--notagCVS", dest="tagPackages",
                                default=True,action="store_false",
                                help="tag the package on CVS (True)")
252 
# Parse the command line; ParseOptions handles the options shared with
# the BatchManager base class (batch command, output dir, negate, ...).
(options,args) = batchManager.parser_.parse_args()
batchManager.ParseOptions()

prog = options.prog          # program run on each job cfg (e.g. cmsRun)
doCVSTag = options.tagPackages

# exactly two positional arguments are required:
# <number of input files per job> <your_cfg.py>
if len(args)!=2:
    batchManager.parser_.print_help()
    sys.exit(1)

# testing that we run a sensible batch command. If not, exit.
runningMode = None
try:
    runningMode = batchManager.RunningMode( options.batch )
except CmsBatchException as err:
    print(err)
    sys.exit(1)

grouping = int(args[0])      # number of input files per job
nJobs = grouping             # NOTE(review): reused as the job count for generator jobs — confirm
cfgFileName = args[1]

print('Loading cfg')

# Temporarily replace sys.argv so the cfg file sees the arguments passed
# via -c/--command-args, as if it were being run directly by cmsRun.
pycfg_params = options.cmdargs
trueArgv = sys.argv
sys.argv = [cfgFileName]
if pycfg_params:
    sys.argv.extend(pycfg_params.split(' '))
print(sys.argv)
283 
284 
# load cfg script
# imp.load_source executes the cfg file as a module named 'pycfg';
# the cms.Process object is expected in its 'process' attribute.
handle = open(cfgFileName, 'r')
cfo = imp.load_source("pycfg", cfgFileName, handle)
process = cfo.process
handle.close()

# Restore original sys.argv
sys.argv = trueArgv
293 
294 
# keep track of the original source
fullSource = process.source.clone()
generator = False

try:
    process.source.fileNames
except AttributeError:
    # narrowed from a bare except: a source without input files raises
    # AttributeError on .fileNames; anything else should not be hidden
    print('No input file. This is a generator process.')
    generator = True
    # one job per seed; job indices run from 1 to nJobs
    listOfValues = [i+1 for i in range( nJobs )]
else:
    print("Number of files in the source:",len(process.source.fileNames), ":")
    pprint.pprint(process.source.fileNames)
    nFiles = len(process.source.fileNames)
    # floor division: plain '/' yields a float under Python 3 and would
    # make the range() below raise TypeError
    nJobs = nFiles // grouping
    if (nJobs!=0 and (nFiles % grouping) > 0) or nJobs==0:
        # one extra job for the leftover files (and at least one job)
        nJobs = nJobs + 1

    print("number of jobs to be created: ", nJobs)
    # job indices run from 1 to nJobs (not 0 to nJobs-1)
    listOfValues = [i+1 for i in range( nJobs )]
316 
batchManager.PrepareJobs( listOfValues ) #PrepareJobs with listOfValues as param

# preparing master cfg file: each job's run_cfg.py does
# 'from base_cfg import *' and overrides only what differs per job
baseCfgName = batchManager.outputDir_ + '/base_cfg.py'
with open(baseCfgName, 'w') as baseCfg:
    baseCfg.write(process.dumpPython() + '\n')

# need to wait 5 seconds to give castor some time
# now on EOS, should be ok. reducing to 1 sec
# no wait at all when running locally with nohup: we will never have
# enough processes to saturate castor.
waitingTime = 0 if runningMode == 'LOCAL' else 1
batchManager.SubmitJobs( waitingTime )
333 
334 
# logging

from PhysicsTools.HeppyCore.utils.logger import logger

# the logger works from inside the output directory; remember where we
# came from so we can return at the end
oldPwd = os.getcwd()
os.chdir(batchManager.outputDir_)
logDir = 'Logger'
os.system( 'mkdir ' + logDir )
log = logger( logDir )

log.logCMSSW()
log.logJobs(nJobs)
#COLIN not so elegant... but tar is behaving in a strange way.
log.addFile( oldPwd + '/' + cfgFileName )

# stage the log out only when jobs were really submitted (-n/negate off)
# and a remote output directory was requested
if not batchManager.options_.negate:
    if batchManager.remoteOutputDir_ != "":
        # we don't want to crush an existing log file on castor
        #COLIN could protect the logger against that.
        log.stageOut( batchManager.remoteOutputDir_ )

os.chdir( oldPwd )
357 
358 
def rootfiles_to_eos_script(index, remoteDir)
Definition: cmsBatch.py:81
std::ostream & print(std::ostream &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def eosToLFN(path)
Definition: eostools.py:66
def PrepareJobUser(self, jobDir, value)
Definition: cmsBatch.py:161
Definition: logger.py:1
def batchScriptCERN(remoteDir, index)
Definition: cmsBatch.py:94
def __init__(self, value)
Definition: cmsBatch.py:151
def batchScriptCCIN2P3()
Definition: cmsBatch.py:19
#define str(s)
def batchScriptLocal(remoteDir, index)
Definition: cmsBatch.py:128