CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
mps_fire.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # Submit jobs that are setup in local mps database to batch system
3 #
4 # The bsub syntax: bsub -J 'jobname' -q 'queue name' theProgram
5 # The jobname will be something like MP_2015.
6 # The queue name is derived from lib.classInfo.
7 # The program is theScript.sh located in each job-directory.
8 # There may be the other option -R (see man bsub for info).
9 #
10 # Usage:
11 #
12 # mps_fire.pl [-m[f]] [maxjobs]
13 # mps_fire.pl -h
14 
15 import Alignment.MillePedeAlignmentAlgorithm.mpslib.Mpslibclass as mpslib
16 import os
17 import sys
18 import subprocess
19 import re
20 
# Handle on the local mps job database, followed by the command-line
# switches with their default values.
lib = mpslib.jobdatabase()
maxJobs = 1      # number of Mille jobs to submit
fireMerge = 0    # set to 1 by an option containing 'm'
helpwanted = 0   # set to 1 by an option containing 'h'
forceMerge = 0   # set to 1 by an option containing 'f'

# Walk over the command line.  Anything starting with '-' is treated as a
# bundle of single-letter flags; a plain word is only used when it sits at
# position 1, where it is taken as the job count.
for position, argument in enumerate(sys.argv):
    if argument[0] != '-':
        if position == 1:
            maxJobs = argument
        continue
    if 'h' in argument:
        helpwanted = 1
    if 'm' in argument:
        fireMerge = 1
    if 'f' in argument:
        forceMerge = 1

# the job count arrives as text when given on the command line
maxJobs = int(maxJobs)
43 
# Option -h -> show the usage text, one print per entry, then stop
if helpwanted != 0:
    for helpLine in (
            "Usage:\n mps_fire.pl [-m[f]] [maxjobs]",
            "\nmaxjobs: Number of Mille jobs to be submitted (default is one)",
            "\nKnown options:",
            "\n -m Submit all setup Pede jobs, maxJobs is ignored.",
            "\n -mf Force the submission of the Pede job in case",
            "\n some Mille jobs are not in the OK state.\n",
            "\n -h This help."):
        print(helpLine)
    exit()
54 
# Load the current content of the job database.
lib.read_db()

# Absolute path of the jobData directory below the current working
# directory (mps_script needs the full path).
thePwd = subprocess.check_output('pwd', stderr=subprocess.STDOUT, shell=True).strip()
theJobData = '%s/jobData' % thePwd

# Name handed to 'bsub -J': lib.addFiles when it is non-empty, otherwise
# the default 'mpalign'.
theJobName = lib.addFiles if lib.addFiles != '' else 'mpalign'
66 
def _submit(submission):
    """Run a submission command line through the shell and return its output.

    stderr is merged into the returned text.  A non-zero exit status does not
    raise: the text printed by the command is returned instead, so the
    caller's search for 'Job <id> is submitted' reports the failure and the
    script still reaches lib.write_db() with the jobs submitted so far.
    """
    try:
        return subprocess.check_output(submission, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as error:
        return error.output


def _merge_cfg_name(script):
    """Return the base name of the cfg file found on the first line of
    `script` that mentions both cmsRun and '.py' (newlines removed)."""
    command = ('cat '+script+' | grep cmsRun | grep "\\.py" | head -1'
               ' | awk \'{gsub("^.*cmsRun ","");print $1}\'')
    mergeCfg = subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True)
    mergeCfg = subprocess.check_output('basename '+mergeCfg, stderr=subprocess.STDOUT, shell=True)
    return mergeCfg.replace('\n','')


# fire the 'normal' parallel jobs (Mille jobs)
if fireMerge == 0:
    # set the resources string coming from mps.db
    resources = lib.get_class('mille')

    if 'cmscafspec' in resources:
        # special cmscaf resources: the queue is hard-wired here
        # (the string built from the class was always overwritten by it)
        print('\nWARNING:\n Running mille jobs on cmscafspec, intended for pede only!\n\n')
        resources = '-q cmscafalcamille'
    elif 'cmscaf' in resources:
        # g_cmscaf for ordinary caf queue, keeping 'cmscafspec' free for pede jobs
        resources = '-q '+resources+' -m g_cmscaf'
    else:
        resources = '-q '+resources

    nSub = 0  # number of submission attempts made so far
    for i in xrange(lib.nJobs):
        if lib.JOBSTATUS[i] != 'SETUP':
            continue
        if nSub >= maxJobs:
            # quota reached, nothing further would be submitted
            break

        # submit a new job with 'bsub -J ...' and check output
        # for some reasons LSF wants script with full path
        submission = 'bsub -J %s %s %s/%s/theScript.sh' % \
            (theJobName, resources, theJobData, lib.JOBDIR[i])
        print(submission)
        result = _submit(submission)
        print(' '+result)
        result = result.strip()

        # check if job was submitted and update the job database
        match = re.search(r'Job <(\d+)> is submitted', result)
        if match:
            lib.JOBSTATUS[i] = 'SUBTD'
            lib.JOBID[i] = int(match.group(1))
        else:
            print('Submission of %03d seems to have failed: %s' % (lib.JOBNUMBER[i],result))
        nSub += 1

# fire the merge job
else:
    print('fire merge')
    # set the resources string coming from mps.db
    resources = lib.get_class('pede')
    if 'cmscafspec' in resources:
        # queue is hard-wired here (the string built from the class was
        # always overwritten by it)
        resources = '-q cmscafalcamille'
    else:
        resources = '-q '+resources

    # request memory for the pede job
    # TODO: check the rusage syntax against the bsub documentation
    resources += ' -R "rusage[mem="%s"]"' % str(lib.pedeMem)

    # the merge is considered OK when every Mille job is 'OK' or has a
    # status containing 'DISABLED'
    mergeOK = all(lib.JOBSTATUS[i] == 'OK' or 'DISABLED' in lib.JOBSTATUS[i]
                  for i in xrange(lib.nJobs))

    # loop over merge jobs: the entries following the Mille jobs
    for i in xrange(lib.nJobs, len(lib.JOBDIR)):
        jobNumFrom1 = i+1

        # only jobs in SETUP state are submitted, and only if the Mille jobs
        # are fine or the submission is forced
        if lib.JOBSTATUS[i] != 'SETUP':
            print('Merge job %d status %s not submitted.' %
                  (jobNumFrom1, lib.JOBSTATUS[i]))
            continue
        if not mergeOK and forceMerge != 1:
            print('Merge job %s not submitted since Mille jobs error/unfinished (Use -mf to force).' % jobNumFrom1)
            continue

        # some paths for clarity
        Path = '%s/%s' % (theJobData,lib.JOBDIR[i])
        backupScriptPath = Path+'/theScript.sh.bak'
        scriptPath = Path+'/theScript.sh'

        if forceMerge == 1:
            # make a backup copy of the script first, if it doesn't already exist
            if not os.path.isfile(backupScriptPath):
                os.system('cp -p '+scriptPath+' '+backupScriptPath)

            # name of the merge cfg file, read from the backup script
            mergeCfg = _merge_cfg_name(backupScriptPath)

            # make a backup copy of the cfg
            backupCfgPath = Path+'/%s.bak' % mergeCfg
            cfgPath = Path+'/%s' % mergeCfg
            if not os.path.isfile(backupCfgPath):
                os.system('cp -p '+cfgPath+' '+backupCfgPath)

            # rewrite the mergeCfg using only 'OK' jobs
            # NOTE(review): the original comment says the last mille job is
            # used as base config, but index lib.nJobs is where the merge-job
            # loop above starts -- confirm which directory is intended.
            inCfgPath = theJobData+'/'+lib.JOBDIR[lib.nJobs]+'/the.py'
            command = 'mps_merge.py -c '+inCfgPath+' '+cfgPath+' '+Path+' '+str(lib.nJobs)
            os.system(command)

            # rewrite theScript.sh using only 'OK' jobs
            command = 'mps_scriptm.pl -c '+lib.mergeScript+' '+scriptPath+' '+Path+' '+mergeCfg+' '+str(lib.nJobs)+' '+lib.mssDir+' '+lib.mssDirPool
            os.system(command)
        else:
            # restore the backup copy of the script
            if os.path.isfile(backupScriptPath):
                os.system('cp -pf '+backupScriptPath+' '+scriptPath)

            # name of the merge cfg file, read from the (restored) script
            mergeCfg = _merge_cfg_name(scriptPath)

            # restore the backup copy of the cfg
            backupCfgPath = Path+'/%s.bak' % mergeCfg
            cfgPath = Path+'/%s' % mergeCfg
            if os.path.isfile(backupCfgPath):
                os.system('cp -pf '+backupCfgPath+' '+cfgPath)

        # submit merge job
        nMerge = i-lib.nJobs  # 'index' of this merge job
        curJobName = 'm'+str(nMerge)+'_'+theJobName
        submission = 'bsub -J %s %s %s' % (curJobName,resources,scriptPath)
        result = _submit(submission)
        print(' '+result)
        result = result.strip()

        # check if merge job was submitted and update the job database
        match = re.search(r'Job <(\d+)> is submitted', result)
        if match:
            lib.JOBSTATUS[i] = 'SUBTD'
            lib.JOBID[i] = int(match.group(1))
            print('jobid is %s' % lib.JOBID[i])
        else:
            print('Submission of merge job seems to have failed: %s' % result)
222 
223 
224 lib.write_db()
225 
226 
227