CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
List of all members | Public Member Functions | Public Attributes
batchmanager.BatchManager Class Reference

Public Member Functions

def __init__
 
def CheckBatchScript
 
def DefineOptions
 
def ManageOutputDir
 
def mkdir
 
def ParseOptions
 
def PrepareJob
 
def PrepareJobs
 
def PrepareJobUser
 
def RunningMode
 
def SubmitJob
 
def SubmitJobs
 

Public Attributes

 listOfJobs_
 
 outputDir_
 
 parser_
 
 remoteOutputDir_
 
 remoteOutputFile_
 

Detailed Description

This class manages batch jobs
Used in batch scripts
Colin Bernet 2008

Definition at line 13 of file batchmanager.py.

Constructor & Destructor Documentation

def batchmanager.BatchManager.__init__ (   self)

Definition at line 23 of file batchmanager.py.

References batchmanager.BatchManager.DefineOptions().

23 
def __init__(self):
    '''Create the manager and register its command-line options.

    The job list starts out empty so that PrepareJob() can be called
    directly, without going through PrepareJobs() first.
    '''
    self.listOfJobs_ = []
    self.DefineOptions()
26 

Member Function Documentation

def batchmanager.BatchManager.CheckBatchScript (   self,
  batchScript 
)

Definition at line 232 of file batchmanager.py.

References if().

def CheckBatchScript(self, batchScript):
    '''Check a batch script and create the directory it copies its output to.

    Every line of the script that looks like ``cp ... $jobdir <target>`` is
    inspected. If <target> (after environment-variable expansion) already
    exists as a directory, the program exits with status 2. Otherwise the
    directory is created, unless the negate option is set.

    batchScript -- path of the script; an empty string disables the check.

    Exits with status 3 if the script does not exist or cannot be opened.
    '''
    if batchScript == '':
        return

    if not os.path.isfile(batchScript):
        print(' '.join(('file ', str(batchScript), ' does not exist')))
        sys.exit(3)

    try:
        ifile = open(batchScript)
    except IOError:
        print('cannot open input %s' % batchScript)
        sys.exit(3)

    # compiled once instead of once per line
    copyPattern = re.compile(r"\s*cp.*\$jobdir\s+(\S+)$")

    # the with-block closes the script even when we leave through sys.exit
    with ifile:
        for line in ifile:
            m = copyPattern.match(line)
            if not m:
                continue
            target = m.group(1)
            expanded = os.path.expandvars(target)
            if os.path.isdir(expanded):
                print(' '.join(('output directory ', target, 'already exists!')))
                print('exiting')
                sys.exit(2)
            if not self.options_.negate:
                os.mkdir(expanded)
            else:
                print(' '.join(('not making dir', str(self.options_.negate))))
def batchmanager.BatchManager.DefineOptions (   self)

Definition at line 27 of file batchmanager.py.

Referenced by batchmanager.BatchManager.__init__().

27 
def DefineOptions(self):
    '''Create self.parser_ and register the command-line options.

    Options: -o/--output-dir, -r/--remote-copy, -f/--force, -n/--negate,
    -b/--batch and -p/--parametric. The parsed values are read by the other
    methods through self.options_ once ParseOptions() has run.
    '''
    # how to add more doc to the help?
    self.parser_ = OptionParser()
    self.parser_.add_option(
        "-o", "--output-dir", dest="outputDir",
        help="Name of the local output directory for your jobs. This directory will be created automatically.",
        default=None)
    self.parser_.add_option(
        "-r", "--remote-copy", dest="remoteCopy",
        help="remote output directory for your jobs. Example: /store/cmst3/user/cbern/CMG/HT/Run2011A-PromptReco-v1/AOD/PAT_CMG/RA2. This directory *must* be provided as a logical file name (LFN). When this option is used, all root files produced by a job are copied to the remote directory, and the job index is appended to the root file name. The Logger directory is tarred and compressed into Logger.tgz, and sent to the remote output directory as well. Afterwards, use logger.py to access the information contained in Logger.tgz. For remote copy to PSI specify path like: '/pnfs/psi.ch/...'. Logs will be sent back to the submission directory. NOTE: so far this option has been implemented and validated to work only for a remote copy to PSI",
        default=None)
    self.parser_.add_option(
        "-f", "--force", action="store_true",
        dest="force", default=False,
        help="Don't ask any questions, just over-write")
    # this opt can be removed
    self.parser_.add_option(
        "-n", "--negate", action="store_true",
        dest="negate", default=False,
        help="create jobs, but does not submit the jobs.")
    # %default is expanded by optparse, so the help text cannot drift away
    # from the actual default value
    self.parser_.add_option(
        "-b", "--batch", dest="batch",
        help="batch command. default is: '%default'. You can also use 'nohup < ./batchScript.sh &' to run locally.",
        default="bsub -q 8nh < ./batchScript.sh")
    self.parser_.add_option(
        "-p", "--parametric", action="store_true",
        dest="parametric", default=False,
        help="submit jobs parametrically, implemented for IC so far")
51 
def batchmanager.BatchManager.ManageOutputDir (   self)

Definition at line 124 of file batchmanager.py.

def ManageOutputDir(self):
    '''Choose, clean up and create the local output directory.

    If no output directory was given on the command line, a name of the
    form OutCmsBatch_<timestamp> is generated. The absolute path is stored
    in self.outputDir_.

    If the directory already exists:
      * with the force option, it is removed and recreated;
      * otherwise the user is asked to confirm; answering 'n' exits with
        status 1, answering 'y' keeps the existing directory.
    '''
    import shutil

    outputDir = self.options_.outputDir

    if outputDir is None:
        today = datetime.today()
        outputDir = 'OutCmsBatch_%s' % today.strftime("%d%h%y_%H%M%S")
        print('output directory not specified, using %s' % outputDir)

    self.outputDir_ = os.path.abspath(outputDir)

    if os.path.isdir(self.outputDir_):
        if self.options_.force:
            # no shell involved: paths containing spaces or shell
            # metacharacters are removed correctly
            shutil.rmtree(self.outputDir_, ignore_errors=True)
        else:
            answer = ''
            while answer != 'y' and answer != 'n':
                answer = raw_input('The directory ' + self.outputDir_ + ' exists. Are you sure you want to continue? its contents will be overwritten [y/n]')
            if answer == 'n':
                sys.exit(1)

    self.mkdir(self.outputDir_)
152 
def batchmanager.BatchManager.mkdir (   self,
  dirname 
)

Definition at line 262 of file batchmanager.py.

References if().

Referenced by batchmanager.BatchManager.PrepareJob().

def mkdir(self, dirname):
    '''Create dirname, including any missing parent directories.

    An already existing directory is not an error. If the directory cannot
    be created, a message is printed and the program exits with status 4.
    '''
    try:
        os.makedirs(dirname)
    except OSError:
        # like `mkdir -p`: only complain when the directory is really missing
        if not os.path.isdir(dirname):
            print(' '.join(('please remove or rename directory: ', str(dirname))))
            sys.exit(4)
270 
def batchmanager.BatchManager.ParseOptions (   self)

Definition at line 52 of file batchmanager.py.

References reco::parser::MethodInvoker.args_, reco::parser::ExpressionQuaterOperator< Op >.args_, ExternalLHEProducer.args_, Json::Path.args_, pftools::CalibCompare.options_, and pftools::Exercises3.options_.

52 
def ParseOptions(self):
    '''Parse the command line and prepare the remote and local output dirs.

    Sets self.options_, self.args_, self.remoteOutputDir_ and
    self.remoteOutputFile_, then calls ManageOutputDir().

    Remote copy handling (option -r):
      * not given: self.remoteOutputDir_ is the empty string;
      * path containing "psi.ch": it must start with /pnfs/psi.ch (exit
        status 1 otherwise). The directory and a sub-directory named after
        the last component of the local output directory are created with
        gfal-mkdir;
      * anything else is treated as an EOS logical file name (exit status 1
        if it is not an LFN). The directory is created if missing; if it
        already exists and neither negate nor force is set, ValueError is
        raised.

    Returns the tuple (options, args).
    '''
    (self.options_, self.args_) = self.parser_.parse_args()
    if self.options_.remoteCopy is None:
        self.remoteOutputDir_ = ""
    else:
        # removing possible trailing slash
        self.remoteOutputDir_ = self.options_.remoteCopy.rstrip('/')

        if "psi.ch" in self.remoteOutputDir_:  # T3 @ PSI:
            # overwriting protection to be improved
            if self.remoteOutputDir_.startswith("/pnfs/psi.ch"):
                ld_lib_path = os.environ.get('LD_LIBRARY_PATH')
                # to solve gfal conflict with CMSSW
                if ld_lib_path is None:
                    os.environ['LD_LIBRARY_PATH'] = "/usr/lib64/"
                else:
                    os.environ['LD_LIBRARY_PATH'] = "/usr/lib64/:" + ld_lib_path
                try:
                    os.system("gfal-mkdir srm://t3se01.psi.ch/" + self.remoteOutputDir_)
                    outputDir = self.options_.outputDir
                    if outputDir is None:
                        today = datetime.today()
                        outputDir = 'OutCmsBatch_%s' % today.strftime("%d%h%y_%H%M")
                    else:
                        # keep only the last path component, to for instance
                        # direct output to /afs/cern.ch/work/u/user/outputDir
                        outputDir = outputDir.rstrip("/").split("/")[-1]
                    self.remoteOutputDir_ += "/" + outputDir
                    os.system("gfal-mkdir srm://t3se01.psi.ch/" + self.remoteOutputDir_)
                finally:
                    # back to original to avoid conflicts
                    if ld_lib_path is None:
                        os.environ.pop('LD_LIBRARY_PATH', None)
                    else:
                        os.environ['LD_LIBRARY_PATH'] = ld_lib_path
            else:
                print("remote directory must start with /pnfs/psi.ch to send to the tier3 at PSI")
                print(' '.join((self.remoteOutputDir_, "not valid")))
                sys.exit(1)
        else:  # assume EOS
            # only this branch needs the EOS tools
            import CMGTools.Production.eostools as castortools
            if not castortools.isLFN(self.remoteOutputDir_):
                print('When providing an output directory, you must give its LFN, starting by /store. You gave:')
                print(self.remoteOutputDir_)
                sys.exit(1)
            self.remoteOutputDir_ = castortools.lfnToEOS(self.remoteOutputDir_)
            dirExist = castortools.isDirectory(self.remoteOutputDir_)
            if dirExist is False:
                print(' '.join(('creating ', self.remoteOutputDir_)))
                if castortools.isEOSFile(self.remoteOutputDir_):
                    # the output directory is currently a file..
                    # need to remove it.
                    castortools.rm(self.remoteOutputDir_)
                castortools.createEOSDir(self.remoteOutputDir_)
            else:
                # directory exists.
                if self.options_.negate is False and self.options_.force is False:
                    # COLIN need to reimplement protectedRemove in eostools
                    raise ValueError(' '.join(['directory ', self.remoteOutputDir_, ' already exists.']))
    self.remoteOutputFile_ = ""
    self.ManageOutputDir()
    return (self.options_, self.args_)
107 
def batchmanager.BatchManager.PrepareJob (   self,
  value,
  dirname = None 
)
Prepare a job for a given value.

calls PrepareJobUser, which should be overloaded by the user.

Definition at line 153 of file batchmanager.py.

References join(), TFileService.mkdir(), TFileDirectory.mkdir(), batchmanager.BatchManager.mkdir(), L1GtVhdlWriter.outputDir_, batchmanager.BatchManager.outputDir_, L1GtVhdlWriterCore.outputDir_, LaserSorter.outputDir_, and batchmanager.BatchManager.PrepareJobUser().

def PrepareJob(self, value, dirname=None):
    '''Set up the directory of a single job and register it.

    value   -- job value, handed over to PrepareJobUser
    dirname -- name of the job directory inside self.outputDir_;
               defaults to 'Job_<value>'

    The directory is created, appended to self.listOfJobs_, and then
    PrepareJobUser (to be overloaded by the user) is called with it.
    '''
    print('PrepareJob : %s' % value)
    if dirname is None:
        subDir = 'Job_{value}'.format(value=value)
    else:
        subDir = dirname
    jobDir = self.outputDir_ + '/' + subDir
    # Python 2's print statement emits no separator after a tab
    print('\t' + jobDir)
    self.mkdir(jobDir)
    self.listOfJobs_.append(jobDir)
    self.PrepareJobUser(jobDir, value)
def batchmanager.BatchManager.PrepareJobs (   self,
  listOfValues,
  listOfDirNames = None 
)

Definition at line 108 of file batchmanager.py.

def PrepareJobs(self, listOfValues, listOfDirNames=None):
    '''Prepare one job per value and print the resulting job list.

    listOfValues   -- values passed one by one to PrepareJob
    listOfDirNames -- optional directory names, paired with the values in
                      order; when omitted PrepareJob picks the names

    self.listOfJobs_ is reset before the jobs are prepared.
    '''
    print('PREPARING JOBS ======== ')
    self.listOfJobs_ = []

    if listOfDirNames is not None:
        for jobValue, jobName in zip(listOfValues, listOfDirNames):
            self.PrepareJob(jobValue, jobName)
    else:
        for jobValue in listOfValues:
            self.PrepareJob(jobValue)

    print("list of jobs:")
    pprint.PrettyPrinter(indent=4).pprint(self.listOfJobs_)
122 
def batchmanager.BatchManager.PrepareJobUser (   self,
  value 
)
Hook allowing user to define how one of his jobs should be prepared.

Definition at line 168 of file batchmanager.py.

Referenced by batchmanager.BatchManager.PrepareJob().

def PrepareJobUser(self, jobDir=None, value=None):
    '''Hook allowing user to define how one of his jobs should be prepared.

    jobDir -- directory of the job, as passed by PrepareJob
    value  -- value of the job, as passed by PrepareJob

    This default implementation only prints a reminder. Both parameters
    are optional so that older calls passing a single argument keep
    working.
    '''
    print('\tto be customized')
172 
def batchmanager.BatchManager.RunningMode (   self,
  batch 
)
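Returns "LXPLUS", "PISA", "PADOVA", "PSI", "IC", "LOCAL", or None.

"LXPLUS" : batch command is bsub, and logged on lxplus
"PISA"   : batch command is bsub, and logged on a host whose name contains gridui or faiwn
"PADOVA" : batch command is bsub, and logged on a t2-ui (pd.infn) or t2-cld (lnl.infn) host
"PSI"    : batch command is qsub, and logged to t3uiXX
"IC"     : batch command is qsub, and logged to hep.ph.ic.ac.uk
"LOCAL"  : batch command is nohup or ./batchScript.sh
None     : batch command is qsub on any other host
In all other cases, a ValueError is raised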

Definition at line 271 of file batchmanager.py.

Referenced by heppy_batch.MyBatchManager.PrepareJobUser(), and batchmanager.BatchManager.SubmitJobs().

def RunningMode(self, batch):
    '''Return the name of the site the batch command will run on.

    batch -- the batch command line; only its first word is looked at.

    Returns
      "LXPLUS" : batch command is bsub, and logged on lxplus
      "PISA"   : batch command is bsub, and the host name contains
                 gridui or faiwn
      "PADOVA" : batch command is bsub, and logged on a t2-ui (pd.infn)
                 or t2-cld (lnl.infn) host
      "IC"     : batch command is qsub, and logged to hep.ph.ic.ac.uk
      "PSI"    : batch command is qsub, and logged to t3uiXX
      "LOCAL"  : batch command is nohup or ./batchScript.sh
      None     : batch command is qsub on any other host

    Raises ValueError when bsub is used on an unsupported host, and when
    the batch command is empty or unknown.
    '''
    hostName = os.environ.get('HOSTNAME')
    if not hostName:
        # HOSTNAME is a shell variable that is not always exported
        import socket
        hostName = socket.gethostname()

    onLxplus = hostName.startswith('lxplus')
    onPSI = hostName.startswith('t3ui')
    onPISA = re.match('.*gridui.*', hostName) or re.match('.*faiwn.*', hostName)
    onPADOVA = (hostName.startswith('t2-ui') and re.match('.*pd.infn.*', hostName)) or (hostName.startswith('t2-cld') and re.match('.*lnl.infn.*', hostName))
    onIC = 'hep.ph.ic.ac.uk' in hostName

    words = batch.split()
    if not words:
        raise ValueError('empty batch command')
    batchCmd = words[0]

    if batchCmd == 'bsub':
        if not (onLxplus or onPISA or onPADOVA):
            err = 'Cannot run %s on %s' % (batchCmd, hostName)
            raise ValueError(err)
        elif onPISA:
            print('running on LSF pisa : %s from %s' % (batchCmd, hostName))
            return 'PISA'
        elif onPADOVA:
            print('running on LSF padova: %s from %s' % (batchCmd, hostName))
            return 'PADOVA'
        else:
            print('running on LSF lxplus: %s from %s' % (batchCmd, hostName))
            return 'LXPLUS'
    elif batchCmd == "qsub":
        if onIC:
            print('running on IC : %s from %s' % (batchCmd, hostName))
            return 'IC'
        if onPSI:
            print('running on SGE : %s from %s' % (batchCmd, hostName))
            return 'PSI'
        # qsub on any other host is not rejected: fall through, return None
        return None
    elif batchCmd == 'nohup' or batchCmd == './batchScript.sh':
        print('running locally : %s on %s' % (batchCmd, hostName))
        return 'LOCAL'
    else:
        err = 'unknown batch command: X%sX' % batchCmd
        raise ValueError(err)
def batchmanager.BatchManager.SubmitJob (   self,
  jobDir 
)
Hook for job submission.

Definition at line 226 of file batchmanager.py.

Referenced by batchmanager.BatchManager.SubmitJobs().

def SubmitJob(self, jobDir):
    '''Submission hook, meant to be customized.

    jobDir -- directory of the job; here it is only printed.

    Runs the batch command stored in self.options_.batch through the shell.
    '''
    batchCommand = self.options_.batch
    print(' '.join(('submitting (to be customized): ', str(jobDir))))
    os.system(batchCommand)
231 
def batchmanager.BatchManager.SubmitJobs (   self,
  waitingTimeInSec = 0 
)
Submit all jobs. Possibly wait between each job

Definition at line 173 of file batchmanager.py.

References if(), batchmanager.BatchManager.listOfJobs_, L1GtVhdlWriter.outputDir_, batchmanager.BatchManager.outputDir_, L1GtVhdlWriterCore.outputDir_, LaserSorter.outputDir_, batchmanager.BatchManager.RunningMode(), and batchmanager.BatchManager.SubmitJob().

def SubmitJobs(self, waitingTimeInSec=0):
    '''Submit all jobs. Possibly wait between each job.

    waitingTimeInSec -- pause after each individual submission.

    Nothing is submitted when the negate option is set. When the running
    mode is "IC" and the parametric option is set, the job directories are
    written to jobDirectories.txt and a single array job is submitted with
    qsub. Otherwise SubmitJob() is called once per job, from inside the
    job directory.
    '''
    if self.options_.negate:
        print('*NOT* SUBMITTING JOBS - exit ')
        return
    print('SUBMITTING JOBS ======== ')

    mode = self.RunningMode(self.options_.batch)

    # If at IC write all the job directories to a file then submit a parameteric
    # job that depends on the file number. This is required to circumvent the 2000
    # individual job limit at IC
    if mode == "IC" and self.options_.parametric:

        jobDirsFile = os.path.join(self.outputDir_, "jobDirectories.txt")
        with open(jobDirsFile, 'w') as f:
            for jobDir in self.listOfJobs_:
                f.write(jobDir + '\n')

        readLine = "readarray JOBDIR < " + jobDirsFile + "\n"

        submitScript = os.path.join(self.outputDir_, "parametricSubmit.sh")
        with open(submitScript, 'w') as batchScript:
            batchScript.write("#!/bin/bash\n")
            batchScript.write("#$ -e /dev/null -o /dev/null \n")
            batchScript.write("cd " + self.outputDir_ + "\n")
            batchScript.write(readLine)
            batchScript.write("cd ${JOBDIR[${SGE_TASK_ID}-1]}\n")
            batchScript.write("./batchScript.sh > BATCH_outputLog.txt 2> BATCH_errorLog.txt")

        # Find the queue; fall back to the default one when -q is absent
        # or is the last word of the batch command
        queue = "hepshort.q"
        splitBatchOptions = self.options_.batch.split()
        if '-q' in splitBatchOptions:
            queueIndex = splitBatchOptions.index('-q') + 1
            if queueIndex < len(splitBatchOptions):
                queue = splitBatchOptions[queueIndex]

        os.system("qsub -q " + queue + " -t 1-" + str(len(self.listOfJobs_)) + " " + submitScript)

    else:
        # continue as before, submitting one job per directory
        root = os.getcwd()
        for jobDir in self.listOfJobs_:
            # run it
            print(' '.join(('processing ', str(jobDir))))
            os.chdir(jobDir)
            try:
                self.SubmitJob(jobDir)
            finally:
                # and come back, even if the submission hook failed
                os.chdir(root)
            print('waiting %s seconds...' % waitingTimeInSec)
            time.sleep(waitingTimeInSec)
            print('done.')
Member Data Documentation

batchmanager.BatchManager.listOfJobs_

Definition at line 110 of file batchmanager.py.

Referenced by batchmanager.BatchManager.SubmitJobs().

batchmanager.BatchManager.outputDir_

Definition at line 138 of file batchmanager.py.

Referenced by batchmanager.BatchManager.PrepareJob(), valtools.webpage.readCaptions(), and batchmanager.BatchManager.SubmitJobs().

batchmanager.BatchManager.parser_

Definition at line 30 of file batchmanager.py.

batchmanager.BatchManager.remoteOutputDir_

Definition at line 55 of file batchmanager.py.

batchmanager.BatchManager.remoteOutputFile_

Definition at line 103 of file batchmanager.py.