CMS 3D CMS Logo

List of all members | Public Member Functions | Public Attributes | Private Attributes
crabFunctions.CrabTask Class Reference

Class for a single CrabRequest. This class represents one crab3 task/request. More...

Public Member Functions

def __init__ (self, taskname="", crab_config="", crabController=None, initUpdate=True, debuglevel="ERROR", datasetpath="", localDir="", outlfn="")
 The object constructor. More...
 
def crab_folder (self)
 
def crabConfig (self)
 Function to access the crab config object, or read it if uninitialized. More...
 
def crabFolder (self)
 
def datasetpath (self)
 
def handleNoState (self)
 Function to handle Task which received NOSTATE status. More...
 
def isData (self)
 Property function to find out if task runs on data. More...
 
def readLogArch (self, logArchName)
 Function to read log info from log.tar.gz. More...
 
def resubmit_failed (self)
 Function to resubmit failed jobs in tasks. More...
 
def test_print (self)
 
def update (self)
 Function to update the Task and its associated Jobs. More...
 
def updateJobStats (self, dCacheFileList=None)
 Function to update JobStatistics. More...
 

Public Attributes

 debug
 
 failureReason
 
 finalFiles
 
 isUpdating
 
 jobs
 
 lastUpdate
 
 localDir
 
 log
 
 maxjobnumber
 
 name
 
 nComplete
 
 nCooloff
 
 nFailed
 
 nFinished
 
 nIdle
 
 nJobs
 
 nRunning
 
 nTransferring
 
 nUnsubmitted
 
 outlfn
 
 resubmitCount
 
 state
 
 taskId
 
 totalEvents
 
 uuid
 

Private Attributes

 _crabConfig
 
 _crabFolder
 
 _datasetpath_default
 
 _isData
 

Detailed Description

Class for a single CrabRequest. This class represents one crab3 task/request.

Definition at line 372 of file crabFunctions.py.

Constructor & Destructor Documentation

◆ __init__()

def crabFunctions.CrabTask.__init__ (   self,
  taskname = "",
  crab_config = "",
  crabController = None,
  initUpdate = True,
  debuglevel = "ERROR",
  datasetpath = "",
  localDir = "",
  outlfn = "" 
)

The object constructor.

Parameters
self: The object pointer.
taskname: The name of the task; if empty, the request name is read from crab_config.
initUpdate: Flag indicating whether crab status should be called when an instance is created.

Definition at line 387 of file crabFunctions.py.

387  outlfn = "" ,):
388 
389  # crab config as a python object should only be used via .config
390  self._crabConfig = None
391 
392  self._crabFolder = None
393 
394  if taskname:
395  self.name = taskname
396  else:
397  if not crab_config:
398  raise ValueError("Either taskname or crab_config needs to be set")
399  if not os.path.exists( crab_config):
400  raise IOError("File %s not found" % crab_config )
401  self.name = crab_config
402  self.name = self.crabConfig.General.requestName
403  self.uuid = uuid.uuid4()
404  #~ self.lock = multiprocessing.Lock()
405  #setup logging
406  self.log = logging.getLogger( 'crabTask' )
407  self.log.setLevel(logging._levelNames[ debuglevel ])
408  self.jobs = {}
409  self.localDir = localDir
410  self.outlfn = outlfn
411  self.isUpdating = False
412  self.taskId = -1
413  #variables for statistics
414  self.nJobs = 0
415  self.state = "NOSTATE"
416  self.maxjobnumber = 0
417  self.nUnsubmitted = 0
418  self.nIdle = 0
419  self.nRunning = 0
420  self.nTransferring = 0
421  self.nCooloff = 0
422  self.nFailed = 0
423  self.nFinished = 0
424  self.nComplete = 0
425  self.failureReason = None
426  self.lastUpdate = datetime.datetime.now().strftime( "%Y-%m-%d_%H.%M.%S" )
427 
428  self._isData = None
429  self.resubmitCount = 0
430 
431  self.debug = False
432 
433  self.finalFiles = []
434  self.totalEvents = 0
435 
436 
437  self._datasetpath_default = datasetpath
438 
439  #start with first updates
440  if initUpdate:
441  self.update()
442  self.updateJobStats()
443 

Member Function Documentation

◆ crab_folder()

def crabFunctions.CrabTask.crab_folder (   self)

Definition at line 507 of file crabFunctions.py.

References crabFunctions.CrabTask.crabConfig().

Referenced by crabFunctions.CrabTask.update().

507  def crab_folder(self):
508  return os.path.join( self.crabConfig.General.workArea,
509  "crab_" + self.crabConfig.General.requestName)

◆ crabConfig()

def crabFunctions.CrabTask.crabConfig (   self)

◆ crabFolder()

def crabFunctions.CrabTask.crabFolder (   self)

Definition at line 480 of file crabFunctions.py.

References crabFunctions.CrabTask._crabFolder, crabFunctions.CrabTask.crabConfig(), relativeConstraints.error, crabFunctions.CrabTask.log, AlignableObjectId::entry.name, preexistingValidation.PreexistingValidation.name, alignment.Alignment.name, XMLProcessor::_loaderBaseConfig.name, genericValidation.GenericValidation.name, h4DSegm.name, TrackerSectorStruct.name, MuonGeometrySanityCheckPoint.name, classes.MonitorData.name, classes.OutputData.name, h2DSegm.name, geometry.Structure.name, plotscripts.SawTeethFunction.name, crabFunctions.CrabTask.name, and hTMaxCell.name.

480  def crabFolder( self ):
481  if not self._crabFolder is None: return self._crabFolder
482  crab = CrabController()
483  if os.path.exists( os.path.join( self.crabConfig.General.workArea, crab._prepareFoldername( self.name ) ) ):
484  self._crabFolder = os.path.join( self.crabConfig.General.workArea, crab._prepareFoldername( self.name ) )
485  return self._crabFolder
486  alternative_path = os.path.join(os.path.cwd(), crab._prepareFoldername( self.name ) )
487  if os.path.exists( alternative_path ):
488  self._crabFolder = alternative_path
489  return self._crabFolder
490  self.log.error( "Unable to find folder for Task")
491  return ""
492 

◆ datasetpath()

def crabFunctions.CrabTask.datasetpath (   self)

Definition at line 472 of file crabFunctions.py.

References crabFunctions.CrabTask._datasetpath_default, and crabFunctions.CrabTask.crabConfig().

472  def datasetpath( self ):
473  try:
474  return self.crabConfig.Data.inputDataset
475  except:
476  pass
477  return self._datasetpath_default
478 

◆ handleNoState()

def crabFunctions.CrabTask.handleNoState (   self)

Function to handle Task which received NOSTATE status.

Parameters
self: The CrabTask object pointer.

Definition at line 542 of file crabFunctions.py.

References AlignableObjectId::entry.name, preexistingValidation.PreexistingValidation.name, alignment.Alignment.name, XMLProcessor::_loaderBaseConfig.name, genericValidation.GenericValidation.name, h4DSegm.name, TrackerSectorStruct.name, MuonGeometrySanityCheckPoint.name, classes.MonitorData.name, classes.OutputData.name, h2DSegm.name, geometry.Structure.name, plotscripts.SawTeethFunction.name, crabFunctions.CrabTask.name, hTMaxCell.name, crabFunctions.CrabTask.resubmitCount, CastorLedAnalysis.state, HcalLedAnalysis.state, CastorPedestalAnalysis.state, HcalPedestalAnalysis.state, and crabFunctions.CrabTask.state.

Referenced by crabFunctions.CrabTask.update().

542  def handleNoState( self ):
543  crab = CrabController()
544  if "The CRAB3 server backend could not resubmit your task because the Grid scheduler answered with an error." in task.failureReason:
545  # move folder and try it again
546  cmd = 'mv %s bak_%s' %(crab._prepareFoldername( self.name ),crab._prepareFoldername( self.name ))
547  p = subprocess.Popen(cmd,stdout=subprocess.PIPE, shell=True)#,shell=True,universal_newlines=True)
548  (out,err) = p.communicate()
549  self.state = "SHEDERR"
550  configName = '%s_cfg.py' %(crab._prepareFoldername( self.name ))
551  crab.submit( configName )
552 
553  elif task.failureReason is not None:
554  self.state = "ERRHANDLE"
555  crab.resubmit( self.name )
556  self.resubmitCount += 1
557 

◆ isData()

def crabFunctions.CrabTask.isData (   self)

Property function to find out if task runs on data.

Parameters
self: The CrabTask object pointer.

Definition at line 448 of file crabFunctions.py.

References crabFunctions.CrabTask._isData, crabFunctions.CrabTask.crabConfig(), AlignableObjectId::entry.name, preexistingValidation.PreexistingValidation.name, alignment.Alignment.name, XMLProcessor::_loaderBaseConfig.name, genericValidation.GenericValidation.name, h4DSegm.name, TrackerSectorStruct.name, MuonGeometrySanityCheckPoint.name, classes.MonitorData.name, classes.OutputData.name, h2DSegm.name, geometry.Structure.name, plotscripts.SawTeethFunction.name, crabFunctions.CrabTask.name, and hTMaxCell.name.

448  def isData( self ):
449  if self._isData is None:
450  try:
451  test = self.crabConfig.Data.lumiMask
452  self._isData = True
453  except:
454  if self.name.startswith( "Data_" ):
455  self._isData = True
456  else:
457  self._isData = False
458  return self._isData
459 
460 

◆ readLogArch()

def crabFunctions.CrabTask.readLogArch (   self,
  logArchName 
)

Function to read log info from log.tar.gz.

Parameters
self: The object pointer.
logArchName: Path to the compressed log file.
Returns
a dictionary with parsed info

Definition at line 599 of file crabFunctions.py.

References createfilelist.int, print(), and submitPVValidationJobs.split().

599  def readLogArch(self, logArchName):
600  JobNumber = logArchName.split("/")[-1].split("_")[1].split(".")[0]
601  log = {'readEvents' : 0}
602  with tarfile.open( logArchName, "r") as tar:
603  try:
604  JobXmlFile = tar.extractfile('FrameworkJobReport-%s.xml' % JobNumber)
605  root = ET.fromstring( JobXmlFile.read() )
606  for child in root:
607  if child.tag == 'InputFile':
608  for subchild in child:
609  if subchild.tag == 'EventsRead':
610  nEvents = int(subchild.text)
611  log.update({'readEvents' : nEvents})
612  break
613  break
614  except:
615  print("Can not parse / read %s" % logArchName)
616  return log
617 
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47

◆ resubmit_failed()

def crabFunctions.CrabTask.resubmit_failed (   self)

Function to resubmit failed jobs in tasks.

Parameters
self: The CrabTask object pointer.

Definition at line 496 of file crabFunctions.py.

References crabFunctions.CrabTask.jobs, relativeConstraints.keys, crabFunctions.CrabTask.lastUpdate, AlignableObjectId::entry.name, preexistingValidation.PreexistingValidation.name, alignment.Alignment.name, XMLProcessor::_loaderBaseConfig.name, genericValidation.GenericValidation.name, h4DSegm.name, TrackerSectorStruct.name, MuonGeometrySanityCheckPoint.name, classes.MonitorData.name, classes.OutputData.name, h2DSegm.name, geometry.Structure.name, plotscripts.SawTeethFunction.name, crabFunctions.CrabTask.name, and hTMaxCell.name.

496  def resubmit_failed( self ):
497  failedJobIds = []
498  controller = CrabController()
499  for jobkey in self.jobs.keys():
500  job = self.jobs[jobkey]
501  if job['State'] == 'failed':
502  failedJobIds.append( job['JobIds'][-1] )
503  controller.resubmit( self.name, joblist = failedJobIds )
504  self.lastUpdate = datetime.datetime.now().strftime( "%Y-%m-%d_%H.%M.%S" )
505 

◆ test_print()

def crabFunctions.CrabTask.test_print (   self)

Definition at line 558 of file crabFunctions.py.

References crabFunctions.CrabTask.uuid.

558  def test_print(self):
559  return self.uuid

◆ update()

def crabFunctions.CrabTask.update (   self)

Function to update the Task and its associated Jobs.

Parameters
self: The CrabTask object pointer.

Definition at line 513 of file crabFunctions.py.

References crabFunctions.CrabTask.crab_folder(), crabFunctions.CrabTask.debug, crabFunctions.CrabTask.failureReason, crabFunctions.CrabTask.handleNoState(), crabFunctions.CrabTask.isUpdating, crabFunctions.CrabTask.jobs, crabFunctions.CrabTask.lastUpdate, crabFunctions.CrabTask.log, AlignableObjectId::entry.name, preexistingValidation.PreexistingValidation.name, alignment.Alignment.name, XMLProcessor::_loaderBaseConfig.name, genericValidation.GenericValidation.name, h4DSegm.name, TrackerSectorStruct.name, MuonGeometrySanityCheckPoint.name, classes.MonitorData.name, classes.OutputData.name, h2DSegm.name, geometry.Structure.name, plotscripts.SawTeethFunction.name, crabFunctions.CrabTask.name, hTMaxCell.name, Mpslibclass.jobdatabase.nJobs, crabFunctions.CrabTask.nJobs, crabFunctions.CrabTask.resubmitCount, CosmicNavigationSchool::CosmicNavigationSchoolConfiguration.self, DDLSAX2FileHandler.self(), CastorLedAnalysis.state, HcalLedAnalysis.state, HcalPedestalAnalysis.state, CastorPedestalAnalysis.state, crabFunctions.CrabTask.state, and crabFunctions.CrabTask.updateJobStats().

Referenced by progressbar.ProgressBar.__next__(), MatrixUtil.Matrix.__setitem__(), MatrixUtil.Steps.__setitem__(), dqm-mbProfile.Profile.finish(), progressbar.ProgressBar.finish(), and MatrixUtil.Steps.overwrite().

513  def update(self):
514  #~ self.lock.acquire()
515  self.log.debug( "Start update for task %s" % self.name )
516  self.isUpdating = True
517  controller = CrabController()
518  self.state = "UPDATING"
519  # check if we should drop this sample due to missing info
520 
521  self.log.debug( "Try to get status for task" )
522  self.state , self.jobs,self.failureReason = controller.status(self.crab_folder)
523  self.log.debug( "Found state: %s" % self.state )
524  if self.state=="FAILED":
525  #try it once more
526  time.sleep(2)
527  self.state , self.jobs,self.failureReason = controller.status(self.crab_folder)
528  self.nJobs = len(self.jobs)
529  self.updateJobStats()
530  if self.state == "NOSTATE":
531  self.log.debug( "Trying to resubmit because of NOSTATE" )
532  if self.resubmitCount < 3: self.self.handleNoState()
533  # add to db if not
534  # Final solution inf state not yet found
535  self.isUpdating = False
536  self.lastUpdate = datetime.datetime.now().strftime( "%Y-%m-%d_%H.%M.%S" )
537  #~ self.lock.release()
538 
#define debug
Definition: HDRShower.cc:19
#define update(a, b)

◆ updateJobStats()

def crabFunctions.CrabTask.updateJobStats (   self,
  dCacheFileList = None 
)

Function to update JobStatistics.

Parameters
self: The object pointer.
dCacheFileList: A list of files on the dCache.

Definition at line 564 of file crabFunctions.py.

References any(), createfilelist.int, crabFunctions.CrabTask.jobs, relativeConstraints.keys, AlignableObjectId::entry.name, preexistingValidation.PreexistingValidation.name, alignment.Alignment.name, XMLProcessor::_loaderBaseConfig.name, genericValidation.GenericValidation.name, h4DSegm.name, TrackerSectorStruct.name, MuonGeometrySanityCheckPoint.name, classes.MonitorData.name, classes.OutputData.name, h2DSegm.name, geometry.Structure.name, plotscripts.SawTeethFunction.name, crabFunctions.CrabTask.name, hTMaxCell.name, crabFunctions.CrabTask.nComplete, and print().

Referenced by crabFunctions.CrabTask.update().

564  def updateJobStats(self,dCacheFileList = None):
565  jobKeys = sorted(self.jobs.keys())
566  try:
567  intJobkeys = [int(x) for x in jobKeys]
568  except:
569  print("error parsing job numers to int")
570 
571  #maxjobnumber = max(intJobkeys)
572 
573  stateDict = {'unsubmitted':0,'idle':0,'running':0,'transferring':0,'cooloff':0,'failed':0,'finished':0}
574  nComplete = 0
575 
576  # loop through jobs
577  for key in jobKeys:
578  job = self.jobs[key]
579  #check if all completed files are on decache
580  for statekey in stateDict.keys():
581  if statekey in job['State']:
582  stateDict[statekey]+=1
583  # check if finished fails are found on dCache if dCacheFilelist is given
584  if dCacheFileList is not None:
585  outputFilename = "%s_%s"%( self.name, key)
586  if 'finished' in statekey and any(outputFilename in s for s in dCacheFileList):
587  nComplete +=1
588 
589  for state in stateDict:
590  attrname = "n" + state.capitalize()
591  setattr(self, attrname, stateDict[state])
592  self.nComplete = nComplete
593 
bool any(const std::vector< T > &v, const T &what)
Definition: ECalSD.cc:37
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47

Member Data Documentation

◆ _crabConfig

crabFunctions.CrabTask._crabConfig
private

Definition at line 390 of file crabFunctions.py.

Referenced by crabFunctions.CrabTask.crabConfig().

◆ _crabFolder

crabFunctions.CrabTask._crabFolder
private

Definition at line 392 of file crabFunctions.py.

Referenced by crabFunctions.CrabTask.crabFolder().

◆ _datasetpath_default

crabFunctions.CrabTask._datasetpath_default
private

Definition at line 437 of file crabFunctions.py.

Referenced by crabFunctions.CrabTask.datasetpath().

◆ _isData

crabFunctions.CrabTask._isData
private

Definition at line 428 of file crabFunctions.py.

Referenced by crabFunctions.CrabTask.isData().

◆ debug

crabFunctions.CrabTask.debug

◆ failureReason

crabFunctions.CrabTask.failureReason

Definition at line 425 of file crabFunctions.py.

Referenced by crabFunctions.CrabTask.update().

◆ finalFiles

crabFunctions.CrabTask.finalFiles

Definition at line 433 of file crabFunctions.py.

◆ isUpdating

crabFunctions.CrabTask.isUpdating

Definition at line 411 of file crabFunctions.py.

Referenced by crabFunctions.CrabTask.update().

◆ jobs

crabFunctions.CrabTask.jobs

◆ lastUpdate

crabFunctions.CrabTask.lastUpdate

◆ localDir

crabFunctions.CrabTask.localDir

Definition at line 409 of file crabFunctions.py.

◆ log

crabFunctions.CrabTask.log

◆ maxjobnumber

crabFunctions.CrabTask.maxjobnumber

Definition at line 416 of file crabFunctions.py.

◆ name

crabFunctions.CrabTask.name

Definition at line 395 of file crabFunctions.py.

Referenced by ElectronMVAID.ElectronMVAID.__call__(), FWLite.ElectronMVAID.__call__(), dirstructure.Directory.__create_pie_image(), DisplayManager.DisplayManager.__del__(), dqm_interfaces.DirID.__eq__(), BeautifulSoup.Tag.__eq__(), dirstructure.Directory.__get_full_path(), dirstructure.Comparison.__get_img_name(), dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dirstructure.Comparison.__make_image(), core.autovars.NTupleVariable.__repr__(), core.autovars.NTupleObjectType.__repr__(), core.autovars.NTupleObject.__repr__(), core.autovars.NTupleCollection.__repr__(), dirstructure.Directory.__repr__(), dqm_interfaces.DirID.__repr__(), dirstructure.Comparison.__repr__(), config.Service.__setattr__(), config.CFG.__str__(), counter.Counter.__str__(), average.Average.__str__(), BeautifulSoup.Tag.__str__(), BeautifulSoup.SoupStrainer.__str__(), FWLite.WorkingPoints._reformat_cut_definitions(), core.autovars.NTupleObjectType.addSubObjects(), core.autovars.NTupleObjectType.addVariables(), core.autovars.NTupleObjectType.allVars(), dataset.CMSDataset.buildListOfFiles(), dataset.LocalDataset.buildListOfFiles(), dataset.CMSDataset.buildListOfFilesDBS(), dirstructure.Directory.calcStats(), crabFunctions.CrabTask.crabConfig(), crabFunctions.CrabTask.crabFolder(), validation.Sample.digest(), python.rootplot.utilities.Hist.divide(), python.rootplot.utilities.Hist.divide_wilson(), DisplayManager.DisplayManager.Draw(), TreeCrawler.Package.dump(), core.autovars.NTupleVariable.fillBranch(), core.autovars.NTupleObject.fillBranches(), core.autovars.NTupleCollection.fillBranchesScalar(), core.autovars.NTupleCollection.fillBranchesVector(), core.autovars.NTupleCollection.get_cpp_declaration(), core.autovars.NTupleCollection.get_cpp_wrapper_class(), core.autovars.NTupleCollection.get_py_wrapper_class(), utils.StatisticalTest.get_status(), production_tasks.Task.getname(), dataset.CMSDataset.getPrimaryDatasetEntries(), dataset.PrivateDataset.getPrimaryDatasetEntries(), 
crabFunctions.CrabTask.handleNoState(), VIDSelectorBase.VIDSelectorBase.initialize(), crabFunctions.CrabTask.isData(), personalPlayback.Applet.log(), core.autovars.NTupleVariable.makeBranch(), core.autovars.NTupleObject.makeBranches(), core.autovars.NTupleCollection.makeBranchesScalar(), core.autovars.NTupleCollection.makeBranchesVector(), dirstructure.Directory.print_report(), dataset.BaseDataset.printInfo(), dataset.Dataset.printInfo(), crabFunctions.CrabTask.resubmit_failed(), production_tasks.MonitorJobs.run(), BeautifulSoup.SoupStrainer.searchTag(), python.rootplot.utilities.Hist.TGraph(), python.rootplot.utilities.Hist.TH1F(), crabFunctions.CrabTask.update(), crabFunctions.CrabTask.updateJobStats(), counter.Counter.write(), and average.Average.write().

◆ nComplete

crabFunctions.CrabTask.nComplete

Definition at line 424 of file crabFunctions.py.

Referenced by crabFunctions.CrabTask.updateJobStats().

◆ nCooloff

crabFunctions.CrabTask.nCooloff

Definition at line 421 of file crabFunctions.py.

◆ nFailed

crabFunctions.CrabTask.nFailed

Definition at line 422 of file crabFunctions.py.

◆ nFinished

crabFunctions.CrabTask.nFinished

Definition at line 423 of file crabFunctions.py.

◆ nIdle

crabFunctions.CrabTask.nIdle

Definition at line 418 of file crabFunctions.py.

◆ nJobs

crabFunctions.CrabTask.nJobs

Definition at line 414 of file crabFunctions.py.

Referenced by crabFunctions.CrabTask.update().

◆ nRunning

crabFunctions.CrabTask.nRunning

Definition at line 419 of file crabFunctions.py.

◆ nTransferring

crabFunctions.CrabTask.nTransferring

Definition at line 420 of file crabFunctions.py.

◆ nUnsubmitted

crabFunctions.CrabTask.nUnsubmitted

Definition at line 417 of file crabFunctions.py.

◆ outlfn

crabFunctions.CrabTask.outlfn

Definition at line 410 of file crabFunctions.py.

◆ resubmitCount

crabFunctions.CrabTask.resubmitCount

◆ state

crabFunctions.CrabTask.state

◆ taskId

crabFunctions.CrabTask.taskId

Definition at line 412 of file crabFunctions.py.

◆ totalEvents

crabFunctions.CrabTask.totalEvents

Definition at line 434 of file crabFunctions.py.

◆ uuid

crabFunctions.CrabTask.uuid

Definition at line 403 of file crabFunctions.py.

Referenced by crabFunctions.CrabTask.test_print().