dataset.Dataset Class Reference
Inheritance diagram for dataset.Dataset:
dataset.BaseDataset

Public Member Functions

def __init__
 
def __init__
 
def buildListOfBadFiles
 
def buildListOfFiles
 
def convertTimeToRun
 
def createdatasetfile_hippy
 
def datasetSnippet
 
def dataType
 
def dump_cff
 
def extractFileSizes
 
def fileInfoList
 
def fileList
 
def forcerunrange
 
def getForceRunRangeFunction
 
def getPrimaryDatasetEntries
 
def magneticField
 
def magneticFieldForRun
 
def name
 
def parentDataset
 
def predefined
 
def printInfo
 
def runList
 
- Public Member Functions inherited from dataset.BaseDataset
def __init__
 def __init__(self, name, user, pattern='.*root'). More...
 
def buildListOfBadFiles
 
def buildListOfFiles
 
def extractFileSizes
 
def getPrimaryDatasetEntries
 
def listOfFiles
 
def listOfGoodFiles
 
def listOfGoodFilesWithPrescale
 
def printFiles
 
def printInfo
 

Static Public Member Functions

def getrunnumberfromfilename
 

Public Attributes

 bad_files
 
 castorDir
 
 files
 
 filesAndSizes
 
 good_files
 
 lfnDir
 
 maskExists
 
 report
 
- Public Attributes inherited from dataset.BaseDataset
 bad_files
 
 dbsInstance
 
 files
 
 filesAndSizes
 
 good_files
 
 name
 
 pattern
 
 primaryDatasetEntries
 
 report
 
 run_range
 
 user
 

Static Public Attributes

tuple dasData = das_client.get_data(dasQuery, dasLimit)
 
tuple error = self.__findInJson(jsondict,["data","error"])
 
 error = None
 
int i = 0
 
tuple jsondict = json.loads( dasData )
 
 jsondict = dasData
 
string jsonfile = "das_query_output_%i.txt"
 
 jsonfile = jsonfile%i
 
tuple jsonstr = self.__findInJson(jsondict,"reason")
 
string msg = "The DAS query returned an error. The output is very long, and has been stored in:\n"
 
tuple theFile = open( jsonfile, "w" )
 

Private Member Functions

def __chunks
 
def __createSnippet
 
def __dateString
 
def __datetime
 
def __fileListSnippet
 
def __find_ge
 
def __find_lt
 
def __findInJson
 
def __getData
 
def __getDataType
 
def __getFileInfoList
 
def __getMagneticField
 
def __getMagneticFieldForRun
 
def __getParentDataset
 
def __getRunList
 
def __lumiSelectionSnippet
 

Private Attributes

 __alreadyStored
 
 __cmssw
 
 __cmsswrelease
 
 __dasLimit
 
 __dataType
 
 __fileInfoList
 
 __fileList
 
 __filename
 
 __firstusedrun
 
 __lastusedrun
 
 __magneticField
 
 __name
 
 __official
 
 __origName
 
 __parentDataset
 
 __parentFileInfoList
 
 __parentFileList
 
 __predefined
 
 __runList
 

Static Private Attributes

tuple __dummy_source_template
 

Detailed Description

Definition at line 14 of file dataset.py.

Constructor & Destructor Documentation

def dataset.Dataset.__init__ (   self,
  datasetName,
  dasLimit = 0,
  tryPredefinedFirst = True,
  cmssw = os.environ["CMSSW_BASE"],
  cmsswrelease = os.environ["CMSSW_RELEASE_BASE"] 
)

Definition at line 16 of file dataset.py.

Referenced by dataset.Dataset.__init__().

16  def __init__( self, datasetName, dasLimit = 0, tryPredefinedFirst = True,
17  cmssw = os.environ["CMSSW_BASE"], cmsswrelease = os.environ["CMSSW_RELEASE_BASE"]):
18  self.__name = datasetName
19  self.__origName = datasetName
20  self.__dasLimit = dasLimit
21  self.__fileList = None
22  self.__fileInfoList = None
23  self.__runList = None
24  self.__alreadyStored = False
25  self.__cmssw = cmssw
26  self.__cmsswrelease = cmsswrelease
27  self.__firstusedrun = None
28  self.__lastusedrun = None
29  self.__parentDataset = None
30  self.__parentFileList = None
31  self.__parentFileInfoList = None
32 
33  # check, if dataset name matches CMS dataset naming scheme
34  if re.match( r'/.+/.+/.+', self.__name ):
35  self.__official = True
36  fileName = "Dataset" + self.__name.replace("/","_") + "_cff.py"
37  else:
38  self.__official = False
39  fileName = self.__name + "_cff.py"
40 
41  searchPath1 = os.path.join( self.__cmssw, "python",
42  "Alignment", "OfflineValidation",
43  fileName )
44  searchPath2 = os.path.join( self.__cmssw, "src",
45  "Alignment", "OfflineValidation",
46  "python", fileName )
47  searchPath3 = os.path.join( self.__cmsswrelease,
48  "python", "Alignment",
49  "OfflineValidation", fileName )
50  if self.__official and not tryPredefinedFirst:
51  self.__predefined = False
52  elif os.path.exists( searchPath1 ):
53  self.__predefined = True
54  self.__filename = searchPath1
55  elif os.path.exists( searchPath2 ):
56  msg = ("The predefined dataset '%s' does exist in '%s', but "
57  "you need to run 'scram b' first."
58  %( self.__name, searchPath2 ))
59  if self.__official:
60  print msg
61  print "Getting the data from DAS again. To go faster next time, run scram b."
62  else:
63  raise AllInOneError( msg )
64  elif os.path.exists( searchPath3 ):
65  self.__predefined = True
66  self.__filename = searchPath3
67  elif self.__official:
68  self.__predefined = False
69  else:
70  msg = ("The predefined dataset '%s' does not exist. Please "
71  "create it first or check for typos."%( self.__name ))
72  raise AllInOneError( msg )
73 
74  if self.__predefined and self.__official:
75  self.__name = "Dataset" + self.__name.replace("/","_")
76 
77  self.__dataType = self.__getDataType()
def __getMagneticField
Definition: dataset.py:408
def __getDataType
Definition: dataset.py:373
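
A minimal usage sketch of the constructor documented above (the dataset name is a hypothetical official dataset; a CMSSW environment with CMSSW_BASE and CMSSW_RELEASE_BASE set, plus DAS access, is assumed):

from dataset import Dataset

# Hypothetical official dataset name; it matches the /primary/processed/tier
# scheme, so a predefined Dataset..._cff.py is searched for before DAS is queried.
d = Dataset("/MinimumBias/Run2012A-TkAlMinBias-v1/ALCARECO",
            dasLimit=0, tryPredefinedFirst=True)
print(d.dataType())   # "data", "mc", or "unknown"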
def dataset.Dataset.__init__ (   self,
  name,
  user,
  pattern = '.*root' 
)

Definition at line 264 of file dataset.py.

References dataset.Dataset.__init__().

265  def __init__(self, name, user, pattern='.*root'):
266  self.lfnDir = castorBaseDir(user) + name
267  self.castorDir = castortools.lfnToCastor( self.lfnDir )
268  self.maskExists = False
269  self.report = None
270  super(Dataset, self).__init__(name, user, pattern)

Member Function Documentation

def dataset.Dataset.__chunks (   self,
  theList,
  n 
)
private
Yield successive n-sized chunks from theList.

Definition at line 79 of file dataset.py.

Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.__lumiSelectionSnippet(), and dataset.Dataset.createdatasetfile_hippy().

79 
80  def __chunks( self, theList, n ):
81  """ Yield successive n-sized chunks from theList.
82  """
83  for i in xrange( 0, len( theList ), n ):
84  yield theList[i:i+n]
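
A standalone sketch of the same chunking logic; chunks below is a free-function copy of __chunks, so no Dataset instance is needed:

def chunks(theList, n):
    # yield successive n-sized chunks, the last one possibly shorter
    for i in range(0, len(theList), n):
        yield theList[i:i+n]

for chunk in chunks(list(range(10)), 4):
    print(chunk)
# [0, 1, 2, 3]
# [4, 5, 6, 7]
# [8, 9]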
def dataset.Dataset.__createSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  repMap = None,
  crab = False,
  parent = False 
)
private

Definition at line 230 of file dataset.py.

References dataset.Dataset.__dummy_source_template, dataset.Dataset.__fileListSnippet(), dataset.Dataset.__lumiSelectionSnippet(), and dataset.Dataset.convertTimeToRun().

Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.datasetSnippet(), and dataset.Dataset.dump_cff().

230  def __createSnippet( self, jsonPath = None, begin = None, end = None, firstRun = None, lastRun = None, repMap = None,
231  crab = False, parent = False ):
232 
233  if firstRun:
234  firstRun = int( firstRun )
235  if lastRun:
236  lastRun = int( lastRun )
237  if ( begin and firstRun ) or ( end and lastRun ):
238  msg = ( "The Usage of "
239  + "'begin' & 'firstRun' " * int( bool( begin and
240  firstRun ) )
241  + "and " * int( bool( ( begin and firstRun ) and
242  ( end and lastRun ) ) )
243  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
244  + "is ambiguous." )
245  raise AllInOneError( msg )
246  if begin or end:
247  ( firstRun, lastRun ) = self.convertTimeToRun(
248  begin = begin, end = end, firstRun = firstRun,
249  lastRun = lastRun )
250  if ( firstRun and lastRun ) and ( firstRun > lastRun ):
251  msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
252  "chosen is greater than the upper time/runrange limit "
253  "('end'/'lastRun').")
254  raise AllInOneError( msg )
255 
256  lumiSecExtend = self.__lumiSelectionSnippet(jsonPath=jsonPath, firstRun=firstRun, lastRun=lastRun)
257  lumiStr = goodLumiSecStr = ""
258  if lumiSecExtend:
259  goodLumiSecStr = "lumiSecs = cms.untracked.VLuminosityBlockRange()\n"
260  lumiStr = " lumisToProcess = lumiSecs,\n"
261 
262  files = self.__fileListSnippet(crab=crab, parent=parent, firstRun=firstRun, lastRun=lastRun, forcerunselection=False)
263 
264  theMap = repMap
265  theMap["files"] = files
266  theMap["json"] = jsonPath
267  theMap["lumiStr"] = lumiStr
268  theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
269  theMap["lumiSecExtend"] = lumiSecExtend
270  if crab:
271  dataset_snippet = self.__dummy_source_template%( theMap )
272  else:
273  dataset_snippet = self.__source_template%( theMap )
274  return dataset_snippet
def __lumiSelectionSnippet
Definition: dataset.py:115
def __fileListSnippet
Definition: dataset.py:208
def convertTimeToRun
Definition: dataset.py:626
tuple __dummy_source_template
Definition: dataset.py:103
def dataset.Dataset.__dateString (   self,
  date 
)
private

Definition at line 621 of file dataset.py.

References dataset.Dataset.convertTimeToRun().

Referenced by dataset.Dataset.convertTimeToRun().

622  def __dateString(self, date):
623  return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)
def __dateString
Definition: dataset.py:621
def dataset.Dataset.__datetime (   self,
  stringForDas 
)
private

Definition at line 612 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

613  def __datetime(self, stringForDas):
614  if len(stringForDas) != 8:
615  raise AllInOneError(stringForDas + " is not a valid date string.\n"
616  + "DAS accepts dates in the form 'yyyymmdd'")
617  year = stringForDas[:4]
618  month = stringForDas[4:6]
619  day = stringForDas[6:8]
620  return datetime.date(int(year), int(month), int(day))
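
A standalone sketch of the round trip between DAS date strings and datetime.date objects, copying the logic of __datetime and __dateString (the date value is invented):

import datetime

def das_datetime(stringForDas):   # same logic as __datetime, without the length check
    return datetime.date(int(stringForDas[:4]),
                         int(stringForDas[4:6]),
                         int(stringForDas[6:8]))

def das_datestring(date):         # same logic as __dateString
    return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)

d = das_datetime("20120705")                           # datetime.date(2012, 7, 5)
print(das_datestring(d + datetime.timedelta(10)))      # 20120715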
def dataset.Dataset.__fileListSnippet (   self,
  crab = False,
  parent = False,
  firstRun = None,
  lastRun = None,
  forcerunselection = False 
)
private

Definition at line 208 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__createSnippet(), dataset.Dataset.fileList(), join(), and list().

Referenced by dataset.Dataset.__createSnippet().

209  def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
210  if crab:
211  files = ""
212  else:
213  splitFileList = list( self.__chunks( self.fileList(firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
214  fileStr = [ "',\n'".join( files ) for files in splitFileList ]
215  fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
216  for files in fileStr ]
217  files = "\n".join( fileStr )
218 
219  if parent:
220  splitParentFileList = list( self.__chunks( self.fileList(parent=True, firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
221  parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ]
222  parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
223  for parentFiles in parentFileStr ]
224  parentFiles = "\n".join( parentFileStr )
225  files += "\n\n" + parentFiles
226 
227  return files
def __fileListSnippet
Definition: dataset.py:208
def dataset.Dataset.__find_ge (   self,
  a,
  x 
)
private

Definition at line 282 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

283  def __find_ge( self, a, x):
284  'Find leftmost item greater than or equal to x'
285  i = bisect.bisect_left( a, x )
286  if i != len( a ):
287  return i
288  raise ValueError
def dataset.Dataset.__find_lt (   self,
  a,
  x 
)
private

Definition at line 275 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

276  def __find_lt( self, a, x ):
277  'Find rightmost value less than x'
278  i = bisect.bisect_left( a, x )
279  if i:
280  return i-1
281  raise ValueError
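
A standalone sketch of the two bisect helpers on a sorted run list (run numbers invented); __find_ge returns the index of the leftmost item greater than or equal to x, __find_lt the index of the rightmost item less than x, and both raise ValueError when no such item exists:

import bisect

runs = [190456, 190538, 190702, 191043]

i = bisect.bisect_left(runs, 190600)       # __find_ge: leftmost run >= 190600
print(runs[i])                             # 190702

j = bisect.bisect_left(runs, 190600) - 1   # __find_lt: rightmost run < 190600
print(runs[j])                             # 190538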
def dataset.Dataset.__findInJson (   self,
  jsondict,
  strings 
)
private

Definition at line 289 of file dataset.py.

References dataset.Dataset.__findInJson().

Referenced by dataset.Dataset.__findInJson(), dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.fileList().

290  def __findInJson(self, jsondict, strings):
291  if isinstance(strings, str):
292  strings = [ strings ]
293 
294  if len(strings) == 0:
295  return jsondict
296  if isinstance(jsondict,dict):
297  if strings[0] in jsondict:
298  try:
299  return self.__findInJson(jsondict[strings[0]], strings[1:])
300  except KeyError:
301  pass
302  else:
303  for a in jsondict:
304  if strings[0] in a:
305  try:
306  return self.__findInJson(a[strings[0]], strings[1:])
307  except (TypeError, KeyError): #TypeError because a could be a string and contain strings[0]
308  pass
309  #if it's not found
310  raise KeyError("Can't find " + strings[0])
def __findInJson
Definition: dataset.py:289
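
A standalone sketch of the recursive descent, with findInJson a free-function copy of __findInJson and an invented DAS-like structure:

def findInJson(jsondict, strings):
    if isinstance(strings, str):
        strings = [strings]
    if len(strings) == 0:
        return jsondict
    if isinstance(jsondict, dict):
        if strings[0] in jsondict:
            try:
                return findInJson(jsondict[strings[0]], strings[1:])
            except KeyError:
                pass
    else:
        # jsondict is a list: try each element that contains the key
        for a in jsondict:
            if strings[0] in a:
                try:
                    return findInJson(a[strings[0]], strings[1:])
                except (TypeError, KeyError):
                    pass
    raise KeyError("Can't find " + strings[0])

data = {"dataset": [{"name": "/A/B/RECO", "datatype": "data"}]}
print(findInJson(data, ["dataset", "datatype"]))   # data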
def dataset.Dataset.__getData (   self,
  dasQuery,
  dasLimit = 0 
)
private

Definition at line 341 of file dataset.py.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), and dataset.Dataset.convertTimeToRun().

def __getData( self, dasQuery, dasLimit = 0 ):
def dataset.Dataset.__getDataType (   self)
private

Definition at line 373 of file dataset.py.

References dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__predefined, dataset.BaseDataset.name, and python.rootplot.root2matplotlib.replace().

Referenced by dataset.Dataset.dataType().

374  def __getDataType( self ):
375  if self.__predefined:
376  with open(self.__filename) as f:
377  datatype = None
378  for line in f.readlines():
379  if line.startswith("#data type: "):
380  if datatype is not None:
381  raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
382  datatype = line.replace("#data type: ", "").replace("\n","")
383  return datatype
384  return "unknown"
385 
386  dasQuery_type = ( 'dataset dataset=%s | grep dataset.datatype,'
387  'dataset.name'%( self.__name ) )
388  data = self.__getData( dasQuery_type )
389 
390  try:
391  return self.__findInJson(data, ["dataset", "datatype"])
392  except KeyError:
393  print ("Cannot find the datatype of the dataset '%s'\n"
394  "It may not be possible to automatically find the magnetic field,\n"
395  "and you will not be able to run in CRAB mode"
396  %( self.name() ))
397  return "unknown"
def __findInJson
Definition: dataset.py:289
def __getDataType
Definition: dataset.py:373
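
The predefined-file branch above relies on the header comments that dump_cff() writes; a sketch of that convention with invented file content:

cff = """#Do not delete or (unless you know what you're doing) change these comments
#/MinimumBias/Run2012A-TkAlMinBias-v1/ALCARECO
#data type: data
#magnetic field: MagneticField, 3.8 #Use MagneticField_cff.py; ...
"""
for line in cff.splitlines():
    if line.startswith("#data type: "):
        print(line.replace("#data type: ", ""))   # data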
def dataset.Dataset.__getFileInfoList (   self,
  dasLimit,
  parent = False 
)
private

Definition at line 533 of file dataset.py.

References dataset.Dataset.__fileInfoList, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__parentFileInfoList, dataset.Dataset.__predefined, dataset.BaseDataset.name, and dataset.Dataset.parentDataset().

Referenced by dataset.Dataset.fileInfoList().

534  def __getFileInfoList( self, dasLimit, parent = False ):
535  if self.__predefined:
536  if parent:
537  extendstring = "secFiles.extend"
538  else:
539  extendstring = "readFiles.extend"
540  with open(self.__filename) as f:
541  files = []
542  copy = False
543  for line in f.readlines():
544  if "]" in line:
545  copy = False
546  if copy:
547  files.append({"name": line.translate(None, "', " + '"')})
548  if extendstring in line and "[" in line and "]" not in line:
549  copy = True
550  return files
551 
552  if self.__fileInfoList and not parent:
553  return self.__fileInfoList
554  if self.__parentFileInfoList and parent:
555  return self.__parentFileInfoList
556 
557  if parent:
558  searchdataset = self.parentDataset()
559  else:
560  searchdataset = self.__name
561  dasQuery_files = ( 'file dataset=%s | grep file.name, file.nevents, '
562  'file.creation_time, '
563  'file.modification_time'%( searchdataset ) )
564  print "Requesting file information for '%s' from DAS..."%( searchdataset ),
565  data = self.__getData( dasQuery_files, dasLimit )
566  print "Done."
567  data = [ self.__findInJson(entry,"file") for entry in data ]
568  if len( data ) == 0:
569  msg = ("No files are available for the dataset '%s'. This can be "
570  "due to a typo or due to a DAS problem. Please check the "
571  "spelling of the dataset and/or retry to run "
572  "'validateAlignments.py'."%( self.name() ))
573  raise AllInOneError( msg )
574  fileInformationList = []
575  for file in data:
576  fileName = 'unknown'
577  try:
578  fileName = self.__findInJson(file, "name")
579  fileCreationTime = self.__findInJson(file, "creation_time")
580  fileNEvents = self.__findInJson(file, "nevents")
581  except KeyError:
582  print ("DAS query gives bad output for file '%s'. Skipping it.\n"
583  "It may work if you try again later.") % fileName
584  fileNEvents = 0
585  # select only non-empty files
586  if fileNEvents == 0:
587  continue
588  fileDict = { "name": fileName,
589  "creation_time": fileCreationTime,
590  "nevents": fileNEvents
591  }
592  fileInformationList.append( fileDict )
593  fileInformationList.sort( key=lambda info: self.__findInJson(info,"name") )
594  if parent:
595  self.__parentFileInfoList = fileInformationList
596  else:
597  self.__fileInfoList = fileInformationList
598  return fileInformationList
def __findInJson
Definition: dataset.py:289
def __getFileInfoList
Definition: dataset.py:533
def parentDataset
Definition: dataset.py:704
def dataset.Dataset.__getMagneticField (   self)
private

Definition at line 408 of file dataset.py.

References dataset.Dataset.__cmssw, dataset.Dataset.__cmsswrelease, dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__predefined, and python.rootplot.root2matplotlib.replace().

Referenced by dataset.Dataset.magneticField().

409  def __getMagneticField( self ):
410  Bfieldlocation = os.path.join( self.__cmssw, "python", "Configuration", "StandardSequences" )
411  if not os.path.isdir(Bfieldlocation):
412  Bfieldlocation = os.path.join( self.__cmsswrelease, "python", "Configuration", "StandardSequences" )
413  Bfieldlist = [ f.replace("_cff.py",'') \
414  for f in os.listdir(Bfieldlocation) \
415  if f.startswith("MagneticField_") and f.endswith("_cff.py") ]
416  Bfieldlist.sort( key = lambda Bfield: -len(Bfield) ) #Put it in order of decreasing length, so that searching in the name gives the longer match
417 
418  if self.__predefined:
419  with open(self.__filename) as f:
420  datatype = None
421  Bfield = None
422  for line in f.readlines():
423  if line.startswith("#data type: "):
424  if datatype is not None:
425  raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
426  datatype = line.replace("#data type: ", "").replace("\n","")
427  datatype = datatype.split("#")[0].strip()
428  if line.startswith("#magnetic field: "):
429  if Bfield is not None:
430  raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
431  Bfield = line.replace("#magnetic field: ", "").replace("\n","")
432  Bfield = Bfield.split("#")[0].strip()
433  if Bfield is not None:
434  Bfield = Bfield.split(",")[0]
435  if Bfield in Bfieldlist or Bfield == "unknown":
436  return Bfield
437  else:
438  print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
439  print "Using Bfield='unknown' - this will revert to the default"
440  return "unknown"
441  elif datatype == "data":
442  return "MagneticField" #this should be in the "#magnetic field" line, but for safety in case it got messed up
443  else:
444  return "unknown"
445 
446  if self.__dataType == "data":
447  return "MagneticField"
448 
449  dasQuery_B = ( 'dataset dataset=%s'%( self.__name ) ) #try to find the magnetic field from DAS
450  data = self.__getData( dasQuery_B ) #it seems to be there for the newer (7X) MC samples, except cosmics
451 
452  try:
453  Bfield = self.__findInJson(data, ["dataset", "mcm", "sequences", "magField"])
454  if Bfield in Bfieldlist:
455  return Bfield
456  elif Bfield == "38T" or Bfield == "38T_PostLS1":
457  return "MagneticField"
458  elif "MagneticField_" + Bfield in Bfieldlist:
459  return "MagneticField_" + Bfield
460  elif Bfield == "":
461  pass
462  else:
463  print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
464  print "Using Bfield='unknown' - this will revert to the default magnetic field"
465  return "unknown"
466  except KeyError:
467  pass
468 
469  for possibleB in Bfieldlist:
470  if (possibleB != "MagneticField"
471  and possibleB.replace("MagneticField_","") in self.__name.replace("TkAlCosmics0T", "")):
472  #final attempt - try to identify the dataset from the name
473  #all cosmics dataset names contain "TkAlCosmics0T"
474  if possibleB == "MagneticField_38T" or possibleB == "MagneticField_38T_PostLS1":
475  return "MagneticField"
476  return possibleB
477 
478  return "unknown"
def __findInJson
Definition: dataset.py:289
def __getMagneticField
Definition: dataset.py:408
def dataset.Dataset.__getMagneticFieldForRun (   self,
  run = -1,
  tolerance = 0.5 
)
private
For MC, this returns the same as the previous function.
   For data, it gets the magnetic field from the runs.  This is important for
   deciding which template to use for offlinevalidation

Definition at line 479 of file dataset.py.

References dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getData(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__lastusedrun, dataset.Dataset.__magneticField, dataset.Dataset.__name, dataset.Dataset.__predefined, funct.abs(), python.rootplot.root2matplotlib.replace(), and split.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dump_cff(), and dataset.Dataset.magneticFieldForRun().

480  def __getMagneticFieldForRun( self, run = -1, tolerance = 0.5 ):
481  """For MC, this returns the same as the previous function.
482  For data, it gets the magnetic field from the runs. This is important for
483  deciding which template to use for offlinevalidation
484  """
485  if self.__dataType == "mc" and self.__magneticField == "MagneticField":
486  return 3.8 #For 3.8T MC the default MagneticField is used
487  if "T" in self.__magneticField:
488  Bfield = self.__magneticField.split("T")[0].replace("MagneticField_","")
489  try:
490  return float(Bfield) / 10.0 #e.g. 38T and 38T_PostLS1 both return 3.8
491  except ValueError:
492  pass
493  if self.__predefined:
494  with open(self.__filename) as f:
495  Bfield = None
496  for line in f.readlines():
497  if line.startswith("#magnetic field: ") and "," in line:
498  if Bfield is not None:
499  raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
500  return float(line.replace("#magnetic field: ", "").split(",")[1].split("#")[0].strip())
501 
502  if run > 0:
503  dasQuery = ('run = %s'%run) #for data
504  data = self.__getData(dasQuery)
505  try:
506  return self.__findInJson(data, ["run","bfield"])
507  except KeyError:
508  return "unknown Can't get the magnetic field for run %s from DAS" % run
509 
510  #run < 0 - find B field for the first and last runs, and make sure they're compatible
511  # (to within tolerance)
512  #NOT FOOLPROOF! The magnetic field might go up and then down, or vice versa
513  if self.__firstusedrun is None or self.__lastusedrun is None:
514  return "unknown Can't get the exact magnetic field for the dataset until data has been retrieved from DAS."
515  firstrunB = self.__getMagneticFieldForRun(self.__firstusedrun)
516  lastrunB = self.__getMagneticFieldForRun(self.__lastusedrun)
517  try:
518  if abs(firstrunB - lastrunB) <= tolerance:
519  return .5*(firstrunB + lastrunB)
520  print firstrunB, lastrunB, tolerance
521  return ("unknown The beginning and end of your run range for %s\n"
522  "have different magnetic fields (%s, %s)!\n"
523  "Try limiting the run range using firstRun, lastRun, begin, end, or JSON,\n"
524  "or increasing the tolerance (in dataset.py) from %s.") % (self.__name, firstrunB, lastrunB, tolerance)
525  except TypeError:
526  try:
527  if "unknown" in firstrunB:
528  return firstrunB
529  else:
530  return lastrunB
531  except TypeError:
532  return lastrunB
def __findInJson
Definition: dataset.py:289
def __getMagneticFieldForRun
Definition: dataset.py:479
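
A sketch of the field-string arithmetic used above; the name follows the MagneticField_*_cff.py convention, so e.g. both 38T and 38T_PostLS1 map to 3.8 tesla:

Bfield = "MagneticField_38T_PostLS1"
value = Bfield.split("T")[0].replace("MagneticField_", "")
print(float(value) / 10.0)   # 3.8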
def dataset.Dataset.__getParentDataset (   self)
private

Definition at line 398 of file dataset.py.

References dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), and dataset.Dataset.__name.

Referenced by dataset.Dataset.parentDataset().

399  def __getParentDataset( self ):
400  dasQuery = "parent dataset=" + self.__name
401  data = self.__getData( dasQuery )
402  try:
403  return self.__findInJson(data, ["parent", "name"])
404  except KeyError:
405  raise AllInOneError("Cannot find the parent of the dataset '" + self.__name + "'\n"
406  "Here is the DAS output:\n" + str(jsondict) +
407  "\nIt's possible that this was a server error. If so, it may work if you try again later")
def __findInJson
Definition: dataset.py:289
def __getParentDataset
Definition: dataset.py:398
def dataset.Dataset.__getRunList (   self)
private

Definition at line 599 of file dataset.py.

References dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, and dataset.Dataset.__runList.

Referenced by dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.runList().

600  def __getRunList( self ):
601  if self.__runList:
602  return self.__runList
603  dasQuery_runs = ( 'run dataset=%s | grep run.run_number,'
604  'run.creation_time'%( self.__name ) )
605  print "Requesting run information for '%s' from DAS..."%( self.__name ),
606  data = self.__getData( dasQuery_runs )
607  print "Done."
608  data = [ self.__findInJson(entry,"run") for entry in data ]
609  data.sort( key = lambda run: self.__findInJson(run, "run_number") )
610  self.__runList = data
611  return data
def __findInJson
Definition: dataset.py:289
def __getRunList
Definition: dataset.py:599
def dataset.Dataset.__lumiSelectionSnippet (   self,
  jsonPath = None,
  firstRun = None,
  lastRun = None 
)
private

Definition at line 115 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getRunList(), dataset.Dataset.__lastusedrun, dataset.Dataset.getForceRunRangeFunction(), join(), list(), bookConverter.max, min(), python.rootplot.root2matplotlib.replace(), and split.

Referenced by dataset.Dataset.__createSnippet().

116  def __lumiSelectionSnippet( self, jsonPath = None, firstRun = None, lastRun = None ):
117  lumiSecExtend = ""
118  if firstRun or lastRun or jsonPath:
119  if not jsonPath:
120  selectedRunList = self.__getRunList()
121  if firstRun:
122  selectedRunList = [ run for run in selectedRunList \
123  if self.__findInJson(run, "run_number") >= firstRun ]
124  if lastRun:
125  selectedRunList = [ run for run in selectedRunList \
126  if self.__findInJson(run, "run_number") <= lastRun ]
127  lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
128  + str( self.__findInJson(run, "run_number") ) + ":max" \
129  for run in selectedRunList ]
130  splitLumiList = list( self.__chunks( lumiList, 255 ) )
131  else:
132  theLumiList = None
133  try:
134  theLumiList = LumiList ( filename = jsonPath )
135  except ValueError:
136  pass
137 
138  if theLumiList is not None:
139  allRuns = theLumiList.getRuns()
140  runsToRemove = []
141  for run in allRuns:
142  if firstRun and int( run ) < firstRun:
143  runsToRemove.append( run )
144  if lastRun and int( run ) > lastRun:
145  runsToRemove.append( run )
146  theLumiList.removeRuns( runsToRemove )
147  splitLumiList = list( self.__chunks(
148  theLumiList.getCMSSWString().split(','), 255 ) )
149  if not (splitLumiList and splitLumiList[0] and splitLumiList[0][0]):
150  splitLumiList = None
151  else:
152  with open(jsonPath) as f:
153  jsoncontents = f.read()
154  if "process.source.lumisToProcess" in jsoncontents:
155  msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
156  if firstRun or lastRun:
157  msg += ("\n (after applying firstRun and/or lastRun)")
158  msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
159  msg += "\nCheck your config file to make sure that it worked properly."
160  print msg
161 
162  runlist = self.__getRunList()
163  if firstRun or lastRun:
164  self.__firstusedrun = -1
165  self.__lastusedrun = -1
166  jsoncontents = re.sub(r"\d+:(\d+|max)(-\d+:(\d+|max))?", self.getForceRunRangeFunction(firstRun, lastRun), jsoncontents)
167  jsoncontents = (jsoncontents.replace("'',\n","").replace("''\n","")
168  .replace('"",\n','').replace('""\n',''))
169  self.__firstusedrun = max(self.__firstusedrun, int(self.__findInJson(runlist[0],"run_number")))
170  self.__lastusedrun = min(self.__lastusedrun, int(self.__findInJson(runlist[-1],"run_number")))
171  if self.__lastusedrun < self.__firstusedrun:
172  jsoncontents = None
173  else:
174  self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
175  self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))
176  lumiSecExtend = jsoncontents
177  splitLumiList = None
178  else:
179  raise AllInOneError("%s is not a valid json file!" % jsonPath)
180 
181  if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
182  lumiSecStr = [ "',\n'".join( lumis ) \
183  for lumis in splitLumiList ]
184  lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
185  for lumis in lumiSecStr ]
186  lumiSecExtend = "\n".join( lumiSecStr )
187  runlist = self.__getRunList()
188  self.__firstusedrun = max(int(splitLumiList[0][0].split(":")[0]), int(self.__findInJson(runlist[0],"run_number")))
189  self.__lastusedrun = min(int(splitLumiList[-1][-1].split(":")[0]), int(self.__findInJson(runlist[-1],"run_number")))
190  elif lumiSecExtend:
191  pass
192  else:
193  msg = "You are trying to run a validation without any runs! Check that:"
194  if firstRun or lastRun:
195  msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
196  if jsonPath:
197  msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
198  if (firstRun or lastRun) and jsonPath:
199  msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
200  raise AllInOneError(msg)
201 
202  else:
203  runlist = self.__getRunList()
204  self.__firstusedrun = int(self.__findInJson(self.__getRunList()[0],"run_number"))
205  self.__lastusedrun = int(self.__findInJson(self.__getRunList()[-1],"run_number"))
206 
207  return lumiSecExtend
def __findInJson
Definition: dataset.py:289
def __lumiSelectionSnippet
Definition: dataset.py:115
def __getRunList
Definition: dataset.py:599
def getForceRunRangeFunction
Definition: dataset.py:336
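
When no JSON file is given, the branch above builds one run:lumi range per selected run and then chunks them 255 at a time into lumiSecs.extend( [...] ) statements; a sketch with invented run numbers:

selectedRuns = [190456, 190538, 190702]
lumiList = [str(r) + ":1-" + str(r) + ":max" for r in selectedRuns]
print(lumiList)
# ['190456:1-190456:max', '190538:1-190538:max', '190702:1-190702:max']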
def dataset.Dataset.buildListOfBadFiles (   self)
fills the list of bad files from the IntegrityCheck log.

When the integrity check file is not available,
files are considered as good.

Definition at line 275 of file dataset.py.

276  def buildListOfBadFiles(self):
277  '''fills the list of bad files from the IntegrityCheck log.
278 
279  When the integrity check file is not available,
280  files are considered as good.'''
281  mask = "IntegrityCheck"
282 
283  self.bad_files = {}
284  self.good_files = []
285 
286  file_mask = castortools.matchingFiles(self.castorDir, '^%s_.*\.txt$' % mask)
287  if file_mask:
288  # here to avoid circular dependency
289  from edmIntegrityCheck import PublishToFileSystem
290  p = PublishToFileSystem(mask)
291  report = p.get(self.castorDir)
292  if report is not None and report:
293  self.maskExists = True
294  self.report = report
295  dup = report.get('ValidDuplicates',{})
296  for name, status in report['Files'].iteritems():
297  # print name, status
298  if not status[0]:
299  self.bad_files[name] = 'MarkedBad'
300  elif name in dup:
301  self.bad_files[name] = 'ValidDup'
302  else:
303  self.good_files.append( name )
304  else:
305  raise IntegrityCheckError( "ERROR: IntegrityCheck log file IntegrityCheck_XXXXXXXXXX.txt not found" )
def buildListOfBadFiles
Definition: dataset.py:275
def dataset.Dataset.buildListOfFiles (   self,
  pattern = '.*root' 
)
fills list of files, taking all root files matching the pattern in the castor dir

Definition at line 271 of file dataset.py.

272  def buildListOfFiles(self, pattern='.*root'):
273  '''fills list of files, taking all root files matching the pattern in the castor dir'''
274  self.files = castortools.matchingFiles( self.castorDir, pattern )
def buildListOfFiles
Definition: dataset.py:271
def dataset.Dataset.convertTimeToRun (   self,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  shortTuple = True 
)

Definition at line 626 of file dataset.py.

References dataset.Dataset.__dateString(), dataset.Dataset.__datetime(), dataset.Dataset.__find_ge(), dataset.Dataset.__find_lt(), dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__getRunList(), and dataset.Dataset.__name.

Referenced by dataset.Dataset.__createSnippet(), and dataset.Dataset.__dateString().

626  def convertTimeToRun( self, begin = None, end = None, firstRun = None, lastRun = None,
627  shortTuple = True ):
628  if ( begin and firstRun ) or ( end and lastRun ):
629  msg = ( "The Usage of "
630  + "'begin' & 'firstRun' " * int( bool( begin and
631  firstRun ) )
632  + "and " * int( bool( ( begin and firstRun ) and
633  ( end and lastRun ) ) )
634  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
635  + "is ambiguous." )
636  raise AllInOneError( msg )
637 
638  if begin or end:
639  runList = [ self.__findInJson(run, "run_number") for run in self.__getRunList() ]
640 
641  if begin:
642  lastdate = begin
643  for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months after begin
644  firstdate = lastdate
645  lastdate = self.__dateString(self.__datetime(firstdate) + datetime.timedelta(delta))
646  dasQuery_begin = "run date between[%s,%s]" % (firstdate, lastdate)
647  begindata = self.__getData(dasQuery_begin)
648  if len(begindata) > 0:
649  begindata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
650  try:
651  runIndex = self.__find_ge( runList, self.__findInJson(begindata[0], ["run", "run_number"]))
652  except ValueError:
653  msg = ( "Your 'begin' is after the creation time of the last "
654  "run in the dataset\n'%s'"%( self.__name ) )
655  raise AllInOneError( msg )
656  firstRun = runList[runIndex]
657  begin = None
658  break
659 
660  if begin:
661  raise AllInOneError("No runs within a reasonable time interval after your 'begin'."
662  "Try using a 'begin' that has runs soon after it (within 2 months at most)")
663 
664  if end:
665  firstdate = end
666  for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months before end
667  lastdate = firstdate
668  firstdate = self.__dateString(self.__datetime(lastdate) - datetime.timedelta(delta))
669  dasQuery_end = "run date between[%s,%s]" % (firstdate, lastdate)
670  enddata = self.__getData(dasQuery_end)
671  if len(enddata) > 0:
672  enddata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
673  try:
674  runIndex = self.__find_lt( runList, self.__findInJson(enddata[-1], ["run", "run_number"]))
675  except ValueError:
676  msg = ( "Your 'end' is before the creation time of the first "
677  "run in the dataset\n'%s'"%( self.__name ) )
678  raise AllInOneError( msg )
679  lastRun = runList[runIndex]
680  end = None
681  break
682 
683  if end:
684  raise AllInOneError("No runs within a reasonable time interval before your 'end'."
685  "Try using an 'end' that has runs soon before it (within 2 months at most)")
686 
687  if shortTuple:
688  return firstRun, lastRun
689  else:
690  return begin, end, firstRun, lastRun
def __findInJson
Definition: dataset.py:289
def __getRunList
Definition: dataset.py:599
def __dateString
Definition: dataset.py:621
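
A hypothetical usage sketch (dataset name and dates invented; requires DAS access): begin and end take the yyyymmdd strings checked by __datetime, and mixing begin with firstRun (or end with lastRun) raises AllInOneError:

d = Dataset("/MinimumBias/Run2012A-TkAlMinBias-v1/ALCARECO")
firstRun, lastRun = d.convertTimeToRun(begin="20120501", end="20120531")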
def dataset.Dataset.createdatasetfile_hippy (   self,
  filename,
  filesperjob,
  firstrun,
  lastrun 
)

Definition at line 831 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.fileList(), and join().

832  def createdatasetfile_hippy(self, filename, filesperjob, firstrun, lastrun):
833  with open(filename, "w") as f:
834  for job in self.__chunks(self.fileList(firstRun=firstrun, lastRun=lastrun, forcerunselection=True), filesperjob):
835  f.write(",".join("'{}'".format(file) for file in job)+"\n")
def createdatasetfile_hippy
Definition: dataset.py:831
def dataset.Dataset.datasetSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  crab = False,
  parent = False 
)

Definition at line 710 of file dataset.py.

References dataset.Dataset.__createSnippet(), dataset.Dataset.__filename, dataset.Dataset.__name, dataset.Dataset.__official, dataset.Dataset.__origName, dataset.Dataset.__predefined, and dataset.Dataset.dump_cff().

Referenced by dataset.Dataset.parentDataset().

710  def datasetSnippet( self, jsonPath = None, begin = None, end = None,
711  firstRun = None, lastRun = None, crab = False, parent = False ):
712  if self.__predefined and (jsonPath or begin or end or firstRun or lastRun):
713  msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun' "
714  "only work for official datasets, not predefined _cff.py files" )
715  raise AllInOneError( msg )
716  if self.__predefined and parent:
717  with open(self.__filename) as f:
718  if "secFiles.extend" not in f.read():
719  msg = ("The predefined dataset '%s' does not contain secondary files, "
720  "which your validation requires!") % self.__name
721  if self.__official:
722  self.__name = self.__origName
723  self.__predefined = False
724  print msg
725  print ("Retrieving the files from DAS. You will be asked if you want "
726  "to overwrite the old dataset.\n"
727  "It will still be compatible with validations that don't need secondary files.")
728  else:
729  raise AllInOneError(msg)
730 
731  if self.__predefined:
732  snippet = ("process.load(\"Alignment.OfflineValidation.%s_cff\")\n"
733  "process.maxEvents = cms.untracked.PSet(\n"
734  " input = cms.untracked.int32(.oO[nEvents]Oo. / .oO[parallelJobs]Oo.)\n"
735  ")\n"
736  "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)"
737  %(self.__name))
738  if not parent:
739  with open(self.__filename) as f:
740  if "secFiles.extend" in f.read():
741  snippet += "\nprocess.source.secondaryFileNames = cms.untracked.vstring()"
742  return snippet
743  theMap = { "process": "process.",
744  "tab": " " * len( "process." ),
745  "nEvents": ".oO[nEvents]Oo. / .oO[parallelJobs]Oo.",
746  "skipEventsString": "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)\n",
747  "importCms": "",
748  "header": ""
749  }
750  datasetSnippet = self.__createSnippet( jsonPath = jsonPath,
751  begin = begin,
752  end = end,
753  firstRun = firstRun,
754  lastRun = lastRun,
755  repMap = theMap,
756  crab = crab,
757  parent = parent )
758  if jsonPath == "" and begin == "" and end == "" and firstRun == "" and lastRun == "":
759  try:
760  self.dump_cff(parent = parent)
761  except AllInOneError as e:
762  print "Can't store the dataset as a cff:"
763  print e
764  print "This may be inconvenient in the future, but will not cause a problem for this validation."
765  return datasetSnippet
def __createSnippet
Definition: dataset.py:230
def dataset.Dataset.dataType (   self)

Definition at line 691 of file dataset.py.

References dataset.Dataset.__dataType, and dataset.Dataset.__getDataType().

692  def dataType( self ):
693  if not self.__dataType:
694  self.__dataType = self.__getDataType()
695  return self.__dataType
def __getDataType
Definition: dataset.py:373
def dataset.Dataset.dump_cff (   self,
  outName = None,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  parent = False 
)

Definition at line 767 of file dataset.py.

References dataset.Dataset.__alreadyStored, dataset.Dataset.__cmssw, dataset.Dataset.__createSnippet(), dataset.Dataset.__dataType, dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__magneticField, dataset.Dataset.__name, python.rootplot.root2matplotlib.replace(), and split.

Referenced by dataset.Dataset.datasetSnippet().

767  def dump_cff( self, outName = None, jsonPath = None, begin = None,
768  end = None, firstRun = None, lastRun = None, parent = False ):
769  if self.__alreadyStored:
770  return
771  self.__alreadyStored = True
772  if outName == None:
773  outName = "Dataset" + self.__name.replace("/", "_")
774  packageName = os.path.join( "Alignment", "OfflineValidation" )
775  if not os.path.exists( os.path.join(
776  self.__cmssw, "src", packageName ) ):
777  msg = ("You are trying to store the predefined dataset '%s'.\n"
778  "For that you need to check out the package '%s' to your "
779  "private release area in\n"%( outName, packageName )
780  + self.__cmssw )
781  raise AllInOneError( msg )
782  theMap = { "process": "",
783  "tab": "",
784  "nEvents": str( -1 ),
785  "skipEventsString": "",
786  "importCms": "import FWCore.ParameterSet.Config as cms\n",
787  "header": "#Do not delete or (unless you know what you're doing) change these comments\n"
788  "#%(name)s\n"
789  "#data type: %(dataType)s\n"
790  "#magnetic field: .oO[magneticField]Oo.\n" #put in magnetic field later
791  %{"name": self.__name, #need to create the snippet before getting the magnetic field
792  "dataType": self.__dataType} #so that we know the first and last runs
793  }
794  dataset_cff = self.__createSnippet( jsonPath = jsonPath,
795  begin = begin,
796  end = end,
797  firstRun = firstRun,
798  lastRun = lastRun,
799  repMap = theMap,
800  parent = parent)
801  magneticField = self.__magneticField
802  if magneticField == "MagneticField":
803  magneticField = "%s, %s #%s" % (magneticField,
804  str(self.__getMagneticFieldForRun()).replace("\n"," ").split("#")[0].strip(),
805  "Use MagneticField_cff.py; the number is for determining which track selection to use."
806  )
807  dataset_cff = dataset_cff.replace(".oO[magneticField]Oo.",magneticField)
808  filePath = os.path.join( self.__cmssw, "src", packageName,
809  "python", outName + "_cff.py" )
810  if os.path.exists( filePath ):
811  existMsg = "The predefined dataset '%s' already exists.\n"%( outName )
812  askString = "Do you want to overwrite it? [y/n]\n"
813  inputQuery = existMsg + askString
814  while True:
815  userInput = raw_input( inputQuery ).lower()
816  if userInput == "y":
817  break
818  elif userInput == "n":
819  return
820  else:
821  inputQuery = askString
822  print ( "The predefined dataset '%s' will be stored in the file\n"
823  %( outName )
824  + filePath +
825  "\nFor future use you have to do 'scram b'." )
826  print
827  theFile = open( filePath, "w" )
828  theFile.write( dataset_cff )
829  theFile.close()
830  return
def __getMagneticFieldForRun
Definition: dataset.py:479
def __createSnippet
Definition: dataset.py:230
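
A hypothetical usage sketch (dataset name and run range invented; requires Alignment/OfflineValidation checked out in the CMSSW_BASE area):

d = Dataset("/MinimumBias/Run2012A-TkAlMinBias-v1/ALCARECO")
d.dump_cff(firstRun=190456, lastRun=190702)
# writes Dataset_MinimumBias_Run2012A-TkAlMinBias-v1_ALCARECO_cff.py under
# Alignment/OfflineValidation/python; run 'scram b' before using it.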
def dataset.Dataset.extractFileSizes (   self)
Get the file size for each file, from the eos ls -l command.

Definition at line 306 of file dataset.py.

References dataset.EOSDataset.castorDir, and dataset.Dataset.castorDir.

307  def extractFileSizes(self):
308  '''Get the file size for each file, from the eos ls -l command.'''
309  # EOS command does not work in tier3
310  lsout = castortools.runXRDCommand(self.castorDir,'dirlist')[0]
311  lsout = lsout.split('\n')
312  self.filesAndSizes = {}
313  for entry in lsout:
314  values = entry.split()
315  if( len(values) != 5):
316  continue
317  # using full abs path as a key.
318  file = '/'.join([self.lfnDir, values[4].split("/")[-1]])
319  size = values[1]
320  self.filesAndSizes[file] = size
def extractFileSizes
Definition: dataset.py:306
def dataset.Dataset.fileInfoList (   self,
  parent = False 
)

Definition at line 901 of file dataset.py.

References dataset.Dataset.__dasLimit, and dataset.Dataset.__getFileInfoList().

Referenced by dataset.Dataset.fileList().

902  def fileInfoList( self, parent = False ):
903  return self.__getFileInfoList( self.__dasLimit, parent )
def __getFileInfoList
Definition: dataset.py:533
def fileInfoList
Definition: dataset.py:901
def dataset.Dataset.fileList (   self,
  parent = False,
  firstRun = None,
  lastRun = None,
  forcerunselection = False 
)

Definition at line 863 of file dataset.py.

References dataset.Dataset.__fileList, dataset.Dataset.__findInJson(), dataset.Dataset.__parentFileList, dataset.Dataset.fileInfoList(), and dataset.Dataset.getrunnumberfromfilename().

Referenced by dataset.Dataset.__fileListSnippet(), and dataset.Dataset.createdatasetfile_hippy().

864  def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
865  if self.__fileList and not parent:
866  return self.__fileList
867  if self.__parentFileList and parent:
868  return self.__parentFileList
869 
870  fileList = [ self.__findInJson(fileInfo,"name")
871  for fileInfo in self.fileInfoList(parent) ]
872 
873  if firstRun is not None or lastRun is not None:
874  if firstRun is None: firstRun = -1
875  if lastRun is None: lastRun = float('infinity')
876  unknownfilenames, reasons = [], set()
877  for filename in fileList[:]:
878  try:
879  if not firstRun < self.getrunnumberfromfilename(filename) < lastRun:
880  fileList.remove(filename)
881  except AllInOneError as e:
882  if forcerunselection: raise
883  unknownfilenames.append(e.message.split("\n")[1])
884  reasons .add (e.message.split("\n")[2])
885  if reasons:
886  if len(unknownfilenames) == len(fileList):
887  print "Could not figure out the run numbers of any of the filenames for the following reason(s):"
888  else:
889  print "Could not figure out the run numbers of the following filenames:"
890  for filename in unknownfilenames:
891  print " "+filename
892  print "for the following reason(s):"
893  for reason in reasons:
894  print " "+reason
895  print "Using the files anyway. The runs will be filtered at the CMSSW level."
896  if not parent:
897  self.__fileList = fileList
898  else:
899  self.__parentFileList = fileList
900  return fileList
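
A hedged usage sketch (the dataset name and run window below are illustrative). Note that the run-number comparison in the code is strict (firstRun < run < lastRun), so files from the boundary runs themselves are dropped; files whose run number cannot be parsed are kept and filtered later at the CMSSW level:

    ds = Dataset("/MinimumBias/Run2016B-v1/RAW")  # made-up dataset name
    files = ds.fileList(firstRun=273000, lastRun=274000)
    parentFiles = ds.fileList(parent=True)        # files of the parent dataset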
def dataset.Dataset.forcerunrange (   self,
  firstRun,
  lastRun,
  s 
)
s must be in the format run1:lum1-run2:lum2

Definition at line 311 of file dataset.py.

References dataset.Dataset.__firstusedrun, dataset.Dataset.__lastusedrun, and split.

Referenced by dataset.Dataset.getForceRunRangeFunction().

312  def forcerunrange(self, firstRun, lastRun, s):
313  """s must be in the format run1:lum1-run2:lum2"""
314  s = s.group()
315  run1 = s.split("-")[0].split(":")[0]
316  lum1 = s.split("-")[0].split(":")[1]
317  try:
318  run2 = s.split("-")[1].split(":")[0]
319  lum2 = s.split("-")[1].split(":")[1]
320  except IndexError:
321  run2 = run1
322  lum2 = lum1
323  if int(run2) < firstRun or int(run1) > lastRun:
324  return ""
325  if int(run1) < firstRun or firstRun < 0:
326  run1 = firstRun
327  lum1 = 1
328  if int(run2) > lastRun:
329  run2 = lastRun
330  lum2 = "max"
331  if int(run1) < self.__firstusedrun or self.__firstusedrun < 0:
332  self.__firstusedrun = int(run1)
333  if int(run2) > self.__lastusedrun:
334  self.__lastusedrun = int(run2)
335  return "%s:%s-%s:%s" % (run1, lum1, run2, lum2)
def dataset.Dataset.getForceRunRangeFunction (   self,
  firstRun,
  lastRun 
)

Definition at line 336 of file dataset.py.

References dataset.Dataset.forcerunrange().

Referenced by dataset.Dataset.__lumiSelectionSnippet().

337  def getForceRunRangeFunction(self, firstRun, lastRun):
338  def forcerunrangefunction(s):
339  return self.forcerunrange(firstRun, lastRun, s)
340  return forcerunrangefunction
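
getForceRunRangeFunction wraps forcerunrange in a closure suitable as a re.sub callback, which is how __lumiSelectionSnippet uses it (forcerunrange's argument s is a regular-expression match object, hence the s.group() call). A minimal sketch, with an assumed pattern and an illustrative lumi string:

    import re

    # Each "run1:lum1-run2:lum2" range is clipped to [firstRun, lastRun];
    # ranges entirely outside the window are replaced by an empty string.
    lumiStr = "273000:1-273100:50 273500:1-274200:max"
    clip = ds.getForceRunRangeFunction(273050, 274000)  # ds as above
    print re.sub(r"\d+:(\d+|max)-\d+:(\d+|max)", clip, lumiStr)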
def dataset.Dataset.getPrimaryDatasetEntries (   self)

Definition at line 326 of file dataset.py.

References runall.testit.report, dataset.BaseDataset.report, ALIUtils.report, and WorkFlowRunner.WorkFlowRunner.report.

327  def getPrimaryDatasetEntries(self):
328  if self.report is not None and self.report:
329  return int(self.report.get('PrimaryDatasetEntries',-1))
330  return -1
331 
def dataset.Dataset.getrunnumberfromfilename (   filename)
static

Definition at line 837 of file dataset.py.

References Vispa.Plugins.EdmBrowser.EdmDataAccessor.all(), and join().

Referenced by dataset.Dataset.fileList().

838  def getrunnumberfromfilename(filename):
839  parts = filename.split("/")
840  result = error = None
841  if parts[0] != "" or parts[1] != "store":
842  error = "does not start with /store"
843  elif parts[2] in ["mc", "relval"]:
844  result = 1
845  elif parts[-2] != "00000" or not parts[-1].endswith(".root"):
846  error = "does not end with 00000/something.root"
847  elif len(parts) != 12:
848  error = "should be exactly 11 slashes counting the first one"
849  else:
850  runnumberparts = parts[-5:-2]
851  if not all(len(part)==3 for part in runnumberparts):
852  error = "the 3 directories {} do not have length 3 each".format("/".join(runnumberparts))
853  try:
854  result = int("".join(runnumberparts))
855  except ValueError:
856  error = "the 3 directories {} do not form an integer".format("/".join(runnumberparts))
857 
858  if error:
859  error = "could not figure out which run number this file is from:\n{}\n{}".format(filename, error)
860  raise AllInOneError(error)
861 
862  return result
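
A worked example of the path convention the parser expects (the LFN is hypothetical):

    # /store LFNs encode the run number in three 3-digit directories:
    #   /store/data/Run2016B/MinimumBias/RAW/v1/000/273/150/00000/ABC123.root
    # parts[-5:-2] == ["000", "273", "150"]  ->  int("000273150") == 273150
    run = Dataset.getrunnumberfromfilename(
        "/store/data/Run2016B/MinimumBias/RAW/v1/000/273/150/00000/ABC123.root")
    assert run == 273150
    # MC and RelVal files carry no run number; the method returns 1 for them:
    assert Dataset.getrunnumberfromfilename("/store/mc/a/b/c/d/e/f/g/h/file.root") == 1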
def dataset.Dataset.magneticField (   self)

Definition at line 696 of file dataset.py.

References dataset.Dataset.__getMagneticField(), and dataset.Dataset.__magneticField.

697  def magneticField( self ):
698  if not self.__magneticField:
699  self.__magneticField = self.__getMagneticField()
700  return self.__magneticField
def dataset.Dataset.magneticFieldForRun (   self,
  run = -1 
)

Definition at line 701 of file dataset.py.

References dataset.Dataset.__getMagneticFieldForRun().

702  def magneticFieldForRun( self, run = -1 ):
703  return self.__getMagneticFieldForRun(run)
def dataset.Dataset.name (   self)

Definition at line 904 of file dataset.py.

References dataset.Dataset.__name.

Referenced by cuy.divideElement.__init__(), cuy.plotElement.__init__(), cuy.additionElement.__init__(), cuy.superimposeElement.__init__(), cuy.graphElement.__init__(), config.CFG.__str__(), validation.Sample.digest(), VIDSelectorBase.VIDSelectorBase.initialize(), and Vispa.Views.PropertyView.Property.valueChanged().

905  def name( self ):
906  return self.__name
def dataset.Dataset.parentDataset (   self)

Definition at line 704 of file dataset.py.

References dataset.Dataset.__getParentDataset(), dataset.Dataset.__parentDataset, and dataset.Dataset.datasetSnippet().

Referenced by dataset.Dataset.__getFileInfoList().

705  def parentDataset( self ):
706  if not self.__parentDataset:
707  self.__parentDataset = self.__getParentDataset()
708  return self.__parentDataset
def dataset.Dataset.predefined (   self)

Definition at line 907 of file dataset.py.

References dataset.Dataset.__predefined.

908  def predefined( self ):
909  return self.__predefined
def dataset.Dataset.printInfo (   self)

Definition at line 321 of file dataset.py.

References dataset.EOSDataset.castorDir, dataset.Dataset.castorDir, dataset.Dataset.lfnDir, and dataset.BaseDataset.name.

322  def printInfo(self):
323  print 'sample : ' + self.name
324  print 'LFN : ' + self.lfnDir
325  print 'Castor path : ' + self.castorDir
def dataset.Dataset.runList (   self)

Definition at line 910 of file dataset.py.

References dataset.Dataset.__getRunList(), and dataset.Dataset.__runList.

911  def runList( self ):
912  if self.__runList:
913  return self.__runList
914  return self.__getRunList()
915 

Member Data Documentation

dataset.Dataset.__alreadyStored
private

Definition at line 23 of file dataset.py.

Referenced by dataset.Dataset.dump_cff().

dataset.Dataset.__cmssw
private

Definition at line 24 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField(), and dataset.Dataset.dump_cff().

dataset.Dataset.__cmsswrelease
private

Definition at line 25 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField().

dataset.Dataset.__dasLimit
private

Definition at line 19 of file dataset.py.

Referenced by dataset.Dataset.fileInfoList().

dataset.Dataset.__dataType
private

Definition at line 76 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dataType(), and dataset.Dataset.dump_cff().

tuple dataset.Dataset.__dummy_source_template
staticprivate
Initial value:
1 = ("readFiles = cms.untracked.vstring()\n"
2  "secFiles = cms.untracked.vstring()\n"
3  "%(process)ssource = cms.Source(\"PoolSource\",\n"
4  "%(tab)s secondaryFileNames ="
5  "secFiles,\n"
6  "%(tab)s fileNames = readFiles\n"
7  ")\n"
8  "readFiles.extend(['dummy_File.root'])\n"
9  "%(process)smaxEvents = cms.untracked.PSet( "
10  "input = cms.untracked.int32(%(nEvents)s) )\n"
11  "%(skipEventsString)s\n")

Definition at line 103 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().
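
The template is filled with Python %-style substitution. A minimal sketch of a fill-in (the replacement values are illustrative; __createSnippet builds the real map):

    repMap = {"process": "process.",
              "tab": " " * len("process.source"),
              "nEvents": "100",
              "skipEventsString": ""}
    print Dataset._Dataset__dummy_source_template % repMap  # name-mangled private access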

dataset.Dataset.__fileInfoList
private

Definition at line 21 of file dataset.py.

Referenced by dataset.Dataset.__getFileInfoList().

dataset.Dataset.__fileList
private

Definition at line 20 of file dataset.py.

Referenced by dataset.Dataset.fileList().

dataset.Dataset.__filename
private

Definition at line 53 of file dataset.py.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.datasetSnippet(), csvReporter.csvReporter.writeRow(), and csvReporter.csvReporter.writeRows().

dataset.Dataset.__firstusedrun
private

Definition at line 26 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__lumiSelectionSnippet(), and dataset.Dataset.forcerunrange().

dataset.Dataset.__lastusedrun
private

Definition at line 27 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__lumiSelectionSnippet(), and dataset.Dataset.forcerunrange().

dataset.Dataset.__magneticField
private

Definition at line 77 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dump_cff(), and dataset.Dataset.magneticField().

dataset.Dataset.__name
private

Definition at line 17 of file dataset.py.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), dataset.Dataset.convertTimeToRun(), dataset.Dataset.datasetSnippet(), dataset.Dataset.dump_cff(), Config.Process.dumpConfig(), Config.Process.dumpPython(), dataset.Dataset.name(), and Config.Process.name_().

dataset.Dataset.__official
private

Definition at line 34 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__origName
private

Definition at line 18 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__parentDataset
private

Definition at line 28 of file dataset.py.

Referenced by dataset.Dataset.parentDataset().

dataset.Dataset.__parentFileInfoList
private

Definition at line 30 of file dataset.py.

Referenced by dataset.Dataset.__getFileInfoList().

dataset.Dataset.__parentFileList
private

Definition at line 29 of file dataset.py.

Referenced by dataset.Dataset.fileList().

dataset.Dataset.__predefined
private

Definition at line 50 of file dataset.py.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.datasetSnippet(), and dataset.Dataset.predefined().

dataset.Dataset.__runList
private

Definition at line 22 of file dataset.py.

Referenced by dataset.Dataset.__getRunList(), and dataset.Dataset.runList().

dataset.Dataset.bad_files

Definition at line 282 of file dataset.py.

dataset.Dataset.castorDir

Definition at line 266 of file dataset.py.

Referenced by dataset.Dataset.extractFileSizes(), and dataset.Dataset.printInfo().

tuple dataset.Dataset.dasData = das_client.get_data(dasQuery, dasLimit)
static

Definition at line 342 of file dataset.py.

tuple dataset.Dataset.error = self.__findInJson(jsondict,["data","error"])
static

Definition at line 349 of file dataset.py.


dataset.Dataset.error = None
static

Definition at line 351 of file dataset.py.


dataset.Dataset.files

Definition at line 273 of file dataset.py.

dataset.Dataset.filesAndSizes

Definition at line 311 of file dataset.py.

dataset.Dataset.good_files

Definition at line 283 of file dataset.py.

int dataset.Dataset.i = 0
static

Definition at line 359 of file dataset.py.

tuple dataset.Dataset.jsondict = json.loads( dasData )
static

Definition at line 344 of file dataset.py.

dataset.Dataset.jsondict = dasData
static

Definition at line 346 of file dataset.py.

string dataset.Dataset.jsonfile = "das_query_output_%i.txt"
static

Definition at line 358 of file dataset.py.

dataset.Dataset.jsonfile = jsonfile%i
static

Definition at line 362 of file dataset.py.

tuple dataset.Dataset.jsonstr = self.__findInJson(jsondict,"reason")
static

Definition at line 354 of file dataset.py.

dataset.Dataset.lfnDir

Definition at line 265 of file dataset.py.

Referenced by dataset.Dataset.printInfo().

dataset.Dataset.maskExists

Definition at line 267 of file dataset.py.

string dataset.Dataset.msg = "The DAS query returned an error. The output is very long, and has been stored in:\n"
static

Definition at line 366 of file dataset.py.

Referenced by MatrixReader.MatrixException.__str__(), cmsHarvester.Usage.__str__(), and cmsHarvester.Error.__str__().

dataset.Dataset.report

Definition at line 268 of file dataset.py.

Referenced by addOnTests.testit.run().

tuple dataset.Dataset.theFile = open( jsonfile, "w" )
static

Definition at line 363 of file dataset.py.