
dataset.Dataset Class Reference
Inheritance diagram for dataset.Dataset:
dataset.BaseDataset

Public Member Functions

def __init__ (self, datasetName, dasLimit=0, tryPredefinedFirst=True, cmssw=os.environ["CMSSW_BASE"], cmsswrelease=os.environ["CMSSW_RELEASE_BASE"])
 
def __init__ (self, name, user, pattern='.*root')
 
def buildListOfBadFiles (self)
 
def buildListOfFiles (self, pattern='.*root')
 
def convertTimeToRun (self, begin=None, end=None, firstRun=None, lastRun=None, shortTuple=True)
 
def createdatasetfile_hippy (self, filename, filesperjob, firstrun, lastrun)
 
def datasetSnippet (self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, crab=False, parent=False)
 
def dataType (self)
 
def dump_cff (self, outName=None, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, parent=False)
 
def extractFileSizes (self)
 
def fileInfoList (self, parent=False)
 
def fileList (self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
 
def forcerunrange (self, firstRun, lastRun, s)
 
def getForceRunRangeFunction (self, firstRun, lastRun)
 
def getPrimaryDatasetEntries (self)
 
def magneticField (self)
 
def magneticFieldForRun (self, run=-1)
 
def name (self)
 
def parentDataset (self)
 
def predefined (self)
 
def printInfo (self)
 
def runList (self)
 
- Public Member Functions inherited from dataset.BaseDataset
def __init__ (self, name, user, pattern='.*root', run_range=None, dbsInstance=None)
 def init(self, name, user, pattern='. More...
 
def buildListOfBadFiles (self)
 
def buildListOfFiles (self, pattern)
 
def extractFileSizes (self)
 
def getPrimaryDatasetEntries (self)
 
def listOfFiles (self)
 
def listOfGoodFiles (self)
 
def listOfGoodFilesWithPrescale (self, prescale)
 
def printFiles (self, abspath=True, info=True)
 
def printInfo (self)
 

Static Public Member Functions

def getrunnumberfromfilename (filename)
 

Public Attributes

 bad_files
 
 castorDir
 
 files
 
 filesAndSizes
 
 good_files
 
 lfnDir
 
 maskExists
 
 report
 
- Public Attributes inherited from dataset.BaseDataset
 bad_files
 
 dbsInstance
 MM. More...
 
 files
 
 filesAndSizes
 
 good_files
 
 name
 
 pattern
 
 primaryDatasetEntries
 MM. More...
 
 report
 
 run_range
 
 user
 

Static Public Attributes

 dasData = das_client.get_data(dasQuery, dasLimit)
 
 error = self.__findInJson(jsondict,["data","error"])
 
int i = 0
 
 jsondict = json.loads( dasData )
 
string jsonfile = "das_query_output_%i.txt"
 
 jsonfile = jsonfile%i
 
 jsonstr = self.__findInJson(jsondict,"reason")
 
string msg = "The DAS query returned an error. The output is very long, and has been stored in:\n"
 
 theFile = open( jsonfile, "w" )
 

Private Member Functions

def __chunks (self, theList, n)
 
def __createSnippet (self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, repMap=None, crab=False, parent=False)
 
def __dateString (self, date)
 
def __datetime (self, stringForDas)
 
def __fileListSnippet (self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
 
def __find_ge (self, a, x)
 
def __find_lt (self, a, x)
 
def __findInJson (self, jsondict, strings)
 
def __getData (self, dasQuery, dasLimit=0)
 
def __getDataType (self)
 
def __getFileInfoList (self, dasLimit, parent=False)
 
def __getMagneticField (self)
 
def __getMagneticFieldForRun (self, run=-1, tolerance=0.5)
 
def __getParentDataset (self)
 
def __getRunList (self)
 
def __lumiSelectionSnippet (self, jsonPath=None, firstRun=None, lastRun=None)
 

Private Attributes

 __alreadyStored
 
 __cmssw
 
 __cmsswrelease
 
 __dasLimit
 
 __dataType
 
 __fileInfoList
 
 __fileList
 
 __filename
 
 __firstusedrun
 
 __lastusedrun
 
 __magneticField
 
 __name
 
 __official
 
 __origName
 
 __parentDataset
 
 __parentFileInfoList
 
 __parentFileList
 
 __predefined
 
 __runList
 

Static Private Attributes

tuple __dummy_source_template
 
 __source_template
 

Detailed Description

Definition at line 14 of file dataset.py.

Constructor & Destructor Documentation

def dataset.Dataset.__init__ (   self,
  datasetName,
  dasLimit = 0,
  tryPredefinedFirst = True,
  cmssw = os.environ["CMSSW_BASE"],
  cmsswrelease = os.environ["CMSSW_RELEASE_BASE"] 
)

Definition at line 16 of file dataset.py.

Referenced by dataset.Dataset.__init__().

16  cmssw = os.environ["CMSSW_BASE"], cmsswrelease = os.environ["CMSSW_RELEASE_BASE"]):
17  self.__name = datasetName
18  self.__origName = datasetName
19  self.__dasLimit = dasLimit
20  self.__fileList = None
21  self.__fileInfoList = None
22  self.__runList = None
23  self.__alreadyStored = False
24  self.__cmssw = cmssw
25  self.__cmsswrelease = cmsswrelease
26  self.__firstusedrun = None
27  self.__lastusedrun = None
28  self.__parentDataset = None
29  self.__parentFileList = None
30  self.__parentFileInfoList = None
31 
32  # check, if dataset name matches CMS dataset naming scheme
33  if re.match( r'/.+/.+/.+', self.__name ):
34  self.__official = True
35  fileName = "Dataset" + self.__name.replace("/","_") + "_cff.py"
36  else:
37  self.__official = False
38  fileName = self.__name + "_cff.py"
39 
40  searchPath1 = os.path.join( self.__cmssw, "python",
41  "Alignment", "OfflineValidation",
42  fileName )
43  searchPath2 = os.path.join( self.__cmssw, "src",
44  "Alignment", "OfflineValidation",
45  "python", fileName )
46  searchPath3 = os.path.join( self.__cmsswrelease,
47  "python", "Alignment",
48  "OfflineValidation", fileName )
49  if self.__official and not tryPredefinedFirst:
50  self.__predefined = False
51  elif os.path.exists( searchPath1 ):
52  self.__predefined = True
53  self.__filename = searchPath1
54  elif os.path.exists( searchPath2 ):
55  msg = ("The predefined dataset '%s' does exist in '%s', but "
56  "you need to run 'scram b' first."
57  %( self.__name, searchPath2 ))
58  if self.__official:
59  print msg
60  print "Getting the data from DAS again. To go faster next time, run scram b."
61  else:
62  raise AllInOneError( msg )
63  elif os.path.exists( searchPath3 ):
64  self.__predefined = True
65  self.__filename = searchPath3
66  elif self.__official:
67  self.__predefined = False
68  else:
69  msg = ("The predefined dataset '%s' does not exist. Please "
70  "create it first or check for typos."%( self.__name ))
71  raise AllInOneError( msg )
72 
73  if self.__predefined and self.__official:
74  self.__name = "Dataset" + self.__name.replace("/","_")
75 
76  self.__dataType = self.__getDataType()
77  self.__magneticField = self.__getMagneticField()
78 
def __getDataType(self)
Definition: dataset.py:373
def __getMagneticField(self)
Definition: dataset.py:408
def dataset.Dataset.__init__ (   self,
  name,
  user,
  pattern = '.*root' 
)

Definition at line 264 of file dataset.py.

References dataset.Dataset.__init__().

264  def __init__(self, name, user, pattern='.*root'):
265  self.lfnDir = castorBaseDir(user) + name
266  self.castorDir = castortools.lfnToCastor( self.lfnDir )
267  self.maskExists = False
268  self.report = None
269  super(Dataset, self).__init__(name, user, pattern)
270 
def __init__(self, datasetName, dasLimit=0, tryPredefinedFirst=True, cmssw=os.environ["CMSSW_BASE"], cmsswrelease=os.environ["CMSSW_RELEASE_BASE"])
Definition: dataset.py:16
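
A minimal usage sketch of the first constructor (dataset name hypothetical; assumes the module is importable as dataset, i.e. the dataset.py documented here, and that CMSSW_BASE and CMSSW_RELEASE_BASE are set, as required by the default arguments):

from dataset import Dataset   # import path assumed

# official dataset names follow the /Primary/Processed/Tier pattern checked by the constructor
d = Dataset("/MinimumBias/Run2012A-TkAlMinBias-v1/ALCARECO", dasLimit=0, tryPredefinedFirst=True)
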

Member Function Documentation

def dataset.Dataset.__chunks (   self,
  theList,
  n 
)
private
Yield successive n-sized chunks from theList.

Definition at line 79 of file dataset.py.

Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.__lumiSelectionSnippet(), and dataset.Dataset.createdatasetfile_hippy().

79  def __chunks( self, theList, n ):
80  """ Yield successive n-sized chunks from theList.
81  """
82  for i in xrange( 0, len( theList ), n ):
83  yield theList[i:i+n]
84 
def __chunks(self, theList, n)
Definition: dataset.py:79
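
A standalone sketch of the same chunking logic, for orientation (the method itself is private):

def chunks(theList, n):
    # yield successive n-sized slices, exactly as __chunks does
    for i in xrange(0, len(theList), n):
        yield theList[i:i+n]

print list(chunks(range(7), 3))   # [[0, 1, 2], [3, 4, 5], [6]]
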
def dataset.Dataset.__createSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  repMap = None,
  crab = False,
  parent = False 
)
private

Definition at line 230 of file dataset.py.

References dataset.Dataset.__dummy_source_template, dataset.Dataset.__fileListSnippet(), dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.__source_template, dataset.Dataset.convertTimeToRun(), and dataset.int.

Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.datasetSnippet(), and dataset.Dataset.dump_cff().

230  crab = False, parent = False ):
231 
232  if firstRun:
233  firstRun = int( firstRun )
234  if lastRun:
235  lastRun = int( lastRun )
236  if ( begin and firstRun ) or ( end and lastRun ):
237  msg = ( "The Usage of "
238  + "'begin' & 'firstRun' " * int( bool( begin and
239  firstRun ) )
240  + "and " * int( bool( ( begin and firstRun ) and
241  ( end and lastRun ) ) )
242  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
243  + "is ambigous." )
244  raise AllInOneError( msg )
245  if begin or end:
246  ( firstRun, lastRun ) = self.convertTimeToRun(
247  begin = begin, end = end, firstRun = firstRun,
248  lastRun = lastRun )
249  if ( firstRun and lastRun ) and ( firstRun > lastRun ):
250  msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
251  "chosen is greater than the upper time/runrange limit "
252  "('end'/'lastRun').")
253  raise AllInOneError( msg )
254 
255  lumiSecExtend = self.__lumiSelectionSnippet(jsonPath=jsonPath, firstRun=firstRun, lastRun=lastRun)
256  lumiStr = goodLumiSecStr = ""
257  if lumiSecExtend:
258  goodLumiSecStr = "lumiSecs = cms.untracked.VLuminosityBlockRange()\n"
259  lumiStr = " lumisToProcess = lumiSecs,\n"
260 
261  files = self.__fileListSnippet(crab=crab, parent=parent, firstRun=firstRun, lastRun=lastRun, forcerunselection=False)
262 
263  theMap = repMap
264  theMap["files"] = files
265  theMap["json"] = jsonPath
266  theMap["lumiStr"] = lumiStr
267  theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
268  theMap["lumiSecExtend"] = lumiSecExtend
269  if crab:
270  dataset_snippet = self.__dummy_source_template%( theMap )
271  else:
272  dataset_snippet = self.__source_template%( theMap )
273  return dataset_snippet
274 
def __lumiSelectionSnippet(self, jsonPath=None, firstRun=None, lastRun=None)
Definition: dataset.py:115
def convertTimeToRun(self, begin=None, end=None, firstRun=None, lastRun=None, shortTuple=True)
Definition: dataset.py:626
tuple __dummy_source_template
Definition: dataset.py:103
def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:208
def dataset.Dataset.__dateString (   self,
  date 
)
private

Definition at line 621 of file dataset.py.

References dataset.Dataset.convertTimeToRun(), and harvestTrackValidationPlots.str.

Referenced by dataset.Dataset.convertTimeToRun().

621  def __dateString(self, date):
622  return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)
623 
def __dateString(self, date)
Definition: dataset.py:621
def dataset.Dataset.__datetime (   self,
  stringForDas 
)
private

Definition at line 612 of file dataset.py.

References dataset.int.

Referenced by dataset.Dataset.convertTimeToRun().

612  def __datetime(self, stringForDas):
613  if len(stringForDas) != 8:
614  raise AllInOneError(stringForDas + " is not a valid date string.\n"
615  + "DAS accepts dates in the form 'yyyymmdd'")
616  year = stringForDas[:4]
617  month = stringForDas[4:6]
618  day = stringForDas[6:8]
619  return datetime.date(int(year), int(month), int(day))
620 
def __datetime(self, stringForDas)
Definition: dataset.py:612
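
Together with __dateString() above, this converts between the 'yyyymmdd' strings accepted by DAS and datetime.date objects; a standalone sketch of the round trip (date hypothetical):

import datetime

def to_date(stringForDas):
    # standalone equivalent of __datetime
    year, month, day = stringForDas[:4], stringForDas[4:6], stringForDas[6:8]
    return datetime.date(int(year), int(month), int(day))

def to_string(date):
    # standalone equivalent of __dateString
    return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)

print to_string(to_date("20120615"))   # "20120615"
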
def dataset.Dataset.__fileListSnippet (   self,
  crab = False,
  parent = False,
  firstRun = None,
  lastRun = None,
  forcerunselection = False 
)
private

Definition at line 208 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__createSnippet(), dataset.Dataset.fileList(), join(), and list().

Referenced by dataset.Dataset.__createSnippet().

208  def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
209  if crab:
210  files = ""
211  else:
212  splitFileList = list( self.__chunks( self.fileList(firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
213  fileStr = [ "',\n'".join( files ) for files in splitFileList ]
214  fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
215  for files in fileStr ]
216  files = "\n".join( fileStr )
217 
218  if parent:
219  splitParentFileList = list( self.__chunks( self.fileList(parent=True, firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
220  parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ]
221  parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
222  for parentFiles in parentFileStr ]
223  parentFiles = "\n".join( parentFileStr )
224  files += "\n\n" + parentFiles
225 
226  return files
227 
def __chunks(self, theList, n)
Definition: dataset.py:79
def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:208
def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:863
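
For orientation, the non-CRAB branch produces readFiles.extend blocks of at most 255 files each (and, with parent=True, matching secFiles.extend blocks for the secondary files); an illustrative fragment of the generated text (file names hypothetical):

readFiles.extend( [
'/store/data/Run2012A/MinimumBias/ALCARECO/TkAlMinBias-v1/000/194/533/file1.root',
'/store/data/Run2012A/MinimumBias/ALCARECO/TkAlMinBias-v1/000/194/533/file2.root'
] )
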
def dataset.Dataset.__find_ge (   self,
  a,
  x 
)
private

Definition at line 282 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

282  def __find_ge( self, a, x):
283  'Find leftmost item greater than or equal to x'
284  i = bisect.bisect_left( a, x )
285  if i != len( a ):
286  return i
287  raise ValueError
288 
def __find_ge(self, a, x)
Definition: dataset.py:282
def dataset.Dataset.__find_lt (   self,
  a,
  x 
)
private

Definition at line 275 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

275  def __find_lt( self, a, x ):
276  'Find rightmost value less than x'
277  i = bisect.bisect_left( a, x )
278  if i:
279  return i-1
280  raise ValueError
281 
def __find_lt(self, a, x)
Definition: dataset.py:275
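
Both helpers operate on a sorted list via bisect; an illustration with a sorted run list (run numbers hypothetical):

import bisect

runs = [190456, 190645, 190703]
print bisect.bisect_left(runs, 190600)       # 1: the index __find_ge(runs, 190600) returns (first run >= 190600)
print bisect.bisect_left(runs, 190700) - 1   # 1: the index __find_lt(runs, 190700) returns (last run < 190700)
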
def dataset.Dataset.__findInJson (   self,
  jsondict,
  strings 
)
private

Definition at line 289 of file dataset.py.

References dataset.Dataset.__findInJson().

Referenced by dataset.Dataset.__findInJson(), dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.fileList().

289  def __findInJson(self, jsondict, strings):
290  if isinstance(strings, str):
291  strings = [ strings ]
292 
293  if len(strings) == 0:
294  return jsondict
295  if isinstance(jsondict,dict):
296  if strings[0] in jsondict:
297  try:
298  return self.__findInJson(jsondict[strings[0]], strings[1:])
299  except KeyError:
300  pass
301  else:
302  for a in jsondict:
303  if strings[0] in a:
304  try:
305  return self.__findInJson(a[strings[0]], strings[1:])
306  except (TypeError, KeyError): #TypeError because a could be a string and contain strings[0]
307  pass
308  #if it's not found
309  raise KeyError("Can't find " + strings[0])
310 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:289
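
The lookup walks nested dicts and lists of dicts, following the given key path; a simplified standalone sketch of the behavior (it omits the fallback handling of the original, data hypothetical):

def find_in_json(jsondict, strings):
    # follow the key path through dicts and lists of dicts, as __findInJson does
    if isinstance(strings, str):
        strings = [strings]
    if not strings:
        return jsondict
    if isinstance(jsondict, dict):
        if strings[0] in jsondict:
            return find_in_json(jsondict[strings[0]], strings[1:])
    else:
        for entry in jsondict:
            if strings[0] in entry:
                return find_in_json(entry[strings[0]], strings[1:])
    raise KeyError("Can't find " + strings[0])

record = {"dataset": [{"name": "/A/B/C", "datatype": "data"}]}
print find_in_json(record, ["dataset", "datatype"])   # "data"
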
def dataset.Dataset.__getData (   self,
  dasQuery,
  dasLimit = 0 
)
private
def dataset.Dataset.__getDataType (   self)
private

Definition at line 373 of file dataset.py.

References dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__predefined, and dataset.Dataset.name().

Referenced by dataset.Dataset.dataType().

373  def __getDataType( self ):
374  if self.__predefined:
375  with open(self.__filename) as f:
376  datatype = None
377  for line in f.readlines():
378  if line.startswith("#data type: "):
379  if datatype is not None:
380  raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
381  datatype = line.replace("#data type: ", "").replace("\n","")
382  return datatype
383  return "unknown"
384 
385  dasQuery_type = ( 'dataset dataset=%s | grep dataset.datatype,'
386  'dataset.name'%( self.__name ) )
387  data = self.__getData( dasQuery_type )
388 
389  try:
390  return self.__findInJson(data, ["dataset", "datatype"])
391  except KeyError:
392  print ("Cannot find the datatype of the dataset '%s'\n"
393  "It may not be possible to automatically find the magnetic field,\n"
394  "and you will not be able run in CRAB mode"
395  %( self.name() ))
396  return "unknown"
397 
def __getDataType(self)
Definition: dataset.py:373
def __findInJson(self, jsondict, strings)
Definition: dataset.py:289
def replace(string, replacements)
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:341
def dataset.Dataset.__getFileInfoList (   self,
  dasLimit,
  parent = False 
)
private

Definition at line 533 of file dataset.py.

References dataset.Dataset.__fileInfoList, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__parentFileInfoList, dataset.Dataset.__predefined, dataset.Dataset.name(), and dataset.Dataset.parentDataset().

Referenced by dataset.Dataset.fileInfoList().

533  def __getFileInfoList( self, dasLimit, parent = False ):
534  if self.__predefined:
535  if parent:
536  extendstring = "secFiles.extend"
537  else:
538  extendstring = "readFiles.extend"
539  with open(self.__fileName) as f:
540  files = []
541  copy = False
542  for line in f.readlines():
543  if "]" in line:
544  copy = False
545  if copy:
546  files.append({name: line.translate(None, "', " + '"')})
547  if extendstring in line and "[" in line and "]" not in line:
548  copy = True
549  return files
550 
551  if self.__fileInfoList and not parent:
552  return self.__fileInfoList
553  if self.__parentFileInfoList and parent:
554  return self.__parentFileInfoList
555 
556  if parent:
557  searchdataset = self.parentDataset()
558  else:
559  searchdataset = self.__name
560  dasQuery_files = ( 'file dataset=%s | grep file.name, file.nevents, '
561  'file.creation_time, '
562  'file.modification_time'%( searchdataset ) )
563  print "Requesting file information for '%s' from DAS..."%( searchdataset ),
564  data = self.__getData( dasQuery_files, dasLimit )
565  print "Done."
566  data = [ self.__findInJson(entry,"file") for entry in data ]
567  if len( data ) == 0:
568  msg = ("No files are available for the dataset '%s'. This can be "
569  "due to a typo or due to a DAS problem. Please check the "
570  "spelling of the dataset and/or retry to run "
571  "'validateAlignments.py'."%( self.name() ))
572  raise AllInOneError( msg )
573  fileInformationList = []
574  for file in data:
575  fileName = 'unknown'
576  try:
577  fileName = self.__findInJson(file, "name")
578  fileCreationTime = self.__findInJson(file, "creation_time")
579  fileNEvents = self.__findInJson(file, "nevents")
580  except KeyError:
581  print ("DAS query gives bad output for file '%s'. Skipping it.\n"
582  "It may work if you try again later.") % fileName
583  fileNEvents = 0
584  # select only non-empty files
585  if fileNEvents == 0:
586  continue
587  fileDict = { "name": fileName,
588  "creation_time": fileCreationTime,
589  "nevents": fileNEvents
590  }
591  fileInformationList.append( fileDict )
592  fileInformationList.sort( key=lambda info: self.__findInJson(info,"name") )
593  if parent:
594  self.__parentFileInfoList = fileInformationList
595  else:
596  self.__fileInfoList = fileInformationList
597  return fileInformationList
598 
def __getFileInfoList(self, dasLimit, parent=False)
Definition: dataset.py:533
def __findInJson(self, jsondict, strings)
Definition: dataset.py:289
def parentDataset(self)
Definition: dataset.py:704
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:341
def dataset.Dataset.__getMagneticField (   self)
private

Definition at line 408 of file dataset.py.

References dataset.Dataset.__cmssw, dataset.Dataset.__cmsswrelease, dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__predefined, python.rootplot.root2matplotlib.replace(), and digi_MixPreMix_cfi.strip.

Referenced by dataset.Dataset.magneticField().

408  def __getMagneticField( self ):
409  Bfieldlocation = os.path.join( self.__cmssw, "python", "Configuration", "StandardSequences" )
410  if not os.path.isdir(Bfieldlocation):
411  Bfieldlocation = os.path.join( self.__cmsswrelease, "python", "Configuration", "StandardSequences" )
412  Bfieldlist = [ f.replace("_cff.py",'') \
413  for f in os.listdir(Bfieldlocation) \
414  if f.startswith("MagneticField_") and f.endswith("_cff.py") ]
415  Bfieldlist.sort( key = lambda Bfield: -len(Bfield) ) #Put it in order of decreasing length, so that searching in the name gives the longer match
416 
417  if self.__predefined:
418  with open(self.__filename) as f:
419  datatype = None
420  Bfield = None
421  for line in f.readlines():
422  if line.startswith("#data type: "):
423  if datatype is not None:
424  raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
425  datatype = line.replace("#data type: ", "").replace("\n","")
426  datatype = datatype.split("#")[0].strip()
427  if line.startswith("#magnetic field: "):
428  if Bfield is not None:
429  raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
430  Bfield = line.replace("#magnetic field: ", "").replace("\n","")
431  Bfield = Bfield.split("#")[0].strip()
432  if Bfield is not None:
433  Bfield = Bfield.split(",")[0]
434  if Bfield in Bfieldlist or Bfield == "unknown":
435  return Bfield
436  else:
437  print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
438  print "Using Bfield='unknown' - this will revert to the default"
439  return "unknown"
440  elif datatype == "data":
441  return "MagneticField" #this should be in the "#magnetic field" line, but for safety in case it got messed up
442  else:
443  return "unknown"
444 
445  if self.__dataType == "data":
446  return "MagneticField"
447 
448  dasQuery_B = ( 'dataset dataset=%s'%( self.__name ) ) #try to find the magnetic field from DAS
449  data = self.__getData( dasQuery_B ) #it seems to be there for the newer (7X) MC samples, except cosmics
450 
451  try:
452  Bfield = self.__findInJson(data, ["dataset", "mcm", "sequences", "magField"])
453  if Bfield in Bfieldlist:
454  return Bfield
455  elif Bfield == "38T" or Bfield == "38T_PostLS1":
456  return "MagneticField"
457  elif "MagneticField_" + Bfield in Bfieldlist:
458  return "MagneticField_" + Bfield
459  elif Bfield == "":
460  pass
461  else:
462  print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
463  print "Using Bfield='unknown' - this will revert to the default magnetic field"
464  return "unknown"
465  except KeyError:
466  pass
467 
468  for possibleB in Bfieldlist:
469  if (possibleB != "MagneticField"
470  and possibleB.replace("MagneticField_","") in self.__name.replace("TkAlCosmics0T", "")):
471  #final attempt - try to identify the dataset from the name
472  #all cosmics dataset names contain "TkAlCosmics0T"
473  if possibleB == "MagneticField_38T" or possibleB == "MagneticField_38T_PostLS1":
474  return "MagneticField"
475  return possibleB
476 
477  return "unknown"
478 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:289
def replace(string, replacements)
def __getMagneticField(self)
Definition: dataset.py:408
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:341
def dataset.Dataset.__getMagneticFieldForRun (   self,
  run = -1,
  tolerance = 0.5 
)
private
For MC, this returns the same as the previous function.
   For data, it gets the magnetic field from the runs.  This is important for
   deciding which template to use for offlinevalidation

Definition at line 479 of file dataset.py.

References dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getData(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__lastusedrun, dataset.Dataset.__magneticField, dataset.Dataset.__name, dataset.Dataset.__predefined, funct.abs(), objects.autophobj.float, python.rootplot.root2matplotlib.replace(), split, and digi_MixPreMix_cfi.strip.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dump_cff(), and dataset.Dataset.magneticFieldForRun().

479  def __getMagneticFieldForRun( self, run = -1, tolerance = 0.5 ):
480  """For MC, this returns the same as the previous function.
481  For data, it gets the magnetic field from the runs. This is important for
482  deciding which template to use for offlinevalidation
483  """
484  if self.__dataType == "mc" and self.__magneticField == "MagneticField":
485  return 3.8 #For 3.8T MC the default MagneticField is used
486  if "T" in self.__magneticField:
487  Bfield = self.__magneticField.split("T")[0].replace("MagneticField_","")
488  try:
489  return float(Bfield) / 10.0 #e.g. 38T and 38T_PostLS1 both return 3.8
490  except ValueError:
491  pass
492  if self.__predefined:
493  with open(self.__filename) as f:
494  Bfield = None
495  for line in f.readlines():
496  if line.startswith("#magnetic field: ") and "," in line:
497  if Bfield is not None:
498  raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
499  return float(line.replace("#magnetic field: ", "").split(",")[1].split("#")[0].strip())
500 
501  if run > 0:
502  dasQuery = ('run = %s'%run) #for data
503  data = self.__getData(dasQuery)
504  try:
505  return self.__findInJson(data, ["run","bfield"])
506  except KeyError:
507  return "unknown Can't get the magnetic field for run %s from DAS" % run
508 
509  #run < 0 - find B field for the first and last runs, and make sure they're compatible
510  # (to within tolerance)
511  #NOT FOOLPROOF! The magnetic field might go up and then down, or vice versa
512  if self.__firstusedrun is None or self.__lastusedrun is None:
513  return "unknown Can't get the exact magnetic field for the dataset until data has been retrieved from DAS."
514  firstrunB = self.__getMagneticFieldForRun(self.__firstusedrun)
515  lastrunB = self.__getMagneticFieldForRun(self.__lastusedrun)
516  try:
517  if abs(firstrunB - lastrunB) <= tolerance:
518  return .5*(firstrunB + lastrunB)
519  print firstrunB, lastrunB, tolerance
520  return ("unknown The beginning and end of your run range for %s\n"
521  "have different magnetic fields (%s, %s)!\n"
522  "Try limiting the run range using firstRun, lastRun, begin, end, or JSON,\n"
523  "or increasing the tolerance (in dataset.py) from %s.") % (self.__name, firstrunB, lastrunB, tolerance)
524  except TypeError:
525  try:
526  if "unknown" in firstrunB:
527  return firstrunB
528  else:
529  return lastrunB
530  except TypeError:
531  return lastrunB
532 
def __getMagneticFieldForRun(self, run=-1, tolerance=0.5)
Definition: dataset.py:479
def __findInJson(self, jsondict, strings)
Definition: dataset.py:289
def replace(string, replacements)
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:341
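
For orientation, the numeric value is obtained by stripping the configuration-name prefix and dividing by ten; a sketch of that parsing step (field name follows the MagneticField_*_cff convention used above):

Bfield = "MagneticField_38T_PostLS1".split("T")[0].replace("MagneticField_", "")
print float(Bfield) / 10.0   # 3.8, i.e. both "38T" and "38T_PostLS1" map to 3.8 tesla
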
def dataset.Dataset.__getParentDataset (   self)
private

Definition at line 398 of file dataset.py.

References dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, and harvestTrackValidationPlots.str.

Referenced by dataset.Dataset.parentDataset().

398  def __getParentDataset( self ):
399  dasQuery = "parent dataset=" + self.__name
400  data = self.__getData( dasQuery )
401  try:
402  return self.__findInJson(data, ["parent", "name"])
403  except KeyError:
404  raise AllInOneError("Cannot find the parent of the dataset '" + self.__name + "'\n"
405  "Here is the DAS output:\n" + str(jsondict) +
406  "\nIt's possible that this was a server error. If so, it may work if you try again later")
407 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:289
def __getParentDataset(self)
Definition: dataset.py:398
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:341
def dataset.Dataset.__getRunList (   self)
private

Definition at line 599 of file dataset.py.

References dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, and dataset.Dataset.__runList.

Referenced by dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.runList().

599  def __getRunList( self ):
600  if self.__runList:
601  return self.__runList
602  dasQuery_runs = ( 'run dataset=%s | grep run.run_number,'
603  'run.creation_time'%( self.__name ) )
604  print "Requesting run information for '%s' from DAS..."%( self.__name ),
605  data = self.__getData( dasQuery_runs )
606  print "Done."
607  data = [ self.__findInJson(entry,"run") for entry in data ]
608  data.sort( key = lambda run: self.__findInJson(run, "run_number") )
609  self.__runList = data
610  return data
611 
def __getRunList(self)
Definition: dataset.py:599
def __findInJson(self, jsondict, strings)
Definition: dataset.py:289
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:341
def dataset.Dataset.__lumiSelectionSnippet (   self,
  jsonPath = None,
  firstRun = None,
  lastRun = None 
)
private

Definition at line 115 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getRunList(), dataset.Dataset.__lastusedrun, dataset.Dataset.getForceRunRangeFunction(), dataset.int, join(), list(), hpstanc_transforms.max, min(), python.rootplot.root2matplotlib.replace(), split, and harvestTrackValidationPlots.str.

Referenced by dataset.Dataset.__createSnippet().

115  def __lumiSelectionSnippet( self, jsonPath = None, firstRun = None, lastRun = None ):
116  lumiSecExtend = ""
117  if firstRun or lastRun or jsonPath:
118  if not jsonPath:
119  selectedRunList = self.__getRunList()
120  if firstRun:
121  selectedRunList = [ run for run in selectedRunList \
122  if self.__findInJson(run, "run_number") >= firstRun ]
123  if lastRun:
124  selectedRunList = [ run for run in selectedRunList \
125  if self.__findInJson(run, "run_number") <= lastRun ]
126  lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
127  + str( self.__findInJson(run, "run_number") ) + ":max" \
128  for run in selectedRunList ]
129  splitLumiList = list( self.__chunks( lumiList, 255 ) )
130  else:
131  theLumiList = None
132  try:
133  theLumiList = LumiList ( filename = jsonPath )
134  except ValueError:
135  pass
136 
137  if theLumiList is not None:
138  allRuns = theLumiList.getRuns()
139  runsToRemove = []
140  for run in allRuns:
141  if firstRun and int( run ) < firstRun:
142  runsToRemove.append( run )
143  if lastRun and int( run ) > lastRun:
144  runsToRemove.append( run )
145  theLumiList.removeRuns( runsToRemove )
146  splitLumiList = list( self.__chunks(
147  theLumiList.getCMSSWString().split(','), 255 ) )
148  if not (splitLumiList and splitLumiList[0] and splitLumiList[0][0]):
149  splitLumiList = None
150  else:
151  with open(jsonPath) as f:
152  jsoncontents = f.read()
153  if "process.source.lumisToProcess" in jsoncontents:
154  msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
155  if firstRun or lastRun:
156  msg += ("\n (after applying firstRun and/or lastRun)")
157  msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
158  msg += "\nCheck your config file to make sure that it worked properly."
159  print msg
160 
161  runlist = self.__getRunList()
162  if firstRun or lastRun:
163  self.__firstusedrun = -1
164  self.__lastusedrun = -1
165  jsoncontents = re.sub(r"\d+:(\d+|max)(-\d+:(\d+|max))?", self.getForceRunRangeFunction(firstRun, lastRun), jsoncontents)
166  jsoncontents = (jsoncontents.replace("'',\n","").replace("''\n","")
167  .replace('"",\n','').replace('""\n',''))
168  self.__firstusedrun = max(self.__firstusedrun, int(self.__findInJson(runlist[0],"run_number")))
169  self.__lastusedrun = min(self.__lastusedrun, int(self.__findInJson(runlist[-1],"run_number")))
170  if self.__lastusedrun < self.__firstusedrun:
171  jsoncontents = None
172  else:
173  self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
174  self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))
175  lumiSecExtend = jsoncontents
176  splitLumiList = None
177  else:
178  raise AllInOneError("%s is not a valid json file!" % jsonPath)
179 
180  if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
181  lumiSecStr = [ "',\n'".join( lumis ) \
182  for lumis in splitLumiList ]
183  lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
184  for lumis in lumiSecStr ]
185  lumiSecExtend = "\n".join( lumiSecStr )
186  runlist = self.__getRunList()
187  self.__firstusedrun = max(int(splitLumiList[0][0].split(":")[0]), int(self.__findInJson(runlist[0],"run_number")))
188  self.__lastusedrun = min(int(splitLumiList[-1][-1].split(":")[0]), int(self.__findInJson(runlist[-1],"run_number")))
189  elif lumiSecExtend:
190  pass
191  else:
192  msg = "You are trying to run a validation without any runs! Check that:"
193  if firstRun or lastRun:
194  msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
195  if jsonPath:
196  msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
197  if (firstRun or lastRun) and jsonPath:
198  msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
199  raise AllInOneError(msg)
200 
201  else:
202  runlist = self.__getRunList()
203  self.__firstusedrun = int(self.__findInJson(self.__getRunList()[0],"run_number"))
204  self.__lastusedrun = int(self.__findInJson(self.__getRunList()[-1],"run_number"))
205 
206  return lumiSecExtend
207 
def __getRunList(self)
Definition: dataset.py:599
def __lumiSelectionSnippet(self, jsonPath=None, firstRun=None, lastRun=None)
Definition: dataset.py:115
def __findInJson(self, jsondict, strings)
Definition: dataset.py:289
def replace(string, replacements)
def getForceRunRangeFunction(self, firstRun, lastRun)
Definition: dataset.py:336
def __chunks(self, theList, n)
Definition: dataset.py:79
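
For orientation, the run-based branch emits run:1-run:max ranges, again chunked in groups of 255 (the VLuminosityBlockRange line itself is added by __createSnippet); an illustrative fragment of the generated selection (run numbers hypothetical):

lumiSecs = cms.untracked.VLuminosityBlockRange()
lumiSecs.extend( [
'190456:1-190456:max',
'190645:1-190645:max'
] )
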
def dataset.Dataset.buildListOfBadFiles (   self)
fills the list of bad files from the IntegrityCheck log.

When the integrity check file is not available,
files are considered as good.

Definition at line 275 of file dataset.py.

276  '''fills the list of bad files from the IntegrityCheck log.
277 
278  When the integrity check file is not available,
279  files are considered as good.'''
280  mask = "IntegrityCheck"
281 
282  self.bad_files = {}
283  self.good_files = []
284 
285  file_mask = castortools.matchingFiles(self.castorDir, '^%s_.*\.txt$' % mask)
286  if file_mask:
287  # here to avoid circular dependency
288  from edmIntegrityCheck import PublishToFileSystem
289  p = PublishToFileSystem(mask)
290  report = p.get(self.castorDir)
291  if report is not None and report:
292  self.maskExists = True
293  self.report = report
294  dup = report.get('ValidDuplicates',{})
295  for name, status in report['Files'].iteritems():
296  # print name, status
297  if not status[0]:
298  self.bad_files[name] = 'MarkedBad'
299  elif name in dup:
300  self.bad_files[name] = 'ValidDup'
301  else:
302  self.good_files.append( name )
303  else:
304  raise IntegrityCheckError( "ERROR: IntegrityCheck log file IntegrityCheck_XXXXXXXXXX.txt not found" )
305 
def buildListOfBadFiles(self)
Definition: dataset.py:275
def dataset.Dataset.buildListOfFiles (   self,
  pattern = '.*root' 
)
fills list of files, taking all root files matching the pattern in the castor dir

Definition at line 271 of file dataset.py.

271  def buildListOfFiles(self, pattern='.*root'):
272  '''fills list of files, taking all root files matching the pattern in the castor dir'''
273  self.files = castortools.matchingFiles( self.castorDir, pattern )
274 
def buildListOfFiles(self, pattern='.*root')
Definition: dataset.py:271
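
A usage sketch for the CAF-style second constructor and these two methods (dataset name and user are hypothetical; the bad-file classification requires the IntegrityCheck log to be present):

d2 = Dataset("/MyPrimaryDataset/MyProduction", "someuser")   # files live in the castor/eos area of 'someuser'
d2.buildListOfFiles()        # all root files matching the pattern
d2.buildListOfBadFiles()     # classify them using the IntegrityCheck report
print len(d2.good_files), "good files,", len(d2.bad_files), "bad files"
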
def dataset.Dataset.convertTimeToRun (   self,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  shortTuple = True 
)

Definition at line 626 of file dataset.py.

References dataset.Dataset.__dateString(), dataset.Dataset.__datetime(), dataset.Dataset.__find_ge(), dataset.Dataset.__find_lt(), dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__getRunList(), dataset.Dataset.__name, and dataset.int.

Referenced by dataset.Dataset.__createSnippet(), and dataset.Dataset.__dateString().

626  shortTuple = True ):
627  if ( begin and firstRun ) or ( end and lastRun ):
628  msg = ( "The Usage of "
629  + "'begin' & 'firstRun' " * int( bool( begin and
630  firstRun ) )
631  + "and " * int( bool( ( begin and firstRun ) and
632  ( end and lastRun ) ) )
633  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
634  + "is ambigous." )
635  raise AllInOneError( msg )
636 
637  if begin or end:
638  runList = [ self.__findInJson(run, "run_number") for run in self.__getRunList() ]
639 
640  if begin:
641  lastdate = begin
642  for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months after begin
643  firstdate = lastdate
644  lastdate = self.__dateString(self.__datetime(firstdate) + datetime.timedelta(delta))
645  dasQuery_begin = "run date between[%s,%s]" % (firstdate, lastdate)
646  begindata = self.__getData(dasQuery_begin)
647  if len(begindata) > 0:
648  begindata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
649  try:
650  runIndex = self.__find_ge( runList, self.__findInJson(begindata[0], ["run", "run_number"]))
651  except ValueError:
652  msg = ( "Your 'begin' is after the creation time of the last "
653  "run in the dataset\n'%s'"%( self.__name ) )
654  raise AllInOneError( msg )
655  firstRun = runList[runIndex]
656  begin = None
657  break
658 
659  if begin:
660  raise AllInOneError("No runs within a reasonable time interval after your 'begin'."
661  "Try using a 'begin' that has runs soon after it (within 2 months at most)")
662 
663  if end:
664  firstdate = end
665  for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months before end
666  lastdate = firstdate
667  firstdate = self.__dateString(self.__datetime(lastdate) - datetime.timedelta(delta))
668  dasQuery_end = "run date between[%s,%s]" % (firstdate, lastdate)
669  enddata = self.__getData(dasQuery_end)
670  if len(enddata) > 0:
671  enddata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
672  try:
673  runIndex = self.__find_lt( runList, self.__findInJson(enddata[-1], ["run", "run_number"]))
674  except ValueError:
675  msg = ( "Your 'end' is before the creation time of the first "
676  "run in the dataset\n'%s'"%( self.__name ) )
677  raise AllInOneError( msg )
678  lastRun = runList[runIndex]
679  end = None
680  break
681 
682  if end:
683  raise AllInOneError("No runs within a reasonable time interval before your 'end'."
684  "Try using an 'end' that has runs soon before it (within 2 months at most)")
685 
686  if shortTuple:
687  return firstRun, lastRun
688  else:
689  return begin, end, firstRun, lastRun
690 
def __getRunList(self)
Definition: dataset.py:599
def __findInJson(self, jsondict, strings)
Definition: dataset.py:289
def __find_lt(self, a, x)
Definition: dataset.py:275
def __datetime(self, stringForDas)
Definition: dataset.py:612
def __dateString(self, date)
Definition: dataset.py:621
def __find_ge(self, a, x)
Definition: dataset.py:282
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:341
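
A usage sketch, continuing the d instance from the constructor sketch above (dates hypothetical, in the 'yyyymmdd' format accepted by DAS):

firstRun, lastRun = d.convertTimeToRun(begin="20120601", end="20120630")
# with shortTuple=False the full (begin, end, firstRun, lastRun) tuple is returned instead
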
def dataset.Dataset.createdatasetfile_hippy (   self,
  filename,
  filesperjob,
  firstrun,
  lastrun 
)

Definition at line 831 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.fileList(), and join().

831  def createdatasetfile_hippy(self, filename, filesperjob, firstrun, lastrun):
832  with open(filename, "w") as f:
833  for job in self.__chunks(self.fileList(firstRun=firstrun, lastRun=lastrun, forcerunselection=True), filesperjob):
834  f.write(",".join("'{}'".format(file) for file in job)+"\n")
835 
def createdatasetfile_hippy(self, filename, filesperjob, firstrun, lastrun)
Definition: dataset.py:831
def __chunks(self, theList, n)
Definition: dataset.py:79
def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:863
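
A usage sketch (file name, group size and run range hypothetical); each line of the output file lists the quoted files for one job, separated by commas:

d.createdatasetfile_hippy("dataset_hippy.txt", 10, 194000, 195000)
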
def dataset.Dataset.datasetSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  crab = False,
  parent = False 
)

Definition at line 710 of file dataset.py.

References dataset.Dataset.__createSnippet(), dataset.Dataset.__filename, dataset.Dataset.__name, dataset.Dataset.__official, dataset.Dataset.__origName, dataset.Dataset.__predefined, and dataset.Dataset.dump_cff().

Referenced by dataset.Dataset.parentDataset().

710  firstRun = None, lastRun = None, crab = False, parent = False ):
711  if self.__predefined and (jsonPath or begin or end or firstRun or lastRun):
712  msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun' "
713  "only work for official datasets, not predefined _cff.py files" )
714  raise AllInOneError( msg )
715  if self.__predefined and parent:
716  with open(self.__filename) as f:
717  if "secFiles.extend" not in f.read():
718  msg = ("The predefined dataset '%s' does not contain secondary files, "
719  "which your validation requires!") % self.__name
720  if self.__official:
721  self.__name = self.__origName
722  self.__predefined = False
723  print msg
724  print ("Retreiving the files from DAS. You will be asked if you want "
725  "to overwrite the old dataset.\n"
726  "It will still be compatible with validations that don't need secondary files.")
727  else:
728  raise AllInOneError(msg)
729 
730  if self.__predefined:
731  snippet = ("process.load(\"Alignment.OfflineValidation.%s_cff\")\n"
732  "process.maxEvents = cms.untracked.PSet(\n"
733  " input = cms.untracked.int32(.oO[nEvents]Oo. / .oO[parallelJobs]Oo.)\n"
734  ")\n"
735  "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)"
736  %(self.__name))
737  if not parent:
738  with open(self.__filename) as f:
739  if "secFiles.extend" in f.read():
740  snippet += "\nprocess.source.secondaryFileNames = cms.untracked.vstring()"
741  return snippet
742  theMap = { "process": "process.",
743  "tab": " " * len( "process." ),
744  "nEvents": ".oO[nEvents]Oo. / .oO[parallelJobs]Oo.",
745  "skipEventsString": "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)\n",
746  "importCms": "",
747  "header": ""
748  }
749  datasetSnippet = self.__createSnippet( jsonPath = jsonPath,
750  begin = begin,
751  end = end,
752  firstRun = firstRun,
753  lastRun = lastRun,
754  repMap = theMap,
755  crab = crab,
756  parent = parent )
757  if jsonPath == "" and begin == "" and end == "" and firstRun == "" and lastRun == "":
758  try:
759  self.dump_cff(parent = parent)
760  except AllInOneError as e:
761  print "Can't store the dataset as a cff:"
762  print e
763  print "This may be inconvenient in the future, but will not cause a problem for this validation."
764  return datasetSnippet
765 
def __createSnippet(self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, repMap=None, crab=False, parent=False)
Definition: dataset.py:230
def dump_cff(self, outName=None, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, parent=False)
Definition: dataset.py:767
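
A usage sketch (JSON path and run range hypothetical); the returned string is the cms.Source configuration fragment that the validation templates substitute in:

snippet = d.datasetSnippet(jsonPath="Cert_2012_JSON.txt", firstRun=194000, lastRun=195000)
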
def dataset.Dataset.dataType (   self)

Definition at line 691 of file dataset.py.

References dataset.Dataset.__dataType, and dataset.Dataset.__getDataType().

691  def dataType( self ):
692  if not self.__dataType:
693  self.__dataType = self.__getDataType()
694  return self.__dataType
695 
def __getDataType(self)
Definition: dataset.py:373
def dataType(self)
Definition: dataset.py:691
def dataset.Dataset.dump_cff (   self,
  outName = None,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  parent = False 
)

Definition at line 767 of file dataset.py.

References dataset.Dataset.__alreadyStored, dataset.Dataset.__cmssw, dataset.Dataset.__createSnippet(), dataset.Dataset.__dataType, dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__magneticField, dataset.Dataset.__name, python.rootplot.root2matplotlib.replace(), split, harvestTrackValidationPlots.str, and digi_MixPreMix_cfi.strip.

Referenced by dataset.Dataset.datasetSnippet().

767  end = None, firstRun = None, lastRun = None, parent = False ):
768  if self.__alreadyStored:
769  return
770  self.__alreadyStored = True
771  if outName == None:
772  outName = "Dataset" + self.__name.replace("/", "_")
773  packageName = os.path.join( "Alignment", "OfflineValidation" )
774  if not os.path.exists( os.path.join(
775  self.__cmssw, "src", packageName ) ):
776  msg = ("You try to store the predefined dataset'%s'.\n"
777  "For that you need to check out the package '%s' to your "
778  "private relase area in\n"%( outName, packageName )
779  + self.__cmssw )
780  raise AllInOneError( msg )
781  theMap = { "process": "",
782  "tab": "",
783  "nEvents": str( -1 ),
784  "skipEventsString": "",
785  "importCms": "import FWCore.ParameterSet.Config as cms\n",
786  "header": "#Do not delete or (unless you know what you're doing) change these comments\n"
787  "#%(name)s\n"
788  "#data type: %(dataType)s\n"
789  "#magnetic field: .oO[magneticField]Oo.\n" #put in magnetic field later
790  %{"name": self.__name, #need to create the snippet before getting the magnetic field
791  "dataType": self.__dataType} #so that we know the first and last runs
792  }
793  dataset_cff = self.__createSnippet( jsonPath = jsonPath,
794  begin = begin,
795  end = end,
796  firstRun = firstRun,
797  lastRun = lastRun,
798  repMap = theMap,
799  parent = parent)
800  magneticField = self.__magneticField
801  if magneticField == "MagneticField":
802  magneticField = "%s, %s #%s" % (magneticField,
803  str(self.__getMagneticFieldForRun()).replace("\n"," ").split("#")[0].strip(),
804  "Use MagneticField_cff.py; the number is for determining which track selection to use."
805  )
806  dataset_cff = dataset_cff.replace(".oO[magneticField]Oo.",magneticField)
807  filePath = os.path.join( self.__cmssw, "src", packageName,
808  "python", outName + "_cff.py" )
809  if os.path.exists( filePath ):
810  existMsg = "The predefined dataset '%s' already exists.\n"%( outName )
811  askString = "Do you want to overwrite it? [y/n]\n"
812  inputQuery = existMsg + askString
813  while True:
814  userInput = raw_input( inputQuery ).lower()
815  if userInput == "y":
816  break
817  elif userInput == "n":
818  return
819  else:
820  inputQuery = askString
821  print ( "The predefined dataset '%s' will be stored in the file\n"
822  %( outName )
823  + filePath +
824  "\nFor future use you have to do 'scram b'." )
825  print
826  theFile = open( filePath, "w" )
827  theFile.write( dataset_cff )
828  theFile.close()
829  return
830 
def __getMagneticFieldForRun(self, run=-1, tolerance=0.5)
Definition: dataset.py:479
def __createSnippet(self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, repMap=None, crab=False, parent=False)
Definition: dataset.py:230
def replace(string, replacements)
double split
Definition: MVATrainer.cc:139
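
A hedged usage sketch (dataset name, outName and run window are invented): dump_cff() writes the snippet as a predefined dataset file into the local Alignment/OfflineValidation package and raises AllInOneError if that package is not checked out; after writing, 'scram b' is needed to pick up the new _cff.py.

from dataset import Dataset

d = Dataset("/MinimumBias/Run2016B-TkAlMinBias-PromptReco-v2/ALCARECO")  # hypothetical dataset name
# Writes <CMSSW_BASE>/src/Alignment/OfflineValidation/python/MinBias2016B_cff.py,
# asking interactively before overwriting an existing file.
d.dump_cff(outName="MinBias2016B", firstRun=273000, lastRun=274000)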
def dataset.Dataset.extractFileSizes (   self)
Get the file size for each file, from the eos ls -l command.

Definition at line 306 of file dataset.py.

References dataset.EOSDataset.castorDir, and dataset.Dataset.castorDir.

306  def extractFileSizes(self):
307  '''Get the file size for each file, from the eos ls -l command.'''
308  # EOS command does not work in tier3
309  lsout = castortools.runXRDCommand(self.castorDir,'dirlist')[0]
310  lsout = lsout.split('\n')
311  self.filesAndSizes = {}
312  for entry in lsout:
313  values = entry.split()
314  if( len(values) != 5):
315  continue
316  # using full abs path as a key.
317  file = '/'.join([self.lfnDir, values[4].split("/")[-1]])
318  size = values[1]
319  self.filesAndSizes[file] = size
320 
def extractFileSizes(self)
Definition: dataset.py:306
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
double split
Definition: MVATrainer.cc:139
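
A minimal sketch of the parsing step only (the listing line is invented, assuming a five-column 'eos ls -l'-style output in which the second column is the size and the fifth the file path):

lfnDir = "/store/caf/user/someuser/somedir"   # hypothetical LFN directory
lsout  = "drwxr-x 1234567 user group /eos/cms/store/caf/user/someuser/somedir/file_1.root"

filesAndSizes = {}
for entry in lsout.split("\n"):
    values = entry.split()
    if len(values) != 5:      # skip lines that do not look like a listing entry
        continue
    # key: the LFN of the file, built from lfnDir and the basename of the listed path
    f = "/".join([lfnDir, values[4].split("/")[-1]])
    filesAndSizes[f] = values[1]

print(filesAndSizes)   # {'/store/caf/user/someuser/somedir/file_1.root': '1234567'}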
def dataset.Dataset.fileInfoList (   self,
  parent = False 
)

Definition at line 901 of file dataset.py.

References dataset.Dataset.__dasLimit, and dataset.Dataset.__getFileInfoList().

Referenced by dataset.Dataset.fileList().

901  def fileInfoList( self, parent = False ):
902  return self.__getFileInfoList( self.__dasLimit, parent )
903 
def __getFileInfoList(self, dasLimit, parent=False)
Definition: dataset.py:533
def fileInfoList(self, parent=False)
Definition: dataset.py:901
def dataset.Dataset.fileList (   self,
  parent = False,
  firstRun = None,
  lastRun = None,
  forcerunselection = False 
)

Definition at line 863 of file dataset.py.

References dataset.Dataset.__fileList, dataset.Dataset.__findInJson(), dataset.Dataset.__parentFileList, dataset.Dataset.fileInfoList(), objects.autophobj.float, and dataset.Dataset.getrunnumberfromfilename().

Referenced by dataset.Dataset.__fileListSnippet(), and dataset.Dataset.createdatasetfile_hippy().

863  def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
864  if self.__fileList and not parent:
865  return self.__fileList
866  if self.__parentFileList and parent:
867  return self.__parentFileList
868 
869  fileList = [ self.__findInJson(fileInfo,"name")
870  for fileInfo in self.fileInfoList(parent) ]
871 
872  if firstRun is not None or lastRun is not None:
873  if firstRun is None: firstRun = -1
874  if lastRun is None: lastRun = float('infinity')
875  unknownfilenames, reasons = [], set()
876  for filename in fileList[:]:
877  try:
878  if not firstRun < self.getrunnumberfromfilename(filename) < lastRun:
879  fileList.remove(filename)
880  except AllInOneError as e:
881  if forcerunselection: raise
882  unknownfilenames.append(e.message.split("\n")[1])
883  reasons .add (e.message.split("\n")[2])
884  if reasons:
885  if len(unknownfilenames) == len(fileList):
886  print "Could not figure out the run numbers of any of the filenames for the following reason(s):"
887  else:
888  print "Could not figure out the run numbers of the following filenames:"
889  for filename in unknownfilenames:
890  print " "+filename
891  print "for the following reason(s):"
892  for reason in reasons:
893  print " "+reason
894  print "Using the files anyway. The runs will be filtered at the CMSSW level."
895  if not parent:
896  self.__fileList = fileList
897  else:
898  self.__parentFileList = fileList
899  return fileList
900 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:289
def fileInfoList(self, parent=False)
Definition: dataset.py:901
def getrunnumberfromfilename(filename)
Definition: dataset.py:837
def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:863
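
A short sketch (dataset name and run window invented): fileList() returns the LFNs from fileInfoList(), optionally dropping files whose run number, parsed from the LFN with getrunnumberfromfilename(), is not strictly between firstRun and lastRun.

from dataset import Dataset

d = Dataset("/MinimumBias/Run2016B-TkAlMinBias-PromptReco-v2/ALCARECO")  # hypothetical dataset name
files = d.fileList(firstRun=273000, lastRun=274000)   # keep files with 273000 < run < 274000
# With forcerunselection=True a file whose run number cannot be parsed raises AllInOneError
# instead of being kept with a warning.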
def dataset.Dataset.forcerunrange (   self,
  firstRun,
  lastRun,
  s 
)
s must be in the format run1:lum1-run2:lum2

Definition at line 311 of file dataset.py.

References dataset.Dataset.__firstusedrun, dataset.Dataset.__lastusedrun, dataset.int, and split.

Referenced by dataset.Dataset.getForceRunRangeFunction().

311  def forcerunrange(self, firstRun, lastRun, s):
312  """s must be in the format run1:lum1-run2:lum2"""
313  s = s.group()
314  run1 = s.split("-")[0].split(":")[0]
315  lum1 = s.split("-")[0].split(":")[1]
316  try:
317  run2 = s.split("-")[1].split(":")[0]
318  lum2 = s.split("-")[1].split(":")[1]
319  except IndexError:
320  run2 = run1
321  lum2 = lum1
322  if int(run2) < firstRun or int(run1) > lastRun:
323  return ""
324  if int(run1) < firstRun or firstRun < 0:
325  run1 = firstRun
326  lum1 = 1
327  if int(run2) > lastRun:
328  run2 = lastRun
329  lum2 = "max"
330  if int(run1) < self.__firstusedrun or self.__firstusedrun < 0:
331  self.__firstusedrun = int(run1)
332  if int(run2) > self.__lastusedrun:
333  self.__lastusedrun = int(run2)
334  return "%s:%s-%s:%s" % (run1, lum1, run2, lum2)
335 
def forcerunrange(self, firstRun, lastRun, s)
Definition: dataset.py:311
double split
Definition: MVATrainer.cc:139
def dataset.Dataset.getForceRunRangeFunction (   self,
  firstRun,
  lastRun 
)

Definition at line 336 of file dataset.py.

References dataset.Dataset.forcerunrange().

Referenced by dataset.Dataset.__lumiSelectionSnippet().

336  def getForceRunRangeFunction(self, firstRun, lastRun):
337  def forcerunrangefunction(s):
338  return self.forcerunrange(firstRun, lastRun, s)
339  return forcerunrangefunction
340 
def forcerunrange(self, firstRun, lastRun, s)
Definition: dataset.py:311
def getForceRunRangeFunction(self, firstRun, lastRun)
Definition: dataset.py:336
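
A minimal sketch of how the two methods work together (the lumi string is invented; in the real tool the substitution is driven by __lumiSelectionSnippet(), which also sets up the first/last-used-run bookkeeping mimicked here): getForceRunRangeFunction() returns a closure that re.sub() calls with each run1:lum1-run2:lum2 match, and forcerunrange() clamps the block to the requested range or drops it entirely.

import re
from dataset import Dataset

d = Dataset("/MinimumBias/Run2016B-TkAlMinBias-PromptReco-v2/ALCARECO")  # hypothetical dataset name
d._Dataset__firstusedrun = -1   # mimic the bookkeeping normally initialised inside the tool
d._Dataset__lastusedrun = -1

lumimask = "273000:1-273500:200,274000:1-275000:50"
clamp = d.getForceRunRangeFunction(273400, 274500)
print(re.sub(r"\d+:\d+-\d+:\d+", clamp, lumimask))
# -> 273400:1-273500:200,274000:1-274500:max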
def dataset.Dataset.getPrimaryDatasetEntries (   self)

Definition at line 326 of file dataset.py.

References dataset.int, runall.testit.report, WorkFlowRunner.WorkFlowRunner.report, dataset.BaseDataset.report, and ALIUtils.report.

326  def getPrimaryDatasetEntries( self ):
327  if self.report is not None and self.report:
328  return int(self.report.get('PrimaryDatasetEntries',-1))
329  return -1
330 
331 
def getPrimaryDatasetEntries(self)
Definition: dataset.py:326
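
A small sketch (the dataset name and report dictionary are invented): the number of primary-dataset entries is simply read from an attached report, with -1 as the fallback.

from dataset import Dataset

d = Dataset("/MinimumBias/Run2016B-TkAlMinBias-PromptReco-v2/ALCARECO")  # hypothetical dataset name
d.report = {'PrimaryDatasetEntries': '123456'}
print(d.getPrimaryDatasetEntries())   # -> 123456
d.report = None
print(d.getPrimaryDatasetEntries())   # -> -1 when no report is available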
def dataset.Dataset.getrunnumberfromfilename (   filename)
static

Definition at line 837 of file dataset.py.

References Vispa.Plugins.EdmBrowser.EdmDataAccessor.all(), dataset.int, and join().

Referenced by dataset.Dataset.fileList().

837  def getrunnumberfromfilename(filename):
838  parts = filename.split("/")
839  result = error = None
840  if parts[0] != "" or parts[1] != "store":
841  error = "does not start with /store"
842  elif parts[2] in ["mc", "relval"]:
843  result = 1
844  elif parts[-2] != "00000" or not parts[-1].endswith(".root"):
845  error = "does not end with 00000/something.root"
846  elif len(parts) != 12:
847  error = "should be exactly 11 slashes counting the first one"
848  else:
849  runnumberparts = parts[-5:-2]
850  if not all(len(part)==3 for part in runnumberparts):
851  error = "the 3 directories {} do not have length 3 each".format("/".join(runnumberparts))
852  try:
853  result = int("".join(runnumberparts))
854  except ValueError:
855  error = "the 3 directories {} do not form an integer".format("/".join(runnumberparts))
856 
857  if error:
858  error = "could not figure out which run number this file is from:\n{}\n{}".format(filename, error)
859  raise AllInOneError(error)
860 
861  return result
862 
def getrunnumberfromfilename(filename)
Definition: dataset.py:837
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
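
A short sketch (the LFNs are invented): for data, the run number is rebuilt from the three 3-digit directories that precede the final 00000/ directory; /store/mc and /store/relval files always map to run 1.

from dataset import Dataset

lfn = "/store/data/Run2016B/MinimumBias/ALCARECO/TkAlMinBias-PromptReco-v2/000/273/725/00000/0123ABCD.root"
print(Dataset.getrunnumberfromfilename(lfn))   # -> 273725, i.e. int("000" + "273" + "725")
print(Dataset.getrunnumberfromfilename(
    "/store/mc/RunIISummer16/MinBias/ALCARECO/TkAlMinBias-v1/00000/file.root"))   # -> 1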
def dataset.Dataset.magneticField (   self)

Definition at line 696 of file dataset.py.

References dataset.Dataset.__getMagneticField(), and dataset.Dataset.__magneticField.

696  def magneticField( self ):
697  if not self.__magneticField:
698  self.__magneticField = self.__getMagneticField()
699  return self.__magneticField
700 
def __getMagneticField(self)
Definition: dataset.py:408
def magneticField(self)
Definition: dataset.py:696
def dataset.Dataset.magneticFieldForRun (   self,
  run = -1 
)

Definition at line 701 of file dataset.py.

References dataset.Dataset.__getMagneticFieldForRun().

701  def magneticFieldForRun( self, run = -1 ):
702  return self.__getMagneticFieldForRun(run)
703 
def __getMagneticFieldForRun(self, run=-1, tolerance=0.5)
Definition: dataset.py:479
def magneticFieldForRun(self, run=-1)
Definition: dataset.py:701
def dataset.Dataset.name (   self)
def dataset.Dataset.parentDataset (   self)

Definition at line 704 of file dataset.py.

References dataset.Dataset.__getParentDataset(), dataset.Dataset.__parentDataset, and dataset.Dataset.datasetSnippet().

Referenced by dataset.Dataset.__getFileInfoList().

704  def parentDataset( self ):
705  if not self.__parentDataset:
706  self.__parentDataset = self.__getParentDataset()
707  return self.__parentDataset
708 
def parentDataset(self)
Definition: dataset.py:704
def __getParentDataset(self)
Definition: dataset.py:398
def dataset.Dataset.predefined (   self)

Definition at line 907 of file dataset.py.

References dataset.Dataset.__predefined.

907  def predefined( self ):
908  return self.__predefined
909 
def predefined(self)
Definition: dataset.py:907
def dataset.Dataset.printInfo (   self)

Definition at line 321 of file dataset.py.

References dataset.EOSDataset.castorDir, dataset.Dataset.castorDir, dataset.Dataset.lfnDir, and dataset.BaseDataset.name.

321  def printInfo(self):
322  print 'sample : ' + self.name
323  print 'LFN : ' + self.lfnDir
324  print 'Castor path : ' + self.castorDir
325 
def printInfo(self)
Definition: dataset.py:321
def dataset.Dataset.runList (   self)

Definition at line 910 of file dataset.py.

References dataset.Dataset.__getRunList(), and dataset.Dataset.__runList.

910  def runList( self ):
911  if self.__runList:
912  return self.__runList
913  return self.__getRunList()
914 
915 
def __getRunList(self)
Definition: dataset.py:599
def runList(self)
Definition: dataset.py:910

Member Data Documentation

dataset.Dataset.__alreadyStored
private

Definition at line 23 of file dataset.py.

Referenced by dataset.Dataset.dump_cff().

dataset.Dataset.__cmssw
private

Definition at line 24 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField(), and dataset.Dataset.dump_cff().

dataset.Dataset.__cmsswrelease
private

Definition at line 25 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField().

dataset.Dataset.__dasLimit
private

Definition at line 19 of file dataset.py.

Referenced by dataset.Dataset.fileInfoList().

dataset.Dataset.__dataType
private
tuple dataset.Dataset.__dummy_source_template
staticprivate
Initial value:
1 = ("readFiles = cms.untracked.vstring()\n"
2  "secFiles = cms.untracked.vstring()\n"
3  "%(process)ssource = cms.Source(\"PoolSource\",\n"
4  "%(tab)s secondaryFileNames ="
5  "secFiles,\n"
6  "%(tab)s fileNames = readFiles\n"
7  ")\n"
8  "readFiles.extend(['dummy_File.root'])\n"
9  "%(process)smaxEvents = cms.untracked.PSet( "
10  "input = cms.untracked.int32(%(nEvents)s) )\n"
11  "%(skipEventsString)s\n")

Definition at line 103 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().
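
A minimal sketch of how the %(...)s placeholders in this template are filled with a replacement map like the theMap dictionaries built in datasetSnippet() and dump_cff() (the values below are simplified stand-ins):

template = ("readFiles = cms.untracked.vstring()\n"
            "secFiles = cms.untracked.vstring()\n"
            "%(process)ssource = cms.Source(\"PoolSource\",\n"
            "%(tab)s secondaryFileNames = secFiles,\n"
            "%(tab)s fileNames = readFiles\n"
            ")\n"
            "readFiles.extend(['dummy_File.root'])\n"
            "%(process)smaxEvents = cms.untracked.PSet( "
            "input = cms.untracked.int32(%(nEvents)s) )\n"
            "%(skipEventsString)s\n")

repMap = {"process": "process.",
          "tab": " " * len("process."),
          "nEvents": "100",
          "skipEventsString": ""}

print(template % repMap)   # process.source / process.maxEvents snippet with the placeholders filled in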

dataset.Dataset.__fileInfoList
private

Definition at line 21 of file dataset.py.

Referenced by dataset.Dataset.__getFileInfoList().

dataset.Dataset.__fileList
private

Definition at line 20 of file dataset.py.

Referenced by dataset.Dataset.fileList().

dataset.Dataset.__filename
private
dataset.Dataset.__firstusedrun
private
dataset.Dataset.__lastusedrun
private
dataset.Dataset.__magneticField
private
dataset.Dataset.__name
private
dataset.Dataset.__official
private

Definition at line 34 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__origName
private

Definition at line 18 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__parentDataset
private

Definition at line 28 of file dataset.py.

Referenced by dataset.Dataset.parentDataset().

dataset.Dataset.__parentFileInfoList
private

Definition at line 30 of file dataset.py.

Referenced by dataset.Dataset.__getFileInfoList().

dataset.Dataset.__parentFileList
private

Definition at line 29 of file dataset.py.

Referenced by dataset.Dataset.fileList().

dataset.Dataset.__predefined
private
dataset.Dataset.__runList
private

Definition at line 22 of file dataset.py.

Referenced by dataset.Dataset.__getRunList(), and dataset.Dataset.runList().

dataset.Dataset.__source_template
staticprivate

Definition at line 85 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().

dataset.Dataset.bad_files

Definition at line 282 of file dataset.py.

dataset.Dataset.castorDir

Definition at line 266 of file dataset.py.

Referenced by dataset.Dataset.extractFileSizes(), and dataset.Dataset.printInfo().

dataset.Dataset.dasData = das_client.get_data(dasQuery, dasLimit)
static

Definition at line 342 of file dataset.py.

dataset.Dataset.error = self.__findInJson(jsondict,["data","error"])
static
dataset.Dataset.files

Definition at line 273 of file dataset.py.

dataset.Dataset.filesAndSizes

Definition at line 311 of file dataset.py.

dataset.Dataset.good_files

Definition at line 283 of file dataset.py.

int dataset.Dataset.i = 0
static

Definition at line 359 of file dataset.py.

dataset.Dataset.jsondict = json.loads( dasData )
static

Definition at line 344 of file dataset.py.

string dataset.Dataset.jsonfile = "das_query_output_%i.txt"
static

Definition at line 358 of file dataset.py.

dataset.Dataset.jsonfile = jsonfile%i
static

Definition at line 362 of file dataset.py.

dataset.Dataset.jsonstr = self.__findInJson(jsondict,"reason")
static

Definition at line 354 of file dataset.py.

dataset.Dataset.lfnDir

Definition at line 265 of file dataset.py.

Referenced by dataset.Dataset.printInfo().

dataset.Dataset.maskExists

Definition at line 267 of file dataset.py.

string dataset.Dataset.msg = "The DAS query returned an error. The output is very long, and has been stored in:\n"
static
dataset.Dataset.report

Definition at line 268 of file dataset.py.

Referenced by addOnTests.testit.run().

dataset.Dataset.theFile = open( jsonfile, "w" )
static

Definition at line 363 of file dataset.py.