
dataset.Dataset Class Reference
Inheritance diagram for dataset.Dataset:
dataset.BaseDataset

Public Member Functions

def __init__ (self, datasetName, dasLimit=0, tryPredefinedFirst=True, cmssw=os.environ["CMSSW_BASE"], cmsswrelease=os.environ["CMSSW_RELEASE_BASE"], magneticfield=None, dasinstance=None)
 
def __init__ (self, name, user, pattern='.*root')
 
def buildListOfBadFiles (self)
 
def buildListOfFiles (self, pattern='.*root')
 
def convertTimeToRun (self, begin=None, end=None, firstRun=None, lastRun=None, shortTuple=True)
 
def createdatasetfile_hippy (self, filename, filesperjob, firstrun, lastrun)
 
def datasetSnippet (self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, crab=False, parent=False)
 
def dataType (self)
 
def dump_cff (self, outName=None, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, parent=False)
 
def extractFileSizes (self)
 
def fileInfoList (self, parent=False)
 
def fileList (self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
 
def forcerunrange (self, firstRun, lastRun, s)
 
def getForceRunRangeFunction (self, firstRun, lastRun)
 
def getPrimaryDatasetEntries (self)
 
def magneticField (self)
 
def magneticFieldForRun (self, run=-1)
 
def name (self)
 
def parentDataset (self)
 
def predefined (self)
 
def printInfo (self)
 
def runList (self)
 
- Public Member Functions inherited from dataset.BaseDataset
def __init__ (self, name, user, pattern='.*root', run_range=None, dbsInstance=None)
 
def buildListOfBadFiles (self)
 
def buildListOfFiles (self, pattern)
 
def extractFileSizes (self)
 
def getPrimaryDatasetEntries (self)
 
def listOfFiles (self)
 
def listOfGoodFiles (self)
 
def listOfGoodFilesWithPrescale (self, prescale)
 
def printFiles (self, abspath=True, info=True)
 
def printInfo (self)
 

Static Public Member Functions

def getrunnumberfromfilename (filename)
 

Public Attributes

 bad_files
 
 castorDir
 
 files
 
 filesAndSizes
 
 good_files
 
 lfnDir
 
 maskExists
 
 report
 
- Public Attributes inherited from dataset.BaseDataset
 bad_files
 
 dbsInstance
 
 files
 
 filesAndSizes
 
 good_files
 
 name
 
 pattern
 
 primaryDatasetEntries
 
 report
 
 run_range
 
 user
 

Static Public Attributes

 dasData
 
 error = self.__findInJson(jsondict,["data","error"])
 
int i = 0
 
 jsondict = json.loads( dasData )
 
string jsonfile = "das_query_output_%i.txt"
 
 jsonfile = jsonfile%i
 
 jsonstr = self.__findInJson(jsondict,"reason")
 
string msg = "The DAS query returned an error. The output is very long, and has been stored in:\n"
 
 theFile = open( jsonfile, "w" )
 

Private Member Functions

def __chunks (self, theList, n)
 
def __createSnippet (self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, repMap=None, crab=False, parent=False)
 
def __dateString (self, date)
 
def __datetime (self, stringForDas)
 
def __fileListSnippet (self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
 
def __find_ge (self, a, x)
 
def __find_lt (self, a, x)
 
def __findInJson (self, jsondict, strings)
 
def __getData (self, dasQuery, dasLimit=0)
 
def __getDataType (self)
 
def __getFileInfoList (self, dasLimit, parent=False)
 
def __getMagneticField (self)
 
def __getMagneticFieldForRun (self, run=-1, tolerance=0.5)
 
def __getParentDataset (self)
 
def __getRunList (self)
 
def __lumiSelectionSnippet (self, jsonPath=None, firstRun=None, lastRun=None)
 

Private Attributes

 __cmssw
 
 __cmsswrelease
 
 __dasinstance
 
 __dasLimit
 
 __dataType
 
 __filename
 
 __firstusedrun
 
 __inputMagneticField
 
 __lastusedrun
 
 __magneticField
 
 __name
 
 __official
 
 __origName
 
 __parentDataset
 
 __predefined
 

Static Private Attributes

tuple __dummy_source_template
 
 __source_template
 

Detailed Description

Definition at line 35 of file dataset.py.

Constructor & Destructor Documentation

def dataset.Dataset.__init__ (   self,
  datasetName,
  dasLimit = 0,
  tryPredefinedFirst = True,
  cmssw = os.environ["CMSSW_BASE"],
  cmsswrelease = os.environ["CMSSW_RELEASE_BASE"],
  magneticfield = None,
  dasinstance = None 
)

Definition at line 38 of file dataset.py.

Referenced by dataset.Dataset.__init__().

38  magneticfield = None, dasinstance = None):
39  self.__name = datasetName
40  self.__origName = datasetName
41  self.__dasLimit = dasLimit
42  self.__dasinstance = dasinstance
43  self.__cmssw = cmssw
44  self.__cmsswrelease = cmsswrelease
45  self.__firstusedrun = None
46  self.__lastusedrun = None
47  self.__parentDataset = None
48 
49  # check, if dataset name matches CMS dataset naming scheme
50  if re.match( r'/.+/.+/.+', self.__name ):
51  self.__official = True
52  fileName = "Dataset" + self.__name.replace("/","_") + "_cff.py"
53  else:
54  self.__official = False
55  fileName = self.__name + "_cff.py"
56 
57  searchPath1 = os.path.join( self.__cmssw, "python",
58  "Alignment", "OfflineValidation",
59  fileName )
60  searchPath2 = os.path.join( self.__cmssw, "src",
61  "Alignment", "OfflineValidation",
62  "python", fileName )
63  searchPath3 = os.path.join( self.__cmsswrelease,
64  "python", "Alignment",
65  "OfflineValidation", fileName )
66  if self.__official and not tryPredefinedFirst:
67  self.__predefined = False
68  elif os.path.exists( searchPath1 ):
69  self.__predefined = True
70  self.__filename = searchPath1
71  elif os.path.exists( searchPath2 ):
72  msg = ("The predefined dataset '%s' does exist in '%s', but "
73  "you need to run 'scram b' first."
74  %( self.__name, searchPath2 ))
75  if self.__official:
76  print msg
77  print "Getting the data from DAS again. To go faster next time, run scram b."
78  else:
79  raise AllInOneError( msg )
80  elif os.path.exists( searchPath3 ):
81  self.__predefined = True
82  self.__filename = searchPath3
83  elif self.__official:
84  self.__predefined = False
85  else:
86  msg = ("The predefined dataset '%s' does not exist. Please "
87  "create it first or check for typos."%( self.__name ))
88  raise AllInOneError( msg )
89 
90  if self.__predefined and self.__official:
91  self.__name = "Dataset" + self.__name.replace("/","_")
92 
93  if magneticfield is not None:
94  try:
95  magneticfield = float(magneticfield)
96  except ValueError:
97  raise AllInOneError("Bad magneticfield {} which can't be converted to float".format(magneticfield))
98  self.__inputMagneticField = magneticfield
99 
100  self.__dataType = self.__getDataType()
101  self.__magneticField = self.__getMagneticField()
102
103 
def __getDataType(self)
Definition: dataset.py:408
def __getMagneticField(self)
Definition: dataset.py:447
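
For orientation, here is a minimal usage sketch of the DAS-based constructor documented above. It assumes a CMSSW environment (the defaults read CMSSW_BASE and CMSSW_RELEASE_BASE from the environment) and that this module is importable as 'dataset'; the dataset name is a placeholder.

    # Hypothetical usage sketch; the dataset name below is a placeholder.
    from dataset import Dataset   # module containing this class in the all-in-one tool

    d = Dataset("/MinimumBias/SomeEra-SomeReco/ALCARECO",   # placeholder dataset name
                dasLimit=0,                # no limit on the number of DAS results
                tryPredefinedFirst=True,   # reuse a predefined _cff.py if one exists
                dasinstance="prod/global") # assumed DAS instance name

    datatype = d.dataType()        # "data", "mc" or "unknown"
    bfield   = d.magneticField()   # e.g. "MagneticField" or "MagneticField_0T"
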
def dataset.Dataset.__init__ (   self,
  name,
  user,
  pattern = '.*root' 
)

Definition at line 264 of file dataset.py.

References dataset.Dataset.__init__().

264  def __init__(self, name, user, pattern='.*root'):
265  self.lfnDir = castorBaseDir(user) + name
266  self.castorDir = castortools.lfnToCastor( self.lfnDir )
267  self.maskExists = False
268  self.report = None
269  super(Dataset, self).__init__(name, user, pattern)
270 
def __init__(self, datasetName, dasLimit=0, tryPredefinedFirst=True, cmssw=os.environ["CMSSW_BASE"], cmsswrelease=os.environ["CMSSW_RELEASE_BASE"], magneticfield=None, dasinstance=None)
Definition: dataset.py:38

Member Function Documentation

def dataset.Dataset.__chunks (   self,
  theList,
  n 
)
private
Yield successive n-sized chunks from theList.

Definition at line 104 of file dataset.py.

Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.__lumiSelectionSnippet(), and dataset.Dataset.createdatasetfile_hippy().

104  def __chunks( self, theList, n ):
105  """ Yield successive n-sized chunks from theList.
106  """
107  for i in xrange( 0, len( theList ), n ):
108  yield theList[i:i+n]
109 
def __chunks(self, theList, n)
Definition: dataset.py:104
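
As a self-contained illustration of the same chunking idea (the original uses Python 2's xrange), a standalone equivalent generator:

    def chunks(the_list, n):
        """Yield successive n-sized chunks from the_list (standalone sketch)."""
        for i in range(0, len(the_list), n):
            yield the_list[i:i + n]

    # e.g. list(chunks([1, 2, 3, 4, 5], 2)) -> [[1, 2], [3, 4], [5]]
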
def dataset.Dataset.__createSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  repMap = None,
  crab = False,
  parent = False 
)
private

Definition at line 260 of file dataset.py.

References dataset.Dataset.__dummy_source_template, dataset.Dataset.__fileListSnippet(), dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.__source_template, dataset.Dataset.convertTimeToRun(), and dataset.int.

Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.datasetSnippet(), and dataset.Dataset.dump_cff().

260  crab = False, parent = False ):
261 
262  if firstRun:
263  firstRun = int( firstRun )
264  if lastRun:
265  lastRun = int( lastRun )
266  if ( begin and firstRun ) or ( end and lastRun ):
267  msg = ( "The Usage of "
268  + "'begin' & 'firstRun' " * int( bool( begin and
269  firstRun ) )
270  + "and " * int( bool( ( begin and firstRun ) and
271  ( end and lastRun ) ) )
272  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
273  + "is ambigous." )
274  raise AllInOneError( msg )
275  if begin or end:
276  ( firstRun, lastRun ) = self.convertTimeToRun(
277  begin = begin, end = end, firstRun = firstRun,
278  lastRun = lastRun )
279  if ( firstRun and lastRun ) and ( firstRun > lastRun ):
280  msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
281  "chosen is greater than the upper time/runrange limit "
282  "('end'/'lastRun').")
283  raise AllInOneError( msg )
284 
285  lumiSecExtend = self.__lumiSelectionSnippet(jsonPath=jsonPath, firstRun=firstRun, lastRun=lastRun)
286  lumiStr = goodLumiSecStr = ""
287  if lumiSecExtend:
288  goodLumiSecStr = "lumiSecs = cms.untracked.VLuminosityBlockRange()\n"
289  lumiStr = " lumisToProcess = lumiSecs,\n"
290 
291  files = self.__fileListSnippet(crab=crab, parent=parent, firstRun=firstRun, lastRun=lastRun, forcerunselection=False)
292 
293  theMap = repMap
294  theMap["files"] = files
295  theMap["json"] = jsonPath
296  theMap["lumiStr"] = lumiStr
297  theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
298  theMap["lumiSecExtend"] = lumiSecExtend
299  if crab:
300  dataset_snippet = self.__dummy_source_template%( theMap )
301  else:
302  dataset_snippet = self.__source_template%( theMap )
303  return dataset_snippet
304 
def __lumiSelectionSnippet(self, jsonPath=None, firstRun=None, lastRun=None)
Definition: dataset.py:140
def convertTimeToRun(self, begin=None, end=None, firstRun=None, lastRun=None, shortTuple=True)
Definition: dataset.py:677
tuple __dummy_source_template
Definition: dataset.py:128
def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:236
def dataset.Dataset.__dateString (   self,
  date 
)
private

Definition at line 672 of file dataset.py.

References dataset.Dataset.convertTimeToRun(), and harvestTrackValidationPlots.str.

Referenced by dataset.Dataset.convertTimeToRun().

672  def __dateString(self, date):
673  return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)
674 
def __dateString(self, date)
Definition: dataset.py:672
def dataset.Dataset.__datetime (   self,
  stringForDas 
)
private

Definition at line 663 of file dataset.py.

References dataset.int.

Referenced by dataset.Dataset.convertTimeToRun().

663  def __datetime(self, stringForDas):
664  if len(stringForDas) != 8:
665  raise AllInOneError(stringForDas + " is not a valid date string.\n"
666  + "DAS accepts dates in the form 'yyyymmdd'")
667  year = stringForDas[:4]
668  month = stringForDas[4:6]
669  day = stringForDas[6:8]
670  return datetime.date(int(year), int(month), int(day))
671 
def __datetime(self, stringForDas)
Definition: dataset.py:663
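
A standalone sketch of the round trip performed by __datetime and __dateString, using only the standard library:

    import datetime

    def to_date(string_for_das):            # mirrors __datetime
        if len(string_for_das) != 8:
            raise ValueError(string_for_das + " is not a valid 'yyyymmdd' date string")
        return datetime.date(int(string_for_das[:4]),
                             int(string_for_das[4:6]),
                             int(string_for_das[6:8]))

    def to_das_string(date):                # mirrors __dateString
        return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)

    # to_das_string(to_date("20180501")) == "20180501"
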
def dataset.Dataset.__fileListSnippet (   self,
  crab = False,
  parent = False,
  firstRun = None,
  lastRun = None,
  forcerunselection = False 
)
private

Definition at line 236 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__createSnippet(), dataset.Dataset.__name, dataset.Dataset.fileList(), join(), and list().

Referenced by dataset.Dataset.__createSnippet().

236  def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
237  if crab:
238  files = ""
239  else:
240  splitFileList = list( self.__chunks( self.fileList(firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
241  if not splitFileList:
242  raise AllInOneError("No files found for dataset {}. Check the spelling, or maybe specify another das instance?".format(self.__name))
243  fileStr = [ "',\n'".join( files ) for files in splitFileList ]
244  fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
245  for files in fileStr ]
246  files = "\n".join( fileStr )
247 
248  if parent:
249  splitParentFileList = list( self.__chunks( self.fileList(parent=True, firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
250  parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ]
251  parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
252  for parentFiles in parentFileStr ]
253  parentFiles = "\n".join( parentFileStr )
254  files += "\n\n" + parentFiles
255 
256  return files
257 
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def __chunks(self, theList, n)
Definition: dataset.py:104
def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:236
def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:917
def dataset.Dataset.__find_ge (   self,
  a,
  x 
)
private

Definition at line 312 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

312  def __find_ge( self, a, x):
313  'Find leftmost item greater than or equal to x'
314  i = bisect.bisect_left( a, x )
315  if i != len( a ):
316  return i
317  raise ValueError
318 
def __find_ge(self, a, x)
Definition: dataset.py:312
def dataset.Dataset.__find_lt (   self,
  a,
  x 
)
private

Definition at line 305 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

305  def __find_lt( self, a, x ):
306  'Find rightmost value less than x'
307  i = bisect.bisect_left( a, x )
308  if i:
309  return i-1
310  raise ValueError
311 
def __find_lt(self, a, x)
Definition: dataset.py:305
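
Both helpers are thin wrappers around bisect.bisect_left on a sorted run list; a standalone sketch with placeholder run numbers:

    import bisect

    runs = [273158, 273302, 273402, 273725]   # assumed sorted run numbers

    def find_ge(a, x):
        """Index of the leftmost item >= x (mirrors __find_ge)."""
        i = bisect.bisect_left(a, x)
        if i != len(a):
            return i
        raise ValueError

    def find_lt(a, x):
        """Index of the rightmost value < x (mirrors __find_lt)."""
        i = bisect.bisect_left(a, x)
        if i:
            return i - 1
        raise ValueError

    # find_ge(runs, 273300) -> 1 (run 273302); find_lt(runs, 273300) -> 0 (run 273158)
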
def dataset.Dataset.__findInJson (   self,
  jsondict,
  strings 
)
private

Definition at line 319 of file dataset.py.

References dataset.Dataset.__findInJson().

Referenced by dataset.Dataset.__findInJson(), dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.fileList().

319  def __findInJson(self, jsondict, strings):
320  if isinstance(strings, str):
321  strings = [ strings ]
322 
323  if len(strings) == 0:
324  return jsondict
325  if isinstance(jsondict,dict):
326  if strings[0] in jsondict:
327  try:
328  return self.__findInJson(jsondict[strings[0]], strings[1:])
329  except KeyError:
330  pass
331  else:
332  for a in jsondict:
333  if strings[0] in a:
334  try:
335  return self.__findInJson(a[strings[0]], strings[1:])
336  except (TypeError, KeyError): #TypeError because a could be a string and contain strings[0]
337  pass
338  #if it's not found
339  raise KeyError("Can't find " + strings[0])
340 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:319
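
The helper walks nested DAS output, where each level may be a dict or a list of dicts. A simplified standalone sketch of the same idea, applied to an illustrative (made-up) DAS-style record:

    def find_in_json(jsondict, keys):
        """Recursively follow keys through nested dicts/lists (sketch of __findInJson)."""
        if isinstance(keys, str):
            keys = [keys]
        if not keys:
            return jsondict
        if isinstance(jsondict, dict):
            if keys[0] in jsondict:
                return find_in_json(jsondict[keys[0]], keys[1:])
        else:
            for entry in jsondict:
                if keys[0] in entry:
                    try:
                        return find_in_json(entry[keys[0]], keys[1:])
                    except (TypeError, KeyError):
                        pass
        raise KeyError("Can't find " + keys[0])

    # Illustrative record only:
    record = [{"dataset": [{"name": "/A/B/C", "datatype": "data"}]}]
    # find_in_json(record, ["dataset", "datatype"]) -> "data"
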
def dataset.Dataset.__getData (   self,
  dasQuery,
  dasLimit = 0 
)
private
def dataset.Dataset.__getDataType (   self)
private

Definition at line 408 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__predefined, dataset.BaseDataset.name, and python.rootplot.root2matplotlib.replace().

Referenced by dataset.Dataset.dataType().

408  def __getDataType( self ):
409  if self.__predefined:
410  with open(self.__filename) as f:
411  datatype = None
412  for line in f.readlines():
413  if line.startswith("#data type: "):
414  if datatype is not None:
415  raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
416  datatype = line.replace("#data type: ", "").replace("\n","")
417  return datatype
418  return "unknown"
419 
420  dasQuery_type = ( 'dataset dataset=%s instance=%s detail=true | grep dataset.datatype,'
421  'dataset.name'%( self.__name, self.__dasinstance ) )
422  #####################################################################
423  #can remove this once dasgoclient is updated
424  if olddas: dasQuery_type = dasQuery_type.replace("detail=true", "")
425  #####################################################################
426  data = self.__getData( dasQuery_type )
427 
428  try:
429  return self.__findInJson(data, ["dataset", "datatype"])
430  except KeyError:
431  print ("Cannot find the datatype of the dataset '%s'\n"
432  "It may not be possible to automatically find the magnetic field,\n"
433  "and you will not be able run in CRAB mode"
434  %( self.name() ))
435  return "unknown"
436 
def __getDataType(self)
Definition: dataset.py:408
def __findInJson(self, jsondict, strings)
Definition: dataset.py:319
def replace(string, replacements)
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:371
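
For predefined datasets the type is read back from the '#data type:' header comment that dump_cff() writes into the _cff.py file; a standalone sketch of that parsing step (function name is illustrative):

    def datatype_from_cff(filename):
        """Return the value of the '#data type: ' header line, or 'unknown' (sketch)."""
        datatype = None
        with open(filename) as f:
            for line in f:
                if line.startswith("#data type: "):
                    if datatype is not None:
                        raise RuntimeError(filename + " has multiple 'data type' lines.")
                    datatype = line.replace("#data type: ", "").rstrip("\n")
        return datatype if datatype is not None else "unknown"
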
def dataset.Dataset.__getFileInfoList (   self,
  dasLimit,
  parent = False 
)
private

Definition at line 589 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__predefined, dataset.BaseDataset.name, and dataset.Dataset.parentDataset().

Referenced by dataset.Dataset.fileInfoList().

589  def __getFileInfoList( self, dasLimit, parent = False ):
590  if self.__predefined:
591  if parent:
592  extendstring = "secFiles.extend"
593  else:
594  extendstring = "readFiles.extend"
595  with open(self.__fileName) as f:
596  files = []
597  copy = False
598  for line in f.readlines():
599  if "]" in line:
600  copy = False
601  if copy:
602  files.append({name: line.translate(None, "', " + '"')})
603  if extendstring in line and "[" in line and "]" not in line:
604  copy = True
605  return files
606 
607  if parent:
608  searchdataset = self.parentDataset()
609  else:
610  searchdataset = self.__name
611  dasQuery_files = ( 'file dataset=%s instance=%s detail=true | grep file.name, file.nevents, '
612  'file.creation_time, '
613  'file.modification_time'%( searchdataset, self.__dasinstance ) )
614  #####################################################################
615  #can remove this once dasgoclient is updated
616  if olddas: dasQuery_files = dasQuery_files.replace("detail=true", "")
617  #####################################################################
618  print "Requesting file information for '%s' from DAS..."%( searchdataset ),
619  sys.stdout.flush()
620  data = self.__getData( dasQuery_files, dasLimit )
621  print "Done."
622  data = [ self.__findInJson(entry,"file") for entry in data ]
623  if len( data ) == 0:
624  msg = ("No files are available for the dataset '%s'. This can be "
625  "due to a typo or due to a DAS problem. Please check the "
626  "spelling of the dataset and/or retry to run "
627  "'validateAlignments.py'."%( self.name() ))
628  raise AllInOneError( msg )
629  fileInformationList = []
630  for file in data:
631  fileName = 'unknown'
632  try:
633  fileName = self.__findInJson(file, "name")
634  fileCreationTime = self.__findInJson(file, "creation_time")
635  fileNEvents = self.__findInJson(file, "nevents")
636  except KeyError:
637  print ("DAS query gives bad output for file '%s'. Skipping it.\n"
638  "It may work if you try again later.") % fileName
639  fileNEvents = 0
640  # select only non-empty files
641  if fileNEvents == 0:
642  continue
643  fileDict = { "name": fileName,
644  "creation_time": fileCreationTime,
645  "nevents": fileNEvents
646  }
647  fileInformationList.append( fileDict )
648  fileInformationList.sort( key=lambda info: self.__findInJson(info,"name") )
649  return fileInformationList
650 
def __getFileInfoList(self, dasLimit, parent=False)
Definition: dataset.py:589
def __findInJson(self, jsondict, strings)
Definition: dataset.py:319
def parentDataset(self)
Definition: dataset.py:755
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:371
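
After the DAS query, the method keeps only non-empty files and sorts the result by file name; a standalone sketch of that post-processing on an illustrative list (all names and numbers are made up):

    records = [
        {"name": "/store/data/b.root", "nevents": 1200, "creation_time": 1500000000},
        {"name": "/store/data/a.root", "nevents": 0,    "creation_time": 1500000100},
        {"name": "/store/data/c.root", "nevents": 800,  "creation_time": 1500000200},
    ]

    file_info = sorted((r for r in records if r["nevents"] > 0),
                       key=lambda info: info["name"])
    # -> the empty file 'a.root' is dropped, the rest is ordered by name
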
def dataset.Dataset.__getMagneticField (   self)
private

Definition at line 447 of file dataset.py.

References dataset.Dataset.__cmssw, dataset.Dataset.__cmsswrelease, dataset.Dataset.__dasinstance, dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__name, dataset.Dataset.__predefined, python.rootplot.root2matplotlib.replace(), and digi_MixPreMix_cfi.strip.

Referenced by dataset.Dataset.magneticField().

447  def __getMagneticField( self ):
448  Bfieldlocation = os.path.join( self.__cmssw, "python", "Configuration", "StandardSequences" )
449  if not os.path.isdir(Bfieldlocation):
450  Bfieldlocation = os.path.join( self.__cmsswrelease, "python", "Configuration", "StandardSequences" )
451  Bfieldlist = [ f.replace("_cff.py",'') \
452  for f in os.listdir(Bfieldlocation) \
453  if f.startswith("MagneticField_") and f.endswith("_cff.py") ]
454  Bfieldlist.sort( key = lambda Bfield: -len(Bfield) ) #Put it in order of decreasing length, so that searching in the name gives the longer match
455 
456  if self.__inputMagneticField is not None:
457  if self.__inputMagneticField == 3.8:
458  return "MagneticField"
459  elif self.__inputMagneticField == 0:
460  return "MagneticField_0T"
461  else:
462  raise ValueError("Unknown input magnetic field {}".format(self.__inputMagneticField))
463 
464  if self.__predefined:
465  with open(self.__filename) as f:
466  datatype = None
467  Bfield = None
468  for line in f.readlines():
469  if line.startswith("#data type: "):
470  if datatype is not None:
471  raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
472  datatype = line.replace("#data type: ", "").replace("\n","")
473  datatype = datatype.split("#")[0].strip()
474  if line.startswith("#magnetic field: "):
475  if Bfield is not None:
476  raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
477  Bfield = line.replace("#magnetic field: ", "").replace("\n","")
478  Bfield = Bfield.split("#")[0].strip()
479  if Bfield is not None:
480  Bfield = Bfield.split(",")[0]
481  if Bfield in Bfieldlist or Bfield == "unknown":
482  return Bfield
483  else:
484  print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
485  print "Using Bfield='unknown' - this will revert to the default"
486  return "unknown"
487  elif datatype == "data":
488  return "MagneticField" #this should be in the "#magnetic field" line, but for safety in case it got messed up
489  else:
490  return "unknown"
491 
492  if self.__dataType == "data":
493  return "MagneticField"
494 
495  #try to find the magnetic field from DAS
496  #it seems to be there for the newer (7X) MC samples, except cosmics
497  dasQuery_B = ('dataset dataset=%s instance=%s'%(self.__name, self.__dasinstance))
498  data = self.__getData( dasQuery_B )
499 
500  try:
501  Bfield = self.__findInJson(data, ["dataset", "mcm", "sequences", "magField"])
502  if Bfield in Bfieldlist:
503  return Bfield
504  elif Bfield == "38T" or Bfield == "38T_PostLS1":
505  return "MagneticField"
506  elif "MagneticField_" + Bfield in Bfieldlist:
507  return "MagneticField_" + Bfield
508  elif Bfield == "":
509  pass
510  else:
511  print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
512  print "Using Bfield='unknown' - this will revert to the default magnetic field"
513  return "unknown"
514  except KeyError:
515  pass
516 
517  for possibleB in Bfieldlist:
518  if (possibleB != "MagneticField"
519  and possibleB.replace("MagneticField_","") in self.__name.replace("TkAlCosmics0T", "")):
520  #final attempt - try to identify the dataset from the name
521  #all cosmics dataset names contain "TkAlCosmics0T"
522  if possibleB == "MagneticField_38T" or possibleB == "MagneticField_38T_PostLS1":
523  return "MagneticField"
524  return possibleB
525 
526  return "unknown"
527 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:319
def replace(string, replacements)
def __getMagneticField(self)
Definition: dataset.py:447
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:371
def dataset.Dataset.__getMagneticFieldForRun (   self,
  run = -1,
  tolerance = 0.5 
)
private
For MC, this returns the same as the previous function.
   For data, it gets the magnetic field from the runs.  This is important for
   deciding which template to use for offlinevalidation

Definition at line 528 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getData(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__lastusedrun, dataset.Dataset.__magneticField, dataset.Dataset.__name, dataset.Dataset.__predefined, funct.abs(), objects.autophobj.float, python.rootplot.root2matplotlib.replace(), split, and digi_MixPreMix_cfi.strip.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dump_cff(), and dataset.Dataset.magneticFieldForRun().

528  def __getMagneticFieldForRun( self, run = -1, tolerance = 0.5 ):
529  """For MC, this returns the same as the previous function.
530  For data, it gets the magnetic field from the runs. This is important for
531  deciding which template to use for offlinevalidation
532  """
533  if self.__dataType == "mc" and self.__magneticField == "MagneticField":
534  return 3.8 #For 3.8T MC the default MagneticField is used
535  if self.__inputMagneticField is not None:
536  return self.__inputMagneticField
537  if "T" in self.__magneticField:
538  Bfield = self.__magneticField.split("T")[0].replace("MagneticField_","")
539  try:
540  return float(Bfield) / 10.0 #e.g. 38T and 38T_PostLS1 both return 3.8
541  except ValueError:
542  pass
543  if self.__predefined:
544  with open(self.__filename) as f:
545  Bfield = None
546  for line in f.readlines():
547  if line.startswith("#magnetic field: ") and "," in line:
548  if Bfield is not None:
549  raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
550  return float(line.replace("#magnetic field: ", "").split(",")[1].split("#")[0].strip())
551 
552  if run > 0:
553  dasQuery = ('run=%s instance=%s detail=true'%(run, self.__dasinstance)) #for data
554  #####################################################################
555  #can remove this once dasgoclient is updated
556  if olddas: dasQuery = dasQuery.replace("detail=true", "")
557  #####################################################################
558  data = self.__getData(dasQuery)
559  try:
560  return self.__findInJson(data, ["run","bfield"])
561  except KeyError:
562  return "unknown Can't get the magnetic field for run %s from DAS" % run
563 
564  #run < 0 - find B field for the first and last runs, and make sure they're compatible
565  # (to within tolerance)
566  #NOT FOOLPROOF! The magnetic field might go up and then down, or vice versa
567  if self.__firstusedrun is None or self.__lastusedrun is None:
568  return "unknown Can't get the exact magnetic field for the dataset until data has been retrieved from DAS."
569  firstrunB = self.__getMagneticFieldForRun(self.__firstusedrun)
570  lastrunB = self.__getMagneticFieldForRun(self.__lastusedrun)
571  try:
572  if abs(firstrunB - lastrunB) <= tolerance:
573  return .5*(firstrunB + lastrunB)
574  print firstrunB, lastrunB, tolerance
575  return ("unknown The beginning and end of your run range for %s\n"
576  "have different magnetic fields (%s, %s)!\n"
577  "Try limiting the run range using firstRun, lastRun, begin, end, or JSON,\n"
578  "or increasing the tolerance (in dataset.py) from %s.") % (self.__name, firstrunB, lastrunB, tolerance)
579  except TypeError:
580  try:
581  if "unknown" in firstrunB:
582  return firstrunB
583  else:
584  return lastrunB
585  except TypeError:
586  return lastrunB
587 
def __getMagneticFieldForRun(self, run=-1, tolerance=0.5)
Definition: dataset.py:528
def __findInJson(self, jsondict, strings)
Definition: dataset.py:319
def replace(string, replacements)
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
double split
Definition: MVATrainer.cc:139
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:371
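
When no run is given, the method compares the B field of the first and last used runs and averages them if they agree within the tolerance; a small standalone sketch of that check:

    def combine_bfields(first_run_b, last_run_b, tolerance=0.5):
        """Average the two values if they agree within 'tolerance' (sketch)."""
        if abs(first_run_b - last_run_b) <= tolerance:
            return 0.5 * (first_run_b + last_run_b)
        raise ValueError("Run range spans different magnetic fields: "
                         "%s vs %s (tolerance %s)" % (first_run_b, last_run_b, tolerance))

    # combine_bfields(3.8, 3.8) -> 3.8; combine_bfields(3.8, 0.0) raises
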
def dataset.Dataset.__getParentDataset (   self)
private

Definition at line 437 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, and harvestTrackValidationPlots.str.

Referenced by dataset.Dataset.parentDataset().

437  def __getParentDataset( self ):
438  dasQuery = "parent dataset=" + self.__name + " instance="+self.__dasinstance
439  data = self.__getData( dasQuery )
440  try:
441  return self.__findInJson(data, ["parent", "name"])
442  except KeyError:
443  raise AllInOneError("Cannot find the parent of the dataset '" + self.__name + "'\n"
444  "Here is the DAS output:\n" + str(jsondict) +
445  "\nIt's possible that this was a server error. If so, it may work if you try again later")
446 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:319
def __getParentDataset(self)
Definition: dataset.py:437
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:371
def dataset.Dataset.__getRunList (   self)
private

Definition at line 652 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), and dataset.Dataset.__name.

Referenced by dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.runList().

652  def __getRunList( self ):
653  dasQuery_runs = ( 'run dataset=%s instance=%s | grep run.run_number,'
654  'run.creation_time'%( self.__name, self.__dasinstance ) )
655  print "Requesting run information for '%s' from DAS..."%( self.__name ),
656  sys.stdout.flush()
657  data = self.__getData( dasQuery_runs )
658  print "Done."
659  data = [ self.__findInJson(entry,"run") for entry in data ]
660  data.sort( key = lambda run: self.__findInJson(run, "run_number") )
661  return data
662 
def __getRunList(self)
Definition: dataset.py:652
def __findInJson(self, jsondict, strings)
Definition: dataset.py:319
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:371
def dataset.Dataset.__lumiSelectionSnippet (   self,
  jsonPath = None,
  firstRun = None,
  lastRun = None 
)
private

Definition at line 140 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getRunList(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__lastusedrun, dataset.Dataset.getForceRunRangeFunction(), dataset.int, join(), list(), hpstanc_transforms.max, min(), python.rootplot.root2matplotlib.replace(), split, and harvestTrackValidationPlots.str.

Referenced by dataset.Dataset.__createSnippet().

140  def __lumiSelectionSnippet( self, jsonPath = None, firstRun = None, lastRun = None ):
141  lumiSecExtend = ""
142  if firstRun or lastRun or jsonPath:
143  if not jsonPath:
144  selectedRunList = self.__getRunList()
145  if firstRun:
146  selectedRunList = [ run for run in selectedRunList \
147  if self.__findInJson(run, "run_number") >= firstRun ]
148  if lastRun:
149  selectedRunList = [ run for run in selectedRunList \
150  if self.__findInJson(run, "run_number") <= lastRun ]
151  lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
152  + str( self.__findInJson(run, "run_number") ) + ":max" \
153  for run in selectedRunList ]
154  splitLumiList = list( self.__chunks( lumiList, 255 ) )
155  else:
156  theLumiList = None
157  try:
158  theLumiList = LumiList ( filename = jsonPath )
159  except ValueError:
160  pass
161 
162  if theLumiList is not None:
163  allRuns = theLumiList.getRuns()
164  runsToRemove = []
165  for run in allRuns:
166  if firstRun and int( run ) < firstRun:
167  runsToRemove.append( run )
168  if lastRun and int( run ) > lastRun:
169  runsToRemove.append( run )
170  theLumiList.removeRuns( runsToRemove )
171  splitLumiList = list( self.__chunks(
172  theLumiList.getCMSSWString().split(','), 255 ) )
173  if not (splitLumiList and splitLumiList[0] and splitLumiList[0][0]):
174  splitLumiList = None
175  else:
176  with open(jsonPath) as f:
177  jsoncontents = f.read()
178  if "process.source.lumisToProcess" in jsoncontents:
179  msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
180  if firstRun or lastRun:
181  msg += ("\n (after applying firstRun and/or lastRun)")
182  msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
183  msg += "\nCheck your config file to make sure that it worked properly."
184  print msg
185 
186  runlist = self.__getRunList()
187  if firstRun or lastRun:
188  self.__firstusedrun = -1
189  self.__lastusedrun = -1
190  jsoncontents = re.sub(r"\d+:(\d+|max)(-\d+:(\d+|max))?", self.getForceRunRangeFunction(firstRun, lastRun), jsoncontents)
191  jsoncontents = (jsoncontents.replace("'',\n","").replace("''\n","")
192  .replace('"",\n','').replace('""\n',''))
193  self.__firstusedrun = max(self.__firstusedrun, int(self.__findInJson(runlist[0],"run_number")))
194  self.__lastusedrun = min(self.__lastusedrun, int(self.__findInJson(runlist[-1],"run_number")))
195  if self.__lastusedrun < self.__firstusedrun:
196  jsoncontents = None
197  else:
198  self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
199  self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))
200  lumiSecExtend = jsoncontents
201  splitLumiList = None
202  else:
203  raise AllInOneError("%s is not a valid json file!" % jsonPath)
204 
205  if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
206  lumiSecStr = [ "',\n'".join( lumis ) \
207  for lumis in splitLumiList ]
208  lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
209  for lumis in lumiSecStr ]
210  lumiSecExtend = "\n".join( lumiSecStr )
211  runlist = self.__getRunList()
212  self.__firstusedrun = max(int(splitLumiList[0][0].split(":")[0]), int(self.__findInJson(runlist[0],"run_number")))
213  self.__lastusedrun = min(int(splitLumiList[-1][-1].split(":")[0]), int(self.__findInJson(runlist[-1],"run_number")))
214  elif lumiSecExtend:
215  pass
216  else:
217  msg = "You are trying to run a validation without any runs! Check that:"
218  if firstRun or lastRun:
219  msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
220  if jsonPath:
221  msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
222  if (firstRun or lastRun) and jsonPath:
223  msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
224  raise AllInOneError(msg)
225 
226  else:
227  if self.__inputMagneticField is not None:
228  pass #never need self.__firstusedrun or self.__lastusedrun
229  else:
230  runlist = self.__getRunList()
231  self.__firstusedrun = int(self.__findInJson(self.__getRunList()[0],"run_number"))
232  self.__lastusedrun = int(self.__findInJson(self.__getRunList()[-1],"run_number"))
233 
234  return lumiSecExtend
235 
def __getRunList(self)
Definition: dataset.py:652
def __lumiSelectionSnippet(self, jsonPath=None, firstRun=None, lastRun=None)
Definition: dataset.py:140
def __findInJson(self, jsondict, strings)
Definition: dataset.py:319
def replace(string, replacements)
T min(T a, T b)
Definition: MathUtil.h:58
def getForceRunRangeFunction(self, firstRun, lastRun)
Definition: dataset.py:366
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def __chunks(self, theList, n)
Definition: dataset.py:104
double split
Definition: MVATrainer.cc:139
def dataset.Dataset.buildListOfBadFiles (   self)
fills the list of bad files from the IntegrityCheck log.

When the integrity check file is not available,
files are considered as good.

Definition at line 275 of file dataset.py.

276  '''fills the list of bad files from the IntegrityCheck log.
277 
278  When the integrity check file is not available,
279  files are considered as good.'''
280  mask = "IntegrityCheck"
281 
282  self.bad_files = {}
283  self.good_files = []
284 
285  file_mask = castortools.matchingFiles(self.castorDir, '^%s_.*\.txt$' % mask)
286  if file_mask:
287  # here to avoid circular dependency
288  from edmIntegrityCheck import PublishToFileSystem
289  p = PublishToFileSystem(mask)
290  report = p.get(self.castorDir)
291  if report is not None and report:
292  self.maskExists = True
293  self.report = report
294  dup = report.get('ValidDuplicates',{})
295  for name, status in report['Files'].iteritems():
296  # print name, status
297  if not status[0]:
298  self.bad_files[name] = 'MarkedBad'
299  elif name in dup:
300  self.bad_files[name] = 'ValidDup'
301  else:
302  self.good_files.append( name )
303  else:
304  raise IntegrityCheckError( "ERROR: IntegrityCheck log file IntegrityCheck_XXXXXXXXXX.txt not found" )
305 
def buildListOfBadFiles(self)
Definition: dataset.py:275
def dataset.Dataset.buildListOfFiles (   self,
  pattern = '.*root' 
)
fills list of files, taking all root files matching the pattern in the castor dir

Definition at line 271 of file dataset.py.

271  def buildListOfFiles(self, pattern='.*root'):
272  '''fills list of files, taking all root files matching the pattern in the castor dir'''
273  self.files = castortools.matchingFiles( self.castorDir, pattern )
274 
def buildListOfFiles(self, pattern='.*root')
Definition: dataset.py:271
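
A usage sketch of the CASTOR/EOS-oriented flavour of the class (the constructor inherited-style one taking name, user, pattern) together with the two build methods above. The sample name and user are placeholders, and castortools must be available in the environment:

    # Placeholders: sample name and user; requires the CMS castortools environment.
    d = Dataset("/SomeSample/SomeProduction", user="someuser", pattern=".*root")

    d.buildListOfFiles()        # fills d.files from the castor/EOS directory
    d.buildListOfBadFiles()     # fills d.good_files / d.bad_files from IntegrityCheck logs
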
def dataset.Dataset.convertTimeToRun (   self,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  shortTuple = True 
)

Definition at line 677 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__dateString(), dataset.Dataset.__datetime(), dataset.Dataset.__find_ge(), dataset.Dataset.__find_lt(), dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__getRunList(), dataset.Dataset.__name, and dataset.int.

Referenced by dataset.Dataset.__createSnippet(), and dataset.Dataset.__dateString().

677  shortTuple = True ):
678  if ( begin and firstRun ) or ( end and lastRun ):
679  msg = ( "The Usage of "
680  + "'begin' & 'firstRun' " * int( bool( begin and
681  firstRun ) )
682  + "and " * int( bool( ( begin and firstRun ) and
683  ( end and lastRun ) ) )
684  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
685  + "is ambigous." )
686  raise AllInOneError( msg )
687 
688  if begin or end:
689  runList = [ self.__findInJson(run, "run_number") for run in self.__getRunList() ]
690 
691  if begin:
692  lastdate = begin
693  for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months after begin
694  firstdate = lastdate
695  lastdate = self.__dateString(self.__datetime(firstdate) + datetime.timedelta(delta))
696  dasQuery_begin = "run date between[%s,%s] instance=%s" % (firstdate, lastdate, self.__dasinstance)
697  begindata = self.__getData(dasQuery_begin)
698  if len(begindata) > 0:
699  begindata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
700  try:
701  runIndex = self.__find_ge( runList, self.__findInJson(begindata[0], ["run", "run_number"]))
702  except ValueError:
703  msg = ( "Your 'begin' is after the creation time of the last "
704  "run in the dataset\n'%s'"%( self.__name ) )
705  raise AllInOneError( msg )
706  firstRun = runList[runIndex]
707  begin = None
708  break
709 
710  if begin:
711  raise AllInOneError("No runs within a reasonable time interval after your 'begin'."
712  "Try using a 'begin' that has runs soon after it (within 2 months at most)")
713 
714  if end:
715  firstdate = end
716  for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months before end
717  lastdate = firstdate
718  firstdate = self.__dateString(self.__datetime(lastdate) - datetime.timedelta(delta))
719  dasQuery_end = "run date between[%s,%s] instance=%s" % (firstdate, lastdate, self.__dasinstance)
720  enddata = self.__getData(dasQuery_end)
721  if len(enddata) > 0:
722  enddata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
723  try:
724  runIndex = self.__find_lt( runList, self.__findInJson(enddata[-1], ["run", "run_number"]))
725  except ValueError:
726  msg = ( "Your 'end' is before the creation time of the first "
727  "run in the dataset\n'%s'"%( self.__name ) )
728  raise AllInOneError( msg )
729  lastRun = runList[runIndex]
730  end = None
731  break
732 
733  if end:
734  raise AllInOneError("No runs within a reasonable time interval before your 'end'."
735  "Try using an 'end' that has runs soon before it (within 2 months at most)")
736 
737  if shortTuple:
738  return firstRun, lastRun
739  else:
740  return begin, end, firstRun, lastRun
741 
def __getRunList(self)
Definition: dataset.py:652
def __findInJson(self, jsondict, strings)
Definition: dataset.py:319
def __find_lt(self, a, x)
Definition: dataset.py:305
def __datetime(self, stringForDas)
Definition: dataset.py:663
def __dateString(self, date)
Definition: dataset.py:672
def __find_ge(self, a, x)
Definition: dataset.py:312
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:371
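
A hedged usage sketch: given a Dataset instance d for a real data dataset, a date range in 'yyyymmdd' format is turned into a (firstRun, lastRun) pair; the dates are placeholders.

    # Assumes 'd' is a dataset.Dataset instance; dates are placeholders.
    first_run, last_run = d.convertTimeToRun(begin="20180501", end="20180531")

    # With shortTuple=False, begin and end (set to None once resolved to runs)
    # are returned as well:
    begin, end, first_run, last_run = d.convertTimeToRun(begin="20180501",
                                                         end="20180531",
                                                         shortTuple=False)
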
def dataset.Dataset.createdatasetfile_hippy (   self,
  filename,
  filesperjob,
  firstrun,
  lastrun 
)

Definition at line 884 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.fileList(), and join().

884  def createdatasetfile_hippy(self, filename, filesperjob, firstrun, lastrun):
885  with open(filename, "w") as f:
886  for job in self.__chunks(self.fileList(firstRun=firstrun, lastRun=lastrun, forcerunselection=True), filesperjob):
887  f.write(",".join("'{}'".format(file) for file in job)+"\n")
888 
def createdatasetfile_hippy(self, filename, filesperjob, firstrun, lastrun)
Definition: dataset.py:884
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def __chunks(self, theList, n)
Definition: dataset.py:104
def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:917
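
A usage sketch: the file names selected for the run range are written out comma-separated, one group of filesperjob files per line; the output name and run numbers are placeholders.

    # Assumes 'd' is a dataset.Dataset instance; numbers are placeholders.
    d.createdatasetfile_hippy("dataset_hippy.txt",   # output text file, one job per line
                              filesperjob=10,
                              firstrun=273158,
                              lastrun=273730)
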
def dataset.Dataset.datasetSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  crab = False,
  parent = False 
)

Definition at line 761 of file dataset.py.

References dataset.Dataset.__createSnippet(), dataset.Dataset.__filename, dataset.Dataset.__name, dataset.Dataset.__official, dataset.Dataset.__origName, dataset.Dataset.__predefined, and dataset.Dataset.dump_cff().

Referenced by dataset.Dataset.parentDataset().

761  firstRun = None, lastRun = None, crab = False, parent = False ):
762  if not firstRun: firstRun = None
763  if not lastRun: lastRun = None
764  if not begin: begin = None
765  if not end: end = None
766  if self.__predefined and (jsonPath or begin or end or firstRun or lastRun):
767  msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun' "
768  "only work for official datasets, not predefined _cff.py files" )
769  raise AllInOneError( msg )
770  if self.__predefined and parent:
771  with open(self.__filename) as f:
772  if "secFiles.extend" not in f.read():
773  msg = ("The predefined dataset '%s' does not contain secondary files, "
774  "which your validation requires!") % self.__name
775  if self.__official:
776  self.__name = self.__origName
777  self.__predefined = False
778  print msg
779  print ("Retreiving the files from DAS. You will be asked if you want "
780  "to overwrite the old dataset.\n"
781  "It will still be compatible with validations that don't need secondary files.")
782  else:
783  raise AllInOneError(msg)
784 
785  if self.__predefined:
786  snippet = ("process.load(\"Alignment.OfflineValidation.%s_cff\")\n"
787  "process.maxEvents = cms.untracked.PSet(\n"
788  " input = cms.untracked.int32(.oO[nEvents]Oo. / .oO[parallelJobs]Oo.)\n"
789  ")\n"
790  "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)"
791  %(self.__name))
792  if not parent:
793  with open(self.__filename) as f:
794  if "secFiles.extend" in f.read():
795  snippet += "\nprocess.source.secondaryFileNames = cms.untracked.vstring()"
796  return snippet
797  theMap = { "process": "process.",
798  "tab": " " * len( "process." ),
799  "nEvents": ".oO[nEvents]Oo. / .oO[parallelJobs]Oo.",
800  "skipEventsString": "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)\n",
801  "importCms": "",
802  "header": ""
803  }
804  datasetSnippet = self.__createSnippet( jsonPath = jsonPath,
805  begin = begin,
806  end = end,
807  firstRun = firstRun,
808  lastRun = lastRun,
809  repMap = theMap,
810  crab = crab,
811  parent = parent )
812  if jsonPath == "" and begin == "" and end == "" and firstRun == "" and lastRun == "":
813  try:
814  self.dump_cff(parent = parent)
815  except AllInOneError as e:
816  print "Can't store the dataset as a cff:"
817  print e
818  print "This may be inconvenient in the future, but will not cause a problem for this validation."
819  return datasetSnippet
820 
def __createSnippet(self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, repMap=None, crab=False, parent=False)
Definition: dataset.py:260
def dump_cff(self, outName=None, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, parent=False)
Definition: dataset.py:823
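
A usage sketch of this main public entry point; it returns a CMSSW configuration fragment as a string (with .oO[...]Oo. placeholders filled in later by the validation templates). Run numbers and the JSON file name are placeholders.

    # Assumes 'd' is a dataset.Dataset instance; run numbers are placeholders.
    snippet = d.datasetSnippet(firstRun=273158, lastRun=273730)

    # Or select luminosity sections with a JSON file instead of a run range:
    snippet = d.datasetSnippet(jsonPath="Cert_placeholder.json", parent=False)
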
def dataset.Dataset.dataType (   self)

Definition at line 742 of file dataset.py.

References dataset.Dataset.__dataType, and dataset.Dataset.__getDataType().

742  def dataType( self ):
743  if not self.__dataType:
744  self.__dataType = self.__getDataType()
745  return self.__dataType
746 
def __getDataType(self)
Definition: dataset.py:408
def dataType(self)
Definition: dataset.py:742
def dataset.Dataset.dump_cff (   self,
  outName = None,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  parent = False 
)

Definition at line 823 of file dataset.py.

References dataset.Dataset.__cmssw, dataset.Dataset.__createSnippet(), dataset.Dataset.__dataType, dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__magneticField, dataset.Dataset.__name, python.rootplot.root2matplotlib.replace(), split, harvestTrackValidationPlots.str, and digi_MixPreMix_cfi.strip.

Referenced by dataset.Dataset.datasetSnippet().

823  end = None, firstRun = None, lastRun = None, parent = False ):
824  if outName == None:
825  outName = "Dataset" + self.__name.replace("/", "_")
826  packageName = os.path.join( "Alignment", "OfflineValidation" )
827  if not os.path.exists( os.path.join(
828  self.__cmssw, "src", packageName ) ):
829  msg = ("You try to store the predefined dataset'%s'.\n"
830  "For that you need to check out the package '%s' to your "
831  "private relase area in\n"%( outName, packageName )
832  + self.__cmssw )
833  raise AllInOneError( msg )
834  theMap = { "process": "",
835  "tab": "",
836  "nEvents": str( -1 ),
837  "skipEventsString": "",
838  "importCms": "import FWCore.ParameterSet.Config as cms\n",
839  "header": "#Do not delete or (unless you know what you're doing) change these comments\n"
840  "#%(name)s\n"
841  "#data type: %(dataType)s\n"
842  "#magnetic field: .oO[magneticField]Oo.\n" #put in magnetic field later
843  %{"name": self.__name, #need to create the snippet before getting the magnetic field
844  "dataType": self.__dataType} #so that we know the first and last runs
845  }
846  dataset_cff = self.__createSnippet( jsonPath = jsonPath,
847  begin = begin,
848  end = end,
849  firstRun = firstRun,
850  lastRun = lastRun,
851  repMap = theMap,
852  parent = parent)
853  magneticField = self.__magneticField
854  if magneticField == "MagneticField":
855  magneticField = "%s, %s #%s" % (magneticField,
856  str(self.__getMagneticFieldForRun()).replace("\n"," ").split("#")[0].strip(),
857  "Use MagneticField_cff.py; the number is for determining which track selection to use."
858  )
859  dataset_cff = dataset_cff.replace(".oO[magneticField]Oo.",magneticField)
860  filePath = os.path.join( self.__cmssw, "src", packageName,
861  "python", outName + "_cff.py" )
862  if os.path.exists( filePath ):
863  existMsg = "The predefined dataset '%s' already exists.\n"%( outName )
864  askString = "Do you want to overwrite it? [y/n]\n"
865  inputQuery = existMsg + askString
866  while True:
867  userInput = raw_input( inputQuery ).lower()
868  if userInput == "y":
869  break
870  elif userInput == "n":
871  return
872  else:
873  inputQuery = askString
874  print ( "The predefined dataset '%s' will be stored in the file\n"
875  %( outName )
876  + filePath +
877  "\nFor future use you have to do 'scram b'." )
878  print
879  theFile = open( filePath, "w" )
880  theFile.write( dataset_cff )
881  theFile.close()
882  return
883 
def __getMagneticFieldForRun(self, run=-1, tolerance=0.5)
Definition: dataset.py:528
def __createSnippet(self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, repMap=None, crab=False, parent=False)
Definition: dataset.py:260
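
A usage sketch for storing a dataset as a predefined _cff.py (names are hypothetical; the Alignment/OfflineValidation package must be checked out in the CMSSW area, and an existing file triggers the interactive overwrite prompt shown above):

d = Dataset("/MinimumBias/Run2017A-TkAlMinBias-PromptReco-v1/ALCARECO")  # hypothetical DAS name
d.dump_cff(outName="Dataset_MinBias_Run2017A",  # -> src/Alignment/OfflineValidation/python/Dataset_MinBias_Run2017A_cff.py
           firstRun=297050, lastRun=297100)
# run 'scram b' afterwards so the new _cff.py is picked up by subsequent validations
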
def dataset.Dataset.extractFileSizes (   self)
Get the file size for each file, from the eos ls -l command.

Definition at line 306 of file dataset.py.

References dataset.EOSDataset.castorDir, and dataset.Dataset.castorDir.

306  def extractFileSizes(self):
307  '''Get the file size for each file, from the eos ls -l command.'''
308  # EOS command does not work in tier3
309  lsout = castortools.runXRDCommand(self.castorDir,'dirlist')[0]
310  lsout = lsout.split('\n')
311  self.filesAndSizes = {}
312  for entry in lsout:
313  values = entry.split()
314  if( len(values) != 5):
315  continue
316  # using full abs path as a key.
317  file = '/'.join([self.lfnDir, values[4].split("/")[-1]])
318  size = values[1]
319  self.filesAndSizes[file] = size
320 
def extractFileSizes(self)
Definition: dataset.py:306
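
To illustrate the parsing above, here is what happens to one fabricated 'dirlist' line (five whitespace-separated fields; the real output comes from the xrd/eos listing):

entry  = "-rw-r--r-- 1234567890 user group /store/user/someone/sample/file_1.root"  # fabricated line
lfnDir = "/store/user/someone/sample"
values = entry.split()
key  = '/'.join([lfnDir, values[4].split("/")[-1]])  # '/store/user/someone/sample/file_1.root'
size = values[1]                                     # '1234567890' (kept as a string)
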
def dataset.Dataset.fileInfoList (   self,
  parent = False 
)

Definition at line 946 of file dataset.py.

References dataset.Dataset.__dasLimit, and dataset.Dataset.__getFileInfoList().

Referenced by dataset.Dataset.fileList().

946  def fileInfoList( self, parent = False ):
947  return self.__getFileInfoList( self.__dasLimit, parent )
948 
def __getFileInfoList(self, dasLimit, parent=False)
Definition: dataset.py:589
def fileInfoList(self, parent=False)
Definition: dataset.py:946
def dataset.Dataset.fileList (   self,
  parent = False,
  firstRun = None,
  lastRun = None,
  forcerunselection = False 
)

Definition at line 917 of file dataset.py.

References dataset.Dataset.__findInJson(), dataset.Dataset.fileInfoList(), objects.autophobj.float, and dataset.Dataset.getrunnumberfromfilename().

Referenced by dataset.Dataset.__fileListSnippet(), and dataset.Dataset.createdatasetfile_hippy().

917  def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
918  fileList = [ self.__findInJson(fileInfo,"name")
919  for fileInfo in self.fileInfoList(parent) ]
920 
921  if firstRun or lastRun:
922  if not firstRun: firstRun = -1
923  if not lastRun: lastRun = float('infinity')
924  unknownfilenames, reasons = [], set()
925  for filename in fileList[:]:
926  try:
927  if not firstRun < self.getrunnumberfromfilename(filename) < lastRun:
928  fileList.remove(filename)
929  except AllInOneError as e:
930  if forcerunselection: raise
931  unknownfilenames.append(e.message.split("\n")[1])
932  reasons .add (e.message.split("\n")[2])
933  if reasons:
934  if len(unknownfilenames) == len(fileList):
935  print "Could not figure out the run numbers of any of the filenames for the following reason(s):"
936  else:
937  print "Could not figure out the run numbers of the following filenames:"
938  for filename in unknownfilenames:
939  print " "+filename
940  print "for the following reason(s):"
941  for reason in reasons:
942  print " "+reason
943  print "Using the files anyway. The runs will be filtered at the CMSSW level."
944  return fileList
945 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:319
def fileInfoList(self, parent=False)
Definition: dataset.py:946
def getrunnumberfromfilename(filename)
Definition: dataset.py:890
def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:917
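
A usage sketch (run numbers are hypothetical). The run comparison shown above is strict (firstRun < run < lastRun), and files whose run number cannot be parsed are kept and only reported, unless forcerunselection=True, in which case the AllInOneError is re-raised:

# d: a Dataset instance, e.g. Dataset("/MinimumBias/Run2017A-TkAlMinBias-PromptReco-v1/ALCARECO")
files       = d.fileList(firstRun=297050, lastRun=297100)  # only files with runs inside the window
parentfiles = d.fileList(parent=True)                      # files of the parent (e.g. RAW) dataset
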
def dataset.Dataset.forcerunrange (   self,
  firstRun,
  lastRun,
  s 
)
s must be in the format run1:lum1-run2:lum2

Definition at line 341 of file dataset.py.

References dataset.Dataset.__firstusedrun, dataset.Dataset.__lastusedrun, dataset.int, and split.

Referenced by dataset.Dataset.getForceRunRangeFunction().

341  def forcerunrange(self, firstRun, lastRun, s):
342  """s must be in the format run1:lum1-run2:lum2"""
343  s = s.group()
344  run1 = s.split("-")[0].split(":")[0]
345  lum1 = s.split("-")[0].split(":")[1]
346  try:
347  run2 = s.split("-")[1].split(":")[0]
348  lum2 = s.split("-")[1].split(":")[1]
349  except IndexError:
350  run2 = run1
351  lum2 = lum1
352  if int(run2) < firstRun or int(run1) > lastRun:
353  return ""
354  if int(run1) < firstRun or firstRun < 0:
355  run1 = firstRun
356  lum1 = 1
357  if int(run2) > lastRun:
358  run2 = lastRun
359  lum2 = "max"
360  if int(run1) < self.__firstusedrun or self.__firstusedrun < 0:
361  self.__firstusedrun = int(run1)
362  if int(run2) > self.__lastusedrun:
363  self.__lastusedrun = int(run2)
364  return "%s:%s-%s:%s" % (run1, lum1, run2, lum2)
365 
def forcerunrange(self, firstRun, lastRun, s)
Definition: dataset.py:341
def dataset.Dataset.getForceRunRangeFunction (   self,
  firstRun,
  lastRun 
)

Definition at line 366 of file dataset.py.

References dataset.Dataset.forcerunrange().

Referenced by dataset.Dataset.__lumiSelectionSnippet().

366  def getForceRunRangeFunction(self, firstRun, lastRun):
367  def forcerunrangefunction(s):
368  return self.forcerunrange(firstRun, lastRun, s)
369  return forcerunrangefunction
370 
def forcerunrange(self, firstRun, lastRun, s)
Definition: dataset.py:341
def getForceRunRangeFunction(self, firstRun, lastRun)
Definition: dataset.py:366
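
The returned closure is meant to be used as the replacement function of re.sub, as in __lumiSelectionSnippet when a JSON lumi selection is combined with an explicit run range. A minimal sketch (the lumi string and regular expression are illustrative, and the internal first/last-used-run bookkeeping is assumed to have been initialised, as __createSnippet does before the substitution):

import re

# d: a Dataset instance, e.g. Dataset("/MinimumBias/Run2017A-TkAlMinBias-PromptReco-v1/ALCARECO")
lumistring = "297040:1-297060:50,297200:1-297300:9999"
clipped = re.sub(r"\d+:\d+-\d+:\d+",
                 d.getForceRunRangeFunction(297050, 297100),
                 lumistring)
# the first range is clipped to "297050:1-297060:50"; the second lies entirely
# above lastRun and is replaced by an empty string
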
def dataset.Dataset.getPrimaryDatasetEntries (   self)

Definition at line 326 of file dataset.py.

References dataset.int, runall.testit.report, WorkFlowRunner.WorkFlowRunner.report, dataset.BaseDataset.report, and ALIUtils.report.

327  if self.report is not None and self.report:
328  return int(self.report.get('PrimaryDatasetEntries',-1))
329  return -1
330 
331 
def getPrimaryDatasetEntries(self)
Definition: dataset.py:326
def dataset.Dataset.getrunnumberfromfilename (   filename)
static

Definition at line 890 of file dataset.py.

References Vispa.Plugins.EdmBrowser.EdmDataAccessor.all(), dataset.int, and join().

Referenced by dataset.Dataset.fileList().

891  parts = filename.split("/")
892  result = error = None
893  if parts[0] != "" or parts[1] != "store":
894  error = "does not start with /store"
895  elif parts[2] in ["mc", "relval"]:
896  result = 1
897  elif parts[-2] != "00000" or not parts[-1].endswith(".root"):
898  error = "does not end with 00000/something.root"
899  elif len(parts) != 12:
900  error = "should be exactly 11 slashes counting the first one"
901  else:
902  runnumberparts = parts[-5:-2]
903  if not all(len(part)==3 for part in runnumberparts):
904  error = "the 3 directories {} do not have length 3 each".format("/".join(runnumberparts))
905  try:
906  result = int("".join(runnumberparts))
907  except ValueError:
908  error = "the 3 directories {} do not form an integer".format("/".join(runnumberparts))
909 
910  if error:
911  error = "could not figure out which run number this file is from:\n{}\n{}".format(filename, error)
912  raise AllInOneError(error)
913 
914  return result
915 
def getrunnumberfromfilename(filename)
Definition: dataset.py:890
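
A worked example of the parsing above, using a correctly formed (but hypothetical) LFN: the path splits into 12 '/'-separated parts, and parts[-5:-2] are the three 3-digit run directories:

lfn = "/store/data/Run2017A/MinimumBias/ALCARECO/TkAlMinBias-PromptReco-v1/000/297/050/00000/ABCD1234.root"
run = Dataset.getrunnumberfromfilename(lfn)
# '000' + '297' + '050' -> run == 297050; '/store/mc/...' and '/store/relval/...' files always return 1
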
def dataset.Dataset.magneticField (   self)

Definition at line 747 of file dataset.py.

References dataset.Dataset.__getMagneticField(), and dataset.Dataset.__magneticField.

747  def magneticField( self ):
748  if not self.__magneticField:
749  self.__magneticField = self.__getMagneticField()
750  return self.__magneticField
751 
def __getMagneticField(self)
Definition: dataset.py:447
def magneticField(self)
Definition: dataset.py:747
def dataset.Dataset.magneticFieldForRun (   self,
  run = -1 
)

Definition at line 752 of file dataset.py.

References dataset.Dataset.__getMagneticFieldForRun().

752  def magneticFieldForRun( self, run = -1 ):
753  return self.__getMagneticFieldForRun(run)
754 
def __getMagneticFieldForRun(self, run=-1, tolerance=0.5)
Definition: dataset.py:528
def magneticFieldForRun(self, run=-1)
Definition: dataset.py:752
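
Both accessors are lazy wrappers around the DAS/configuration lookups and cache their result; a brief usage sketch (the run number is hypothetical):

# d: a Dataset instance, e.g. Dataset("/MinimumBias/Run2017A-TkAlMinBias-PromptReco-v1/ALCARECO")
print(d.magneticField())              # field map name, e.g. "MagneticField" when the run-dependent map is used
print(d.magneticFieldForRun(297050))  # field for that run, resolved within the 0.5 T tolerance
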
def dataset.Dataset.name (   self)
def dataset.Dataset.parentDataset (   self)

Definition at line 755 of file dataset.py.

References dataset.Dataset.__getParentDataset(), dataset.Dataset.__parentDataset, and dataset.Dataset.datasetSnippet().

Referenced by dataset.Dataset.__getFileInfoList().

755  def parentDataset( self ):
756  if not self.__parentDataset:
757  self.__parentDataset = self.__getParentDataset()
758  return self.__parentDataset
759 
def parentDataset(self)
Definition: dataset.py:755
def __getParentDataset(self)
Definition: dataset.py:437
def dataset.Dataset.predefined (   self)

Definition at line 952 of file dataset.py.

References dataset.Dataset.__predefined.

952  def predefined( self ):
953  return self.__predefined
954 
def predefined(self)
Definition: dataset.py:952
def dataset.Dataset.printInfo (   self)

Definition at line 321 of file dataset.py.

References dataset.EOSDataset.castorDir, dataset.Dataset.castorDir, dataset.Dataset.lfnDir, and dataset.BaseDataset.name.

321  def printInfo(self):
322  print 'sample : ' + self.name
323  print 'LFN : ' + self.lfnDir
324  print 'Castor path : ' + self.castorDir
325 
def printInfo(self)
Definition: dataset.py:321
def dataset.Dataset.runList (   self)

Definition at line 956 of file dataset.py.

References dataset.Dataset.__getRunList().

956  def runList( self ):
957  return self.__getRunList()
958 
959 
def __getRunList(self)
Definition: dataset.py:652
def runList(self)
Definition: dataset.py:956

Member Data Documentation

dataset.Dataset.__cmssw
private

Definition at line 43 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField(), and dataset.Dataset.dump_cff().

dataset.Dataset.__cmsswrelease
private

Definition at line 44 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField().

dataset.Dataset.__dasinstance
private
dataset.Dataset.__dasLimit
private

Definition at line 41 of file dataset.py.

Referenced by dataset.Dataset.fileInfoList().

dataset.Dataset.__dataType
private
tuple dataset.Dataset.__dummy_source_template
staticprivate
Initial value:
1 = ("readFiles = cms.untracked.vstring()\n"
2  "secFiles = cms.untracked.vstring()\n"
3  "%(process)ssource = cms.Source(\"PoolSource\",\n"
4  "%(tab)s secondaryFileNames ="
5  "secFiles,\n"
6  "%(tab)s fileNames = readFiles\n"
7  ")\n"
8  "readFiles.extend(['dummy_File.root'])\n"
9  "%(process)smaxEvents = cms.untracked.PSet( "
10  "input = cms.untracked.int32(%(nEvents)s) )\n"
11  "%(skipEventsString)s\n")

Definition at line 128 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().
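
For illustration only (reaching through the name-mangled private attribute, which normal code should not do), filling this template with the repMap values that dump_cff() builds yields a standalone source fragment:

from dataset import Dataset

repMap = {"process": "", "tab": "", "nEvents": "-1", "skipEventsString": ""}
fragment = Dataset._Dataset__dummy_source_template % repMap
# fragment now reads roughly:
#   readFiles = cms.untracked.vstring()
#   secFiles = cms.untracked.vstring()
#   source = cms.Source("PoolSource",
#     secondaryFileNames =secFiles,
#     fileNames = readFiles
#   )
#   readFiles.extend(['dummy_File.root'])
#   maxEvents = cms.untracked.PSet( input = cms.untracked.int32(-1) )
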

dataset.Dataset.__filename
private
dataset.Dataset.__firstusedrun
private
dataset.Dataset.__inputMagneticField
private
dataset.Dataset.__lastusedrun
private
dataset.Dataset.__magneticField
private
dataset.Dataset.__name
private
dataset.Dataset.__official
private

Definition at line 51 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__origName
private

Definition at line 40 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__parentDataset
private

Definition at line 47 of file dataset.py.

Referenced by dataset.Dataset.parentDataset().

dataset.Dataset.__predefined
private
dataset.Dataset.__source_template
staticprivate

Definition at line 110 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().

dataset.Dataset.bad_files

Definition at line 282 of file dataset.py.

dataset.Dataset.castorDir

Definition at line 266 of file dataset.py.

Referenced by dataset.Dataset.extractFileSizes(), and dataset.Dataset.printInfo().

dataset.Dataset.dasData
static
Initial value:
1 = das_client.get_data(dasQuery, dasLimit,
2  ############################################
3  #can remove this once dasgoclient is updated
4  cmd="das_client" if olddas else None
5  ############################################
6  )

Definition at line 372 of file dataset.py.

dataset.Dataset.error = self.__findInJson(jsondict,["data","error"])
static
dataset.Dataset.files

Definition at line 273 of file dataset.py.

dataset.Dataset.filesAndSizes

Definition at line 311 of file dataset.py.

dataset.Dataset.good_files

Definition at line 283 of file dataset.py.

int dataset.Dataset.i = 0
static

Definition at line 394 of file dataset.py.

dataset.Dataset.jsondict = json.loads( dasData )
static

Definition at line 379 of file dataset.py.

string dataset.Dataset.jsonfile = "das_query_output_%i.txt"
static

Definition at line 393 of file dataset.py.

dataset.Dataset.jsonfile = jsonfile%i
static

Definition at line 397 of file dataset.py.

dataset.Dataset.jsonstr = self.__findInJson(jsondict,"reason")
static

Definition at line 389 of file dataset.py.

dataset.Dataset.lfnDir

Definition at line 265 of file dataset.py.

Referenced by dataset.Dataset.printInfo().

dataset.Dataset.maskExists

Definition at line 267 of file dataset.py.

string dataset.Dataset.msg = "The DAS query returned an error. The output is very long, and has been stored in:\n"
static
dataset.Dataset.report

Definition at line 268 of file dataset.py.

Referenced by addOnTests.testit.run().

dataset.Dataset.theFile = open( jsonfile, "w" )
static

Definition at line 398 of file dataset.py.