CMS 3D CMS Logo

List of all members | Public Member Functions | Static Public Member Functions | Public Attributes | Private Member Functions | Private Attributes | Static Private Attributes
dataset.Dataset Class Reference
Inheritance diagram for dataset.Dataset:
dataset.BaseDataset

Public Member Functions

def __init__ (self, datasetName, dasLimit=0, tryPredefinedFirst=True, cmssw=os.environ["CMSSW_BASE"], cmsswrelease=os.environ["CMSSW_RELEASE_BASE"], magneticfield=None, dasinstance=None)
 
def __init__ (self, name, user, pattern='.*root')
 
def buildListOfBadFiles (self)
 
def buildListOfFiles (self, pattern='.*root')
 
def convertTimeToRun (self, begin=None, end=None, firstRun=None, lastRun=None, shortTuple=True)
 
def createdatasetfile_hippy (self, filename, filesperjob, firstrun, lastrun)
 
def datasetSnippet (self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, crab=False, parent=False)
 
def dataType (self)
 
def dump_cff (self, outName=None, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, parent=False)
 
def extractFileSizes (self)
 
def fileInfoList (self, parent=False)
 
def fileList (self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
 
def forcerunrange (self, firstRun, lastRun, s)
 
def getForceRunRangeFunction (self, firstRun, lastRun)
 
def getPrimaryDatasetEntries (self)
 
def magneticField (self)
 
def magneticFieldForRun (self, run=-1)
 
def name (self)
 
def parentDataset (self)
 
def predefined (self)
 
def printInfo (self)
 
def runList (self)
 
- Public Member Functions inherited from dataset.BaseDataset
def __init__ (self, name, user, pattern='.*root', run_range=None, dbsInstance=None)
 def __init__(self, name, user, pattern='.*root', ...). More...
 
def buildListOfBadFiles (self)
 
def buildListOfFiles (self, pattern)
 
def extractFileSizes (self)
 
def getPrimaryDatasetEntries (self)
 
def listOfFiles (self)
 
def listOfGoodFiles (self)
 
def listOfGoodFilesWithPrescale (self, prescale)
 
def printFiles (self, abspath=True, info=True)
 
def printInfo (self)
 

Static Public Member Functions

def getrunnumberfromfilename (filename)
 

Public Attributes

 bad_files
 
 castorDir
 
 files
 
 filesAndSizes
 
 good_files
 
 lfnDir
 
 maskExists
 
 report
 
- Public Attributes inherited from dataset.BaseDataset
 bad_files
 
 dbsInstance
 MM. More...
 
 files
 
 filesAndSizes
 
 good_files
 
 name
 
 pattern
 
 primaryDatasetEntries
 MM. More...
 
 report
 
 run_range
 
 user
 

Private Member Functions

def __chunks (self, theList, n)
 
def __createSnippet (self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, repMap=None, crab=False, parent=False)
 
def __dateString (self, date)
 
def __datetime (self, stringForDas)
 
def __fileListSnippet (self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
 
def __find_ge (self, a, x)
 
def __find_lt (self, a, x)
 
def __findInJson (self, jsondict, strings)
 
def __getData (self, dasQuery, dasLimit=0)
 
def __getDataType (self)
 
def __getFileInfoList (self, dasLimit, parent=False)
 
def __getMagneticField (self)
 
def __getMagneticFieldForRun (self, run=-1, tolerance=0.5)
 
def __getParentDataset (self)
 
def __getRunList (self)
 
def __lumiSelectionSnippet (self, jsonPath=None, firstRun=None, lastRun=None)
 

Private Attributes

 __cmssw
 
 __cmsswrelease
 
 __dasinstance
 
 __dasLimit
 
 __dataType
 
 __filename
 
 __firstusedrun
 
 __inputMagneticField
 
 __lastusedrun
 
 __magneticField
 
 __name
 
 __official
 
 __origName
 
 __parentDataset
 
 __predefined
 

Static Private Attributes

tuple __dummy_source_template
 
 __source_template
 

Detailed Description

Definition at line 18 of file dataset.py.

Constructor & Destructor Documentation

def dataset.Dataset.__init__ (   self,
  datasetName,
  dasLimit = 0,
  tryPredefinedFirst = True,
  cmssw = os.environ["CMSSW_BASE"],
  cmsswrelease = os.environ["CMSSW_RELEASE_BASE"],
  magneticfield = None,
  dasinstance = None 
)

Definition at line 21 of file dataset.py.

Referenced by dataset.Dataset.__init__().

21  magneticfield = None, dasinstance = None):
22  self.__name = datasetName
23  self.__origName = datasetName
24  self.__dasLimit = dasLimit
25  self.__dasinstance = dasinstance
26  self.__cmssw = cmssw
27  self.__cmsswrelease = cmsswrelease
28  self.__firstusedrun = None
29  self.__lastusedrun = None
30  self.__parentDataset = None
31 
32  # check, if dataset name matches CMS dataset naming scheme
33  if re.match( r'/.+/.+/.+', self.__name ):
34  self.__official = True
35  fileName = "Dataset" + self.__name.replace("/","_") + "_cff.py"
36  else:
37  self.__official = False
38  fileName = self.__name + "_cff.py"
39 
40  searchPath1 = os.path.join( self.__cmssw, "python",
41  "Alignment", "OfflineValidation",
42  fileName )
43  searchPath2 = os.path.join( self.__cmssw, "src",
44  "Alignment", "OfflineValidation",
45  "python", fileName )
46  searchPath3 = os.path.join( self.__cmsswrelease,
47  "python", "Alignment",
48  "OfflineValidation", fileName )
49  if self.__official and not tryPredefinedFirst:
50  self.__predefined = False
51  elif os.path.exists( searchPath1 ):
52  self.__predefined = True
53  self.__filename = searchPath1
54  elif os.path.exists( searchPath2 ):
55  msg = ("The predefined dataset '%s' does exist in '%s', but "
56  "you need to run 'scram b' first."
57  %( self.__name, searchPath2 ))
58  if self.__official:
59  print(msg)
60  print("Getting the data from DAS again. To go faster next time, run scram b.")
61  else:
62  raise AllInOneError( msg )
63  elif os.path.exists( searchPath3 ):
64  self.__predefined = True
65  self.__filename = searchPath3
66  elif self.__official:
67  self.__predefined = False
68  else:
69  msg = ("The predefined dataset '%s' does not exist. Please "
70  "create it first or check for typos."%( self.__name ))
71  raise AllInOneError( msg )
72 
73  if self.__predefined and self.__official:
74  self.__name = "Dataset" + self.__name.replace("/","_")
75 
76  if magneticfield is not None:
77  try:
78  magneticfield = float(magneticfield)
79  except ValueError:
80  raise AllInOneError("Bad magneticfield {} which can't be converted to float".format(magneticfield))
81  self.__inputMagneticField = magneticfield
82 
83  self.__dataType = self.__getDataType()
85 
86 
def __getDataType(self)
Definition: dataset.py:386
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def __getMagneticField(self)
Definition: dataset.py:421
def dataset.Dataset.__init__ (   self,
  name,
  user,
  pattern = '.*root' 
)

Definition at line 266 of file dataset.py.

References dataset.Dataset.__init__().

266  def __init__(self, name, user, pattern='.*root'):
267  self.lfnDir = castorBaseDir(user) + name
268  self.castorDir = castortools.lfnToCastor( self.lfnDir )
269  self.maskExists = False
270  self.report = None
271  super(Dataset, self).__init__(name, user, pattern)
272 
def __init__(self, datasetName, dasLimit=0, tryPredefinedFirst=True, cmssw=os.environ["CMSSW_BASE"], cmsswrelease=os.environ["CMSSW_RELEASE_BASE"], magneticfield=None, dasinstance=None)
Definition: dataset.py:21

Member Function Documentation

def dataset.Dataset.__chunks (   self,
  theList,
  n 
)
private
Yield successive n-sized chunks from theList.

Definition at line 87 of file dataset.py.

Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.__lumiSelectionSnippet(), and dataset.Dataset.createdatasetfile_hippy().

87  def __chunks( self, theList, n ):
88  """ Yield successive n-sized chunks from theList.
89  """
90  for i in xrange( 0, len( theList ), n ):
91  yield theList[i:i+n]
92 
def __chunks(self, theList, n)
Definition: dataset.py:87
def dataset.Dataset.__createSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  repMap = None,
  crab = False,
  parent = False 
)
private

Definition at line 243 of file dataset.py.

References dataset.Dataset.__dummy_source_template, dataset.Dataset.__fileListSnippet(), dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.__source_template, electrons_cff.bool, dataset.Dataset.convertTimeToRun(), and dataset.int.

Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.datasetSnippet(), and dataset.Dataset.dump_cff().

243  crab = False, parent = False ):
244 
245  if firstRun:
246  firstRun = int( firstRun )
247  if lastRun:
248  lastRun = int( lastRun )
249  if ( begin and firstRun ) or ( end and lastRun ):
250  msg = ( "The Usage of "
251  + "'begin' & 'firstRun' " * int( bool( begin and
252  firstRun ) )
253  + "and " * int( bool( ( begin and firstRun ) and
254  ( end and lastRun ) ) )
255  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
256  + "is ambigous." )
257  raise AllInOneError( msg )
258  if begin or end:
259  ( firstRun, lastRun ) = self.convertTimeToRun(
260  begin = begin, end = end, firstRun = firstRun,
261  lastRun = lastRun )
262  if ( firstRun and lastRun ) and ( firstRun > lastRun ):
263  msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
264  "chosen is greater than the upper time/runrange limit "
265  "('end'/'lastRun').")
266  raise AllInOneError( msg )
267 
268  lumiSecExtend = self.__lumiSelectionSnippet(jsonPath=jsonPath, firstRun=firstRun, lastRun=lastRun)
269  lumiStr = goodLumiSecStr = ""
270  if lumiSecExtend:
271  goodLumiSecStr = "lumiSecs = cms.untracked.VLuminosityBlockRange()\n"
272  lumiStr = " lumisToProcess = lumiSecs,\n"
273 
274  files = self.__fileListSnippet(crab=crab, parent=parent, firstRun=firstRun, lastRun=lastRun, forcerunselection=False)
275 
276  theMap = repMap
277  theMap["files"] = files
278  theMap["json"] = jsonPath
279  theMap["lumiStr"] = lumiStr
280  theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
281  theMap["lumiSecExtend"] = lumiSecExtend
282  if crab:
283  dataset_snippet = self.__dummy_source_template%( theMap )
284  else:
285  dataset_snippet = self.__source_template%( theMap )
286  return dataset_snippet
287 
def __lumiSelectionSnippet(self, jsonPath=None, firstRun=None, lastRun=None)
Definition: dataset.py:123
def convertTimeToRun(self, begin=None, end=None, firstRun=None, lastRun=None, shortTuple=True)
Definition: dataset.py:643
tuple __dummy_source_template
Definition: dataset.py:111
def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:219
def dataset.Dataset.__dateString (   self,
  date 
)
private

Definition at line 638 of file dataset.py.

References dataset.Dataset.convertTimeToRun(), and str.

Referenced by dataset.Dataset.convertTimeToRun().

638  def __dateString(self, date):
639  return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)
640 
def __dateString(self, date)
Definition: dataset.py:638
#define str(s)
def dataset.Dataset.__datetime (   self,
  stringForDas 
)
private

Definition at line 629 of file dataset.py.

References dataset.int.

Referenced by dataset.Dataset.convertTimeToRun().

629  def __datetime(self, stringForDas):
630  if len(stringForDas) != 8:
631  raise AllInOneError(stringForDas + " is not a valid date string.\n"
632  + "DAS accepts dates in the form 'yyyymmdd'")
633  year = stringForDas[:4]
634  month = stringForDas[4:6]
635  day = stringForDas[6:8]
636  return datetime.date(int(year), int(month), int(day))
637 
def __datetime(self, stringForDas)
Definition: dataset.py:629
def dataset.Dataset.__fileListSnippet (   self,
  crab = False,
  parent = False,
  firstRun = None,
  lastRun = None,
  forcerunselection = False 
)
private

Definition at line 219 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__createSnippet(), dataset.Dataset.__name, dataset.Dataset.fileList(), join(), and list().

Referenced by dataset.Dataset.__createSnippet().

219  def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
220  if crab:
221  files = ""
222  else:
223  splitFileList = list( self.__chunks( self.fileList(firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
224  if not splitFileList:
225  raise AllInOneError("No files found for dataset {}. Check the spelling, or maybe specify another das instance?".format(self.__name))
226  fileStr = [ "',\n'".join( files ) for files in splitFileList ]
227  fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
228  for files in fileStr ]
229  files = "\n".join( fileStr )
230 
231  if parent:
232  splitParentFileList = list( self.__chunks( self.fileList(parent=True, firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
233  parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ]
234  parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
235  for parentFiles in parentFileStr ]
236  parentFiles = "\n".join( parentFileStr )
237  files += "\n\n" + parentFiles
238 
239  return files
240 
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def __chunks(self, theList, n)
Definition: dataset.py:87
def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:219
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list ("!*", "!HLTx*" if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL. It will reject the event if any of the triggers are PASS or EXCEPTION (this matches the behavior of "!*" before the partial wildcard feature was incorporated). Triggers which are in the READY state are completely ignored. (READY should never be returned since the trigger paths have been run.)
def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:883
def dataset.Dataset.__find_ge (   self,
  a,
  x 
)
private

Definition at line 295 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

295  def __find_ge( self, a, x):
296  'Find leftmost item greater than or equal to x'
297  i = bisect.bisect_left( a, x )
298  if i != len( a ):
299  return i
300  raise ValueError
301 
def __find_ge(self, a, x)
Definition: dataset.py:295
def dataset.Dataset.__find_lt (   self,
  a,
  x 
)
private

Definition at line 288 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

288  def __find_lt( self, a, x ):
289  'Find rightmost value less than x'
290  i = bisect.bisect_left( a, x )
291  if i:
292  return i-1
293  raise ValueError
294 
def __find_lt(self, a, x)
Definition: dataset.py:288
def dataset.Dataset.__findInJson (   self,
  jsondict,
  strings 
)
private

Definition at line 302 of file dataset.py.

References dataset.Dataset.__findInJson().

Referenced by dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.fileList().

302  def __findInJson(self, jsondict, strings):
303  if isinstance(strings, str):
304  strings = [ strings ]
305 
306  if len(strings) == 0:
307  return jsondict
308  if isinstance(jsondict,dict):
309  if strings[0] in jsondict:
310  try:
311  return self.__findInJson(jsondict[strings[0]], strings[1:])
312  except KeyError:
313  pass
314  else:
315  for a in jsondict:
316  if strings[0] in a:
317  try:
318  return self.__findInJson(a[strings[0]], strings[1:])
319  except (TypeError, KeyError): #TypeError because a could be a string and contain strings[0]
320  pass
321  #if it's not found
322  raise KeyError("Can't find " + strings[0])
323 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:302
def dataset.Dataset.__getData (   self,
  dasQuery,
  dasLimit = 0 
)
private

Definition at line 354 of file dataset.py.

References dataset.Dataset.__findInJson(), and str.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), and dataset.Dataset.convertTimeToRun().

354  def __getData( self, dasQuery, dasLimit = 0 ):
355  dasData = das_client.get_data(dasQuery, dasLimit)
356  if isinstance(dasData, str):
357  jsondict = json.loads( dasData )
358  else:
359  jsondict = dasData
360  # Check, if the DAS query fails
361  try:
362  error = self.__findInJson(jsondict,["data","error"])
363  except KeyError:
364  error = None
365  if error or self.__findInJson(jsondict,"status") != 'ok' or "data" not in jsondict:
366  try:
367  jsonstr = self.__findInJson(jsondict,"reason")
368  except KeyError:
369  jsonstr = str(jsondict)
370  if len(jsonstr) > 10000:
371  jsonfile = "das_query_output_%i.txt"
372  i = 0
373  while os.path.lexists(jsonfile % i):
374  i += 1
375  jsonfile = jsonfile % i
376  theFile = open( jsonfile, "w" )
377  theFile.write( jsonstr )
378  theFile.close()
379  msg = "The DAS query returned an error. The output is very long, and has been stored in:\n" + jsonfile
380  else:
381  msg = "The DAS query returned a error. Here is the output\n" + jsonstr
382  msg += "\nIt's possible that this was a server error. If so, it may work if you try again later"
383  raise AllInOneError(msg)
384  return self.__findInJson(jsondict,"data")
385 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:302
#define str(s)
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:354
def dataset.Dataset.__getDataType (   self)
private

Definition at line 386 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__predefined, ElectronMVAID.ElectronMVAID.name, counter.Counter.name, average.Average.name, histograms.Histograms.name, AlignableObjectId::entry.name, cond::persistency::RUN_INFO::RUN_NUMBER.name, cond::persistency::TAG::NAME.name, TmModule.name, cond::persistency::GLOBAL_TAG::NAME.name, core.autovars.NTupleVariable.name, cond::persistency::GLOBAL_TAG::VALIDITY.name, cond::persistency::RUN_INFO::START_TIME.name, cond::persistency::TAG::TIME_TYPE.name, cond::persistency::GLOBAL_TAG::DESCRIPTION.name, cond::persistency::TAG::OBJECT_TYPE.name, cond::persistency::RUN_INFO::END_TIME.name, cond::persistency::GLOBAL_TAG::RELEASE.name, cond::persistency::TAG::SYNCHRONIZATION.name, cond::persistency::GLOBAL_TAG::SNAPSHOT_TIME.name, MEPSet.name, cond::persistency::TAG::END_OF_VALIDITY.name, cond::persistency::GLOBAL_TAG::INSERTION_TIME.name, cond::persistency::TAG::DESCRIPTION.name, cond::persistency::GTEditorData.name, nanoaod::MergeableCounterTable::SingleColumn< T >.name, cond::persistency::TAG::LAST_VALIDATED_TIME.name, cond::persistency::TAG::INSERTION_TIME.name, preexistingValidation.PreexistingValidation.name, cond::persistency::TAG::MODIFICATION_TIME.name, FWTGeoRecoGeometry::Info.name, Types._Untracked.name, OutputMEPSet.name, dataset.BaseDataset.name, personalPlayback.Applet.name, ParameterSet.name, PixelDCSObject< T >::Item.name, analyzer.Analyzer.name, DQMRivetClient::LumiOption.name, MagCylinder.name, ParSet.name, DQMRivetClient::ScaleFactorOption.name, EgHLTOfflineSummaryClient::SumHistBinData.name, SingleObjectCondition.name, cond::persistency::GTProxyData.name, core.autovars.NTupleObjectType.name, MyWatcher.name, edm::PathTimingSummary.name, nanoaod::MergeableCounterTable::VectorColumn< T >.name, cond::TimeTypeSpecs.name, lumi::TriggerInfo.name, alignment.Alignment.name, 
edm::PathSummary.name, cond::persistency::GLOBAL_TAG_MAP::GLOBAL_TAG_NAME.name, PixelEndcapLinkMaker::Item.name, perftools::EdmEventSize::BranchRecord.name, DQMGenericClient::EfficOption.name, FWTableViewManager::TableEntry.name, cond::persistency::GLOBAL_TAG_MAP::RECORD.name, PixelBarrelLinkMaker::Item.name, EcalLogicID.name, cms::DDAlgoArguments.name, validateAlignments.ParallelMergeJob.name, cond::persistency::GLOBAL_TAG_MAP::LABEL.name, MEtoEDM< T >::MEtoEDMObject.name, cond::persistency::GLOBAL_TAG_MAP::TAG_NAME.name, ExpressionHisto< T >.name, XMLProcessor::_loaderBaseConfig.name, cond::persistency::PAYLOAD::HASH.name, cond::persistency::PAYLOAD::OBJECT_TYPE.name, genericValidation.GenericValidation.name, TreeCrawler.Package.name, cond::persistency::PAYLOAD::DATA.name, cond::persistency::PAYLOAD::STREAMER_INFO.name, cond::persistency::PAYLOAD::VERSION.name, options.ConnectionHLTMenu.name, MagGeoBuilderFromDDD::volumeHandle.name, cond::persistency::PAYLOAD::INSERTION_TIME.name, DQMGenericClient::ProfileOption.name, dqmoffline::l1t::HistDefinition.name, DQMGenericClient::NormOption.name, emtf::Node.name, h4DSegm.name, core.TriggerMatchAnalyzer.TriggerMatchAnalyzer.name, FastHFShowerLibrary.name, PhysicsTools::Calibration::Variable.name, DQMGenericClient::CDOption.name, CounterChecker.name, cond::TagInfo_t.name, looper.Looper.name, DQMGenericClient::NoFlowOption.name, cond::persistency::IOV::TAG_NAME.name, TrackerSectorStruct.name, cond::persistency::IOV::SINCE.name, EDMtoMEConverter.name, Mapper::definition< ScannerT >.name, cond::persistency::IOV::PAYLOAD_HASH.name, classes.MonitorData.name, cond::persistency::IOV::INSERTION_TIME.name, HistogramManager.name, MuonGeometrySanityCheckPoint.name, classes.OutputData.name, options.HLTProcessOptions.name, h2DSegm.name, core.TriggerBitAnalyzer.TriggerBitAnalyzer.name, nanoaod::FlatTable::Column.name, config.Analyzer.name, geometry.Structure.name, core.autovars.NTupleSubObject.name, DQMNet::WaitObject.name, 
AlpgenParameterName.name, SiStripMonitorDigi.name, core.autovars.NTupleObject.name, config.Service.name, cond::persistency::TAG_LOG::TAG_NAME.name, cond::persistency::TAG_LOG::EVENT_TIME.name, cond::persistency::TAG_LOG::USER_NAME.name, cond::persistency::TAG_LOG::HOST_NAME.name, cond::persistency::TAG_LOG::COMMAND.name, cond::persistency::TAG_LOG::ACTION.name, cond::persistency::TAG_LOG::USER_TEXT.name, core.autovars.NTupleCollection.name, BPHRecoBuilder::BPHRecoSource.name, BPHRecoBuilder::BPHCompSource.name, personalPlayback.FrameworkJob.name, plotscripts.SawTeethFunction.name, crabFunctions.CrabTask.name, hTMaxCell.name, cscdqm::ParHistoDef.name, BeautifulSoup.Tag.name, SummaryOutputProducer::GenericSummary.name, BeautifulSoup.SoupStrainer.name, and python.rootplot.root2matplotlib.replace().

Referenced by dataset.Dataset.dataType().

386  def __getDataType( self ):
387  if self.__predefined:
388  with open(self.__filename) as f:
389  datatype = None
390  for line in f.readlines():
391  if line.startswith("#data type: "):
392  if datatype is not None:
393  raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
394  datatype = line.replace("#data type: ", "").replace("\n","")
395  return datatype
396  return "unknown"
397 
398  dasQuery_type = ( 'dataset dataset=%s instance=%s detail=true | grep dataset.datatype,'
399  'dataset.name'%( self.__name, self.__dasinstance ) )
400  data = self.__getData( dasQuery_type )
401 
402  try:
403  return self.__findInJson(data, ["dataset", "datatype"])
404  except KeyError:
405  print ("Cannot find the datatype of the dataset '%s'\n"
406  "It may not be possible to automatically find the magnetic field,\n"
407  "and you will not be able run in CRAB mode"
408  %( self.name() ))
409  return "unknown"
410 
def __getDataType(self)
Definition: dataset.py:386
def __findInJson(self, jsondict, strings)
Definition: dataset.py:302
def replace(string, replacements)
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:354
def dataset.Dataset.__getFileInfoList (   self,
  dasLimit,
  parent = False 
)
private

Definition at line 559 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__predefined, ElectronMVAID.ElectronMVAID.name, counter.Counter.name, average.Average.name, AlignableObjectId::entry.name, histograms.Histograms.name, cond::persistency::RUN_INFO::RUN_NUMBER.name, cond::persistency::TAG::NAME.name, TmModule.name, cond::persistency::GLOBAL_TAG::NAME.name, core.autovars.NTupleVariable.name, cond::persistency::GLOBAL_TAG::VALIDITY.name, cond::persistency::RUN_INFO::START_TIME.name, cond::persistency::TAG::TIME_TYPE.name, cond::persistency::GLOBAL_TAG::DESCRIPTION.name, cond::persistency::TAG::OBJECT_TYPE.name, cond::persistency::RUN_INFO::END_TIME.name, cond::persistency::GLOBAL_TAG::RELEASE.name, cond::persistency::TAG::SYNCHRONIZATION.name, cond::persistency::GLOBAL_TAG::SNAPSHOT_TIME.name, MEPSet.name, cond::persistency::TAG::END_OF_VALIDITY.name, cond::persistency::GLOBAL_TAG::INSERTION_TIME.name, cond::persistency::TAG::DESCRIPTION.name, cond::persistency::GTEditorData.name, nanoaod::MergeableCounterTable::SingleColumn< T >.name, cond::persistency::TAG::LAST_VALIDATED_TIME.name, cond::persistency::TAG::INSERTION_TIME.name, cond::persistency::TAG::MODIFICATION_TIME.name, FWTGeoRecoGeometry::Info.name, preexistingValidation.PreexistingValidation.name, Types._Untracked.name, OutputMEPSet.name, dataset.BaseDataset.name, personalPlayback.Applet.name, ParameterSet.name, PixelDCSObject< T >::Item.name, analyzer.Analyzer.name, DQMRivetClient::LumiOption.name, MagCylinder.name, ParSet.name, DQMRivetClient::ScaleFactorOption.name, EgHLTOfflineSummaryClient::SumHistBinData.name, SingleObjectCondition.name, cond::persistency::GTProxyData.name, core.autovars.NTupleObjectType.name, MyWatcher.name, edm::PathTimingSummary.name, nanoaod::MergeableCounterTable::VectorColumn< T >.name, cond::TimeTypeSpecs.name, lumi::TriggerInfo.name, alignment.Alignment.name, edm::PathSummary.name, 
cond::persistency::GLOBAL_TAG_MAP::GLOBAL_TAG_NAME.name, PixelEndcapLinkMaker::Item.name, perftools::EdmEventSize::BranchRecord.name, DQMGenericClient::EfficOption.name, FWTableViewManager::TableEntry.name, cond::persistency::GLOBAL_TAG_MAP::RECORD.name, PixelBarrelLinkMaker::Item.name, cms::DDAlgoArguments.name, EcalLogicID.name, validateAlignments.ParallelMergeJob.name, cond::persistency::GLOBAL_TAG_MAP::LABEL.name, MEtoEDM< T >::MEtoEDMObject.name, cond::persistency::GLOBAL_TAG_MAP::TAG_NAME.name, ExpressionHisto< T >.name, XMLProcessor::_loaderBaseConfig.name, cond::persistency::PAYLOAD::HASH.name, cond::persistency::PAYLOAD::OBJECT_TYPE.name, genericValidation.GenericValidation.name, TreeCrawler.Package.name, cond::persistency::PAYLOAD::DATA.name, cond::persistency::PAYLOAD::STREAMER_INFO.name, cond::persistency::PAYLOAD::VERSION.name, options.ConnectionHLTMenu.name, MagGeoBuilderFromDDD::volumeHandle.name, cond::persistency::PAYLOAD::INSERTION_TIME.name, DQMGenericClient::ProfileOption.name, dqmoffline::l1t::HistDefinition.name, DQMGenericClient::NormOption.name, emtf::Node.name, h4DSegm.name, core.TriggerMatchAnalyzer.TriggerMatchAnalyzer.name, PhysicsTools::Calibration::Variable.name, FastHFShowerLibrary.name, DQMGenericClient::CDOption.name, CounterChecker.name, cond::TagInfo_t.name, looper.Looper.name, DQMGenericClient::NoFlowOption.name, cond::persistency::IOV::TAG_NAME.name, TrackerSectorStruct.name, EDMtoMEConverter.name, cond::persistency::IOV::SINCE.name, Mapper::definition< ScannerT >.name, cond::persistency::IOV::PAYLOAD_HASH.name, classes.MonitorData.name, cond::persistency::IOV::INSERTION_TIME.name, HistogramManager.name, MuonGeometrySanityCheckPoint.name, classes.OutputData.name, options.HLTProcessOptions.name, h2DSegm.name, core.TriggerBitAnalyzer.TriggerBitAnalyzer.name, nanoaod::FlatTable::Column.name, config.Analyzer.name, geometry.Structure.name, core.autovars.NTupleSubObject.name, DQMNet::WaitObject.name, AlpgenParameterName.name, 
SiStripMonitorDigi.name, core.autovars.NTupleObject.name, config.Service.name, cond::persistency::TAG_LOG::TAG_NAME.name, cond::persistency::TAG_LOG::EVENT_TIME.name, cond::persistency::TAG_LOG::USER_NAME.name, cond::persistency::TAG_LOG::HOST_NAME.name, cond::persistency::TAG_LOG::COMMAND.name, cond::persistency::TAG_LOG::ACTION.name, cond::persistency::TAG_LOG::USER_TEXT.name, core.autovars.NTupleCollection.name, BPHRecoBuilder::BPHRecoSource.name, BPHRecoBuilder::BPHCompSource.name, personalPlayback.FrameworkJob.name, plotscripts.SawTeethFunction.name, crabFunctions.CrabTask.name, hTMaxCell.name, cscdqm::ParHistoDef.name, BeautifulSoup.Tag.name, SummaryOutputProducer::GenericSummary.name, BeautifulSoup.SoupStrainer.name, dataset.Dataset.parentDataset(), and edm.print().

Referenced by dataset.Dataset.fileInfoList().

559  def __getFileInfoList( self, dasLimit, parent = False ):
560  if self.__predefined:
561  if parent:
562  extendstring = "secFiles.extend"
563  else:
564  extendstring = "readFiles.extend"
565  with open(self.__fileName) as f:
566  files = []
567  copy = False
568  for line in f.readlines():
569  if "]" in line:
570  copy = False
571  if copy:
572  files.append({name: line.translate(None, "', " + '"')})
573  if extendstring in line and "[" in line and "]" not in line:
574  copy = True
575  return files
576 
577  if parent:
578  searchdataset = self.parentDataset()
579  else:
580  searchdataset = self.__name
581  dasQuery_files = ( 'file dataset=%s instance=%s detail=true | grep file.name, file.nevents, '
582  'file.creation_time, '
583  'file.modification_time'%( searchdataset, self.__dasinstance ) )
584  print("Requesting file information for '%s' from DAS..."%( searchdataset ), end=' ')
585  sys.stdout.flush()
586  data = self.__getData( dasQuery_files, dasLimit )
587  print("Done.")
588  data = [ self.__findInJson(entry,"file") for entry in data ]
589  if len( data ) == 0:
590  msg = ("No files are available for the dataset '%s'. This can be "
591  "due to a typo or due to a DAS problem. Please check the "
592  "spelling of the dataset and/or retry to run "
593  "'validateAlignments.py'."%( self.name() ))
594  raise AllInOneError( msg )
595  fileInformationList = []
596  for file in data:
597  fileName = 'unknown'
598  try:
599  fileName = self.__findInJson(file, "name")
600  fileCreationTime = self.__findInJson(file, "creation_time")
601  fileNEvents = self.__findInJson(file, "nevents")
602  except KeyError:
603  print(("DAS query gives bad output for file '%s'. Skipping it.\n"
604  "It may work if you try again later.") % fileName)
605  fileNEvents = 0
606  # select only non-empty files
607  if fileNEvents == 0:
608  continue
609  fileDict = { "name": fileName,
610  "creation_time": fileCreationTime,
611  "nevents": fileNEvents
612  }
613  fileInformationList.append( fileDict )
614  fileInformationList.sort( key=lambda info: self.__findInJson(info,"name") )
615  return fileInformationList
616 
def __getFileInfoList(self, dasLimit, parent=False)
Definition: dataset.py:559
def __findInJson(self, jsondict, strings)
Definition: dataset.py:302
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def parentDataset(self)
Definition: dataset.py:721
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:354
def dataset.Dataset.__getMagneticField (   self)
private

Definition at line 421 of file dataset.py.

References dataset.Dataset.__cmssw, dataset.Dataset.__cmsswrelease, dataset.Dataset.__dasinstance, dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__name, dataset.Dataset.__predefined, edm.print(), python.rootplot.root2matplotlib.replace(), and digitizers_cfi.strip.

Referenced by dataset.Dataset.magneticField().

def __getMagneticField( self ):
    """Work out which magnetic field configuration matches this dataset.

    The candidates are the files in Configuration/StandardSequences (looked
    up under self.__cmssw, or under self.__cmsswrelease if that directory
    does not exist) whose names start with "MagneticField_" and end with
    "_cff.py".  The sources of information are tried in this order:
      1. the magnetic field given by the user (only 3.8 and 0 are accepted),
      2. the header comments of a predefined dataset file,
      3. the data type ("data" always gets "MagneticField"),
      4. the DAS record of the dataset,
      5. the dataset name.

    Returns the configuration name without "_cff.py", or "unknown".
    Raises ValueError for an unsupported user supplied field and
    AllInOneError if a predefined file repeats one of the header lines.
    """
    sequencesDir = os.path.join( self.__cmssw, "python", "Configuration", "StandardSequences" )
    if not os.path.isdir( sequencesDir ):
        sequencesDir = os.path.join( self.__cmsswrelease, "python", "Configuration", "StandardSequences" )
    knownFields = []
    for entry in os.listdir( sequencesDir ):
        if entry.startswith("MagneticField_") and entry.endswith("_cff.py"):
            knownFields.append( entry.replace("_cff.py", '') )
    # longest names first, so that searching in the dataset name gives the longer match
    knownFields.sort( key = len, reverse = True )

    # 1. field given explicitly by the user
    if self.__inputMagneticField is not None:
        if self.__inputMagneticField == 3.8:
            return "MagneticField"
        if self.__inputMagneticField == 0:
            return "MagneticField_0T"
        raise ValueError("Unknown input magnetic field {}".format(self.__inputMagneticField))

    # 2. header comments of a predefined dataset file
    if self.__predefined:
        headerType = None
        headerField = None
        with open(self.__filename) as f:
            for line in f:
                if line.startswith("#data type: "):
                    if headerType is not None:
                        raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
                    headerType = line.replace("#data type: ", "").replace("\n","")
                    headerType = headerType.split("#")[0].strip()
                if line.startswith("#magnetic field: "):
                    if headerField is not None:
                        raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
                    headerField = line.replace("#magnetic field: ", "").replace("\n","")
                    headerField = headerField.split("#")[0].strip()
        if headerField is None:
            if headerType == "data":
                # this should be in the "#magnetic field" line, but for safety in case it got messed up
                return "MagneticField"
            return "unknown"
        # only the part before the first comma names the configuration
        headerField = headerField.split(",")[0]
        if headerField in knownFields or headerField == "unknown":
            return headerField
        print("Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % headerField)
        print("Using Bfield='unknown' - this will revert to the default")
        return "unknown"

    # 3. data type
    if self.__dataType == "data":
        return "MagneticField"

    # 4. try to find the magnetic field from DAS
    #    it seems to be there for the newer (7X) MC samples, except cosmics
    dasQuery_B = 'dataset dataset=%s instance=%s' % (self.__name, self.__dasinstance)
    data = self.__getData( dasQuery_B )
    try:
        dasField = self.__findInJson(data, ["dataset", "mcm", "sequences", "magField"])
        if dasField in knownFields:
            return dasField
        if dasField in ("38T", "38T_PostLS1"):
            return "MagneticField"
        if "MagneticField_" + dasField in knownFields:
            return "MagneticField_" + dasField
        if dasField != "":
            print("Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % dasField)
            print("Using Bfield='unknown' - this will revert to the default magnetic field")
            return "unknown"
        # an empty string falls through to the name based search below
    except KeyError:
        pass

    # 5. final attempt - try to identify the field from the dataset name
    #    all cosmics dataset names contain "TkAlCosmics0T", so that part is ignored
    for candidate in knownFields:
        if candidate == "MagneticField":
            continue
        if candidate.replace("MagneticField_","") not in self.__name.replace("TkAlCosmics0T", ""):
            continue
        if candidate in ("MagneticField_38T", "MagneticField_38T_PostLS1"):
            return "MagneticField"
        return candidate

    return "unknown"
501 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:302
def replace(string, replacements)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def __getMagneticField(self)
Definition: dataset.py:421
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:354
def dataset.Dataset.__getMagneticFieldForRun (   self,
  run = -1,
  tolerance = 0.5 
)
private
For MC, this returns the same as the previous function.
   For data, it gets the magnetic field from the runs.  This is important for
   deciding which template to use for offlinevalidation

Definition at line 502 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getData(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__lastusedrun, dataset.Dataset.__magneticField, dataset.Dataset.__name, dataset.Dataset.__predefined, funct.abs(), objects.autophobj.float, edm.print(), python.rootplot.root2matplotlib.replace(), split, and digitizers_cfi.strip.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dump_cff(), and dataset.Dataset.magneticFieldForRun().

def __getMagneticFieldForRun( self, run = -1, tolerance = 0.5 ):
    """Return the magnetic field strength to use for a run or for the dataset.

    For MC with the default "MagneticField" configuration this is always 3.8.
    For data, the field is taken from the runs.  This is important for
    deciding which template to use for offlinevalidation.

    Parameters:
      run       -- run number to look up in DAS; with the default (not
                   positive) the first and last used runs are compared
      tolerance -- largest accepted difference between the fields of the
                   first and the last used run

    Returns a number when the field could be determined.  Otherwise a string
    starting with "unknown" that explains the problem is returned (no
    exception is raised for that case).

    Raises AllInOneError if a predefined dataset file contains more than one
    "#magnetic field: " line with a numeric part.
    """
    if self.__dataType == "mc" and self.__magneticField == "MagneticField":
        return 3.8 #For 3.8T MC the default MagneticField is used
    if self.__inputMagneticField is not None:
        return self.__inputMagneticField
    if "T" in self.__magneticField:
        Bfield = self.__magneticField.split("T")[0].replace("MagneticField_","")
        try:
            return float(Bfield) / 10.0 #e.g. 38T and 38T_PostLS1 both return 3.8
        except ValueError:
            pass
    if self.__predefined:
        # The header line looks like "#magnetic field: <name>, <number> #comment".
        # The whole file is scanned so that a repeated line is actually
        # detected; previously the duplicate check could never trigger.
        fieldFromFile = None
        with open(self.__filename) as f:
            for line in f:
                if line.startswith("#magnetic field: ") and "," in line:
                    if fieldFromFile is not None:
                        raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
                    fieldFromFile = float(line.replace("#magnetic field: ", "").split(",")[1].split("#")[0].strip())
        if fieldFromFile is not None:
            return fieldFromFile

    if run > 0:
        dasQuery = ('run=%s instance=%s detail=true'%(run, self.__dasinstance)) #for data
        data = self.__getData(dasQuery)
        try:
            return self.__findInJson(data, ["run","bfield"])
        except KeyError:
            return "unknown Can't get the magnetic field for run %s from DAS" % run

    #run < 0 - find B field for the first and last runs, and make sure they're compatible
    # (to within tolerance)
    #NOT FOOLPROOF! The magnetic field might go up and then down, or vice versa
    if self.__firstusedrun is None or self.__lastusedrun is None:
        return "unknown Can't get the exact magnetic field for the dataset until data has been retrieved from DAS."
    firstrunB = self.__getMagneticFieldForRun(self.__firstusedrun)
    lastrunB = self.__getMagneticFieldForRun(self.__lastusedrun)
    try:
        if abs(firstrunB - lastrunB) <= tolerance:
            return .5*(firstrunB + lastrunB)
        print(firstrunB, lastrunB, tolerance)
        return ("unknown The beginning and end of your run range for %s\n"
                "have different magnetic fields (%s, %s)!\n"
                "Try limiting the run range using firstRun, lastRun, begin, end, or JSON,\n"
                "or increasing the tolerance (in dataset.py) from %s.") % (self.__name, firstrunB, lastrunB, tolerance)
    except TypeError:
        # at least one lookup returned an "unknown ..." string instead of a
        # number: pass that explanation on to the caller
        try:
            if "unknown" in firstrunB:
                return firstrunB
            else:
                return lastrunB
        except TypeError:
            # firstrunB is a number, so the string must be lastrunB
            return lastrunB
557 
def __getMagneticFieldForRun(self, run=-1, tolerance=0.5)
Definition: dataset.py:502
def __findInJson(self, jsondict, strings)
Definition: dataset.py:302
def replace(string, replacements)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
double split
Definition: MVATrainer.cc:139
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:354
def dataset.Dataset.__getParentDataset (   self)
private

Definition at line 411 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, and str.

Referenced by dataset.Dataset.parentDataset().

def __getParentDataset( self ):
    """Ask DAS for the parent of this dataset and return the parent's name.

    Raises AllInOneError, including the DAS reply in the message, if the
    reply contains no parent name.
    """
    dasQuery = "parent dataset=" + self.__name + " instance="+self.__dasinstance
    data = self.__getData( dasQuery )
    try:
        return self.__findInJson(data, ["parent", "name"])
    except KeyError:
        # report the reply that was actually received; the name used here
        # before was not defined in this method and caused a NameError
        raise AllInOneError("Cannot find the parent of the dataset '" + self.__name + "'\n"
                            "Here is the DAS output:\n" + str(data) +
                            "\nIt's possible that this was a server error. If so, it may work if you try again later")
420 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:302
def __getParentDataset(self)
Definition: dataset.py:411
#define str(s)
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:354
def dataset.Dataset.__getRunList (   self)
private

Definition at line 618 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, and edm.print().

Referenced by dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.runList().

def __getRunList( self ):
    """Fetch the runs of this dataset from DAS.

    Returns the "run" entries of the DAS reply as a list, sorted by their
    "run_number".  Progress messages are printed to stdout.
    """
    queryTemplate = ( 'run dataset=%s instance=%s | grep run.run_number,'
                      'run.creation_time' )
    dasQuery_runs = queryTemplate % ( self.__name, self.__dasinstance )
    print("Requesting run information for '%s' from DAS..."%( self.__name ), end=' ')
    # make the message visible before the (possibly slow) query starts
    sys.stdout.flush()
    reply = self.__getData( dasQuery_runs )
    print("Done.")
    runs = [ self.__findInJson(entry,"run") for entry in reply ]
    return sorted( runs, key = lambda run: self.__findInJson(run, "run_number") )
628 
def __getRunList(self)
Definition: dataset.py:618
def __findInJson(self, jsondict, strings)
Definition: dataset.py:302
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:354
def dataset.Dataset.__lumiSelectionSnippet (   self,
  jsonPath = None,
  firstRun = None,
  lastRun = None 
)
private

Definition at line 123 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getRunList(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__lastusedrun, dataset.Dataset.getForceRunRangeFunction(), dataset.int, join(), list(), SiStripPI.max, min(), edm.print(), python.rootplot.root2matplotlib.replace(), split, and str.

Referenced by dataset.Dataset.__createSnippet().

def __lumiSelectionSnippet( self, jsonPath = None, firstRun = None, lastRun = None ):
    """Build the lumi section selection text and record the run range in use.

    Parameters:
      jsonPath -- path of a JSON lumi mask, or of a CMSSW cff snippet that
                  contains "process.source.lumisToProcess"; may be None
      firstRun, lastRun -- optional inclusive run limits

    Returns the text for the dataset snippet: "lumiSecs.extend( [...] )"
    statements, the (possibly run filtered) contents of a cff snippet, or ""
    when no selection was requested.

    Side effects: sets self.__firstusedrun and self.__lastusedrun, except
    when no selection is requested and a magnetic field was given by the user.

    Raises AllInOneError if jsonPath is neither a valid JSON file nor a cff
    snippet, or if the selection leaves no runs.
    """
    lumiSecExtend = ""
    # Every call of __getRunList() is a DAS query, so the result is kept and
    # fetched at most once per call of this method.
    runlist = None
    if firstRun or lastRun or jsonPath:
        splitLumiList = None
        if not jsonPath:
            # run range only: select whole runs from the run list of the dataset
            runlist = self.__getRunList()
            selectedRunList = runlist
            if firstRun:
                selectedRunList = [ run for run in selectedRunList
                                    if self.__findInJson(run, "run_number") >= firstRun ]
            if lastRun:
                selectedRunList = [ run for run in selectedRunList
                                    if self.__findInJson(run, "run_number") <= lastRun ]
            lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-"
                         + str( self.__findInJson(run, "run_number") ) + ":max"
                         for run in selectedRunList ]
            splitLumiList = list( self.__chunks( lumiList, 255 ) )
        else:
            theLumiList = None
            try:
                theLumiList = LumiList ( filename = jsonPath )
            except ValueError:
                # not JSON; it may still be a cff snippet, checked below
                pass

            if theLumiList is not None:
                allRuns = theLumiList.getRuns()
                runsToRemove = []
                for run in allRuns:
                    if firstRun and int( run ) < firstRun:
                        runsToRemove.append( run )
                    if lastRun and int( run ) > lastRun:
                        runsToRemove.append( run )
                theLumiList.removeRuns( runsToRemove )
                splitLumiList = list( self.__chunks(
                    theLumiList.getCMSSWString().split(','), 255 ) )
                if not (splitLumiList and splitLumiList[0] and splitLumiList[0][0]):
                    splitLumiList = None
            else:
                with open(jsonPath) as f:
                    jsoncontents = f.read()
                if "process.source.lumisToProcess" in jsoncontents:
                    msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
                    if firstRun or lastRun:
                        msg += ("\n (after applying firstRun and/or lastRun)")
                    msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
                    msg += "\nCheck your config file to make sure that it worked properly."
                    print(msg)

                    runlist = self.__getRunList()
                    if firstRun or lastRun:
                        # NOTE(review): -1 looks like a placeholder that the function
                        # from getForceRunRangeFunction updates while substituting;
                        # that function is not visible here - confirm.
                        self.__firstusedrun = -1
                        self.__lastusedrun = -1
                        jsoncontents = re.sub(r"\d+:(\d+|max)(-\d+:(\d+|max))?", self.getForceRunRangeFunction(firstRun, lastRun), jsoncontents)
                        # drop the empty entries left behind by removed ranges
                        jsoncontents = (jsoncontents.replace("'',\n","").replace("''\n","")
                                                    .replace('"",\n','').replace('""\n',''))
                        self.__firstusedrun = max(self.__firstusedrun, int(self.__findInJson(runlist[0],"run_number")))
                        self.__lastusedrun = min(self.__lastusedrun, int(self.__findInJson(runlist[-1],"run_number")))
                        if self.__lastusedrun < self.__firstusedrun:
                            # nothing left; this leads to the "without any runs" error below
                            jsoncontents = None
                    else:
                        self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
                        self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))
                    lumiSecExtend = jsoncontents
                    splitLumiList = None
                else:
                    raise AllInOneError("%s is not a valid json file!" % jsonPath)

        if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
            lumiSecStr = [ "',\n'".join( lumis )
                           for lumis in splitLumiList ]
            lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )"
                           for lumis in lumiSecStr ]
            lumiSecExtend = "\n".join( lumiSecStr )
            if runlist is None:
                runlist = self.__getRunList()
            # the used range is the overlap of the selection and the dataset
            self.__firstusedrun = max(int(splitLumiList[0][0].split(":")[0]), int(self.__findInJson(runlist[0],"run_number")))
            self.__lastusedrun = min(int(splitLumiList[-1][-1].split(":")[0]), int(self.__findInJson(runlist[-1],"run_number")))
        elif lumiSecExtend:
            # a cff snippet is used as it is
            pass
        else:
            msg = "You are trying to run a validation without any runs! Check that:"
            if firstRun or lastRun:
                msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
            if jsonPath:
                msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
            if (firstRun or lastRun) and jsonPath:
                msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
            raise AllInOneError(msg)

    elif self.__inputMagneticField is None:
        # With a user supplied magnetic field self.__firstusedrun and
        # self.__lastusedrun are never needed, so DAS is only asked otherwise.
        runlist = self.__getRunList()
        self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
        self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))

    return lumiSecExtend
218 
def __getRunList(self)
Definition: dataset.py:618
def __lumiSelectionSnippet(self, jsonPath=None, firstRun=None, lastRun=None)
Definition: dataset.py:123
def __findInJson(self, jsondict, strings)
Definition: dataset.py:302
def replace(string, replacements)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
T min(T a, T b)
Definition: MathUtil.h:58
def getForceRunRangeFunction(self, firstRun, lastRun)
Definition: dataset.py:349
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def __chunks(self, theList, n)
Definition: dataset.py:87
#define str(s)
double split
Definition: MVATrainer.cc:139
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run
def dataset.Dataset.buildListOfBadFiles (   self)
fills the list of bad files from the IntegrityCheck log.

When the integrity check file is not available,
files are considered as good.

Definition at line 277 of file dataset.py.

278  '''fills the list of bad files from the IntegrityCheck log.
279 
280  When the integrity check file is not available,
281  files are considered as good.'''
282  mask = "IntegrityCheck"
283 
284  self.bad_files = {}
285  self.good_files = []
286 
287  file_mask = castortools.matchingFiles(self.castorDir, '^%s_.*\.txt$' % mask)
288  if file_mask:
289  # here to avoid circular dependency
290  from edmIntegrityCheck import PublishToFileSystem
291  p = PublishToFileSystem(mask)
292  report = p.get(self.castorDir)
293  if report is not None and report:
294  self.maskExists = True
295  self.report = report
296  dup = report.get('ValidDuplicates',{})
297  for name, status in six.iteritems(report['Files']):
298  # print name, status
299  if not status[0]:
300  self.bad_files[name] = 'MarkedBad'
301  elif name in dup:
302  self.bad_files[name] = 'ValidDup'
303  else:
304  self.good_files.append( name )
305  else:
306  raise IntegrityCheckError( "ERROR: IntegrityCheck log file IntegrityCheck_XXXXXXXXXX.txt not found" )
307 
def buildListOfBadFiles(self)
Definition: dataset.py:277
def dataset.Dataset.buildListOfFiles (   self,
  pattern = '.*root' 
)
fills list of files, taking all root files matching the pattern in the castor dir

Definition at line 273 of file dataset.py.

def buildListOfFiles(self, pattern='.*root'):
    '''Store in self.files the files of self.castorDir that match pattern.

    The matching itself is done by castortools.matchingFiles.'''
    matched = castortools.matchingFiles( self.castorDir, pattern )
    self.files = matched
276 
def buildListOfFiles(self, pattern='.*root')
Definition: dataset.py:273
def dataset.Dataset.convertTimeToRun (   self,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  shortTuple = True 
)

Definition at line 643 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__dateString(), dataset.Dataset.__datetime(), dataset.Dataset.__find_ge(), dataset.Dataset.__find_lt(), dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__getRunList(), dataset.Dataset.__name, electrons_cff.bool, and dataset.int.

Referenced by dataset.Dataset.__createSnippet(), and dataset.Dataset.__dateString().

643  shortTuple = True ):
644  if ( begin and firstRun ) or ( end and lastRun ):
645  msg = ( "The Usage of "
646  + "'begin' & 'firstRun' " * int( bool( begin and
647  firstRun ) )
648  + "and " * int( bool( ( begin and firstRun ) and
649  ( end and lastRun ) ) )
650  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
651  + "is ambigous." )
652  raise AllInOneError( msg )
653 
654  if begin or end:
655  runList = [ self.__findInJson(run, "run_number") for run in self.__getRunList() ]
656 
657  if begin:
658  lastdate = begin
659  for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months after begin
660  firstdate = lastdate
661  lastdate = self.__dateString(self.__datetime(firstdate) + datetime.timedelta(delta))
662  dasQuery_begin = "run date between[%s,%s] instance=%s" % (firstdate, lastdate, self.__dasinstance)
663  begindata = self.__getData(dasQuery_begin)
664  if len(begindata) > 0:
665  begindata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
666  try:
667  runIndex = self.__find_ge( runList, self.__findInJson(begindata[0], ["run", "run_number"]))
668  except ValueError:
669  msg = ( "Your 'begin' is after the creation time of the last "
670  "run in the dataset\n'%s'"%( self.__name ) )
671  raise AllInOneError( msg )
672  firstRun = runList[runIndex]
673  begin = None
674  break
675 
676  if begin:
677  raise AllInOneError("No runs within a reasonable time interval after your 'begin'."
678  "Try using a 'begin' that has runs soon after it (within 2 months at most)")
679 
680  if end:
681  firstdate = end
682  for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months before end
683  lastdate = firstdate
684  firstdate = self.__dateString(self.__datetime(lastdate) - datetime.timedelta(delta))
685  dasQuery_end = "run date between[%s,%s] instance=%s" % (firstdate, lastdate, self.__dasinstance)
686  enddata = self.__getData(dasQuery_end)
687  if len(enddata) > 0:
688  enddata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
689  try:
690  runIndex = self.__find_lt( runList, self.__findInJson(enddata[-1], ["run", "run_number"]))
691  except ValueError:
692  msg = ( "Your 'end' is before the creation time of the first "
693  "run in the dataset\n'%s'"%( self.__name ) )
694  raise AllInOneError( msg )
695  lastRun = runList[runIndex]
696  end = None
697  break
698 
699  if end:
700  raise AllInOneError("No runs within a reasonable time interval before your 'end'."
701  "Try using an 'end' that has runs soon before it (within 2 months at most)")
702 
703  if shortTuple:
704  return firstRun, lastRun
705  else:
706  return begin, end, firstRun, lastRun
707 
def __getRunList(self)
Definition: dataset.py:618
def __findInJson(self, jsondict, strings)
Definition: dataset.py:302
def __find_lt(self, a, x)
Definition: dataset.py:288
def __datetime(self, stringForDas)
Definition: dataset.py:629
def __dateString(self, date)
Definition: dataset.py:638
def __find_ge(self, a, x)
Definition: dataset.py:295
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:354
def dataset.Dataset.createdatasetfile_hippy (   self,
  filename,
  filesperjob,
  firstrun,
  lastrun 
)

Definition at line 850 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.fileList(), and join().

def createdatasetfile_hippy(self, filename, filesperjob, firstrun, lastrun):
    """Write the files of the given run range to filename, one job per line.

    The file list is requested with forcerunselection=True and split into
    groups of filesperjob files by self.__chunks.  Each output line holds
    the file names of one group, single quoted and separated by commas.
    """
    # the output file is opened (and truncated) before the file list is requested
    with open(filename, "w") as out:
        selected = self.fileList(firstRun=firstrun, lastRun=lastrun, forcerunselection=True)
        for job in self.__chunks(selected, filesperjob):
            quoted = ["'{}'".format(name) for name in job]
            out.write(",".join(quoted) + "\n")
854 
def createdatasetfile_hippy(self, filename, filesperjob, firstrun, lastrun)
Definition: dataset.py:850
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def __chunks(self, theList, n)
Definition: dataset.py:87
def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:883
def dataset.Dataset.datasetSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  crab = False,
  parent = False 
)

Definition at line 727 of file dataset.py.

References dataset.Dataset.__createSnippet(), dataset.Dataset.__filename, dataset.Dataset.__name, dataset.Dataset.__official, dataset.Dataset.__origName, dataset.Dataset.__predefined, dataset.Dataset.dump_cff(), and edm.print().

Referenced by dataset.Dataset.parentDataset().

727  firstRun = None, lastRun = None, crab = False, parent = False ):
728  if not firstRun: firstRun = None
729  if not lastRun: lastRun = None
730  if not begin: begin = None
731  if not end: end = None
732  if self.__predefined and (jsonPath or begin or end or firstRun or lastRun):
733  msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun' "
734  "only work for official datasets, not predefined _cff.py files" )
735  raise AllInOneError( msg )
736  if self.__predefined and parent:
737  with open(self.__filename) as f:
738  if "secFiles.extend" not in f.read():
739  msg = ("The predefined dataset '%s' does not contain secondary files, "
740  "which your validation requires!") % self.__name
741  if self.__official:
742  self.__name = self.__origName
743  self.__predefined = False
744  print(msg)
745  print ("Retreiving the files from DAS. You will be asked if you want "
746  "to overwrite the old dataset.\n"
747  "It will still be compatible with validations that don't need secondary files.")
748  else:
749  raise AllInOneError(msg)
750 
751  if self.__predefined:
752  snippet = ("process.load(\"Alignment.OfflineValidation.%s_cff\")\n"
753  "process.maxEvents = cms.untracked.PSet(\n"
754  " input = cms.untracked.int32(.oO[nEvents]Oo. / .oO[parallelJobs]Oo.)\n"
755  ")\n"
756  "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)"
757  %(self.__name))
758  if not parent:
759  with open(self.__filename) as f:
760  if "secFiles.extend" in f.read():
761  snippet += "\nprocess.source.secondaryFileNames = cms.untracked.vstring()"
762  return snippet
763  theMap = { "process": "process.",
764  "tab": " " * len( "process." ),
765  "nEvents": ".oO[nEvents]Oo. / .oO[parallelJobs]Oo.",
766  "skipEventsString": "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)\n",
767  "importCms": "",
768  "header": ""
769  }
770  datasetSnippet = self.__createSnippet( jsonPath = jsonPath,
771  begin = begin,
772  end = end,
773  firstRun = firstRun,
774  lastRun = lastRun,
775  repMap = theMap,
776  crab = crab,
777  parent = parent )
778  if jsonPath == "" and begin == "" and end == "" and firstRun == "" and lastRun == "":
779  try:
780  self.dump_cff(parent = parent)
781  except AllInOneError as e:
782  print("Can't store the dataset as a cff:")
783  print(e)
784  print("This may be inconvenient in the future, but will not cause a problem for this validation.")
785  return datasetSnippet
786 
def __createSnippet(self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, repMap=None, crab=False, parent=False)
Definition: dataset.py:243
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def dump_cff(self, outName=None, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, parent=False)
Definition: dataset.py:789
def dataset.Dataset.dataType (   self)

Definition at line 708 of file dataset.py.

References dataset.Dataset.__dataType, and dataset.Dataset.__getDataType().

def dataType( self ):
    """Return the data type of the dataset.

    The value is taken from self.__dataType; if that is still empty it is
    first determined with self.__getDataType() and stored.
    """
    if self.__dataType:
        return self.__dataType
    self.__dataType = self.__getDataType()
    return self.__dataType
712 
def __getDataType(self)
Definition: dataset.py:386
def dataType(self)
Definition: dataset.py:708
def dataset.Dataset.dump_cff (   self,
  outName = None,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  parent = False 
)

Definition at line 789 of file dataset.py.

References dataset.Dataset.__cmssw, dataset.Dataset.__createSnippet(), dataset.Dataset.__dataType, dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__magneticField, dataset.Dataset.__name, edm.print(), python.rootplot.root2matplotlib.replace(), split, str, and digitizers_cfi.strip.

Referenced by dataset.Dataset.datasetSnippet().

789  end = None, firstRun = None, lastRun = None, parent = False ):
790  if outName == None:
791  outName = "Dataset" + self.__name.replace("/", "_")
792  packageName = os.path.join( "Alignment", "OfflineValidation" )
793  if not os.path.exists( os.path.join(
794  self.__cmssw, "src", packageName ) ):
795  msg = ("You try to store the predefined dataset'%s'.\n"
796  "For that you need to check out the package '%s' to your "
797  "private relase area in\n"%( outName, packageName )
798  + self.__cmssw )
799  raise AllInOneError( msg )
800  theMap = { "process": "",
801  "tab": "",
802  "nEvents": str( -1 ),
803  "skipEventsString": "",
804  "importCms": "import FWCore.ParameterSet.Config as cms\n",
805  "header": "#Do not delete or (unless you know what you're doing) change these comments\n"
806  "#%(name)s\n"
807  "#data type: %(dataType)s\n"
808  "#magnetic field: .oO[magneticField]Oo.\n" #put in magnetic field later
809  %{"name": self.__name, #need to create the snippet before getting the magnetic field
810  "dataType": self.__dataType} #so that we know the first and last runs
811  }
812  dataset_cff = self.__createSnippet( jsonPath = jsonPath,
813  begin = begin,
814  end = end,
815  firstRun = firstRun,
816  lastRun = lastRun,
817  repMap = theMap,
818  parent = parent)
819  magneticField = self.__magneticField
820  if magneticField == "MagneticField":
821  magneticField = "%s, %s #%s" % (magneticField,
822  str(self.__getMagneticFieldForRun()).replace("\n"," ").split("#")[0].strip(),
823  "Use MagneticField_cff.py; the number is for determining which track selection to use."
824  )
825  dataset_cff = dataset_cff.replace(".oO[magneticField]Oo.",magneticField)
826  filePath = os.path.join( self.__cmssw, "src", packageName,
827  "python", outName + "_cff.py" )
828  if os.path.exists( filePath ):
829  existMsg = "The predefined dataset '%s' already exists.\n"%( outName )
830  askString = "Do you want to overwrite it? [y/n]\n"
831  inputQuery = existMsg + askString
832  while True:
833  userInput = raw_input( inputQuery ).lower()
834  if userInput == "y":
835  break
836  elif userInput == "n":
837  return
838  else:
839  inputQuery = askString
840  print ( "The predefined dataset '%s' will be stored in the file\n"
841  %( outName )
842  + filePath +
843  "\nFor future use you have to do 'scram b'." )
844  print()
845  theFile = open( filePath, "w" )
846  theFile.write( dataset_cff )
847  theFile.close()
848  return
849 
def __getMagneticFieldForRun(self, run=-1, tolerance=0.5)
Definition: dataset.py:502
def __createSnippet(self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, repMap=None, crab=False, parent=False)
Definition: dataset.py:243
def replace(string, replacements)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
#define str(s)
double split
Definition: MVATrainer.cc:139
def dataset.Dataset.extractFileSizes (   self)
Get the file size for each file, from the eos ls -l command.

Definition at line 308 of file dataset.py.

References dataset.EOSDataset.castorDir, and dataset.Dataset.castorDir.

def extractFileSizes(self):
    '''Fill self.filesAndSizes from the xrd directory listing of self.castorDir.

    Only lines of the listing with exactly five whitespace separated fields
    are used.  The key is self.lfnDir joined with the last path component of
    the fifth field, the value is the second field, kept as a string.'''
    # EOS command does not work in tier3
    listing = castortools.runXRDCommand(self.castorDir,'dirlist')[0]
    entries = listing.split('\n')
    self.filesAndSizes = {}
    for entry in entries:
        fields = entry.split()
        if len(fields) == 5:
            # using full abs path as a key.
            baseName = fields[4].split("/")[-1]
            self.filesAndSizes['/'.join([self.lfnDir, baseName])] = fields[1]
322 
def extractFileSizes(self)
Definition: dataset.py:308
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
double split
Definition: MVATrainer.cc:139
def dataset.Dataset.fileInfoList (   self,
  parent = False 
)

Definition at line 912 of file dataset.py.

References dataset.Dataset.__dasLimit, and dataset.Dataset.__getFileInfoList().

Referenced by dataset.Dataset.fileList().

def fileInfoList( self, parent = False ):
    """Return the result of self.__getFileInfoList for the stored DAS limit.

    parent is handed on unchanged to self.__getFileInfoList.
    """
    fetch = self.__getFileInfoList
    return fetch( self.__dasLimit, parent )
914 
def __getFileInfoList(self, dasLimit, parent=False)
Definition: dataset.py:559
def fileInfoList(self, parent=False)
Definition: dataset.py:912
def dataset.Dataset.fileList (   self,
  parent = False,
  firstRun = None,
  lastRun = None,
  forcerunselection = False 
)

Definition at line 883 of file dataset.py.

References dataset.Dataset.__findInJson(), dataset.Dataset.fileInfoList(), objects.autophobj.float, dataset.Dataset.getrunnumberfromfilename(), and edm.print().

Referenced by dataset.Dataset.__fileListSnippet(), and dataset.Dataset.createdatasetfile_hippy().

def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
    """Return the file names of this dataset, optionally limited to a run range.

    The names are the "name" entries looked up with ``__findInJson`` in each
    item of ``self.fileInfoList(parent)``.

    When ``firstRun`` or ``lastRun`` is given (truthy), only files whose run
    number, as returned by ``self.getrunnumberfromfilename``, lies in
    ``[firstRun, lastRun]`` are kept; a missing bound defaults to -1 or
    infinity respectively.  Files whose run number cannot be determined
    (``AllInOneError``) are kept and reported on stdout, unless
    ``forcerunselection`` is true, in which case the error is re-raised.
    """
    fileList = [self.__findInJson(fileInfo, "name")
                for fileInfo in self.fileInfoList(parent)]

    if firstRun or lastRun:
        if not firstRun: firstRun = -1
        if not lastRun: lastRun = float('infinity')
        unknownfilenames, reasons = [], set()
        # Build the filtered list in a single pass instead of calling
        # list.remove() for every rejected entry (quadratic on long lists).
        selected = []
        for filename in fileList:
            try:
                keep = firstRun <= self.getrunnumberfromfilename(filename) <= lastRun
            except AllInOneError as e:
                if forcerunselection: raise
                # str(e) rather than e.message: the ``message`` attribute of
                # exceptions does not exist on Python 3.
                # The 2nd and 3rd lines of the error text are expected to be
                # the file name and the reason, respectively.
                details = str(e).split("\n")
                unknownfilenames.append(details[1])
                reasons.add(details[2])
                keep = True  # undetermined run number: keep the file
            if keep:
                selected.append(filename)
        fileList = selected
        if reasons:
            if len(unknownfilenames) == len(fileList):
                print("Could not figure out the run numbers of any of the filenames for the following reason(s):")
            else:
                print("Could not figure out the run numbers of the following filenames:")
                for filename in unknownfilenames:
                    print(" "+filename)
                print("for the following reason(s):")
            for reason in reasons:
                print(" "+reason)
            print("Using the files anyway. The runs will be filtered at the CMSSW level.")
    return fileList
911 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:302
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def fileInfoList(self, parent=False)
Definition: dataset.py:912
def getrunnumberfromfilename(filename)
Definition: dataset.py:856
def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:883
def dataset.Dataset.forcerunrange (   self,
  firstRun,
  lastRun,
  s 
)
s must be in the format run1:lum1-run2:lum2

Definition at line 324 of file dataset.py.

References dataset.Dataset.__firstusedrun, dataset.Dataset.__lastusedrun, dataset.int, and split.

Referenced by dataset.Dataset.getForceRunRangeFunction().

def forcerunrange(self, firstRun, lastRun, s):
    """s must be in the format run1:lum1-run2:lum2

    ``s`` is an object whose ``group()`` returns that text (the
    ``-run2:lum2`` part may be absent, in which case the end equals the
    start).  The range is clipped to ``[firstRun, lastRun]``:

    * an empty string is returned when the range lies entirely outside;
    * a start below ``firstRun`` (or any start when ``firstRun`` is
      negative) is replaced by ``firstRun:1``;
    * an end above ``lastRun`` is replaced by ``lastRun:max``.

    ``__firstusedrun`` / ``__lastusedrun`` are updated to the lowest start
    run and highest end run returned so far (a negative ``__firstusedrun``
    counts as not yet set).
    """
    text = s.group()
    ends = text.split("-")
    start = ends[0].split(":")
    run1, lum1 = start[0], start[1]
    try:
        stop = ends[1].split(":")
        run2, lum2 = stop[0], stop[1]
    except IndexError:
        # No usable "-run2:lum2" part: the end is the same as the start.
        run2, lum2 = run1, lum1

    # Range completely outside the requested window.
    if int(run2) < firstRun or int(run1) > lastRun:
        return ""

    # Clip both ends to the requested window.
    if int(run1) < firstRun or firstRun < 0:
        run1, lum1 = firstRun, 1
    if int(run2) > lastRun:
        run2, lum2 = lastRun, "max"

    # Keep track of the overall run span actually used.
    if int(run1) < self.__firstusedrun or self.__firstusedrun < 0:
        self.__firstusedrun = int(run1)
    if int(run2) > self.__lastusedrun:
        self.__lastusedrun = int(run2)

    return "%s:%s-%s:%s" % (run1, lum1, run2, lum2)
348 
def forcerunrange(self, firstRun, lastRun, s)
Definition: dataset.py:324
double split
Definition: MVATrainer.cc:139
def dataset.Dataset.getForceRunRangeFunction (   self,
  firstRun,
  lastRun 
)

Definition at line 349 of file dataset.py.

References dataset.Dataset.forcerunrange().

Referenced by dataset.Dataset.__lumiSelectionSnippet().

def getForceRunRangeFunction(self, firstRun, lastRun):
    """Return a one-argument callable bound to the given run window.

    The returned callable takes ``s`` and returns
    ``self.forcerunrange(firstRun, lastRun, s)``.
    """
    return lambda s: self.forcerunrange(firstRun, lastRun, s)
353 
def forcerunrange(self, firstRun, lastRun, s)
Definition: dataset.py:324
def getForceRunRangeFunction(self, firstRun, lastRun)
Definition: dataset.py:349
def dataset.Dataset.getPrimaryDatasetEntries (   self)

Definition at line 328 of file dataset.py.

References dataset.int, runall.testit.report, WorkFlowRunner.WorkFlowRunner.report, dataset.BaseDataset.report, and ALIUtils.report.

329  if self.report is not None and self.report:
330  return int(self.report.get('PrimaryDatasetEntries',-1))
331  return -1
332 
333 
def getPrimaryDatasetEntries(self)
Definition: dataset.py:328
def dataset.Dataset.getrunnumberfromfilename (   filename)
static

Definition at line 856 of file dataset.py.

References Vispa.Plugins.EdmBrowser.EdmDataAccessor.all(), dataset.int, and join().

Referenced by dataset.Dataset.fileList().

857  parts = filename.split("/")
858  result = error = None
859  if parts[0] != "" or parts[1] != "store":
860  error = "does not start with /store"
861  elif parts[2] in ["mc", "relval"]:
862  result = 1
863  elif not parts[-1].endswith(".root"):
864  error = "does not end with something.root"
865  elif len(parts) != 12:
866  error = "should be exactly 11 slashes counting the first one"
867  else:
868  runnumberparts = parts[-5:-2]
869  if not all(len(part)==3 for part in runnumberparts):
870  error = "the 3 directories {} do not have length 3 each".format("/".join(runnumberparts))
871  try:
872  result = int("".join(runnumberparts))
873  except ValueError:
874  error = "the 3 directories {} do not form an integer".format("/".join(runnumberparts))
875 
876  if error:
877  error = "could not figure out which run number this file is from:\n{}\n{}".format(filename, error)
878  raise AllInOneError(error)
879 
880  return result
881 
def getrunnumberfromfilename(filename)
Definition: dataset.py:856
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def dataset.Dataset.magneticField (   self)

Definition at line 713 of file dataset.py.

References dataset.Dataset.__getMagneticField(), and dataset.Dataset.__magneticField.

def magneticField(self):
    """Return the magnetic field of the dataset, computing it on first use.

    The value is cached in ``__magneticField``; ``__getMagneticField`` is
    called only while the cached value is falsy.
    """
    if self.__magneticField:
        return self.__magneticField
    self.__magneticField = self.__getMagneticField()
    return self.__magneticField
717 
def __getMagneticField(self)
Definition: dataset.py:421
def magneticField(self)
Definition: dataset.py:713
def dataset.Dataset.magneticFieldForRun (   self,
  run = -1 
)

Definition at line 718 of file dataset.py.

References dataset.Dataset.__getMagneticFieldForRun().

def magneticFieldForRun(self, run=-1):
    """Return what ``__getMagneticFieldForRun`` yields for ``run``.

    ``run`` defaults to -1 and is passed through unchanged.
    """
    field = self.__getMagneticFieldForRun(run)
    return field
720 
def __getMagneticFieldForRun(self, run=-1, tolerance=0.5)
Definition: dataset.py:502
def magneticFieldForRun(self, run=-1)
Definition: dataset.py:718
def dataset.Dataset.name (   self)
def dataset.Dataset.parentDataset (   self)

Definition at line 721 of file dataset.py.

References dataset.Dataset.__getParentDataset(), dataset.Dataset.__parentDataset, and dataset.Dataset.datasetSnippet().

Referenced by dataset.Dataset.__getFileInfoList().

def parentDataset(self):
    """Return the parent dataset, looking it up on first use.

    The value is cached in ``__parentDataset``; ``__getParentDataset`` is
    called only while the cached value is falsy.
    """
    if self.__parentDataset:
        return self.__parentDataset
    self.__parentDataset = self.__getParentDataset()
    return self.__parentDataset
725 
def parentDataset(self)
Definition: dataset.py:721
def __getParentDataset(self)
Definition: dataset.py:411
def dataset.Dataset.predefined (   self)

Definition at line 918 of file dataset.py.

References dataset.Dataset.__predefined.

def predefined(self):
    """Return the stored ``__predefined`` value of this dataset."""
    flag = self.__predefined
    return flag
920 
def predefined(self)
Definition: dataset.py:918
def dataset.Dataset.printInfo (   self)

Definition at line 323 of file dataset.py.

References dataset.EOSDataset.castorDir, dataset.Dataset.castorDir, dataset.Dataset.lfnDir, ElectronMVAID.ElectronMVAID.name, counter.Counter.name, average.Average.name, histograms.Histograms.name, AlignableObjectId::entry.name, cond::persistency::TAG::NAME.name, TmModule.name, cond::persistency::GLOBAL_TAG::NAME.name, core.autovars.NTupleVariable.name, cond::persistency::RUN_INFO::RUN_NUMBER.name, cond::persistency::TAG::TIME_TYPE.name, cond::persistency::GLOBAL_TAG::VALIDITY.name, cond::persistency::RUN_INFO::START_TIME.name, cond::persistency::TAG::OBJECT_TYPE.name, cond::persistency::GLOBAL_TAG::DESCRIPTION.name, cond::persistency::RUN_INFO::END_TIME.name, cond::persistency::TAG::SYNCHRONIZATION.name, cond::persistency::GLOBAL_TAG::RELEASE.name, MEPSet.name, cond::persistency::TAG::END_OF_VALIDITY.name, cond::persistency::GLOBAL_TAG::SNAPSHOT_TIME.name, cond::persistency::GTEditorData.name, cond::persistency::TAG::DESCRIPTION.name, cond::persistency::GLOBAL_TAG::INSERTION_TIME.name, nanoaod::MergeableCounterTable::SingleColumn< T >.name, cond::persistency::TAG::LAST_VALIDATED_TIME.name, cond::persistency::TAG::INSERTION_TIME.name, cond::persistency::TAG::MODIFICATION_TIME.name, preexistingValidation.PreexistingValidation.name, FWTGeoRecoGeometry::Info.name, Types._Untracked.name, dataset.BaseDataset.name, OutputMEPSet.name, personalPlayback.Applet.name, ParameterSet.name, PixelDCSObject< T >::Item.name, DQMRivetClient::LumiOption.name, MagCylinder.name, analyzer.Analyzer.name, ParSet.name, DQMRivetClient::ScaleFactorOption.name, EgHLTOfflineSummaryClient::SumHistBinData.name, SingleObjectCondition.name, cond::persistency::GTProxyData.name, core.autovars.NTupleObjectType.name, MyWatcher.name, edm::PathTimingSummary.name, nanoaod::MergeableCounterTable::VectorColumn< T >.name, cond::TimeTypeSpecs.name, lumi::TriggerInfo.name, alignment.Alignment.name, edm::PathSummary.name, PixelEndcapLinkMaker::Item.name, cond::persistency::GLOBAL_TAG_MAP::GLOBAL_TAG_NAME.name, 
perftools::EdmEventSize::BranchRecord.name, DQMGenericClient::EfficOption.name, FWTableViewManager::TableEntry.name, cond::persistency::GLOBAL_TAG_MAP::RECORD.name, PixelBarrelLinkMaker::Item.name, EcalLogicID.name, cms::DDAlgoArguments.name, cond::persistency::GLOBAL_TAG_MAP::LABEL.name, validateAlignments.ParallelMergeJob.name, MEtoEDM< T >::MEtoEDMObject.name, cond::persistency::GLOBAL_TAG_MAP::TAG_NAME.name, ExpressionHisto< T >.name, XMLProcessor::_loaderBaseConfig.name, cond::persistency::PAYLOAD::HASH.name, cond::persistency::PAYLOAD::OBJECT_TYPE.name, cond::persistency::PAYLOAD::DATA.name, genericValidation.GenericValidation.name, TreeCrawler.Package.name, cond::persistency::PAYLOAD::STREAMER_INFO.name, options.ConnectionHLTMenu.name, MagGeoBuilderFromDDD::volumeHandle.name, cond::persistency::PAYLOAD::VERSION.name, cond::persistency::PAYLOAD::INSERTION_TIME.name, DQMGenericClient::ProfileOption.name, dqmoffline::l1t::HistDefinition.name, DQMGenericClient::NormOption.name, emtf::Node.name, h4DSegm.name, PhysicsTools::Calibration::Variable.name, FastHFShowerLibrary.name, core.TriggerMatchAnalyzer.TriggerMatchAnalyzer.name, DQMGenericClient::CDOption.name, CounterChecker.name, cond::TagInfo_t.name, looper.Looper.name, DQMGenericClient::NoFlowOption.name, cond::persistency::IOV::TAG_NAME.name, EDMtoMEConverter.name, cond::persistency::IOV::SINCE.name, TrackerSectorStruct.name, Mapper::definition< ScannerT >.name, cond::persistency::IOV::PAYLOAD_HASH.name, cond::persistency::IOV::INSERTION_TIME.name, classes.MonitorData.name, HistogramManager.name, MuonGeometrySanityCheckPoint.name, classes.OutputData.name, options.HLTProcessOptions.name, h2DSegm.name, core.TriggerBitAnalyzer.TriggerBitAnalyzer.name, nanoaod::FlatTable::Column.name, config.Analyzer.name, geometry.Structure.name, core.autovars.NTupleSubObject.name, DQMNet::WaitObject.name, AlpgenParameterName.name, SiStripMonitorDigi.name, core.autovars.NTupleObject.name, config.Service.name, 
cond::persistency::TAG_LOG::TAG_NAME.name, cond::persistency::TAG_LOG::EVENT_TIME.name, cond::persistency::TAG_LOG::USER_NAME.name, cond::persistency::TAG_LOG::HOST_NAME.name, cond::persistency::TAG_LOG::COMMAND.name, cond::persistency::TAG_LOG::ACTION.name, cond::persistency::TAG_LOG::USER_TEXT.name, core.autovars.NTupleCollection.name, BPHRecoBuilder::BPHRecoSource.name, BPHRecoBuilder::BPHCompSource.name, personalPlayback.FrameworkJob.name, plotscripts.SawTeethFunction.name, crabFunctions.CrabTask.name, hTMaxCell.name, cscdqm::ParHistoDef.name, BeautifulSoup.Tag.name, SummaryOutputProducer::GenericSummary.name, BeautifulSoup.SoupStrainer.name, and edm.print().

def printInfo(self):
    """Print the sample name, LFN directory and Castor path, one per line."""
    rows = (
        ('sample : ', 'name'),
        ('LFN : ', 'lfnDir'),
        ('Castor path : ', 'castorDir'),
    )
    # Attributes are read one at a time, right before each line is printed.
    for label, attribute in rows:
        print(label + getattr(self, attribute))
327 
def printInfo(self)
Definition: dataset.py:323
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def dataset.Dataset.runList (   self)

Definition at line 922 of file dataset.py.

References dataset.Dataset.__getRunList(), and edm.print().

def runList(self):
    """Return the run list produced by the private ``__getRunList`` helper."""
    runs = self.__getRunList()
    return runs
924 
925 
def __getRunList(self)
Definition: dataset.py:618
def runList(self)
Definition: dataset.py:922

Member Data Documentation

dataset.Dataset.__cmssw
private

Definition at line 26 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField(), and dataset.Dataset.dump_cff().

dataset.Dataset.__cmsswrelease
private

Definition at line 27 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField().

dataset.Dataset.__dasinstance
private
dataset.Dataset.__dasLimit
private

Definition at line 24 of file dataset.py.

Referenced by dataset.Dataset.fileInfoList().

dataset.Dataset.__dataType
private
tuple dataset.Dataset.__dummy_source_template
staticprivate
Initial value:
1 = ("readFiles = cms.untracked.vstring()\n"
2  "secFiles = cms.untracked.vstring()\n"
3  "%(process)ssource = cms.Source(\"PoolSource\",\n"
4  "%(tab)s secondaryFileNames ="
5  "secFiles,\n"
6  "%(tab)s fileNames = readFiles\n"
7  ")\n"
8  "readFiles.extend(['dummy_File.root'])\n"
9  "%(process)smaxEvents = cms.untracked.PSet( "
10  "input = cms.untracked.int32(%(nEvents)s) )\n"
11  "%(skipEventsString)s\n")

Definition at line 111 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().

dataset.Dataset.__filename
private
dataset.Dataset.__firstusedrun
private
dataset.Dataset.__inputMagneticField
private
dataset.Dataset.__lastusedrun
private
dataset.Dataset.__magneticField
private
dataset.Dataset.__name
private
dataset.Dataset.__official
private

Definition at line 34 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__origName
private

Definition at line 23 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__parentDataset
private

Definition at line 30 of file dataset.py.

Referenced by dataset.Dataset.parentDataset().

dataset.Dataset.__predefined
private
dataset.Dataset.__source_template
staticprivate

Definition at line 93 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().

dataset.Dataset.bad_files

Definition at line 284 of file dataset.py.

dataset.Dataset.castorDir

Definition at line 268 of file dataset.py.

Referenced by dataset.Dataset.extractFileSizes(), and dataset.Dataset.printInfo().

dataset.Dataset.files

Definition at line 275 of file dataset.py.

dataset.Dataset.filesAndSizes

Definition at line 313 of file dataset.py.

dataset.Dataset.good_files

Definition at line 285 of file dataset.py.

dataset.Dataset.lfnDir

Definition at line 267 of file dataset.py.

Referenced by dataset.Dataset.printInfo().

dataset.Dataset.maskExists

Definition at line 269 of file dataset.py.

dataset.Dataset.report

Definition at line 270 of file dataset.py.

Referenced by addOnTests.testit.run().