
dataset.Dataset Class Reference
Inheritance diagram for dataset.Dataset:
dataset.Dataset inherits from dataset.BaseDataset and dataset.DatasetBase.

Public Member Functions

def __init__ (self, datasetName, dasLimit=0, tryPredefinedFirst=True, cmssw=os.environ["CMSSW_BASE"], cmsswrelease=os.environ["CMSSW_RELEASE_BASE"], magneticfield=None, dasinstance=None)
 
def __init__ (self, datasetname, dasinstance=defaultdasinstance)
 
def __init__ (self, name, user, pattern='.*root')
 
def buildListOfBadFiles (self)
 
def buildListOfFiles (self, pattern='.*root')
 
def convertTimeToRun (self, begin=None, end=None, firstRun=None, lastRun=None, shortTuple=True)
 
def createdatasetfile_hippy (self, filename, filesperjob, firstrun, lastrun)
 
def datasetSnippet (self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, crab=False, parent=False)
 
def dataType (self)
 
def dump_cff (self, outName=None, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, parent=False)
 
def extractFileSizes (self)
 
def fileInfoList (self, parent=False)
 
def fileList (self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
 
def forcerunrange (self, firstRun, lastRun, s)
 
def getfiles (self, usecache)
 
def getForceRunRangeFunction (self, firstRun, lastRun)
 
def getPrimaryDatasetEntries (self)
 
def headercomment (self)
 
def magneticField (self)
 
def magneticFieldForRun (self, run=-1)
 
def name (self)
 
def parentDataset (self)
 
def predefined (self)
 
def printInfo (self)
 
def runList (self)
 
- Public Member Functions inherited from dataset.BaseDataset
def __init__ (self, name, user, pattern='.*root', run_range=None, dbsInstance=None)
 
def buildListOfBadFiles (self)
 
def buildListOfFiles (self, pattern)
 
def extractFileSizes (self)
 
def getPrimaryDatasetEntries (self)
 
def listOfFiles (self)
 
def listOfGoodFiles (self)
 
def listOfGoodFilesWithPrescale (self, prescale)
 
def printFiles (self, abspath=True, info=True)
 
def printInfo (self)
 
- Public Member Functions inherited from dataset.DatasetBase
def getfiles (self, usecache)
 
def headercomment (self)
 
def writefilelist_hippy (self, firstrun, lastrun, runs, eventsperjob, maxevents, outputfile, usecache=True)
 
def writefilelist_validation (self, firstrun, lastrun, runs, maxevents, outputfile=None, usecache=True)
 

Static Public Member Functions

def getrunnumberfromfilename (filename)
 

Public Attributes

 bad_files
 
 castorDir
 
 dasinstance
 
 datasetname
 
 filenamebase
 
 files
 
 filesAndSizes
 
 good_files
 
 lfnDir
 
 maskExists
 
 official
 
 report
 
- Public Attributes inherited from dataset.BaseDataset
 bad_files
 
 dbsInstance
 
 files
 
 filesAndSizes
 
 good_files
 
 name
 
 pattern
 
 primaryDatasetEntries
 
 report
 
 run_range
 
 user
 

Private Member Functions

def __chunks (self, theList, n)
 
def __createSnippet (self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, repMap=None, crab=False, parent=False)
 
def __dateString (self, date)
 
def __datetime (self, stringForDas)
 
def __fileListSnippet (self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
 
def __find_ge (self, a, x)
 
def __find_lt (self, a, x)
 
def __findInJson (self, jsondict, strings)
 
def __getData (self, dasQuery, dasLimit=0)
 
def __getDataType (self)
 
def __getFileInfoList (self, dasLimit, parent=False)
 
def __getMagneticField (self)
 
def __getMagneticFieldForRun (self, run=-1, tolerance=0.5)
 
def __getParentDataset (self)
 
def __getRunList (self)
 
def __lumiSelectionSnippet (self, jsonPath=None, firstRun=None, lastRun=None)
 

Private Attributes

 __cmssw
 
 __cmsswrelease
 
 __dasinstance
 
 __dasLimit
 
 __dataType
 
 __filename
 
 __firstusedrun
 
 __inputMagneticField
 
 __lastusedrun
 
 __magneticField
 
 __name
 
 __official
 
 __origName
 
 __parentDataset
 
 __predefined
 

Static Private Attributes

tuple __dummy_source_template
 
 __source_template
 

Detailed Description

Definition at line 198 of file dataset.py.

Constructor & Destructor Documentation

def dataset.Dataset.__init__ (   self,
  datasetname,
  dasinstance = defaultdasinstance 
)

Definition at line 199 of file dataset.py.

Referenced by dataset.Dataset.__init__().

199  def __init__(self, datasetname, dasinstance=defaultdasinstance):
200  self.datasetname = datasetname
201  if re.match(r'/.+/.+/.+', datasetname):
202  self.official = True
203  self.filenamebase = "Dataset" + self.datasetname.replace("/","_")
204  else:
205  self.official = False
206  self.filenamebase = datasetname
207 
208  self.dasinstance = dasinstance
209 
def __init__(self, datasetname, dasinstance=defaultdasinstance)
Definition: dataset.py:199
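
The only logic in this constructor is the naming-scheme check: a dataset name of the form /PrimaryDataset/ProcessedDataset/DataTier is treated as an official CMS dataset, anything else as a local/predefined name. A minimal standalone sketch of that check (the helper name filename_base is illustrative, not part of dataset.py):

import re

def filename_base(datasetname):
    # Official CMS dataset names have the form /Primary/Processed/Tier
    if re.match(r'/.+/.+/.+', datasetname):
        return "Dataset" + datasetname.replace("/", "_")
    # anything else is kept as-is (local or predefined name)
    return datasetname

print(filename_base("/MinimumBias/Run2018A-TkAlMinBias-v1/ALCARECO"))
# Dataset_MinimumBias_Run2018A-TkAlMinBias-v1_ALCARECO
print(filename_base("MyLocalDataset"))
# MyLocalDataset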
def dataset.Dataset.__init__ (   self,
  datasetName,
  dasLimit = 0,
  tryPredefinedFirst = True,
  cmssw = os.environ["CMSSW_BASE"],
  cmsswrelease = os.environ["CMSSW_RELEASE_BASE"],
  magneticfield = None,
  dasinstance = None 
)

Definition at line 23 of file dataset.py.

23  magneticfield = None, dasinstance = None):
24  self.__name = datasetName
25  self.__origName = datasetName
26  self.__dasLimit = dasLimit
27  self.__dasinstance = dasinstance
28  self.__cmssw = cmssw
29  self.__cmsswrelease = cmsswrelease
30  self.__firstusedrun = None
31  self.__lastusedrun = None
32  self.__parentDataset = None
33 
34  # check, if dataset name matches CMS dataset naming scheme
35  if re.match( r'/.+/.+/.+', self.__name ):
36  self.__official = True
37  fileName = "Dataset" + self.__name.replace("/","_") + "_cff.py"
38  else:
39  self.__official = False
40  fileName = self.__name + "_cff.py"
41 
42  searchPath1 = os.path.join( self.__cmssw, "python",
43  "Alignment", "OfflineValidation",
44  fileName )
45  searchPath2 = os.path.join( self.__cmssw, "src",
46  "Alignment", "OfflineValidation",
47  "python", fileName )
48  searchPath3 = os.path.join( self.__cmsswrelease,
49  "python", "Alignment",
50  "OfflineValidation", fileName )
51  if self.__official and not tryPredefinedFirst:
52  self.__predefined = False
53  elif os.path.exists( searchPath1 ):
54  self.__predefined = True
55  self.__filename = searchPath1
56  elif os.path.exists( searchPath2 ):
57  msg = ("The predefined dataset '%s' does exist in '%s', but "
58  "you need to run 'scram b' first."
59  %( self.__name, searchPath2 ))
60  if self.__official:
61  print(msg)
62  print("Getting the data from DAS again. To go faster next time, run scram b.")
63  else:
64  raise AllInOneError( msg )
65  elif os.path.exists( searchPath3 ):
66  self.__predefined = True
67  self.__filename = searchPath3
68  elif self.__official:
69  self.__predefined = False
70  else:
71  msg = ("The predefined dataset '%s' does not exist. Please "
72  "create it first or check for typos."%( self.__name ))
73  raise AllInOneError( msg )
74 
75  if self.__predefined and self.__official:
76  self.__name = "Dataset" + self.__name.replace("/","_")
77 
78  if magneticfield is not None:
79  try:
80  magneticfield = float(magneticfield)
81  except ValueError:
82  raise AllInOneError("Bad magneticfield {} which can't be converted to float".format(magneticfield))
83  self.__inputMagneticField = magneticfield
84 
85  self.__dataType = self.__getDataType()
86  self.__magneticField = self.__getMagneticField()
87 
88 
def __getDataType(self)
Definition: dataset.py:388
def __getMagneticField(self)
Definition: dataset.py:423
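
For orientation, the three searchPath variables above just probe the user area (built and un-built) and the release area for the predefined _cff.py. A hedged sketch of how those candidate locations are assembled; the function name and the fallback paths are illustrative only:

import os

def predefined_cff_candidates(fileName,
                              cmssw=os.environ.get("CMSSW_BASE", "/path/to/CMSSW"),
                              cmsswrelease=os.environ.get("CMSSW_RELEASE_BASE", "/path/to/release")):
    # Same three locations probed in Dataset.__init__: built user area,
    # un-built user area (needs 'scram b'), and the release area.
    return [
        os.path.join(cmssw, "python", "Alignment", "OfflineValidation", fileName),
        os.path.join(cmssw, "src", "Alignment", "OfflineValidation", "python", fileName),
        os.path.join(cmsswrelease, "python", "Alignment", "OfflineValidation", fileName),
    ]

for path in predefined_cff_candidates("Dataset_MyDataset_cff.py"):
    print(path, os.path.exists(path))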
def dataset.Dataset.__init__ (   self,
  name,
  user,
  pattern = '.*root' 
)

Definition at line 268 of file dataset.py.

References dataset.Dataset.__init__().

268  def __init__(self, name, user, pattern='.*root'):
269  self.lfnDir = castorBaseDir(user) + name
270  self.castorDir = castortools.lfnToCastor( self.lfnDir )
271  self.maskExists = False
272  self.report = None
273  super(Dataset, self).__init__(name, user, pattern)
274 
def __init__(self, datasetname, dasinstance=defaultdasinstance)
Definition: dataset.py:199

Member Function Documentation

def dataset.Dataset.__chunks (   self,
  theList,
  n 
)
private
Yield successive n-sized chunks from theList.

Definition at line 89 of file dataset.py.


Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.__lumiSelectionSnippet(), and dataset.Dataset.createdatasetfile_hippy().

89  def __chunks( self, theList, n ):
90  """ Yield successive n-sized chunks from theList.
91  """
92  for i in range( 0, len( theList ), n ):
93  yield theList[i:i+n]
94 
def __chunks(self, theList, n)
Definition: dataset.py:89
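
The generator can be exercised on its own; elsewhere in this class it is typically called with a chunk size of 255:

def chunks(theList, n):
    """Yield successive n-sized chunks from theList (same logic as Dataset.__chunks)."""
    for i in range(0, len(theList), n):
        yield theList[i:i + n]

files = ["file_%d.root" % i for i in range(10)]
for block in chunks(files, 4):
    print(block)   # 4 + 4 + 2 files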
def dataset.Dataset.__createSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  repMap = None,
  crab = False,
  parent = False 
)
private

Definition at line 245 of file dataset.py.

References dataset.Dataset.__dummy_source_template, dataset.Dataset.__fileListSnippet(), dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.__source_template, and dataset.Dataset.convertTimeToRun().

Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.datasetSnippet(), and dataset.Dataset.dump_cff().

245  crab = False, parent = False ):
246 
247  if firstRun:
248  firstRun = int( firstRun )
249  if lastRun:
250  lastRun = int( lastRun )
251  if ( begin and firstRun ) or ( end and lastRun ):
252  msg = ( "The Usage of "
253  + "'begin' & 'firstRun' " * int( bool( begin and
254  firstRun ) )
255  + "and " * int( bool( ( begin and firstRun ) and
256  ( end and lastRun ) ) )
257  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
258  + "is ambigous." )
259  raise AllInOneError( msg )
260  if begin or end:
261  ( firstRun, lastRun ) = self.convertTimeToRun(
262  begin = begin, end = end, firstRun = firstRun,
263  lastRun = lastRun )
264  if ( firstRun and lastRun ) and ( firstRun > lastRun ):
265  msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
266  "chosen is greater than the upper time/runrange limit "
267  "('end'/'lastRun').")
268  raise AllInOneError( msg )
269 
270  lumiSecExtend = self.__lumiSelectionSnippet(jsonPath=jsonPath, firstRun=firstRun, lastRun=lastRun)
271  lumiStr = goodLumiSecStr = ""
272  if lumiSecExtend:
273  goodLumiSecStr = "lumiSecs = cms.untracked.VLuminosityBlockRange()\n"
274  lumiStr = " lumisToProcess = lumiSecs,\n"
275 
276  files = self.__fileListSnippet(crab=crab, parent=parent, firstRun=firstRun, lastRun=lastRun, forcerunselection=False)
277 
278  theMap = repMap
279  theMap["files"] = files
280  theMap["json"] = jsonPath
281  theMap["lumiStr"] = lumiStr
282  theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
283  theMap["lumiSecExtend"] = lumiSecExtend
284  if crab:
285  dataset_snippet = self.__dummy_source_template%( theMap )
286  else:
287  dataset_snippet = self.__source_template%( theMap )
288  return dataset_snippet
289 
def __lumiSelectionSnippet(self, jsonPath=None, firstRun=None, lastRun=None)
Definition: dataset.py:125
def convertTimeToRun(self, begin=None, end=None, firstRun=None, lastRun=None, shortTuple=True)
Definition: dataset.py:645
tuple __dummy_source_template
Definition: dataset.py:113
def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:221
def dataset.Dataset.__dateString (   self,
  date 
)
private

Definition at line 640 of file dataset.py.

References dataset.Dataset.convertTimeToRun().

Referenced by dataset.Dataset.convertTimeToRun().

640  def __dateString(self, date):
641  return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)
642 
def __dateString(self, date)
Definition: dataset.py:640
def dataset.Dataset.__datetime (   self,
  stringForDas 
)
private

Definition at line 631 of file dataset.py.


Referenced by dataset.Dataset.convertTimeToRun().

631  def __datetime(self, stringForDas):
632  if len(stringForDas) != 8:
633  raise AllInOneError(stringForDas + " is not a valid date string.\n"
634  + "DAS accepts dates in the form 'yyyymmdd'")
635  year = stringForDas[:4]
636  month = stringForDas[4:6]
637  day = stringForDas[6:8]
638  return datetime.date(int(year), int(month), int(day))
639 
def __datetime(self, stringForDas)
Definition: dataset.py:631
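
These two date helpers are inverses of each other: __datetime parses the 'yyyymmdd' strings that DAS accepts, __dateString produces them. A standalone round trip (function names are illustrative):

import datetime

def to_date(stringForDas):
    # same validation as Dataset.__datetime: DAS expects 'yyyymmdd'
    if len(stringForDas) != 8:
        raise ValueError(stringForDas + " is not a valid date string (expected 'yyyymmdd')")
    return datetime.date(int(stringForDas[:4]), int(stringForDas[4:6]), int(stringForDas[6:8]))

def to_das_string(date):
    # same formatting as Dataset.__dateString
    return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)

d = to_date("20180501")
print(to_das_string(d + datetime.timedelta(days=10)))   # 20180511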
def dataset.Dataset.__fileListSnippet (   self,
  crab = False,
  parent = False,
  firstRun = None,
  lastRun = None,
  forcerunselection = False 
)
private

Definition at line 221 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__createSnippet(), dataset.Dataset.__name, and dataset.Dataset.fileList().

Referenced by dataset.Dataset.__createSnippet().

221  def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
222  if crab:
223  files = ""
224  else:
225  splitFileList = list( self.__chunks( self.fileList(firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
226  if not splitFileList:
227  raise AllInOneError("No files found for dataset {}. Check the spelling, or maybe specify another das instance?".format(self.__name))
228  fileStr = [ "',\n'".join( files ) for files in splitFileList ]
229  fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
230  for files in fileStr ]
231  files = "\n".join( fileStr )
232 
233  if parent:
234  splitParentFileList = list( self.__chunks( self.fileList(parent=True, firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
235  parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ]
236  parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
237  for parentFiles in parentFileStr ]
238  parentFiles = "\n".join( parentFileStr )
239  files += "\n\n" + parentFiles
240 
241  return files
242 
def __chunks(self, theList, n)
Definition: dataset.py:89
def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:221
def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:885
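
The returned snippet is plain CMSSW configuration text: the file names are split into chunks of 255 and each chunk is wrapped in a readFiles.extend( [...] ) call. A minimal sketch of that string assembly, independent of DAS (helper names are illustrative):

def chunks(theList, n):
    for i in range(0, len(theList), n):
        yield theList[i:i + n]

def file_list_snippet(fileNames, chunkSize=255):
    # same formatting as the non-crab branch of Dataset.__fileListSnippet
    blocks = ["readFiles.extend( [\n'" + "',\n'".join(block) + "'\n] )"
              for block in chunks(fileNames, chunkSize)]
    return "\n".join(blocks)

print(file_list_snippet(["/store/data/a.root", "/store/data/b.root"], chunkSize=1))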
def dataset.Dataset.__find_ge (   self,
  a,
  x 
)
private

Definition at line 297 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

297  def __find_ge( self, a, x):
298  'Find leftmost item greater than or equal to x'
299  i = bisect.bisect_left( a, x )
300  if i != len( a ):
301  return i
302  raise ValueError
303 
def __find_ge(self, a, x)
Definition: dataset.py:297
def dataset.Dataset.__find_lt (   self,
  a,
  x 
)
private

Definition at line 290 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

290  def __find_lt( self, a, x ):
291  'Find rightmost value less than x'
292  i = bisect.bisect_left( a, x )
293  if i:
294  return i-1
295  raise ValueError
296 
def __find_lt(self, a, x)
Definition: dataset.py:290
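
Both helpers are thin wrappers around bisect and are used by convertTimeToRun() to map a date onto the sorted run list. A standalone example:

import bisect

def find_ge(a, x):
    'Find leftmost item greater than or equal to x (ValueError if none)'
    i = bisect.bisect_left(a, x)
    if i != len(a):
        return i
    raise ValueError

def find_lt(a, x):
    'Find rightmost value less than x (ValueError if none)'
    i = bisect.bisect_left(a, x)
    if i:
        return i - 1
    raise ValueError

runs = [315000, 315100, 315250, 316000]
print(runs[find_ge(runs, 315101)])   # 315250, first run at or after the target
print(runs[find_lt(runs, 315101)])   # 315100, last run before the target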
def dataset.Dataset.__findInJson (   self,
  jsondict,
  strings 
)
private

Definition at line 304 of file dataset.py.

References dataset.Dataset.__findInJson().

Referenced by dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.fileList().

304  def __findInJson(self, jsondict, strings):
305  if isinstance(strings, str):
306  strings = [ strings ]
307 
308  if len(strings) == 0:
309  return jsondict
310  if isinstance(jsondict,dict):
311  if strings[0] in jsondict:
312  try:
313  return self.__findInJson(jsondict[strings[0]], strings[1:])
314  except KeyError:
315  pass
316  else:
317  for a in jsondict:
318  if strings[0] in a:
319  try:
320  return self.__findInJson(a[strings[0]], strings[1:])
321  except (TypeError, KeyError): #TypeError because a could be a string and contain strings[0]
322  pass
323  #if it's not found
324  raise KeyError("Can't find " + strings[0])
325 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:304
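
__findInJson() walks the nested DAS output, where every level can be either a dict or a list of dicts, following a sequence of keys. A standalone sketch on a made-up record shaped like DAS output (the record itself is purely illustrative):

def find_in_json(jsondict, strings):
    # same recursive search as Dataset.__findInJson
    if isinstance(strings, str):
        strings = [strings]
    if len(strings) == 0:
        return jsondict
    if isinstance(jsondict, dict):
        if strings[0] in jsondict:
            try:
                return find_in_json(jsondict[strings[0]], strings[1:])
            except KeyError:
                pass
    else:
        for a in jsondict:
            if strings[0] in a:
                try:
                    return find_in_json(a[strings[0]], strings[1:])
                except (TypeError, KeyError):
                    pass
    raise KeyError("Can't find " + strings[0])

record = [{"run": [{"run_number": 316000, "bfield": 3.8}]}]
print(find_in_json(record, ["run", "bfield"]))   # 3.8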
def dataset.Dataset.__getData (   self,
  dasQuery,
  dasLimit = 0 
)
private

Definition at line 356 of file dataset.py.

References dataset.Dataset.__findInJson().

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), and dataset.Dataset.convertTimeToRun().

356  def __getData( self, dasQuery, dasLimit = 0 ):
357  dasData = das_client.get_data(dasQuery, dasLimit)
358  if isinstance(dasData, str):
359  jsondict = json.loads( dasData )
360  else:
361  jsondict = dasData
362  # Check, if the DAS query fails
363  try:
364  error = self.__findInJson(jsondict,["data","error"])
365  except KeyError:
366  error = None
367  if error or self.__findInJson(jsondict,"status") != 'ok' or "data" not in jsondict:
368  try:
369  jsonstr = self.__findInJson(jsondict,"reason")
370  except KeyError:
371  jsonstr = str(jsondict)
372  if len(jsonstr) > 10000:
373  jsonfile = "das_query_output_%i.txt"
374  i = 0
375  while os.path.lexists(jsonfile % i):
376  i += 1
377  jsonfile = jsonfile % i
378  theFile = open( jsonfile, "w" )
379  theFile.write( jsonstr )
380  theFile.close()
381  msg = "The DAS query returned an error. The output is very long, and has been stored in:\n" + jsonfile
382  else:
383  msg = "The DAS query returned a error. Here is the output\n" + jsonstr
384  msg += "\nIt's possible that this was a server error. If so, it may work if you try again later"
385  raise AllInOneError(msg)
386  return self.__findInJson(jsondict,"data")
387 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:304
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:356
def dataset.Dataset.__getDataType (   self)
private

Definition at line 388 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__predefined, and dataset.Dataset.name().

Referenced by dataset.Dataset.dataType().

388  def __getDataType( self ):
389  if self.__predefined:
390  with open(self.__filename) as f:
391  datatype = None
392  for line in f.readlines():
393  if line.startswith("#data type: "):
394  if datatype is not None:
395  raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
396  datatype = line.replace("#data type: ", "").replace("\n","")
397  return datatype
398  return "unknown"
399 
400  dasQuery_type = ( 'dataset dataset=%s instance=%s detail=true | grep dataset.datatype,'
401  'dataset.name'%( self.__name, self.__dasinstance ) )
402  data = self.__getData( dasQuery_type )
403 
404  try:
405  return self.__findInJson(data, ["dataset", "datatype"])
406  except KeyError:
407  print ("Cannot find the datatype of the dataset '%s'\n"
408  "It may not be possible to automatically find the magnetic field,\n"
409  "and you will not be able run in CRAB mode"
410  %( self.name() ))
411  return "unknown"
412 
def __getDataType(self)
Definition: dataset.py:388
def __findInJson(self, jsondict, strings)
Definition: dataset.py:304
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:356
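
For predefined datasets the type is read back from a '#data type: ' comment in the _cff.py file instead of querying DAS. A standalone sketch of that header parsing, operating on a string rather than an open file (function name is illustrative):

def data_type_from_header(cff_text):
    # same idea as the predefined branch of Dataset.__getDataType
    datatype = None
    for line in cff_text.splitlines():
        if line.startswith("#data type: "):
            if datatype is not None:
                raise RuntimeError("multiple 'data type' lines")
            datatype = line.replace("#data type: ", "").strip()
    return datatype if datatype is not None else "unknown"

print(data_type_from_header("#data type: data\nreadFiles = cms.untracked.vstring()"))   # data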
def dataset.Dataset.__getFileInfoList (   self,
  dasLimit,
  parent = False 
)
private

Definition at line 561 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__predefined, dataset.Dataset.name(), and dataset.Dataset.parentDataset().

Referenced by dataset.Dataset.fileInfoList().

561  def __getFileInfoList( self, dasLimit, parent = False ):
562  if self.__predefined:
563  if parent:
564  extendstring = "secFiles.extend"
565  else:
566  extendstring = "readFiles.extend"
567  with open(self.__filename) as f:
568  files = []
569  copy = False
570  for line in f.readlines():
571  if "]" in line:
572  copy = False
573  if copy:
574  files.append({"name": line.translate(None, "', " + '"')})
575  if extendstring in line and "[" in line and "]" not in line:
576  copy = True
577  return files
578 
579  if parent:
580  searchdataset = self.parentDataset()
581  else:
582  searchdataset = self.__name
583  dasQuery_files = ( 'file dataset=%s instance=%s detail=true | grep file.name, file.nevents, '
584  'file.creation_time, '
585  'file.modification_time'%( searchdataset, self.__dasinstance ) )
586  print("Requesting file information for '%s' from DAS..."%( searchdataset ), end=' ')
587  sys.stdout.flush()
588  data = self.__getData( dasQuery_files, dasLimit )
589  print("Done.")
590  data = [ self.__findInJson(entry,"file") for entry in data ]
591  if len( data ) == 0:
592  msg = ("No files are available for the dataset '%s'. This can be "
593  "due to a typo or due to a DAS problem. Please check the "
594  "spelling of the dataset and/or retry to run "
595  "'validateAlignments.py'."%( self.name() ))
596  raise AllInOneError( msg )
597  fileInformationList = []
598  for file in data:
599  fileName = 'unknown'
600  try:
601  fileName = self.__findInJson(file, "name")
602  fileCreationTime = self.__findInJson(file, "creation_time")
603  fileNEvents = self.__findInJson(file, "nevents")
604  except KeyError:
605  print(("DAS query gives bad output for file '%s'. Skipping it.\n"
606  "It may work if you try again later.") % fileName)
607  fileNEvents = 0
608  # select only non-empty files
609  if fileNEvents == 0:
610  continue
611  fileDict = { "name": fileName,
612  "creation_time": fileCreationTime,
613  "nevents": fileNEvents
614  }
615  fileInformationList.append( fileDict )
616  fileInformationList.sort( key=lambda info: self.__findInJson(info,"name") )
617  return fileInformationList
618 
def __getFileInfoList(self, dasLimit, parent=False)
Definition: dataset.py:561
def __findInJson(self, jsondict, strings)
Definition: dataset.py:304
def parentDataset(self)
Definition: dataset.py:723
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:356
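
The DAS reply is reduced to a list of dicts holding name, creation_time and nevents; empty files are dropped and the result is sorted by name. A minimal sketch of that post-processing on hypothetical records:

def select_files(das_files):
    # keep only non-empty files and sort by name, as in Dataset.__getFileInfoList
    info = [f for f in das_files if f.get("nevents", 0) > 0]
    info.sort(key=lambda f: f["name"])
    return info

files = [
    {"name": "/store/data/b.root", "nevents": 1200, "creation_time": 1526000000},
    {"name": "/store/data/a.root", "nevents": 0,    "creation_time": 1526000001},   # skipped
    {"name": "/store/data/c.root", "nevents": 900,  "creation_time": 1526000002},
]
for f in select_files(files):
    print(f["name"], f["nevents"])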
def dataset.Dataset.__getMagneticField (   self)
private

Definition at line 423 of file dataset.py.

References dataset.Dataset.__cmssw, dataset.Dataset.__cmsswrelease, dataset.Dataset.__dasinstance, dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__name, and dataset.Dataset.__predefined.

Referenced by dataset.Dataset.magneticField().

423  def __getMagneticField( self ):
424  Bfieldlocation = os.path.join( self.__cmssw, "python", "Configuration", "StandardSequences" )
425  if not os.path.isdir(Bfieldlocation):
426  Bfieldlocation = os.path.join( self.__cmsswrelease, "python", "Configuration", "StandardSequences" )
427  Bfieldlist = [ f.replace("_cff.py",'') \
428  for f in os.listdir(Bfieldlocation) \
429  if f.startswith("MagneticField_") and f.endswith("_cff.py") ]
430  Bfieldlist.sort( key = lambda Bfield: -len(Bfield) ) #Put it in order of decreasing length, so that searching in the name gives the longer match
431 
432  if self.__inputMagneticField is not None:
433  if self.__inputMagneticField == 3.8:
434  return "MagneticField"
435  elif self.__inputMagneticField == 0:
436  return "MagneticField_0T"
437  else:
438  raise ValueError("Unknown input magnetic field {}".format(self.__inputMagneticField))
439 
440  if self.__predefined:
441  with open(self.__filename) as f:
442  datatype = None
443  Bfield = None
444  for line in f.readlines():
445  if line.startswith("#data type: "):
446  if datatype is not None:
447  raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
448  datatype = line.replace("#data type: ", "").replace("\n","")
449  datatype = datatype.split("#")[0].strip()
450  if line.startswith("#magnetic field: "):
451  if Bfield is not None:
452  raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
453  Bfield = line.replace("#magnetic field: ", "").replace("\n","")
454  Bfield = Bfield.split("#")[0].strip()
455  if Bfield is not None:
456  Bfield = Bfield.split(",")[0]
457  if Bfield in Bfieldlist or Bfield == "unknown":
458  return Bfield
459  else:
460  print("Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield)
461  print("Using Bfield='unknown' - this will revert to the default")
462  return "unknown"
463  elif datatype == "data":
464  return "MagneticField" #this should be in the "#magnetic field" line, but for safety in case it got messed up
465  else:
466  return "unknown"
467 
468  if self.__dataType == "data":
469  return "MagneticField"
470 
471  #try to find the magnetic field from DAS
472  #it seems to be there for the newer (7X) MC samples, except cosmics
473  dasQuery_B = ('dataset dataset=%s instance=%s'%(self.__name, self.__dasinstance))
474  data = self.__getData( dasQuery_B )
475 
476  try:
477  Bfield = self.__findInJson(data, ["dataset", "mcm", "sequences", "magField"])
478  if Bfield in Bfieldlist:
479  return Bfield
480  elif Bfield == "38T" or Bfield == "38T_PostLS1":
481  return "MagneticField"
482  elif "MagneticField_" + Bfield in Bfieldlist:
483  return "MagneticField_" + Bfield
484  elif Bfield == "":
485  pass
486  else:
487  print("Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield)
488  print("Using Bfield='unknown' - this will revert to the default magnetic field")
489  return "unknown"
490  except KeyError:
491  pass
492 
493  for possibleB in Bfieldlist:
494  if (possibleB != "MagneticField"
495  and possibleB.replace("MagneticField_","") in self.__name.replace("TkAlCosmics0T", "")):
496  #final attempt - try to identify the dataset from the name
497  #all cosmics dataset names contain "TkAlCosmics0T"
498  if possibleB == "MagneticField_38T" or possibleB == "MagneticField_38T_PostLS1":
499  return "MagneticField"
500  return possibleB
501 
502  return "unknown"
503 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:304
def __getMagneticField(self)
Definition: dataset.py:423
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:356
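
The return value is the name of a MagneticField*_cff.py configuration fragment, so whatever DAS reports has to be matched against the fragments shipped with the release. A hedged sketch of that matching; Bfieldlist stands in for the real directory listing and the function name is illustrative:

def magnetic_field_cff(bfield_from_das, Bfieldlist):
    # simplified version of the DAS branch of Dataset.__getMagneticField
    if bfield_from_das in Bfieldlist:
        return bfield_from_das
    if bfield_from_das in ("38T", "38T_PostLS1"):
        return "MagneticField"            # the default 3.8 T configuration
    if "MagneticField_" + bfield_from_das in Bfieldlist:
        return "MagneticField_" + bfield_from_das
    return "unknown"

available = ["MagneticField", "MagneticField_0T", "MagneticField_38T_PostLS1"]
print(magnetic_field_cff("38T", available))   # MagneticField
print(magnetic_field_cff("0T", available))    # MagneticField_0T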
def dataset.Dataset.__getMagneticFieldForRun (   self,
  run = -1,
  tolerance = 0.5 
)
private
For MC, this returns the same as the previous function.
   For data, it gets the magnetic field from the runs.  This is important for
   deciding which template to use for offlinevalidation

Definition at line 504 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getData(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__lastusedrun, dataset.Dataset.__magneticField, dataset.Dataset.__name, and dataset.Dataset.__predefined.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dump_cff(), and dataset.Dataset.magneticFieldForRun().

504  def __getMagneticFieldForRun( self, run = -1, tolerance = 0.5 ):
505  """For MC, this returns the same as the previous function.
506  For data, it gets the magnetic field from the runs. This is important for
507  deciding which template to use for offlinevalidation
508  """
509  if self.__dataType == "mc" and self.__magneticField == "MagneticField":
510  return 3.8 #For 3.8T MC the default MagneticField is used
511  if self.__inputMagneticField is not None:
512  return self.__inputMagneticField
513  if "T" in self.__magneticField:
514  Bfield = self.__magneticField.split("T")[0].replace("MagneticField_","")
515  try:
516  return float(Bfield) / 10.0 #e.g. 38T and 38T_PostLS1 both return 3.8
517  except ValueError:
518  pass
519  if self.__predefined:
520  with open(self.__filename) as f:
521  Bfield = None
522  for line in f.readlines():
523  if line.startswith("#magnetic field: ") and "," in line:
524  if Bfield is not None:
525  raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
526  return float(line.replace("#magnetic field: ", "").split(",")[1].split("#")[0].strip())
527 
528  if run > 0:
529  dasQuery = ('run=%s instance=%s detail=true'%(run, self.__dasinstance)) #for data
530  data = self.__getData(dasQuery)
531  try:
532  return self.__findInJson(data, ["run","bfield"])
533  except KeyError:
534  return "unknown Can't get the magnetic field for run %s from DAS" % run
535 
536  #run < 0 - find B field for the first and last runs, and make sure they're compatible
537  # (to within tolerance)
538  #NOT FOOLPROOF! The magnetic field might go up and then down, or vice versa
539  if self.__firstusedrun is None or self.__lastusedrun is None:
540  return "unknown Can't get the exact magnetic field for the dataset until data has been retrieved from DAS."
541  firstrunB = self.__getMagneticFieldForRun(self.__firstusedrun)
542  lastrunB = self.__getMagneticFieldForRun(self.__lastusedrun)
543  try:
544  if abs(firstrunB - lastrunB) <= tolerance:
545  return .5*(firstrunB + lastrunB)
546  print(firstrunB, lastrunB, tolerance)
547  return ("unknown The beginning and end of your run range for %s\n"
548  "have different magnetic fields (%s, %s)!\n"
549  "Try limiting the run range using firstRun, lastRun, begin, end, or JSON,\n"
550  "or increasing the tolerance (in dataset.py) from %s.") % (self.__name, firstrunB, lastrunB, tolerance)
551  except TypeError:
552  try:
553  if "unknown" in firstrunB:
554  return firstrunB
555  else:
556  return lastrunB
557  except TypeError:
558  return lastrunB
559 
def __getMagneticFieldForRun(self, run=-1, tolerance=0.5)
Definition: dataset.py:504
def __findInJson(self, jsondict, strings)
Definition: dataset.py:304
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:356
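
When no run number is given, the field is taken from the first and last used runs and only accepted if the two values agree within the tolerance. A minimal sketch of that acceptance test:

def average_bfield(firstrunB, lastrunB, tolerance=0.5):
    # same criterion as the run < 0 branch of Dataset.__getMagneticFieldForRun
    if abs(firstrunB - lastrunB) <= tolerance:
        return 0.5 * (firstrunB + lastrunB)
    raise ValueError("inconsistent magnetic fields: %s vs %s" % (firstrunB, lastrunB))

print(average_bfield(3.8, 3.8))   # 3.8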
def dataset.Dataset.__getParentDataset (   self)
private

Definition at line 413 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), and dataset.Dataset.__name.

Referenced by dataset.Dataset.parentDataset().

413  def __getParentDataset( self ):
414  dasQuery = "parent dataset=" + self.__name + " instance="+self.__dasinstance
415  data = self.__getData( dasQuery )
416  try:
417  return self.__findInJson(data, ["parent", "name"])
418  except KeyError:
419  raise AllInOneError("Cannot find the parent of the dataset '" + self.__name + "'\n"
420  "Here is the DAS output:\n" + str(jsondict) +
421  "\nIt's possible that this was a server error. If so, it may work if you try again later")
422 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:304
def __getParentDataset(self)
Definition: dataset.py:413
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:356
def dataset.Dataset.__getRunList (   self)
private

Definition at line 620 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), and dataset.Dataset.__name.

Referenced by dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.runList().

620  def __getRunList( self ):
621  dasQuery_runs = ( 'run dataset=%s instance=%s | grep run.run_number,'
622  'run.creation_time'%( self.__name, self.__dasinstance ) )
623  print("Requesting run information for '%s' from DAS..."%( self.__name ), end=' ')
624  sys.stdout.flush()
625  data = self.__getData( dasQuery_runs )
626  print("Done.")
627  data = [ self.__findInJson(entry,"run") for entry in data ]
628  data.sort( key = lambda run: self.__findInJson(run, "run_number") )
629  return data
630 
def __getRunList(self)
Definition: dataset.py:620
def __findInJson(self, jsondict, strings)
Definition: dataset.py:304
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:356
def dataset.Dataset.__lumiSelectionSnippet (   self,
  jsonPath = None,
  firstRun = None,
  lastRun = None 
)
private

Definition at line 125 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getRunList(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__lastusedrun, and dataset.Dataset.getForceRunRangeFunction().

Referenced by dataset.Dataset.__createSnippet().

125  def __lumiSelectionSnippet( self, jsonPath = None, firstRun = None, lastRun = None ):
126  lumiSecExtend = ""
127  if firstRun or lastRun or jsonPath:
128  if not jsonPath:
129  selectedRunList = self.__getRunList()
130  if firstRun:
131  selectedRunList = [ run for run in selectedRunList \
132  if self.__findInJson(run, "run_number") >= firstRun ]
133  if lastRun:
134  selectedRunList = [ run for run in selectedRunList \
135  if self.__findInJson(run, "run_number") <= lastRun ]
136  lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
137  + str( self.__findInJson(run, "run_number") ) + ":max" \
138  for run in selectedRunList ]
139  splitLumiList = list( self.__chunks( lumiList, 255 ) )
140  else:
141  theLumiList = None
142  try:
143  theLumiList = LumiList ( filename = jsonPath )
144  except ValueError:
145  pass
146 
147  if theLumiList is not None:
148  allRuns = theLumiList.getRuns()
149  runsToRemove = []
150  for run in allRuns:
151  if firstRun and int( run ) < firstRun:
152  runsToRemove.append( run )
153  if lastRun and int( run ) > lastRun:
154  runsToRemove.append( run )
155  theLumiList.removeRuns( runsToRemove )
156  splitLumiList = list( self.__chunks(
157  theLumiList.getCMSSWString().split(','), 255 ) )
158  if not (splitLumiList and splitLumiList[0] and splitLumiList[0][0]):
159  splitLumiList = None
160  else:
161  with open(jsonPath) as f:
162  jsoncontents = f.read()
163  if "process.source.lumisToProcess" in jsoncontents:
164  msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
165  if firstRun or lastRun:
166  msg += ("\n (after applying firstRun and/or lastRun)")
167  msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
168  msg += "\nCheck your config file to make sure that it worked properly."
169  print(msg)
170 
171  runlist = self.__getRunList()
172  if firstRun or lastRun:
173  self.__firstusedrun = -1
174  self.__lastusedrun = -1
175  jsoncontents = re.sub(r"\d+:(\d+|max)(-\d+:(\d+|max))?", self.getForceRunRangeFunction(firstRun, lastRun), jsoncontents)
176  jsoncontents = (jsoncontents.replace("'',\n","").replace("''\n","")
177  .replace('"",\n','').replace('""\n',''))
178  self.__firstusedrun = max(self.__firstusedrun, int(self.__findInJson(runlist[0],"run_number")))
179  self.__lastusedrun = min(self.__lastusedrun, int(self.__findInJson(runlist[-1],"run_number")))
180  if self.__lastusedrun < self.__firstusedrun:
181  jsoncontents = None
182  else:
183  self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
184  self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))
185  lumiSecExtend = jsoncontents
186  splitLumiList = None
187  else:
188  raise AllInOneError("%s is not a valid json file!" % jsonPath)
189 
190  if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
191  lumiSecStr = [ "',\n'".join( lumis ) \
192  for lumis in splitLumiList ]
193  lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
194  for lumis in lumiSecStr ]
195  lumiSecExtend = "\n".join( lumiSecStr )
196  runlist = self.__getRunList()
197  self.__firstusedrun = max(int(splitLumiList[0][0].split(":")[0]), int(self.__findInJson(runlist[0],"run_number")))
198  self.__lastusedrun = min(int(splitLumiList[-1][-1].split(":")[0]), int(self.__findInJson(runlist[-1],"run_number")))
199  elif lumiSecExtend:
200  pass
201  else:
202  msg = "You are trying to run a validation without any runs! Check that:"
203  if firstRun or lastRun:
204  msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
205  if jsonPath:
206  msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
207  if (firstRun or lastRun) and jsonPath:
208  msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
209  raise AllInOneError(msg)
210 
211  else:
212  if self.__inputMagneticField is not None:
213  pass #never need self.__firstusedrun or self.__lastusedrun
214  else:
215  runlist = self.__getRunList()
216  self.__firstusedrun = int(self.__findInJson(self.__getRunList()[0],"run_number"))
217  self.__lastusedrun = int(self.__findInJson(self.__getRunList()[-1],"run_number"))
218 
219  return lumiSecExtend
220 
def __getRunList(self)
Definition: dataset.py:620
def __lumiSelectionSnippet(self, jsonPath=None, firstRun=None, lastRun=None)
Definition: dataset.py:125
def __findInJson(self, jsondict, strings)
Definition: dataset.py:304
def getForceRunRangeFunction(self, firstRun, lastRun)
Definition: dataset.py:351
def __chunks(self, theList, n)
Definition: dataset.py:89
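
Without a JSON file the lumi selection is built straight from the run list: every selected run becomes a 'run:1-run:max' range and the ranges are chunked into lumiSecs.extend( [...] ) blocks, mirroring the file-list snippet. A standalone sketch (run numbers are made up):

def chunks(theList, n):
    for i in range(0, len(theList), n):
        yield theList[i:i + n]

def lumi_selection_snippet(run_numbers, firstRun=None, lastRun=None, chunkSize=255):
    # same construction as the no-JSON branch of Dataset.__lumiSelectionSnippet
    selected = [r for r in run_numbers
                if (firstRun is None or r >= firstRun) and (lastRun is None or r <= lastRun)]
    lumiList = ["%s:1-%s:max" % (r, r) for r in selected]
    blocks = ["lumiSecs.extend( [\n'" + "',\n'".join(block) + "'\n] )"
              for block in chunks(lumiList, chunkSize)]
    return "\n".join(blocks)

print(lumi_selection_snippet([315000, 315100, 316000], firstRun=315050))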
def dataset.Dataset.buildListOfBadFiles (   self)
fills the list of bad files from the IntegrityCheck log.

When the integrity check file is not available,
files are considered as good.

Definition at line 279 of file dataset.py.

280  '''fills the list of bad files from the IntegrityCheck log.
281 
282  When the integrity check file is not available,
283  files are considered as good.'''
284  mask = "IntegrityCheck"
285 
286  self.bad_files = {}
287  self.good_files = []
288 
289  file_mask = castortools.matchingFiles(self.castorDir, '^%s_.*\.txt$' % mask)
290  if file_mask:
291  # here to avoid circular dependency
292  from .edmIntegrityCheck import PublishToFileSystem
293  p = PublishToFileSystem(mask)
294  report = p.get(self.castorDir)
295  if report is not None and report:
296  self.maskExists = True
297  self.report = report
298  dup = report.get('ValidDuplicates',{})
299  for name, status in six.iteritems(report['Files']):
300  # print name, status
301  if not status[0]:
302  self.bad_files[name] = 'MarkedBad'
303  elif name in dup:
304  self.bad_files[name] = 'ValidDup'
305  else:
306  self.good_files.append( name )
307  else:
308  raise IntegrityCheckError( "ERROR: IntegrityCheck log file IntegrityCheck_XXXXXXXXXX.txt not found" )
309 
def buildListOfBadFiles(self)
Definition: dataset.py:279
def dataset.Dataset.buildListOfFiles (   self,
  pattern = '.*root' 
)
fills list of files, taking all root files matching the pattern in the castor dir

Definition at line 275 of file dataset.py.

275  def buildListOfFiles(self, pattern='.*root'):
276  '''fills list of files, taking all root files matching the pattern in the castor dir'''
277  self.files = castortools.matchingFiles( self.castorDir, pattern )
278 
def buildListOfFiles(self, pattern='.*root')
Definition: dataset.py:275
def dataset.Dataset.convertTimeToRun (   self,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  shortTuple = True 
)

Definition at line 645 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__dateString(), dataset.Dataset.__datetime(), dataset.Dataset.__find_ge(), dataset.Dataset.__find_lt(), dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__getRunList(), and dataset.Dataset.__name.

Referenced by dataset.Dataset.__createSnippet(), and dataset.Dataset.__dateString().

645  shortTuple = True ):
646  if ( begin and firstRun ) or ( end and lastRun ):
647  msg = ( "The Usage of "
648  + "'begin' & 'firstRun' " * int( bool( begin and
649  firstRun ) )
650  + "and " * int( bool( ( begin and firstRun ) and
651  ( end and lastRun ) ) )
652  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
653  + "is ambigous." )
654  raise AllInOneError( msg )
655 
656  if begin or end:
657  runList = [ self.__findInJson(run, "run_number") for run in self.__getRunList() ]
658 
659  if begin:
660  lastdate = begin
661  for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months after begin
662  firstdate = lastdate
663  lastdate = self.__dateString(self.__datetime(firstdate) + datetime.timedelta(delta))
664  dasQuery_begin = "run date between[%s,%s] instance=%s" % (firstdate, lastdate, self.__dasinstance)
665  begindata = self.__getData(dasQuery_begin)
666  if len(begindata) > 0:
667  begindata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
668  try:
669  runIndex = self.__find_ge( runList, self.__findInJson(begindata[0], ["run", "run_number"]))
670  except ValueError:
671  msg = ( "Your 'begin' is after the creation time of the last "
672  "run in the dataset\n'%s'"%( self.__name ) )
673  raise AllInOneError( msg )
674  firstRun = runList[runIndex]
675  begin = None
676  break
677 
678  if begin:
679  raise AllInOneError("No runs within a reasonable time interval after your 'begin'."
680  "Try using a 'begin' that has runs soon after it (within 2 months at most)")
681 
682  if end:
683  firstdate = end
684  for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months before end
685  lastdate = firstdate
686  firstdate = self.__dateString(self.__datetime(lastdate) - datetime.timedelta(delta))
687  dasQuery_end = "run date between[%s,%s] instance=%s" % (firstdate, lastdate, self.__dasinstance)
688  enddata = self.__getData(dasQuery_end)
689  if len(enddata) > 0:
690  enddata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
691  try:
692  runIndex = self.__find_lt( runList, self.__findInJson(enddata[-1], ["run", "run_number"]))
693  except ValueError:
694  msg = ( "Your 'end' is before the creation time of the first "
695  "run in the dataset\n'%s'"%( self.__name ) )
696  raise AllInOneError( msg )
697  lastRun = runList[runIndex]
698  end = None
699  break
700 
701  if end:
702  raise AllInOneError("No runs within a reasonable time interval before your 'end'."
703  "Try using an 'end' that has runs soon before it (within 2 months at most)")
704 
705  if shortTuple:
706  return firstRun, lastRun
707  else:
708  return begin, end, firstRun, lastRun
709 
def __getRunList(self)
Definition: dataset.py:620
def __findInJson(self, jsondict, strings)
Definition: dataset.py:304
def __find_lt(self, a, x)
Definition: dataset.py:290
def __datetime(self, stringForDas)
Definition: dataset.py:631
def __dateString(self, date)
Definition: dataset.py:640
def __find_ge(self, a, x)
Definition: dataset.py:297
def __getData(self, dasQuery, dasLimit=0)
Definition: dataset.py:356
def dataset.Dataset.createdatasetfile_hippy (   self,
  filename,
  filesperjob,
  firstrun,
  lastrun 
)

Definition at line 852 of file dataset.py.

References dataset.Dataset.__chunks() and dataset.Dataset.fileList().

852  def createdatasetfile_hippy(self, filename, filesperjob, firstrun, lastrun):
853  with open(filename, "w") as f:
854  for job in self.__chunks(self.fileList(firstRun=firstrun, lastRun=lastrun, forcerunselection=True), filesperjob):
855  f.write(",".join("'{}'".format(file) for file in job)+"\n")
856 
def createdatasetfile_hippy(self, filename, filesperjob, firstrun, lastrun)
Definition: dataset.py:852
static std::string join(char **cmd)
Definition: RemoteFile.cc:17
def __chunks(self, theList, n)
Definition: dataset.py:89
def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:885
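For illustration, the file written here contains one line per job, each line a comma-separated list of single-quoted file names taken from fileList(). A standalone sketch of the same chunk-and-write logic with made-up file names:

# Standalone sketch of the chunk-and-write logic (made-up file names).
def chunks(thelist, n):
    return [thelist[i:i + n] for i in range(0, len(thelist), n)]

filelist = ["/store/data/a.root", "/store/data/b.root", "/store/data/c.root"]
with open("hippyfiles.txt", "w") as f:
    for job in chunks(filelist, 2):
        f.write(",".join("'{}'".format(name) for name in job) + "\n")
# hippyfiles.txt now contains:
# '/store/data/a.root','/store/data/b.root'
# '/store/data/c.root'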
def dataset.Dataset.datasetSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  crab = False,
  parent = False 
)

Definition at line 729 of file dataset.py.

References dataset.Dataset.__createSnippet(), dataset.Dataset.__filename, dataset.Dataset.__name, dataset.Dataset.__official, dataset.Dataset.__origName, dataset.Dataset.__predefined, dataset.Dataset.dump_cff(), and edm.print().

Referenced by dataset.Dataset.parentDataset().

729  firstRun = None, lastRun = None, crab = False, parent = False ):
730  if not firstRun: firstRun = None
731  if not lastRun: lastRun = None
732  if not begin: begin = None
733  if not end: end = None
734  if self.__predefined and (jsonPath or begin or end or firstRun or lastRun):
735  msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun' "
736  "only work for official datasets, not predefined _cff.py files" )
737  raise AllInOneError( msg )
738  if self.__predefined and parent:
739  with open(self.__filename) as f:
740  if "secFiles.extend" not in f.read():
741  msg = ("The predefined dataset '%s' does not contain secondary files, "
742  "which your validation requires!") % self.__name
743  if self.__official:
744  self.__name = self.__origName
745  self.__predefined = False
746  print(msg)
747  print ("Retrieving the files from DAS. You will be asked if you want "
748  "to overwrite the old dataset.\n"
749  "It will still be compatible with validations that don't need secondary files.")
750  else:
751  raise AllInOneError(msg)
752 
753  if self.__predefined:
754  snippet = ("process.load(\"Alignment.OfflineValidation.%s_cff\")\n"
755  "process.maxEvents = cms.untracked.PSet(\n"
756  " input = cms.untracked.int32(.oO[nEvents]Oo. / .oO[parallelJobs]Oo.)\n"
757  ")\n"
758  "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)"
759  %(self.__name))
760  if not parent:
761  with open(self.__filename) as f:
762  if "secFiles.extend" in f.read():
763  snippet += "\nprocess.source.secondaryFileNames = cms.untracked.vstring()"
764  return snippet
765  theMap = { "process": "process.",
766  "tab": " " * len( "process." ),
767  "nEvents": ".oO[nEvents]Oo. / .oO[parallelJobs]Oo.",
768  "skipEventsString": "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)\n",
769  "importCms": "",
770  "header": ""
771  }
772  datasetSnippet = self.__createSnippet( jsonPath = jsonPath,
773  begin = begin,
774  end = end,
775  firstRun = firstRun,
776  lastRun = lastRun,
777  repMap = theMap,
778  crab = crab,
779  parent = parent )
780  if jsonPath == "" and begin == "" and end == "" and firstRun == "" and lastRun == "":
781  try:
782  self.dump_cff(parent = parent)
783  except AllInOneError as e:
784  print("Can't store the dataset as a cff:")
785  print(e)
786  print("This may be inconvenient in the future, but will not cause a problem for this validation.")
787  return datasetSnippet
788 
def __createSnippet(self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, repMap=None, crab=False, parent=False)
Definition: dataset.py:245
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def dump_cff(self, outName=None, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, parent=False)
Definition: dataset.py:791
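A hedged usage sketch; the dataset name and run numbers are invented, and for non-predefined datasets the call needs DAS access. The returned snippet still contains .oO[...]Oo. placeholders that the all-in-one validation templating substitutes later.

# Hypothetical usage sketch (invented dataset name and run numbers).
from dataset import Dataset

d = Dataset("/MinimumBias/Run2018A-v1/RAW")
snippet = d.datasetSnippet(firstRun=315252, lastRun=316995)
# For a predefined _cff.py dataset the snippet is a process.load(...) block
# instead of an explicit file list.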
def dataset.Dataset.dataType (   self)

Definition at line 710 of file dataset.py.

References dataset.Dataset.__dataType, and dataset.Dataset.__getDataType().

710  def dataType( self ):
711  if not self.__dataType:
712  self.__dataType = self.__getDataType()
713  return self.__dataType
714 
def __getDataType(self)
Definition: dataset.py:388
def dataType(self)
Definition: dataset.py:710
def dataset.Dataset.dump_cff (   self,
  outName = None,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  parent = False 
)

Definition at line 791 of file dataset.py.

References dataset.Dataset.__cmssw, dataset.Dataset.__createSnippet(), dataset.Dataset.__dataType, dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__magneticField, dataset.Dataset.__name, edm.print(), python.rootplot.root2matplotlib.replace(), cms::dd.split(), str, and digitizers_cfi.strip.

Referenced by dataset.Dataset.datasetSnippet().

791  end = None, firstRun = None, lastRun = None, parent = False ):
792  if outName == None:
793  outName = "Dataset" + self.__name.replace("/", "_")
794  packageName = os.path.join( "Alignment", "OfflineValidation" )
795  if not os.path.exists( os.path.join(
796  self.__cmssw, "src", packageName ) ):
797  msg = ("You are trying to store the predefined dataset '%s'.\n"
798  "For that you need to check out the package '%s' to your "
799  "private release area in\n"%( outName, packageName )
800  + self.__cmssw )
801  raise AllInOneError( msg )
802  theMap = { "process": "",
803  "tab": "",
804  "nEvents": str( -1 ),
805  "skipEventsString": "",
806  "importCms": "import FWCore.ParameterSet.Config as cms\n",
807  "header": "#Do not delete or (unless you know what you're doing) change these comments\n"
808  "#%(name)s\n"
809  "#data type: %(dataType)s\n"
810  "#magnetic field: .oO[magneticField]Oo.\n" #put in magnetic field later
811  %{"name": self.__name, #need to create the snippet before getting the magnetic field
812  "dataType": self.__dataType} #so that we know the first and last runs
813  }
814  dataset_cff = self.__createSnippet( jsonPath = jsonPath,
815  begin = begin,
816  end = end,
817  firstRun = firstRun,
818  lastRun = lastRun,
819  repMap = theMap,
820  parent = parent)
821  magneticField = self.__magneticField
822  if magneticField == "MagneticField":
823  magneticField = "%s, %s #%s" % (magneticField,
824  str(self.__getMagneticFieldForRun()).replace("\n"," ").split("#")[0].strip(),
825  "Use MagneticField_cff.py; the number is for determining which track selection to use."
826  )
827  dataset_cff = dataset_cff.replace(".oO[magneticField]Oo.",magneticField)
828  filePath = os.path.join( self.__cmssw, "src", packageName,
829  "python", outName + "_cff.py" )
830  if os.path.exists( filePath ):
831  existMsg = "The predefined dataset '%s' already exists.\n"%( outName )
832  askString = "Do you want to overwrite it? [y/n]\n"
833  inputQuery = existMsg + askString
834  while True:
835  userInput = raw_input( inputQuery ).lower()
836  if userInput == "y":
837  break
838  elif userInput == "n":
839  return
840  else:
841  inputQuery = askString
842  print ( "The predefined dataset '%s' will be stored in the file\n"
843  %( outName )
844  + filePath +
845  "\nFor future use you have to do 'scram b'." )
846  print()
847  theFile = open( filePath, "w" )
848  theFile.write( dataset_cff )
849  theFile.close()
850  return
851 
std::vector< std::string_view > split(std::string_view, const char *)
def __getMagneticFieldForRun(self, run=-1, tolerance=0.5)
Definition: dataset.py:504
def __createSnippet(self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, repMap=None, crab=False, parent=False)
Definition: dataset.py:245
def replace(string, replacements)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
#define str(s)
def dataset.Dataset.extractFileSizes (   self)
Get the file size for each file, from the eos ls -l command.

Definition at line 310 of file dataset.py.

References dataset.EOSDataset.castorDir, and dataset.Dataset.castorDir.

310  def extractFileSizes(self):
311  '''Get the file size for each file, from the eos ls -l command.'''
312  # EOS command does not work in tier3
313  lsout = castortools.runXRDCommand(self.castorDir,'dirlist')[0]
314  lsout = lsout.split('\n')
315  self.filesAndSizes = {}
316  for entry in lsout:
317  values = entry.split()
318  if( len(values) != 5):
319  continue
320  # using full abs path as a key.
321  file = '/'.join([self.lfnDir, values[4].split("/")[-1]])
322  size = values[1]
323  self.filesAndSizes[file] = size
324 
std::vector< std::string_view > split(std::string_view, const char *)
def extractFileSizes(self)
Definition: dataset.py:310
static std::string join(char **cmd)
Definition: RemoteFile.cc:17
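The method keeps only listing entries with exactly five whitespace-separated columns and stores the size column keyed by the full LFN path. A standalone sketch of that parsing with an invented listing line:

# Standalone sketch of the parsing in extractFileSizes (invented listing line).
lsout = "drwx 1234567 user group /eos/cms/store/user/someone/file_1.root"
lfnDir = "/store/user/someone"

filesAndSizes = {}
for entry in lsout.split("\n"):
    values = entry.split()
    if len(values) != 5:
        continue
    name = "/".join([lfnDir, values[4].split("/")[-1]])  # full LFN as key
    filesAndSizes[name] = values[1]                       # size column as value
print(filesAndSizes)  # {'/store/user/someone/file_1.root': '1234567'}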
def dataset.Dataset.fileInfoList (   self,
  parent = False 
)

Definition at line 914 of file dataset.py.

References dataset.Dataset.__dasLimit, and dataset.Dataset.__getFileInfoList().

Referenced by dataset.Dataset.fileList().

914  def fileInfoList( self, parent = False ):
915  return self.__getFileInfoList( self.__dasLimit, parent )
916 
def __getFileInfoList(self, dasLimit, parent=False)
Definition: dataset.py:561
def fileInfoList(self, parent=False)
Definition: dataset.py:914
def dataset.Dataset.fileList (   self,
  parent = False,
  firstRun = None,
  lastRun = None,
  forcerunselection = False 
)

Definition at line 885 of file dataset.py.

References dataset.Dataset.__findInJson(), dataset.Dataset.fileInfoList(), dqmMemoryStats.float, dataset.Dataset.getrunnumberfromfilename(), and edm.print().

Referenced by dataset.Dataset.__fileListSnippet(), and dataset.Dataset.createdatasetfile_hippy().

885  def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
886  fileList = [ self.__findInJson(fileInfo,"name")
887  for fileInfo in self.fileInfoList(parent) ]
888 
889  if firstRun or lastRun:
890  if not firstRun: firstRun = -1
891  if not lastRun: lastRun = float('infinity')
892  unknownfilenames, reasons = [], set()
893  for filename in fileList[:]:
894  try:
895  if not firstRun <= self.getrunnumberfromfilename(filename) <= lastRun:
896  fileList.remove(filename)
897  except AllInOneError as e:
898  if forcerunselection: raise
899  unknownfilenames.append(e.message.split("\n")[1])
900  reasons .add (e.message.split("\n")[2])
901  if reasons:
902  if len(unknownfilenames) == len(fileList):
903  print("Could not figure out the run numbers of any of the filenames for the following reason(s):")
904  else:
905  print("Could not figure out the run numbers of the following filenames:")
906  for filename in unknownfilenames:
907  print(" "+filename)
908  print("for the following reason(s):")
909  for reason in reasons:
910  print(" "+reason)
911  print("Using the files anyway. The runs will be filtered at the CMSSW level.")
912  return fileList
913 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:304
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def fileInfoList(self, parent=False)
Definition: dataset.py:914
def getrunnumberfromfilename(filename)
Definition: dataset.py:858
def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:885
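A hedged usage sketch; the dataset name and run numbers are invented and DAS access is required to build the file list.

# Hypothetical usage sketch (invented dataset name and run window).
from dataset import Dataset

d = Dataset("/MinimumBias/Run2018A-v1/RAW")
files = d.fileList(firstRun=315252, lastRun=316995)
# Files whose run number cannot be parsed from the name are kept and a warning
# is printed; pass forcerunselection=True to raise an AllInOneError instead.
strict = d.fileList(firstRun=315252, lastRun=316995, forcerunselection=True)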
def dataset.Dataset.forcerunrange (   self,
  firstRun,
  lastRun,
  s 
)
s must be in the format run1:lum1-run2:lum2

Definition at line 326 of file dataset.py.

References dataset.Dataset.__firstusedrun, dataset.Dataset.__lastusedrun, dataset.int, and cms::dd.split().

Referenced by dataset.Dataset.getForceRunRangeFunction().

326  def forcerunrange(self, firstRun, lastRun, s):
327  """s must be in the format run1:lum1-run2:lum2"""
328  s = s.group()
329  run1 = s.split("-")[0].split(":")[0]
330  lum1 = s.split("-")[0].split(":")[1]
331  try:
332  run2 = s.split("-")[1].split(":")[0]
333  lum2 = s.split("-")[1].split(":")[1]
334  except IndexError:
335  run2 = run1
336  lum2 = lum1
337  if int(run2) < firstRun or int(run1) > lastRun:
338  return ""
339  if int(run1) < firstRun or firstRun < 0:
340  run1 = firstRun
341  lum1 = 1
342  if int(run2) > lastRun:
343  run2 = lastRun
344  lum2 = "max"
345  if int(run1) < self.__firstusedrun or self.__firstusedrun < 0:
346  self.__firstusedrun = int(run1)
347  if int(run2) > self.__lastusedrun:
348  self.__lastusedrun = int(run2)
349  return "%s:%s-%s:%s" % (run1, lum1, run2, lum2)
350 
std::vector< std::string_view > split(std::string_view, const char *)
def forcerunrange(self, firstRun, lastRun, s)
Definition: dataset.py:326
def dataset.Dataset.getfiles (   self,
  usecache 
)

Definition at line 211 of file dataset.py.

References dataset.Dataset.dasinstance, dataset.dasquery(), dataset.Dataset.datasetname, dataset.Dataset.filenamebase, dataset.findinjson(), dataset.int, and edm.print().

211  def getfiles(self, usecache):
212  filename = os.path.join(os.environ["CMSSW_BASE"], "src", "Alignment", "CommonAlignment", "data", self.filenamebase+".csv")
213  if not usecache:
214  try:
215  os.remove(filename)
216  except OSError as e:
217  if os.path.exists(filename):
218  raise
219 
220  result = []
221  try:
222  with open(filename) as f:
223  for row in csv.DictReader(f):
224  result.append(DataFile(**row))
225  return result
226  except IOError:
227  pass
228 
229  query = "file dataset={} instance={} detail=true | grep file.name, file.nevents".format(self.datasetname, self.dasinstance)
230  dasoutput = dasquery(query)
231  if not dasoutput:
232  raise DatasetError("No files are available for the dataset '{}'. This can be "
233  "due to a typo or due to a DAS problem. Please check the "
234  "spelling of the dataset and/or try again.".format(self.datasetname))
235  result = [DataFile(findinjson(_, "file", "name"), findinjson(_, "file", "nevents")) for _ in dasoutput if int(findinjson(_, "file", "nevents"))]
236  try:
237  with open(filename, "w") as f:
238  writer = csv.DictWriter(f, ("filename", "nevents", "runs"))
239  writer.writeheader()
240  for datafile in result:
241  writer.writerow(datafile.getdict())
242  except Exception as e:
243  print("Couldn't write the dataset csv file:\n\n{}".format(e))
244  return result
245 
def dasquery(dasQuery, dasLimit=0)
Definition: dataset.py:27
def getfiles(self, usecache)
Definition: dataset.py:211
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def findinjson(jsondict, strings)
Definition: dataset.py:95
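The DAS result is cached as a CSV file (columns filename, nevents, runs) under Alignment/CommonAlignment/data, so later calls with usecache=True can skip the query. A standalone sketch of reading such a cache; the row content is invented and the exact 'runs' format is whatever DataFile.getdict() writes:

# Standalone sketch of the CSV cache layout used by getfiles (invented content).
import csv, io

cached = io.StringIO(
    "filename,nevents,runs\n"
    "/store/data/Run2018A/MinimumBias/RAW/v1/000/315/252/00000/a.root,1000,315252\n"
)
for row in csv.DictReader(cached):
    print(row["filename"], row["nevents"], row["runs"])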
def dataset.Dataset.getForceRunRangeFunction (   self,
  firstRun,
  lastRun 
)

Definition at line 351 of file dataset.py.

References dataset.Dataset.forcerunrange().

Referenced by dataset.Dataset.__lumiSelectionSnippet().

351  def getForceRunRangeFunction(self, firstRun, lastRun):
352  def forcerunrangefunction(s):
353  return self.forcerunrange(firstRun, lastRun, s)
354  return forcerunrangefunction
355 
def forcerunrange(self, firstRun, lastRun, s)
Definition: dataset.py:326
def getForceRunRangeFunction(self, firstRun, lastRun)
Definition: dataset.py:351
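The returned closure is meant to be used as a re.sub callback over 'run1:lum1-run2:lum2' tokens (see __lumiSelectionSnippet). A simplified standalone sketch of that mechanism; the stand-in callback below only drops ranges outside the run window, whereas the real forcerunrange also clips the surviving ranges:

# Simplified stand-in for getForceRunRangeFunction, used as a re.sub callback.
import re

def make_callback(firstRun, lastRun):
    def callback(match):
        run1 = int(match.group().split("-")[0].split(":")[0])
        run2 = int(match.group().split("-")[1].split(":")[0])
        if run2 < firstRun or run1 > lastRun:
            return ""          # range entirely outside the window is dropped
        return match.group()   # the real code would clip it here
    return callback

lumiSecStr = "315300:1-315400:200 317500:1-317600:50"
print(re.sub(r"\d+:\d+-\d+:\d+", make_callback(315252, 316995), lumiSecStr))
# -> "315300:1-315400:200 " (the second range is outside the window)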
def dataset.Dataset.getPrimaryDatasetEntries (   self)

Definition at line 330 of file dataset.py.

References dataset.int, runall.testit.report, WorkFlowRunner.WorkFlowRunner.report, ALIUtils.report, and dataset.BaseDataset.report.

331  if self.report is not None and self.report:
332  return int(self.report.get('PrimaryDatasetEntries',-1))
333  return -1
334 
335 
def getPrimaryDatasetEntries(self)
Definition: dataset.py:330
def dataset.Dataset.getrunnumberfromfilename (   filename)
static

Definition at line 858 of file dataset.py.

References Vispa.Plugins.EdmBrowser.EdmDataAccessor.all(), dataset.int, and join().

Referenced by dataset.Dataset.fileList().

859  parts = filename.split("/")
860  result = error = None
861  if parts[0] != "" or parts[1] != "store":
862  error = "does not start with /store"
863  elif parts[2] in ["mc", "relval"]:
864  result = 1
865  elif not parts[-1].endswith(".root"):
866  error = "does not end with something.root"
867  elif len(parts) != 12:
868  error = "should be exactly 11 slashes counting the first one"
869  else:
870  runnumberparts = parts[-5:-2]
871  if not all(len(part)==3 for part in runnumberparts):
872  error = "the 3 directories {} do not have length 3 each".format("/".join(runnumberparts))
873  try:
874  result = int("".join(runnumberparts))
875  except ValueError:
876  error = "the 3 directories {} do not form an integer".format("/".join(runnumberparts))
877 
878  if error:
879  error = "could not figure out which run number this file is from:\n{}\n{}".format(filename, error)
880  raise AllInOneError(error)
881 
882  return result
883 
def getrunnumberfromfilename(filename)
Definition: dataset.py:858
static std::string join(char **cmd)
Definition: RemoteFile.cc:17
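A usage sketch with an invented but correctly shaped LFN; the run number is read from the three 3-digit directory components, and the method is static, so no DAS query is involved (importing the module still requires a CMSSW environment):

# Usage sketch: the path below is invented but has the expected /store layout.
from dataset import Dataset

lfn = "/store/data/Run2018A/ZeroBias/RAW/v1/000/315/252/00000/ABCDEF.root"
print(Dataset.getrunnumberfromfilename(lfn))  # -> 315252, from .../000/315/252/...
# Monte Carlo and RelVal files ("/store/mc/...", "/store/relval/...") return 1.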
def dataset.Dataset.headercomment (   self)

Definition at line 247 of file dataset.py.

References dataset.Dataset.datasetname.

247  def headercomment(self):
248  return self.datasetname
249 
def headercomment(self)
Definition: dataset.py:247
def dataset.Dataset.magneticField (   self)

Definition at line 715 of file dataset.py.

References dataset.Dataset.__getMagneticField(), and dataset.Dataset.__magneticField.

715  def magneticField( self ):
716  if not self.__magneticField:
717  self.__magneticField = self.__getMagneticField()
718  return self.__magneticField
719 
def __getMagneticField(self)
Definition: dataset.py:423
def magneticField(self)
Definition: dataset.py:715
def dataset.Dataset.magneticFieldForRun (   self,
  run = -1 
)

Definition at line 720 of file dataset.py.

References dataset.Dataset.__getMagneticFieldForRun().

720  def magneticFieldForRun( self, run = -1 ):
721  return self.__getMagneticFieldForRun(run)
722 
def __getMagneticFieldForRun(self, run=-1, tolerance=0.5)
Definition: dataset.py:504
def magneticFieldForRun(self, run=-1)
Definition: dataset.py:720
def dataset.Dataset.name (   self)
def dataset.Dataset.parentDataset (   self)

Definition at line 723 of file dataset.py.

References dataset.Dataset.__getParentDataset(), dataset.Dataset.__parentDataset, and dataset.Dataset.datasetSnippet().

Referenced by dataset.Dataset.__getFileInfoList().

723  def parentDataset( self ):
724  if not self.__parentDataset:
725  self.__parentDataset = self.__getParentDataset()
726  return self.__parentDataset
727 
def parentDataset(self)
Definition: dataset.py:723
def __getParentDataset(self)
Definition: dataset.py:413
def dataset.Dataset.predefined (   self)

Definition at line 920 of file dataset.py.

References dataset.Dataset.__predefined.

920  def predefined( self ):
921  return self.__predefined
922 
def predefined(self)
Definition: dataset.py:920
def dataset.Dataset.printInfo (   self)

Definition at line 325 of file dataset.py.

References dataset.EOSDataset.castorDir, dataset.Dataset.castorDir, dataset.Dataset.lfnDir, dataset.BaseDataset.name, and edm.print().

325  def printInfo(self):
326  print('sample : ' + self.name)
327  print('LFN : ' + self.lfnDir)
328  print('Castor path : ' + self.castorDir)
329 
def printInfo(self)
Definition: dataset.py:325
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def dataset.Dataset.runList (   self)

Definition at line 924 of file dataset.py.

References dataset.Dataset.__getRunList(), and edm.print().

924  def runList( self ):
925  return self.__getRunList()
926 
927 
def __getRunList(self)
Definition: dataset.py:620
def runList(self)
Definition: dataset.py:924

Member Data Documentation

dataset.Dataset.__cmssw
private

Definition at line 28 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField(), and dataset.Dataset.dump_cff().

dataset.Dataset.__cmsswrelease
private

Definition at line 29 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField().

dataset.Dataset.__dasinstance
private
dataset.Dataset.__dasLimit
private

Definition at line 26 of file dataset.py.

Referenced by dataset.Dataset.fileInfoList().

dataset.Dataset.__dataType
private
tuple dataset.Dataset.__dummy_source_template
staticprivate
Initial value:
1 = ("readFiles = cms.untracked.vstring()\n"
2  "secFiles = cms.untracked.vstring()\n"
3  "%(process)ssource = cms.Source(\"PoolSource\",\n"
4  "%(tab)s secondaryFileNames ="
5  "secFiles,\n"
6  "%(tab)s fileNames = readFiles\n"
7  ")\n"
8  "readFiles.extend(['dummy_File.root'])\n"
9  "%(process)smaxEvents = cms.untracked.PSet( "
10  "input = cms.untracked.int32(%(nEvents)s) )\n"
11  "%(skipEventsString)s\n")

Definition at line 113 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().
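The template is an old-style %-format string; a standalone sketch of rendering it from a repMap, with values mirroring the theMap built in datasetSnippet (the rendering below is only illustrative):

# Standalone sketch of how the %-template is rendered (values illustrative).
template = ("readFiles = cms.untracked.vstring()\n"
            "secFiles = cms.untracked.vstring()\n"
            "%(process)ssource = cms.Source(\"PoolSource\",\n"
            "%(tab)s secondaryFileNames = secFiles,\n"
            "%(tab)s fileNames = readFiles\n"
            ")\n"
            "readFiles.extend(['dummy_File.root'])\n"
            "%(process)smaxEvents = cms.untracked.PSet( "
            "input = cms.untracked.int32(%(nEvents)s) )\n"
            "%(skipEventsString)s\n")

repMap = {"process": "process.",
          "tab": " " * len("process."),
          "nEvents": ".oO[nEvents]Oo. / .oO[parallelJobs]Oo.",
          "skipEventsString": ""}
print(template % repMap)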

dataset.Dataset.__filename
private
dataset.Dataset.__firstusedrun
private
dataset.Dataset.__inputMagneticField
private
dataset.Dataset.__lastusedrun
private
dataset.Dataset.__magneticField
private
dataset.Dataset.__name
private
dataset.Dataset.__official
private

Definition at line 36 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__origName
private

Definition at line 25 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__parentDataset
private

Definition at line 32 of file dataset.py.

Referenced by dataset.Dataset.parentDataset().

dataset.Dataset.__predefined
private
dataset.Dataset.__source_template
staticprivate

Definition at line 95 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().

dataset.Dataset.bad_files

Definition at line 286 of file dataset.py.

dataset.Dataset.castorDir

Definition at line 270 of file dataset.py.

Referenced by dataset.Dataset.extractFileSizes(), and dataset.Dataset.printInfo().

dataset.Dataset.dasinstance

Definition at line 208 of file dataset.py.

Referenced by dataset.Dataset.getfiles().

dataset.Dataset.datasetname

Definition at line 200 of file dataset.py.

Referenced by dataset.Dataset.getfiles(), and dataset.Dataset.headercomment().

dataset.Dataset.filenamebase

Definition at line 203 of file dataset.py.

Referenced by dataset.Dataset.getfiles().

dataset.Dataset.files

Definition at line 277 of file dataset.py.

dataset.Dataset.filesAndSizes

Definition at line 315 of file dataset.py.

dataset.Dataset.good_files

Definition at line 287 of file dataset.py.

dataset.Dataset.lfnDir

Definition at line 269 of file dataset.py.

Referenced by dataset.Dataset.printInfo().

dataset.Dataset.maskExists

Definition at line 271 of file dataset.py.

dataset.Dataset.official

Definition at line 202 of file dataset.py.

dataset.Dataset.report

Definition at line 272 of file dataset.py.

Referenced by addOnTests.testit.run().