CMS 3D CMS Logo

List of all members | Public Member Functions | Static Public Member Functions | Public Attributes | Private Member Functions | Private Attributes | Static Private Attributes
dataset.Dataset Class Reference
Inheritance diagram for dataset.Dataset:
dataset.BaseDataset dataset.DatasetBase

Public Member Functions

def __init__ (self, datasetName, dasLimit=0, tryPredefinedFirst=True, cmssw=os.environ["CMSSW_BASE"], cmsswrelease=os.environ["CMSSW_RELEASE_BASE"], magneticfield=None, dasinstance=None)
 
def __init__ (self, datasetname, dasinstance=defaultdasinstance)
 
def __init__ (self, name, user, pattern='.*root')
 
def buildListOfBadFiles (self)
 
def buildListOfFiles (self, pattern='.*root')
 
def convertTimeToRun (self, begin=None, end=None, firstRun=None, lastRun=None, shortTuple=True)
 
def createdatasetfile_hippy (self, filename, filesperjob, firstrun, lastrun)
 
def datasetSnippet (self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, crab=False, parent=False)
 
def dataType (self)
 
def dump_cff (self, outName=None, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, parent=False)
 
def extractFileSizes (self)
 
def fileInfoList (self, parent=False)
 
def fileList (self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
 
def forcerunrange (self, firstRun, lastRun, s)
 
def getfiles (self, usecache)
 
def getForceRunRangeFunction (self, firstRun, lastRun)
 
def getPrimaryDatasetEntries (self)
 
def headercomment (self)
 
def magneticField (self)
 
def magneticFieldForRun (self, run=-1)
 
def name (self)
 
def parentDataset (self)
 
def predefined (self)
 
def printInfo (self)
 
def runList (self)
 
- Public Member Functions inherited from dataset.BaseDataset
def __init__ (self, name, user, pattern='.*root', run_range=None, dbsInstance=None)
 def init(self, name, user, pattern='. More...
 
def buildListOfBadFiles (self)
 
def buildListOfFiles (self, pattern)
 
def extractFileSizes (self)
 
def getPrimaryDatasetEntries (self)
 
def listOfFiles (self)
 
def listOfGoodFiles (self)
 
def listOfGoodFilesWithPrescale (self, prescale)
 
def printFiles (self, abspath=True, info=True)
 
def printInfo (self)
 
- Public Member Functions inherited from dataset.DatasetBase
def getfiles (self, usecache)
 
def headercomment (self)
 
def writefilelist_hippy (self, firstrun, lastrun, runs, eventsperjob, maxevents, outputfile, usecache=True)
 
def writefilelist_validation (self, firstrun, lastrun, runs, maxevents, outputfile=None, usecache=True)
 

Static Public Member Functions

def getrunnumberfromfilename (filename)
 

Public Attributes

 bad_files
 
 castorDir
 
 dasinstance
 
 datasetname
 
 filenamebase
 
 files
 
 filesAndSizes
 
 good_files
 
 lfnDir
 
 maskExists
 
 official
 
 report
 
- Public Attributes inherited from dataset.BaseDataset
 bad_files
 
 dbsInstance
 MM. More...
 
 files
 
 filesAndSizes
 
 good_files
 
 name
 
 pattern
 
 primaryDatasetEntries
 MM. More...
 
 report
 
 run_range
 
 user
 

Private Member Functions

def __chunks (self, theList, n)
 
def __createSnippet (self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, repMap=None, crab=False, parent=False)
 
def __dateString (self, date)
 
def __datetime (self, stringForDas)
 
def __fileListSnippet (self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
 
def __find_ge (self, a, x)
 
def __find_lt (self, a, x)
 
def __findInJson (self, jsondict, strings)
 
def __getData (self, dasQuery, dasLimit=0)
 
def __getDataType (self)
 
def __getFileInfoList (self, dasLimit, parent=False)
 
def __getMagneticField (self)
 
def __getMagneticFieldForRun (self, run=-1, tolerance=0.5)
 
def __getParentDataset (self)
 
def __getRunList (self)
 
def __lumiSelectionSnippet (self, jsonPath=None, firstRun=None, lastRun=None)
 

Private Attributes

 __cmssw
 
 __cmsswrelease
 
 __dasinstance
 
 __dasLimit
 
 __dataType
 
 __filename
 
 __firstusedrun
 
 __inputMagneticField
 
 __lastusedrun
 
 __magneticField
 
 __name
 
 __official
 
 __origName
 
 __parentDataset
 
 __predefined
 

Static Private Attributes

tuple __dummy_source_template
 
 __source_template
 

Detailed Description

Definition at line 198 of file dataset.py.

Constructor & Destructor Documentation

◆ __init__() [1/3]

def dataset.Dataset.__init__ (   self,
  datasetname,
  dasinstance = defaultdasinstance 
)

Definition at line 199 of file dataset.py.

Referenced by dataset.Dataset.__init__().

def __init__(self, datasetname, dasinstance=defaultdasinstance):
    """Record the dataset name and DAS instance and derive the file-name base.

    A name matching the pattern ``/.+/.+/.+`` is flagged as official; its
    file-name base is ``"Dataset"`` followed by the name with every ``/``
    replaced by ``_``.  Any other name is flagged as not official and is
    used unchanged as the file-name base.
    """
    self.datasetname = datasetname
    looks_official = re.match(r'/.+/.+/.+', datasetname) is not None
    self.official = looks_official
    if looks_official:
        base = "Dataset" + self.datasetname.replace("/", "_")
    else:
        base = datasetname
    self.filenamebase = base

    self.dasinstance = dasinstance
209 
def __init__(self, dataset, job_number, job_id, job_name, isDA, isMC, applyBOWS, applyEXTRACOND, extraconditions, runboundary, lumilist, intlumi, maxevents, gt, allFromGT, alignmentDB, alignmentTAG, apeDB, apeTAG, bowDB, bowTAG, vertextype, tracktype, refittertype, ttrhtype, applyruncontrol, ptcut, CMSSW_dir, the_dir)
def replace(string, replacements)

◆ __init__() [2/3]

def dataset.Dataset.__init__ (   self,
  datasetName,
  dasLimit = 0,
  tryPredefinedFirst = True,
  cmssw = os.environ["CMSSW_BASE"],
  cmsswrelease = os.environ["CMSSW_RELEASE_BASE"],
  magneticfield = None,
  dasinstance = None 
)

Definition at line 23 of file dataset.py.

23  magneticfield = None, dasinstance = None):
24  self.__name = datasetName
25  self.__origName = datasetName
26  self.__dasLimit = dasLimit
27  self.__dasinstance = dasinstance
28  self.__cmssw = cmssw
29  self.__cmsswrelease = cmsswrelease
30  self.__firstusedrun = None
31  self.__lastusedrun = None
32  self.__parentDataset = None
33 
34  # check, if dataset name matches CMS dataset naming scheme
35  if re.match( r'/.+/.+/.+', self.__name ):
36  self.__official = True
37  fileName = "Dataset" + self.__name.replace("/","_") + "_cff.py"
38  else:
39  self.__official = False
40  fileName = self.__name + "_cff.py"
41 
42  searchPath1 = os.path.join( self.__cmssw, "python",
43  "Alignment", "OfflineValidation",
44  fileName )
45  searchPath2 = os.path.join( self.__cmssw, "src",
46  "Alignment", "OfflineValidation",
47  "python", fileName )
48  searchPath3 = os.path.join( self.__cmsswrelease,
49  "python", "Alignment",
50  "OfflineValidation", fileName )
51  if self.__official and not tryPredefinedFirst:
52  self.__predefined = False
53  elif os.path.exists( searchPath1 ):
54  self.__predefined = True
55  self.__filename = searchPath1
56  elif os.path.exists( searchPath2 ):
57  msg = ("The predefined dataset '%s' does exist in '%s', but "
58  "you need to run 'scram b' first."
59  %( self.__name, searchPath2 ))
60  if self.__official:
61  print(msg)
62  print("Getting the data from DAS again. To go faster next time, run scram b.")
63  else:
64  raise AllInOneError( msg )
65  elif os.path.exists( searchPath3 ):
66  self.__predefined = True
67  self.__filename = searchPath3
68  elif self.__official:
69  self.__predefined = False
70  else:
71  msg = ("The predefined dataset '%s' does not exist. Please "
72  "create it first or check for typos."%( self.__name ))
73  raise AllInOneError( msg )
74 
75  if self.__predefined and self.__official:
76  self.__name = "Dataset" + self.__name.replace("/","_")
77 
78  if magneticfield is not None:
79  try:
80  magneticfield = float(magneticfield)
81  except ValueError:
82  raise AllInOneError("Bad magneticfield {} which can't be converted to float".format(magneticfield))
83  self.__inputMagneticField = magneticfield
84 
85  self.__dataType = self.__getDataType()
86  self.__magneticField = self.__getMagneticField()
87 
88 
def replace(string, replacements)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47

◆ __init__() [3/3]

def dataset.Dataset.__init__ (   self,
  name,
  user,
  pattern = '.*root' 
)

Definition at line 267 of file dataset.py.

References dataset.Dataset.__init__().

def __init__(self, name, user, pattern='.*root'):
    """Set up the storage locations for ``name`` and initialise the base class.

    ``lfnDir`` is ``castorBaseDir(user)`` with ``name`` appended and
    ``castorDir`` is its translation through ``castortools.lfnToCastor``.
    ``maskExists`` and ``report`` get their initial values before the base
    class constructor is invoked with ``name``, ``user`` and ``pattern``.
    """
    lfn_dir = castorBaseDir(user) + name
    self.lfnDir = lfn_dir
    self.castorDir = castortools.lfnToCastor(lfn_dir)
    self.maskExists = False
    self.report = None
    super(Dataset, self).__init__(name, user, pattern)
273 
def __init__(self, dataset, job_number, job_id, job_name, isDA, isMC, applyBOWS, applyEXTRACOND, extraconditions, runboundary, lumilist, intlumi, maxevents, gt, allFromGT, alignmentDB, alignmentTAG, apeDB, apeTAG, bowDB, bowTAG, vertextype, tracktype, refittertype, ttrhtype, applyruncontrol, ptcut, CMSSW_dir, the_dir)

Member Function Documentation

◆ __chunks()

def dataset.Dataset.__chunks (   self,
  theList,
  n 
)
private
Yield successive n-sized chunks from theList.

Definition at line 89 of file dataset.py.

References FastTimerService_cff.range.

Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.__lumiSelectionSnippet(), and dataset.Dataset.createdatasetfile_hippy().

def __chunks(self, theList, n):
    """Generate consecutive slices of ``theList`` with at most ``n`` items.

    The slices are produced lazily, in order; only the last one can be
    shorter than ``n``.  An empty ``theList`` produces nothing.
    """
    for offset in range(0, len(theList), n):
        piece = theList[offset:offset + n]
        yield piece
94 

◆ __createSnippet()

def dataset.Dataset.__createSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  repMap = None,
  crab = False,
  parent = False 
)
private

Definition at line 245 of file dataset.py.

References dataset.Dataset.__dummy_source_template, dataset.Dataset.__fileListSnippet(), dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.__source_template, electrons_cff.bool, dataset.Dataset.convertTimeToRun(), and dataset.int.

Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.datasetSnippet(), and dataset.Dataset.dump_cff().

245  crab = False, parent = False ):
246 
247  if firstRun:
248  firstRun = int( firstRun )
249  if lastRun:
250  lastRun = int( lastRun )
251  if ( begin and firstRun ) or ( end and lastRun ):
252  msg = ( "The Usage of "
253  + "'begin' & 'firstRun' " * int( bool( begin and
254  firstRun ) )
255  + "and " * int( bool( ( begin and firstRun ) and
256  ( end and lastRun ) ) )
257  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
258  + "is ambigous." )
259  raise AllInOneError( msg )
260  if begin or end:
261  ( firstRun, lastRun ) = self.convertTimeToRun(
262  begin = begin, end = end, firstRun = firstRun,
263  lastRun = lastRun )
264  if ( firstRun and lastRun ) and ( firstRun > lastRun ):
265  msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
266  "chosen is greater than the upper time/runrange limit "
267  "('end'/'lastRun').")
268  raise AllInOneError( msg )
269 
270  lumiSecExtend = self.__lumiSelectionSnippet(jsonPath=jsonPath, firstRun=firstRun, lastRun=lastRun)
271  lumiStr = goodLumiSecStr = ""
272  if lumiSecExtend:
273  goodLumiSecStr = "lumiSecs = cms.untracked.VLuminosityBlockRange()\n"
274  lumiStr = " lumisToProcess = lumiSecs,\n"
275 
276  files = self.__fileListSnippet(crab=crab, parent=parent, firstRun=firstRun, lastRun=lastRun, forcerunselection=False)
277 
278  theMap = repMap
279  theMap["files"] = files
280  theMap["json"] = jsonPath
281  theMap["lumiStr"] = lumiStr
282  theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
283  theMap["lumiSecExtend"] = lumiSecExtend
284  if crab:
285  dataset_snippet = self.__dummy_source_template%( theMap )
286  else:
287  dataset_snippet = self.__source_template%( theMap )
288  return dataset_snippet
289 

◆ __dateString()

def dataset.Dataset.__dateString (   self,
  date 
)
private

Definition at line 640 of file dataset.py.

References dataset.Dataset.convertTimeToRun(), and str.

Referenced by dataset.Dataset.convertTimeToRun().

def __dateString(self, date):
    """Return ``date`` as an 8-character ``yyyymmdd`` string.

    ``date`` must provide integer ``year``, ``month`` and ``day``
    attributes.  All three fields are zero-padded (year to four digits,
    month and day to two) so the result always has the fixed width that
    ``__datetime`` requires when it parses such a string back.
    """
    return "{:04d}{:02d}{:02d}".format(date.year, date.month, date.day)
642 
#define str(s)

◆ __datetime()

def dataset.Dataset.__datetime (   self,
  stringForDas 
)
private

Definition at line 631 of file dataset.py.

References dataset.int.

Referenced by dataset.Dataset.convertTimeToRun().

def __datetime(self, stringForDas):
    """Parse a ``yyyymmdd`` string into a ``datetime.date``.

    Raises AllInOneError when the string is not exactly 8 characters long.
    The three fields are converted with ``int`` and handed to
    ``datetime.date``, so whatever those raise for non-numeric or
    out-of-range fields propagates unchanged.
    """
    if len(stringForDas) != 8:
        raise AllInOneError(stringForDas + " is not a valid date string.\n"
                            + "DAS accepts dates in the form 'yyyymmdd'")
    # Field boundaries inside the fixed-width string: year, month, day.
    bounds = ((0, 4), (4, 6), (6, 8))
    year, month, day = (int(stringForDas[lo:hi]) for lo, hi in bounds)
    return datetime.date(year, month, day)
639 

◆ __fileListSnippet()

def dataset.Dataset.__fileListSnippet (   self,
  crab = False,
  parent = False,
  firstRun = None,
  lastRun = None,
  forcerunselection = False 
)
private

Definition at line 221 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__createSnippet(), dataset.Dataset.__name, dataset.Dataset.fileList(), and join().

Referenced by dataset.Dataset.__createSnippet().

# Build the file-list text that is inserted into a generated source snippet.
#
# crab=True: the result starts out as an empty string (no file names listed).
# Otherwise the names returned by self.fileList(...) are split by
# self.__chunks into groups of 255, and each group becomes one
# "readFiles.extend( [ ... ] )" statement with one quoted name per line;
# AllInOneError is raised when there is no group at all.
# parent=True: the same is done with self.fileList(parent=True, ...), written
# as "secFiles.extend( [ ... ] )" statements and appended after an empty line.
# Returns the assembled string.
#
# NOTE(review): this listing has lost its indentation, so it cannot be told
# from here whether the `if parent:` part sits inside the non-crab branch or
# follows it -- confirm against dataset.py.
# NOTE(review): the group size 255 presumably keeps each list literal below a
# size limit of the configuration language -- confirm.
221  def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
222  if crab:
223  files = ""
224  else:
225  splitFileList = list( self.__chunks( self.fileList(firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
226  if not splitFileList:
227  raise AllInOneError("No files found for dataset {}. Check the spelling, or maybe specify another das instance?".format(self.__name))
# First join the names of one group, then wrap each joined group in an extend() call.
228  fileStr = [ "',\n'".join( files ) for files in splitFileList ]
229  fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
230  for files in fileStr ]
231  files = "\n".join( fileStr )
232 
233  if parent:
234  splitParentFileList = list( self.__chunks( self.fileList(parent=True, firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
235  parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ]
236  parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
237  for parentFiles in parentFileStr ]
238  parentFiles = "\n".join( parentFileStr )
239  files += "\n\n" + parentFiles
240 
241  return files
242 
static std::string join(char **cmd)
Definition: RemoteFile.cc:19

◆ __find_ge()

def dataset.Dataset.__find_ge (   self,
  a,
  x 
)
private

Definition at line 297 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

def __find_ge(self, a, x):
    """Return the index of the leftmost item of ``a`` that is >= ``x``.

    ``a`` must already be sorted, as ``bisect`` requires.  Note that the
    index is returned, not the item itself.

    Raises ValueError when every item of ``a`` is smaller than ``x``
    (which includes an empty ``a``).
    """
    i = bisect.bisect_left(a, x)
    if i == len(a):
        raise ValueError("no item greater than or equal to {!r}".format(x))
    return i
303 

◆ __find_lt()

def dataset.Dataset.__find_lt (   self,
  a,
  x 
)
private

Definition at line 290 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

def __find_lt(self, a, x):
    """Return the index of the rightmost item of ``a`` that is < ``x``.

    ``a`` must already be sorted, as ``bisect`` requires.  Note that the
    index is returned, not the item itself.

    Raises ValueError when no item of ``a`` is smaller than ``x``
    (which includes an empty ``a``).
    """
    i = bisect.bisect_left(a, x)
    if i == 0:
        raise ValueError("no item less than {!r}".format(x))
    return i - 1
296 

◆ __findInJson()

def dataset.Dataset.__findInJson (   self,
  jsondict,
  strings 
)
private

Definition at line 304 of file dataset.py.

References dataset.Dataset.__findInJson().

Referenced by dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.fileList().

# Follow the key path `strings` through the decoded JSON structure `jsondict`
# and return what is found at its end.
#
# `strings` may be a single str (treated as a one-element path) or a sequence
# of keys; an empty path returns `jsondict` itself.
# For a dict, the first key is looked up directly and the search recurses
# with the remaining keys.  Otherwise `jsondict` is iterated and the search
# recurses into every element that contains the first key; an element whose
# sub-search fails is skipped and the next one is tried.
# Raises KeyError("Can't find <key>") when no branch leads to a result.
#
# NOTE(review): this listing has lost its indentation, so it cannot be told
# from here whether the `else:` belongs to `if isinstance(jsondict,dict):` or
# to `if strings[0] in jsondict:` -- confirm against dataset.py.
304  def __findInJson(self, jsondict, strings):
305  if isinstance(strings, str):
306  strings = [ strings ]
307 
308  if len(strings) == 0:
309  return jsondict
310  if isinstance(jsondict,dict):
311  if strings[0] in jsondict:
312  try:
313  return self.__findInJson(jsondict[strings[0]], strings[1:])
314  except KeyError:
# A failed sub-search is not fatal here; control falls through to the final raise.
315  pass
316  else:
317  for a in jsondict:
318  if strings[0] in a:
319  try:
320  return self.__findInJson(a[strings[0]], strings[1:])
321  except (TypeError, KeyError): #TypeError because a could be a string and contain strings[0]
322  pass
323  #if it's not found
324  raise KeyError("Can't find " + strings[0])
325 

◆ __getData()

def dataset.Dataset.__getData (   self,
  dasQuery,
  dasLimit = 0 
)
private

Definition at line 356 of file dataset.py.

References dataset.Dataset.__findInJson(), das_client.get_data(), and str.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), and dataset.Dataset.convertTimeToRun().

def __getData(self, dasQuery, dasLimit=0):
    """Run a DAS query and return the ``data`` entry of its reply.

    ``dasQuery`` and ``dasLimit`` are passed straight to
    ``das_client.get_data``.  A str reply is decoded with ``json.loads``;
    any other reply is used as it is.

    The reply counts as failed when it carries a truthy ``data``/``error``
    entry, when its ``status`` is not ``'ok'``, or when it has no ``data``
    key.  In that case AllInOneError is raised with the reply's ``reason``
    (or, if there is none, the whole reply as text).  Text longer than
    10000 characters is written to the first unused
    ``das_query_output_<i>.txt`` in the current directory and only the file
    name is put into the message.
    """
    dasData = das_client.get_data(dasQuery, dasLimit)
    if isinstance(dasData, str):
        jsondict = json.loads(dasData)
    else:
        jsondict = dasData
    # Check, if the DAS query fails
    try:
        error = self.__findInJson(jsondict, ["data", "error"])
    except KeyError:
        error = None
    if error or self.__findInJson(jsondict, "status") != 'ok' or "data" not in jsondict:
        try:
            jsonstr = self.__findInJson(jsondict, "reason")
        except KeyError:
            jsonstr = str(jsondict)
        if len(jsonstr) > 10000:
            # Too long for an error message: dump it next to the job instead.
            jsonfile = "das_query_output_%i.txt"
            i = 0
            while os.path.lexists(jsonfile % i):
                i += 1
            jsonfile = jsonfile % i
            # The context manager closes the file even if the write fails.
            with open(jsonfile, "w") as theFile:
                theFile.write(jsonstr)
            msg = "The DAS query returned an error. The output is very long, and has been stored in:\n" + jsonfile
        else:
            msg = "The DAS query returned a error. Here is the output\n" + jsonstr
        msg += "\nIt's possible that this was a server error. If so, it may work if you try again later"
        raise AllInOneError(msg)
    return self.__findInJson(jsondict, "data")
387 
def get_data(host, query, idx, limit, debug, threshold=300, ckey=None, cert=None, capath=None, qcache=0, das_headers=True)
Definition: das_client.py:276
#define str(s)

◆ __getDataType()

def dataset.Dataset.__getDataType (   self)
private

Definition at line 388 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__predefined, ElectronMVAID.ElectronMVAID.name, HcalOfflineHarvesting.name, HcalOnlineHarvesting.name, HFRaddamTask.name, LaserTask.name, NoCQTask.name, PedestalTask.name, QIE10Task.name, QIE11Task.name, RecHitTask.name, UMNioTask.name, ZDCTask.name, AlignableObjectId::entry.name, RawTask.name, average.Average.name, counter.Counter.name, TPTask.name, histograms.Histograms.name, DigiTask.name, LEDTask.name, cond::persistency::GLOBAL_TAG::NAME.name, cond::persistency::RUN_INFO::RUN_NUMBER.name, cond::persistency::TAG::NAME.name, TmModule.name, cond::persistency::GTEditorData.name, cond::persistency::GLOBAL_TAG::VALIDITY.name, cond::persistency::RUN_INFO::START_TIME.name, cond::persistency::TAG::TIME_TYPE.name, cond::persistency::GLOBAL_TAG::DESCRIPTION.name, cond::persistency::RUN_INFO::END_TIME.name, core.autovars.NTupleVariable.name, cond::persistency::TAG::OBJECT_TYPE.name, DQMRivetClient::NormOption.name, cond::persistency::GLOBAL_TAG::RELEASE.name, cond::persistency::TAG::SYNCHRONIZATION.name, cond::persistency::GLOBAL_TAG::SNAPSHOT_TIME.name, cond::persistency::TAG::END_OF_VALIDITY.name, MEPSet.name, cond::persistency::O2O_RUN::JOB_NAME.name, cond::persistency::GLOBAL_TAG::INSERTION_TIME.name, cms::dd::NameValuePair< T >.name, cond::persistency::TAG::DESCRIPTION.name, cond::persistency::O2O_RUN::START_TIME.name, cond::persistency::TAG::LAST_VALIDATED_TIME.name, FWTGeoRecoGeometry::Info.name, cond::persistency::O2O_RUN::END_TIME.name, cond::persistency::TAG::INSERTION_TIME.name, cond::persistency::O2O_RUN::STATUS_CODE.name, cond::persistency::TAG::MODIFICATION_TIME.name, cond::persistency::O2O_RUN::LOG.name, ParameterSet.name, nanoaod::MergeableCounterTable::SingleColumn< T >.name, cond::persistency::TAG::PROTECTION_CODE.name, preexistingValidation.PreexistingValidation.name, OutputMEPSet.name, 
MEPSetData.name, PixelDCSObject< T >::Item.name, AlignmentConstraint.name, dataset.BaseDataset.name, personalPlayback.Applet.name, cms::dd::ValuePair< T, U >.name, Types._Untracked.name, MagCylinder.name, analyzer.Analyzer.name, DQMRivetClient::LumiOption.name, heppy::ParSet.name, cond::persistency::GTProxyData.name, SingleObjectCondition.name, edm::PathTimingSummary.name, DQMRivetClient::ScaleFactorOption.name, cms::DDAlgoArguments.name, EgHLTOfflineSummaryClient::SumHistBinData.name, Barrel.name, cond::TimeTypeSpecs.name, perftools::EdmEventSize::BranchRecord.name, core.autovars.NTupleObjectType.name, edm::PathSummary.name, EcalLogicID.name, alignment.Alignment.name, lumi::TriggerInfo.name, XMLProcessor::_loaderBaseConfig.name, PixelEndcapLinkMaker::Item.name, FWTableViewManager::TableEntry.name, MEtoEDM< T >::MEtoEDMObject.name, PixelBarrelLinkMaker::Item.name, ExpressionHisto< T >.name, DQMGenericClient::EfficOption.name, TreeCrawler.Package.name, Supermodule.name, genericValidation.GenericValidation.name, cond::persistency::GLOBAL_TAG_MAP::GLOBAL_TAG_NAME.name, options.ConnectionHLTMenu.name, cond::persistency::GLOBAL_TAG_MAP::RECORD.name, cms::DDParsingContext::CompositeMaterial.name, cond::persistency::GLOBAL_TAG_MAP::LABEL.name, cond::persistency::GLOBAL_TAG_MAP::TAG_NAME.name, cond::Tag_t.name, dqmoffline::l1t::HistDefinition.name, DQMGenericClient::ProfileOption.name, FastHFShowerLibrary.name, magneticfield::BaseVolumeHandle.name, nanoaod::MergeableCounterTable::VectorColumn< T >.name, emtf::Node.name, h4DSegm.name, DQMGenericClient::NormOption.name, core.TriggerMatchAnalyzer.TriggerMatchAnalyzer.name, CounterChecker.name, DQMGenericClient::CDOption.name, PhysicsTools::Calibration::Variable.name, cond::TagInfo_t.name, TrackerSectorStruct.name, MuonGeometrySanityCheckPoint.name, DQMGenericClient::NoFlowOption.name, FCDTask.name, cond::persistency::PAYLOAD::HASH.name, cond::persistency::PAYLOAD::OBJECT_TYPE.name, looper.Looper.name, Mapper::definition< 
ScannerT >.name, EDMtoMEConverter.name, cond::persistency::PAYLOAD::DATA.name, cond::persistency::PAYLOAD::STREAMER_INFO.name, cond::persistency::PAYLOAD::VERSION.name, cond::persistency::PAYLOAD::INSERTION_TIME.name, classes.MonitorData.name, HistogramManager.name, classes.OutputData.name, BPHDecayToResResBuilderBase::DZSelect.name, Crystal.name, h2DSegm.name, options.HLTProcessOptions.name, cond::persistency::IOV::TAG_NAME.name, cond::persistency::IOV::SINCE.name, cond::persistency::IOV::PAYLOAD_HASH.name, cond::persistency::IOV::INSERTION_TIME.name, core.TriggerBitAnalyzer.TriggerBitAnalyzer.name, DQMNet::WaitObject.name, AlpgenParameterName.name, config.Analyzer.name, geometry.Structure.name, core.autovars.NTupleSubObject.name, Capsule.name, core.autovars.NTupleObject.name, Ceramic.name, SiStripMonitorDigi.name, BulkSilicon.name, config.Service.name, APD.name, core.autovars.NTupleCollection.name, nanoaod::FlatTable::Column.name, BPHRecoBuilder::BPHRecoSource.name, BPHRecoBuilder::BPHCompSource.name, StraightTrackAlignment::RPSetPlots.name, cond::persistency::TAG_AUTHORIZATION::TAG_NAME.name, cond::persistency::TAG_AUTHORIZATION::ACCESS_TYPE.name, cond::persistency::TAG_AUTHORIZATION::CREDENTIAL.name, cond::persistency::TAG_AUTHORIZATION::CREDENTIAL_TYPE.name, InnerLayerVolume.name, cond::payloadInspector::TagReference.name, cond::persistency::TAG_LOG::TAG_NAME.name, cond::persistency::TAG_LOG::EVENT_TIME.name, cond::persistency::TAG_LOG::USER_NAME.name, cond::persistency::TAG_LOG::HOST_NAME.name, cond::persistency::TAG_LOG::COMMAND.name, cond::persistency::TAG_LOG::ACTION.name, cond::persistency::TAG_LOG::USER_TEXT.name, personalPlayback.FrameworkJob.name, Grid.name, trklet::TrackletConfigBuilder::DTCinfo.name, Grille.name, BackPipe.name, plotscripts.SawTeethFunction.name, PatchPanel.name, BackCoolTank.name, DryAirTube.name, crabFunctions.CrabTask.name, MBCoolTube.name, MBManif.name, cscdqm::ParHistoDef.name, hTMaxCell.name, BeautifulSoup.Tag.name, 
SummaryOutputProducer::GenericSummary.name, BeautifulSoup.SoupStrainer.name, and python.rootplot.root2matplotlib.replace().

Referenced by dataset.Dataset.dataType().

def __getDataType(self):
    """Return the data type of this dataset, or ``"unknown"``.

    Predefined dataset: the dataset file is scanned for a line starting
    with ``"#data type: "`` and the rest of that line is returned.  The
    whole file is read so that a second such line is detected and reported
    with AllInOneError; a file without such a line yields ``"unknown"``.

    Otherwise DAS is queried for the dataset's ``datatype``.  If the reply
    does not contain it, a warning is printed and ``"unknown"`` is
    returned.
    """
    if self.__predefined:
        datatype = None
        with open(self.__filename) as f:
            for line in f:
                if line.startswith("#data type: "):
                    if datatype is not None:
                        raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
                    datatype = line.replace("#data type: ", "").replace("\n", "")
        # Only decide after the full scan, so duplicates cannot slip through.
        if datatype is None:
            return "unknown"
        return datatype

    dasQuery_type = ( 'dataset dataset=%s instance=%s detail=true | grep dataset.datatype,'
                      'dataset.name'%( self.__name, self.__dasinstance ) )
    data = self.__getData( dasQuery_type )

    try:
        return self.__findInJson(data, ["dataset", "datatype"])
    except KeyError:
        print ("Cannot find the datatype of the dataset '%s'\n"
               "It may not be possible to automatically find the magnetic field,\n"
               "and you will not be able run in CRAB mode"
               %( self.name() ))
        return "unknown"
412 
def replace(string, replacements)

◆ __getFileInfoList()

def dataset.Dataset.__getFileInfoList (   self,
  dasLimit,
  parent = False 
)
private

Definition at line 561 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__predefined, ElectronMVAID.ElectronMVAID.name, HcalOfflineHarvesting.name, HcalOnlineHarvesting.name, HFRaddamTask.name, LaserTask.name, NoCQTask.name, PedestalTask.name, QIE10Task.name, QIE11Task.name, RecHitTask.name, UMNioTask.name, ZDCTask.name, AlignableObjectId::entry.name, RawTask.name, average.Average.name, counter.Counter.name, TPTask.name, histograms.Histograms.name, DigiTask.name, LEDTask.name, cond::persistency::GLOBAL_TAG::NAME.name, cond::persistency::RUN_INFO::RUN_NUMBER.name, cond::persistency::TAG::NAME.name, TmModule.name, cond::persistency::GTEditorData.name, cond::persistency::GLOBAL_TAG::VALIDITY.name, cond::persistency::RUN_INFO::START_TIME.name, cond::persistency::TAG::TIME_TYPE.name, cond::persistency::GLOBAL_TAG::DESCRIPTION.name, cond::persistency::RUN_INFO::END_TIME.name, core.autovars.NTupleVariable.name, cond::persistency::TAG::OBJECT_TYPE.name, DQMRivetClient::NormOption.name, cond::persistency::GLOBAL_TAG::RELEASE.name, cond::persistency::TAG::SYNCHRONIZATION.name, cond::persistency::GLOBAL_TAG::SNAPSHOT_TIME.name, cond::persistency::TAG::END_OF_VALIDITY.name, MEPSet.name, cond::persistency::O2O_RUN::JOB_NAME.name, cond::persistency::GLOBAL_TAG::INSERTION_TIME.name, cms::dd::NameValuePair< T >.name, cond::persistency::TAG::DESCRIPTION.name, cond::persistency::O2O_RUN::START_TIME.name, cond::persistency::TAG::LAST_VALIDATED_TIME.name, FWTGeoRecoGeometry::Info.name, cond::persistency::O2O_RUN::END_TIME.name, cond::persistency::TAG::INSERTION_TIME.name, cond::persistency::O2O_RUN::STATUS_CODE.name, cond::persistency::TAG::MODIFICATION_TIME.name, cond::persistency::O2O_RUN::LOG.name, ParameterSet.name, nanoaod::MergeableCounterTable::SingleColumn< T >.name, cond::persistency::TAG::PROTECTION_CODE.name, preexistingValidation.PreexistingValidation.name, OutputMEPSet.name, MEPSetData.name, 
AlignmentConstraint.name, PixelDCSObject< T >::Item.name, dataset.BaseDataset.name, personalPlayback.Applet.name, cms::dd::ValuePair< T, U >.name, Types._Untracked.name, MagCylinder.name, analyzer.Analyzer.name, DQMRivetClient::LumiOption.name, heppy::ParSet.name, cond::persistency::GTProxyData.name, SingleObjectCondition.name, edm::PathTimingSummary.name, DQMRivetClient::ScaleFactorOption.name, cms::DDAlgoArguments.name, EgHLTOfflineSummaryClient::SumHistBinData.name, Barrel.name, cond::TimeTypeSpecs.name, perftools::EdmEventSize::BranchRecord.name, core.autovars.NTupleObjectType.name, edm::PathSummary.name, EcalLogicID.name, alignment.Alignment.name, lumi::TriggerInfo.name, XMLProcessor::_loaderBaseConfig.name, PixelEndcapLinkMaker::Item.name, FWTableViewManager::TableEntry.name, MEtoEDM< T >::MEtoEDMObject.name, PixelBarrelLinkMaker::Item.name, ExpressionHisto< T >.name, DQMGenericClient::EfficOption.name, TreeCrawler.Package.name, Supermodule.name, cond::persistency::GLOBAL_TAG_MAP::GLOBAL_TAG_NAME.name, genericValidation.GenericValidation.name, options.ConnectionHLTMenu.name, cond::persistency::GLOBAL_TAG_MAP::RECORD.name, cond::persistency::GLOBAL_TAG_MAP::LABEL.name, cms::DDParsingContext::CompositeMaterial.name, cond::persistency::GLOBAL_TAG_MAP::TAG_NAME.name, cond::Tag_t.name, dqmoffline::l1t::HistDefinition.name, DQMGenericClient::ProfileOption.name, FastHFShowerLibrary.name, magneticfield::BaseVolumeHandle.name, nanoaod::MergeableCounterTable::VectorColumn< T >.name, emtf::Node.name, h4DSegm.name, DQMGenericClient::NormOption.name, core.TriggerMatchAnalyzer.TriggerMatchAnalyzer.name, CounterChecker.name, DQMGenericClient::CDOption.name, PhysicsTools::Calibration::Variable.name, cond::TagInfo_t.name, TrackerSectorStruct.name, MuonGeometrySanityCheckPoint.name, cond::persistency::PAYLOAD::HASH.name, FCDTask.name, DQMGenericClient::NoFlowOption.name, looper.Looper.name, Mapper::definition< ScannerT >.name, cond::persistency::PAYLOAD::OBJECT_TYPE.name, 
EDMtoMEConverter.name, cond::persistency::PAYLOAD::DATA.name, cond::persistency::PAYLOAD::STREAMER_INFO.name, cond::persistency::PAYLOAD::VERSION.name, cond::persistency::PAYLOAD::INSERTION_TIME.name, classes.MonitorData.name, HistogramManager.name, classes.OutputData.name, BPHDecayToResResBuilderBase::DZSelect.name, Crystal.name, h2DSegm.name, options.HLTProcessOptions.name, cond::persistency::IOV::TAG_NAME.name, cond::persistency::IOV::SINCE.name, cond::persistency::IOV::PAYLOAD_HASH.name, cond::persistency::IOV::INSERTION_TIME.name, DQMNet::WaitObject.name, core.TriggerBitAnalyzer.TriggerBitAnalyzer.name, AlpgenParameterName.name, config.Analyzer.name, geometry.Structure.name, core.autovars.NTupleSubObject.name, Capsule.name, core.autovars.NTupleObject.name, Ceramic.name, SiStripMonitorDigi.name, BulkSilicon.name, config.Service.name, APD.name, core.autovars.NTupleCollection.name, nanoaod::FlatTable::Column.name, BPHRecoBuilder::BPHRecoSource.name, BPHRecoBuilder::BPHCompSource.name, StraightTrackAlignment::RPSetPlots.name, cond::persistency::TAG_AUTHORIZATION::TAG_NAME.name, cond::persistency::TAG_AUTHORIZATION::ACCESS_TYPE.name, cond::persistency::TAG_AUTHORIZATION::CREDENTIAL.name, cond::persistency::TAG_AUTHORIZATION::CREDENTIAL_TYPE.name, InnerLayerVolume.name, cond::payloadInspector::TagReference.name, cond::persistency::TAG_LOG::TAG_NAME.name, cond::persistency::TAG_LOG::EVENT_TIME.name, cond::persistency::TAG_LOG::USER_NAME.name, cond::persistency::TAG_LOG::HOST_NAME.name, cond::persistency::TAG_LOG::COMMAND.name, cond::persistency::TAG_LOG::ACTION.name, cond::persistency::TAG_LOG::USER_TEXT.name, personalPlayback.FrameworkJob.name, Grid.name, trklet::TrackletConfigBuilder::DTCinfo.name, Grille.name, BackPipe.name, plotscripts.SawTeethFunction.name, PatchPanel.name, BackCoolTank.name, DryAirTube.name, crabFunctions.CrabTask.name, MBCoolTube.name, MBManif.name, cscdqm::ParHistoDef.name, hTMaxCell.name, BeautifulSoup.Tag.name, 
SummaryOutputProducer::GenericSummary.name, BeautifulSoup.SoupStrainer.name, dataset.Dataset.parentDataset(), and print().

Referenced by dataset.Dataset.fileInfoList().

def __getFileInfoList( self, dasLimit, parent = False ):
    """Return a list of dictionaries describing the files of the dataset.

    For a predefined dataset the file names are read from the
    ``readFiles.extend`` (or, if ``parent`` is true, ``secFiles.extend``)
    lists of the dataset file and each entry only has the key "name".
    Otherwise DAS is queried and each entry has the keys "name",
    "creation_time" and "nevents"; files without events are dropped and
    the result is sorted by name.

    Args:
        dasLimit: forwarded as second argument to ``self.__getData``.
        parent: if true, describe the files of the parent dataset.

    Raises:
        AllInOneError: if DAS does not return any file.
    """
    if self.__predefined:
        if parent:
            extendstring = "secFiles.extend"
        else:
            extendstring = "readFiles.extend"
        # quotes, commas and blanks that surround a name in the list literal
        unwanted = "', " + '"'
        files = []
        copy = False
        with open(self.__filename) as f:
            for line in f:
                if "]" in line:
                    copy = False
                if copy:
                    fileName = "".join(char for char in line if char not in unwanted).strip()
                    if fileName:
                        files.append({"name": fileName})
                # a list opened on this line starts on the following line
                if extendstring in line and "[" in line and "]" not in line:
                    copy = True
        return files

    if parent:
        searchdataset = self.parentDataset()
    else:
        searchdataset = self.__name
    dasQuery_files = ( 'file dataset=%s instance=%s detail=true | grep file.name, file.nevents, '
                       'file.creation_time, '
                       'file.modification_time'%( searchdataset, self.__dasinstance ) )
    print("Requesting file information for '%s' from DAS..."%( searchdataset ), end=' ')
    sys.stdout.flush()
    data = self.__getData( dasQuery_files, dasLimit )
    print("Done.")
    data = [ self.__findInJson(entry,"file") for entry in data ]
    if len( data ) == 0:
        msg = ("No files are available for the dataset '%s'. This can be "
               "due to a typo or due to a DAS problem. Please check the "
               "spelling of the dataset and/or retry to run "
               "'validateAlignments.py'."%( self.name() ))
        raise AllInOneError( msg )
    fileInformationList = []
    for file in data:
        fileName = 'unknown'
        try:
            fileName = self.__findInJson(file, "name")
            fileCreationTime = self.__findInJson(file, "creation_time")
            fileNEvents = self.__findInJson(file, "nevents")
        except KeyError:
            print(("DAS query gives bad output for file '%s'. Skipping it.\n"
                   "It may work if you try again later.") % fileName)
            # treated like an empty file, i.e. skipped just below
            fileNEvents = 0
        # select only non-empty files
        if fileNEvents == 0:
            continue
        fileDict = { "name": fileName,
                     "creation_time": fileCreationTime,
                     "nevents": fileNEvents
                     }
        fileInformationList.append( fileDict )
    fileInformationList.sort( key=lambda info: self.__findInJson(info,"name") )
    return fileInformationList
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47

◆ __getMagneticField()

def dataset.Dataset.__getMagneticField (   self)
private

Definition at line 423 of file dataset.py.

References dataset.Dataset.__cmssw, dataset.Dataset.__cmsswrelease, dataset.Dataset.__dasinstance, dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__name, dataset.Dataset.__predefined, print(), python.rootplot.root2matplotlib.replace(), and digitizers_cfi.strip.

Referenced by dataset.Dataset.magneticField().

def __getMagneticField( self ):
    """Determine the name of the magnetic field configuration to use.

    The following sources are tried in order: the magnetic field given as
    input (``self.__inputMagneticField``), the header comments of a
    predefined dataset file, the data type, the DAS record of the dataset
    and finally the dataset name.

    Returns:
        The name of a ``MagneticField*`` configuration (file name without
        the ``_cff.py`` suffix) or the string "unknown".

    Raises:
        ValueError: if the input magnetic field is neither 3.8 nor 0.
        AllInOneError: if a predefined dataset file has more than one
            'data type' or 'magnetic field' header line.
    """
    # List the MagneticField_*_cff.py files of the local area, falling back
    # to the release area if the local directory does not exist.
    Bfieldlocation = os.path.join( self.__cmssw, "python", "Configuration", "StandardSequences" )
    if not os.path.isdir(Bfieldlocation):
        Bfieldlocation = os.path.join( self.__cmsswrelease, "python", "Configuration", "StandardSequences" )
    Bfieldlist = [ f.replace("_cff.py",'') \
                   for f in os.listdir(Bfieldlocation) \
                   if f.startswith("MagneticField_") and f.endswith("_cff.py") ]
    Bfieldlist.sort( key = lambda Bfield: -len(Bfield) ) #Put it in order of decreasing length, so that searching in the name gives the longer match

    # 1) an explicitly given magnetic field wins over everything else
    if self.__inputMagneticField is not None:
        if self.__inputMagneticField == 3.8:
            return "MagneticField"
        elif self.__inputMagneticField == 0:
            return "MagneticField_0T"
        else:
            raise ValueError("Unknown input magnetic field {}".format(self.__inputMagneticField))

    # 2) predefined dataset: read the header comments of the dataset file
    if self.__predefined:
        with open(self.__filename) as f:
            datatype = None
            Bfield = None
            for line in f.readlines():
                if line.startswith("#data type: "):
                    if datatype is not None:
                        raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
                    datatype = line.replace("#data type: ", "").replace("\n","")
                    # drop a trailing comment on the same line
                    datatype = datatype.split("#")[0].strip()
                if line.startswith("#magnetic field: "):
                    if Bfield is not None:
                        raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
                    Bfield = line.replace("#magnetic field: ", "").replace("\n","")
                    Bfield = Bfield.split("#")[0].strip()
            if Bfield is not None:
                # only the part before the first comma is the configuration name
                Bfield = Bfield.split(",")[0]
                if Bfield in Bfieldlist or Bfield == "unknown":
                    return Bfield
                else:
                    print("Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield)
                    print("Using Bfield='unknown' - this will revert to the default")
                    return "unknown"
            elif datatype == "data":
                return "MagneticField"           #this should be in the "#magnetic field" line, but for safety in case it got messed up
            else:
                return "unknown"

    # 3) data always uses the default configuration
    if self.__dataType == "data":
        return "MagneticField"

    #try to find the magnetic field from DAS
    #it seems to be there for the newer (7X) MC samples, except cosmics
    dasQuery_B = ('dataset dataset=%s instance=%s'%(self.__name, self.__dasinstance))
    data = self.__getData( dasQuery_B )

    try:
        Bfield = self.__findInJson(data, ["dataset", "mcm", "sequences", "magField"])
        if Bfield in Bfieldlist:
            return Bfield
        elif Bfield == "38T" or Bfield == "38T_PostLS1":
            return "MagneticField"
        elif "MagneticField_" + Bfield in Bfieldlist:
            return "MagneticField_" + Bfield
        elif Bfield == "":
            # empty entry: fall through to the search in the dataset name
            pass
        else:
            print("Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield)
            print("Using Bfield='unknown' - this will revert to the default magnetic field")
            return "unknown"
    except KeyError:
        # no such entry in the DAS record: fall through as well
        pass

    for possibleB in Bfieldlist:
        if (possibleB != "MagneticField"
                and possibleB.replace("MagneticField_","") in self.__name.replace("TkAlCosmics0T", "")):
            #final attempt - try to identify the dataset from the name
            #all cosmics dataset names contain "TkAlCosmics0T"
            if possibleB == "MagneticField_38T" or possibleB == "MagneticField_38T_PostLS1":
                return "MagneticField"
            return possibleB

    return "unknown"
def replace(string, replacements)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47

◆ __getMagneticFieldForRun()

def dataset.Dataset.__getMagneticFieldForRun (   self,
  run = -1,
  tolerance = 0.5 
)
private
For MC, this returns the same as the previous function.
   For data, it gets the magnetic field from the runs.  This is important for
   deciding which template to use for offlinevalidation

Definition at line 504 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getData(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__lastusedrun, dataset.Dataset.__magneticField, dataset.Dataset.__name, dataset.Dataset.__predefined, funct.abs(), dqmMemoryStats.float, print(), python.rootplot.root2matplotlib.replace(), submitPVValidationJobs.split(), and digitizers_cfi.strip.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dump_cff(), and dataset.Dataset.magneticFieldForRun().

def __getMagneticFieldForRun( self, run = -1, tolerance = 0.5 ):
    """Return the magnetic field value for a run or for the used run range.

    For MC with the default "MagneticField" configuration 3.8 is returned.
    For data the value is looked up per run, which matters for deciding
    which template to use for offlinevalidation.

    Args:
        run: run number to look up in DAS.  If it is not positive, the
            first and last used runs are looked up and compared instead.
        tolerance: largest accepted difference between the values of the
            first and the last used run.

    Returns:
        A number on success.  On failure a string that starts with
        "unknown" followed by an explanation is returned instead of
        raising an exception.

    Raises:
        AllInOneError: see the review note in the predefined branch.
    """
    if self.__dataType == "mc" and self.__magneticField == "MagneticField":
        return 3.8                                        #For 3.8T MC the default MagneticField is used
    if self.__inputMagneticField is not None:
        return self.__inputMagneticField
    if "T" in self.__magneticField:
        # take the number in front of the first "T" of the configuration name
        Bfield = self.__magneticField.split("T")[0].replace("MagneticField_","")
        try:
            return float(Bfield) / 10.0                       #e.g. 38T and 38T_PostLS1 both return 3.8
        except ValueError:
            pass
    if self.__predefined:
        with open(self.__filename) as f:
            Bfield = None
            for line in f.readlines():
                # the number is the second comma separated field of the header line
                if line.startswith("#magnetic field: ") and "," in line:
                    # NOTE(review): Bfield is never assigned in this loop, so this
                    # check cannot trigger - the first matching line is returned.
                    if Bfield is not None:
                        raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
                    return float(line.replace("#magnetic field: ", "").split(",")[1].split("#")[0].strip())

    if run > 0:
        dasQuery = ('run=%s instance=%s detail=true'%(run, self.__dasinstance))    #for data
        data = self.__getData(dasQuery)
        try:
            return self.__findInJson(data, ["run","bfield"])
        except KeyError:
            return "unknown Can't get the magnetic field for run %s from DAS" % run

    #run < 0 - find B field for the first and last runs, and make sure they're compatible
    #  (to within tolerance)
    #NOT FOOLPROOF!  The magnetic field might go up and then down, or vice versa
    if self.__firstusedrun is None or self.__lastusedrun is None:
        return "unknown Can't get the exact magnetic field for the dataset until data has been retrieved from DAS."
    firstrunB = self.__getMagneticFieldForRun(self.__firstusedrun)
    lastrunB = self.__getMagneticFieldForRun(self.__lastusedrun)
    try:
        if abs(firstrunB - lastrunB) <= tolerance:
            return .5*(firstrunB + lastrunB)
        print(firstrunB, lastrunB, tolerance)
        return ("unknown The beginning and end of your run range for %s\n"
                "have different magnetic fields (%s, %s)!\n"
                "Try limiting the run range using firstRun, lastRun, begin, end, or JSON,\n"
                "or increasing the tolerance (in dataset.py) from %s.") % (self.__name, firstrunB, lastrunB, tolerance)
    except TypeError:
        # At least one of the two lookups returned an "unknown ..." string,
        # so the subtraction failed; pass that message on to the caller.
        try:
            if "unknown" in firstrunB:
                return firstrunB
            else:
                return lastrunB
        except TypeError:
            # firstrunB is a number, so lastrunB carries the message
            return lastrunB
def replace(string, replacements)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
Abs< T >::type abs(const T &t)
Definition: Abs.h:22

◆ __getParentDataset()

def dataset.Dataset.__getParentDataset (   self)
private

Definition at line 413 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, and str.

Referenced by dataset.Dataset.parentDataset().

def __getParentDataset( self ):
    """Return the name of the parent dataset as reported by DAS.

    Raises:
        AllInOneError: if the DAS answer has no parent name.  The message
            contains the DAS output to help with debugging.
    """
    dasQuery = "parent dataset=" + self.__name + " instance="+self.__dasinstance
    data = self.__getData( dasQuery )
    try:
        return self.__findInJson(data, ["parent", "name"])
    except KeyError:
        # report the answer that was actually received from DAS
        raise AllInOneError("Cannot find the parent of the dataset '" + self.__name + "'\n"
                            "Here is the DAS output:\n" + str(data) +
                            "\nIt's possible that this was a server error. If so, it may work if you try again later")
#define str(s)

◆ __getRunList()

def dataset.Dataset.__getRunList (   self)
private

Definition at line 620 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, and print().

Referenced by dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.runList().

def __getRunList( self ):
    """Request the runs of the dataset from DAS.

    Returns the "run" entries of the DAS answer as a list that is sorted
    by their "run_number".
    """
    query = ( 'run dataset=%s instance=%s | grep run.run_number,'
              'run.creation_time'%( self.__name, self.__dasinstance ) )
    print("Requesting run information for '%s' from DAS..."%( self.__name ), end=' ')
    sys.stdout.flush()
    answer = self.__getData( query )
    print("Done.")
    runs = []
    for entry in answer:
        runs.append( self.__findInJson(entry,"run") )
    return sorted( runs, key = lambda item: self.__findInJson(item, "run_number") )
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47

◆ __lumiSelectionSnippet()

def dataset.Dataset.__lumiSelectionSnippet (   self,
  jsonPath = None,
  firstRun = None,
  lastRun = None 
)
private

Definition at line 125 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getRunList(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__lastusedrun, dataset.Dataset.getForceRunRangeFunction(), dataset.int, join(), SiStripPI.max, SiStripPI.min, print(), python.rootplot.root2matplotlib.replace(), submitPVValidationJobs.split(), and str.

Referenced by dataset.Dataset.__createSnippet().

def __lumiSelectionSnippet( self, jsonPath = None, firstRun = None, lastRun = None ):
    """Build the python code that selects the lumi sections to process.

    As a side effect ``self.__firstusedrun`` and ``self.__lastusedrun`` are
    set (unless no selection is requested and an input magnetic field is
    given, in which case they are not needed).

    Args:
        jsonPath: path to a JSON file readable by ``LumiList`` or to a
            CMSSW lumi selection cff snippet.
        firstRun: lowest run number to keep.
        lastRun: highest run number to keep.

    Returns:
        The snippet as a string; empty if no selection was requested.

    Raises:
        AllInOneError: if ``jsonPath`` is neither a JSON file nor a lumi
            selection snippet, or if the selection does not leave any run.
    """
    lumiSecExtend = ""
    if firstRun or lastRun or jsonPath:
        if not jsonPath:
            # run range only: select the complete runs in the range
            selectedRunList = self.__getRunList()
            if firstRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if self.__findInJson(run, "run_number") >= firstRun ]
            if lastRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if self.__findInJson(run, "run_number") <= lastRun ]
            lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
                         + str( self.__findInJson(run, "run_number") ) + ":max" \
                         for run in selectedRunList ]
            splitLumiList = list( self.__chunks( lumiList, 255 ) )
        else:
            theLumiList = None
            try:
                theLumiList = LumiList ( filename = jsonPath )
            except ValueError:
                # not a JSON file - it may still be a cff snippet, see below
                pass

            if theLumiList is not None:
                allRuns = theLumiList.getRuns()
                runsToRemove = []
                for run in allRuns:
                    if firstRun and int( run ) < firstRun:
                        runsToRemove.append( run )
                    if lastRun and int( run ) > lastRun:
                        runsToRemove.append( run )
                theLumiList.removeRuns( runsToRemove )
                splitLumiList = list( self.__chunks(
                    theLumiList.getCMSSWString().split(','), 255 ) )
                if not (splitLumiList and splitLumiList[0] and splitLumiList[0][0]):
                    splitLumiList = None
            else:
                with open(jsonPath) as f:
                    jsoncontents = f.read()
                    if "process.source.lumisToProcess" in jsoncontents:
                        msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
                        if firstRun or lastRun:
                            msg += ("\n (after applying firstRun and/or lastRun)")
                        msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
                        msg += "\nCheck your config file to make sure that it worked properly."
                        print(msg)

                        runlist = self.__getRunList()
                        if firstRun or lastRun:
                            # reset before the substitution callback is applied
                            self.__firstusedrun = -1
                            self.__lastusedrun = -1
                            jsoncontents = re.sub(r"\d+:(\d+|max)(-\d+:(\d+|max))?", self.getForceRunRangeFunction(firstRun, lastRun), jsoncontents)
                            # drop the entries that became empty
                            jsoncontents = (jsoncontents.replace("'',\n","").replace("''\n","")
                                                        .replace('"",\n','').replace('""\n',''))
                            self.__firstusedrun = max(self.__firstusedrun, int(self.__findInJson(runlist[0],"run_number")))
                            self.__lastusedrun = min(self.__lastusedrun, int(self.__findInJson(runlist[-1],"run_number")))
                            if self.__lastusedrun < self.__firstusedrun:
                                jsoncontents = None
                        else:
                            self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
                            self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))
                        lumiSecExtend = jsoncontents
                        splitLumiList = None
                    else:
                        raise AllInOneError("%s is not a valid json file!" % jsonPath)

        if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
            lumiSecStr = [ "',\n'".join( lumis ) \
                           for lumis in splitLumiList ]
            lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                           for lumis in lumiSecStr ]
            lumiSecExtend = "\n".join( lumiSecStr )
            runlist = self.__getRunList()
            self.__firstusedrun = max(int(splitLumiList[0][0].split(":")[0]), int(self.__findInJson(runlist[0],"run_number")))
            self.__lastusedrun = min(int(splitLumiList[-1][-1].split(":")[0]), int(self.__findInJson(runlist[-1],"run_number")))
        elif lumiSecExtend:
            # the cff snippet case, which is already complete
            pass
        else:
            msg = "You are trying to run a validation without any runs! Check that:"
            if firstRun or lastRun:
                msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
            if jsonPath:
                msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
            if (firstRun or lastRun) and jsonPath:
                msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
            raise AllInOneError(msg)

    else:
        if self.__inputMagneticField is not None:
            pass  #never need self.__firstusedrun or self.__lastusedrun
        else:
            # request the run list once and use it for both ends
            runlist = self.__getRunList()
            self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
            self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))

    return lumiSecExtend
def replace(string, replacements)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
static std::string join(char **cmd)
Definition: RemoteFile.cc:19
#define str(s)

◆ buildListOfBadFiles()

def dataset.Dataset.buildListOfBadFiles (   self)
fills the list of bad files from the IntegrityCheck log.

When the integrity check file is not available,
files are considered as good.

Definition at line 278 of file dataset.py.

def buildListOfBadFiles(self):
    '''Fill the lists of good and bad files from the IntegrityCheck log.

    ``self.bad_files`` maps a file name to 'MarkedBad' or 'ValidDup' and
    ``self.good_files`` lists the remaining files of the report.  If a
    report is found, it is stored in ``self.report`` and
    ``self.maskExists`` is set.

    Raises IntegrityCheckError if no IntegrityCheck log file is found in
    ``self.castorDir``.'''
    mask = "IntegrityCheck"

    self.bad_files = {}
    self.good_files = []

    # raw string, so that the backslash reaches the regular expression
    file_mask = castortools.matchingFiles(self.castorDir, r'^%s_.*\.txt$' % mask)
    if file_mask:
        # here to avoid circular dependency
        from .edmIntegrityCheck import PublishToFileSystem
        p = PublishToFileSystem(mask)
        report = p.get(self.castorDir)
        if report:
            self.maskExists = True
            self.report = report
            dup = report.get('ValidDuplicates',{})
            for name, status in report['Files'].items():
                if not status[0]:
                    self.bad_files[name] = 'MarkedBad'
                elif name in dup:
                    self.bad_files[name] = 'ValidDup'
                else:
                    self.good_files.append( name )
    else:
        raise IntegrityCheckError( "ERROR: IntegrityCheck log file IntegrityCheck_XXXXXXXXXX.txt not found" )

◆ buildListOfFiles()

def dataset.Dataset.buildListOfFiles (   self,
  pattern = '.*root' 
)
fills list of files, taking all root files matching the pattern in the castor dir

Definition at line 274 of file dataset.py.

def buildListOfFiles(self, pattern='.*root'):
    '''Store in self.files what castortools.matchingFiles returns for
    self.castorDir and the given pattern.'''
    matched = castortools.matchingFiles( self.castorDir, pattern )
    self.files = matched

◆ convertTimeToRun()

def dataset.Dataset.convertTimeToRun (   self,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  shortTuple = True 
)

Definition at line 645 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__dateString(), dataset.Dataset.__datetime(), dataset.Dataset.__find_ge(), dataset.Dataset.__find_lt(), dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__getRunList(), dataset.Dataset.__name, electrons_cff.bool, and dataset.int.

Referenced by dataset.Dataset.__createSnippet(), and dataset.Dataset.__dateString().

def convertTimeToRun( self, begin = None, end = None,
                      firstRun = None, lastRun = None,
                      shortTuple = True ):
    """Translate the dates 'begin' and 'end' into run numbers of the dataset.

    For 'begin' DAS is searched for runs in growing time windows after the
    date, for 'end' in growing time windows before the date.  A date that
    could be translated is replaced by None.

    Args:
        begin: start date, alternative to ``firstRun``.
        end: end date, alternative to ``lastRun``.
        firstRun: first run; returned unchanged unless ``begin`` is given.
        lastRun: last run; returned unchanged unless ``end`` is given.
        shortTuple: selects the shape of the return value.

    Returns:
        ``(firstRun, lastRun)`` if ``shortTuple`` is true, otherwise
        ``(begin, end, firstRun, lastRun)``.

    Raises:
        AllInOneError: if a date and the corresponding run are both given,
            or if no suitable run is found for a date.
    """
    if ( begin and firstRun ) or ( end and lastRun ):
        msg = ( "The Usage of "
                + "'begin' & 'firstRun' " * int( bool( begin and
                                                       firstRun ) )
                + "and " * int( bool( ( begin and firstRun ) and
                                      ( end and lastRun ) ) )
                + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
                + "is ambiguous." )
        raise AllInOneError( msg )

    if begin or end:
        runList = [ self.__findInJson(run, "run_number") for run in self.__getRunList() ]

    if begin:
        lastdate = begin
        for delta in [ 1, 5, 10, 20, 30 ]:                       #try searching for about 2 months after begin
            # each window starts where the previous one ended
            firstdate = lastdate
            lastdate = self.__dateString(self.__datetime(firstdate) + datetime.timedelta(delta))
            dasQuery_begin = "run date between[%s,%s] instance=%s" % (firstdate, lastdate, self.__dasinstance)
            begindata = self.__getData(dasQuery_begin)
            if len(begindata) > 0:
                begindata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
                try:
                    runIndex = self.__find_ge( runList, self.__findInJson(begindata[0], ["run", "run_number"]))
                except ValueError:
                    msg = ( "Your 'begin' is after the creation time of the last "
                            "run in the dataset\n'%s'"%( self.__name ) )
                    raise AllInOneError( msg )
                firstRun = runList[runIndex]
                begin = None
                break

    if begin:
        # still set, i.e. none of the time windows contained a run
        raise AllInOneError("No runs within a reasonable time interval after your 'begin'. "
                            "Try using a 'begin' that has runs soon after it (within 2 months at most)")

    if end:
        firstdate = end
        for delta in [ 1, 5, 10, 20, 30 ]:                       #try searching for about 2 months before end
            # each window ends where the previous one started
            lastdate = firstdate
            firstdate = self.__dateString(self.__datetime(lastdate) - datetime.timedelta(delta))
            dasQuery_end = "run date between[%s,%s] instance=%s" % (firstdate, lastdate, self.__dasinstance)
            enddata = self.__getData(dasQuery_end)
            if len(enddata) > 0:
                enddata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
                try:
                    runIndex = self.__find_lt( runList, self.__findInJson(enddata[-1], ["run", "run_number"]))
                except ValueError:
                    msg = ( "Your 'end' is before the creation time of the first "
                            "run in the dataset\n'%s'"%( self.__name ) )
                    raise AllInOneError( msg )
                lastRun = runList[runIndex]
                end = None
                break

    if end:
        raise AllInOneError("No runs within a reasonable time interval before your 'end'. "
                            "Try using an 'end' that has runs soon before it (within 2 months at most)")

    if shortTuple:
        return firstRun, lastRun
    else:
        return begin, end, firstRun, lastRun

◆ createdatasetfile_hippy()

def dataset.Dataset.createdatasetfile_hippy (   self,
  filename,
  filesperjob,
  firstrun,
  lastrun 
)

Definition at line 852 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.fileList(), and join().

def createdatasetfile_hippy(self, filename, filesperjob, firstrun, lastrun):
    """Write the file list of the run range to 'filename', one job per line.

    The files are obtained from self.fileList with forced run selection and
    grouped by self.__chunks(files, filesperjob).  Every line consists of
    the single-quoted file names of one group, separated by commas.
    """
    with open(filename, "w") as out:
        selected = self.fileList(firstRun=firstrun, lastRun=lastrun, forcerunselection=True)
        for job in self.__chunks(selected, filesperjob):
            quoted = ["'{}'".format(entry) for entry in job]
            out.write(",".join(quoted) + "\n")
static std::string join(char **cmd)
Definition: RemoteFile.cc:19

◆ datasetSnippet()

def dataset.Dataset.datasetSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  crab = False,
  parent = False 
)

Definition at line 729 of file dataset.py.

References dataset.Dataset.__createSnippet(), dataset.Dataset.__filename, dataset.Dataset.__name, dataset.Dataset.__official, dataset.Dataset.__origName, dataset.Dataset.__predefined, dataset.Dataset.dump_cff(), and print().

Referenced by dataset.Dataset.parentDataset().

def datasetSnippet( self, jsonPath = None, begin = None, end = None,
                    firstRun = None, lastRun = None, crab = False, parent = False ):
    """Return the configuration snippet that defines the source for this dataset.

    For a predefined dataset the snippet loads the corresponding
    ``Alignment.OfflineValidation.<name>_cff``.  Otherwise the snippet is
    built by ``self.__createSnippet`` and, if no run or lumi selection is
    requested, the dataset is additionally stored with ``self.dump_cff``.

    Args:
        jsonPath, begin, end, firstRun, lastRun: selection of the runs and
            lumi sections; only supported for datasets that are not
            predefined.  Empty values are treated like None.
        crab: forwarded to ``self.__createSnippet``.
        parent: whether the secondary (parent) files are required.

    Raises:
        AllInOneError: if a selection is requested for a predefined
            dataset, or if a predefined dataset that is not official lacks
            the required secondary files.
    """
    if not firstRun: firstRun = None
    if not lastRun: lastRun = None
    if not begin: begin = None
    if not end: end = None
    if self.__predefined and (jsonPath or begin or end or firstRun or lastRun):
        msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun' "
                "only work for official datasets, not predefined _cff.py files" )
        raise AllInOneError( msg )
    if self.__predefined and parent:
        with open(self.__filename) as f:
            if "secFiles.extend" not in f.read():
                msg = ("The predefined dataset '%s' does not contain secondary files, "
                       "which your validation requires!") % self.__name
                if self.__official:
                    # fall back to the official dataset, which is handled below
                    self.__name = self.__origName
                    self.__predefined = False
                    print(msg)
                    print ("Retrieving the files from DAS. You will be asked if you want "
                           "to overwrite the old dataset.\n"
                           "It will still be compatible with validations that don't need secondary files.")
                else:
                    raise AllInOneError(msg)

    if self.__predefined:
        snippet = ("process.load(\"Alignment.OfflineValidation.%s_cff\")\n"
                   "process.maxEvents = cms.untracked.PSet(\n"
                   " input = cms.untracked.int32(int(.oO[nEvents]Oo. / .oO[parallelJobs]Oo.))\n"
                   ")\n"
                   "process.source.skipEvents=cms.untracked.uint32(int(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.))"
                   %(self.__name))
        if not parent:
            # the file provides secondary files that are not wanted here
            with open(self.__filename) as f:
                if "secFiles.extend" in f.read():
                    snippet += "\nprocess.source.secondaryFileNames = cms.untracked.vstring()"
        return snippet
    theMap = { "process": "process.",
               "tab": " " * len( "process." ),
               "nEvents": ".oO[nEvents]Oo. / .oO[parallelJobs]Oo.",
               "skipEventsString": "process.source.skipEvents=cms.untracked.uint32(int(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.))\n",
               "importCms": "",
               "header": ""
               }
    datasetSnippet = self.__createSnippet( jsonPath = jsonPath,
                                           begin = begin,
                                           end = end,
                                           firstRun = firstRun,
                                           lastRun = lastRun,
                                           repMap = theMap,
                                           crab = crab,
                                           parent = parent )
    # The selection arguments were normalised to None above, so they have to
    # be tested for emptiness; a comparison with "" can never be true.
    if not (jsonPath or begin or end or firstRun or lastRun):
        try:
            self.dump_cff(parent = parent)
        except AllInOneError as e:
            print("Can't store the dataset as a cff:")
            print(e)
            print("This may be inconvenient in the future, but will not cause a problem for this validation.")
    return datasetSnippet
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47

◆ dataType()

def dataset.Dataset.dataType (   self)

Definition at line 710 of file dataset.py.

References dataset.Dataset.__dataType, and dataset.Dataset.__getDataType().

def dataType(self):
    """Return the data type of this dataset, resolving it lazily.

    While the stored value is falsy, ``__getDataType()`` is called and its
    result is stored on the instance before being returned.
    """
    if self.__dataType:
        return self.__dataType
    self.__dataType = self.__getDataType()
    return self.__dataType
714 

◆ dump_cff()

def dataset.Dataset.dump_cff (   self,
  outName = None,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  parent = False 
)

Definition at line 791 of file dataset.py.

References dataset.Dataset.__cmssw, dataset.Dataset.__createSnippet(), dataset.Dataset.__dataType, dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__magneticField, dataset.Dataset.__name, print(), python.rootplot.root2matplotlib.replace(), submitPVValidationJobs.split(), str, and digitizers_cfi.strip.

Referenced by dataset.Dataset.datasetSnippet().

791  end = None, firstRun = None, lastRun = None, parent = False ):
792  if outName == None:
793  outName = "Dataset" + self.__name.replace("/", "_")
794  packageName = os.path.join( "Alignment", "OfflineValidation" )
795  if not os.path.exists( os.path.join(
796  self.__cmssw, "src", packageName ) ):
797  msg = ("You try to store the predefined dataset'%s'.\n"
798  "For that you need to check out the package '%s' to your "
799  "private relase area in\n"%( outName, packageName )
800  + self.__cmssw )
801  raise AllInOneError( msg )
802  theMap = { "process": "",
803  "tab": "",
804  "nEvents": str( -1 ),
805  "skipEventsString": "",
806  "importCms": "import FWCore.ParameterSet.Config as cms\n",
807  "header": "#Do not delete or (unless you know what you're doing) change these comments\n"
808  "#%(name)s\n"
809  "#data type: %(dataType)s\n"
810  "#magnetic field: .oO[magneticField]Oo.\n" #put in magnetic field later
811  %{"name": self.__name, #need to create the snippet before getting the magnetic field
812  "dataType": self.__dataType} #so that we know the first and last runs
813  }
814  dataset_cff = self.__createSnippet( jsonPath = jsonPath,
815  begin = begin,
816  end = end,
817  firstRun = firstRun,
818  lastRun = lastRun,
819  repMap = theMap,
820  parent = parent)
821  magneticField = self.__magneticField
822  if magneticField == "MagneticField":
823  magneticField = "%s, %s #%s" % (magneticField,
824  str(self.__getMagneticFieldForRun()).replace("\n"," ").split("#")[0].strip(),
825  "Use MagneticField_cff.py; the number is for determining which track selection to use."
826  )
827  dataset_cff = dataset_cff.replace(".oO[magneticField]Oo.",magneticField)
828  filePath = os.path.join( self.__cmssw, "src", packageName,
829  "python", outName + "_cff.py" )
830  if os.path.exists( filePath ):
831  existMsg = "The predefined dataset '%s' already exists.\n"%( outName )
832  askString = "Do you want to overwrite it? [y/n]\n"
833  inputQuery = existMsg + askString
834  while True:
835  userInput = raw_input( inputQuery ).lower()
836  if userInput == "y":
837  break
838  elif userInput == "n":
839  return
840  else:
841  inputQuery = askString
842  print ( "The predefined dataset '%s' will be stored in the file\n"
843  %( outName )
844  + filePath +
845  "\nFor future use you have to do 'scram b'." )
846  print()
847  theFile = open( filePath, "w" )
848  theFile.write( dataset_cff )
849  theFile.close()
850  return
851 
def replace(string, replacements)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
#define str(s)

◆ extractFileSizes()

def dataset.Dataset.extractFileSizes (   self)
Get the file size for each file, from the eos ls -l command.

Definition at line 309 of file dataset.py.

References dataset.EOSDataset.castorDir, and dataset.Dataset.castorDir.

def extractFileSizes(self):
    '''Fill self.filesAndSizes with a size entry per file found in castorDir.

    The listing comes from the 'dirlist' XRD command run through castortools
    (the EOS command does not work in tier3).  Only listing lines made of
    exactly five whitespace-separated fields are used: the key is lfnDir
    joined with the base name of the fifth field, the value is the second
    field, stored as the string read from the listing.
    '''
    listing = castortools.runXRDCommand(self.castorDir, 'dirlist')[0]
    rows = listing.split('\n')
    self.filesAndSizes = {}
    for row in rows:
        fields = row.split()
        if len(fields) == 5:
            # using full abs path as a key.
            basename = fields[4].split("/")[-1]
            lfn = '/'.join([self.lfnDir, basename])
            self.filesAndSizes[lfn] = fields[1]
323 
static std::string join(char **cmd)
Definition: RemoteFile.cc:19

◆ fileInfoList()

def dataset.Dataset.fileInfoList (   self,
  parent = False 
)

Definition at line 914 of file dataset.py.

References dataset.Dataset.__dasLimit, and dataset.Dataset.__getFileInfoList().

Referenced by dataset.Dataset.fileList().

def fileInfoList(self, parent=False):
    """Return the result of ``__getFileInfoList`` for this dataset.

    The stored DAS limit and the ``parent`` flag are forwarded unchanged.
    """
    fetch = self.__getFileInfoList
    return fetch(self.__dasLimit, parent)
916 

◆ fileList()

def dataset.Dataset.fileList (   self,
  parent = False,
  firstRun = None,
  lastRun = None,
  forcerunselection = False 
)

Definition at line 885 of file dataset.py.

References dataset.Dataset.__findInJson(), dataset.Dataset.fileInfoList(), dqmMemoryStats.float, dataset.Dataset.getrunnumberfromfilename(), and print().

Referenced by dataset.Dataset.__fileListSnippet(), and dataset.Dataset.createdatasetfile_hippy().

def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
    """Return the names of the dataset's files, optionally filtered by run.

    parent            -- forwarded to fileInfoList().
    firstRun, lastRun -- when either is truthy, files whose run number (as
                         given by getrunnumberfromfilename) lies outside
                         [firstRun, lastRun] are dropped.  A missing bound is
                         treated as open (-1 / infinity).
    forcerunselection -- when true, a file whose run number cannot be
                         determined re-raises the AllInOneError; otherwise the
                         file is kept and a summary of the problems is printed.
    """
    names = [self.__findInJson(fileInfo, "name")
             for fileInfo in self.fileInfoList(parent)]

    if firstRun or lastRun:
        if not firstRun: firstRun = -1
        if not lastRun: lastRun = float('infinity')
        unknownfilenames, reasons = [], set()
        kept = []
        # Build the filtered list in one pass instead of calling
        # list.remove() for every rejected file.
        for filename in names:
            try:
                if firstRun <= self.getrunnumberfromfilename(filename) <= lastRun:
                    kept.append(filename)
            except AllInOneError as e:
                if forcerunselection: raise
                # str(e) instead of e.message: exceptions have no .message
                # attribute on Python 3.  The text is laid out as
                # "<summary>\n<filename>\n<reason>".
                details = str(e).split("\n")
                unknownfilenames.append(details[1])
                reasons.add(details[2])
                kept.append(filename)
        names = kept
        if reasons:
            if len(unknownfilenames) == len(names):
                print("Could not figure out the run numbers of any of the filenames for the following reason(s):")
            else:
                print("Could not figure out the run numbers of the following filenames:")
                for filename in unknownfilenames:
                    print(" "+filename)
                print("for the following reason(s):")
            for reason in reasons:
                print(" "+reason)
            print("Using the files anyway. The runs will be filtered at the CMSSW level.")
    return names
913 
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47

◆ forcerunrange()

def dataset.Dataset.forcerunrange (   self,
  firstRun,
  lastRun,
  s 
)
s must be in the format run1:lum1-run2:lum2

Definition at line 326 of file dataset.py.

References dataset.Dataset.__firstusedrun, dataset.Dataset.__lastusedrun, dataset.int, and submitPVValidationJobs.split().

Referenced by dataset.Dataset.getForceRunRangeFunction().

def forcerunrange(self, firstRun, lastRun, s):
    """Clip one lumi range to the runs firstRun..lastRun.

    s is a match object whose matched text must be in the format
    run1:lum1-run2:lum2; a text without a usable second half is treated as
    the single point run1:lum1.  Returns "" when the range lies entirely
    outside [firstRun, lastRun], otherwise the clipped range as a string.
    The lowest and highest runs that were kept are recorded in
    __firstusedrun / __lastusedrun.
    """
    endpoints = s.group().split("-")
    lower = endpoints[0].split(":")
    run1, lum1 = lower[0], lower[1]
    try:
        upper = endpoints[1].split(":")
        run2, lum2 = upper[0], upper[1]
    except IndexError:
        run2, lum2 = run1, lum1

    if int(run2) < firstRun or int(run1) > lastRun:
        return ""
    # The start is also replaced whenever firstRun is negative.
    if int(run1) < firstRun or firstRun < 0:
        run1, lum1 = firstRun, 1
    if int(run2) > lastRun:
        run2, lum2 = lastRun, "max"

    lowest = int(run1)
    if lowest < self.__firstusedrun or self.__firstusedrun < 0:
        self.__firstusedrun = lowest
    highest = int(run2)
    if highest > self.__lastusedrun:
        self.__lastusedrun = highest
    return "%s:%s-%s:%s" % (run1, lum1, run2, lum2)
350 

◆ getfiles()

def dataset.Dataset.getfiles (   self,
  usecache 
)

Definition at line 211 of file dataset.py.

References dataset.Dataset.dasinstance, dataset.dasquery(), dataset.Dataset.datasetname, dataset.Dataset.filenamebase, dataset.findinjson(), dataset.int, and print().

def getfiles(self, usecache):
    """Return the list of DataFile objects for this dataset.

    The list is cached as a csv file under
    $CMSSW_BASE/src/Alignment/CommonAlignment/data/<filenamebase>.csv.

    usecache -- when false, an existing cache file is deleted first so that
                the list is queried again.

    If the cache file can be read, its rows are returned.  Otherwise the file
    list is requested through dasquery(), entries whose event count converts
    to 0 are dropped, and the result is written back to the cache file on a
    best-effort basis (a failure to write is only reported).

    Raises DatasetError when the query returns nothing.
    """
    filename = os.path.join(os.environ["CMSSW_BASE"], "src", "Alignment", "CommonAlignment", "data", self.filenamebase+".csv")
    if not usecache:
        try:
            os.remove(filename)
        except OSError:
            # A cache file that was never there is fine; one that is still
            # present could not be removed, which is a real error.
            if os.path.exists(filename):
                raise

    result = []
    try:
        with open(filename) as f:
            for row in csv.DictReader(f):
                result.append(DataFile(**row))
            return result
    except IOError:
        # No readable cache: fall through to the query.
        pass

    query = "file dataset={} instance={} detail=true | grep file.name, file.nevents".format(self.datasetname, self.dasinstance)
    dasoutput = dasquery(query)
    if not dasoutput:
        # The original formatted this message with the bare name
        # `datasetname`, which is undefined in this scope.
        raise DatasetError("No files are available for the dataset '{}'. This can be "
                           "due to a typo or due to a DAS problem. Please check the "
                           "spelling of the dataset and/or try again.".format(self.datasetname))
    result = [DataFile(findinjson(entry, "file", "name"), findinjson(entry, "file", "nevents"))
              for entry in dasoutput if int(findinjson(entry, "file", "nevents"))]
    try:
        with open(filename, "w") as f:
            writer = csv.DictWriter(f, ("filename", "nevents", "runs"))
            writer.writeheader()
            for datafile in result:
                writer.writerow(datafile.getdict())
    except Exception as e:
        # Caching is best-effort: report the problem and still return the list.
        print("Couldn't write the dataset csv file:\n\n{}".format(e))
    return result
245 
def dasquery(dasQuery, dasLimit=0)
Definition: dataset.py:27
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def findinjson(jsondict, strings)
Definition: dataset.py:95

◆ getForceRunRangeFunction()

def dataset.Dataset.getForceRunRangeFunction (   self,
  firstRun,
  lastRun 
)

Definition at line 351 of file dataset.py.

References dataset.Dataset.forcerunrange().

Referenced by dataset.Dataset.__lumiSelectionSnippet().

def getForceRunRangeFunction(self, firstRun, lastRun):
    """Return a one-argument callable bound to the given run limits.

    The callable forwards its argument to forcerunrange() together with
    firstRun and lastRun.
    """
    def forcerunrangefunction(match):
        clipped = self.forcerunrange(firstRun, lastRun, match)
        return clipped
    return forcerunrangefunction
355 

◆ getPrimaryDatasetEntries()

def dataset.Dataset.getPrimaryDatasetEntries (   self)

Definition at line 329 of file dataset.py.

References dataset.int, runall.testit.report, WorkFlowRunner.WorkFlowRunner.report, ALIUtils.report, and dataset.BaseDataset.report.

def getPrimaryDatasetEntries(self):
    """Return the 'PrimaryDatasetEntries' value of the report as an int.

    Returns -1 when the report is None or empty, or when it has no such key.
    """
    report = self.report
    if not report:
        return -1
    return int(report.get('PrimaryDatasetEntries', -1))
333 
334 

◆ getrunnumberfromfilename()

def dataset.Dataset.getrunnumberfromfilename (   filename)
static

Definition at line 858 of file dataset.py.

References python.cmstools.all(), dataset.int, and join().

Referenced by dataset.Dataset.fileList().

def getrunnumberfromfilename(filename):
    """Return the run number encoded in a /store/... file name.

    Names under /store/mc or /store/relval yield 1.  Otherwise the name must
    end in ".root" and consist of exactly 12 "/"-separated parts; the run
    number is read from the three 3-character directories that sit five to
    three levels from the end.

    Raises AllInOneError with a three-line message (summary, file name,
    reason) when the run number cannot be determined.  Names too short to
    contain the expected leading parts are reported the same way instead of
    failing with an IndexError.
    """
    parts = filename.split("/")
    result = error = None
    # Slice comparison instead of indexing, so short names cannot raise.
    if parts[:2] != ["", "store"]:
        error = "does not start with /store"
    elif len(parts) > 2 and parts[2] in ["mc", "relval"]:
        result = 1
    elif not parts[-1].endswith(".root"):
        error = "does not end with something.root"
    elif len(parts) != 12:
        error = "should be exactly 11 slashes counting the first one"
    else:
        runnumberparts = parts[-5:-2]
        if not all(len(part)==3 for part in runnumberparts):
            error = "the 3 directories {} do not have length 3 each".format("/".join(runnumberparts))
        try:
            result = int("".join(runnumberparts))
        except ValueError:
            error = "the 3 directories {} do not form an integer".format("/".join(runnumberparts))

    if error:
        error = "could not figure out which run number this file is from:\n{}\n{}".format(filename, error)
        raise AllInOneError(error)

    return result
883 
def all(container)
workaround iterator generators for ROOT classes
Definition: cmstools.py:25
static std::string join(char **cmd)
Definition: RemoteFile.cc:19

◆ headercomment()

def dataset.Dataset.headercomment (   self)

Definition at line 247 of file dataset.py.

References dataset.Dataset.datasetname.

def headercomment(self):
    """Return the text used as header comment: the dataset name."""
    comment = self.datasetname
    return comment
249 

◆ magneticField()

def dataset.Dataset.magneticField (   self)

Definition at line 715 of file dataset.py.

References dataset.Dataset.__getMagneticField(), and dataset.Dataset.__magneticField.

def magneticField(self):
    """Return the magnetic field setting, resolving it lazily.

    While the stored value is falsy, ``__getMagneticField()`` is called and
    its result is stored on the instance before being returned.
    """
    if self.__magneticField:
        return self.__magneticField
    self.__magneticField = self.__getMagneticField()
    return self.__magneticField
719 

◆ magneticFieldForRun()

def dataset.Dataset.magneticFieldForRun (   self,
  run = -1 
)

Definition at line 720 of file dataset.py.

References dataset.Dataset.__getMagneticFieldForRun().

def magneticFieldForRun(self, run=-1):
    """Return the result of ``__getMagneticFieldForRun`` for ``run``.

    ``run`` defaults to -1 and is forwarded unchanged.
    """
    lookup = self.__getMagneticFieldForRun
    return lookup(run)
722 

◆ name()

def dataset.Dataset.name (   self)

Definition at line 917 of file dataset.py.

References dataset.Dataset.__name.

Referenced by config.CFG.__str__(), validation.Sample.digest(), and VIDSelectorBase.VIDSelectorBase.initialize().

def name(self):
    """Return the dataset name stored on this instance."""
    stored = self.__name
    return stored
919 

◆ parentDataset()

def dataset.Dataset.parentDataset (   self)

Definition at line 723 of file dataset.py.

References dataset.Dataset.__getParentDataset(), dataset.Dataset.__parentDataset, and dataset.Dataset.datasetSnippet().

Referenced by dataset.Dataset.__getFileInfoList().

def parentDataset(self):
    """Return the parent dataset, resolving it lazily.

    While the stored value is falsy, ``__getParentDataset()`` is called and
    its result is stored on the instance before being returned.
    """
    if self.__parentDataset:
        return self.__parentDataset
    self.__parentDataset = self.__getParentDataset()
    return self.__parentDataset
727 

◆ predefined()

def dataset.Dataset.predefined (   self)

Definition at line 920 of file dataset.py.

References dataset.Dataset.__predefined.

def predefined(self):
    """Return the stored ``__predefined`` flag of this dataset."""
    flag = self.__predefined
    return flag
922 

◆ printInfo()

def dataset.Dataset.printInfo (   self)

Definition at line 324 of file dataset.py.

References dataset.EOSDataset.castorDir, dataset.Dataset.castorDir, dataset.Dataset.lfnDir, ElectronMVAID.ElectronMVAID.name, HcalOfflineHarvesting.name, HcalOnlineHarvesting.name, HFRaddamTask.name, LaserTask.name, NoCQTask.name, PedestalTask.name, QIE10Task.name, QIE11Task.name, RecHitTask.name, UMNioTask.name, ZDCTask.name, AlignableObjectId::entry.name, RawTask.name, counter.Counter.name, average.Average.name, TPTask.name, DigiTask.name, histograms.Histograms.name, LEDTask.name, cond::persistency::TAG::NAME.name, cond::persistency::RUN_INFO::RUN_NUMBER.name, TmModule.name, cond::persistency::GTEditorData.name, cond::persistency::GLOBAL_TAG::NAME.name, cond::persistency::TAG::TIME_TYPE.name, cond::persistency::RUN_INFO::START_TIME.name, cond::persistency::GLOBAL_TAG::VALIDITY.name, cond::persistency::RUN_INFO::END_TIME.name, cond::persistency::TAG::OBJECT_TYPE.name, core.autovars.NTupleVariable.name, cond::persistency::GLOBAL_TAG::DESCRIPTION.name, DQMRivetClient::NormOption.name, cond::persistency::TAG::SYNCHRONIZATION.name, cond::persistency::GLOBAL_TAG::RELEASE.name, cond::persistency::TAG::END_OF_VALIDITY.name, MEPSet.name, cond::persistency::GLOBAL_TAG::SNAPSHOT_TIME.name, cond::persistency::O2O_RUN::JOB_NAME.name, cond::persistency::GLOBAL_TAG::INSERTION_TIME.name, cond::persistency::TAG::DESCRIPTION.name, cms::dd::NameValuePair< T >.name, cond::persistency::O2O_RUN::START_TIME.name, cond::persistency::TAG::LAST_VALIDATED_TIME.name, FWTGeoRecoGeometry::Info.name, cond::persistency::O2O_RUN::END_TIME.name, cond::persistency::TAG::INSERTION_TIME.name, cond::persistency::O2O_RUN::STATUS_CODE.name, cond::persistency::TAG::MODIFICATION_TIME.name, cond::persistency::O2O_RUN::LOG.name, nanoaod::MergeableCounterTable::SingleColumn< T >.name, ParameterSet.name, cond::persistency::TAG::PROTECTION_CODE.name, preexistingValidation.PreexistingValidation.name, OutputMEPSet.name, MEPSetData.name, AlignmentConstraint.name, PixelDCSObject< T >::Item.name, 
dataset.BaseDataset.name, cms::dd::ValuePair< T, U >.name, personalPlayback.Applet.name, Types._Untracked.name, MagCylinder.name, analyzer.Analyzer.name, DQMRivetClient::LumiOption.name, heppy::ParSet.name, cond::persistency::GTProxyData.name, SingleObjectCondition.name, edm::PathTimingSummary.name, DQMRivetClient::ScaleFactorOption.name, cms::DDAlgoArguments.name, EgHLTOfflineSummaryClient::SumHistBinData.name, Barrel.name, cond::TimeTypeSpecs.name, core.autovars.NTupleObjectType.name, perftools::EdmEventSize::BranchRecord.name, edm::PathSummary.name, EcalLogicID.name, alignment.Alignment.name, lumi::TriggerInfo.name, PixelEndcapLinkMaker::Item.name, XMLProcessor::_loaderBaseConfig.name, FWTableViewManager::TableEntry.name, MEtoEDM< T >::MEtoEDMObject.name, PixelBarrelLinkMaker::Item.name, ExpressionHisto< T >.name, DQMGenericClient::EfficOption.name, TreeCrawler.Package.name, Supermodule.name, cond::persistency::GLOBAL_TAG_MAP::GLOBAL_TAG_NAME.name, genericValidation.GenericValidation.name, cond::persistency::GLOBAL_TAG_MAP::RECORD.name, options.ConnectionHLTMenu.name, cond::persistency::GLOBAL_TAG_MAP::LABEL.name, cms::DDParsingContext::CompositeMaterial.name, cond::persistency::GLOBAL_TAG_MAP::TAG_NAME.name, cond::Tag_t.name, dqmoffline::l1t::HistDefinition.name, DQMGenericClient::ProfileOption.name, magneticfield::BaseVolumeHandle.name, nanoaod::MergeableCounterTable::VectorColumn< T >.name, FastHFShowerLibrary.name, emtf::Node.name, h4DSegm.name, DQMGenericClient::NormOption.name, core.TriggerMatchAnalyzer.TriggerMatchAnalyzer.name, DQMGenericClient::CDOption.name, CounterChecker.name, PhysicsTools::Calibration::Variable.name, cond::TagInfo_t.name, TrackerSectorStruct.name, MuonGeometrySanityCheckPoint.name, FCDTask.name, cond::persistency::PAYLOAD::HASH.name, DQMGenericClient::NoFlowOption.name, Mapper::definition< ScannerT >.name, cond::persistency::PAYLOAD::OBJECT_TYPE.name, looper.Looper.name, cond::persistency::PAYLOAD::DATA.name, EDMtoMEConverter.name, 
cond::persistency::PAYLOAD::STREAMER_INFO.name, cond::persistency::PAYLOAD::VERSION.name, cond::persistency::PAYLOAD::INSERTION_TIME.name, classes.MonitorData.name, HistogramManager.name, classes.OutputData.name, BPHDecayToResResBuilderBase::DZSelect.name, Crystal.name, h2DSegm.name, options.HLTProcessOptions.name, cond::persistency::IOV::TAG_NAME.name, cond::persistency::IOV::SINCE.name, cond::persistency::IOV::PAYLOAD_HASH.name, cond::persistency::IOV::INSERTION_TIME.name, DQMNet::WaitObject.name, core.TriggerBitAnalyzer.TriggerBitAnalyzer.name, AlpgenParameterName.name, config.Analyzer.name, geometry.Structure.name, core.autovars.NTupleSubObject.name, Capsule.name, core.autovars.NTupleObject.name, Ceramic.name, SiStripMonitorDigi.name, BulkSilicon.name, config.Service.name, APD.name, core.autovars.NTupleCollection.name, nanoaod::FlatTable::Column.name, BPHRecoBuilder::BPHRecoSource.name, BPHRecoBuilder::BPHCompSource.name, StraightTrackAlignment::RPSetPlots.name, cond::persistency::TAG_AUTHORIZATION::TAG_NAME.name, cond::persistency::TAG_AUTHORIZATION::ACCESS_TYPE.name, cond::persistency::TAG_AUTHORIZATION::CREDENTIAL.name, cond::persistency::TAG_AUTHORIZATION::CREDENTIAL_TYPE.name, InnerLayerVolume.name, cond::payloadInspector::TagReference.name, cond::persistency::TAG_LOG::TAG_NAME.name, cond::persistency::TAG_LOG::EVENT_TIME.name, cond::persistency::TAG_LOG::USER_NAME.name, cond::persistency::TAG_LOG::HOST_NAME.name, cond::persistency::TAG_LOG::COMMAND.name, cond::persistency::TAG_LOG::ACTION.name, cond::persistency::TAG_LOG::USER_TEXT.name, personalPlayback.FrameworkJob.name, Grid.name, trklet::TrackletConfigBuilder::DTCinfo.name, Grille.name, BackPipe.name, plotscripts.SawTeethFunction.name, PatchPanel.name, BackCoolTank.name, DryAirTube.name, crabFunctions.CrabTask.name, MBCoolTube.name, MBManif.name, cscdqm::ParHistoDef.name, hTMaxCell.name, BeautifulSoup.Tag.name, SummaryOutputProducer::GenericSummary.name, BeautifulSoup.SoupStrainer.name, and print().

def printInfo(self):
    """Print the sample name, the LFN directory and the Castor path, one per line."""
    labelled = (
        ('sample : ', 'name'),
        ('LFN : ', 'lfnDir'),
        ('Castor path : ', 'castorDir'),
    )
    # Attributes are read one at a time, right before each line is printed.
    for label, attribute in labelled:
        print(label + getattr(self, attribute))
328 
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def printInfo(self, event)

◆ runList()

def dataset.Dataset.runList (   self)

Definition at line 924 of file dataset.py.

References dataset.Dataset.__getRunList(), and print().

def runList(self):
    """Return the run list as produced by ``__getRunList()``."""
    runs = self.__getRunList()
    return runs
926 
927 

Member Data Documentation

◆ __cmssw

dataset.Dataset.__cmssw
private

Definition at line 28 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField(), and dataset.Dataset.dump_cff().

◆ __cmsswrelease

dataset.Dataset.__cmsswrelease
private

Definition at line 29 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField().

◆ __dasinstance

dataset.Dataset.__dasinstance
private

◆ __dasLimit

dataset.Dataset.__dasLimit
private

Definition at line 26 of file dataset.py.

Referenced by dataset.Dataset.fileInfoList().

◆ __dataType

dataset.Dataset.__dataType
private

◆ __dummy_source_template

tuple dataset.Dataset.__dummy_source_template
staticprivate
Initial value:
= ("readFiles = cms.untracked.vstring()\n"
"secFiles = cms.untracked.vstring()\n"
"%(process)ssource = cms.Source(\"PoolSource\",\n"
"%(tab)s secondaryFileNames ="
"secFiles,\n"
"%(tab)s fileNames = readFiles\n"
")\n"
"readFiles.extend(['dummy_File.root'])\n"
"%(process)smaxEvents = cms.untracked.PSet( "
"input = cms.untracked.int32(int(%(nEvents)s)) )\n"
"%(skipEventsString)s\n")

Definition at line 113 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().

◆ __filename

dataset.Dataset.__filename
private

◆ __firstusedrun

dataset.Dataset.__firstusedrun
private

◆ __inputMagneticField

dataset.Dataset.__inputMagneticField
private

◆ __lastusedrun

dataset.Dataset.__lastusedrun
private

◆ __magneticField

dataset.Dataset.__magneticField
private

◆ __name

dataset.Dataset.__name
private

◆ __official

dataset.Dataset.__official
private

Definition at line 36 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

◆ __origName

dataset.Dataset.__origName
private

Definition at line 25 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

◆ __parentDataset

dataset.Dataset.__parentDataset
private

Definition at line 32 of file dataset.py.

Referenced by dataset.Dataset.parentDataset().

◆ __predefined

dataset.Dataset.__predefined
private

◆ __source_template

dataset.Dataset.__source_template
staticprivate

Definition at line 95 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().

◆ bad_files

dataset.Dataset.bad_files

Definition at line 285 of file dataset.py.

◆ castorDir

dataset.Dataset.castorDir

Definition at line 269 of file dataset.py.

Referenced by dataset.Dataset.extractFileSizes(), and dataset.Dataset.printInfo().

◆ dasinstance

dataset.Dataset.dasinstance

Definition at line 208 of file dataset.py.

Referenced by dataset.Dataset.getfiles().

◆ datasetname

dataset.Dataset.datasetname

Definition at line 200 of file dataset.py.

Referenced by dataset.Dataset.getfiles(), and dataset.Dataset.headercomment().

◆ filenamebase

dataset.Dataset.filenamebase

Definition at line 203 of file dataset.py.

Referenced by dataset.Dataset.getfiles().

◆ files

dataset.Dataset.files

Definition at line 276 of file dataset.py.

◆ filesAndSizes

dataset.Dataset.filesAndSizes

Definition at line 314 of file dataset.py.

◆ good_files

dataset.Dataset.good_files

Definition at line 286 of file dataset.py.

◆ lfnDir

dataset.Dataset.lfnDir

Definition at line 268 of file dataset.py.

Referenced by dataset.Dataset.printInfo().

◆ maskExists

dataset.Dataset.maskExists

Definition at line 270 of file dataset.py.

◆ official

dataset.Dataset.official

Definition at line 202 of file dataset.py.

◆ report

dataset.Dataset.report

Definition at line 271 of file dataset.py.

Referenced by addOnTests.testit.run().