dataset.Dataset Class Reference
Inheritance diagram for dataset.Dataset:
dataset.Dataset inherits from dataset.BaseDataset and dataset.DatasetBase.

Public Member Functions

def __init__
 
def __init__
 
def __init__
 
def buildListOfBadFiles
 
def buildListOfFiles
 
def convertTimeToRun
 
def createdatasetfile_hippy
 
def datasetSnippet
 
def dataType
 
def dump_cff
 
def extractFileSizes
 
def fileInfoList
 
def fileList
 
def forcerunrange
 
def getfiles
 
def getForceRunRangeFunction
 
def getPrimaryDatasetEntries
 
def headercomment
 
def magneticField
 
def magneticFieldForRun
 
def name
 
def parentDataset
 
def predefined
 
def printInfo
 
def runList
 
- Public Member Functions inherited from dataset.BaseDataset
def __init__
 def __init__(self, name, user, pattern='.*root'). More...
 
def buildListOfBadFiles
 
def buildListOfFiles
 
def extractFileSizes
 
def getPrimaryDatasetEntries
 
def listOfFiles
 
def listOfGoodFiles
 
def listOfGoodFilesWithPrescale
 
def printFiles
 
def printInfo
 
- Public Member Functions inherited from dataset.DatasetBase
def getfiles
 
def headercomment
 
def writefilelist_hippy
 
def writefilelist_validation
 

Static Public Member Functions

def getrunnumberfromfilename
 

Public Attributes

 bad_files
 
 castorDir
 
 dasinstance
 
 datasetname
 
 filenamebase
 
 files
 
 filesAndSizes
 
 good_files
 
 lfnDir
 
 maskExists
 
 official
 
 report
 
- Public Attributes inherited from dataset.BaseDataset
 bad_files
 
 dbsInstance
 
 files
 
 filesAndSizes
 
 good_files
 
 name
 
 pattern
 
 primaryDatasetEntries
 
 report
 
 run_range
 
 user
 

Private Member Functions

def __chunks
 
def __createSnippet
 
def __dateString
 
def __datetime
 
def __fileListSnippet
 
def __find_ge
 
def __find_lt
 
def __findInJson
 
def __getData
 
def __getDataType
 
def __getFileInfoList
 
def __getMagneticField
 
def __getMagneticFieldForRun
 
def __getParentDataset
 
def __getRunList
 
def __lumiSelectionSnippet
 

Private Attributes

 __cmssw
 
 __cmsswrelease
 
 __dasinstance
 
 __dasLimit
 
 __dataType
 
 __filename
 
 __firstusedrun
 
 __inputMagneticField
 
 __lastusedrun
 
 __magneticField
 
 __name
 
 __official
 
 __origName
 
 __parentDataset
 
 __predefined
 

Static Private Attributes

tuple __dummy_source_template
 

Detailed Description

Definition at line 198 of file dataset.py.
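
The class wraps DAS queries and predefined _cff.py dataset files for the all-in-one alignment validation tool. A minimal usage sketch (the dataset name is hypothetical, and the import path assumes the usual CMSSW layout of this tool; DAS access is required):

    # Sketch only: requires a CMSSW environment with DAS access.
    from Alignment.OfflineValidation.TkAlAllInOneTool.dataset import Dataset

    dataset = Dataset("/MinimumBias/Run2012A-TkAlMinBias-v1/ALCARECO")
    print(dataset.dataType())       # "data", "mc", or "unknown"
    print(dataset.magneticField())  # e.g. "MagneticField"
    files = dataset.fileList()      # LFNs, retrieved from DAS on first use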

Constructor & Destructor Documentation

def dataset.Dataset.__init__ (   self,
  datasetname,
  dasinstance = defaultdasinstance 
)

Definition at line 199 of file dataset.py.

Referenced by dataset.Dataset.__init__().

200  def __init__(self, datasetname, dasinstance=defaultdasinstance):
201      self.datasetname = datasetname
202      if re.match(r'/.+/.+/.+', datasetname):
203          self.official = True
204          self.filenamebase = "Dataset" + self.datasetname.replace("/","_")
205      else:
206          self.official = False
207          self.filenamebase = datasetname
208
209      self.dasinstance = dasinstance
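
The constructor only inspects the shape of the name: anything matching the '/primary/processed/tier' pattern is treated as an official CMS dataset and flattened into a file name base. A standalone sketch of that check:

    import re

    def is_official(datasetname):
        # Same regex as above: three non-empty, slash-separated fields.
        return bool(re.match(r'/.+/.+/.+', datasetname))

    assert is_official("/MinimumBias/Run2012A-TkAlMinBias-v1/ALCARECO")
    assert not is_official("MyPredefinedDataset")
    # An official name is flattened for use as a file name base, e.g.
    # Dataset_MinimumBias_Run2012A-TkAlMinBias-v1_ALCARECO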
def dataset.Dataset.__init__ (   self,
  datasetName,
  dasLimit = 0,
  tryPredefinedFirst = True,
  cmssw = os.environ["CMSSW_BASE"],
  cmsswrelease = os.environ["CMSSW_RELEASE_BASE"],
  magneticfield = None,
  dasinstance = None 
)

Definition at line 23 of file dataset.py.

23
24               magneticfield = None, dasinstance = None):
25      self.__name = datasetName
26      self.__origName = datasetName
27      self.__dasLimit = dasLimit
28      self.__dasinstance = dasinstance
29      self.__cmssw = cmssw
30      self.__cmsswrelease = cmsswrelease
31      self.__firstusedrun = None
32      self.__lastusedrun = None
33      self.__parentDataset = None
34
35      # check, if dataset name matches CMS dataset naming scheme
36      if re.match( r'/.+/.+/.+', self.__name ):
37          self.__official = True
38          fileName = "Dataset" + self.__name.replace("/","_") + "_cff.py"
39      else:
40          self.__official = False
41          fileName = self.__name + "_cff.py"
42
43      searchPath1 = os.path.join( self.__cmssw, "python",
44                                  "Alignment", "OfflineValidation",
45                                  fileName )
46      searchPath2 = os.path.join( self.__cmssw, "src",
47                                  "Alignment", "OfflineValidation",
48                                  "python", fileName )
49      searchPath3 = os.path.join( self.__cmsswrelease,
50                                  "python", "Alignment",
51                                  "OfflineValidation", fileName )
52      if self.__official and not tryPredefinedFirst:
53          self.__predefined = False
54      elif os.path.exists( searchPath1 ):
55          self.__predefined = True
56          self.__filename = searchPath1
57      elif os.path.exists( searchPath2 ):
58          msg = ("The predefined dataset '%s' does exist in '%s', but "
59                 "you need to run 'scram b' first."
60                 %( self.__name, searchPath2 ))
61          if self.__official:
62              print(msg)
63              print("Getting the data from DAS again. To go faster next time, run scram b.")
64          else:
65              raise AllInOneError( msg )
66      elif os.path.exists( searchPath3 ):
67          self.__predefined = True
68          self.__filename = searchPath3
69      elif self.__official:
70          self.__predefined = False
71      else:
72          msg = ("The predefined dataset '%s' does not exist. Please "
73                 "create it first or check for typos."%( self.__name ))
74          raise AllInOneError( msg )
75
76      if self.__predefined and self.__official:
77          self.__name = "Dataset" + self.__name.replace("/","_")
78
79      if magneticfield is not None:
80          try:
81              magneticfield = float(magneticfield)
82          except ValueError:
83              raise AllInOneError("Bad magneticfield {} which can't be converted to float".format(magneticfield))
84      self.__inputMagneticField = magneticfield
85
86      self.__dataType = self.__getDataType()
87      self.__magneticField = self.__getMagneticField()
88
def dataset.Dataset.__init__ (   self,
  name,
  user,
  pattern = '.*root' 
)

Definition at line 267 of file dataset.py.

References dataset.Dataset.__init__().

268  def __init__(self, name, user, pattern='.*root'):
269      self.lfnDir = castorBaseDir(user) + name
270      self.castorDir = castortools.lfnToCastor( self.lfnDir )
271      self.maskExists = False
272      self.report = None
273      super(Dataset, self).__init__(name, user, pattern)

Member Function Documentation

def dataset.Dataset.__chunks (   self,
  theList,
  n 
)
private
Yield successive n-sized chunks from theList.

Definition at line 89 of file dataset.py.


Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.__lumiSelectionSnippet(), and dataset.Dataset.createdatasetfile_hippy().

89
90  def __chunks( self, theList, n ):
91      """ Yield successive n-sized chunks from theList.
92      """
93      for i in range( 0, len( theList ), n ):
94          yield theList[i:i+n]
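
Since the generator only uses len() and slicing, it can be exercised standalone; the callers listed above use it to split file and lumi lists into chunks of 255 entries. For illustration:

    def chunks(theList, n):
        '''Yield successive n-sized chunks from theList.'''
        for i in range(0, len(theList), n):
            yield theList[i:i+n]

    print(list(chunks(list(range(7)), 3)))   # [[0, 1, 2], [3, 4, 5], [6]]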
def dataset.Dataset.__createSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  repMap = None,
  crab = False,
  parent = False 
)
private

Definition at line 245 of file dataset.py.

References dataset.Dataset.__dummy_source_template, dataset.Dataset.__fileListSnippet(), dataset.Dataset.__lumiSelectionSnippet(), and dataset.Dataset.convertTimeToRun().

Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.datasetSnippet(), and dataset.Dataset.dump_cff().

246                          crab = False, parent = False ):
247
248      if firstRun:
249          firstRun = int( firstRun )
250      if lastRun:
251          lastRun = int( lastRun )
252      if ( begin and firstRun ) or ( end and lastRun ):
253          msg = ( "The Usage of "
254                  + "'begin' & 'firstRun' " * int( bool( begin and
255                                                         firstRun ) )
256                  + "and " * int( bool( ( begin and firstRun ) and
257                                        ( end and lastRun ) ) )
258                  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
259                  + "is ambiguous." )
260          raise AllInOneError( msg )
261      if begin or end:
262          ( firstRun, lastRun ) = self.convertTimeToRun(
263              begin = begin, end = end, firstRun = firstRun,
264              lastRun = lastRun )
265      if ( firstRun and lastRun ) and ( firstRun > lastRun ):
266          msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
267                  "chosen is greater than the upper time/runrange limit "
268                  "('end'/'lastRun').")
269          raise AllInOneError( msg )
270
271      lumiSecExtend = self.__lumiSelectionSnippet(jsonPath=jsonPath, firstRun=firstRun, lastRun=lastRun)
272      lumiStr = goodLumiSecStr = ""
273      if lumiSecExtend:
274          goodLumiSecStr = "lumiSecs = cms.untracked.VLuminosityBlockRange()\n"
275          lumiStr = " lumisToProcess = lumiSecs,\n"
276
277      files = self.__fileListSnippet(crab=crab, parent=parent, firstRun=firstRun, lastRun=lastRun, forcerunselection=False)
278
279      theMap = repMap
280      theMap["files"] = files
281      theMap["json"] = jsonPath
282      theMap["lumiStr"] = lumiStr
283      theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
284      theMap["lumiSecExtend"] = lumiSecExtend
285      if crab:
286          dataset_snippet = self.__dummy_source_template%( theMap )
287      else:
288          dataset_snippet = self.__source_template%( theMap )
289      return dataset_snippet
def dataset.Dataset.__dateString (   self,
  date 
)
private

Definition at line 640 of file dataset.py.

References dataset.Dataset.convertTimeToRun().

Referenced by dataset.Dataset.convertTimeToRun().

641  def __dateString(self, date):
642      return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)
def dataset.Dataset.__datetime (   self,
  stringForDas 
)
private

Definition at line 631 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

632  def __datetime(self, stringForDas):
633      if len(stringForDas) != 8:
634          raise AllInOneError(stringForDas + " is not a valid date string.\n"
635                              + "DAS accepts dates in the form 'yyyymmdd'")
636      year = stringForDas[:4]
637      month = stringForDas[4:6]
638      day = stringForDas[6:8]
639      return datetime.date(int(year), int(month), int(day))
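
The two date helpers are inverses of each other: __datetime parses the 'yyyymmdd' strings that DAS accepts, and __dateString formats a datetime.date back into one. A standalone round trip:

    import datetime

    def to_date(s):    # mirrors __datetime
        if len(s) != 8:
            raise ValueError(s + " is not a valid 'yyyymmdd' date string")
        return datetime.date(int(s[:4]), int(s[4:6]), int(s[6:8]))

    def to_string(d):  # mirrors __dateString
        return str(d.year) + str(d.month).zfill(2) + str(d.day).zfill(2)

    assert to_string(to_date("20180305")) == "20180305"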
def dataset.Dataset.__fileListSnippet (   self,
  crab = False,
  parent = False,
  firstRun = None,
  lastRun = None,
  forcerunselection = False 
)
private

Definition at line 221 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__createSnippet(), dataset.Dataset.__name, and dataset.Dataset.fileList().

Referenced by dataset.Dataset.__createSnippet().

222  def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
223      if crab:
224          files = ""
225      else:
226          splitFileList = list( self.__chunks( self.fileList(firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
227          if not splitFileList:
228              raise AllInOneError("No files found for dataset {}. Check the spelling, or maybe specify another das instance?".format(self.__name))
229          fileStr = [ "',\n'".join( files ) for files in splitFileList ]
230          fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
231                      for files in fileStr ]
232          files = "\n".join( fileStr )
233
234          if parent:
235              splitParentFileList = list( self.__chunks( self.fileList(parent=True, firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
236              parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ]
237              parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
238                                for parentFiles in parentFileStr ]
239              parentFiles = "\n".join( parentFileStr )
240              files += "\n\n" + parentFiles
241
242      return files
def dataset.Dataset.__find_ge (   self,
  a,
  x 
)
private

Definition at line 297 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

298  def __find_ge( self, a, x):
299      'Find leftmost item greater than or equal to x'
300      i = bisect.bisect_left( a, x )
301      if i != len( a ):
302          return i
303      raise ValueError
def dataset.Dataset.__find_lt (   self,
  a,
  x 
)
private

Definition at line 290 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

291  def __find_lt( self, a, x ):
292      'Find rightmost value less than x'
293      i = bisect.bisect_left( a, x )
294      if i:
295          return i-1
296      raise ValueError
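
convertTimeToRun() uses these two bisect helpers to snap a run number reported by DAS onto the dataset's own sorted run list. A standalone sketch with a made-up run list:

    import bisect

    def find_ge(a, x):   # leftmost item >= x, like __find_ge
        i = bisect.bisect_left(a, x)
        if i != len(a):
            return i
        raise ValueError

    def find_lt(a, x):   # rightmost item < x, like __find_lt
        i = bisect.bisect_left(a, x)
        if i:
            return i - 1
        raise ValueError

    runs = [273158, 273302, 273402, 273425]
    print(runs[find_ge(runs, 273300)])   # 273302: first run at or after 273300
    print(runs[find_lt(runs, 273300)])   # 273158: last run before 273300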
def dataset.Dataset.__findInJson (   self,
  jsondict,
  strings 
)
private

Definition at line 304 of file dataset.py.

References dataset.Dataset.__findInJson().

Referenced by dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.fileList().

305  def __findInJson(self, jsondict, strings):
306      if isinstance(strings, str):
307          strings = [ strings ]
308
309      if len(strings) == 0:
310          return jsondict
311      if isinstance(jsondict,dict):
312          if strings[0] in jsondict:
313              try:
314                  return self.__findInJson(jsondict[strings[0]], strings[1:])
315              except KeyError:
316                  pass
317      else:
318          for a in jsondict:
319              if strings[0] in a:
320                  try:
321                      return self.__findInJson(a[strings[0]], strings[1:])
322                  except (TypeError, KeyError): #TypeError because a could be a string and contain strings[0]
323                      pass
324      #if it's not found
325      raise KeyError("Can't find " + strings[0])
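
The method descends into DAS output by a list of keys, also searching through lists of records. A standalone sketch of the same logic, run on data shaped like a DAS reply:

    def find_in_json(jsondict, strings):   # same logic as __findInJson
        if isinstance(strings, str):
            strings = [strings]
        if len(strings) == 0:
            return jsondict
        if isinstance(jsondict, dict):
            if strings[0] in jsondict:
                try:
                    return find_in_json(jsondict[strings[0]], strings[1:])
                except KeyError:
                    pass
        else:
            for a in jsondict:
                if strings[0] in a:
                    try:
                        return find_in_json(a[strings[0]], strings[1:])
                    except (TypeError, KeyError):
                        pass
        raise KeyError("Can't find " + strings[0])

    data = [{"run": {"run_number": 273158}}, {"run": {"run_number": 273302}}]
    print([find_in_json(entry, ["run", "run_number"]) for entry in data])   # [273158, 273302]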
def dataset.Dataset.__getData (   self,
  dasQuery,
  dasLimit = 0 
)
private

Definition at line 356 of file dataset.py.

References dataset.Dataset.__findInJson(), and das_client.get_data().

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), and dataset.Dataset.convertTimeToRun().

357  def __getData( self, dasQuery, dasLimit = 0 ):
358      dasData = das_client.get_data(dasQuery, dasLimit)
359      if isinstance(dasData, str):
360          jsondict = json.loads( dasData )
361      else:
362          jsondict = dasData
363      # Check, if the DAS query fails
364      try:
365          error = self.__findInJson(jsondict,["data","error"])
366      except KeyError:
367          error = None
368      if error or self.__findInJson(jsondict,"status") != 'ok' or "data" not in jsondict:
369          try:
370              jsonstr = self.__findInJson(jsondict,"reason")
371          except KeyError:
372              jsonstr = str(jsondict)
373          if len(jsonstr) > 10000:
374              jsonfile = "das_query_output_%i.txt"
375              i = 0
376              while os.path.lexists(jsonfile % i):
377                  i += 1
378              jsonfile = jsonfile % i
379              theFile = open( jsonfile, "w" )
380              theFile.write( jsonstr )
381              theFile.close()
382              msg = "The DAS query returned an error. The output is very long, and has been stored in:\n" + jsonfile
383          else:
384              msg = "The DAS query returned an error. Here is the output\n" + jsonstr
385          msg += "\nIt's possible that this was a server error. If so, it may work if you try again later"
386          raise AllInOneError(msg)
387      return self.__findInJson(jsondict,"data")
def dataset.Dataset.__getDataType (   self)
private

Definition at line 388 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, and dataset.Dataset.__predefined.

Referenced by dataset.Dataset.dataType().

389  def __getDataType( self ):
390      if self.__predefined:
391          with open(self.__filename) as f:
392              datatype = None
393              for line in f.readlines():
394                  if line.startswith("#data type: "):
395                      if datatype is not None:
396                          raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
397                      datatype = line.replace("#data type: ", "").replace("\n","")
398                      return datatype
399              return "unknown"
400
401      dasQuery_type = ( 'dataset dataset=%s instance=%s detail=true | grep dataset.datatype,'
402                        'dataset.name'%( self.__name, self.__dasinstance ) )
403      data = self.__getData( dasQuery_type )
404
405      try:
406          return self.__findInJson(data, ["dataset", "datatype"])
407      except KeyError:
408          print ("Cannot find the datatype of the dataset '%s'\n"
409                 "It may not be possible to automatically find the magnetic field,\n"
410                 "and you will not be able to run in CRAB mode"
411                 %( self.name() ))
412          return "unknown"
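
For predefined datasets the data type is read back from the comment header that dump_cff() writes into the _cff.py file. A sketch of that header parsing (the header contents here are hypothetical):

    header_lines = [
        "#Do not delete or (unless you know what you're doing) change these comments",
        "#/MinimumBias/Run2012A-TkAlMinBias-v1/ALCARECO",
        "#data type: data",
        "#magnetic field: MagneticField, 3.8 #Use MagneticField_cff.py",
    ]

    datatype = None
    for line in header_lines:
        if line.startswith("#data type: "):
            datatype = line.replace("#data type: ", "").split("#")[0].strip()
    print(datatype)   # data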
def dataset.Dataset.__getFileInfoList (   self,
  dasLimit,
  parent = False 
)
private

Definition at line 561 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__predefined, and dataset.Dataset.parentDataset().

Referenced by dataset.Dataset.fileInfoList().

562  def __getFileInfoList( self, dasLimit, parent = False ):
563      if self.__predefined:
564          if parent:
565              extendstring = "secFiles.extend"
566          else:
567              extendstring = "readFiles.extend"
568          with open(self.__filename) as f:
569              files = []
570              copy = False
571              for line in f.readlines():
572                  if "]" in line:
573                      copy = False
574                  if copy:
575                      files.append({"name": line.translate(str.maketrans("", "", "', " + '"'))})
576                  if extendstring in line and "[" in line and "]" not in line:
577                      copy = True
578          return files
579
580      if parent:
581          searchdataset = self.parentDataset()
582      else:
583          searchdataset = self.__name
584      dasQuery_files = ( 'file dataset=%s instance=%s detail=true | grep file.name, file.nevents, '
585                         'file.creation_time, '
586                         'file.modification_time'%( searchdataset, self.__dasinstance ) )
587      print("Requesting file information for '%s' from DAS..."%( searchdataset ), end=' ')
588      sys.stdout.flush()
589      data = self.__getData( dasQuery_files, dasLimit )
590      print("Done.")
591      data = [ self.__findInJson(entry,"file") for entry in data ]
592      if len( data ) == 0:
593          msg = ("No files are available for the dataset '%s'. This can be "
594                 "due to a typo or due to a DAS problem. Please check the "
595                 "spelling of the dataset and/or retry to run "
596                 "'validateAlignments.py'."%( self.name() ))
597          raise AllInOneError( msg )
598      fileInformationList = []
599      for file in data:
600          fileName = 'unknown'
601          try:
602              fileName = self.__findInJson(file, "name")
603              fileCreationTime = self.__findInJson(file, "creation_time")
604              fileNEvents = self.__findInJson(file, "nevents")
605          except KeyError:
606              print(("DAS query gives bad output for file '%s'. Skipping it.\n"
607                     "It may work if you try again later.") % fileName)
608              fileNEvents = 0
609          # select only non-empty files
610          if fileNEvents == 0:
611              continue
612          fileDict = { "name": fileName,
613                       "creation_time": fileCreationTime,
614                       "nevents": fileNEvents
615                     }
616          fileInformationList.append( fileDict )
617      fileInformationList.sort( key=lambda info: self.__findInJson(info,"name") )
618      return fileInformationList
def dataset.Dataset.__getMagneticField (   self)
private

Definition at line 423 of file dataset.py.

References dataset.Dataset.__cmssw, dataset.Dataset.__cmsswrelease, dataset.Dataset.__dasinstance, dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__name, and dataset.Dataset.__predefined.

Referenced by dataset.Dataset.magneticField().

424  def __getMagneticField( self ):
425      Bfieldlocation = os.path.join( self.__cmssw, "python", "Configuration", "StandardSequences" )
426      if not os.path.isdir(Bfieldlocation):
427          Bfieldlocation = os.path.join( self.__cmsswrelease, "python", "Configuration", "StandardSequences" )
428      Bfieldlist = [ f.replace("_cff.py",'') \
429                     for f in os.listdir(Bfieldlocation) \
430                     if f.startswith("MagneticField_") and f.endswith("_cff.py") ]
431      Bfieldlist.sort( key = lambda Bfield: -len(Bfield) ) #Put it in order of decreasing length, so that searching in the name gives the longer match
432
433      if self.__inputMagneticField is not None:
434          if self.__inputMagneticField == 3.8:
435              return "MagneticField"
436          elif self.__inputMagneticField == 0:
437              return "MagneticField_0T"
438          else:
439              raise ValueError("Unknown input magnetic field {}".format(self.__inputMagneticField))
440
441      if self.__predefined:
442          with open(self.__filename) as f:
443              datatype = None
444              Bfield = None
445              for line in f.readlines():
446                  if line.startswith("#data type: "):
447                      if datatype is not None:
448                          raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
449                      datatype = line.replace("#data type: ", "").replace("\n","")
450                      datatype = datatype.split("#")[0].strip()
451                  if line.startswith("#magnetic field: "):
452                      if Bfield is not None:
453                          raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
454                      Bfield = line.replace("#magnetic field: ", "").replace("\n","")
455                      Bfield = Bfield.split("#")[0].strip()
456              if Bfield is not None:
457                  Bfield = Bfield.split(",")[0]
458                  if Bfield in Bfieldlist or Bfield == "unknown":
459                      return Bfield
460                  else:
461                      print("Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield)
462                      print("Using Bfield='unknown' - this will revert to the default")
463                      return "unknown"
464              elif datatype == "data":
465                  return "MagneticField" #this should be in the "#magnetic field" line, but for safety in case it got messed up
466              else:
467                  return "unknown"
468
469      if self.__dataType == "data":
470          return "MagneticField"
471
472      #try to find the magnetic field from DAS
473      #it seems to be there for the newer (7X) MC samples, except cosmics
474      dasQuery_B = ('dataset dataset=%s instance=%s'%(self.__name, self.__dasinstance))
475      data = self.__getData( dasQuery_B )
476
477      try:
478          Bfield = self.__findInJson(data, ["dataset", "mcm", "sequences", "magField"])
479          if Bfield in Bfieldlist:
480              return Bfield
481          elif Bfield == "38T" or Bfield == "38T_PostLS1":
482              return "MagneticField"
483          elif "MagneticField_" + Bfield in Bfieldlist:
484              return "MagneticField_" + Bfield
485          elif Bfield == "":
486              pass
487          else:
488              print("Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield)
489              print("Using Bfield='unknown' - this will revert to the default magnetic field")
490              return "unknown"
491      except KeyError:
492          pass
493
494      for possibleB in Bfieldlist:
495          if (possibleB != "MagneticField"
496              and possibleB.replace("MagneticField_","") in self.__name.replace("TkAlCosmics0T", "")):
497              #final attempt - try to identify the dataset from the name
498              #all cosmics dataset names contain "TkAlCosmics0T"
499              if possibleB == "MagneticField_38T" or possibleB == "MagneticField_38T_PostLS1":
500                  return "MagneticField"
501              return possibleB
502
503      return "unknown"
def dataset.Dataset.__getMagneticFieldForRun (   self,
  run = -1,
  tolerance = 0.5 
)
private
For MC, this returns the same as the previous function.
   For data, it gets the magnetic field from the runs.  This is important for
   deciding which template to use for offlinevalidation

Definition at line 504 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getData(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__lastusedrun, dataset.Dataset.__magneticField, dataset.Dataset.__name, and dataset.Dataset.__predefined.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dump_cff(), and dataset.Dataset.magneticFieldForRun().

505  def __getMagneticFieldForRun( self, run = -1, tolerance = 0.5 ):
506      """For MC, this returns the same as the previous function.
507         For data, it gets the magnetic field from the runs. This is important for
508         deciding which template to use for offlinevalidation
509      """
510      if self.__dataType == "mc" and self.__magneticField == "MagneticField":
511          return 3.8 #For 3.8T MC the default MagneticField is used
512      if self.__inputMagneticField is not None:
513          return self.__inputMagneticField
514      if "T" in self.__magneticField:
515          Bfield = self.__magneticField.split("T")[0].replace("MagneticField_","")
516          try:
517              return float(Bfield) / 10.0 #e.g. 38T and 38T_PostLS1 both return 3.8
518          except ValueError:
519              pass
520      if self.__predefined:
521          with open(self.__filename) as f:
522              Bfield = None
523              for line in f.readlines():
524                  if line.startswith("#magnetic field: ") and "," in line:
525                      if Bfield is not None:
526                          raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
527                      return float(line.replace("#magnetic field: ", "").split(",")[1].split("#")[0].strip())
528
529      if run > 0:
530          dasQuery = ('run=%s instance=%s detail=true'%(run, self.__dasinstance)) #for data
531          data = self.__getData(dasQuery)
532          try:
533              return self.__findInJson(data, ["run","bfield"])
534          except KeyError:
535              return "unknown Can't get the magnetic field for run %s from DAS" % run
536
537      #run < 0 - find B field for the first and last runs, and make sure they're compatible
538      #          (to within tolerance)
539      #NOT FOOLPROOF! The magnetic field might go up and then down, or vice versa
540      if self.__firstusedrun is None or self.__lastusedrun is None:
541          return "unknown Can't get the exact magnetic field for the dataset until data has been retrieved from DAS."
542      firstrunB = self.__getMagneticFieldForRun(self.__firstusedrun)
543      lastrunB = self.__getMagneticFieldForRun(self.__lastusedrun)
544      try:
545          if abs(firstrunB - lastrunB) <= tolerance:
546              return .5*(firstrunB + lastrunB)
547          print(firstrunB, lastrunB, tolerance)
548          return ("unknown The beginning and end of your run range for %s\n"
549                  "have different magnetic fields (%s, %s)!\n"
550                  "Try limiting the run range using firstRun, lastRun, begin, end, or JSON,\n"
551                  "or increasing the tolerance (in dataset.py) from %s.") % (self.__name, firstrunB, lastrunB, tolerance)
552      except TypeError:
553          try:
554              if "unknown" in firstrunB:
555                  return firstrunB
556              else:
557                  return lastrunB
558          except TypeError:
559              return lastrunB
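
The numeric field value is recovered from configuration names like "MagneticField_38T_PostLS1" by taking the digits before the "T" and dividing by ten. A standalone sketch of that parsing step:

    def bfield_from_name(name):
        # "38T" and "38T_PostLS1" both give 3.8; returns None if no number is found.
        if "T" in name:
            try:
                return float(name.split("T")[0].replace("MagneticField_", "")) / 10.0
            except ValueError:
                pass
        return None

    print(bfield_from_name("MagneticField_38T_PostLS1"))   # 3.8
    print(bfield_from_name("MagneticField_0T"))            # 0.0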
def dataset.Dataset.__getParentDataset (   self)
private

Definition at line 413 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), and dataset.Dataset.__name.

Referenced by dataset.Dataset.parentDataset().

414  def __getParentDataset( self ):
415      dasQuery = "parent dataset=" + self.__name + " instance="+self.__dasinstance
416      data = self.__getData( dasQuery )
417      try:
418          return self.__findInJson(data, ["parent", "name"])
419      except KeyError:
420          raise AllInOneError("Cannot find the parent of the dataset '" + self.__name + "'\n"
421                              "Here is the DAS output:\n" + str(data) +
422                              "\nIt's possible that this was a server error. If so, it may work if you try again later")
def dataset.Dataset.__getRunList (   self)
private

Definition at line 620 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), and dataset.Dataset.__name.

Referenced by dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.runList().

621  def __getRunList( self ):
622      dasQuery_runs = ( 'run dataset=%s instance=%s | grep run.run_number,'
623                        'run.creation_time'%( self.__name, self.__dasinstance ) )
624      print("Requesting run information for '%s' from DAS..."%( self.__name ), end=' ')
625      sys.stdout.flush()
626      data = self.__getData( dasQuery_runs )
627      print("Done.")
628      data = [ self.__findInJson(entry,"run") for entry in data ]
629      data.sort( key = lambda run: self.__findInJson(run, "run_number") )
630      return data
def dataset.Dataset.__lumiSelectionSnippet (   self,
  jsonPath = None,
  firstRun = None,
  lastRun = None 
)
private

Definition at line 125 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getRunList(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__lastusedrun, and dataset.Dataset.getForceRunRangeFunction().

Referenced by dataset.Dataset.__createSnippet().

126  def __lumiSelectionSnippet( self, jsonPath = None, firstRun = None, lastRun = None ):
127      lumiSecExtend = ""
128      if firstRun or lastRun or jsonPath:
129          if not jsonPath:
130              selectedRunList = self.__getRunList()
131              if firstRun:
132                  selectedRunList = [ run for run in selectedRunList \
133                                      if self.__findInJson(run, "run_number") >= firstRun ]
134              if lastRun:
135                  selectedRunList = [ run for run in selectedRunList \
136                                      if self.__findInJson(run, "run_number") <= lastRun ]
137              lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
138                           + str( self.__findInJson(run, "run_number") ) + ":max" \
139                           for run in selectedRunList ]
140              splitLumiList = list( self.__chunks( lumiList, 255 ) )
141          else:
142              theLumiList = None
143              try:
144                  theLumiList = LumiList ( filename = jsonPath )
145              except ValueError:
146                  pass
147
148              if theLumiList is not None:
149                  allRuns = theLumiList.getRuns()
150                  runsToRemove = []
151                  for run in allRuns:
152                      if firstRun and int( run ) < firstRun:
153                          runsToRemove.append( run )
154                      if lastRun and int( run ) > lastRun:
155                          runsToRemove.append( run )
156                  theLumiList.removeRuns( runsToRemove )
157                  splitLumiList = list( self.__chunks(
158                      theLumiList.getCMSSWString().split(','), 255 ) )
159                  if not (splitLumiList and splitLumiList[0] and splitLumiList[0][0]):
160                      splitLumiList = None
161              else:
162                  with open(jsonPath) as f:
163                      jsoncontents = f.read()
164                      if "process.source.lumisToProcess" in jsoncontents:
165                          msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
166                          if firstRun or lastRun:
167                              msg += ("\n (after applying firstRun and/or lastRun)")
168                          msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
169                          msg += "\nCheck your config file to make sure that it worked properly."
170                          print(msg)
171
172                          runlist = self.__getRunList()
173                          if firstRun or lastRun:
174                              self.__firstusedrun = -1
175                              self.__lastusedrun = -1
176                              jsoncontents = re.sub(r"\d+:(\d+|max)(-\d+:(\d+|max))?", self.getForceRunRangeFunction(firstRun, lastRun), jsoncontents)
177                              jsoncontents = (jsoncontents.replace("'',\n","").replace("''\n","")
178                                              .replace('"",\n','').replace('""\n',''))
179                              self.__firstusedrun = max(self.__firstusedrun, int(self.__findInJson(runlist[0],"run_number")))
180                              self.__lastusedrun = min(self.__lastusedrun, int(self.__findInJson(runlist[-1],"run_number")))
181                              if self.__lastusedrun < self.__firstusedrun:
182                                  jsoncontents = None
183                          else:
184                              self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
185                              self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))
186                          lumiSecExtend = jsoncontents
187                          splitLumiList = None
188                      else:
189                          raise AllInOneError("%s is not a valid json file!" % jsonPath)
190
191          if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
192              lumiSecStr = [ "',\n'".join( lumis ) \
193                             for lumis in splitLumiList ]
194              lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
195                             for lumis in lumiSecStr ]
196              lumiSecExtend = "\n".join( lumiSecStr )
197              runlist = self.__getRunList()
198              self.__firstusedrun = max(int(splitLumiList[0][0].split(":")[0]), int(self.__findInJson(runlist[0],"run_number")))
199              self.__lastusedrun = min(int(splitLumiList[-1][-1].split(":")[0]), int(self.__findInJson(runlist[-1],"run_number")))
200          elif lumiSecExtend:
201              pass
202          else:
203              msg = "You are trying to run a validation without any runs! Check that:"
204              if firstRun or lastRun:
205                  msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
206              if jsonPath:
207                  msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
208              if (firstRun or lastRun) and jsonPath:
209                  msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
210              raise AllInOneError(msg)
211
212      else:
213          if self.__inputMagneticField is not None:
214              pass #never need self.__firstusedrun or self.__lastusedrun
215          else:
216              runlist = self.__getRunList()
217              self.__firstusedrun = int(self.__findInJson(self.__getRunList()[0],"run_number"))
218              self.__lastusedrun = int(self.__findInJson(self.__getRunList()[-1],"run_number"))
219
220      return lumiSecExtend
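
When only a run range is given (no JSON file), whole runs are selected as "run:1-run:max" luminosity-block ranges and wrapped into lumiSecs.extend(...) statements. A sketch of the string produced for three made-up runs:

    run_numbers = [273158, 273302, 273402]
    lumiList = ["%s:1-%s:max" % (run, run) for run in run_numbers]
    lumiSecExtend = "lumiSecs.extend( [\n'" + "',\n'".join(lumiList) + "'\n] )"
    print(lumiSecExtend)
    # lumiSecs.extend( [
    # '273158:1-273158:max',
    # '273302:1-273302:max',
    # '273402:1-273402:max'
    # ] )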
def dataset.Dataset.buildListOfBadFiles (   self)
fills the list of bad files from the IntegrityCheck log.

When the integrity check file is not available,
files are considered as good.

Definition at line 278 of file dataset.py.

279  def buildListOfBadFiles(self):
280      '''fills the list of bad files from the IntegrityCheck log.
281
282      When the integrity check file is not available,
283      files are considered as good.'''
284      mask = "IntegrityCheck"
285
286      self.bad_files = {}
287      self.good_files = []
288
289      file_mask = castortools.matchingFiles(self.castorDir, r'^%s_.*\.txt$' % mask)
290      if file_mask:
291          # here to avoid circular dependency
292          from .edmIntegrityCheck import PublishToFileSystem
293          p = PublishToFileSystem(mask)
294          report = p.get(self.castorDir)
295          if report is not None and report:
296              self.maskExists = True
297              self.report = report
298              dup = report.get('ValidDuplicates',{})
299              for name, status in report['Files'].items():
300                  # print name, status
301                  if not status[0]:
302                      self.bad_files[name] = 'MarkedBad'
303                  elif name in dup:
304                      self.bad_files[name] = 'ValidDup'
305                  else:
306                      self.good_files.append( name )
307      else:
308          raise IntegrityCheckError( "ERROR: IntegrityCheck log file IntegrityCheck_XXXXXXXXXX.txt not found" )
def dataset.Dataset.buildListOfFiles (   self,
  pattern = '.*root' 
)
fills list of files, taking all root files matching the pattern in the castor dir

Definition at line 274 of file dataset.py.

275  def buildListOfFiles(self, pattern='.*root'):
276      '''fills list of files, taking all root files matching the pattern in the castor dir'''
277      self.files = castortools.matchingFiles( self.castorDir, pattern )
def dataset.Dataset.convertTimeToRun (   self,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  shortTuple = True 
)

Definition at line 645 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__dateString(), dataset.Dataset.__datetime(), dataset.Dataset.__find_ge(), dataset.Dataset.__find_lt(), dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__getRunList(), and dataset.Dataset.__name.

Referenced by dataset.Dataset.__createSnippet(), and dataset.Dataset.__dateString().

646                        shortTuple = True ):
647      if ( begin and firstRun ) or ( end and lastRun ):
648          msg = ( "The Usage of "
649                  + "'begin' & 'firstRun' " * int( bool( begin and
650                                                         firstRun ) )
651                  + "and " * int( bool( ( begin and firstRun ) and
652                                        ( end and lastRun ) ) )
653                  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
654                  + "is ambiguous." )
655          raise AllInOneError( msg )
656
657      if begin or end:
658          runList = [ self.__findInJson(run, "run_number") for run in self.__getRunList() ]
659
660      if begin:
661          lastdate = begin
662          for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months after begin
663              firstdate = lastdate
664              lastdate = self.__dateString(self.__datetime(firstdate) + datetime.timedelta(delta))
665              dasQuery_begin = "run date between[%s,%s] instance=%s" % (firstdate, lastdate, self.__dasinstance)
666              begindata = self.__getData(dasQuery_begin)
667              if len(begindata) > 0:
668                  begindata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
669                  try:
670                      runIndex = self.__find_ge( runList, self.__findInJson(begindata[0], ["run", "run_number"]))
671                  except ValueError:
672                      msg = ( "Your 'begin' is after the creation time of the last "
673                              "run in the dataset\n'%s'"%( self.__name ) )
674                      raise AllInOneError( msg )
675                  firstRun = runList[runIndex]
676                  begin = None
677                  break
678
679          if begin:
680              raise AllInOneError("No runs within a reasonable time interval after your 'begin'. "
681                                  "Try using a 'begin' that has runs soon after it (within 2 months at most)")
682
683      if end:
684          firstdate = end
685          for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months before end
686              lastdate = firstdate
687              firstdate = self.__dateString(self.__datetime(lastdate) - datetime.timedelta(delta))
688              dasQuery_end = "run date between[%s,%s] instance=%s" % (firstdate, lastdate, self.__dasinstance)
689              enddata = self.__getData(dasQuery_end)
690              if len(enddata) > 0:
691                  enddata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
692                  try:
693                      runIndex = self.__find_lt( runList, self.__findInJson(enddata[-1], ["run", "run_number"]))
694                  except ValueError:
695                      msg = ( "Your 'end' is before the creation time of the first "
696                              "run in the dataset\n'%s'"%( self.__name ) )
697                      raise AllInOneError( msg )
698                  lastRun = runList[runIndex]
699                  end = None
700                  break
701
702          if end:
703              raise AllInOneError("No runs within a reasonable time interval before your 'end'. "
704                                  "Try using an 'end' that has runs soon before it (within 2 months at most)")
705
706      if shortTuple:
707          return firstRun, lastRun
708      else:
709          return begin, end, firstRun, lastRun
def dataset.Dataset.createdatasetfile_hippy (   self,
  filename,
  filesperjob,
  firstrun,
  lastrun 
)

Definition at line 852 of file dataset.py.

References dataset.Dataset.__chunks(), and dataset.Dataset.fileList().

853  def createdatasetfile_hippy(self, filename, filesperjob, firstrun, lastrun):
854      with open(filename, "w") as f:
855          for job in self.__chunks(self.fileList(firstRun=firstrun, lastRun=lastrun, forcerunselection=True), filesperjob):
856              f.write(",".join("'{}'".format(file) for file in job)+"\n")
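
Each line of the output file describes one job: a comma-separated list of single-quoted file names. A standalone sketch with hypothetical file names and two files per job:

    files = ["/store/data/a.root", "/store/data/b.root", "/store/data/c.root"]

    def chunks(lst, n):   # same chunking as __chunks above
        for i in range(0, len(lst), n):
            yield lst[i:i+n]

    with open("dataset_hippy.txt", "w") as f:   # hypothetical output name
        for job in chunks(files, 2):
            f.write(",".join("'{}'".format(name) for name in job) + "\n")
    # line 1: '/store/data/a.root','/store/data/b.root'
    # line 2: '/store/data/c.root'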
def dataset.Dataset.datasetSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  crab = False,
  parent = False 
)

Definition at line 729 of file dataset.py.

References dataset.Dataset.__createSnippet(), dataset.Dataset.__filename, dataset.Dataset.__name, dataset.Dataset.__official, dataset.Dataset.__origName, dataset.Dataset.__predefined, and dataset.Dataset.dump_cff().

Referenced by dataset.Dataset.parentDataset().

730                     firstRun = None, lastRun = None, crab = False, parent = False ):
731      if not firstRun: firstRun = None
732      if not lastRun: lastRun = None
733      if not begin: begin = None
734      if not end: end = None
735      if self.__predefined and (jsonPath or begin or end or firstRun or lastRun):
736          msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun' "
737                  "only work for official datasets, not predefined _cff.py files" )
738          raise AllInOneError( msg )
739      if self.__predefined and parent:
740          with open(self.__filename) as f:
741              if "secFiles.extend" not in f.read():
742                  msg = ("The predefined dataset '%s' does not contain secondary files, "
743                         "which your validation requires!") % self.__name
744                  if self.__official:
745                      self.__name = self.__origName
746                      self.__predefined = False
747                      print(msg)
748                      print ("Retrieving the files from DAS. You will be asked if you want "
749                             "to overwrite the old dataset.\n"
750                             "It will still be compatible with validations that don't need secondary files.")
751                  else:
752                      raise AllInOneError(msg)
753
754      if self.__predefined:
755          snippet = ("process.load(\"Alignment.OfflineValidation.%s_cff\")\n"
756                     "process.maxEvents = cms.untracked.PSet(\n"
757                     "    input = cms.untracked.int32(int(.oO[nEvents]Oo. / .oO[parallelJobs]Oo.))\n"
758                     ")\n"
759                     "process.source.skipEvents=cms.untracked.uint32(int(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.))"
760                     %(self.__name))
761          if not parent:
762              with open(self.__filename) as f:
763                  if "secFiles.extend" in f.read():
764                      snippet += "\nprocess.source.secondaryFileNames = cms.untracked.vstring()"
765          return snippet
766      theMap = { "process": "process.",
767                 "tab": " " * len( "process." ),
768                 "nEvents": ".oO[nEvents]Oo. / .oO[parallelJobs]Oo.",
769                 "skipEventsString": "process.source.skipEvents=cms.untracked.uint32(int(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.))\n",
770                 "importCms": "",
771                 "header": ""
772               }
773      datasetSnippet = self.__createSnippet( jsonPath = jsonPath,
774                                             begin = begin,
775                                             end = end,
776                                             firstRun = firstRun,
777                                             lastRun = lastRun,
778                                             repMap = theMap,
779                                             crab = crab,
780                                             parent = parent )
781      if jsonPath == "" and begin == "" and end == "" and firstRun == "" and lastRun == "":
782          try:
783              self.dump_cff(parent = parent)
784          except AllInOneError as e:
785              print("Can't store the dataset as a cff:")
786              print(e)
787              print("This may be inconvenient in the future, but will not cause a problem for this validation.")
788      return datasetSnippet
def dataset.Dataset.dataType (   self)

Definition at line 710 of file dataset.py.

References dataset.Dataset.__dataType, and dataset.Dataset.__getDataType().

711  def dataType( self ):
712      if not self.__dataType:
713          self.__dataType = self.__getDataType()
714      return self.__dataType
def dataset.Dataset.dump_cff (   self,
  outName = None,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  parent = False 
)

Definition at line 791 of file dataset.py.

References dataset.Dataset.__cmssw, dataset.Dataset.__createSnippet(), dataset.Dataset.__dataType, dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__magneticField, and dataset.Dataset.__name.

Referenced by dataset.Dataset.datasetSnippet().

792  end = None, firstRun = None, lastRun = None, parent = False ):
793  if outName == None:
794  outName = "Dataset" + self.__name.replace("/", "_")
795  packageName = os.path.join( "Alignment", "OfflineValidation" )
796  if not os.path.exists( os.path.join(
797  self.__cmssw, "src", packageName ) ):
798  msg = ("You try to store the predefined dataset'%s'.\n"
799  "For that you need to check out the package '%s' to your "
800  "private relase area in\n"%( outName, packageName )
801  + self.__cmssw )
802  raise AllInOneError( msg )
803  theMap = { "process": "",
804  "tab": "",
805  "nEvents": str( -1 ),
806  "skipEventsString": "",
807  "importCms": "import FWCore.ParameterSet.Config as cms\n",
808  "header": "#Do not delete or (unless you know what you're doing) change these comments\n"
809  "#%(name)s\n"
810  "#data type: %(dataType)s\n"
811  "#magnetic field: .oO[magneticField]Oo.\n" #put in magnetic field later
812  %{"name": self.__name, #need to create the snippet before getting the magnetic field
813  "dataType": self.__dataType} #so that we know the first and last runs
814  }
815  dataset_cff = self.__createSnippet( jsonPath = jsonPath,
816  begin = begin,
817  end = end,
818  firstRun = firstRun,
819  lastRun = lastRun,
820  repMap = theMap,
821  parent = parent)
822  magneticField = self.__magneticField
823  if magneticField == "MagneticField":
824  magneticField = "%s, %s #%s" % (magneticField,
825  str(self.__getMagneticFieldForRun()).replace("\n"," ").split("#")[0].strip(),
826  "Use MagneticField_cff.py; the number is for determining which track selection to use."
827  )
828  dataset_cff = dataset_cff.replace(".oO[magneticField]Oo.",magneticField)
829  filePath = os.path.join( self.__cmssw, "src", packageName,
830  "python", outName + "_cff.py" )
831  if os.path.exists( filePath ):
832  existMsg = "The predefined dataset '%s' already exists.\n"%( outName )
833  askString = "Do you want to overwrite it? [y/n]\n"
834  inputQuery = existMsg + askString
835  while True:
836  userInput = raw_input( inputQuery ).lower()
837  if userInput == "y":
838  break
839  elif userInput == "n":
840  return
841  else:
842  inputQuery = askString
843  print ( "The predefined dataset '%s' will be stored in the file\n"
844  %( outName )
845  + filePath +
846  "\nFor future use you have to do 'scram b'." )
847  print()
848  theFile = open( filePath, "w" )
849  theFile.write( dataset_cff )
850  theFile.close()
851  return
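A minimal usage sketch for dump_cff, assuming a CMSSW environment with Alignment/OfflineValidation checked out and the module importable from its TkAlAllInOneTool location (the dataset name is illustrative):

# Hypothetical usage; requires $CMSSW_BASE/src/Alignment/OfflineValidation.
from Alignment.OfflineValidation.TkAlAllInOneTool.dataset import Dataset

ds = Dataset("/MinimumBias/Run2018A-TkAlMinBias-v1/ALCARECO")
# Writes <outName>_cff.py into Alignment/OfflineValidation/python/,
# asking before overwriting an existing file; then run 'scram b'.
ds.dump_cff(firstRun=315252, lastRun=316995)
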
def dataset.Dataset.extractFileSizes (   self)
Get the file size for each file, from the eos ls -l command.

Definition at line 309 of file dataset.py.

References dataset.EOSDataset.castorDir, and dataset.Dataset.castorDir.

310  def extractFileSizes(self):
311  '''Get the file size for each file, from the eos ls -l command.'''
312  # EOS command does not work in tier3
313  lsout = castortools.runXRDCommand(self.castorDir,'dirlist')[0]
314  lsout = lsout.split('\n')
315  self.filesAndSizes = {}
316  for entry in lsout:
317  values = entry.split()
318  if( len(values) != 5):
319  continue
320  # using full abs path as a key.
321  file = '/'.join([self.lfnDir, values[4].split("/")[-1]])
322  size = values[1]
323  self.filesAndSizes[file] = size
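The loop above assumes a five-column ls -l style listing, with the size in the second column and the full path in the last. A standalone sketch of the same parsing on made-up listing text (the real method obtains the listing from castortools.runXRDCommand):

lsout = ("-rw-r--r-- 2147483648 user group /eos/cms/store/x/file_1.root\n"
         "-rw-r--r-- 1073741824 user group /eos/cms/store/x/file_2.root")

filesAndSizes = {}
for entry in lsout.split('\n'):
    values = entry.split()
    if len(values) != 5:      # skip anything that is not a 5-column line
        continue
    # key: LFN directory plus the basename of the listed path
    filename = '/'.join(["/store/x", values[4].split("/")[-1]])
    filesAndSizes[filename] = values[1]
print(filesAndSizes)
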
def dataset.Dataset.fileInfoList (   self,
  parent = False 
)

Definition at line 914 of file dataset.py.

References dataset.Dataset.__dasLimit, and dataset.Dataset.__getFileInfoList().

Referenced by dataset.Dataset.fileList().

915  def fileInfoList( self, parent = False ):
916  return self.__getFileInfoList( self.__dasLimit, parent )
def dataset.Dataset.fileList (   self,
  parent = False,
  firstRun = None,
  lastRun = None,
  forcerunselection = False 
)

Definition at line 885 of file dataset.py.

References dataset.Dataset.__findInJson(), dataset.Dataset.fileInfoList(), dataset.Dataset.getrunnumberfromfilename(), and print().

Referenced by dataset.Dataset.__fileListSnippet(), and dataset.Dataset.createdatasetfile_hippy().

886  def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
887  fileList = [ self.__findInJson(fileInfo,"name")
888  for fileInfo in self.fileInfoList(parent) ]
889 
890  if firstRun or lastRun:
891  if not firstRun: firstRun = -1
892  if not lastRun: lastRun = float('infinity')
893  unknownfilenames, reasons = [], set()
894  for filename in fileList[:]:
895  try:
896  if not firstRun <= self.getrunnumberfromfilename(filename) <= lastRun:
897  fileList.remove(filename)
898  except AllInOneError as e:
899  if forcerunselection: raise
900  unknownfilenames.append(e.message.split("\n")[1])
901  reasons .add (e.message.split("\n")[2])
902  if reasons:
903  if len(unknownfilenames) == len(fileList):
904  print("Could not figure out the run numbers of any of the filenames for the following reason(s):")
905  else:
906  print("Could not figure out the run numbers of the following filenames:")
907  for filename in unknownfilenames:
908  print(" "+filename)
909  print("for the following reason(s):")
910  for reason in reasons:
911  print(" "+reason)
912  print("Using the files anyway. The runs will be filtered at the CMSSW level.")
913  return fileList
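A usage sketch restricting the file list to a run window (dataset name illustrative). Files whose run number cannot be determined are kept, with a warning, unless forcerunselection is True, in which case the AllInOneError propagates:

ds = Dataset("/MinimumBias/Run2018A-TkAlMinBias-v1/ALCARECO")
files = ds.fileList(firstRun=315252, lastRun=316995)
strict = ds.fileList(firstRun=315252, lastRun=316995, forcerunselection=True)
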
def dataset.Dataset.forcerunrange (   self,
  firstRun,
  lastRun,
  s 
)
s must be in the format run1:lum1-run2:lum2

Definition at line 326 of file dataset.py.

References dataset.Dataset.__firstusedrun, dataset.Dataset.__lastusedrun, and submitPVValidationJobs.split().

Referenced by dataset.Dataset.getForceRunRangeFunction().

327  def forcerunrange(self, firstRun, lastRun, s):
328  """s must be in the format run1:lum1-run2:lum2"""
329  s = s.group()
330  run1 = s.split("-")[0].split(":")[0]
331  lum1 = s.split("-")[0].split(":")[1]
332  try:
333  run2 = s.split("-")[1].split(":")[0]
334  lum2 = s.split("-")[1].split(":")[1]
335  except IndexError:
336  run2 = run1
337  lum2 = lum1
338  if int(run2) < firstRun or int(run1) > lastRun:
339  return ""
340  if int(run1) < firstRun or firstRun < 0:
341  run1 = firstRun
342  lum1 = 1
343  if int(run2) > lastRun:
344  run2 = lastRun
345  lum2 = "max"
346  if int(run1) < self.__firstusedrun or self.__firstusedrun < 0:
347  self.__firstusedrun = int(run1)
348  if int(run2) > self.__lastusedrun:
349  self.__lastusedrun = int(run2)
350  return "%s:%s-%s:%s" % (run1, lum1, run2, lum2)
def dataset.Dataset.getfiles (   self,
  usecache 
)

Definition at line 211 of file dataset.py.

References dataset.Dataset.dasinstance, dataset.dasquery(), dataset.Dataset.datasetname, dataset.Dataset.filenamebase, dataset.findinjson(), and print().

212  def getfiles(self, usecache):
213  filename = os.path.join(os.environ["CMSSW_BASE"], "src", "Alignment", "CommonAlignment", "data", self.filenamebase+".csv")
214  if not usecache:
215  try:
216  os.remove(filename)
217  except OSError as e:
218  if os.path.exists(filename):
219  raise
220 
221  result = []
222  try:
223  with open(filename) as f:
224  for row in csv.DictReader(f):
225  result.append(DataFile(**row))
226  return result
227  except IOError:
228  pass
229 
230  query = "file dataset={} instance={} detail=true | grep file.name, file.nevents".format(self.datasetname, self.dasinstance)
231  dasoutput = dasquery(query)
232  if not dasoutput:
233  raise DatasetError("No files are available for the dataset '{}'. This can be "
234  "due to a typo or due to a DAS problem. Please check the "
235  "spelling of the dataset and/or try again.".format(datasetname))
236  result = [DataFile(findinjson(_, "file", "name"), findinjson(_, "file", "nevents")) for _ in dasoutput if int(findinjson(_, "file", "nevents"))]
237  try:
238  with open(filename, "w") as f:
239  writer = csv.DictWriter(f, ("filename", "nevents", "runs"))
240  writer.writeheader()
241  for datafile in result:
242  writer.writerow(datafile.getdict())
243  except Exception as e:
244  print("Couldn't write the dataset csv file:\n\n{}".format(e))
245  return result
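A sketch of the csv-backed caching behaviour (dataset name illustrative; the DataFile attribute names follow the csv header written above):

ds = Dataset("/MinimumBias/Run2018A-TkAlMinBias-v1/ALCARECO")
files = ds.getfiles(usecache=True)    # read the cached csv if present, else query DAS
files = ds.getfiles(usecache=False)   # remove any stale csv first, then re-query DAS
for f in files[:3]:
    print(f.filename, f.nevents)
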
def dataset.Dataset.getForceRunRangeFunction (   self,
  firstRun,
  lastRun 
)

Definition at line 351 of file dataset.py.

References dataset.Dataset.forcerunrange().

Referenced by dataset.Dataset.__lumiSelectionSnippet().

352  def getForceRunRangeFunction(self, firstRun, lastRun):
353  def forcerunrangefunction(s):
354  return self.forcerunrange(firstRun, lastRun, s)
355  return forcerunrangefunction
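forcerunrange is written as a match handler (note the s.group() call), so the closure returned here is meant to be handed to re.sub over a lumi string. A minimal sketch; the pattern below is an assumption about how the caller in __lumiSelectionSnippet tokenizes "run1:lum1-run2:lum2" entries:

import re

# ds is a Dataset instance, constructed as in the earlier examples.
lumimask = "315000:1-315100:50 316000:1-316200:max"
clamp = ds.getForceRunRangeFunction(firstRun=315050, lastRun=316100)
print(re.sub(r"\d+:\w+-\d+:\w+", clamp, lumimask))
# ranges entirely outside [315050, 316100] collapse to "";
# partially overlapping ones are clamped, e.g. 315000:1-315100:50
# becomes 315050:1-315100:50.
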
def dataset.Dataset.getPrimaryDatasetEntries (   self)

Definition at line 329 of file dataset.py.

References runall.testit.report, WorkFlowRunner.WorkFlowRunner.report, ALIUtils.report, and dataset.BaseDataset.report.

330  def getPrimaryDatasetEntries(self):
331  if self.report is not None and self.report:
332  return int(self.report.get('PrimaryDatasetEntries',-1))
333  return -1
334 
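A worked example under the assumption that a report dictionary has been attached to the dataset (e.g. by the crab tools that fill self.report):

ds.report = {'PrimaryDatasetEntries': '123456'}
print(ds.getPrimaryDatasetEntries())   # -> 123456
ds.report = None
print(ds.getPrimaryDatasetEntries())   # -> -1
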
def dataset.Dataset.getrunnumberfromfilename (   filename)
static

Definition at line 858 of file dataset.py.

References python.cmstools.all(), and join().

Referenced by dataset.Dataset.fileList().

859  def getrunnumberfromfilename(filename):
860  parts = filename.split("/")
861  result = error = None
862  if parts[0] != "" or parts[1] != "store":
863  error = "does not start with /store"
864  elif parts[2] in ["mc", "relval"]:
865  result = 1
866  elif not parts[-1].endswith(".root"):
867  error = "does not end with something.root"
868  elif len(parts) != 12:
869  error = "should be exactly 11 slashes counting the first one"
870  else:
871  runnumberparts = parts[-5:-2]
872  if not all(len(part)==3 for part in runnumberparts):
873  error = "the 3 directories {} do not have length 3 each".format("/".join(runnumberparts))
874  try:
875  result = int("".join(runnumberparts))
876  except ValueError:
877  error = "the 3 directories {} do not form an integer".format("/".join(runnumberparts))
878 
879  if error:
880  error = "could not figure out which run number this file is from:\n{}\n{}".format(filename, error)
881  raise AllInOneError(error)
882 
883  return result
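For data files the run number is rebuilt by concatenating the three 3-digit directories that sit five to three levels above the file name, while MC and RelVal files map to run 1. A worked example with illustrative LFNs:

lfn = "/store/data/Run2018A/ZeroBias/ALCARECO/TkAlMinBias-v1/000/315/252/00000/f.root"
print(Dataset.getrunnumberfromfilename(lfn))                       # 000/315/252 -> 315252
print(Dataset.getrunnumberfromfilename("/store/mc/X/Y/Z/f.root"))  # MC -> 1
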
def dataset.Dataset.headercomment (   self)

Definition at line 247 of file dataset.py.

References dataset.Dataset.datasetname.

248  def headercomment(self):
249  return self.datasetname
def dataset.Dataset.magneticField (   self)

Definition at line 715 of file dataset.py.

References dataset.Dataset.__getMagneticField(), and dataset.Dataset.__magneticField.

716  def magneticField( self ):
717  if not self.__magneticField:
718  self.__magneticField = self.__getMagneticField()
719  return self.__magneticField
def dataset.Dataset.magneticFieldForRun (   self,
  run = -1 
)

Definition at line 720 of file dataset.py.

References dataset.Dataset.__getMagneticFieldForRun().

721  def magneticFieldForRun( self, run = -1 ):
722  return self.__getMagneticFieldForRun(run)
def dataset.Dataset.name (   self)

Definition at line 917 of file dataset.py.

References dataset.Dataset.__name.

Referenced by config.CFG.__str__(), geometryComparison.GeometryComparison.createScript(), validation.Sample.digest(), primaryVertexResolution.PrimaryVertexResolution.getRepMap(), primaryVertexValidation.PrimaryVertexValidation.getRepMap(), zMuMuValidation.ZMuMuValidation.getRepMap(), and VIDSelectorBase.VIDSelectorBase.initialize().

918  def name( self ):
919  return self.__name
def dataset.Dataset.parentDataset (   self)

Definition at line 723 of file dataset.py.

References dataset.Dataset.__getParentDataset(), dataset.Dataset.__parentDataset, and dataset.Dataset.datasetSnippet().

Referenced by dataset.Dataset.__getFileInfoList().

724  def parentDataset( self ):
725  if not self.__parentDataset:
726  self.__parentDataset = self.__getParentDataset()
727  return self.__parentDataset
def dataset.Dataset.predefined (   self)

Definition at line 920 of file dataset.py.

References dataset.Dataset.__predefined.

921  def predefined( self ):
922  return self.__predefined
def dataset.Dataset.printInfo (   self)

Definition at line 324 of file dataset.py.

References dataset.EOSDataset.castorDir, dataset.Dataset.castorDir, dataset.Dataset.lfnDir, dataset.BaseDataset.name, and print().

325  def printInfo(self):
326  print('sample : ' + self.name)
327  print('LFN : ' + self.lfnDir)
328  print('Castor path : ' + self.castorDir)
def dataset.Dataset.runList (   self)

Definition at line 924 of file dataset.py.

References dataset.Dataset.__getRunList(), and print().

925  def runList( self ):
926  return self.__getRunList()
927 

Member Data Documentation

dataset.Dataset.__cmssw
private

Definition at line 28 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField(), and dataset.Dataset.dump_cff().

dataset.Dataset.__cmsswrelease
private

Definition at line 29 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField().

dataset.Dataset.__dasinstance
private

Definition at line 27 of file dataset.py.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), and dataset.Dataset.convertTimeToRun().

dataset.Dataset.__dasLimit
private

Definition at line 26 of file dataset.py.

Referenced by dataset.Dataset.fileInfoList().

dataset.Dataset.__dataType
private

Definition at line 85 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dataType(), and dataset.Dataset.dump_cff().

tuple dataset.Dataset.__dummy_source_template
staticprivate
Initial value:
1 = ("readFiles = cms.untracked.vstring()\n"
2  "secFiles = cms.untracked.vstring()\n"
3  "%(process)ssource = cms.Source(\"PoolSource\",\n"
4  "%(tab)s secondaryFileNames ="
5  "secFiles,\n"
6  "%(tab)s fileNames = readFiles\n"
7  ")\n"
8  "readFiles.extend(['dummy_File.root'])\n"
9  "%(process)smaxEvents = cms.untracked.PSet( "
10  "input = cms.untracked.int32(int(%(nEvents)s)) )\n"
11  "%(skipEventsString)s\n")

Definition at line 113 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().
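The template is filled with ordinary %-style dictionary formatting, using the repMap keys built in datasetSnippet and dump_cff above. A minimal sketch (poking the name-mangled class attribute directly, for illustration only):

filled = Dataset._Dataset__dummy_source_template % {
    "process": "process.",
    "tab": " " * len("process."),
    "nEvents": "-1",
    "skipEventsString": "",
}
print(filled)
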

dataset.Dataset.__filename
private

Definition at line 55 of file dataset.py.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), and dataset.Dataset.datasetSnippet().

dataset.Dataset.__firstusedrun
private

Definition at line 30 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__lumiSelectionSnippet(), and dataset.Dataset.forcerunrange().

dataset.Dataset.__inputMagneticField
private

Definition at line 83 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), and dataset.Dataset.__lumiSelectionSnippet().

dataset.Dataset.__lastusedrun
private

Definition at line 31 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__lumiSelectionSnippet(), and dataset.Dataset.forcerunrange().

dataset.Dataset.__magneticField
private

Definition at line 86 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dump_cff(), and dataset.Dataset.magneticField().

dataset.Dataset.__name
private

Definition at line 24 of file dataset.py.

Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), dataset.Dataset.convertTimeToRun(), dataset.Dataset.datasetSnippet(), dataset.Dataset.dump_cff(), Config.Process.dumpConfig(), Config.Process.dumpPython(), genericValidation.ValidationWithPlotsSummaryBase.SummaryItem.name(), dataset.Dataset.name(), Config.Process.name_(), and Config.Process.splitPython().

dataset.Dataset.__official
private

Definition at line 36 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__origName
private

Definition at line 25 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__parentDataset
private

Definition at line 32 of file dataset.py.

Referenced by dataset.Dataset.parentDataset().

dataset.Dataset.__predefined
private

Definition at line 52 of file dataset.py.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.datasetSnippet(), and dataset.Dataset.predefined().

dataset.Dataset.bad_files

Definition at line 285 of file dataset.py.

dataset.Dataset.castorDir

Definition at line 269 of file dataset.py.

Referenced by dataset.Dataset.extractFileSizes(), and dataset.Dataset.printInfo().

dataset.Dataset.dasinstance

Definition at line 208 of file dataset.py.

Referenced by dataset.Dataset.getfiles().

dataset.Dataset.datasetname

Definition at line 200 of file dataset.py.

Referenced by dataset.Dataset.getfiles(), and dataset.Dataset.headercomment().

dataset.Dataset.filenamebase

Definition at line 203 of file dataset.py.

Referenced by dataset.Dataset.getfiles().

dataset.Dataset.files

Definition at line 276 of file dataset.py.

dataset.Dataset.filesAndSizes

Definition at line 314 of file dataset.py.

dataset.Dataset.good_files

Definition at line 286 of file dataset.py.

dataset.Dataset.lfnDir

Definition at line 268 of file dataset.py.

Referenced by dataset.Dataset.printInfo().

dataset.Dataset.maskExists

Definition at line 270 of file dataset.py.

dataset.Dataset.official

Definition at line 202 of file dataset.py.

dataset.Dataset.report

Definition at line 271 of file dataset.py.

Referenced by addOnTests.testit.run().