
dataset.Dataset Class Reference
Inheritance diagram for dataset.Dataset:
dataset.BaseDataset

Public Member Functions

def __init__ (self, datasetName, dasLimit=0, tryPredefinedFirst=True, cmssw=os.environ["CMSSW_BASE"], cmsswrelease=os.environ["CMSSW_RELEASE_BASE"], magneticfield=None, dasinstance=None)
 
def __init__ (self, name, user, pattern='.*root')
 
def buildListOfBadFiles (self)
 
def buildListOfFiles (self, pattern='.*root')
 
def convertTimeToRun (self, begin=None, end=None, firstRun=None, lastRun=None, shortTuple=True)
 
def createdatasetfile_hippy (self, filename, filesperjob, firstrun, lastrun)
 
def datasetSnippet (self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, crab=False, parent=False)
 
def dataType (self)
 
def dump_cff (self, outName=None, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, parent=False)
 
def extractFileSizes (self)
 
def fileInfoList (self, parent=False)
 
def fileList (self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
 
def forcerunrange (self, firstRun, lastRun, s)
 
def getForceRunRangeFunction (self, firstRun, lastRun)
 
def getPrimaryDatasetEntries (self)
 
def magneticField (self)
 
def magneticFieldForRun (self, run=-1)
 
def name (self)
 
def parentDataset (self)
 
def predefined (self)
 
def printInfo (self)
 
def runList (self)
 
- Public Member Functions inherited from dataset.BaseDataset
def __init__ (self, name, user, pattern='.*root', run_range=None, dbsInstance=None)
 
def buildListOfBadFiles (self)
 
def buildListOfFiles (self, pattern)
 
def extractFileSizes (self)
 
def getPrimaryDatasetEntries (self)
 
def listOfFiles (self)
 
def listOfGoodFiles (self)
 
def listOfGoodFilesWithPrescale (self, prescale)
 
def printFiles (self, abspath=True, info=True)
 
def printInfo (self)
 

Static Public Member Functions

def getrunnumberfromfilename (filename)
 

Public Attributes

 bad_files
 
 castorDir
 
 files
 
 filesAndSizes
 
 good_files
 
 lfnDir
 
 maskExists
 
 report
 
- Public Attributes inherited from dataset.BaseDataset
 bad_files
 
 dbsInstance
 
 files
 
 filesAndSizes
 
 good_files
 
 name
 
 pattern
 
 primaryDatasetEntries
 
 report
 
 run_range
 
 user
 

Static Public Attributes

 dasData = das_client.get_data(dasQuery, dasLimit)
 
 error = self.__findInJson(jsondict,["data","error"])
 
int i = 0
 
 jsondict = json.loads( dasData )
 
string jsonfile = "das_query_output_%i.txt"
 
 jsonfile = jsonfile%i
 
 jsonstr = self.__findInJson(jsondict,"reason")
 
string msg = "The DAS query returned an error. The output is very long, and has been stored in:\n"
 
 theFile = open( jsonfile, "w" )
 

Private Member Functions

def __chunks (self, theList, n)
 
def __createSnippet (self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, repMap=None, crab=False, parent=False)
 
def __dateString (self, date)
 
def __datetime (self, stringForDas)
 
def __fileListSnippet (self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
 
def __find_ge (self, a, x)
 
def __find_lt (self, a, x)
 
def __findInJson (self, jsondict, strings)
 
def __getData (self, dasQuery, dasLimit=0)
 
def __getDataType (self)
 
def __getFileInfoList (self, dasLimit, parent=False)
 
def __getMagneticField (self)
 
def __getMagneticFieldForRun (self, run=-1, tolerance=0.5)
 
def __getParentDataset (self)
 
def __getRunList (self)
 
def __lumiSelectionSnippet (self, jsonPath=None, firstRun=None, lastRun=None)
 

Private Attributes

 __cmssw
 
 __cmsswrelease
 
 __dasinstance
 
 __dasLimit
 
 __dataType
 
 __filename
 
 __firstusedrun
 
 __inputMagneticField
 
 __lastusedrun
 
 __magneticField
 
 __name
 
 __official
 
 __origName
 
 __parentDataset
 
 __predefined
 

Static Private Attributes

tuple __dummy_source_template
 
 __source_template
 

Detailed Description

Definition at line 17 of file dataset.py.

Constructor & Destructor Documentation

def dataset.Dataset.__init__ (   self,
  datasetName,
  dasLimit = 0,
  tryPredefinedFirst = True,
  cmssw = os.environ["CMSSW_BASE"],
  cmsswrelease = os.environ["CMSSW_RELEASE_BASE"],
  magneticfield = None,
  dasinstance = None 
)

Definition at line 20 of file dataset.py.

Referenced by dataset.Dataset.__init__().

20  magneticfield = None, dasinstance = None):
21  self.__name = datasetName
22  self.__origName = datasetName
23  self.__dasLimit = dasLimit
24  self.__dasinstance = dasinstance
25  self.__cmssw = cmssw
26  self.__cmsswrelease = cmsswrelease
27  self.__firstusedrun = None
28  self.__lastusedrun = None
29  self.__parentDataset = None
30 
31  # check, if dataset name matches CMS dataset naming scheme
32  if re.match( r'/.+/.+/.+', self.__name ):
33  self.__official = True
34  fileName = "Dataset" + self.__name.replace("/","_") + "_cff.py"
35  else:
36  self.__official = False
37  fileName = self.__name + "_cff.py"
38 
39  searchPath1 = os.path.join( self.__cmssw, "python",
40  "Alignment", "OfflineValidation",
41  fileName )
42  searchPath2 = os.path.join( self.__cmssw, "src",
43  "Alignment", "OfflineValidation",
44  "python", fileName )
45  searchPath3 = os.path.join( self.__cmsswrelease,
46  "python", "Alignment",
47  "OfflineValidation", fileName )
48  if self.__official and not tryPredefinedFirst:
49  self.__predefined = False
50  elif os.path.exists( searchPath1 ):
51  self.__predefined = True
52  self.__filename = searchPath1
53  elif os.path.exists( searchPath2 ):
54  msg = ("The predefined dataset '%s' does exist in '%s', but "
55  "you need to run 'scram b' first."
56  %( self.__name, searchPath2 ))
57  if self.__official:
58  print msg
59  print "Getting the data from DAS again. To go faster next time, run scram b."
60  else:
61  raise AllInOneError( msg )
62  elif os.path.exists( searchPath3 ):
63  self.__predefined = True
64  self.__filename = searchPath3
65  elif self.__official:
66  self.__predefined = False
67  else:
68  msg = ("The predefined dataset '%s' does not exist. Please "
69  "create it first or check for typos."%( self.__name ))
70  raise AllInOneError( msg )
71 
72  if self.__predefined and self.__official:
73  self.__name = "Dataset" + self.__name.replace("/","_")
74 
75  if magneticfield is not None:
76  try:
77  magneticfield = float(magneticfield)
78  except ValueError:
79  raise AllInOneError("Bad magneticfield {} which can't be converted to float".format(magneticfield))
80  self.__inputMagneticField = magneticfield
81 
82  self.__dataType = self.__getDataType()
83  self.__magneticField = self.__getMagneticField()
84 
85 
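The check above follows the CMS dataset naming convention /PrimaryDataset/ProcessedDataset/DataTier. A minimal standalone sketch (not part of dataset.py) of how an official name is recognised and turned into a predefined _cff.py file name; the example dataset name is hypothetical:

import re

def predefined_cff_name(dataset_name):
    # Official CMS dataset names look like /Primary/Processed/TIER.
    if re.match(r'/.+/.+/.+', dataset_name):
        return "Dataset" + dataset_name.replace("/", "_") + "_cff.py"
    # Otherwise the name is already the predefined dataset's base name.
    return dataset_name + "_cff.py"

print(predefined_cff_name("/MinimumBias/Run2018A-TkAlMinBias-v1/ALCARECO"))
# Dataset_MinimumBias_Run2018A-TkAlMinBias-v1_ALCARECO_cff.py
print(predefined_cff_name("MyPredefinedDataset"))
# MyPredefinedDataset_cff.py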
def dataset.Dataset.__init__ (   self,
  name,
  user,
  pattern = '.*root' 
)

Definition at line 264 of file dataset.py.

References dataset.Dataset.__init__().

264  def __init__(self, name, user, pattern='.*root'):
265  self.lfnDir = castorBaseDir(user) + name
266  self.castorDir = castortools.lfnToCastor( self.lfnDir )
267  self.maskExists = False
268  self.report = None
269  super(Dataset, self).__init__(name, user, pattern)
270 

Member Function Documentation

def dataset.Dataset.__chunks (   self,
  theList,
  n 
)
private
Yield successive n-sized chunks from theList.

Definition at line 86 of file dataset.py.

Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.__lumiSelectionSnippet(), and dataset.Dataset.createdatasetfile_hippy().

86  def __chunks( self, theList, n ):
87  """ Yield successive n-sized chunks from theList.
88  """
89  for i in xrange( 0, len( theList ), n ):
90  yield theList[i:i+n]
91 
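A minimal standalone sketch (not part of dataset.py) of the same chunking logic, using Python 3's range in place of xrange, showing how a file list is split into groups of at most n entries:

def chunks(the_list, n):
    # Yield successive n-sized slices of the_list.
    for i in range(0, len(the_list), n):
        yield the_list[i:i + n]

files = ["file_%i.root" % i for i in range(7)]
for group in chunks(files, 3):
    print(group)
# ['file_0.root', 'file_1.root', 'file_2.root']
# ['file_3.root', 'file_4.root', 'file_5.root']
# ['file_6.root']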
def dataset.Dataset.__createSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  repMap = None,
  crab = False,
  parent = False 
)
private

Definition at line 242 of file dataset.py.

References dataset.Dataset.__dummy_source_template, dataset.Dataset.__fileListSnippet(), dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.__source_template, electrons_cff.bool, dataset.Dataset.convertTimeToRun(), and dataset.int.

Referenced by dataset.Dataset.__fileListSnippet(), dataset.Dataset.datasetSnippet(), and dataset.Dataset.dump_cff().

242  crab = False, parent = False ):
243 
244  if firstRun:
245  firstRun = int( firstRun )
246  if lastRun:
247  lastRun = int( lastRun )
248  if ( begin and firstRun ) or ( end and lastRun ):
249  msg = ( "The Usage of "
250  + "'begin' & 'firstRun' " * int( bool( begin and
251  firstRun ) )
252  + "and " * int( bool( ( begin and firstRun ) and
253  ( end and lastRun ) ) )
254  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
255  + "is ambigous." )
256  raise AllInOneError( msg )
257  if begin or end:
258  ( firstRun, lastRun ) = self.convertTimeToRun(
259  begin = begin, end = end, firstRun = firstRun,
260  lastRun = lastRun )
261  if ( firstRun and lastRun ) and ( firstRun > lastRun ):
262  msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
263  "chosen is greater than the upper time/runrange limit "
264  "('end'/'lastRun').")
265  raise AllInOneError( msg )
266 
267  lumiSecExtend = self.__lumiSelectionSnippet(jsonPath=jsonPath, firstRun=firstRun, lastRun=lastRun)
268  lumiStr = goodLumiSecStr = ""
269  if lumiSecExtend:
270  goodLumiSecStr = "lumiSecs = cms.untracked.VLuminosityBlockRange()\n"
271  lumiStr = " lumisToProcess = lumiSecs,\n"
272 
273  files = self.__fileListSnippet(crab=crab, parent=parent, firstRun=firstRun, lastRun=lastRun, forcerunselection=False)
274 
275  theMap = repMap
276  theMap["files"] = files
277  theMap["json"] = jsonPath
278  theMap["lumiStr"] = lumiStr
279  theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
280  theMap["lumiSecExtend"] = lumiSecExtend
281  if crab:
282  dataset_snippet = self.__dummy_source_template%( theMap )
283  else:
284  dataset_snippet = self.__source_template%( theMap )
285  return dataset_snippet
286 
def dataset.Dataset.__dateString (   self,
  date 
)
private

Definition at line 637 of file dataset.py.

References dataset.Dataset.convertTimeToRun(), and harvestTrackValidationPlots.str.

Referenced by dataset.Dataset.convertTimeToRun().

637  def __dateString(self, date):
638  return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)
639 
def dataset.Dataset.__datetime (   self,
  stringForDas 
)
private

Definition at line 628 of file dataset.py.

References dataset.int.

Referenced by dataset.Dataset.convertTimeToRun().

628  def __datetime(self, stringForDas):
629  if len(stringForDas) != 8:
630  raise AllInOneError(stringForDas + " is not a valid date string.\n"
631  + "DAS accepts dates in the form 'yyyymmdd'")
632  year = stringForDas[:4]
633  month = stringForDas[4:6]
634  day = stringForDas[6:8]
635  return datetime.date(int(year), int(month), int(day))
636 
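Taken together, __datetime and __dateString convert between the 'yyyymmdd' strings accepted by DAS and datetime.date objects. A standalone sketch (not part of dataset.py) of the round trip:

import datetime

def to_date(string_for_das):
    # DAS accepts dates in the form 'yyyymmdd'.
    if len(string_for_das) != 8:
        raise ValueError(string_for_das + " is not a valid date string")
    return datetime.date(int(string_for_das[:4]),
                         int(string_for_das[4:6]),
                         int(string_for_das[6:8]))

def to_string(date):
    # Zero-pad month and day so 2018-5-1 becomes '20180501'.
    return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)

d = to_date("20180501")
print(d)                                      # 2018-05-01
print(to_string(d + datetime.timedelta(10)))  # 20180511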
def dataset.Dataset.__fileListSnippet (   self,
  crab = False,
  parent = False,
  firstRun = None,
  lastRun = None,
  forcerunselection = False 
)
private

Definition at line 218 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__createSnippet(), dataset.Dataset.__name, dataset.Dataset.fileList(), join(), and list().

Referenced by dataset.Dataset.__createSnippet().

218  def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
219  if crab:
220  files = ""
221  else:
222  splitFileList = list( self.__chunks( self.fileList(firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
223  if not splitFileList:
224  raise AllInOneError("No files found for dataset {}. Check the spelling, or maybe specify another das instance?".format(self.__name))
225  fileStr = [ "',\n'".join( files ) for files in splitFileList ]
226  fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
227  for files in fileStr ]
228  files = "\n".join( fileStr )
229 
230  if parent:
231  splitParentFileList = list( self.__chunks( self.fileList(parent=True, firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
232  parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ]
233  parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
234  for parentFiles in parentFileStr ]
235  parentFiles = "\n".join( parentFileStr )
236  files += "\n\n" + parentFiles
237 
238  return files
239 
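The snippet produced here is plain CMSSW configuration text: the file list is split into chunks of at most 255 names, each wrapped in its own readFiles.extend( [...] ) call. A standalone sketch (not part of dataset.py) of that formatting, assuming a plain Python list of logical file names:

def chunks(the_list, n):
    for i in range(0, len(the_list), n):
        yield the_list[i:i + n]

def file_list_snippet(file_names):
    # Wrap each chunk of at most 255 files in its own readFiles.extend( [...] ).
    split_file_list = list(chunks(file_names, 255))
    blocks = ["',\n'".join(files) for files in split_file_list]
    blocks = ["readFiles.extend( [\n'" + files + "'\n] )" for files in blocks]
    return "\n".join(blocks)

print(file_list_snippet(["/store/data/a.root", "/store/data/b.root"]))
# readFiles.extend( [
# '/store/data/a.root',
# '/store/data/b.root'
# ] )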
def dataset.Dataset.__find_ge (   self,
  a,
  x 
)
private

Definition at line 294 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

294  def __find_ge( self, a, x):
295  'Find leftmost item greater than or equal to x'
296  i = bisect.bisect_left( a, x )
297  if i != len( a ):
298  return i
299  raise ValueError
300 
def dataset.Dataset.__find_lt (   self,
  a,
  x 
)
private

Definition at line 287 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

287  def __find_lt( self, a, x ):
288  'Find rightmost value less than x'
289  i = bisect.bisect_left( a, x )
290  if i:
291  return i-1
292  raise ValueError
293 
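Both helpers are thin wrappers around bisect on a sorted run list; convertTimeToRun() uses them to snap a date-derived run number onto an existing run of the dataset. A standalone sketch (not part of dataset.py) with made-up run numbers:

import bisect

def find_ge(a, x):
    # Index of the leftmost item greater than or equal to x.
    i = bisect.bisect_left(a, x)
    if i != len(a):
        return i
    raise ValueError

def find_lt(a, x):
    # Index of the rightmost item less than x.
    i = bisect.bisect_left(a, x)
    if i:
        return i - 1
    raise ValueError

run_list = [315000, 315200, 315500, 316000]
print(run_list[find_ge(run_list, 315100)])  # 315200 (first run at or after 315100)
print(run_list[find_lt(run_list, 315500)])  # 315200 (last run before 315500)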
def dataset.Dataset.__findInJson (   self,
  jsondict,
  strings 
)
private

Definition at line 301 of file dataset.py.

References dataset.Dataset.__findInJson().

Referenced by dataset.Dataset.__findInJson(), dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.fileList().

301  def __findInJson(self, jsondict, strings):
302  if isinstance(strings, str):
303  strings = [ strings ]
304 
305  if len(strings) == 0:
306  return jsondict
307  if isinstance(jsondict,dict):
308  if strings[0] in jsondict:
309  try:
310  return self.__findInJson(jsondict[strings[0]], strings[1:])
311  except KeyError:
312  pass
313  else:
314  for a in jsondict:
315  if strings[0] in a:
316  try:
317  return self.__findInJson(a[strings[0]], strings[1:])
318  except (TypeError, KeyError): #TypeError because a could be a string and contain strings[0]
319  pass
320  #if it's not found
321  raise KeyError("Can't find " + strings[0])
322 
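__findInJson walks a nested DAS response following a chain of keys, descending into lists of dicts where necessary. A standalone sketch (not part of dataset.py) of the same recursive lookup on a simplified, made-up DAS-style record:

def find_in_json(jsondict, strings):
    # Follow the chain of keys in `strings` through dicts and lists of dicts.
    if isinstance(strings, str):
        strings = [strings]
    if len(strings) == 0:
        return jsondict
    if isinstance(jsondict, dict):
        if strings[0] in jsondict:
            try:
                return find_in_json(jsondict[strings[0]], strings[1:])
            except KeyError:
                pass
    else:
        for a in jsondict:
            if strings[0] in a:
                try:
                    return find_in_json(a[strings[0]], strings[1:])
                except (TypeError, KeyError):
                    pass
    raise KeyError("Can't find " + strings[0])

# Made-up structure mimicking one entry of a DAS reply.
entry = {"dataset": [{"name": "/A/B/C", "datatype": "data"}]}
print(find_in_json(entry, ["dataset", "datatype"]))  # data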
def dataset.Dataset.__getData (   self,
  dasQuery,
  dasLimit = 0 
)
private
def dataset.Dataset.__getDataType (   self)
private

Definition at line 385 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__predefined, and dataset.Dataset.name().

Referenced by dataset.Dataset.dataType().

385  def __getDataType( self ):
386  if self.__predefined:
387  with open(self.__filename) as f:
388  datatype = None
389  for line in f.readlines():
390  if line.startswith("#data type: "):
391  if datatype is not None:
392  raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
393  datatype = line.replace("#data type: ", "").replace("\n","")
394  return datatype
395  return "unknown"
396 
397  dasQuery_type = ( 'dataset dataset=%s instance=%s detail=true | grep dataset.datatype,'
398  'dataset.name'%( self.__name, self.__dasinstance ) )
399  data = self.__getData( dasQuery_type )
400 
401  try:
402  return self.__findInJson(data, ["dataset", "datatype"])
403  except KeyError:
404  print ("Cannot find the datatype of the dataset '%s'\n"
405  "It may not be possible to automatically find the magnetic field,\n"
406  "and you will not be able run in CRAB mode"
407  %( self.name() ))
408  return "unknown"
409 
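For predefined datasets the data type is read back from the '#data type: ' comment line that dump_cff() writes into the _cff.py file. A standalone sketch (not part of dataset.py) of that parsing; the file name in the commented-out call is hypothetical:

def data_type_from_cff(filename):
    # Return the value of the single '#data type: ' comment line, if any.
    datatype = None
    with open(filename) as f:
        for line in f:
            if line.startswith("#data type: "):
                if datatype is not None:
                    raise RuntimeError(filename + " has multiple 'data type' lines.")
                datatype = line.replace("#data type: ", "").rstrip("\n")
    return datatype if datatype is not None else "unknown"

# Example (hypothetical predefined dataset file):
# print(data_type_from_cff("Dataset_MyData_cff.py"))  # e.g. 'data' or 'mc'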
def dataset.Dataset.__getFileInfoList (   self,
  dasLimit,
  parent = False 
)
private

Definition at line 558 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__predefined, dataset.Dataset.name(), and dataset.Dataset.parentDataset().

Referenced by dataset.Dataset.fileInfoList().

558  def __getFileInfoList( self, dasLimit, parent = False ):
559  if self.__predefined:
560  if parent:
561  extendstring = "secFiles.extend"
562  else:
563  extendstring = "readFiles.extend"
564  with open(self.__fileName) as f:
565  files = []
566  copy = False
567  for line in f.readlines():
568  if "]" in line:
569  copy = False
570  if copy:
571  files.append({name: line.translate(None, "', " + '"')})
572  if extendstring in line and "[" in line and "]" not in line:
573  copy = True
574  return files
575 
576  if parent:
577  searchdataset = self.parentDataset()
578  else:
579  searchdataset = self.__name
580  dasQuery_files = ( 'file dataset=%s instance=%s detail=true | grep file.name, file.nevents, '
581  'file.creation_time, '
582  'file.modification_time'%( searchdataset, self.__dasinstance ) )
583  print "Requesting file information for '%s' from DAS..."%( searchdataset ),
584  sys.stdout.flush()
585  data = self.__getData( dasQuery_files, dasLimit )
586  print "Done."
587  data = [ self.__findInJson(entry,"file") for entry in data ]
588  if len( data ) == 0:
589  msg = ("No files are available for the dataset '%s'. This can be "
590  "due to a typo or due to a DAS problem. Please check the "
591  "spelling of the dataset and/or retry to run "
592  "'validateAlignments.py'."%( self.name() ))
593  raise AllInOneError( msg )
594  fileInformationList = []
595  for file in data:
596  fileName = 'unknown'
597  try:
598  fileName = self.__findInJson(file, "name")
599  fileCreationTime = self.__findInJson(file, "creation_time")
600  fileNEvents = self.__findInJson(file, "nevents")
601  except KeyError:
602  print ("DAS query gives bad output for file '%s'. Skipping it.\n"
603  "It may work if you try again later.") % fileName
604  fileNEvents = 0
605  # select only non-empty files
606  if fileNEvents == 0:
607  continue
608  fileDict = { "name": fileName,
609  "creation_time": fileCreationTime,
610  "nevents": fileNEvents
611  }
612  fileInformationList.append( fileDict )
613  fileInformationList.sort( key=lambda info: self.__findInJson(info,"name") )
614  return fileInformationList
615 
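After the DAS query, the per-file records are reduced to name, creation time and number of events; empty files are dropped and the result is sorted by file name. A standalone sketch (not part of dataset.py) of that post-processing on made-up records:

def select_file_info(records):
    # Keep only non-empty files and sort the result by name.
    info = [r for r in records if r.get("nevents", 0) != 0]
    info.sort(key=lambda r: r["name"])
    return info

records = [
    {"name": "/store/b.root", "creation_time": 1525000000, "nevents": 1200},
    {"name": "/store/a.root", "creation_time": 1524000000, "nevents": 0},   # empty, dropped
    {"name": "/store/c.root", "creation_time": 1526000000, "nevents": 800},
]
for r in select_file_info(records):
    print(r["name"], r["nevents"])
# /store/b.root 1200
# /store/c.root 800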
def dataset.Dataset.__getMagneticField (   self)
private

Definition at line 420 of file dataset.py.

References dataset.Dataset.__cmssw, dataset.Dataset.__cmsswrelease, dataset.Dataset.__dasinstance, dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__name, dataset.Dataset.__predefined, python.rootplot.root2matplotlib.replace(), and digi_MixPreMix_cfi.strip.

Referenced by dataset.Dataset.magneticField().

420  def __getMagneticField( self ):
421  Bfieldlocation = os.path.join( self.__cmssw, "python", "Configuration", "StandardSequences" )
422  if not os.path.isdir(Bfieldlocation):
423  Bfieldlocation = os.path.join( self.__cmsswrelease, "python", "Configuration", "StandardSequences" )
424  Bfieldlist = [ f.replace("_cff.py",'') \
425  for f in os.listdir(Bfieldlocation) \
426  if f.startswith("MagneticField_") and f.endswith("_cff.py") ]
427  Bfieldlist.sort( key = lambda Bfield: -len(Bfield) ) #Put it in order of decreasing length, so that searching in the name gives the longer match
428 
429  if self.__inputMagneticField is not None:
430  if self.__inputMagneticField == 3.8:
431  return "MagneticField"
432  elif self.__inputMagneticField == 0:
433  return "MagneticField_0T"
434  else:
435  raise ValueError("Unknown input magnetic field {}".format(self.__inputMagneticField))
436 
437  if self.__predefined:
438  with open(self.__filename) as f:
439  datatype = None
440  Bfield = None
441  for line in f.readlines():
442  if line.startswith("#data type: "):
443  if datatype is not None:
444  raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
445  datatype = line.replace("#data type: ", "").replace("\n","")
446  datatype = datatype.split("#")[0].strip()
447  if line.startswith("#magnetic field: "):
448  if Bfield is not None:
449  raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
450  Bfield = line.replace("#magnetic field: ", "").replace("\n","")
451  Bfield = Bfield.split("#")[0].strip()
452  if Bfield is not None:
453  Bfield = Bfield.split(",")[0]
454  if Bfield in Bfieldlist or Bfield == "unknown":
455  return Bfield
456  else:
457  print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
458  print "Using Bfield='unknown' - this will revert to the default"
459  return "unknown"
460  elif datatype == "data":
461  return "MagneticField" #this should be in the "#magnetic field" line, but for safety in case it got messed up
462  else:
463  return "unknown"
464 
465  if self.__dataType == "data":
466  return "MagneticField"
467 
468  #try to find the magnetic field from DAS
469  #it seems to be there for the newer (7X) MC samples, except cosmics
470  dasQuery_B = ('dataset dataset=%s instance=%s'%(self.__name, self.__dasinstance))
471  data = self.__getData( dasQuery_B )
472 
473  try:
474  Bfield = self.__findInJson(data, ["dataset", "mcm", "sequences", "magField"])
475  if Bfield in Bfieldlist:
476  return Bfield
477  elif Bfield == "38T" or Bfield == "38T_PostLS1":
478  return "MagneticField"
479  elif "MagneticField_" + Bfield in Bfieldlist:
480  return "MagneticField_" + Bfield
481  elif Bfield == "":
482  pass
483  else:
484  print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
485  print "Using Bfield='unknown' - this will revert to the default magnetic field"
486  return "unknown"
487  except KeyError:
488  pass
489 
490  for possibleB in Bfieldlist:
491  if (possibleB != "MagneticField"
492  and possibleB.replace("MagneticField_","") in self.__name.replace("TkAlCosmics0T", "")):
493  #final attempt - try to identify the dataset from the name
494  #all cosmics dataset names contain "TkAlCosmics0T"
495  if possibleB == "MagneticField_38T" or possibleB == "MagneticField_38T_PostLS1":
496  return "MagneticField"
497  return possibleB
498 
499  return "unknown"
500 
def dataset.Dataset.__getMagneticFieldForRun (   self,
  run = -1,
  tolerance = 0.5 
)
private
For MC, this returns the same as the previous function.
   For data, it gets the magnetic field from the runs.  This is important for
   deciding which template to use for offlinevalidation

Definition at line 501 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getData(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__lastusedrun, dataset.Dataset.__magneticField, dataset.Dataset.__name, dataset.Dataset.__predefined, funct.abs(), objects.autophobj.float, python.rootplot.root2matplotlib.replace(), split, and digi_MixPreMix_cfi.strip.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dump_cff(), and dataset.Dataset.magneticFieldForRun().

501  def __getMagneticFieldForRun( self, run = -1, tolerance = 0.5 ):
502  """For MC, this returns the same as the previous function.
503  For data, it gets the magnetic field from the runs. This is important for
504  deciding which template to use for offlinevalidation
505  """
506  if self.__dataType == "mc" and self.__magneticField == "MagneticField":
507  return 3.8 #For 3.8T MC the default MagneticField is used
508  if self.__inputMagneticField is not None:
509  return self.__inputMagneticField
510  if "T" in self.__magneticField:
511  Bfield = self.__magneticField.split("T")[0].replace("MagneticField_","")
512  try:
513  return float(Bfield) / 10.0 #e.g. 38T and 38T_PostLS1 both return 3.8
514  except ValueError:
515  pass
516  if self.__predefined:
517  with open(self.__filename) as f:
518  Bfield = None
519  for line in f.readlines():
520  if line.startswith("#magnetic field: ") and "," in line:
521  if Bfield is not None:
522  raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
523  return float(line.replace("#magnetic field: ", "").split(",")[1].split("#")[0].strip())
524 
525  if run > 0:
526  dasQuery = ('run=%s instance=%s detail=true'%(run, self.__dasinstance)) #for data
527  data = self.__getData(dasQuery)
528  try:
529  return self.__findInJson(data, ["run","bfield"])
530  except KeyError:
531  return "unknown Can't get the magnetic field for run %s from DAS" % run
532 
533  #run < 0 - find B field for the first and last runs, and make sure they're compatible
534  # (to within tolerance)
535  #NOT FOOLPROOF! The magnetic field might go up and then down, or vice versa
536  if self.__firstusedrun is None or self.__lastusedrun is None:
537  return "unknown Can't get the exact magnetic field for the dataset until data has been retrieved from DAS."
538  firstrunB = self.__getMagneticFieldForRun(self.__firstusedrun)
539  lastrunB = self.__getMagneticFieldForRun(self.__lastusedrun)
540  try:
541  if abs(firstrunB - lastrunB) <= tolerance:
542  return .5*(firstrunB + lastrunB)
543  print firstrunB, lastrunB, tolerance
544  return ("unknown The beginning and end of your run range for %s\n"
545  "have different magnetic fields (%s, %s)!\n"
546  "Try limiting the run range using firstRun, lastRun, begin, end, or JSON,\n"
547  "or increasing the tolerance (in dataset.py) from %s.") % (self.__name, firstrunB, lastrunB, tolerance)
548  except TypeError:
549  try:
550  if "unknown" in firstrunB:
551  return firstrunB
552  else:
553  return lastrunB
554  except TypeError:
555  return lastrunB
556 
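The conversion from the magnetic-field configuration name to a field value in tesla is plain string parsing: '38T' and '38T_PostLS1' both become 3.8. A standalone sketch (not part of dataset.py) of that step:

def field_from_name(magnetic_field_name):
    # e.g. 'MagneticField_38T_PostLS1' -> 3.8, 'MagneticField_0T' -> 0.0
    if "T" in magnetic_field_name:
        bfield = magnetic_field_name.split("T")[0].replace("MagneticField_", "")
        try:
            return float(bfield) / 10.0
        except ValueError:
            pass
    return None  # fall through to the run-by-run DAS lookup

print(field_from_name("MagneticField_38T_PostLS1"))  # 3.8
print(field_from_name("MagneticField_0T"))           # 0.0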
def dataset.Dataset.__getParentDataset (   self)
private

Definition at line 410 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, and harvestTrackValidationPlots.str.

Referenced by dataset.Dataset.parentDataset().

410  def __getParentDataset( self ):
411  dasQuery = "parent dataset=" + self.__name + " instance="+self.__dasinstance
412  data = self.__getData( dasQuery )
413  try:
414  return self.__findInJson(data, ["parent", "name"])
415  except KeyError:
416  raise AllInOneError("Cannot find the parent of the dataset '" + self.__name + "'\n"
417  "Here is the DAS output:\n" + str(jsondict) +
418  "\nIt's possible that this was a server error. If so, it may work if you try again later")
419 
def dataset.Dataset.__getRunList (   self)
private

Definition at line 617 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), and dataset.Dataset.__name.

Referenced by dataset.Dataset.__lumiSelectionSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.runList().

617  def __getRunList( self ):
618  dasQuery_runs = ( 'run dataset=%s instance=%s | grep run.run_number,'
619  'run.creation_time'%( self.__name, self.__dasinstance ) )
620  print "Requesting run information for '%s' from DAS..."%( self.__name ),
621  sys.stdout.flush()
622  data = self.__getData( dasQuery_runs )
623  print "Done."
624  data = [ self.__findInJson(entry,"run") for entry in data ]
625  data.sort( key = lambda run: self.__findInJson(run, "run_number") )
626  return data
627 
def dataset.Dataset.__lumiSelectionSnippet (   self,
  jsonPath = None,
  firstRun = None,
  lastRun = None 
)
private

Definition at line 122 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getRunList(), dataset.Dataset.__inputMagneticField, dataset.Dataset.__lastusedrun, dataset.Dataset.getForceRunRangeFunction(), dataset.int, join(), list(), SiStripPI.max, min(), python.rootplot.root2matplotlib.replace(), split, and harvestTrackValidationPlots.str.

Referenced by dataset.Dataset.__createSnippet().

122  def __lumiSelectionSnippet( self, jsonPath = None, firstRun = None, lastRun = None ):
123  lumiSecExtend = ""
124  if firstRun or lastRun or jsonPath:
125  if not jsonPath:
126  selectedRunList = self.__getRunList()
127  if firstRun:
128  selectedRunList = [ run for run in selectedRunList \
129  if self.__findInJson(run, "run_number") >= firstRun ]
130  if lastRun:
131  selectedRunList = [ run for run in selectedRunList \
132  if self.__findInJson(run, "run_number") <= lastRun ]
133  lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
134  + str( self.__findInJson(run, "run_number") ) + ":max" \
135  for run in selectedRunList ]
136  splitLumiList = list( self.__chunks( lumiList, 255 ) )
137  else:
138  theLumiList = None
139  try:
140  theLumiList = LumiList ( filename = jsonPath )
141  except ValueError:
142  pass
143 
144  if theLumiList is not None:
145  allRuns = theLumiList.getRuns()
146  runsToRemove = []
147  for run in allRuns:
148  if firstRun and int( run ) < firstRun:
149  runsToRemove.append( run )
150  if lastRun and int( run ) > lastRun:
151  runsToRemove.append( run )
152  theLumiList.removeRuns( runsToRemove )
153  splitLumiList = list( self.__chunks(
154  theLumiList.getCMSSWString().split(','), 255 ) )
155  if not (splitLumiList and splitLumiList[0] and splitLumiList[0][0]):
156  splitLumiList = None
157  else:
158  with open(jsonPath) as f:
159  jsoncontents = f.read()
160  if "process.source.lumisToProcess" in jsoncontents:
161  msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
162  if firstRun or lastRun:
163  msg += ("\n (after applying firstRun and/or lastRun)")
164  msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
165  msg += "\nCheck your config file to make sure that it worked properly."
166  print msg
167 
168  runlist = self.__getRunList()
169  if firstRun or lastRun:
170  self.__firstusedrun = -1
171  self.__lastusedrun = -1
172  jsoncontents = re.sub(r"\d+:(\d+|max)(-\d+:(\d+|max))?", self.getForceRunRangeFunction(firstRun, lastRun), jsoncontents)
173  jsoncontents = (jsoncontents.replace("'',\n","").replace("''\n","")
174  .replace('"",\n','').replace('""\n',''))
175  self.__firstusedrun = max(self.__firstusedrun, int(self.__findInJson(runlist[0],"run_number")))
176  self.__lastusedrun = min(self.__lastusedrun, int(self.__findInJson(runlist[-1],"run_number")))
177  if self.__lastusedrun < self.__firstusedrun:
178  jsoncontents = None
179  else:
180  self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
181  self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))
182  lumiSecExtend = jsoncontents
183  splitLumiList = None
184  else:
185  raise AllInOneError("%s is not a valid json file!" % jsonPath)
186 
187  if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
188  lumiSecStr = [ "',\n'".join( lumis ) \
189  for lumis in splitLumiList ]
190  lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
191  for lumis in lumiSecStr ]
192  lumiSecExtend = "\n".join( lumiSecStr )
193  runlist = self.__getRunList()
194  self.__firstusedrun = max(int(splitLumiList[0][0].split(":")[0]), int(self.__findInJson(runlist[0],"run_number")))
195  self.__lastusedrun = min(int(splitLumiList[-1][-1].split(":")[0]), int(self.__findInJson(runlist[-1],"run_number")))
196  elif lumiSecExtend:
197  pass
198  else:
199  msg = "You are trying to run a validation without any runs! Check that:"
200  if firstRun or lastRun:
201  msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
202  if jsonPath:
203  msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
204  if (firstRun or lastRun) and jsonPath:
205  msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
206  raise AllInOneError(msg)
207 
208  else:
209  if self.__inputMagneticField is not None:
210  pass #never need self.__firstusedrun or self.__lastusedrun
211  else:
212  runlist = self.__getRunList()
213  self.__firstusedrun = int(self.__findInJson(self.__getRunList()[0],"run_number"))
214  self.__lastusedrun = int(self.__findInJson(self.__getRunList()[-1],"run_number"))
215 
216  return lumiSecExtend
217 
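When only firstRun/lastRun are given, the selection is expressed as full-run luminosity ranges of the form 'RUN:1-RUN:max', chunked into lumiSecs.extend( [...] ) blocks of at most 255 entries. A standalone sketch (not part of dataset.py) of that formatting, with made-up run numbers:

def chunks(the_list, n):
    for i in range(0, len(the_list), n):
        yield the_list[i:i + n]

def lumi_selection_snippet(run_numbers):
    # One full-run range per run, wrapped in lumiSecs.extend( [...] ) blocks.
    lumi_list = ["%s:1-%s:max" % (run, run) for run in run_numbers]
    blocks = ["',\n'".join(lumis) for lumis in chunks(lumi_list, 255)]
    return "\n".join("lumiSecs.extend( [\n'" + lumis + "'\n] )" for lumis in blocks)

print(lumi_selection_snippet([315252, 315257]))
# lumiSecs.extend( [
# '315252:1-315252:max',
# '315257:1-315257:max'
# ] )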
def dataset.Dataset.buildListOfBadFiles (   self)
fills the list of bad files from the IntegrityCheck log.

When the integrity check file is not available,
files are considered as good.

Definition at line 275 of file dataset.py.

276  '''fills the list of bad files from the IntegrityCheck log.
277 
278  When the integrity check file is not available,
279  files are considered as good.'''
280  mask = "IntegrityCheck"
281 
282  self.bad_files = {}
283  self.good_files = []
284 
285  file_mask = castortools.matchingFiles(self.castorDir, '^%s_.*\.txt$' % mask)
286  if file_mask:
287  # here to avoid circular dependency
288  from edmIntegrityCheck import PublishToFileSystem
289  p = PublishToFileSystem(mask)
290  report = p.get(self.castorDir)
291  if report is not None and report:
292  self.maskExists = True
293  self.report = report
294  dup = report.get('ValidDuplicates',{})
295  for name, status in report['Files'].iteritems():
296  # print name, status
297  if not status[0]:
298  self.bad_files[name] = 'MarkedBad'
299  elif name in dup:
300  self.bad_files[name] = 'ValidDup'
301  else:
302  self.good_files.append( name )
303  else:
304  raise IntegrityCheckError( "ERROR: IntegrityCheck log file IntegrityCheck_XXXXXXXXXX.txt not found" )
305 
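The classification itself is driven by the IntegrityCheck report: files marked bad go into bad_files with reason 'MarkedBad', valid duplicates get 'ValidDup', and everything else is good. A standalone sketch (not part of dataset.py) on a made-up report dict:

def classify_files(report):
    # report['Files'] maps file name -> (is_ok, ...); 'ValidDuplicates' lists dups.
    bad_files, good_files = {}, []
    dup = report.get('ValidDuplicates', {})
    for name, status in report['Files'].items():
        if not status[0]:
            bad_files[name] = 'MarkedBad'
        elif name in dup:
            bad_files[name] = 'ValidDup'
        else:
            good_files.append(name)
    return bad_files, good_files

report = {'Files': {'a.root': (True,), 'b.root': (False,), 'c.root': (True,)},
          'ValidDuplicates': {'c.root': 'a.root'}}
print(classify_files(report))
# ({'b.root': 'MarkedBad', 'c.root': 'ValidDup'}, ['a.root'])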
def dataset.Dataset.buildListOfFiles (   self,
  pattern = '.*root' 
)
fills list of files, taking all root files matching the pattern in the castor dir

Definition at line 271 of file dataset.py.

271  def buildListOfFiles(self, pattern='.*root'):
272  '''fills list of files, taking all root files matching the pattern in the castor dir'''
273  self.files = castortools.matchingFiles( self.castorDir, pattern )
274 
def dataset.Dataset.convertTimeToRun (   self,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  shortTuple = True 
)

Definition at line 642 of file dataset.py.

References dataset.Dataset.__dasinstance, dataset.Dataset.__dateString(), dataset.Dataset.__datetime(), dataset.Dataset.__find_ge(), dataset.Dataset.__find_lt(), dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__getRunList(), dataset.Dataset.__name, electrons_cff.bool, and dataset.int.

Referenced by dataset.Dataset.__createSnippet(), and dataset.Dataset.__dateString().

642  shortTuple = True ):
643  if ( begin and firstRun ) or ( end and lastRun ):
644  msg = ( "The Usage of "
645  + "'begin' & 'firstRun' " * int( bool( begin and
646  firstRun ) )
647  + "and " * int( bool( ( begin and firstRun ) and
648  ( end and lastRun ) ) )
649  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
650  + "is ambigous." )
651  raise AllInOneError( msg )
652 
653  if begin or end:
654  runList = [ self.__findInJson(run, "run_number") for run in self.__getRunList() ]
655 
656  if begin:
657  lastdate = begin
658  for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months after begin
659  firstdate = lastdate
660  lastdate = self.__dateString(self.__datetime(firstdate) + datetime.timedelta(delta))
661  dasQuery_begin = "run date between[%s,%s] instance=%s" % (firstdate, lastdate, self.__dasinstance)
662  begindata = self.__getData(dasQuery_begin)
663  if len(begindata) > 0:
664  begindata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
665  try:
666  runIndex = self.__find_ge( runList, self.__findInJson(begindata[0], ["run", "run_number"]))
667  except ValueError:
668  msg = ( "Your 'begin' is after the creation time of the last "
669  "run in the dataset\n'%s'"%( self.__name ) )
670  raise AllInOneError( msg )
671  firstRun = runList[runIndex]
672  begin = None
673  break
674 
675  if begin:
676  raise AllInOneError("No runs within a reasonable time interval after your 'begin'."
677  "Try using a 'begin' that has runs soon after it (within 2 months at most)")
678 
679  if end:
680  firstdate = end
681  for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months before end
682  lastdate = firstdate
683  firstdate = self.__dateString(self.__datetime(lastdate) - datetime.timedelta(delta))
684  dasQuery_end = "run date between[%s,%s] instance=%s" % (firstdate, lastdate, self.__dasinstance)
685  enddata = self.__getData(dasQuery_end)
686  if len(enddata) > 0:
687  enddata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
688  try:
689  runIndex = self.__find_lt( runList, self.__findInJson(enddata[-1], ["run", "run_number"]))
690  except ValueError:
691  msg = ( "Your 'end' is before the creation time of the first "
692  "run in the dataset\n'%s'"%( self.__name ) )
693  raise AllInOneError( msg )
694  lastRun = runList[runIndex]
695  end = None
696  break
697 
698  if end:
699  raise AllInOneError("No runs within a reasonable time interval before your 'end'."
700  "Try using an 'end' that has runs soon before it (within 2 months at most)")
701 
702  if shortTuple:
703  return firstRun, lastRun
704  else:
705  return begin, end, firstRun, lastRun
706 
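When a date ('begin' or 'end') is given instead of a run number, the method widens a DAS date window step by step (1, 5, 10, 20, 30 days, roughly two months in total) until a run is found, then snaps to the nearest run in the dataset. A standalone sketch (not part of dataset.py) of that expanding search; query_runs_between and fake_query are stand-ins for the real DAS query:

import datetime

def first_run_after(begin, query_runs_between):
    # query_runs_between(first_date, last_date) is a stand-in for the DAS date
    # query; it should return a sorted list of run numbers in that window.
    lastdate = begin
    for delta in [1, 5, 10, 20, 30]:        # widen the window step by step
        firstdate, lastdate = lastdate, lastdate + datetime.timedelta(delta)
        runs = query_runs_between(firstdate, lastdate)
        if runs:
            return runs[0]
    raise RuntimeError("No runs within a reasonable time interval after 'begin'")

# Toy stand-in: pretend the first run after 2018-05-01 was taken on 2018-05-20.
def fake_query(first, last):
    run_day = datetime.date(2018, 5, 20)
    return [315500] if first <= run_day <= last else []

print(first_run_after(datetime.date(2018, 5, 1), fake_query))  # 315500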
def dataset.Dataset.createdatasetfile_hippy (   self,
  filename,
  filesperjob,
  firstrun,
  lastrun 
)

Definition at line 849 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.fileList(), and join().

849  def createdatasetfile_hippy(self, filename, filesperjob, firstrun, lastrun):
850  with open(filename, "w") as f:
851  for job in self.__chunks(self.fileList(firstRun=firstrun, lastRun=lastrun, forcerunselection=True), filesperjob):
852  f.write(",".join("'{}'".format(file) for file in job)+"\n")
853 
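Each line of the output file corresponds to one job: a comma-separated, quoted group of at most filesperjob files. A standalone sketch (not part of dataset.py) of the formatting, writing to a hypothetical file name:

def chunks(the_list, n):
    for i in range(0, len(the_list), n):
        yield the_list[i:i + n]

def write_hippy_file(filename, file_names, files_per_job):
    # One line per job, each file quoted and comma-separated.
    with open(filename, "w") as f:
        for job in chunks(file_names, files_per_job):
            f.write(",".join("'{}'".format(name) for name in job) + "\n")

write_hippy_file("hippy_files.txt", ["a.root", "b.root", "c.root"], 2)
# hippy_files.txt now contains:
# 'a.root','b.root'
# 'c.root'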
def dataset.Dataset.datasetSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  crab = False,
  parent = False 
)

Definition at line 726 of file dataset.py.

References dataset.Dataset.__createSnippet(), dataset.Dataset.__filename, dataset.Dataset.__name, dataset.Dataset.__official, dataset.Dataset.__origName, dataset.Dataset.__predefined, and dataset.Dataset.dump_cff().

Referenced by dataset.Dataset.parentDataset().

726  firstRun = None, lastRun = None, crab = False, parent = False ):
727  if not firstRun: firstRun = None
728  if not lastRun: lastRun = None
729  if not begin: begin = None
730  if not end: end = None
731  if self.__predefined and (jsonPath or begin or end or firstRun or lastRun):
732  msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun' "
733  "only work for official datasets, not predefined _cff.py files" )
734  raise AllInOneError( msg )
735  if self.__predefined and parent:
736  with open(self.__filename) as f:
737  if "secFiles.extend" not in f.read():
738  msg = ("The predefined dataset '%s' does not contain secondary files, "
739  "which your validation requires!") % self.__name
740  if self.__official:
741  self.__name = self.__origName
742  self.__predefined = False
743  print msg
744  print ("Retreiving the files from DAS. You will be asked if you want "
745  "to overwrite the old dataset.\n"
746  "It will still be compatible with validations that don't need secondary files.")
747  else:
748  raise AllInOneError(msg)
749 
750  if self.__predefined:
751  snippet = ("process.load(\"Alignment.OfflineValidation.%s_cff\")\n"
752  "process.maxEvents = cms.untracked.PSet(\n"
753  " input = cms.untracked.int32(.oO[nEvents]Oo. / .oO[parallelJobs]Oo.)\n"
754  ")\n"
755  "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)"
756  %(self.__name))
757  if not parent:
758  with open(self.__filename) as f:
759  if "secFiles.extend" in f.read():
760  snippet += "\nprocess.source.secondaryFileNames = cms.untracked.vstring()"
761  return snippet
762  theMap = { "process": "process.",
763  "tab": " " * len( "process." ),
764  "nEvents": ".oO[nEvents]Oo. / .oO[parallelJobs]Oo.",
765  "skipEventsString": "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)\n",
766  "importCms": "",
767  "header": ""
768  }
769  datasetSnippet = self.__createSnippet( jsonPath = jsonPath,
770  begin = begin,
771  end = end,
772  firstRun = firstRun,
773  lastRun = lastRun,
774  repMap = theMap,
775  crab = crab,
776  parent = parent )
777  if jsonPath == "" and begin == "" and end == "" and firstRun == "" and lastRun == "":
778  try:
779  self.dump_cff(parent = parent)
780  except AllInOneError as e:
781  print "Can't store the dataset as a cff:"
782  print e
783  print "This may be inconvenient in the future, but will not cause a problem for this validation."
784  return datasetSnippet
785 
def dataset.Dataset.dataType (   self)

Definition at line 707 of file dataset.py.

References dataset.Dataset.__dataType, and dataset.Dataset.__getDataType().

707  def dataType( self ):
708  if not self.__dataType:
709  self.__dataType = self.__getDataType()
710  return self.__dataType
711 
def dataset.Dataset.dump_cff (   self,
  outName = None,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  parent = False 
)

Definition at line 788 of file dataset.py.

References dataset.Dataset.__cmssw, dataset.Dataset.__createSnippet(), dataset.Dataset.__dataType, dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__magneticField, dataset.Dataset.__name, python.rootplot.root2matplotlib.replace(), split, harvestTrackValidationPlots.str, and digi_MixPreMix_cfi.strip.

Referenced by dataset.Dataset.datasetSnippet().

788  end = None, firstRun = None, lastRun = None, parent = False ):
789  if outName == None:
790  outName = "Dataset" + self.__name.replace("/", "_")
791  packageName = os.path.join( "Alignment", "OfflineValidation" )
792  if not os.path.exists( os.path.join(
793  self.__cmssw, "src", packageName ) ):
794  msg = ("You try to store the predefined dataset'%s'.\n"
795  "For that you need to check out the package '%s' to your "
796  "private relase area in\n"%( outName, packageName )
797  + self.__cmssw )
798  raise AllInOneError( msg )
799  theMap = { "process": "",
800  "tab": "",
801  "nEvents": str( -1 ),
802  "skipEventsString": "",
803  "importCms": "import FWCore.ParameterSet.Config as cms\n",
804  "header": "#Do not delete or (unless you know what you're doing) change these comments\n"
805  "#%(name)s\n"
806  "#data type: %(dataType)s\n"
807  "#magnetic field: .oO[magneticField]Oo.\n" #put in magnetic field later
808  %{"name": self.__name, #need to create the snippet before getting the magnetic field
809  "dataType": self.__dataType} #so that we know the first and last runs
810  }
811  dataset_cff = self.__createSnippet( jsonPath = jsonPath,
812  begin = begin,
813  end = end,
814  firstRun = firstRun,
815  lastRun = lastRun,
816  repMap = theMap,
817  parent = parent)
818  magneticField = self.__magneticField
819  if magneticField == "MagneticField":
820  magneticField = "%s, %s #%s" % (magneticField,
821  str(self.__getMagneticFieldForRun()).replace("\n"," ").split("#")[0].strip(),
822  "Use MagneticField_cff.py; the number is for determining which track selection to use."
823  )
824  dataset_cff = dataset_cff.replace(".oO[magneticField]Oo.",magneticField)
825  filePath = os.path.join( self.__cmssw, "src", packageName,
826  "python", outName + "_cff.py" )
827  if os.path.exists( filePath ):
828  existMsg = "The predefined dataset '%s' already exists.\n"%( outName )
829  askString = "Do you want to overwrite it? [y/n]\n"
830  inputQuery = existMsg + askString
831  while True:
832  userInput = raw_input( inputQuery ).lower()
833  if userInput == "y":
834  break
835  elif userInput == "n":
836  return
837  else:
838  inputQuery = askString
839  print ( "The predefined dataset '%s' will be stored in the file\n"
840  %( outName )
841  + filePath +
842  "\nFor future use you have to do 'scram b'." )
843  print
844  theFile = open( filePath, "w" )
845  theFile.write( dataset_cff )
846  theFile.close()
847  return
848 
def __getMagneticFieldForRun(self, run=-1, tolerance=0.5)
Definition: dataset.py:501
def __createSnippet(self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, repMap=None, crab=False, parent=False)
Definition: dataset.py:242
def replace(string, replacements)
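dump_cff() renders the same snippet as datasetSnippet(), adds the data type and magnetic field as comments, and writes the result to Alignment/OfflineValidation/python/<outName>_cff.py in the release area, asking before overwriting an existing file. A hedged usage sketch (dataset name and run range are placeholders; the package must be checked out, as the method itself verifies):

d = Dataset("/SomePrimaryDataset/SomeEra/ALCARECO")   # placeholder name
d.dump_cff(firstRun=300000, lastRun=300100)           # placeholder run range
# writes Dataset_SomePrimaryDataset_SomeEra_ALCARECO_cff.py and reminds you
# to run 'scram b' so the new _cff.py becomes importable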
def dataset.Dataset.extractFileSizes (   self)
Get the file size for each file, from the eos ls -l command.

Definition at line 306 of file dataset.py.

References dataset.EOSDataset.castorDir, and dataset.Dataset.castorDir.

306  def extractFileSizes(self):
307  '''Get the file size for each file, from the eos ls -l command.'''
308  # EOS command does not work in tier3
309  lsout = castortools.runXRDCommand(self.castorDir,'dirlist')[0]
310  lsout = lsout.split('\n')
311  self.filesAndSizes = {}
312  for entry in lsout:
313  values = entry.split()
314  if( len(values) != 5):
315  continue
316  # using full abs path as a key.
317  file = '/'.join([self.lfnDir, values[4].split("/")[-1]])
318  size = values[1]
319  self.filesAndSizes[file] = size
320 
def extractFileSizes(self)
Definition: dataset.py:306
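The parsing assumes each listing entry has five whitespace-separated fields, with the size in the second column and the file path in the last; the key stored in filesAndSizes is rebuilt from lfnDir plus the bare file name. A minimal sketch on a fabricated listing line (the real dirlist output may differ slightly):

lfnDir = "/store/group/example"   # placeholder LFN directory
entry = "-rw-r--r-- 2147483648 2024-01-01 12:00 /eos/cms/store/group/example/file_1.root"
values = entry.split()
if len(values) == 5:
    filename = '/'.join([lfnDir, values[4].split("/")[-1]])
    size = values[1]
    print(filename + " " + size)   # /store/group/example/file_1.root 2147483648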
def dataset.Dataset.fileInfoList (   self,
  parent = False 
)

Definition at line 911 of file dataset.py.

References dataset.Dataset.__dasLimit, and dataset.Dataset.__getFileInfoList().

Referenced by dataset.Dataset.fileList().

911  def fileInfoList( self, parent = False ):
912  return self.__getFileInfoList( self.__dasLimit, parent )
913 
def __getFileInfoList(self, dasLimit, parent=False)
Definition: dataset.py:558
def fileInfoList(self, parent=False)
Definition: dataset.py:911
def dataset.Dataset.fileList (   self,
  parent = False,
  firstRun = None,
  lastRun = None,
  forcerunselection = False 
)

Definition at line 882 of file dataset.py.

References dataset.Dataset.__findInJson(), dataset.Dataset.fileInfoList(), objects.autophobj.float, and dataset.Dataset.getrunnumberfromfilename().

Referenced by dataset.Dataset.__fileListSnippet(), and dataset.Dataset.createdatasetfile_hippy().

882  def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
883  fileList = [ self.__findInJson(fileInfo,"name")
884  for fileInfo in self.fileInfoList(parent) ]
885 
886  if firstRun or lastRun:
887  if not firstRun: firstRun = -1
888  if not lastRun: lastRun = float('infinity')
889  unknownfilenames, reasons = [], set()
890  for filename in fileList[:]:
891  try:
892  if not firstRun <= self.getrunnumberfromfilename(filename) <= lastRun:
893  fileList.remove(filename)
894  except AllInOneError as e:
895  if forcerunselection: raise
896  unknownfilenames.append(e.message.split("\n")[1])
897  reasons .add (e.message.split("\n")[2])
898  if reasons:
899  if len(unknownfilenames) == len(fileList):
900  print "Could not figure out the run numbers of any of the filenames for the following reason(s):"
901  else:
902  print "Could not figure out the run numbers of the following filenames:"
903  for filename in unknownfilenames:
904  print " "+filename
905  print "for the following reason(s):"
906  for reason in reasons:
907  print " "+reason
908  print "Using the files anyway. The runs will be filtered at the CMSSW level."
909  return fileList
910 
def __findInJson(self, jsondict, strings)
Definition: dataset.py:301
def fileInfoList(self, parent=False)
Definition: dataset.py:911
def getrunnumberfromfilename(filename)
Definition: dataset.py:855
def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False)
Definition: dataset.py:882
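fileList() first collects the names from fileInfoList() and, if a run range is given, drops every file whose run number (parsed from the file name) lies outside [firstRun, lastRun]; names whose run number cannot be parsed are kept with a warning, unless forcerunselection is set, in which case the AllInOneError propagates. A hedged usage sketch (placeholder dataset and run range, DAS access assumed):

d = Dataset("/SomePrimaryDataset/SomeEra/ALCARECO")   # placeholder name
all_files = d.fileList()
ranged    = d.fileList(firstRun=300000, lastRun=300100)
strict    = d.fileList(firstRun=300000, lastRun=300100, forcerunselection=True)
# 'strict' raises AllInOneError if any run number cannot be parsed from a name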
def dataset.Dataset.forcerunrange (   self,
  firstRun,
  lastRun,
  s 
)
s must be in the format run1:lum1-run2:lum2

Definition at line 323 of file dataset.py.

References dataset.Dataset.__firstusedrun, dataset.Dataset.__lastusedrun, dataset.int, and split.

Referenced by dataset.Dataset.getForceRunRangeFunction().

323  def forcerunrange(self, firstRun, lastRun, s):
324  """s must be in the format run1:lum1-run2:lum2"""
325  s = s.group()
326  run1 = s.split("-")[0].split(":")[0]
327  lum1 = s.split("-")[0].split(":")[1]
328  try:
329  run2 = s.split("-")[1].split(":")[0]
330  lum2 = s.split("-")[1].split(":")[1]
331  except IndexError:
332  run2 = run1
333  lum2 = lum1
334  if int(run2) < firstRun or int(run1) > lastRun:
335  return ""
336  if int(run1) < firstRun or firstRun < 0:
337  run1 = firstRun
338  lum1 = 1
339  if int(run2) > lastRun:
340  run2 = lastRun
341  lum2 = "max"
342  if int(run1) < self.__firstusedrun or self.__firstusedrun < 0:
343  self.__firstusedrun = int(run1)
344  if int(run2) > self.__lastusedrun:
345  self.__lastusedrun = int(run2)
346  return "%s:%s-%s:%s" % (run1, lum1, run2, lum2)
347 
def forcerunrange(self, firstRun, lastRun, s)
Definition: dataset.py:323
def dataset.Dataset.getForceRunRangeFunction (   self,
  firstRun,
  lastRun 
)

Definition at line 348 of file dataset.py.

References dataset.Dataset.forcerunrange().

Referenced by dataset.Dataset.__lumiSelectionSnippet().

348  def getForceRunRangeFunction(self, firstRun, lastRun):
349  def forcerunrangefunction(s):
350  return self.forcerunrange(firstRun, lastRun, s)
351  return forcerunrangefunction
352 
def forcerunrange(self, firstRun, lastRun, s)
Definition: dataset.py:323
def getForceRunRangeFunction(self, firstRun, lastRun)
Definition: dataset.py:348
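The returned closure is meant to be used as the replacement function of re.sub, so every run1:lum1-run2:lum2 token of a lumi selection string gets clipped to the requested run range (this is how __lumiSelectionSnippet uses it). A sketch of that pattern, assuming a Dataset instance d whose run bookkeeping is already initialized and a made-up lumi string; the regular expression here is illustrative, not necessarily the one used internally:

import re

lumistring = "299000:1-299999:200 300050:1-300200:50"   # made-up tokens
clip = d.getForceRunRangeFunction(300000, 300100)
clipped = re.sub(r"\d+:\d+-\d+:\d+", clip, lumistring)
# the first token is replaced by an empty string (entirely below the range),
# the second is clipped to "300050:1-300100:max"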
def dataset.Dataset.getPrimaryDatasetEntries (   self)

Definition at line 326 of file dataset.py.

References dataset.int, runall.testit.report, WorkFlowRunner.WorkFlowRunner.report, dataset.BaseDataset.report, and ALIUtils.report.

327  if self.report is not None and self.report:
328  return int(self.report.get('PrimaryDatasetEntries',-1))
329  return -1
330 
331 
def getPrimaryDatasetEntries(self)
Definition: dataset.py:326
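The number of primary-dataset entries is simply read from the report dictionary inherited from BaseDataset, with -1 as the fallback. A minimal sketch with a fabricated report, assuming a Dataset instance d (in real use the report is filled elsewhere, e.g. from a job report):

d.report = {'PrimaryDatasetEntries': '123456'}   # fabricated report
print(d.getPrimaryDatasetEntries())              # 123456
d.report = None
print(d.getPrimaryDatasetEntries())              # -1 (no report available)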
def dataset.Dataset.getrunnumberfromfilename (   filename)
static

Definition at line 855 of file dataset.py.

References Vispa.Plugins.EdmBrowser.EdmDataAccessor.all(), dataset.int, and join().

Referenced by dataset.Dataset.fileList().

856  parts = filename.split("/")
857  result = error = None
858  if parts[0] != "" or parts[1] != "store":
859  error = "does not start with /store"
860  elif parts[2] in ["mc", "relval"]:
861  result = 1
862  elif parts[-2] != "00000" or not parts[-1].endswith(".root"):
863  error = "does not end with 00000/something.root"
864  elif len(parts) != 12:
865  error = "should be exactly 11 slashes counting the first one"
866  else:
867  runnumberparts = parts[-5:-2]
868  if not all(len(part)==3 for part in runnumberparts):
869  error = "the 3 directories {} do not have length 3 each".format("/".join(runnumberparts))
870  try:
871  result = int("".join(runnumberparts))
872  except ValueError:
873  error = "the 3 directories {} do not form an integer".format("/".join(runnumberparts))
874 
875  if error:
876  error = "could not figure out which run number this file is from:\n{}\n{}".format(filename, error)
877  raise AllInOneError(error)
878 
879  return result
880 
def getrunnumberfromfilename(filename)
Definition: dataset.py:855
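The run number is rebuilt from the three 3-digit directories that precede the trailing 00000/ directory of a standard /store path; files under /store/mc or /store/relval are assigned run 1, and anything else that does not match the expected layout raises AllInOneError. A worked example on a made-up but correctly structured LFN:

filename = "/store/data/Run2018A/SomePD/ALCARECO/SomeProc-v1/000/316/569/00000/file.root"
parts = filename.split("/")
print(len(parts))                   # 12 (i.e. 11 slashes, as the check requires)
print("".join(parts[-5:-2]))        # 000316569
print(int("".join(parts[-5:-2])))   # 316569 -> value returned for this name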
def dataset.Dataset.magneticField (   self)

Definition at line 712 of file dataset.py.

References dataset.Dataset.__getMagneticField(), and dataset.Dataset.__magneticField.

712  def magneticField( self ):
713  if not self.__magneticField:
714  self.__magneticField = self.__getMagneticField()
715  return self.__magneticField
716 
def __getMagneticField(self)
Definition: dataset.py:420
def magneticField(self)
Definition: dataset.py:712
def dataset.Dataset.magneticFieldForRun (   self,
  run = -1 
)

Definition at line 717 of file dataset.py.

References dataset.Dataset.__getMagneticFieldForRun().

717  def magneticFieldForRun( self, run = -1 ):
718  return self.__getMagneticFieldForRun(run)
719 
def __getMagneticFieldForRun(self, run=-1, tolerance=0.5)
Definition: dataset.py:501
def magneticFieldForRun(self, run=-1)
Definition: dataset.py:717
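magneticField() gives the field configuration for the dataset as a whole (cached in __magneticField after the first lookup), while magneticFieldForRun() asks __getMagneticFieldForRun() for the value in a specific run, which is what dump_cff() uses to annotate run-dependent data. A hedged usage sketch (placeholder dataset and run number; DAS and conditions access assumed):

d = Dataset("/SomePrimaryDataset/SomeEra/ALCARECO")   # placeholder name
print(d.magneticField())              # e.g. "MagneticField" for run data
print(d.magneticFieldForRun(316569))  # e.g. 3.8, the field value for that run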
def dataset.Dataset.name (   self)
def dataset.Dataset.parentDataset (   self)

Definition at line 720 of file dataset.py.

References dataset.Dataset.__getParentDataset(), dataset.Dataset.__parentDataset, and dataset.Dataset.datasetSnippet().

Referenced by dataset.Dataset.__getFileInfoList().

720  def parentDataset( self ):
721  if not self.__parentDataset:
722  self.__parentDataset = self.__getParentDataset()
723  return self.__parentDataset
724 
def parentDataset(self)
Definition: dataset.py:720
def __getParentDataset(self)
Definition: dataset.py:410
def dataset.Dataset.predefined (   self)

Definition at line 917 of file dataset.py.

References dataset.Dataset.__predefined.

917  def predefined( self ):
918  return self.__predefined
919 
def predefined(self)
Definition: dataset.py:917
def dataset.Dataset.printInfo (   self)

Definition at line 321 of file dataset.py.

References dataset.EOSDataset.castorDir, dataset.Dataset.castorDir, dataset.Dataset.lfnDir, and dataset.BaseDataset.name.

321  def printInfo(self):
322  print 'sample : ' + self.name
323  print 'LFN : ' + self.lfnDir
324  print 'Castor path : ' + self.castorDir
325 
def printInfo(self)
Definition: dataset.py:321
def dataset.Dataset.runList (   self)

Definition at line 921 of file dataset.py.

References dataset.Dataset.__getRunList().

921  def runList( self ):
922  return self.__getRunList()
923 
924 
def __getRunList(self)
Definition: dataset.py:617
def runList(self)
Definition: dataset.py:921

Member Data Documentation

dataset.Dataset.__cmssw
private

Definition at line 25 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField(), and dataset.Dataset.dump_cff().

dataset.Dataset.__cmsswrelease
private

Definition at line 26 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField().

dataset.Dataset.__dasinstance
private
dataset.Dataset.__dasLimit
private

Definition at line 23 of file dataset.py.

Referenced by dataset.Dataset.fileInfoList().

dataset.Dataset.__dataType
private
tuple dataset.Dataset.__dummy_source_template
staticprivate
Initial value:
1 = ("readFiles = cms.untracked.vstring()\n"
2  "secFiles = cms.untracked.vstring()\n"
3  "%(process)ssource = cms.Source(\"PoolSource\",\n"
4  "%(tab)s secondaryFileNames ="
5  "secFiles,\n"
6  "%(tab)s fileNames = readFiles\n"
7  ")\n"
8  "readFiles.extend(['dummy_File.root'])\n"
9  "%(process)smaxEvents = cms.untracked.PSet( "
10  "input = cms.untracked.int32(%(nEvents)s) )\n"
11  "%(skipEventsString)s\n")

Definition at line 110 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().
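The template is an ordinary %-formatting string; __createSnippet() fills it with a repMap such as the theMap dictionaries shown in datasetSnippet() and dump_cff() above. A minimal sketch of that substitution, abbreviating the template to its last lines:

template = ("%(process)smaxEvents = cms.untracked.PSet( "
            "input = cms.untracked.int32(%(nEvents)s) )\n"
            "%(skipEventsString)s\n")
repMap = {"process": "", "tab": "", "nEvents": str(-1), "skipEventsString": ""}
print(template % repMap)
# -> maxEvents = cms.untracked.PSet( input = cms.untracked.int32(-1) )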

dataset.Dataset.__filename
private
dataset.Dataset.__firstusedrun
private
dataset.Dataset.__inputMagneticField
private
dataset.Dataset.__lastusedrun
private
dataset.Dataset.__magneticField
private
dataset.Dataset.__name
private
dataset.Dataset.__official
private

Definition at line 33 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__origName
private

Definition at line 22 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__parentDataset
private

Definition at line 29 of file dataset.py.

Referenced by dataset.Dataset.parentDataset().

dataset.Dataset.__predefined
private
dataset.Dataset.__source_template
staticprivate

Definition at line 92 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().

dataset.Dataset.bad_files

Definition at line 282 of file dataset.py.

dataset.Dataset.castorDir

Definition at line 266 of file dataset.py.

Referenced by dataset.Dataset.extractFileSizes(), and dataset.Dataset.printInfo().

dataset.Dataset.dasData = das_client.get_data(dasQuery, dasLimit)
static

Definition at line 354 of file dataset.py.

dataset.Dataset.error = self.__findInJson(jsondict,["data","error"])
static
dataset.Dataset.files

Definition at line 273 of file dataset.py.

dataset.Dataset.filesAndSizes

Definition at line 311 of file dataset.py.

dataset.Dataset.good_files

Definition at line 283 of file dataset.py.

int dataset.Dataset.i = 0
static

Definition at line 371 of file dataset.py.

dataset.Dataset.jsondict = json.loads( dasData )
static

Definition at line 356 of file dataset.py.

string dataset.Dataset.jsonfile = "das_query_output_%i.txt"
static

Definition at line 370 of file dataset.py.

dataset.Dataset.jsonfile = jsonfile%i
static

Definition at line 374 of file dataset.py.

dataset.Dataset.jsonstr = self.__findInJson(jsondict,"reason")
static

Definition at line 366 of file dataset.py.

dataset.Dataset.lfnDir

Definition at line 265 of file dataset.py.

Referenced by dataset.Dataset.printInfo().

dataset.Dataset.maskExists

Definition at line 267 of file dataset.py.

string dataset.Dataset.msg = "The DAS query returned an error. The output is very long, and has been stored in:\n"
static
dataset.Dataset.report

Definition at line 268 of file dataset.py.

Referenced by addOnTests.testit.run().

dataset.Dataset.theFile = open( jsonfile, "w" )
static

Definition at line 375 of file dataset.py.