dataset.Dataset Class Reference
Inheritance diagram for dataset.Dataset:
dataset.BaseDataset

Public Member Functions

def __init__
 
def __init__
 
def buildListOfBadFiles
 
def buildListOfFiles
 
def convertTimeToRun
 
def datasetSnippet
 
def dataType
 
def dump_cff
 
def extractFileSizes
 
def fileInfoList
 
def fileList
 
def forcerunrange
 
def getForceRunRangeFunction
 
def getPrimaryDatasetEntries
 
def magneticField
 
def magneticFieldForRun
 
def name
 
def parentDataset
 
def predefined
 
def printInfo
 
def runList
 
- Public Member Functions inherited from dataset.BaseDataset
def __init__
def __init__(self, name, user, pattern='.*root')
 
def buildListOfBadFiles
 
def buildListOfFiles
 
def extractFileSizes
 
def getPrimaryDatasetEntries
 
def listOfFiles
 
def listOfGoodFiles
 
def listOfGoodFilesWithPrescale
 
def printFiles
 
def printInfo
 

Public Attributes

 bad_files
 
 castorDir
 
 files
 
 filesAndSizes
 
 good_files
 
 lfnDir
 
 maskExists
 
 report
 
- Public Attributes inherited from dataset.BaseDataset
 bad_files
 
 dbsInstance
 
 files
 
 filesAndSizes
 
 good_files
 
 name
 
 pattern
 
 primaryDatasetEntries
 
 report
 
 run_range
 
 user
 

Private Member Functions

def __chunks
 
def __createSnippet
 
def __dateString
 
def __datetime
 
def __find_ge
 
def __find_lt
 
def __findInJson
 
def __getData
 
def __getDataType
 
def __getFileInfoList
 
def __getMagneticField
 
def __getMagneticFieldForRun
 
def __getParentDataset
 
def __getRunList
 

Private Attributes

 __alreadyStored
 
 __cmssw
 
 __cmsswrelease
 
 __dasLimit
 
 __dataType
 
 __fileInfoList
 
 __fileList
 
 __filename
 
 __firstusedrun
 
 __firstUsedRun
 
 __lastusedrun
 
 __lastUsedRun
 
 __magneticField
 
 __name
 
 __official
 
 __origName
 
 __parentDataset
 
 __parentFileInfoList
 
 __parentFileList
 
 __predefined
 
 __runList
 

Static Private Attributes

tuple __dummy_source_template
 

Detailed Description

Definition at line 14 of file dataset.py.

Constructor & Destructor Documentation

def dataset.Dataset.__init__ (   self,
  datasetName,
  dasLimit = 0,
  tryPredefinedFirst = True,
  cmssw = os.environ["CMSSW_BASE"],
  cmsswrelease = os.environ["CMSSW_RELEASE_BASE"] 
)

Definition at line 16 of file dataset.py.

Referenced by dataset.Dataset.__init__().

16  def __init__( self, datasetName, dasLimit = 0, tryPredefinedFirst = True,
17  cmssw = os.environ["CMSSW_BASE"], cmsswrelease = os.environ["CMSSW_RELEASE_BASE"]):
18  self.__name = datasetName
19  self.__origName = datasetName
20  self.__dasLimit = dasLimit
21  self.__fileList = None
22  self.__fileInfoList = None
23  self.__runList = None
24  self.__alreadyStored = False
25  self.__cmssw = cmssw
26  self.__cmsswrelease = cmsswrelease
27  self.__firstusedrun = None
28  self.__lastusedrun = None
29  self.__parentDataset = None
30  self.__parentFileList = None
31  self.__parentFileInfoList = None
32 
33  # check, if dataset name matches CMS dataset naming scheme
34  if re.match( r'/.+/.+/.+', self.__name ):
35  self.__official = True
36  fileName = "Dataset" + self.__name.replace("/","_") + "_cff.py"
37  else:
38  self.__official = False
39  fileName = self.__name + "_cff.py"
40 
41  searchPath1 = os.path.join( self.__cmssw, "python",
42  "Alignment", "OfflineValidation",
43  fileName )
44  searchPath2 = os.path.join( self.__cmssw, "src",
45  "Alignment", "OfflineValidation",
46  "python", fileName )
47  searchPath3 = os.path.join( self.__cmsswrelease,
48  "python", "Alignment",
49  "OfflineValidation", fileName )
50  if self.__official and not tryPredefinedFirst:
51  self.__predefined = False
52  elif os.path.exists( searchPath1 ):
53  self.__predefined = True
54  self.__filename = searchPath1
55  elif os.path.exists( searchPath2 ):
56  msg = ("The predefined dataset '%s' does exist in '%s', but "
57  "you need to run 'scram b' first."
58  %( self.__name, searchPath2 ))
59  if self.__official:
60  print msg
61  print "Getting the data from DAS again. To go faster next time, run scram b."
62  else:
63  raise AllInOneError( msg )
64  elif os.path.exists( searchPath3 ):
65  self.__predefined = True
66  self.__filename = searchPath3
67  elif self.__official:
68  self.__predefined = False
69  else:
70  msg = ("The predefined dataset '%s' does not exist. Please "
71  "create it first or check for typos."%( self.__name ))
72  raise AllInOneError( msg )
73 
74  if self.__predefined and self.__official:
75  self.__name = "Dataset" + self.__name.replace("/","_")
76 
77  self.__dataType = self.__getDataType()
def dataset.Dataset.__init__ (   self,
  name,
  user,
  pattern = '.*root' 
)

Definition at line 265 of file dataset.py.

References dataset.Dataset.__init__().

266  def __init__(self, name, user, pattern='.*root'):
267  self.lfnDir = castorBaseDir(user) + name
268  self.castorDir = castortools.lfnToCastor( self.lfnDir )
269  self.maskExists = False
270  self.report = None
271  super(Dataset, self).__init__(name, user, pattern)

Member Function Documentation

def dataset.Dataset.__chunks (   self,
  theList,
  n 
)
private
Yield successive n-sized chunks from theList.

Definition at line 79 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().

79 
80  def __chunks( self, theList, n ):
81  """ Yield successive n-sized chunks from theList.
82  """
83  for i in xrange( 0, len( theList ), n ):
84  yield theList[i:i+n]
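For illustration, here is a minimal standalone sketch of the same chunking idiom (the names are hypothetical stand-ins, not part of dataset.py; range() replaces the Python 2 xrange() used above):

import itertools  # not needed; plain slicing suffices

def chunks(theList, n):
    """Yield successive n-sized chunks from theList."""
    for i in range(0, len(theList), n):
        yield theList[i:i + n]

# splitting 7 items into chunks of 3 gives [[0, 1, 2], [3, 4, 5], [6]]
assert list(chunks(list(range(7)), 3)) == [[0, 1, 2], [3, 4, 5], [6]]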
def dataset.Dataset.__createSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  repMap = None,
  crab = False,
  parent = False 
)
private

Definition at line 117 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__findInJson(), dataset.Dataset.__getRunList(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.predefined().

Referenced by dataset.Dataset.datasetSnippet(), and dataset.Dataset.dump_cff().

117  def __createSnippet( self, jsonPath = None, begin = None, end = None, firstRun = None, lastRun = None, repMap = None,
118  crab = False, parent = False ):
119  if firstRun:
120  firstRun = int( firstRun )
121  if lastRun:
122  lastRun = int( lastRun )
123  if ( begin and firstRun ) or ( end and lastRun ):
124  msg = ( "The Usage of "
125  + "'begin' & 'firstRun' " * int( bool( begin and
126  firstRun ) )
127  + "and " * int( bool( ( begin and firstRun ) and
128  ( end and lastRun ) ) )
129  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
130  + "is ambiguous." )
131  raise AllInOneError( msg )
132  if begin or end:
133  ( firstRun, lastRun ) = self.convertTimeToRun(
134  begin = begin, end = end, firstRun = firstRun,
135  lastRun = lastRun )
136  if ( firstRun and lastRun ) and ( firstRun > lastRun ):
137  msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
138  "chosen is greater than the upper time/runrange limit "
139  "('end'/'lastRun').")
140  raise AllInOneError( msg )
141  if self.predefined() and (jsonPath or begin or end or firstRun or lastRun):
142  msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun'"
143  "only work for official datasets, not predefined _cff.py files" )
144  raise AllInOneError( msg )
145  goodLumiSecStr = ""
146  lumiStr = ""
147  lumiSecExtend = ""
148  if firstRun or lastRun or jsonPath:
149  goodLumiSecStr = ( "lumiSecs = cms.untracked."
150  "VLuminosityBlockRange()\n" )
151  lumiStr = " lumisToProcess = lumiSecs,\n"
152  if not jsonPath:
153  selectedRunList = self.__getRunList()
154  if firstRun:
155  selectedRunList = [ run for run in selectedRunList \
156  if self.__findInJson(run, "run_number") >= firstRun ]
157  if lastRun:
158  selectedRunList = [ run for run in selectedRunList \
159  if self.__findInJson(run, "run_number") <= lastRun ]
160  lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
161  + str( self.__findInJson(run, "run_number") ) + ":max" \
162  for run in selectedRunList ]
163  splitLumiList = list( self.__chunks( lumiList, 255 ) )
164  else:
165  theLumiList = None
166  try:
167  theLumiList = LumiList ( filename = jsonPath )
168  except ValueError:
169  pass
170 
171  if theLumiList is not None:
172  allRuns = theLumiList.getRuns()
173  runsToRemove = []
174  for run in allRuns:
175  if firstRun and int( run ) < firstRun:
176  runsToRemove.append( run )
177  if lastRun and int( run ) > lastRun:
178  runsToRemove.append( run )
179  theLumiList.removeRuns( runsToRemove )
180  splitLumiList = list( self.__chunks(
181  theLumiList.getCMSSWString().split(','), 255 ) )
182  else:
183  with open(jsonPath) as f:
184  jsoncontents = f.read()
185  if "process.source.lumisToProcess" in jsoncontents:
186  msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
187  if firstRun or lastRun:
188  msg += ("\n (after applying firstRun and/or lastRun)")
189  msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
190  msg += "\nCheck your config file to make sure that it worked properly."
191  print msg
193  self.__firstUsedRun = -1
194  self.__lastUsedRun = -1
195  if firstRun or lastRun:
196  jsoncontents = re.sub("\d+:(\d+|max)-\d+:(\d+|max)", self.getForceRunRangeFunction(firstRun, lastRun), jsoncontents)
197  lumiSecExtend = jsoncontents
198  splitLumiList = [[""]]
199 
200  if not len(splitLumiList[0][0]) == 0:
201  lumiSecStr = [ "',\n'".join( lumis ) \
202  for lumis in splitLumiList ]
203  lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
204  for lumis in lumiSecStr ]
205  lumiSecExtend = "\n".join( lumiSecStr )
206  self.__firstusedrun = splitLumiList[0][0].split(":")[0]
207  self.__lastusedrun = splitLumiList[-1][-1].split(":")[0]
208  else:
209  self.__firstusedrun = self.__findInJson(self.__getRunList()[0],"run_number")
210  self.__lastusedrun = self.__findInJson(self.__getRunList()[-1],"run_number")
211 
212  if crab:
213  files = ""
214  else:
215  splitFileList = list( self.__chunks( self.fileList(), 255 ) )
216  fileStr = [ "',\n'".join( files ) for files in splitFileList ]
217  fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
218  for files in fileStr ]
219  files = "\n".join( fileStr )
220 
221  if parent:
222  splitParentFileList = list( self.__chunks( self.fileList(parent = True), 255 ) )
223  parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ]
224  parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
225  for parentFiles in parentFileStr ]
226  parentFiles = "\n".join( parentFileStr )
227  files += "\n\n" + parentFiles
228 
229 
230  theMap = repMap
231  theMap["files"] = files
232  theMap["json"] = jsonPath
233  theMap["lumiStr"] = lumiStr
234  theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
235  theMap["lumiSecExtend"] = lumiSecExtend
236  if crab:
237  dataset_snippet = self.__dummy_source_template%( theMap )
238  else:
239  dataset_snippet = self.__source_template%( theMap )
240  return dataset_snippet
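To see how the 255-entry extend blocks above are assembled, here is a rough standalone sketch (the run numbers and the local chunks() helper are hypothetical; the real method pulls runs from DAS and substitutes the result into a template):

def chunks(theList, n):
    # same chunking idiom as Dataset.__chunks
    for i in range(0, len(theList), n):
        yield theList[i:i + n]

runs = [283270, 283271, 283305]          # hypothetical run numbers
lumiList = ["%s:1-%s:max" % (run, run) for run in runs]
splitLumiList = list(chunks(lumiList, 255))
lumiSecStr = ["',\n'".join(lumis) for lumis in splitLumiList]
lumiSecExtend = "\n".join("lumiSecs.extend( [\n'" + lumis + "'\n] )"
                          for lumis in lumiSecStr)
print(lumiSecExtend)
# lumiSecs.extend( [
# '283270:1-283270:max',
# '283271:1-283271:max',
# '283305:1-283305:max'
# ] )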
def dataset.Dataset.__dateString (   self,
  date 
)
private

Definition at line 548 of file dataset.py.

References dataset.Dataset.convertTimeToRun().

Referenced by dataset.Dataset.convertTimeToRun().

549  def __dateString(self, date):
550  return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)
def dataset.Dataset.__datetime (   self,
  stringForDas 
)
private

Definition at line 539 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

540  def __datetime(self, stringForDas):
541  if len(stringForDas) != 8:
542  raise AllInOneError(stringForDas + " is not a valid date string.\n"
543  + "DAS accepts dates in the form 'yyyymmdd'")
544  year = stringForDas[:4]
545  month = stringForDas[4:6]
546  day = stringForDas[6:8]
547  return datetime.date(int(year), int(month), int(day))
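A small self-contained sketch of the round trip between these two helpers (the function names are hypothetical stand-ins for the private methods):

import datetime

def to_date(stringForDas):               # mirrors __datetime
    if len(stringForDas) != 8:
        raise ValueError(stringForDas + " is not of the form 'yyyymmdd'")
    return datetime.date(int(stringForDas[:4]),
                         int(stringForDas[4:6]),
                         int(stringForDas[6:8]))

def to_string(date):                     # mirrors __dateString
    return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)

d = to_date("20160705")
assert to_string(d) == "20160705"
# shifting by a timedelta, as convertTimeToRun() does when widening windows:
assert to_string(d + datetime.timedelta(5)) == "20160710"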
def dataset.Dataset.__find_ge (   self,
  a,
  x 
)
private

Definition at line 248 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

249  def __find_ge( self, a, x):
250  'Find leftmost item greater than or equal to x'
251  i = bisect.bisect_left( a, x )
252  if i != len( a ):
253  return i
254  raise ValueError
def dataset.Dataset.__find_lt (   self,
  a,
  x 
)
private

Definition at line 241 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

242  def __find_lt( self, a, x ):
243  'Find rightmost value less than x'
244  i = bisect.bisect_left( a, x )
245  if i:
246  return i-1
247  raise ValueError
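Both helpers return an index into the sorted run list, not the run itself; convertTimeToRun() then looks up runList[runIndex]. A standalone sketch with a hypothetical run list:

import bisect

def find_ge(a, x):
    'Find leftmost item greater than or equal to x'
    i = bisect.bisect_left(a, x)
    if i != len(a):
        return i
    raise ValueError

def find_lt(a, x):
    'Find rightmost value less than x'
    i = bisect.bisect_left(a, x)
    if i:
        return i - 1
    raise ValueError

runs = [273000, 273100, 273500]                # hypothetical, sorted
assert runs[find_ge(runs, 273050)] == 273100   # first run at or after 273050
assert runs[find_lt(runs, 273500)] == 273100   # last run strictly before 273500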
def dataset.Dataset.__findInJson (   self,
  jsondict,
  strings 
)
private

Definition at line 255 of file dataset.py.

References dataset.Dataset.__findInJson().

Referenced by dataset.Dataset.__createSnippet(), dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.fileList().

256  def __findInJson(self, jsondict, strings):
257  if isinstance(strings, str):
258  strings = [ strings ]
259 
260  if len(strings) == 0:
261  return jsondict
262  if isinstance(jsondict,dict):
263  if strings[0] in jsondict:
264  try:
265  return self.__findInJson(jsondict[strings[0]], strings[1:])
266  except KeyError:
267  pass
268  else:
269  for a in jsondict:
270  if strings[0] in a:
271  try:
272  return self.__findInJson(a[strings[0]], strings[1:])
273  except (TypeError, KeyError): #TypeError because a could be a string and contain strings[0]
274  pass
275  #if it's not found
276  raise KeyError("Can't find " + strings[0])
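A runnable standalone copy of the same walk, applied to a hypothetical DAS-like structure, shows how a chain of keys is followed through nested dicts and lists:

def findInJson(jsondict, strings):
    # standalone sketch of Dataset.__findInJson
    if isinstance(strings, str):
        strings = [strings]
    if len(strings) == 0:
        return jsondict
    if isinstance(jsondict, dict):
        if strings[0] in jsondict:
            try:
                return findInJson(jsondict[strings[0]], strings[1:])
            except KeyError:
                pass
    else:
        for a in jsondict:
            if strings[0] in a:
                try:
                    return findInJson(a[strings[0]], strings[1:])
                except (TypeError, KeyError):
                    pass
    raise KeyError("Can't find " + strings[0])

data = [{"run": [{"run_number": 283270}]}]   # hypothetical DAS output
assert findInJson(data[0], ["run", "run_number"]) == 283270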
def dataset.Dataset.__getData (   self,
  dasQuery,
  dasLimit = 0 
)
private

Definition at line 304 of file dataset.py.

References dataset.Dataset.__findInJson().

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), and dataset.Dataset.convertTimeToRun().

305  def __getData( self, dasQuery, dasLimit = 0 ):
306  dasData = das_client.get_data( 'https://cmsweb.cern.ch',
307  dasQuery, 0, dasLimit, False )
308  if isinstance(dasData, str):
309  jsondict = json.loads( dasData )
310  else:
311  jsondict = dasData
312  # Check, if the DAS query fails
313  try:
314  error = self.__findInJson(jsondict,["data","error"])
315  except KeyError:
316  error = None
317  if error or self.__findInJson(jsondict,"status") != 'ok' or "data" not in jsondict:
318  msg = ("The DAS query returned an error. Here is the output\n" + str(jsondict) +
319  "\nIt's possible that this was a server error. If so, it may work if you try again later")
320  raise AllInOneError(msg)
321  return self.__findInJson(jsondict,"data")
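The sanity checks applied to the reply can be exercised without contacting DAS; a minimal sketch with a canned reply (the JSON body below is hypothetical, the real one comes from das_client.get_data):

import json

reply = '{"status": "ok", "data": [{"dataset": [{"name": "/A/B/C"}]}]}'
jsondict = json.loads(reply)
# the same checks __getData applies before trusting the payload
if jsondict.get("status") != "ok" or "data" not in jsondict:
    raise RuntimeError("The DAS query returned an error:\n" + str(jsondict))
payload = jsondict["data"]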
def dataset.Dataset.__getDataType (   self)
private

Definition at line 322 of file dataset.py.

References dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, and dataset.Dataset.__predefined.

Referenced by dataset.Dataset.dataType().

323  def __getDataType( self ):
324  if self.__predefined:
325  with open(self.__filename) as f:
326  datatype = None
327  for line in f.readlines():
328  if line.startswith("#data type: "):
329  if datatype is not None:
330  raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
331  datatype = line.replace("#data type: ", "").replace("\n","")
332  return "unknown"
333 
334  dasQuery_type = ( 'dataset dataset=%s | grep dataset.datatype,'
335  'dataset.name'%( self.__name ) )
336  data = self.__getData( dasQuery_type )
337 
338  try:
339  return self.__findInJson(data, ["dataset", "datatype"])
340  except KeyError:
341  print ("Cannot find the datatype of the dataset '%s'\n"
342  "It may not be possible to automatically find the magnetic field,\n"
343  "and you will not be able to run in CRAB mode"
344  %( self.name() ))
345  return "unknown"
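The predefined branch simply scans the _cff.py header comments that dump_cff() writes; a minimal sketch with a hypothetical file body:

lines = ["#/MinimumBias/Run2016B-v1/RECO",          # hypothetical header
         "#data type: data",
         "#magnetic field: AutoFromDBCurrent, 3.8"]
datatype = None
for line in lines:
    if line.startswith("#data type: "):
        if datatype is not None:
            raise RuntimeError("multiple 'data type' lines")
        datatype = line.replace("#data type: ", "").replace("\n", "")
assert datatype == "data"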
def dataset.Dataset.__getFileInfoList (   self,
  dasLimit,
  parent = False 
)
private

Definition at line 460 of file dataset.py.

References dataset.Dataset.__fileInfoList, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, dataset.Dataset.__parentFileInfoList, dataset.Dataset.__predefined, and dataset.Dataset.parentDataset().

Referenced by dataset.Dataset.fileInfoList().

461  def __getFileInfoList( self, dasLimit, parent = False ):
462  if self.__predefined:
463  if parent:
464  extendstring = "secFiles.extend"
465  else:
466  extendstring = "readFiles.extend"
467  with open(self.__filename) as f:
468  files = []
469  copy = False
470  for line in f.readlines():
471  if "]" in line:
472  copy = False
473  if copy:
474  files.append({"name": line.translate(None, "', " + '"')})
475  if extendstring in line and "[" in line and "]" not in line:
476  copy = True
477  return files
478 
479  if self.__fileInfoList and not parent:
480  return self.__fileInfoList
481  if self.__parentFileInfoList and parent:
482  return self.__parentFileInfoList
483 
484  if parent:
485  searchdataset = self.parentDataset()
486  else:
487  searchdataset = self.__name
488  dasQuery_files = ( 'file dataset=%s | grep file.name, file.nevents, '
489  'file.creation_time, '
490  'file.modification_time'%( searchdataset ) )
491  print "Requesting file information for '%s' from DAS..."%( searchdataset ),
492  data = self.__getData( dasQuery_files, dasLimit )
493  print "Done."
494  data = [ self.__findInJson(entry,"file") for entry in data ]
495  if len( data ) == 0:
496  msg = ("No files are available for the dataset '%s'. This can be "
497  "due to a typo or due to a DAS problem. Please check the "
498  "spelling of the dataset and/or retry to run "
499  "'validateAlignments.py'."%( self.name() ))
500  raise AllInOneError( msg )
501  fileInformationList = []
502  for file in data:
503  fileName = 'unknown'
504  try:
505  fileName = self.__findInJson(file, "name")
506  fileCreationTime = self.__findInJson(file, "creation_time")
507  fileNEvents = self.__findInJson(file, "nevents")
508  except KeyError:
509  print ("DAS query gives bad output for file '%s'. Skipping it.\n"
510  "It may work if you try again later.") % fileName
511  fileNEvents = 0
512  # select only non-empty files
513  if fileNEvents == 0:
514  continue
515  fileDict = { "name": fileName,
516  "creation_time": fileCreationTime,
517  "nevents": fileNEvents
518  }
519  fileInformationList.append( fileDict )
520  fileInformationList.sort( key=lambda info: self.__findInJson(info,"name") )
521  if parent:
522  self.__parentFileInfoList = fileInformationList
523  else:
524  self.__fileInfoList = fileInformationList
525  return fileInformationList
def dataset.Dataset.__getMagneticField (   self)
private

Definition at line 356 of file dataset.py.

References dataset.Dataset.__cmsswrelease, dataset.Dataset.__dataType, dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, and dataset.Dataset.__predefined.

Referenced by dataset.Dataset.magneticField().

357  def __getMagneticField( self ):
358  Bfieldlocation = os.path.join( self.__cmsswrelease, "python", "Configuration", "StandardSequences" )
359  Bfieldlist = [ f.replace("MagneticField_",'').replace("_cff.py",'') \
360  for f in os.listdir(Bfieldlocation) \
361  if f.startswith("MagneticField_") and f.endswith("_cff.py") and f != "MagneticField_cff.py" ]
362  Bfieldlist.sort( key = lambda Bfield: -len(Bfield) ) #Put it in order of decreasing length, so that searching in the name gives the longer match
363 
364  if self.__predefined:
365  with open(self.__filename) as f:
366  datatype = None
367  Bfield = None
368  for line in f.readlines():
369  if line.startswith("#data type: "):
370  if datatype is not None:
371  raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
372  datatype = line.replace("#data type: ", "").replace("\n","")
373  if line.startswith("#magnetic field: "):
374  if Bfield is not None:
375  raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
376  Bfield = line.replace("#magnetic field: ", "").replace("\n","")
377  if Bfield is not None:
378  Bfield = Bfield.split(",")[0]
379  if Bfield in Bfieldlist or Bfield == "unknown":
380  return Bfield
381  else:
382  print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
383  print "Using Bfield='unknown' - this will revert to the default"
384  return "unknown"
385  elif datatype == "data":
386  return "AutoFromDBCurrent" #this should be in the "#magnetic field" line, but for safety in case it got messed up
387  else:
388  return "unknown"
389 
390  if self.__dataType == "data":
391  return "AutoFromDBCurrent"
392 
393  dasQuery_B = ( 'dataset dataset=%s'%( self.__name ) ) #try to find the magnetic field from DAS
394  data = self.__getData( dasQuery_B ) #it seems to be there for the newer (7X) MC samples, except cosmics
395 
396  try:
397  Bfield = self.__findInJson(data, ["dataset", "mcm", "sequences", "magField"])
398  if Bfield in Bfieldlist:
399  return Bfield
400  elif Bfield == "":
401  pass
402  else:
403  print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
404  print "Using Bfield='unknown' - this will revert to the default magnetic field"
405  return "unknown"
406  except KeyError:
407  pass
408 
409  for possibleB in Bfieldlist:
410  if possibleB in self.__name.replace("TkAlCosmics0T", ""): #for some reason all cosmics dataset names contain this string
411  return possibleB
412 
413  return "unknown"
def dataset.Dataset.__getMagneticFieldForRun (   self,
  run = -1,
  tolerance = 0.5 
)
private
For MC, this returns the same as the previous function.
   For data, it gets the magnetic field from the runs.  This is important for
   deciding which template to use for offlinevalidation

Definition at line 414 of file dataset.py.

References dataset.Dataset.__filename, dataset.Dataset.__findInJson(), dataset.Dataset.__firstusedrun, dataset.Dataset.__getData(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__lastusedrun, dataset.Dataset.__magneticField, dataset.Dataset.__name, and dataset.Dataset.__predefined.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dump_cff(), and dataset.Dataset.magneticFieldForRun().

415  def __getMagneticFieldForRun( self, run = -1, tolerance = 0.5 ):
416  """For MC, this returns the same as the previous function.
417  For data, it gets the magnetic field from the runs. This is important for
418  deciding which template to use for offlinevalidation
419  """
420  if "T" in self.__magneticField: #for MC
421  Bfield = self.__magneticField.split("T")[0]
422  return float(Bfield) / 10.0 #e.g. 38T and 38T_PostLS1 both return 3.8
423  if self.__predefined:
424  with open(self.__filename) as f:
425  Bfield = None
426  for line in f.readlines():
427  if line.startswith("#magnetic field: ") and "," in line:
428  if Bfield is not None:
429  raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
430  return float(line.replace("#magnetic field: ", "").split(",")[1])
431 
432  if run > 0:
433  dasQuery = ('run = %s'%run) #for data
434  data = self.__getData(dasQuery)
435  try:
436  return self.__findInJson(data, ["run","bfield"])
437  except KeyError:
438  return "unknown Can't get the magnetic field for run %s from DAS" % run
439 
440  #run < 0 - find B field for the first and last runs, and make sure they're compatible
441  # (to within tolerance)
442  #NOT FOOLPROOF! The magnetic field might go up and then down, or vice versa
443  if self.__firstusedrun is None or self.__lastusedrun is None:
444  return "unknown Can't get the exact magnetic field for the dataset until data has been retrieved from DAS."
445  firstrunB = self.__getMagneticFieldForRun(self.__firstusedrun)
446  lastrunB = self.__getMagneticFieldForRun(self.__lastusedrun)
447  try:
448  if abs(firstrunB - lastrunB) <= tolerance:
449  return .5*(firstrunB + lastrunB)
450  print firstrunB, lastrunB, tolerance
451  return ("unknown The beginning and end of your run range for %s\n"
452  "have different magnetic fields (%s, %s)!\n"
453  "Try limiting the run range using firstRun, lastRun, begin, end, or JSON,\n"
454  "or increasing the tolerance (in dataset.py) from %s.") % (self.__name, firstrunB, lastrunB, tolerance)
455  except TypeError:
456  if "unknown" in firstrunB:
457  return firstrunB
458  else:
459  return lastrunB
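The arithmetic of the MC branch and the tolerance check can be illustrated standalone (all values below are hypothetical):

magneticField = "38T_PostLS1"
if "T" in magneticField:                     # MC branch
    Bfield = float(magneticField.split("T")[0]) / 10.0
assert Bfield == 3.8                         # "38T" and "38T_PostLS1" both give 3.8

firstrunB, lastrunB, tolerance = 3.8, 3.79, 0.5
if abs(firstrunB - lastrunB) <= tolerance:   # fields compatible: average them
    average = .5 * (firstrunB + lastrunB)    # 3.795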
def dataset.Dataset.__getParentDataset (   self)
private

Definition at line 346 of file dataset.py.

References dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), and dataset.Dataset.__name.

Referenced by dataset.Dataset.parentDataset().

347  def __getParentDataset( self ):
348  dasQuery = "parent dataset=" + self.__name
349  data = self.__getData( dasQuery )
350  try:
351  return self.__findInJson(data, ["parent", "name"])
352  except KeyError:
353  raise AllInOneError("Cannot find the parent of the dataset '" + self.__name + "'\n"
354  "Here is the DAS output:\n" + str(data) +
355  "\nIt's possible that this was a server error. If so, it may work if you try again later")
def dataset.Dataset.__getRunList (   self)
private

Definition at line 526 of file dataset.py.

References dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__name, and dataset.Dataset.__runList.

Referenced by dataset.Dataset.__createSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.runList().

527  def __getRunList( self ):
528  if self.__runList:
529  return self.__runList
530  dasQuery_runs = ( 'run dataset=%s | grep run.run_number,'
531  'run.creation_time'%( self.__name ) )
532  print "Requesting run information for '%s' from DAS..."%( self.__name ),
533  data = self.__getData( dasQuery_runs )
534  print "Done."
535  data = [ self.__findInJson(entry,"run") for entry in data ]
536  data.sort( key = lambda run: self.__findInJson(run, "run_number") )
537  self.__runList = data
538  return data
def dataset.Dataset.buildListOfBadFiles (   self)
fills the list of bad files from the IntegrityCheck log.

When the integrity check file is not available,
files are considered as good.

Definition at line 276 of file dataset.py.

277  def buildListOfBadFiles(self):
278  '''fills the list of bad files from the IntegrityCheck log.
279 
280  When the integrity check file is not available,
281  files are considered as good.'''
282  mask = "IntegrityCheck"
283 
284  self.bad_files = {}
285  self.good_files = []
286 
287  file_mask = castortools.matchingFiles(self.castorDir, '^%s_.*\.txt$' % mask)
288  if file_mask:
289  from CMGTools.Production.edmIntegrityCheck import PublishToFileSystem
290  p = PublishToFileSystem(mask)
291  report = p.get(self.castorDir)
292  if report is not None and report:
293  self.maskExists = True
294  self.report = report
295  dup = report.get('ValidDuplicates',{})
296  for name, status in report['Files'].iteritems():
297  # print name, status
298  if not status[0]:
299  self.bad_files[name] = 'MarkedBad'
300  elif dup.has_key(name):
301  self.bad_files[name] = 'ValidDup'
302  else:
303  self.good_files.append( name )
304  else:
305  raise IntegrityCheckError( "ERROR: IntegrityCheck log file IntegrityCheck_XXXXXXXXXX.txt not found" )
def dataset.Dataset.buildListOfFiles (   self,
  pattern = '.*root' 
)
fills list of files, taking all root files matching the pattern in the castor dir

Definition at line 272 of file dataset.py.

273  def buildListOfFiles(self, pattern='.*root'):
274  '''fills list of files, taking all root files matching the pattern in the castor dir'''
275  self.files = castortools.matchingFiles( self.castorDir, pattern )
def dataset.Dataset.convertTimeToRun (   self,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  shortTuple = True 
)

Definition at line 553 of file dataset.py.

References dataset.Dataset.__dateString(), dataset.Dataset.__datetime(), dataset.Dataset.__find_ge(), dataset.Dataset.__find_lt(), dataset.Dataset.__findInJson(), dataset.Dataset.__getData(), dataset.Dataset.__getRunList(), and dataset.Dataset.__name.

Referenced by dataset.Dataset.__createSnippet(), and dataset.Dataset.__dateString().

553  def convertTimeToRun( self, begin = None, end = None, firstRun = None, lastRun = None,
554  shortTuple = True ):
555  if ( begin and firstRun ) or ( end and lastRun ):
556  msg = ( "The Usage of "
557  + "'begin' & 'firstRun' " * int( bool( begin and
558  firstRun ) )
559  + "and " * int( bool( ( begin and firstRun ) and
560  ( end and lastRun ) ) )
561  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
562  + "is ambiguous." )
563  raise AllInOneError( msg )
564 
565  if begin or end:
566  runList = [ self.__findInJson(run, "run_number") for run in self.__getRunList() ]
567 
568  if begin:
569  lastdate = begin
570  for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months after begin
571  firstdate = lastdate
572  lastdate = self.__dateString(self.__datetime(firstdate) + datetime.timedelta(delta))
573  dasQuery_begin = "run date between[%s,%s]" % (firstdate, lastdate)
574  begindata = self.__getData(dasQuery_begin)
575  if len(begindata) > 0:
576  begindata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
577  try:
578  runIndex = self.__find_ge( runList, self.__findInJson(begindata[0], ["run", "run_number"]))
579  except ValueError:
580  msg = ( "Your 'begin' is after the creation time of the last "
581  "run in the dataset\n'%s'"%( self.__name ) )
582  raise AllInOneError( msg )
583  firstRun = runList[runIndex]
584  begin = None
585  break
586 
587  if begin:
588  raise AllInOneError("No runs within a reasonable time interval after your 'begin'."
589  "Try using a 'begin' that has runs soon after it (within 2 months at most)")
590 
591  if end:
592  firstdate = end
593  for delta in [ 1, 5, 10, 20, 30 ]: #try searching for about 2 months before end
594  lastdate = firstdate
595  firstdate = self.__dateString(self.__datetime(lastdate) - datetime.timedelta(delta))
596  dasQuery_end = "run date between[%s,%s]" % (firstdate, lastdate)
597  enddata = self.__getData(dasQuery_end)
598  if len(enddata) > 0:
599  enddata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
600  try:
601  runIndex = self.__find_lt( runList, self.__findInJson(enddata[-1], ["run", "run_number"]))
602  except ValueError:
603  msg = ( "Your 'end' is before the creation time of the first "
604  "run in the dataset\n'%s'"%( self.__name ) )
605  raise AllInOneError( msg )
606  lastRun = runList[runIndex]
607  end = None
608  break
609 
610  if end:
611  raise AllInOneError("No runs within a reasonable time interval before your 'end'."
612  "Try using an 'end' that has runs soon before it (within 2 months at most)")
613 
614  if shortTuple:
615  return firstRun, lastRun
616  else:
617  return begin, end, firstRun, lastRun
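The widening date-window loop is easiest to see in isolation. This sketch (hypothetical start date, no DAS query) prints the five windows that together cover 1+5+10+20+30 = 66 days, i.e. the "about 2 months" mentioned in the comments:

import datetime

lastdate = datetime.date(2016, 7, 5)         # hypothetical 'begin'
for delta in [1, 5, 10, 20, 30]:
    firstdate = lastdate
    lastdate = firstdate + datetime.timedelta(delta)
    print("run date between[%s,%s]" % (firstdate, lastdate))
    # the real loop formats dates with __dateString, queries DAS,
    # and breaks out as soon as a window contains at least one run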
def dataset.Dataset.datasetSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  crab = False,
  parent = False 
)

Definition at line 637 of file dataset.py.

References dataset.Dataset.__createSnippet(), dataset.Dataset.__filename, dataset.Dataset.__name, dataset.Dataset.__official, dataset.Dataset.__origName, dataset.Dataset.__predefined, and dataset.Dataset.dump_cff().

Referenced by dataset.Dataset.parentDataset().

637  def datasetSnippet( self, jsonPath = None, begin = None, end = None,
638  firstRun = None, lastRun = None, crab = False, parent = False ):
639  if self.__predefined and parent:
640  with open(self.__filename) as f:
641  if "secFiles.extend" not in f.read():
642  msg = ("The predefined dataset '%s' does not contain secondary files, "
643  "which your validation requires!") % self.__name
644  if self.__official:
645  self.__name = self.__origName
646  self.__predefined = False
647  print msg
648  print ("Retrieving the files from DAS. You will be asked if you want "
649  "to overwrite the old dataset.\n"
650  "It will still be compatible with validations that don't need secondary files.")
651  else:
652  raise AllInOneError(msg)
653 
654  if self.__predefined:
655  snippet = ("process.load(\"Alignment.OfflineValidation.%s_cff\")\n"
656  "process.maxEvents = cms.untracked.PSet(\n"
657  " input = cms.untracked.int32(.oO[nEvents]Oo. / .oO[parallelJobs]Oo.)\n"
658  ")\n"
659  "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)"
660  %(self.__name))
661  if not parent:
662  with open(self.__filename) as f:
663  if "secFiles.extend" in f.read():
664  snippet += "\nprocess.source.secondaryFileNames = cms.untracked.vstring()"
665  return snippet
666  theMap = { "process": "process.",
667  "tab": " " * len( "process." ),
668  "nEvents": ".oO[nEvents]Oo. / .oO[parallelJobs]Oo.",
669  "skipEventsString": "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)\n",
670  "importCms": "",
671  "header": ""
672  }
673  datasetSnippet = self.__createSnippet( jsonPath = jsonPath,
674  begin = begin,
675  end = end,
676  firstRun = firstRun,
677  lastRun = lastRun,
678  repMap = theMap,
679  crab = crab,
680  parent = parent )
681  if jsonPath == "" and begin == "" and end == "" and firstRun == "" and lastRun == "":
682  try:
683  self.dump_cff(parent = parent)
684  except AllInOneError, e:
685  print "Can't store the dataset as a cff:"
686  print e
687  print "This may be inconvenient in the future, but will not cause a problem for this validation."
688  return datasetSnippet
def dataset.Dataset.dataType (   self)

Definition at line 618 of file dataset.py.

References dataset.Dataset.__dataType, and dataset.Dataset.__getDataType().

619  def dataType( self ):
620  if not self.__dataType:
621  self.__dataType = self.__getDataType()
622  return self.__dataType
def dataset.Dataset.dump_cff (   self,
  outName = None,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  parent = False 
)

Definition at line 690 of file dataset.py.

References dataset.Dataset.__alreadyStored, dataset.Dataset.__cmssw, dataset.Dataset.__createSnippet(), dataset.Dataset.__dataType, dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__magneticField, and dataset.Dataset.__name.

Referenced by dataset.Dataset.datasetSnippet().

690  def dump_cff( self, outName = None, jsonPath = None, begin = None,
691  end = None, firstRun = None, lastRun = None, parent = False ):
692  if self.__alreadyStored:
693  return
694  self.__alreadyStored = True
695  if outName == None:
696  outName = "Dataset" + self.__name.replace("/", "_")
697  packageName = os.path.join( "Alignment", "OfflineValidation" )
698  if not os.path.exists( os.path.join(
699  self.__cmssw, "src", packageName ) ):
700  msg = ("You are trying to store the predefined dataset '%s'.\n"
701  "For that you need to check out the package '%s' to your "
702  "private release area in\n"%( outName, packageName )
703  + self.__cmssw )
704  raise AllInOneError( msg )
705  theMap = { "process": "",
706  "tab": "",
707  "nEvents": str( -1 ),
708  "skipEventsString": "",
709  "importCms": "import FWCore.ParameterSet.Config as cms\n",
710  "header": "#Do not delete or (unless you know what you're doing) change these comments\n"
711  "#%(name)s\n"
712  "#data type: %(dataType)s\n"
713  "#magnetic field: .oO[magneticField]Oo.\n" #put in magnetic field later
714  %{"name": self.__name, #need to create the snippet before getting the magnetic field
715  "dataType": self.__dataType} #so that we know the first and last runs
716  }
717  dataset_cff = self.__createSnippet( jsonPath = jsonPath,
718  begin = begin,
719  end = end,
720  firstRun = firstRun,
721  lastRun = lastRun,
722  repMap = theMap,
723  parent = parent)
724  magneticField = self.__magneticField
725  if magneticField == "AutoFromDBCurrent":
726  magneticField = "%s, %s" % (magneticField, str(self.__getMagneticFieldForRun()).replace("\n"," ")[0])
727  dataset_cff = dataset_cff.replace(".oO[magneticField]Oo.",magneticField)
728  filePath = os.path.join( self.__cmssw, "src", packageName,
729  "python", outName + "_cff.py" )
730  if os.path.exists( filePath ):
731  existMsg = "The predefined dataset '%s' already exists.\n"%( outName )
732  askString = "Do you want to overwrite it? [y/n]\n"
733  inputQuery = existMsg + askString
734  while True:
735  userInput = raw_input( inputQuery ).lower()
736  if userInput == "y":
737  break
738  elif userInput == "n":
739  return
740  else:
741  inputQuery = askString
742  print ( "The predefined dataset '%s' will be stored in the file\n"
743  %( outName )
744  + filePath +
745  "\nFor future use you have to do 'scram b'." )
746  print
747  theFile = open( filePath, "w" )
748  theFile.write( dataset_cff )
749  theFile.close()
750  return
def dataset.Dataset.extractFileSizes (   self)
Get the file size for each file, from the eos ls -l command.

Definition at line 306 of file dataset.py.

References dataset.EOSDataset.castorDir, and dataset.Dataset.castorDir.

307  def extractFileSizes(self):
308  '''Get the file size for each file, from the eos ls -l command.'''
309  # EOS command does not work in tier3
310  lsout = castortools.runXRDCommand(self.castorDir,'dirlist')[0]
311  lsout = lsout.split('\n')
312  self.filesAndSizes = {}
313  for entry in lsout:
314  values = entry.split()
315  if( len(values) != 5):
316  continue
317  # using full abs path as a key.
318  file = '/'.join([self.lfnDir, values[4].split("/")[-1]])
319  size = values[1]
320  self.filesAndSizes[file] = size
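The parsing step can be exercised on a canned listing line (the line and paths below are hypothetical; the real output comes from castortools.runXRDCommand):

lsout = ["drwx 1234567 2016-07-05 10:00 /eos/cms/store/user/x/file_1.root"]
lfnDir = "/store/user/x"                  # hypothetical
filesAndSizes = {}
for entry in lsout:
    values = entry.split()
    if len(values) != 5:                  # skip anything but 5-column rows
        continue
    name = '/'.join([lfnDir, values[4].split("/")[-1]])
    filesAndSizes[name] = values[1]       # size is the second column
assert filesAndSizes == {"/store/user/x/file_1.root": "1234567"}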
def dataset.Dataset.fileInfoList (   self,
  parent = False 
)

Definition at line 766 of file dataset.py.

References dataset.Dataset.__dasLimit, and dataset.Dataset.__getFileInfoList().

Referenced by dataset.Dataset.fileList().

767  def fileInfoList( self, parent = False ):
768  return self.__getFileInfoList( self.__dasLimit, parent )
def dataset.Dataset.fileList (   self,
  parent = False 
)

Definition at line 751 of file dataset.py.

References dataset.Dataset.__fileList, dataset.Dataset.__findInJson(), dataset.Dataset.__parentFileList, and dataset.Dataset.fileInfoList().

752  def fileList( self, parent = False ):
753  if self.__fileList and not parent:
754  return self.__fileList
755  if self.__parentFileList and parent:
756  return self.__parentFileList
757 
758  fileList = [ self.__findInJson(fileInfo,"name") \
759  for fileInfo in self.fileInfoList(parent) ]
760 
761  if not parent:
762  self.__fileList = fileList
763  else:
764  self.__parentFileList = fileList
765  return fileList
def dataset.Dataset.forcerunrange (   self,
  firstRun,
  lastRun,
  s 
)
s must be in the format run1:lum1-run2:lum2

Definition at line 277 of file dataset.py.

References dataset.Dataset.__firstUsedRun and dataset.Dataset.__lastUsedRun.

Referenced by dataset.Dataset.getForceRunRangeFunction().

278  def forcerunrange(self, firstRun, lastRun, s):
279  """s must be in the format run1:lum1-run2:lum2"""
280  s = s.group()
281  print s
282  run1 = s.split("-")[0].split(":")[0]
283  lum1 = s.split("-")[0].split(":")[1]
284  run2 = s.split("-")[1].split(":")[0]
285  lum2 = s.split("-")[1].split(":")[1]
286  if int(run2) < firstRun or int(run1) > lastRun:
287  return ""
288  if int(run1) < firstRun or firstRun < 0:
289  run1 = firstRun
290  lum1 = 1
291  if int(run2) > lastRun:
292  run2 = lastRun
293  lum2 = "max"
294  if int(run1) < self.__firstUsedRun:
295  self.__firstUsedRun = int(run1)
296  if int(run2) > self.__lastUsedRun:
297  self.__lastUsedRun = int(run2)
298  return "%s:%s-%s:%s" % (run1, lum1, run2, lum2)
def dataset.Dataset.getForceRunRangeFunction (   self,
  firstRun,
  lastRun 
)

Definition at line 299 of file dataset.py.

References dataset.Dataset.forcerunrange().

300  def getForceRunRangeFunction(self, firstRun, lastRun):
301  def forcerunrangefunction(s):
302  return self.forcerunrange(firstRun, lastRun, s)
303  return forcerunrangefunction
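The closure returned here is meant to serve as the replacement callback of re.sub over a lumi-selection string, as done in __createSnippet(). A standalone sketch with a simplified forcerunrange and hypothetical run numbers:

import re

def forcerunrange(firstRun, lastRun, match):
    # simplified version of Dataset.forcerunrange
    s = match.group()
    run1, lum1 = s.split("-")[0].split(":")
    run2, lum2 = s.split("-")[1].split(":")
    if int(run2) < firstRun or int(run1) > lastRun:
        return ""                         # range entirely outside the window
    if int(run1) < firstRun:
        run1, lum1 = firstRun, 1
    if int(run2) > lastRun:
        run2, lum2 = lastRun, "max"
    return "%s:%s-%s:%s" % (run1, lum1, run2, lum2)

repl = lambda m: forcerunrange(283000, 283300, m)
text = "'282900:1-282950:max', '283100:1-283400:max'"
print(re.sub(r"\d+:(\d+|max)-\d+:(\d+|max)", repl, text))
# prints: '', '283100:1-283300:max'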
def dataset.Dataset.getPrimaryDatasetEntries (   self)

Definition at line 326 of file dataset.py.

References dataset.Dataset.report and dataset.BaseDataset.report.

327  def getPrimaryDatasetEntries(self):
328  if self.report is not None and self.report:
329  return int(self.report.get('PrimaryDatasetEntries',-1))
330  return -1
331 
def dataset.Dataset.magneticField (   self)

Definition at line 623 of file dataset.py.

References dataset.Dataset.__getMagneticField(), and dataset.Dataset.__magneticField.

624  def magneticField( self ):
625  if not self.__magneticField:
626  self.__magneticField = self.__getMagneticField()
627  return self.__magneticField
def dataset.Dataset.magneticFieldForRun (   self,
  run = -1 
)

Definition at line 628 of file dataset.py.

References dataset.Dataset.__getMagneticFieldForRun().

629  def magneticFieldForRun( self, run = -1 ):
630  return self.__getMagneticFieldForRun(run)
def dataset.Dataset.name (   self)

Definition at line 769 of file dataset.py.

References dataset.Dataset.__name.


770  def name( self ):
771  return self.__name
def dataset.Dataset.parentDataset (   self)

Definition at line 631 of file dataset.py.

References dataset.Dataset.__getParentDataset(), dataset.Dataset.__parentDataset, and dataset.Dataset.datasetSnippet().

Referenced by dataset.Dataset.__getFileInfoList().

632  def parentDataset( self ):
633  if not self.__parentDataset:
634  self.__parentDataset = self.__getParentDataset()
635  return self.__parentDataset
def dataset.Dataset.predefined (   self)

Definition at line 772 of file dataset.py.

References dataset.Dataset.__predefined.

Referenced by dataset.Dataset.__createSnippet().

773  def predefined( self ):
774  return self.__predefined
def dataset.Dataset.printInfo (   self)

Definition at line 321 of file dataset.py.

References dataset.EOSDataset.castorDir, dataset.Dataset.castorDir, dataset.Dataset.lfnDir, and dataset.BaseDataset.name.

322  def printInfo(self):
323  print 'sample : ' + self.name
324  print 'LFN : ' + self.lfnDir
325  print 'Castor path : ' + self.castorDir
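For context, here is a minimal self-contained sketch of the attribute contract printInfo relies on: it reads name, lfnDir, and castorDir, which are set by the CASTOR-style constructor rather than the DAS-style one. The class and values below are illustrative only (not part of dataset.py), and the print statements are ported to function syntax:

# Illustrative stand-in for dataset.Dataset; real constructor arguments differ.
class DatasetInfoDemo(object):
    def __init__(self, name, lfnDir, castorDir):
        self.name = name
        self.lfnDir = lfnDir
        self.castorDir = castorDir

    def printInfo(self):
        # mirrors dataset.Dataset.printInfo (lines 322-325 above)
        print('sample : ' + self.name)
        print('LFN : ' + self.lfnDir)
        print('Castor path : ' + self.castorDir)

# hypothetical sample values for demonstration
DatasetInfoDemo("/MinBias/Run2012A-v1/ALCARECO",
                "/store/caf/user/someuser/MinBias",
                "/castor/cern.ch/cms/store/caf/user/someuser/MinBias").printInfo()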
def dataset.Dataset.runList (   self)

Definition at line 775 of file dataset.py.

References dataset.Dataset.__getRunList(), and dataset.Dataset.__runList.

776  def runList( self ):
777  if self.__runList:
778  return self.__runList
779  return self.__getRunList()
780 
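runList() is one of several accessors that follow the same cache-or-fetch pattern: the private attribute is initialized to None in the constructor and filled by a private helper on first access (the same scheme backs __fileList and __fileInfoList). A minimal sketch of that pattern, with the DAS query replaced by a stub; the class name, run numbers, and print call are illustrative, not taken from dataset.py:

class LazyRunList(object):
    def __init__(self):
        self.__runList = None   # cache starts empty, as in Dataset.__init__

    def __getRunList(self):
        # stand-in for the real DAS query performed at dataset.py:526
        print("querying DAS ...")
        self.__runList = [194050, 194051, 194052]
        return self.__runList

    def runList(self):
        # same logic as dataset.Dataset.runList above
        if self.__runList:
            return self.__runList
        return self.__getRunList()

d = LazyRunList()
d.runList()   # first call triggers the (simulated) DAS query
d.runList()   # second call is served from the cached list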

Member Data Documentation

dataset.Dataset.__alreadyStored
private

Definition at line 23 of file dataset.py.

Referenced by dataset.Dataset.dump_cff().

dataset.Dataset.__cmssw
private

Definition at line 24 of file dataset.py.

Referenced by dataset.Dataset.dump_cff().

dataset.Dataset.__cmsswrelease
private

Definition at line 25 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField().

dataset.Dataset.__dasLimit
private

Definition at line 19 of file dataset.py.

Referenced by dataset.Dataset.fileInfoList().

dataset.Dataset.__dataType
private

Definition at line 76 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticField(), dataset.Dataset.dataType(), and dataset.Dataset.dump_cff().

tuple dataset.Dataset.__dummy_source_template
static private
Initial value:
1 = ("readFiles = cms.untracked.vstring()\n"
2  "secFiles = cms.untracked.vstring()\n"
3  "%(process)ssource = cms.Source(\"PoolSource\",\n"
4  "%(tab)s secondaryFileNames ="
5  "secFiles,\n"
6  "%(tab)s fileNames = readFiles\n"
7  ")\n"
8  "readFiles.extend(['dummy_File.root'])\n"
9  "%(process)smaxEvents = cms.untracked.PSet( "
10  "input = cms.untracked.int32(%(nEvents)s) )\n"
11  "%(skipEventsString)s\n")

Definition at line 103 of file dataset.py.
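Despite the "tuple" label (Doxygen apparently misreads the parenthesized adjacent string literals), the initial value is a single %-format string keyed on process, tab, nEvents, and skipEventsString. A hedged sketch of how such a template is presumably expanded; the substitution values below are illustrative guesses, not taken from dataset.py:

# the template string exactly as shown in the initial value above
template = ("readFiles = cms.untracked.vstring()\n"
            "secFiles = cms.untracked.vstring()\n"
            "%(process)ssource = cms.Source(\"PoolSource\",\n"
            "%(tab)s secondaryFileNames ="
            "secFiles,\n"
            "%(tab)s fileNames = readFiles\n"
            ")\n"
            "readFiles.extend(['dummy_File.root'])\n"
            "%(process)smaxEvents = cms.untracked.PSet( "
            "input = cms.untracked.int32(%(nEvents)s) )\n"
            "%(skipEventsString)s\n")

# old-style % substitution with a dict; the values are made up for illustration
snippet = template % {"process": "process.",
                      "tab": " " * len("process."),
                      "nEvents": "100",
                      "skipEventsString": ""}
print(snippet)   # yields a dummy PoolSource configuration fragment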

dataset.Dataset.__fileInfoList
private

Definition at line 21 of file dataset.py.

Referenced by dataset.Dataset.__getFileInfoList().

dataset.Dataset.__fileList
private

Definition at line 20 of file dataset.py.

Referenced by dataset.Dataset.fileList().

dataset.Dataset.__filename
private

Definition at line 53 of file dataset.py.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.datasetSnippet(), csvReporter.csvReporter.writeRow(), and csvReporter.csvReporter.writeRows().

dataset.Dataset.__firstusedrun
private

Definition at line 26 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticFieldForRun().

dataset.Dataset.__firstUsedRun
private

Definition at line 192 of file dataset.py.

Referenced by dataset.Dataset.forcerunrange().

dataset.Dataset.__lastusedrun
private

Definition at line 27 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticFieldForRun().

dataset.Dataset.__lastUsedRun
private

Definition at line 193 of file dataset.py.

Referenced by dataset.Dataset.forcerunrange().

dataset.Dataset.__magneticField
private

Definition at line 77 of file dataset.py.

Referenced by dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.dump_cff(), and dataset.Dataset.magneticField().

dataset.Dataset.__name
private

Definition at line 17 of file dataset.py.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.__getParentDataset(), dataset.Dataset.__getRunList(), dataset.Dataset.convertTimeToRun(), dataset.Dataset.datasetSnippet(), dataset.Dataset.dump_cff(), Config.Process.dumpConfig(), Config.Process.dumpPython(), dataset.Dataset.name(), and Config.Process.name_().

dataset.Dataset.__official
private

Definition at line 34 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__origName
private

Definition at line 18 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet().

dataset.Dataset.__parentDataset
private

Definition at line 28 of file dataset.py.

Referenced by dataset.Dataset.parentDataset().

dataset.Dataset.__parentFileInfoList
private

Definition at line 30 of file dataset.py.

Referenced by dataset.Dataset.__getFileInfoList().

dataset.Dataset.__parentFileList
private

Definition at line 29 of file dataset.py.

Referenced by dataset.Dataset.fileList().

dataset.Dataset.__predefined
private

Definition at line 50 of file dataset.py.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getMagneticField(), dataset.Dataset.__getMagneticFieldForRun(), dataset.Dataset.datasetSnippet(), and dataset.Dataset.predefined().

dataset.Dataset.__runList
private

Definition at line 22 of file dataset.py.

Referenced by dataset.Dataset.__getRunList(), and dataset.Dataset.runList().

dataset.Dataset.bad_files

Definition at line 283 of file dataset.py.

dataset.Dataset.castorDir

Definition at line 267 of file dataset.py.

Referenced by dataset.Dataset.extractFileSizes(), and dataset.Dataset.printInfo().

dataset.Dataset.files

Definition at line 274 of file dataset.py.

dataset.Dataset.filesAndSizes

Definition at line 311 of file dataset.py.

dataset.Dataset.good_files

Definition at line 284 of file dataset.py.

dataset.Dataset.lfnDir

Definition at line 266 of file dataset.py.

Referenced by dataset.Dataset.printInfo().

dataset.Dataset.maskExists

Definition at line 268 of file dataset.py.

dataset.Dataset.report

Definition at line 269 of file dataset.py.

Referenced by addOnTests.testit.run().