dataset.Dataset Class Reference

Public Member Functions

def __init__
 
def convertTimeToRun
 
def datasetSnippet
 
def dataType
 
def dump_cff
 
def fileInfoList
 
def fileList
 
def name
 
def predefined
 
def runList
 

Private Member Functions

def __chunks
 
def __createSnippet
 
def __find_ge
 
def __find_lt
 
def __getData
 
def __getDataType
 
def __getFileInfoList
 
def __getRunList
 

Private Attributes

 __dasLimit
 
 __dataType
 
 __fileInfoList
 
 __fileList
 
 __name
 
 __predefined
 
 __runList
 

Static Private Attributes

tuple __dummy_source_template
 

Detailed Description

Definition at line 13 of file dataset.py.

Constructor & Destructor Documentation

def dataset.Dataset.__init__ (   self,
  datasetName,
  dasLimit = 0 
)

Definition at line 14 of file dataset.py.

14 
15  def __init__( self, datasetName, dasLimit = 0 ):
16  self.__name = datasetName
17  # check if the dataset name matches the CMS dataset naming scheme
18  if re.match( r'/.+/.+/.+', self.__name ):
19  self.__dataType = self.__getDataType()
20  self.__predefined = False
21  else:
22  fileName = self.__name + "_cff.py"
23  searchPath1 = os.path.join( os.environ["CMSSW_BASE"], "python",
24  "Alignment", "OfflineValidation",
25  fileName )
26  searchPath2 = os.path.join( os.environ["CMSSW_BASE"], "src",
27  "Alignment", "OfflineValidation",
28  "python", fileName )
29  searchPath3 = os.path.join( os.environ["CMSSW_RELEASE_BASE"],
30  "python", "Alignment",
31  "OfflineValidation", fileName )
32  if os.path.exists( searchPath1 ):
33  pass
34  elif os.path.exists( searchPath2 ):
35  msg = ("The predefined dataset '%s' does exist in '%s', but "
36  "you need to run 'scram b' first."
37  %( self.__name, searchPath2 ))
38  raise AllInOneError( msg )
39  elif os.path.exists( searchPath3 ):
40  pass
41  else:
42  msg = ("The predefined dataset '%s' does not exist. Please "
43  "create it first or check for typos."%( self.__name ))
44  raise AllInOneError( msg )
45  self.__dataType = "unknown"
46  self.__predefined = True
47  self.__dasLimit = dasLimit
48  self.__fileList = None
49  self.__fileInfoList = None
50  self.__runList = None
def __getDataType
Definition: dataset.py:185
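
A minimal usage sketch, assuming dataset.py is importable from Alignment/OfflineValidation; the dataset names are invented for illustration:

  # Hedged example: both dataset names below are hypothetical.
  from dataset import Dataset

  data = Dataset( "/MinimumBias/Run2012A-v1/RAW", dasLimit = 10 )
  print data.name(), data.predefined()       # DAS-style name -> predefined() is False

  # A name that does not match '/.../.../...' is looked up as a predefined
  # <name>_cff.py in Alignment/OfflineValidation (AllInOneError if absent):
  predef = Dataset( "MyPredefinedDataset" )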

Member Function Documentation

def dataset.Dataset.__chunks (   self,
  theList,
  n 
)
private
Yield successive n-sized chunks from theList.

Definition at line 51 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().

51 
52  def __chunks( self, theList, n ):
53  """ Yield successive n-sized chunks from theList.
54  """
55  for i in xrange( 0, len( theList ), n ):
56  yield theList[i:i+n]
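
Since the method has no side effects, its behaviour is easy to show with a standalone copy:

  def chunks( theList, n ):
      """ Yield successive n-sized chunks from theList. """
      for i in xrange( 0, len( theList ), n ):
          yield theList[i:i+n]

  print list( chunks( range( 7 ), 3 ) )  # [[0, 1, 2], [3, 4, 5], [6]]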
def dataset.Dataset.__createSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  repMap = None,
  crab = False 
)
private

Definition at line 59 of file dataset.py.

References dataset.Dataset.__chunks(), dataset.Dataset.__dummy_source_template, dataset.Dataset.__getRunList(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.fileList().

Referenced by dataset.Dataset.datasetSnippet(), and dataset.Dataset.dump_cff().

59 
60  crab = False ):
61  if firstRun:
62  firstRun = int( firstRun )
63  if lastRun:
64  lastRun = int( lastRun )
65  if ( begin and firstRun ) or ( end and lastRun ):
66  msg = ( "The Usage of "
67  + "'begin' & 'firstRun' " * int( bool( begin and
68  firstRun ) )
69  + "and " * int( bool( ( begin and firstRun ) and
70  ( end and lastRun ) ) )
71  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
72  + "is ambigous." )
73  raise AllInOneError( msg )
74  if begin or end:
75  ( firstRun, lastRun ) = self.convertTimeToRun(
76  begin = begin, end = end, firstRun = firstRun,
77  lastRun = lastRun )
78  if ( firstRun and lastRun ) and ( firstRun > lastRun ):
79  msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
80  "chosen is greater than the upper time/runrange limit "
81  "('end'/'lastRun').")
82  raise AllInOneError( msg )
83  goodLumiSecStr = ""
84  lumiStr = ""
85  lumiSecExtend = ""
86  if firstRun or lastRun:
87  goodLumiSecStr = ( "lumiSecs = cms.untracked."
88  "VLuminosityBlockRange()\n" )
89  lumiStr = " lumisToProcess = lumiSecs,\n"
90  if not jsonPath:
91  selectedRunList = self.__getRunList()
92  if firstRun:
93  selectedRunList = [ run for run in selectedRunList \
94  if run["run_number"] >= firstRun ]
95  if lastRun:
96  selectedRunList = [ run for run in selectedRunList \
97  if run["run_number"] <= lastRun ]
98  lumiList = [ str( run["run_number"] ) + ":1-" \
99  + str( run["run_number"] ) + ":max" \
100  for run in selectedRunList ]
101  splitLumiList = list( self.__chunks( lumiList, 255 ) )
102  else:
103  theLumiList = LumiList ( filename = jsonPath )
104  allRuns = theLumiList.getRuns()
105  runsToRemove = []
106  for run in allRuns:
107  if firstRun and int( run ) < firstRun:
108  runsToRemove.append( run )
109  if lastRun and int( run ) > lastRun:
110  runsToRemove.append( run )
111  theLumiList.removeRuns( runsToRemove )
112  splitLumiList = list( self.__chunks(
113  theLumiList.getCMSSWString().split(','), 255 ) )
114  if len( splitLumiList[0][0] ) != 0:
115  lumiSecStr = [ "',\n'".join( lumis ) \
116  for lumis in splitLumiList ]
117  lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
118  for lumis in lumiSecStr ]
119  lumiSecExtend = "\n".join( lumiSecStr )
120  elif jsonPath:
121  goodLumiSecStr = ( "goodLumiSecs = LumiList.LumiList(filename"
122  "= '%(json)s').getCMSSWString().split(',')\n"
123  "lumiSecs = cms.untracked"
124  ".VLuminosityBlockRange()\n"
125  )
126  lumiStr = " lumisToProcess = lumiSecs,\n"
127  lumiSecExtend = "lumiSecs.extend(goodLumiSecs)\n"
128  if crab:
129  files = ""
130  else:
131  splitFileList = list( self.__chunks( self.fileList(), 255 ) )
132  fileStr = [ "',\n'".join( files ) for files in splitFileList ]
133  fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
134  for files in fileStr ]
135  files = "\n".join( fileStr )
136  theMap = repMap
137  theMap["files"] = files
138  theMap["json"] = jsonPath
139  theMap["lumiStr"] = lumiStr
140  theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
141  theMap["lumiSecExtend"] = lumiSecExtend
142  if crab:
143  dataset_snippet = self.__dummy_source_template%( theMap )
144  else:
145  dataset_snippet = self.__source_template%( theMap )
146  return dataset_snippet
def convertTimeToRun
Definition: dataset.py:263
tuple __dummy_source_template
Definition: dataset.py:147
def __getRunList
Definition: dataset.py:232
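
Without a JSON file, the snippet selects whole runs as 'run:1-run:max' ranges, chunked in groups of 255 before being written out as lumiSecs.extend(...) blocks. A sketch of that construction with invented run numbers:

  # Hedged sketch of the lumi-range construction (run numbers are made up).
  selectedRuns = [ 190456, 190457 ]
  lumiList = [ "%s:1-%s:max"%( run, run ) for run in selectedRuns ]
  print "lumiSecs.extend( [\n'" + "',\n'".join( lumiList ) + "'\n] )"
  # lumiSecs.extend( [
  # '190456:1-190456:max',
  # '190457:1-190457:max'
  # ] )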
def dataset.Dataset.__find_ge (   self,
  a,
  x 
)
private

Definition at line 165 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

166  def __find_ge( self, a, x):
167  'Find leftmost item greater than or equal to x'
168  i = bisect.bisect_left( a, x )
169  if i != len( a ):
170  return i
171  raise ValueError
def dataset.Dataset.__find_lt (   self,
  a,
  x 
)
private

Definition at line 158 of file dataset.py.

Referenced by dataset.Dataset.convertTimeToRun().

159  def __find_lt( self, a, x ):
160  'Find rightmost value less than x'
161  i = bisect.bisect_left( a, x )
162  if i:
163  return i-1
164  raise ValueError
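
Both helpers are thin wrappers around bisect.bisect_left on an already sorted list:

  import bisect

  a = [ 10, 20, 30 ]
  i = bisect.bisect_left( a, 25 )
  print a[i]    # 30, the item __find_ge( a, 25 ) points to
  print a[i-1]  # 20, the item __find_lt( a, 25 ) points to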
def dataset.Dataset.__getData (   self,
  dasQuery,
  dasLimit = 0 
)
private

Definition at line 172 of file dataset.py.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), and dataset.Dataset.__getRunList().

173  def __getData( self, dasQuery, dasLimit = 0 ):
174  dasData = das_client.get_data( 'https://cmsweb.cern.ch',
175  dasQuery, 0, dasLimit, False )
176  if isinstance(dasData, str):
177  jsondict = json.loads( dasData )
178  else:
179  jsondict = dasData
180  # check if the DAS query failed
181  if jsondict["status"] != 'ok':
182  msg = "Status not 'ok', but:", jsondict["status"]
183  raise AllInOneError(msg)
184  return jsondict["data"]
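
The same call pattern outside the class looks roughly like this, assuming das_client is importable the way dataset.py imports it; the query string is an invented example:

  import json
  import das_client  # assumption: available on the PYTHONPATH, as in dataset.py

  dasQuery = 'dataset dataset=/MinimumBias/Run2012A-v1/RAW'  # hypothetical
  dasData = das_client.get_data( 'https://cmsweb.cern.ch', dasQuery, 0, 0, False )
  jsondict = json.loads( dasData ) if isinstance( dasData, str ) else dasData
  print jsondict["status"]   # anything but 'ok' raises AllInOneError above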
def dataset.Dataset.__getDataType (   self)
private

Definition at line 185 of file dataset.py.

References dataset.Dataset.__getData(), dataset.Dataset.__name, and dataset.Dataset.name().

186  def __getDataType( self ):
187  dasQuery_type = ( 'dataset dataset=%s | grep dataset.datatype,'
188  'dataset.name'%( self.__name ) )
189  data = self.__getData( dasQuery_type )
190  for a in data[0]["dataset"]:
191  if "datatype" in a:
192  return a["datatype"]
193  msg = ("Cannot find the datatype of the dataset '%s'"%( self.name() ))
194  raise AllInOneError( msg )
def __getDataType
Definition: dataset.py:185
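
For a DAS-style name, the constructed query looks like this (the dataset name is invented):

  name = "/MinimumBias/Run2012A-v1/RAW"   # hypothetical
  dasQuery_type = ( 'dataset dataset=%s | grep dataset.datatype,'
                    'dataset.name'%( name ) )
  # -> 'dataset dataset=/MinimumBias/Run2012A-v1/RAW | grep dataset.datatype,dataset.name'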
def dataset.Dataset.__getFileInfoList (   self,
  dasLimit 
)
private

Definition at line 195 of file dataset.py.

References dataset.Dataset.__fileInfoList, dataset.Dataset.__getData(), dataset.Dataset.__name, and dataset.Dataset.name().

Referenced by dataset.Dataset.fileInfoList().

196  def __getFileInfoList( self, dasLimit ):
197  if self.__fileInfoList:
198  return self.__fileInfoList
199  dasQuery_files = ( 'file dataset=%s | grep file.name, file.nevents, '
200  'file.creation_time, '
201  'file.modification_time'%( self.__name ) )
202  print "Requesting file information for '%s' from DAS..."%( self.__name ),
203  data = self.__getData( dasQuery_files, dasLimit )
204  print "Done."
205  data = [ entry["file"] for entry in data ]
206  if len( data ) == 0:
207  msg = ("No files are available for the dataset '%s'. This can be "
208  "due to a typo or due to a DAS problem. Please check the "
209  "spelling of the dataset and/or retry to run "
210  "'validateAlignments.py'."%( self.name() ))
211  raise AllInOneError( msg )
212  fileInformationList = []
213  for file in data:
214  fileName = file[0]["name"]
215  fileCreationTime = file[0]["creation_time"]
216  for ii in range(3):
217  try:
218  fileNEvents = file[ii]["nevents"]
219  except KeyError:
220  continue
221  break
222  # select only non-empty files
223  if fileNEvents == 0:
224  continue
225  fileDict = { "name": fileName,
226  "creation_time": fileCreationTime,
227  "nevents": fileNEvents
228  }
229  fileInformationList.append( fileDict )
230  fileInformationList.sort( key=lambda info: info["name"] )
231  return fileInformationList
def __getFileInfoList
Definition: dataset.py:195
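
Each entry of the returned list is a plain dictionary; a sketch of the shape, with invented values:

  fileDict = { "name": "/store/data/hypothetical/file.root",  # made-up LFN
               "creation_time": 1342345678,                   # made-up timestamp
               "nevents": 24042 }                             # empty files are skipped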
def dataset.Dataset.__getRunList (   self)
private

Definition at line 232 of file dataset.py.

References dataset.Dataset.__getData(), dataset.Dataset.__name, and dataset.Dataset.__runList.

Referenced by dataset.Dataset.__createSnippet(), dataset.Dataset.convertTimeToRun(), and dataset.Dataset.runList().

233  def __getRunList( self ):
234  if self.__runList:
235  return self.__runList
236  dasQuery_runs = ( 'run dataset=%s | grep run.run_number,'
237  'run.creation_time'%( self.__name ) )
238  print "Requesting run information for '%s' from DAS..."%( self.__name ),
239  data = self.__getData( dasQuery_runs )
240  print "Done."
241  data = [ entry["run"][0] for entry in data ]
242  data.sort( key = lambda run: run["creation_time"] )
243  self.__runList = data
244  return data
def __getRunList
Definition: dataset.py:232
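
The cached list is sorted by creation time; its shape, with invented values:

  runList = [ { "run_number": 190456, "creation_time": 1334000000 },  # values made up
              { "run_number": 190457, "creation_time": 1334003600 } ]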
def dataset.Dataset.convertTimeToRun (   self,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  shortTuple = True 
)

Definition at line 263 of file dataset.py.

References dataset.Dataset.__find_ge(), dataset.Dataset.__find_lt(), dataset.Dataset.__getRunList(), and dataset.Dataset.__name.

Referenced by dataset.Dataset.__createSnippet().

264  shortTuple = True ):
265  if ( begin and firstRun ) or ( end and lastRun ):
266  msg = ( "The Usage of "
267  + "'begin' & 'firstRun' " * int( bool( begin and
268  firstRun ) )
269  + "and " * int( bool( ( begin and firstRun ) and
270  ( end and lastRun ) ) )
271  + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
272  + "is ambigous." )
273  raise AllInOneError( msg )
274 
275  runList = [ run["run_number"] for run in self.__getRunList() ]
276  runTimeList = [ run["creation_time"] for run in self.__getRunList() ]
277  if begin:
278  try:
279  runIndex = self.__find_ge( runTimeList, begin )
280  except ValueError:
281  msg = ( "Your 'begin' is after the creation time of the last "
282  "run in the dataset\n'%s'"%( self.__name ) )
283  raise AllInOneError( msg )
284  firstRun = runList[runIndex]
285  begin = None
286  if end:
287  try:
288  runIndex = self.__find_lt( runTimeList, end )
289  except ValueError:
290  msg = ( "Your 'end' is before the creation time of the first "
291  "run in the dataset\n'%s'"%( self.__name ) )
292  raise AllInOneError( msg )
293  lastRun = runList[runIndex]
294  end = None
295  if shortTuple:
296  return firstRun, lastRun
297  else:
298  return begin, end, firstRun, lastRun
def __getRunList
Definition: dataset.py:232
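
A hedged usage sketch, given a Dataset instance 'data' as constructed above; 'begin'/'end' must be comparable to the run.creation_time values DAS returns, and all numbers here are invented:

  firstRun, lastRun = data.convertTimeToRun( begin = 1334000000,
                                             end = 1334100000 )
  # run numbers, if given instead, are passed through unchanged:
  firstRun, lastRun = data.convertTimeToRun( firstRun = 190456,
                                             lastRun = 190457 )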
def dataset.Dataset.datasetSnippet (   self,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None,
  nEvents = None,
  crab = False 
)

Definition at line 304 of file dataset.py.

References dataset.Dataset.__createSnippet(), dataset.Dataset.__name, and dataset.Dataset.__predefined.

305  crab = False ):
306  if self.__predefined:
307  return ("process.load(\"Alignment.OfflineValidation.%s_cff\")\n"
308  "process.maxEvents = cms.untracked.PSet(\n"
309  " input = cms.untracked.int32(%s)\n"
310  ")"
311  %( self.__name, nEvents ))
312  theMap = { "process": "process.",
313  "tab": " " * len( "process." ),
314  "nEvents": str( nEvents ),
315  "importCms": ""
316  }
317  datasetSnippet = self.__createSnippet( jsonPath = jsonPath,
318  begin = begin,
319  end = end,
320  firstRun = firstRun,
321  lastRun = lastRun,
322  repMap = theMap,
323  crab = crab )
324  return datasetSnippet
def __createSnippet
Definition: dataset.py:59
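
A usage sketch with the 'data' instance from above; the JSON path, run numbers and event count are invented:

  snippet = data.datasetSnippet( jsonPath = "Cert_hypothetical.json",
                                 firstRun = 190456, lastRun = 190457,
                                 nEvents = 1000 )
  print snippet   # ready to paste into a cfg where 'process' is already defined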
def dataset.Dataset.dataType (   self)

Definition at line 299 of file dataset.py.

References dataset.Dataset.__dataType.

300  def dataType( self ):
301  return self.__dataType
def dataset.Dataset.dump_cff (   self,
  outName = None,
  jsonPath = None,
  begin = None,
  end = None,
  firstRun = None,
  lastRun = None 
)

Definition at line 326 of file dataset.py.

References dataset.Dataset.__createSnippet().

327  end = None, firstRun = None, lastRun = None ):
328  if outName is None:
329  outName = "Dataset"
330  packageName = os.path.join( "Alignment", "OfflineValidation" )
331  if not os.path.exists( os.path.join(
332  os.environ["CMSSW_BASE"], "src", packageName ) ):
333  msg = ("You try to store the predefined dataset'%s'.\n"
334  "For that you need to check out the package '%s' to your "
335  "private relase area in\n"%( outName, packageName )
336  + os.environ["CMSSW_BASE"] )
337  raise AllInOneError( msg )
338  theMap = { "process": "",
339  "tab": "",
340  "nEvents": str( -1 ),
341  "importCms": "import FWCore.ParameterSet.Config as cms\n" }
342  dataset_cff = self.__createSnippet( jsonPath = jsonPath,
343  begin = begin,
344  end = end,
345  firstRun = firstRun,
346  lastRun = lastRun,
347  repMap = theMap)
348  filePath = os.path.join( os.environ["CMSSW_BASE"], "src", packageName,
349  "python", outName + "_cff.py" )
350  if os.path.exists( filePath ):
351  existMsg = "The predefined dataset '%s' already exists.\n"%( outName )
352  askString = "Do you want to overwrite it? [y/n]\n"
353  inputQuery = existMsg + askString
354  while True:
355  userInput = raw_input( inputQuery ).lower()
356  if userInput == "y":
357  break
358  elif userInput == "n":
359  return
360  else:
361  inputQuery = askString
362  print ( "The predefined dataset '%s' will be stored in the file\n"
363  %( outName )
364  + filePath +
365  "\nFor future use you have to do 'scram b'." )
366  print
367  theFile = open( filePath, "w" )
368  theFile.write( dataset_cff )
369  theFile.close()
370  return
def __createSnippet
Definition: dataset.py:59
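
A usage sketch with the 'data' instance from above (run numbers and name invented); the package must be checked out in $CMSSW_BASE/src, and the new file only becomes loadable after 'scram b':

  data.dump_cff( outName = "MyDataset",   # hypothetical predefined-dataset name
                 firstRun = 190456, lastRun = 190457 )
  # writes $CMSSW_BASE/src/Alignment/OfflineValidation/python/MyDataset_cff.py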
def dataset.Dataset.fileInfoList (   self)

Definition at line 379 of file dataset.py.

References dataset.Dataset.__dasLimit, and dataset.Dataset.__getFileInfoList().

Referenced by dataset.Dataset.fileList().

380  def fileInfoList( self ):
381  return self.__getFileInfoList( self.__dasLimit )
def __getFileInfoList
Definition: dataset.py:195
def fileInfoList
Definition: dataset.py:379
def dataset.Dataset.fileList (   self)

Definition at line 371 of file dataset.py.

References dataset.Dataset.__fileList, and dataset.Dataset.fileInfoList().

Referenced by dataset.Dataset.__createSnippet().

372  def fileList( self ):
373  if self.__fileList:
374  return self.__fileList
375  fileList = [ fileInfo["name"] \
376  for fileInfo in self.fileInfoList() ]
377  self.__fileList = fileList
378  return fileList
def fileInfoList
Definition: dataset.py:379
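
fileList() caches its result in __fileList, so repeated calls do not re-query DAS:

  for lfn in data.fileList():   # LFNs of the non-empty files
      print lfn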
def dataset.Dataset.name (   self)

Definition at line 382 of file dataset.py.

References dataset.Dataset.__name.

Referenced by dataset.Dataset.__getDataType(), and dataset.Dataset.__getFileInfoList().

383  def name( self ):
384  return self.__name
def dataset.Dataset.predefined (   self)

Definition at line 385 of file dataset.py.

References dataset.Dataset.__predefined.

386  def predefined( self ):
387  return self.__predefined
def dataset.Dataset.runList (   self)

Definition at line 388 of file dataset.py.

References dataset.Dataset.__getRunList(), and dataset.Dataset.__runList.

389  def runList( self ):
390  if self.__runList:
391  return self.__runList
392  return self.__getRunList()
393 
def __getRunList
Definition: dataset.py:232

Member Data Documentation

dataset.Dataset.__dasLimit
private

Definition at line 46 of file dataset.py.

Referenced by dataset.Dataset.fileInfoList().

dataset.Dataset.__dataType
private

Definition at line 18 of file dataset.py.

Referenced by dataset.Dataset.dataType().

tuple dataset.Dataset.__dummy_source_template
staticprivate
Initial value:
1 = ("%(process)smaxEvents = cms.untracked.PSet( "
2  "input = cms.untracked.int32(%(nEvents)s) )\n"
3  "readFiles = cms.untracked.vstring()\n"
4  "secFiles = cms.untracked.vstring()\n"
5  "%(process)ssource = cms.Source(\"PoolSource\",\n"
6  "%(tab)s secondaryFileNames ="
7  "secFiles,\n"
8  "%(tab)s fileNames = readFiles\n"
9  ")\n"
10  "readFiles.extend(['dummy_File.root'])\n")

Definition at line 147 of file dataset.py.

Referenced by dataset.Dataset.__createSnippet().
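
The template is expanded with the ordinary %-operator against the replacement map built in __createSnippet(); a sketch with invented values, using name-mangled access for illustration only:

  theMap = { "process": "process.",
             "tab": " " * len( "process." ),
             "nEvents": "100" }
  print Dataset._Dataset__dummy_source_template%( theMap )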

dataset.Dataset.__fileInfoList
private

Definition at line 48 of file dataset.py.

Referenced by dataset.Dataset.__getFileInfoList().

dataset.Dataset.__fileList
private

Definition at line 47 of file dataset.py.

Referenced by dataset.Dataset.fileList().

dataset.Dataset.__name
private

Definition at line 15 of file dataset.py.

Referenced by dataset.Dataset.__getDataType(), dataset.Dataset.__getFileInfoList(), dataset.Dataset.__getRunList(), dataset.Dataset.convertTimeToRun(), dataset.Dataset.datasetSnippet(), and dataset.Dataset.name().

dataset.Dataset.__predefined
private

Definition at line 19 of file dataset.py.

Referenced by dataset.Dataset.datasetSnippet(), and dataset.Dataset.predefined().

dataset.Dataset.__runList
private

Definition at line 49 of file dataset.py.

Referenced by dataset.Dataset.__getRunList(), and dataset.Dataset.runList().