CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
List of all members | Public Member Functions | Static Public Member Functions | Public Attributes
dataset.CMSDataset Class Reference
Inheritance diagram for dataset.CMSDataset:
dataset.BaseDataset

Public Member Functions

def __init__
 
def buildListOfFiles
 
def buildListOfFilesDBS
 
def getPrimaryDatasetEntries
 
- Public Member Functions inherited from dataset.BaseDataset
def __init__
 def __init__(self, name, user, pattern='.*root', ...). More...
 
def buildListOfBadFiles
 
def buildListOfFiles
 
def extractFileSizes
 
def getPrimaryDatasetEntries
 
def listOfFiles
 
def listOfGoodFiles
 
def listOfGoodFilesWithPrescale
 
def printFiles
 
def printInfo
 

Static Public Member Functions

def findPrimaryDatasetEntries
 
def findPrimaryDatasetNumFiles
 

Public Attributes

 files
 
- Public Attributes inherited from dataset.BaseDataset
 bad_files
 
 dbsInstance
 MM. More...
 
 files
 
 filesAndSizes
 
 good_files
 
 name
 
 pattern
 
 primaryDatasetEntries
 MM. More...
 
 report
 
 run_range
 
 user
 

Detailed Description

Definition at line 129 of file dataset.py.

Constructor & Destructor Documentation

def dataset.CMSDataset.__init__ (   self,
  name,
  run_range = None 
)

Definition at line 131 of file dataset.py.

132  def __init__(self, name, run_range = None):
133  super(CMSDataset, self).__init__( name, 'CMS', run_range=run_range)

Member Function Documentation

def dataset.CMSDataset.buildListOfFiles (   self,
  pattern = '.*root' 
)

Definition at line 163 of file dataset.py.

References dataset.CMSDataset.findPrimaryDatasetNumFiles(), and dataset.BaseDataset.run_range.

164  def buildListOfFiles(self, pattern='.*root'):
165  runs = (-1,-1)
166  if self.run_range is not None:
167  runs = self.run_range
168  num_files=self.findPrimaryDatasetNumFiles(self.name.rstrip('/'),
169  runs[0],runs[1])
170  limit = 10000
171  if num_files > limit:
172  num_steps = int(num_files/limit)+1
173  self.files = []
174  for i in range(num_steps):
175  DBSFiles=self.buildListOfFilesDBS(pattern,
176  i*limit,
177  ((i+1)*limit)-1)
178  self.files.extend(DBSFiles)
179  else:
180  self.files = self.buildListOfFilesDBS(pattern)
const uint16_t range(const Frame &aFrame)
def buildListOfFilesDBS
Definition: dataset.py:134
def findPrimaryDatasetNumFiles
Definition: dataset.py:205
def dataset.CMSDataset.buildListOfFilesDBS (   self,
  pattern,
  begin = -1,
  end = -1 
)

Definition at line 134 of file dataset.py.

References print(), and dataset.BaseDataset.run_range.

135  def buildListOfFilesDBS(self, pattern, begin=-1, end=-1):
136  print('buildListOfFilesDBS',begin,end)
137  sampleName = self.name.rstrip('/')
138  query, qwhat = sampleName, "dataset"
139  if "#" in sampleName: qwhat = "block"
140  if self.run_range is not None and self.run_range != (-1,-1):
141  if self.run_range[0] == self.run_range[1]:
142  query += " run=%s" % self.run_range[0]
143  else:
144  print("WARNING: queries with run ranges are slow in DAS")
145  query += " run between [%s,%s]" % ( self.run_range[0],self.run_range[1] )
146  dbs='das_client.py --query="file %s=%s"'%(qwhat,query)
147  if begin >= 0:
148  dbs += ' --index %d' % begin
149  if end >= 0:
150  dbs += ' --limit %d' % (end-begin+1)
151  else:
152  dbs += ' --limit 0'
153  print('dbs\t: %s' % dbs)
154  dbsOut = os.popen(dbs)
155  files = []
156  for line in dbsOut:
157  if line.find('/store')==-1:
158  continue
159  line = line.rstrip()
160  # print 'line',line
161  files.append(line)
162  return files
def buildListOfFilesDBS
Definition: dataset.py:134
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def dataset.CMSDataset.findPrimaryDatasetEntries (   dataset,
  runmin,
  runmax 
)
static

Definition at line 182 of file dataset.py.

References print().

Referenced by dataset.CMSDataset.getPrimaryDatasetEntries(), and dataset.PrivateDataset.getPrimaryDatasetEntries().

183  def findPrimaryDatasetEntries(dataset, runmin, runmax):
184 
185  query, qwhat = dataset, "dataset"
186  if "#" in dataset: qwhat = "block"
187  if runmin >0 or runmax > 0:
188  if runmin == runmax:
189  query = "%s run=%d" % (query,runmin)
190  else:
191  print("WARNING: queries with run ranges are slow in DAS")
192  query = "%s run between [%d, %d]" % (query,runmin if runmin > 0 else 1, runmax if runmax > 0 else 999999)
193  dbs='das_client.py --query="summary %s=%s"'%(qwhat,query)
194  dbsOut = os.popen(dbs).readlines()
195 
196  entries = []
197  for line in dbsOut:
198  line = line.replace('\n','')
199  if "nevents" in line:
200  entries.append(int(line.split(":")[1]))
201  if entries:
202  return sum(entries)
203  return -1
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def findPrimaryDatasetEntries
Definition: dataset.py:182
def dataset.CMSDataset.findPrimaryDatasetNumFiles (   dataset,
  runmin,
  runmax 
)
static

Definition at line 205 of file dataset.py.

References print().

Referenced by dataset.CMSDataset.buildListOfFiles(), and dataset.PrivateDataset.buildListOfFilesDBS().

206  def findPrimaryDatasetNumFiles(dataset, runmin, runmax):
207 
208  query, qwhat = dataset, "dataset"
209  if "#" in dataset: qwhat = "block"
210  if runmin >0 or runmax > 0:
211  if runmin == runmax:
212  query = "%s run=%d" % (query,runmin)
213  else:
214  print("WARNING: queries with run ranges are slow in DAS")
215  query = "%s run between [%d, %d]" % (query,runmin if runmin > 0 else 1, runmax if runmax > 0 else 999999)
216  dbs='das_client.py --query="summary %s=%s"'%(qwhat,query)
217  dbsOut = os.popen(dbs).readlines()
218 
219  entries = []
220  for line in dbsOut:
221  line = line.replace('\n','')
222  if "nfiles" in line:
223  entries.append(int(line.split(":")[1]))
224  if entries:
225  return sum(entries)
226  return -1
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def findPrimaryDatasetNumFiles
Definition: dataset.py:205
def dataset.CMSDataset.getPrimaryDatasetEntries (   self)

Definition at line 227 of file dataset.py.

References dataset.CMSDataset.findPrimaryDatasetEntries(), ElectronMVAID.ElectronMVAID.name, AlignableObjectId::entry.name, average.Average.name, counter.Counter.name, histograms.Histograms.name, cond::persistency::TAG::NAME.name, cond::persistency::RUN_INFO::RUN_NUMBER.name, TmModule.name, cond::persistency::GTEditorData.name, cond::persistency::GLOBAL_TAG::NAME.name, cond::persistency::TAG::TIME_TYPE.name, cond::persistency::RUN_INFO::START_TIME.name, cond::persistency::GLOBAL_TAG::VALIDITY.name, core.autovars.NTupleVariable.name, cond::persistency::RUN_INFO::END_TIME.name, cond::persistency::TAG::OBJECT_TYPE.name, cond::persistency::GLOBAL_TAG::DESCRIPTION.name, cond::persistency::TAG::SYNCHRONIZATION.name, DQMRivetClient::NormOption.name, cond::persistency::GLOBAL_TAG::RELEASE.name, cond::persistency::GLOBAL_TAG::SNAPSHOT_TIME.name, cond::persistency::TAG::END_OF_VALIDITY.name, MEPSet.name, cond::persistency::GLOBAL_TAG::INSERTION_TIME.name, cond::persistency::O2O_RUN::JOB_NAME.name, cond::persistency::TAG::DESCRIPTION.name, cms::dd::NameValuePair< T >.name, cond::persistency::O2O_RUN::START_TIME.name, cond::persistency::TAG::LAST_VALIDATED_TIME.name, cond::persistency::O2O_RUN::END_TIME.name, cond::persistency::TAG::INSERTION_TIME.name, FWTGeoRecoGeometry::Info.name, cond::persistency::O2O_RUN::STATUS_CODE.name, cond::persistency::TAG::MODIFICATION_TIME.name, cond::persistency::O2O_RUN::LOG.name, ParameterSet.name, nanoaod::MergeableCounterTable::SingleColumn< T >.name, cond::persistency::TAG::PROTECTION_CODE.name, preexistingValidation.PreexistingValidation.name, OutputMEPSet.name, AlignmentConstraint.name, PixelDCSObject< class >::Item.name, dataset.BaseDataset.name, cms::dd::ValuePair< T, U >.name, personalPlayback.Applet.name, Types._Untracked.name, MagCylinder.name, analyzer.Analyzer.name, heppy::ParSet.name, DQMRivetClient::LumiOption.name, o2olib.O2OJob.name, cond::persistency::GTProxyData.name, SingleObjectCondition.name, 
EgHLTOfflineSummaryClient::SumHistBinData.name, edm::PathTimingSummary.name, DQMRivetClient::ScaleFactorOption.name, cms::DDAlgoArguments.name, Barrel.name, perftools::EdmEventSize::BranchRecord.name, core.autovars.NTupleObjectType.name, cond::TimeTypeSpecs.name, edm::PathSummary.name, EcalLogicID.name, alignment.Alignment.name, lumi::TriggerInfo.name, PixelEndcapLinkMaker::Item.name, XMLProcessor::_loaderBaseConfig.name, MEtoEDM< T >::MEtoEDMObject.name, FWTableViewManager::TableEntry.name, PixelBarrelLinkMaker::Item.name, ExpressionHisto< T >.name, DQMGenericClient::EfficOption.name, TreeCrawler.Package.name, Supermodule.name, cond::persistency::GLOBAL_TAG_MAP::GLOBAL_TAG_NAME.name, genericValidation.GenericValidation.name, options.ConnectionHLTMenu.name, cond::persistency::GLOBAL_TAG_MAP::RECORD.name, cond::persistency::GLOBAL_TAG_MAP::LABEL.name, cms::DDParsingContext::CompositeMaterial.name, cond::persistency::GLOBAL_TAG_MAP::TAG_NAME.name, cond::Tag_t.name, dqmoffline::l1t::HistDefinition.name, DQMGenericClient::ProfileOption.name, magneticfield::BaseVolumeHandle.name, nanoaod::MergeableCounterTable::VectorColumn< T >.name, FastHFShowerLibrary.name, emtf::Node.name, h4DSegm.name, DQMGenericClient::NormOption.name, DQMGenericClient::CDOption.name, CounterChecker.name, PhysicsTools::Calibration::Variable.name, cond::TagInfo_t.name, TrackerSectorStruct.name, MuonGeometrySanityCheckPoint.name, cond::persistency::PAYLOAD::HASH.name, DQMGenericClient::NoFlowOption.name, looper.Looper.name, Mapper::definition< ScannerT >.name, cond::persistency::PAYLOAD::OBJECT_TYPE.name, cond::persistency::PAYLOAD::DATA.name, EDMtoMEConverter.name, cond::persistency::PAYLOAD::STREAMER_INFO.name, cond::persistency::PAYLOAD::VERSION.name, cond::persistency::PAYLOAD::INSERTION_TIME.name, classes.MonitorData.name, HistogramManager.name, classes.OutputData.name, Crystal.name, h2DSegm.name, options.HLTProcessOptions.name, cond::persistency::IOV::TAG_NAME.name, 
cond::persistency::IOV::SINCE.name, cond::persistency::IOV::PAYLOAD_HASH.name, cond::persistency::IOV::INSERTION_TIME.name, DQMNet::WaitObject.name, AlpgenParameterName.name, config.Analyzer.name, geometry.Structure.name, core.autovars.NTupleSubObject.name, Capsule.name, core.autovars.NTupleObject.name, Ceramic.name, SiStripMonitorDigi.name, BulkSilicon.name, config.Service.name, APD.name, nanoaod::FlatTable::Column.name, core.autovars.NTupleCollection.name, BPHRecoBuilder::BPHRecoSource.name, BPHRecoBuilder::BPHCompSource.name, StraightTrackAlignment::RPSetPlots.name, cond::persistency::TAG_AUTHORIZATION::TAG_NAME.name, cond::persistency::TAG_AUTHORIZATION::ACCESS_TYPE.name, cond::persistency::TAG_AUTHORIZATION::CREDENTIAL.name, cond::persistency::TAG_AUTHORIZATION::CREDENTIAL_TYPE.name, InnerLayerVolume.name, cond::payloadInspector::TagReference.name, cond::persistency::TAG_LOG::TAG_NAME.name, cond::persistency::TAG_LOG::EVENT_TIME.name, cond::persistency::TAG_LOG::USER_NAME.name, cond::persistency::TAG_LOG::HOST_NAME.name, cond::persistency::TAG_LOG::COMMAND.name, cond::persistency::TAG_LOG::ACTION.name, cond::persistency::TAG_LOG::USER_TEXT.name, personalPlayback.FrameworkJob.name, Grid.name, Grille.name, BackPipe.name, plotscripts.SawTeethFunction.name, PatchPanel.name, BackCoolTank.name, DryAirTube.name, crabFunctions.CrabTask.name, MBCoolTube.name, MBManif.name, cscdqm::ParHistoDef.name, hTMaxCell.name, BeautifulSoup.Tag.name, SummaryOutputProducer::GenericSummary.name, BeautifulSoup.SoupStrainer.name, and dataset.BaseDataset.run_range.

228  def getPrimaryDatasetEntries(self):
229  runmin = -1
230  runmax = -1
231  if self.run_range is not None:
232  runmin = self.run_range[0]
233  runmax = self.run_range[1]
234  return self.findPrimaryDatasetEntries(self.name, runmin, runmax)
def getPrimaryDatasetEntries
Definition: dataset.py:227
def findPrimaryDatasetEntries
Definition: dataset.py:182

Member Data Documentation

dataset.CMSDataset.files

Definition at line 172 of file dataset.py.