CMS 3D CMS Logo

Classes | Functions | Variables
dataset Namespace Reference

Classes

class  BaseDataset
 
class  CMSDataset
 
class  DataFile
 
class  Dataset
 
class  DatasetBase
 
class  DatasetError
 
class  EOSDataset
 
class  IntegrityCheckError
 
class  LocalDataset
 
class  MultipleDatasets
 
class  PrivateDataset
 
class  RunRange
 

Functions

def createDataset (user, dataset, pattern, readcache=False, basedir=None, run_range=None)
 
def createMyDataset (user, dataset, pattern, dbsInstance, readcache=False)
 if user == 'CMS': data = CMSDataset( dataset ) elif user == 'LOCAL': if basedir is None: basedir = os.environ['CMGLOCALBASEDIR'] data = LocalDataset( dataset, basedir, pattern ) else: data = Dataset( user, dataset, pattern ) More...
 
def dasquery (dasQuery, dasLimit=0)
 
def findinjson (jsondict, *strings)
 
def getDatasetFromCache (cachename)
 
def getrunnumbersfromfile (filename, trydas=True, allowunknown=False, dasinstance=defaultdasinstance)
 
def writeDatasetToCache (cachename, dataset)
 

Variables

 abspath
 
 action
 
 args
 
 data
 
 dataset = Dataset( datasetName )
 
string datasetName = '/MinimumBias/Run2012D-TkAlMinBias-v1/ALCARECO'
 
 default
 
string defaultdasinstance = "prod/global"
 
 dest
 
 end
 
 firstRun
 
 help
 
 info
 
 int
 
tuple jsonFile
 
 jsonPath
 
 name
 
 options
 
 outName
 
 parser
 
 run_range
 
 type
 
 usage
 
 user
 
string validationfooter
 
string validationheader
 

Function Documentation

◆ createDataset()

def dataset.createDataset (   user,
  dataset,
  pattern,
  readcache = False,
  basedir = None,
  run_range = None 
)

Definition at line 429 of file dataset.py.

def createDataset( user, dataset, pattern, readcache=False,
                   basedir = None, run_range = None):
    """Build a dataset object for *dataset*, optionally loading it from the
    pickle cache in ~/.cmgdataset.

    user      -- 'CMS', 'LOCAL', 'EOS', or a username (selects the dataset class)
    dataset   -- dataset path, e.g. '/MinimumBias/Run2012D-.../ALCARECO'
    pattern   -- file-name pattern used to select files
    readcache -- if True, try the pickle cache first; fall back to a fresh
                 query (and re-write the cache) if the cache file is missing
    basedir   -- base directory, used by LOCAL/EOS datasets
    run_range -- run range, forwarded to CMSDataset only

    Returns the dataset object.
    """

    def cacheFileName(data, user, pattern):
        # Cache key: user%dataset%pattern.pck, with '/' in the dataset name
        # flattened to '_' so it is a single file name.
        return '{user}%{name}%{pattern}.pck'.format( user = user, name = data.replace('/','_'), pattern = pattern)

    def writeCache(dataset):
        # assumes the dataset object exposes .name/.user/.pattern -- TODO confirm
        # against the dataset classes defined elsewhere in this module
        writeDatasetToCache( cacheFileName(dataset.name, dataset.user, dataset.pattern), dataset )

    def readCache(data, user, pattern):
        return getDatasetFromCache( cacheFileName(data, user, pattern) )

    if readcache:
        try:
            data = readCache(dataset, user, pattern)
        except IOError:
            # No usable cache file: fall through to a fresh lookup below.
            readcache = False
    if not readcache:
        if user == 'CMS':
            data = CMSDataset( dataset , run_range = run_range)
        elif user == 'LOCAL':
            data = LocalDataset( dataset, basedir, pattern)
        elif user == 'EOS':
            data = EOSDataset(dataset, basedir, pattern)
        else:
            data = Dataset( dataset, user, pattern)
        writeCache(data)
    # Fix: the original fell off the end without returning, so every caller
    # (datasetToSource, production_tasks) received None.  Also removed the
    # dead local `info = False` stores, which nothing in this function read.
    return data

References getDatasetFromCache(), and writeDatasetToCache().

Referenced by datasetToSource.datasetToSource(), production_tasks.CheckDatasetExists.run(), and production_tasks.SourceCFG.run().

◆ createMyDataset()

def dataset.createMyDataset (   user,
  dataset,
  pattern,
  dbsInstance,
  readcache = False 
)

if user == 'CMS': data = CMSDataset( dataset ) elif user == 'LOCAL': if basedir is None: basedir = os.environ['CMGLOCALBASEDIR'] data = LocalDataset( dataset, basedir, pattern ) else: data = Dataset( user, dataset, pattern )

MM

Definition at line 471 of file dataset.py.

def createMyDataset( user, dataset, pattern, dbsInstance, readcache=False):
    """Build a PrivateDataset for *dataset*, optionally loading it from the
    pickle cache in ~/.cmgdataset (MM).

    user        -- dataset owner; only 'PRIVATE' creates a new dataset object
    dataset     -- dataset path
    pattern     -- file-name pattern used to select files
    dbsInstance -- DBS instance the private dataset is registered in
    readcache   -- if True, try the pickle cache first; fall back to a fresh
                   query (and re-write the cache) if the cache file is missing

    Returns the dataset object.
    """
    cachedir = '/'.join( [os.environ['HOME'],'.cmgdataset'])

    def cacheFileName(data, user, dbsInstance, pattern):
        # Cache key includes the DBS instance, unlike createDataset's key.
        cf = data.replace('/','_')
        name = '{dir}/{user}%{dbsInstance}%{name}%{pattern}.pck'.format(
            dir = cachedir,
            user = user,
            dbsInstance = dbsInstance,
            name = cf,
            pattern = pattern)
        return name

    def writeCache(dataset):
        if not os.path.exists(cachedir):
            os.mkdir(cachedir)
        cachename = cacheFileName(dataset.name,
                                  dataset.user,
                                  dataset.dbsInstance,
                                  dataset.pattern)
        # Fix: pickle requires a binary-mode file under Python 3 ('w' raises
        # TypeError on dump); `with` also guarantees the handle is closed.
        with open(cachename, 'wb') as pckfile:
            pickle.dump(dataset, pckfile)

    def readCache(data, user, dbsInstance, pattern):
        cachename = cacheFileName(data, user, dbsInstance, pattern)
        # Fix: binary mode + context manager, matching writeCache above.
        with open(cachename, 'rb') as pckfile:
            dataset = pickle.load(pckfile)
        return dataset

    if readcache:
        try:
            data = readCache(dataset, user, dbsInstance, pattern)
        except IOError:
            readcache = False
    if not readcache:
        if user == 'PRIVATE':
            data = PrivateDataset( dataset, dbsInstance )
        # NOTE(review): as in the original, any user other than 'PRIVATE'
        # reaches writeCache with `data` unbound (NameError) -- confirm
        # whether non-PRIVATE callers exist before tightening this.
        writeCache(data)
    return data

References join().

Referenced by datasetToSource.myDatasetToSource().

◆ dasquery()

def dataset.dasquery (   dasQuery,
  dasLimit = 0 
)

Definition at line 27 of file dataset.py.

def dasquery(dasQuery, dasLimit=0):
    """Run *dasQuery* through das_client and return the 'data' part of the
    JSON reply.

    dasQuery -- DAS query string
    dasLimit -- maximum number of results (0 = no limit), forwarded to
                das_client.get_data

    Raises DatasetError if DAS reports an error or a non-'ok' status; very
    long error payloads are written to das_query_output_<i>.txt instead of
    being embedded in the exception message.
    """
    dasData = das_client.get_data(dasQuery, dasLimit)
    # das_client may hand back either a JSON string or an already-decoded dict.
    if isinstance(dasData, str):
        jsondict = json.loads( dasData )
    else:
        jsondict = dasData
    # Check, if the DAS query fails
    try:
        error = findinjson(jsondict, "data","error")
    except KeyError:
        error = None
    # NOTE(review): if the reply has neither 'data' nor 'status', the
    # findinjson(jsondict, "status") lookup raises an uncaught KeyError --
    # confirm das_client always supplies 'status'.
    if error or findinjson(jsondict, "status") != 'ok' or "data" not in jsondict:
        try:
            jsonstr = findinjson(jsondict, "reason")
        except KeyError:
            jsonstr = str(jsondict)
        if len(jsonstr) > 10000:
            # Find the first unused das_query_output_<i>.txt file name.
            jsonfile = "das_query_output_%i.txt"
            i = 0
            while os.path.lexists(jsonfile % i):
                i += 1
            jsonfile = jsonfile % i
            # Fix: use a context manager so the file is closed even if the
            # write fails.
            with open( jsonfile, "w" ) as theFile:
                theFile.write( jsonstr )
            msg = "The DAS query returned an error. The output is very long, and has been stored in:\n" + jsonfile
        else:
            # Fix: grammar in the error message ("a error" -> "an error").
            msg = "The DAS query returned an error. Here is the output\n" + jsonstr
        msg += "\nIt's possible that this was a server error. If so, it may work if you try again later"
        raise DatasetError(msg)
    return findinjson(jsondict, "data")

References findinjson(), das_client.get_data(), and str.

Referenced by dataset.Dataset.getfiles(), and getrunnumbersfromfile().

◆ findinjson()

def dataset.findinjson (   jsondict,
strings 
)

Definition at line 95 of file dataset.py.

def findinjson(jsondict, *strings):
    """Walk *jsondict* down the chain of keys in *strings* and return the
    value found at the end.

    With no keys, the input itself is returned.  A dict is descended via its
    key; any other container (typically a list of dicts) is scanned entry by
    entry, and entries whose descent fails are skipped.  Raises KeyError when
    the chain cannot be followed.
    """
    if not strings:
        return jsondict
    key, rest = strings[0], strings[1:]
    if isinstance(jsondict, dict):
        if key in jsondict:
            try:
                return findinjson(jsondict[key], *rest)
            except KeyError:
                pass
    else:
        for entry in jsondict:
            if key not in entry:
                continue
            try:
                return findinjson(entry[key], *rest)
            except (TypeError, KeyError):
                # TypeError: entry may be a plain string that merely
                # contains `key` as a substring; keep scanning.
                pass
    # Nothing matched anywhere along this branch.
    raise KeyError("Can't find " + key)

Referenced by dasquery(), dataset.Dataset.getfiles(), and getrunnumbersfromfile().

◆ getDatasetFromCache()

def dataset.getDatasetFromCache (   cachename)

Definition at line 416 of file dataset.py.

def getDatasetFromCache( cachename ) :
    """Load and return the pickled dataset ~/.cmgdataset/<cachename>.

    Raises IOError/OSError when the cache file does not exist --
    createDataset relies on catching IOError to fall back to a fresh query.
    """
    cachedir = '/'.join( [os.environ['HOME'],'.cmgdataset'])
    # Fix: pickle requires a binary-mode file under Python 3 (the original
    # opened in text mode), and `with` guarantees the handle is closed.
    with open( cachedir + "/" + cachename, 'rb' ) as pckfile:
        dataset = pickle.load(pckfile)
    return dataset

References join().

Referenced by createDataset().

◆ getrunnumbersfromfile()

def dataset.getrunnumbersfromfile (   filename,
  trydas = True,
  allowunknown = False,
  dasinstance = defaultdasinstance 
)

Definition at line 59 of file dataset.py.

def getrunnumbersfromfile(filename, trydas=True, allowunknown=False, dasinstance=defaultdasinstance):
    """Return the list of run numbers a /store file belongs to.

    filename     -- LFN, e.g. /store/data/.../000/315/252/00000/file.root
    trydas       -- if the path cannot be parsed, ask DAS for the run numbers
    allowunknown -- if all else fails, return [-1] instead of raising
    dasinstance  -- DAS instance used for the fallback query

    MC and relval files always report run 1.  Raises DatasetError when the
    run number cannot be determined and allowunknown is False.
    """
    parts = filename.split("/")
    # `error` doubles as the "parsing failed" flag and the message text.
    error = None
    if parts[0] != "" or parts[1] != "store":
        error = "does not start with /store"
    elif parts[2] in ["mc", "relval"]:
        # Simulation has no real run number; conventionally run 1.
        return [1]
    elif not parts[-1].endswith(".root"):
        error = "does not end with something.root"
    elif len(parts) != 12:
        error = "should be exactly 11 slashes counting the first one"
    else:
        # Data LFNs encode the run number as three 3-digit directories,
        # e.g. .../000/315/252/... -> run 315252.
        runnumberparts = parts[-5:-2]
        if not all(len(part)==3 for part in runnumberparts):
            error = "the 3 directories {} do not have length 3 each".format("/".join(runnumberparts))
        # NOTE(review): even when the length check above set `error`, the
        # join is still attempted and, if it parses, returned -- so the
        # length check is effectively advisory.  Confirm this leniency is
        # intended before tightening it.
        try:
            return [int("".join(runnumberparts))]
        except ValueError:
            error = "the 3 directories {} do not form an integer".format("/".join(runnumberparts))

    if error and trydas:
        # Fallback: ask DAS which run(s) this file belongs to.  A file can
        # span several runs, hence the sum() flattening of run_number lists.
        try:
            query = "run file={} instance={}".format(filename, dasinstance)
            dasoutput = dasquery(query)
            result = findinjson(dasoutput, "run")
            return sum((findinjson(run, "run_number") for run in result), [])
        except Exception as e:
            error = str(e)

    if error and allowunknown:
        # Caller accepts "unknown"; -1 is the sentinel run number.
        return [-1]

    if error:
        error = "could not figure out which run number this file is from.\nMaybe try with allowunknown=True?\n {}\n{}".format(filename, error)
        raise DatasetError(error)

References python.cmstools.all(), dasquery(), findinjson(), int, join(), and str.

◆ writeDatasetToCache()

def dataset.writeDatasetToCache (   cachename,
  dataset 
)

Definition at line 422 of file dataset.py.

def writeDatasetToCache( cachename, dataset ):
    """Pickle *dataset* to ~/.cmgdataset/<cachename>, creating the cache
    directory on first use.
    """
    cachedir = '/'.join( [os.environ['HOME'],'.cmgdataset'])
    if not os.path.exists(cachedir):
        os.mkdir(cachedir)
    # Fix: pickle requires a binary-mode file under Python 3 (the original
    # opened with 'w'), and `with` guarantees the handle is flushed/closed.
    with open( cachedir + "/" + cachename, 'wb' ) as pckfile:
        pickle.dump(dataset, pckfile)

References join().

Referenced by createDataset().

Variable Documentation

◆ abspath

dataset.abspath

Definition at line 55 of file dataset.py.

◆ action

dataset.action

Definition at line 20 of file dataset.py.

◆ args

dataset.args

Definition at line 38 of file dataset.py.

◆ data

dataset.data

Definition at line 49 of file dataset.py.

◆ dataset

dataset.dataset = Dataset( datasetName )

Definition at line 934 of file dataset.py.

◆ datasetName

string dataset.datasetName = '/MinimumBias/Run2012D-TkAlMinBias-v1/ALCARECO'

Definition at line 930 of file dataset.py.

◆ default

dataset.default

Definition at line 16 of file dataset.py.

◆ defaultdasinstance

string dataset.defaultdasinstance = "prod/global"

Definition at line 15 of file dataset.py.

◆ dest

dataset.dest

Definition at line 16 of file dataset.py.

◆ end

dataset.end

Definition at line 941 of file dataset.py.

◆ firstRun

dataset.firstRun

◆ help

dataset.help

Definition at line 16 of file dataset.py.

◆ info

dataset.info

Definition at line 46 of file dataset.py.

◆ int

dataset.int

◆ jsonFile

tuple dataset.jsonFile
Initial value:
1 = ( '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/'
2  'Collisions12/8TeV/Prompt/'
3  'Cert_190456-207898_8TeV_PromptReco_Collisions12_JSON.txt' )

Definition at line 931 of file dataset.py.

◆ jsonPath

dataset.jsonPath

Definition at line 939 of file dataset.py.

◆ name

dataset.name

Definition at line 45 of file dataset.py.

◆ options

dataset.options

Definition at line 38 of file dataset.py.

◆ outName

dataset.outName

◆ parser

dataset.parser

Definition at line 14 of file dataset.py.

◆ run_range

dataset.run_range

Definition at line 48 of file dataset.py.

◆ type

dataset.type

Definition at line 35 of file dataset.py.

◆ usage

dataset.usage

Definition at line 15 of file dataset.py.

◆ user

dataset.user

Definition at line 44 of file dataset.py.

◆ validationfooter

string dataset.validationfooter
Initial value:
1 = """
2 ] )
3 """

Definition at line 278 of file dataset.py.

◆ validationheader

string dataset.validationheader
Initial value:
1 = """
2 import FWCore.ParameterSet.Config as cms
3 
4 maxEvents = cms.untracked.PSet( input = cms.untracked.int32(-1) )
5 readFiles = cms.untracked.vstring()
6 secFiles = cms.untracked.vstring()
7 source = cms.Source ("PoolSource",fileNames = readFiles, secondaryFileNames = secFiles)
8 readFiles.extend( [
9 """

Definition at line 268 of file dataset.py.

dataset.dasquery
def dasquery(dasQuery, dasLimit=0)
Definition: dataset.py:27
dataset.writeDatasetToCache
def writeDatasetToCache(cachename, dataset)
Definition: dataset.py:422
join
static std::string join(char **cmd)
Definition: RemoteFile.cc:17
python.cmstools.all
def all(container)
workaround iterator generators for ROOT classes
Definition: cmstools.py:26
dataset.getrunnumbersfromfile
def getrunnumbersfromfile(filename, trydas=True, allowunknown=False, dasinstance=defaultdasinstance)
Definition: dataset.py:59
str
#define str(s)
Definition: TestProcessor.cc:51
dataset.createDataset
def createDataset(user, dataset, pattern, readcache=False, basedir=None, run_range=None)
Definition: dataset.py:429
dataset.int
int
Definition: dataset.py:35
das_client.get_data
def get_data(host, query, idx, limit, debug, threshold=300, ckey=None, cert=None, capath=None, qcache=0, das_headers=True)
Definition: das_client.py:275
dataset.getDatasetFromCache
def getDatasetFromCache(cachename)
Definition: dataset.py:416
format
dataset.findinjson
def findinjson(jsondict, *strings)
Definition: dataset.py:95
dataset.createMyDataset
def createMyDataset(user, dataset, pattern, dbsInstance, readcache=False)
if user == 'CMS': data = CMSDataset( dataset ) elif user == 'LOCAL': if basedir is None: basedir = os...
Definition: dataset.py:471