CMS 3D CMS Logo

Classes | Functions | Variables
dataset Namespace Reference

Classes

class  BaseDataset
 
class  CMSDataset
 
class  DataFile
 
class  Dataset
 
class  DatasetBase
 
class  DatasetError
 
class  EOSDataset
 
class  IntegrityCheckError
 
class  LocalDataset
 
class  MultipleDatasets
 
class  PrivateDataset
 
class  RunRange
 

Functions

def createDataset (user, dataset, pattern, readcache=False, basedir=None, run_range=None)
 
def createMyDataset (user, dataset, pattern, dbsInstance, readcache=False)
 if user == 'CMS': data = CMSDataset( dataset ) elif user == 'LOCAL': if basedir is None: basedir = os.environ['CMGLOCALBASEDIR'] data = LocalDataset( dataset, basedir, pattern ) else: data = Dataset( user, dataset, pattern ) More...
 
def dasquery (dasQuery, dasLimit=0)
 
def findinjson (jsondict, strings)
 
def getDatasetFromCache (cachename)
 
def getrunnumbersfromfile (filename, trydas=True, allowunknown=False, dasinstance=defaultdasinstance)
 
def writeDatasetToCache (cachename, dataset)
 

Variables

 abspath
 
 action
 
 args
 
 data
 
 default
 
string defaultdasinstance = "prod/global"
 
 dest
 
 help
 
 info
 
 int
 
 name
 
 options
 
 parser
 
 run_range
 
 type
 
 usage
 
 user
 
string validationfooter
 
string validationheader
 

Function Documentation

◆ createDataset()

def dataset.createDataset (   user,
  dataset,
  pattern,
  readcache = False,
  basedir = None,
  run_range = None 
)

Definition at line 429 of file dataset.py.

References getDatasetFromCache(), and writeDatasetToCache().

Referenced by datasetToSource.datasetToSource(), production_tasks.CheckDatasetExists.run(), production_tasks.SourceCFG.run(), and writeDatasetToCache().

429  basedir = None, run_range = None):
430 
431 
432  def cacheFileName(data, user, pattern):
433  return '{user}%{name}%{pattern}.pck'.format( user = user, name = data.replace('/','_'), pattern = pattern)
434 
435  def writeCache(dataset):
436  writeDatasetToCache( cacheFileName(dataset.name, dataset.user, dataset.pattern), dataset )
437 
438  def readCache(data, user, pattern):
439  return getDatasetFromCache( cacheFileName(data, user, pattern) )
440 
441  if readcache:
442  try:
443  data = readCache(dataset, user, pattern)
444  except IOError:
445  readcache = False
446  if not readcache:
447  if user == 'CMS':
448  data = CMSDataset( dataset , run_range = run_range)
449  info = False
450  elif user == 'LOCAL':
451  data = LocalDataset( dataset, basedir, pattern)
452  info = False
453  elif user == 'EOS':
454  data = EOSDataset(dataset, basedir, pattern)
455  info = False
456  else:
457  data = Dataset( dataset, user, pattern)
458  writeCache(data)
def writeDatasetToCache(cachename, dataset)
Definition: dataset.py:421
def getDatasetFromCache(cachename)
Definition: dataset.py:415

◆ createMyDataset()

def dataset.createMyDataset (   user,
  dataset,
  pattern,
  dbsInstance,
  readcache = False 
)

if user == 'CMS':
    data = CMSDataset( dataset )
elif user == 'LOCAL':
    if basedir is None:
        basedir = os.environ['CMGLOCALBASEDIR']
    data = LocalDataset( dataset, basedir, pattern )
else:
    data = Dataset( user, dataset, pattern )

MM

Definition at line 470 of file dataset.py.

References join().

Referenced by datasetToSource.myDatasetToSource().

470 def createMyDataset( user, dataset, pattern, dbsInstance, readcache=False):
471 
472  cachedir = '/'.join( [os.environ['HOME'],'.cmgdataset'])
473 
474  def cacheFileName(data, user, dbsInstance, pattern):
475  cf = data.replace('/','_')
476  name = '{dir}/{user}%{dbsInstance}%{name}%{pattern}.pck'.format(
477  dir = cachedir,
478  user = user,
479  dbsInstance = dbsInstance,
480  name = cf,
481  pattern = pattern)
482  return name
483 
484  def writeCache(dataset):
485  if not os.path.exists(cachedir):
486  os.mkdir(cachedir)
487  cachename = cacheFileName(dataset.name,
488  dataset.user,
489  dataset.dbsInstance,
490  dataset.pattern)
491  pckfile = open( cachename, 'w')
492  pickle.dump(dataset, pckfile)
493 
494  def readCache(data, user, dbsInstance, pattern):
495  cachename = cacheFileName(data, user, dbsInstance, pattern)
496 
497  pckfile = open( cachename)
498  dataset = pickle.load(pckfile)
499  #print 'reading cache'
500  return dataset
501 
502  if readcache:
503  try:
504  data = readCache(dataset, user, dbsInstance, pattern)
505  except IOError:
506  readcache = False
507  if not readcache:
508  if user == 'PRIVATE':
509  data = PrivateDataset( dataset, dbsInstance )
510  info = False
511  writeCache(data)
512  return data
static std::string join(char **cmd)
Definition: RemoteFile.cc:21
def createMyDataset(user, dataset, pattern, dbsInstance, readcache=False)
if user == 'CMS': data = CMSDataset( dataset ) elif user == 'LOCAL': if basedir is None: basedir = os...
Definition: dataset.py:470

◆ dasquery()

def dataset.dasquery (   dasQuery,
  dasLimit = 0 
)

Definition at line 27 of file dataset.py.

References findinjson(), das_client.get_data(), and str.

Referenced by dataset.Dataset.getfiles(), and getrunnumbersfromfile().

27 def dasquery(dasQuery, dasLimit=0):
28  dasData = das_client.get_data(dasQuery, dasLimit)
29  if isinstance(dasData, str):
30  jsondict = json.loads( dasData )
31  else:
32  jsondict = dasData
33  # Check, if the DAS query fails
34  try:
35  error = findinjson(jsondict, "data","error")
36  except KeyError:
37  error = None
38  if error or findinjson(jsondict, "status") != 'ok' or "data" not in jsondict:
39  try:
40  jsonstr = findinjson(jsondict, "reason")
41  except KeyError:
42  jsonstr = str(jsondict)
43  if len(jsonstr) > 10000:
44  jsonfile = "das_query_output_%i.txt"
45  i = 0
46  while os.path.lexists(jsonfile % i):
47  i += 1
48  jsonfile = jsonfile % i
49  theFile = open( jsonfile, "w" )
50  theFile.write( jsonstr )
51  theFile.close()
52  msg = "The DAS query returned an error. The output is very long, and has been stored in:\n" + jsonfile
53  else:
54  msg = "The DAS query returned a error. Here is the output\n" + jsonstr
55  msg += "\nIt's possible that this was a server error. If so, it may work if you try again later"
56  raise DatasetError(msg)
57  return findinjson(jsondict, "data")
58 
def get_data(host, query, idx, limit, debug, threshold=300, ckey=None, cert=None, capath=None, qcache=0, das_headers=True)
Definition: das_client.py:276
def dasquery(dasQuery, dasLimit=0)
Definition: dataset.py:27
def findinjson(jsondict, strings)
Definition: dataset.py:95
#define str(s)

◆ findinjson()

def dataset.findinjson (   jsondict,
  strings 
)

Definition at line 95 of file dataset.py.

Referenced by dasquery(), dataset.Dataset.getfiles(), and getrunnumbersfromfile().

95 def findinjson(jsondict, *strings):
96  if len(strings) == 0:
97  return jsondict
98  if isinstance(jsondict,dict):
99  if strings[0] in jsondict:
100  try:
101  return findinjson(jsondict[strings[0]], *strings[1:])
102  except KeyError:
103  pass
104  else:
105  for a in jsondict:
106  if strings[0] in a:
107  try:
108  return findinjson(a[strings[0]], *strings[1:])
109  except (TypeError, KeyError): #TypeError because a could be a string and contain strings[0]
110  pass
111  #if it's not found
112  raise KeyError("Can't find " + strings[0])
113 
def findinjson(jsondict, strings)
Definition: dataset.py:95

◆ getDatasetFromCache()

def dataset.getDatasetFromCache (   cachename)

Definition at line 415 of file dataset.py.

References join().

Referenced by createDataset().

415 def getDatasetFromCache( cachename ) :
416  cachedir = '/'.join( [os.environ['HOME'],'.cmgdataset'])
417  pckfile = open( cachedir + "/" + cachename )
418  dataset = pickle.load(pckfile)
419  return dataset
420 
static std::string join(char **cmd)
Definition: RemoteFile.cc:21
def getDatasetFromCache(cachename)
Definition: dataset.py:415

◆ getrunnumbersfromfile()

def dataset.getrunnumbersfromfile (   filename,
  trydas = True,
  allowunknown = False,
  dasinstance = defaultdasinstance 
)

Definition at line 59 of file dataset.py.

References python.cmstools.all(), dasquery(), findinjson(), int, join(), and str.

59 def getrunnumbersfromfile(filename, trydas=True, allowunknown=False, dasinstance=defaultdasinstance):
60  parts = filename.split("/")
61  error = None
62  if parts[0] != "" or parts[1] != "store":
63  error = "does not start with /store"
64  elif parts[2] in ["mc", "relval"]:
65  return [1]
66  elif not parts[-1].endswith(".root"):
67  error = "does not end with something.root"
68  elif len(parts) != 12:
69  error = "should be exactly 11 slashes counting the first one"
70  else:
71  runnumberparts = parts[-5:-2]
72  if not all(len(part)==3 for part in runnumberparts):
73  error = "the 3 directories {} do not have length 3 each".format("/".join(runnumberparts))
74  try:
75  return [int("".join(runnumberparts))]
76  except ValueError:
77  error = "the 3 directories {} do not form an integer".format("/".join(runnumberparts))
78 
79  if error and trydas:
80  try:
81  query = "run file={} instance={}".format(filename, dasinstance)
82  dasoutput = dasquery(query)
83  result = findinjson(dasoutput, "run")
84  return sum((findinjson(run, "run_number") for run in result), [])
85  except Exception as e:
86  error = str(e)
87 
88  if error and allowunknown:
89  return [-1]
90 
91  if error:
92  error = "could not figure out which run number this file is from.\nMaybe try with allowunknown=True?\n {}\n{}".format(filename, error)
93  raise DatasetError(error)
94 
def dasquery(dasQuery, dasLimit=0)
Definition: dataset.py:27
def all(container)
workaround iterator generators for ROOT classes
Definition: cmstools.py:25
def getrunnumbersfromfile(filename, trydas=True, allowunknown=False, dasinstance=defaultdasinstance)
Definition: dataset.py:59
static std::string join(char **cmd)
Definition: RemoteFile.cc:21
def findinjson(jsondict, strings)
Definition: dataset.py:95
#define str(s)

◆ writeDatasetToCache()

def dataset.writeDatasetToCache (   cachename,
  dataset 
)

Definition at line 421 of file dataset.py.

References createDataset(), and join().

Referenced by createDataset().

421 def writeDatasetToCache( cachename, dataset ):
422  cachedir = '/'.join( [os.environ['HOME'],'.cmgdataset'])
423  if not os.path.exists(cachedir):
424  os.mkdir(cachedir)
425  pckfile = open( cachedir + "/" + cachename, 'w')
426  pickle.dump(dataset, pckfile)
427 
def writeDatasetToCache(cachename, dataset)
Definition: dataset.py:421
static std::string join(char **cmd)
Definition: RemoteFile.cc:21

Variable Documentation

◆ abspath

dataset.abspath

Definition at line 55 of file dataset.py.

◆ action

dataset.action

Definition at line 20 of file dataset.py.

◆ args

dataset.args

Definition at line 38 of file dataset.py.

◆ data

dataset.data

Definition at line 49 of file dataset.py.

◆ default

dataset.default

Definition at line 16 of file dataset.py.

◆ defaultdasinstance

string dataset.defaultdasinstance = "prod/global"

Definition at line 15 of file dataset.py.

◆ dest

dataset.dest

Definition at line 16 of file dataset.py.

◆ help

dataset.help

Definition at line 16 of file dataset.py.

◆ info

dataset.info

Definition at line 46 of file dataset.py.

◆ int

dataset.int

◆ name

dataset.name

Definition at line 45 of file dataset.py.

◆ options

dataset.options

Definition at line 38 of file dataset.py.

◆ parser

dataset.parser

Definition at line 14 of file dataset.py.

◆ run_range

dataset.run_range

Definition at line 48 of file dataset.py.

◆ type

dataset.type

Definition at line 35 of file dataset.py.

◆ usage

dataset.usage

Definition at line 15 of file dataset.py.

◆ user

dataset.user

Definition at line 44 of file dataset.py.

◆ validationfooter

string dataset.validationfooter
Initial value:
1 = """
2 ] )
3 """

Definition at line 278 of file dataset.py.

◆ validationheader

string dataset.validationheader
Initial value:
1 = """
2 import FWCore.ParameterSet.Config as cms
3 
4 maxEvents = cms.untracked.PSet( input = cms.untracked.int32(-1) )
5 readFiles = cms.untracked.vstring()
6 secFiles = cms.untracked.vstring()
7 source = cms.Source ("PoolSource",fileNames = readFiles, secondaryFileNames = secFiles)
8 readFiles.extend( [
9 """

Definition at line 268 of file dataset.py.