CMS 3D CMS Logo

visDQMUtils.py
Go to the documentation of this file.
import re

# Various regular expressions used to check filename validity:

# Regexp for valid dataset names.
RXDATASET = re.compile(r"^(/[-A-Za-z0-9_]+){3}$")
# Regexp for valid RelVal dataset names.
RXRELVALMC = re.compile(r"^/RelVal[^/]+/(CMSSW(?:_[0-9]+)+(?:_pre[0-9]+)?)[-_].*$")
RXRELVALRUNDEPMC = re.compile(r"^/RelVal[^/]+/(CMSSW(?:_[0-9]+)+(?:_pre[0-9]+)?)[-_].*rundepMC.*$")
RXRELVALDATA = re.compile(r"^/[^/]+/(CMSSW(?:_[0-9]+)+(?:_pre[0-9]+)?)[-_].*$")
RXRUNDEPMC = re.compile(r"^/(?!RelVal)[^/]+/.*rundepMC.*$")

# Regexp for online DQM files.
RXONLINE = re.compile(r"^(?:.*/)?DQM_V(\d+)(_[A-Za-z0-9]+)?_R(\d+)\.root$")

# Regexp for offline DQM files.
RXOFFLINE = re.compile(r"^(?:.*/)?DQM_V(\d+)_R(\d+)((?:__[-A-Za-z0-9_]+){3})\.root$")


def _classify_online(m):
    """Classify a file name that matched RXONLINE; `m` is the match object."""
    version = int(m.group(1))
    runnr = int(m.group(3))
    # Group 2 is the optional "_<subsystem>" part; drop the leading
    # underscore, or keep None when the part is absent.
    suffix = m.group(2)
    subsys = suffix[1:] if suffix else suffix

    if version != 1:
        return False, "file version is not 1"
    if runnr <= 10000:
        return False, "online file has run number <= 10000"
    # online_data
    return True, {'class': 'online_data', 'version': version,
                  'subsystem': subsys, 'runnr': runnr,
                  'dataset': "/Global/Online/ALL"}


def _classify_offline(m):
    """Classify a file name that matched RXOFFLINE; `m` is the match object."""
    version = int(m.group(1))
    # The dataset is encoded in the file name with "__" in place of "/".
    dataset = m.group(3).replace("__", "/")
    if not RXDATASET.match(dataset):
        return False, "Invalid dataset name"

    relvalmc = RXRELVALMC.match(dataset)
    relvaldata = RXRELVALDATA.match(dataset)
    relvalrundepmc = RXRELVALRUNDEPMC.match(dataset)
    rundepmc = RXRUNDEPMC.match(dataset)
    runnr = int(m.group(2))

    if version != 1:
        return False, "file version is not 1"
    if runnr < 1:
        return False, "file matches offline naming, but run number is < 1"

    # The order of the checks below matters: the run-dependent patterns are
    # tested before the plain RelVal ones, and RXRELVALDATA (which also
    # matches RelVal MC names) is tested only after RXRELVALMC.
    if rundepmc:
        if runnr == 1:
            return False, "file matches Run Dependent MonteCarlo naming, but run number is 1"
        # simulated_rundep
        return True, {'class': 'simulated_rundep', 'version': version,
                      'runnr': runnr, 'dataset': dataset}
    if relvalrundepmc:
        if runnr == 1:
            return False, "file matches Run Dependent MonteCarlo naming, but run number is 1"
        # relval_rundepmc
        return True, {'class': 'relval_rundepmc', 'version': version,
                      'runnr': runnr, 'dataset': dataset,
                      'release': relvalrundepmc.group(1)}
    if relvalmc:
        if runnr != 1:
            return False, "file matches relval mc naming, but run number != 1"
        # relval_mc
        return True, {'class': 'relval_mc', 'version': version,
                      'runnr': runnr, 'dataset': dataset,
                      'release': relvalmc.group(1)}
    if relvaldata:
        if runnr == 1:
            return False, "file matches relval data naming, but run number = 1"
        # relval_data
        return True, {'class': 'relval_data', 'version': version,
                      'runnr': runnr, 'dataset': dataset,
                      'release': relvaldata.group(1)}
    if "CMSSW" in dataset:
        return False, "non-relval dataset name contains 'CMSSW'"
    if runnr > 1:
        # offline_data
        return True, {'class': 'offline_data', 'version': version,
                      'runnr': runnr, 'dataset': dataset}
    # simulated (run number is exactly 1 here)
    return True, {'class': 'simulated', 'version': version,
                  'runnr': runnr, 'dataset': dataset}


# --------------------------------------------------------------------
def classifyDQMFile(path):
    """Pre-classify a file into a main category based on its name structure.

    path: path (relative to the uploads dir, coming from the walk) of the
          root file.

    Returns a tuple of:
      a boolean: True or False depending on whether the classification
                 went OK
      a string or dictionary:
        - In case the classification went wrong: a string with the reason
        - In case the classification was OK: a dictionary with the
          classification information ('class', 'version', 'runnr',
          'dataset', plus 'subsystem' for online files and 'release' for
          RelVal files)

    Errors raised while classifying (e.g. a non-string path) are reported
    as a failed classification rather than propagated.
    """
    # Echoes the path being classified to stdout.
    print(path)
    try:
        m = RXONLINE.match(path)
        if m:
            return _classify_online(m)

        m = RXOFFLINE.match(path)
        if m:
            return _classify_offline(m)

        return False, "file matches no known naming convention"
    except Exception:
        # Deliberately broad, but no longer a bare "except:", so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        return False, "error while classifying file name"
107 
def replace(string, replacements)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def classifyDQMFile(path)
Definition: visDQMUtils.py:29