CMS 3D CMS Logo

Classes | Functions | Variables
utils_v2 Namespace Reference

Classes

class  Chi2Test
 
class  ComparisonError
 
class  KolmogorovTest
 
class  StatisticalTest
 StatisticalTests. More...
 

Functions

def auth_download_file (url, chunk_size=1048576)
 
def auth_wget (url)
 -----------------— Recursive file downloader --------------------— More...
 
def check_disk_for_space (work_path, size_needed)
 
def get_relval_cmssw_version (file)
 
def get_relval_id (file)
 
def get_relval_max_version (files)
 
def get_relval_version (file)
 -------------—— Make files pairs: RelVal utils ---------------—— More...
 
def get_relvaldata_cmssw_version (file)
 
def get_relvaldata_id (file)
 -----------—— Make files pairs: RelValData utils --------------—— More...
 
def get_relvaldata_max_version (files)
 
def get_relvaldata_version (file)
 
def get_size_to_download (work_path, files_with_urls)
 
def get_version (filename)
 
def init_database (db_path)
 Utils. More...
 
def is_relvaldata (files)
 --------------------— Make file pairs -----------------------— More...
 
def make_file_pairs (files1, files2)
 
def recursive_search_online (url, rel1, frags1, rel2, frags2)
 
def search_on_disk (work_path, rel1, frags1, rel2, frags2)
 
def show_status_bar (total_size)
 

Variables

 comparison_errors
 Exception definitions. More...
 
 tests
 

Detailed Description

Helper functions for ValidationMatrix_v2.py.

Author:  Albertas Gimbutas,  Vilnius University (LT)
e-mail:  albertasgim@gmail.com

Function Documentation

def utils_v2.auth_download_file (   url,
  chunk_size = 1048576 
)

Definition at line 198 of file utils_v2.py.

References estimatePileup.basename, join(), and edm.print().

198 def auth_download_file(url, chunk_size=1048576):
199  filename = basename(url)
200  file_path = join(auth_download_file.work_dir, filename)
201 
202  file = open(file_path, 'wb')
203  opener = build_opener(X509CertOpen())
204  url_file = opener.open(Request(url))
205  chunk = url_file.read(chunk_size)
206  while chunk:
207  file.write(chunk)
208  auth_download_file.q.put((1,)) # reports, that downloaded 1MB
209  chunk = url_file.read(chunk_size)
210  print('\rDownloaded: %s ' % (filename,))
211  file.close()
212 
213 
def auth_download_file(url, chunk_size=1048576)
Definition: utils_v2.py:198
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def utils_v2.auth_wget (   url)

-----------------— Recursive file downloader --------------------—

Definition at line 184 of file utils_v2.py.

References cmsRelvalreport.exit, and edm.print().

Referenced by recursive_search_online().

184 def auth_wget(url):
185  try:
186  opener = build_opener(X509CertOpen())
187  return opener.open(Request(url)).read()
188  except HTTPError as e:
189  print('\nError: DQM GUI is temporarily unavailable. Probably maintainance hours. '+\
190  'Please try again later. Original error message: ``%s``. \nExiting...\n' % (e,))
191  exit()
192  except BadStatusLine as e:
193  print('\nYou do not have permissions to access DQM GUI. Please check if your certificates '+\
194  'in ``~/.globus`` directory are configured correctly. Exitting...')
195  exit()
196 
197 
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def auth_wget(url)
-----------------— Recursive file downloader --------------------—
Definition: utils_v2.py:184
def utils_v2.check_disk_for_space (   work_path,
  size_needed 
)
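Checks the AFS file system for free space. Note: in the listed implementation the check is commented out, so the function currently does nothing.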

Definition at line 460 of file utils_v2.py.

Referenced by ValidationMatrix_v2.ReleaseComparison.compare().

460 def check_disk_for_space(work_path, size_needed):
461  '''Checks afs file system for space.'''
462  pass
463  # try:
464  # fs_proc = subprocess.Popen(['fs', 'listquota', work_path], stdout=subprocess.PIPE)
465  # except OSError:
466  # return
467  # fs_response = fs_proc.communicate()[0]
468  # quota, used = re.findall('([\d]+)', fs_response)[:2]
469  # free_space = int(quota) - int(used)
470  # if free_space * 1024 < size_needed:
471  # print '\nNot enougth free space on disk.',
472  # print 'Free space: %d MB. Need: %d MB. Exiting...\n' % (free_space / 1024, size_needed /1048576)
473  # exit()
474  # elif size_needed:
475  # print 'Free space on disk: %d MB.\n' % (free_space / 1024,)
476 
477 
def check_disk_for_space(work_path, size_needed)
Definition: utils_v2.py:460
def utils_v2.get_relval_cmssw_version (   file)

Definition at line 90 of file utils_v2.py.

Referenced by get_version().

91  cmssw_release = re.findall('(CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?)-', file)
92  gr_r_version = re.findall('CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-([\w\d]*)_V\d*\w?(_[\w\d]*)?-v', file)
93  if cmssw_release and gr_r_version:
94  return (cmssw_release[0], gr_r_version[0])
95 
def get_relval_cmssw_version(file)
Definition: utils_v2.py:90
def utils_v2.get_relval_id (   file)
Returns unique relval ID (dataset name) for a given file.

Definition at line 96 of file utils_v2.py.

96 def get_relval_id(file):
97  """Returns unique relval ID (dataset name) for a given file."""
98  dataset_name = re.findall('R\d{9}__([\w\d]*)__CMSSW_', file)
99  return dataset_name[0]
100 
def get_relval_id(file)
Definition: utils_v2.py:96
def utils_v2.get_relval_max_version (   files)
Returns the file with the maximum version, where the version appears a) at the beginning of the file name,
e.g. DQM_V000M, and b) at the end of the run, e.g. _run2012-vM. M has to be the maximum.

Definition at line 78 of file utils_v2.py.

References get_relval_version().

79  """Returns file with maximum version at a) beggining of the file,
80  e.g. DQM_V000M b) at the end of run, e.g. _run2012-vM. M has to be max."""
81  max_file = files[0]
82  max_v = get_relval_version(files[0])
83  for file in files:
84  file_v = get_relval_version(file)
85  if file_v[1] > max_v[1] or ((file_v[1] == max_v[1]) and (file_v[0] > max_v[0])):
86  max_file = file
87  max_v = file_v
88  return max_file
89 
def get_relval_max_version(files)
Definition: utils_v2.py:78
def get_relval_version(file)
-------------—— Make files pairs: RelVal utils ---------------——
Definition: utils_v2.py:71
def utils_v2.get_relval_version (   file)

-------------—— Make files pairs: RelVal utils ---------------——

Returns tuple (CMSSW version, run version) for specified file.

Definition at line 71 of file utils_v2.py.

References createfilelist.int.

Referenced by get_relval_max_version().

72  """Returns tuple (CMSSW version, run version) for specified file."""
73  cmssw_version = re.findall('DQM_V(\d*)_', file)
74  run_version = re.findall('CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-[\w\d]*_V\d*\w?(?:_[\w\d]*)?-v(\d*)__', file)
75  if cmssw_version and run_version:
76  return (int(cmssw_version[0]), int(run_version[0]))
77 
def get_relval_version(file)
-------------—— Make files pairs: RelVal utils ---------------——
Definition: utils_v2.py:71
def utils_v2.get_relvaldata_cmssw_version (   file)
Returns tuple (CMSSW release, GR_R version) for specified RelValData file.

Definition at line 40 of file utils_v2.py.

Referenced by get_version().

41  """Returns tuple (CMSSW release, GR_R version) for specified RelValData file."""
42  cmssw_release = re.findall('(CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?)-', file)
43  gr_r_version = re.findall('-(GR_R_\d*_V\d*\w?)(?:_RelVal)?_', file)
44  if not gr_r_version:
45  gr_r_version = re.findall('CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-(\w*)_RelVal_', file)
46  if cmssw_release and gr_r_version:
47  return (cmssw_release[0], gr_r_version[0])
48 
def get_relvaldata_cmssw_version(file)
Definition: utils_v2.py:40
def utils_v2.get_relvaldata_id (   file)

-----------—— Make files pairs: RelValData utils --------------——

Returns unique relvaldata ID for a given file.

Definition at line 30 of file utils_v2.py.

31  """Returns unique relvaldata ID for a given file."""
32  run_id = re.search('R\d{9}', file)
33  run = re.search('_RelVal_([\w\d]*)-v\d__', file)
34  if not run:
35  run = re.search('GR_R_\d*_V\d*C?_([\w\d]*)-v\d__', file)
36  if run_id and run:
37  return (run_id.group(), run.group(1))
38  return None
39 
def get_relvaldata_id(file)
-----------—— Make files pairs: RelValData utils --------------——
Definition: utils_v2.py:30
def utils_v2.get_relvaldata_max_version (   files)
Returns the file with the maximum version, where the version appears a) at the beginning of the file name,
e.g. DQM_V000M, and b) at the end of the run, e.g. _run2012-vM. M has to be the maximum.

Definition at line 58 of file utils_v2.py.

References get_relvaldata_version().

59  """Returns file with maximum version at a) beggining of the file,
60  e.g. DQM_V000M b) at the end of run, e.g. _run2012-vM. M has to be max."""
61  max_file = files[0]
62  max_v = get_relvaldata_version(files[0])
63  for file in files:
64  file_v = get_relvaldata_version(file)
65  if file_v[1] > max_v[1] or ((file_v[1] == max_v[1]) and (file_v[0] > max_v[0])):
66  max_file = file
67  max_v = file_v
68  return max_file
69 
def get_relvaldata_max_version(files)
Definition: utils_v2.py:58
def get_relvaldata_version(file)
Definition: utils_v2.py:49
def utils_v2.get_relvaldata_version (   file)
Returns tuple (CMSSW version, run version) for specified file.

Definition at line 49 of file utils_v2.py.

References createfilelist.int.

Referenced by get_relvaldata_max_version().

50  """Returns tuple (CMSSW version, run version) for specified file."""
51  cmssw_version = re.findall('DQM_V(\d*)_', file)
52  run_version = re.findall('_RelVal_[\w\d]*-v(\d)__', file)
53  if not run_version:
54  run_version = re.findall('GR_R_\d*_V\d*C?_[\w\d]*-v(\d)__', file)
55  if cmssw_version and run_version:
56  return (int(cmssw_version[0]), int(run_version[0]))
57 
def get_relvaldata_version(file)
Definition: utils_v2.py:49
def utils_v2.get_size_to_download (   work_path,
  files_with_urls 
)
Returns file list to download and total size to download.

Definition at line 444 of file utils_v2.py.

References createfilelist.int, join(), and edm.print().

Referenced by ValidationMatrix_v2.ReleaseComparison.compare().

444 def get_size_to_download(work_path, files_with_urls):
445  """Returns file list to download and total size to download."""
446  opener = build_opener(X509CertOpen())
447  size_to_download = 0
448  files_to_download = []
449  for filename, url in files_with_urls:
450  url_file = opener.open(Request(url))
451  size = int(url_file.headers["Content-Length"])
452  file_path = join(work_path, filename)
453  if exists(file_path) and getsize(file_path) / 1024 == size / 1024:
454  print("Exists on disk %s." % filename)
455  else:
456  size_to_download += size
457  files_to_download.append(url)
458  return size_to_download, files_to_download
459 
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def get_size_to_download(work_path, files_with_urls)
Definition: utils_v2.py:444
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def utils_v2.get_version (   filename)
Returns CMSSW and GR_R versions for the given filename.

Definition at line 433 of file utils_v2.py.

References get_relval_cmssw_version(), get_relvaldata_cmssw_version(), is_relvaldata(), join(), and digitizers_cfi.strip.

Referenced by ValidationMatrix_v2.ReleaseComparison.compare().

433 def get_version(filename):
434  """Returns CMSSW and GR_R versions for the given filename."""
435  if is_relvaldata([filename]):
436  version_elems = get_relvaldata_cmssw_version(filename)
437  else:
438  relval_version = get_relval_cmssw_version(filename)
439  version_elems = (relval_version[0], relval_version[1][0], relval_version[1][1])
440  version_elems = [elem.strip('_').strip('RelVal_') for elem in version_elems]
441  return '___'.join([elem for elem in version_elems if elem])
442 
443 
def get_version(filename)
Definition: utils_v2.py:433
def get_relval_cmssw_version(file)
Definition: utils_v2.py:90
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def is_relvaldata(files)
--------------------— Make file pairs -----------------------—
Definition: utils_v2.py:102
def get_relvaldata_cmssw_version(file)
Definition: utils_v2.py:40
def utils_v2.init_database (   db_path)

Utils.

Definition at line 389 of file utils_v2.py.

References estimatePileup.basename, and edm.print().

Referenced by ValidationMatrix_v2.ReleaseComparison.compare().

389 def init_database(db_path):
390  print('Initialising DB: %s...' % basename(db_path), end=' ')
391  conn = sqlite3.connect(db_path)
392 
393  ## Creates tables
394  c = conn.cursor()
395  c.execute("""CREATE TABLE IF NOT EXISTS ReleaseComparison (
396  id INTEGER PRIMARY KEY,
397  title TEXT,
398  release1 TEXT,
399  release2 TEXT,
400  statistical_test TEXT
401  );""")
402  c.execute("""CREATE TABLE IF NOT EXISTS Directory (
403  id INTEGER PRIMARY KEY,
404  name TEXT,
405  parent_id INTEGER,
406  from_histogram_id INTEGER,
407  till_histogram_id INTEGER,
408  FOREIGN KEY (parent_id) REFERENCES Directory(id)
409  FOREIGN KEY (from_histogram_id) REFERENCES HistogramComparison(id)
410  FOREIGN KEY (till_histogram_id) REFERENCES HistogramComparison(id)
411  )""")
412  c.execute("""CREATE TABLE IF NOT EXISTS RootFileComparison (
413  id INTEGER PRIMARY KEY,
414  filename1 TEXT,
415  filename2 TEXT,
416  release_comparison_id INTEGER,
417  directory_id INTEGER,
418  FOREIGN KEY (release_comparison_id) REFERENCES ReleaseComparison(id),
419  FOREIGN KEY (directory_id) REFERENCES Directory(id)
420  )""")
421  c.execute("""CREATE TABLE IF NOT EXISTS HistogramComparison (
422  id INTEGER PRIMARY KEY,
423  name TEXT,
424  p_value REAL,
425  directory_id INTEGER,
426  FOREIGN KEY (directory_id) REFERENCES Directory(id)
427  )""")
428 
429  print('Done.')
430  return db_path
431 
432 
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def init_database(db_path)
Utils.
Definition: utils_v2.py:389
def utils_v2.is_relvaldata (   files)

--------------------— Make file pairs -----------------------—

Definition at line 102 of file utils_v2.py.

References any().

Referenced by get_version(), and make_file_pairs().

102 def is_relvaldata(files):
103  is_relvaldata_re = re.compile('_RelVal_')
104  return any([is_relvaldata_re.search(filename) for filename in files])
105 
bool any(const std::vector< T > &v, const T &what)
Definition: ECalSD.cc:37
def is_relvaldata(files)
--------------------— Make file pairs -----------------------—
Definition: utils_v2.py:102
def utils_v2.make_file_pairs (   files1,
  files2 
)

Definition at line 106 of file utils_v2.py.

References mps_setup.append, cmsPerfStripChart.dict, cmsRelvalreport.exit, is_relvaldata(), edm.print(), and str.

Referenced by ValidationMatrix_v2.ReleaseComparison.compare().

106 def make_file_pairs(files1, files2):
107  print('\n################# Analyzing files ###################')
108  ## Select functions to use
109  if is_relvaldata(files1):
110  is_relval_data = True
111  get_cmssw_version = get_relvaldata_cmssw_version
112  get_id = get_relvaldata_id
113  get_max_version = get_relvaldata_max_version
114  else:
115  is_relval_data = False
116  get_cmssw_version = get_relval_cmssw_version
117  get_id = get_relval_id
118  get_max_version = get_relval_max_version
119 
120  ## Divide files into groups
121  versions1, versions2 = dict(), dict() # {version1: [file1, file2, ...], version2: [...], ...}
122  for files, versions in (files1, versions1), (files2, versions2):
123  for file in files:
124  version = get_cmssw_version(file)
125  if version:
126  if version in versions:
127  versions[version].append(file)
128  else:
129  versions[version] = [file]
130 
131  ## Print the division into groups
132  print('For RELEASE1 found file groups:')
133  for version in versions1:
134  print(' %s: %d files' % (str(version), len(versions1[version])))
135  if not versions1:
136  print('None.')
137 
138  print('\nFor RELEASE2 found file groups:')
139  for version in versions2:
140  print(' %s: %d files' % (str(version), len(versions2[version])))
141  if not versions2:
142  print('None.')
143 
144  if not len(versions1) or not len(versions2):
145  print('\nNot enough file groups. Exiting...\n')
146  exit()
147 
148  ## Pair till you find pairs.
149  pairs = []
150  for v1 in sorted(versions1, key=lambda x: len(versions1[x]), reverse=True):
151  for v2 in sorted(versions2, key=lambda x: len(versions2[x]), reverse=True):
152  if v1 == v2:
153  continue
154  ## Print the groups.
155  print('\n################# Pairing the files ###################')
156  print('%s (%d files) VS %s (%d files):\n' % (str(v1),
157  len(versions1[v1]), str(v2), len(versions2[v2])))
158 
159  ## Pairing two versions
160  for unique_id in set([get_id(file) for file in versions1[v1]]):
161  if is_relval_data:
162  dataset_re = re.compile(unique_id[0] + '_')
163  run_re = re.compile(unique_id[1])
164  c1_files = [file for file in versions1[v1] if dataset_re.search(file) and run_re.search(file)]
165  c2_files = [file for file in versions2[v2] if dataset_re.search(file) and run_re.search(file)]
166  else:
167  dataset_re = re.compile(unique_id + '_')
168  c1_files = [file for file in versions1[v1] if dataset_re.search(file)]
169  c2_files = [file for file in versions2[v2] if dataset_re.search(file)]
170 
171  if len(c1_files) > 0 and len(c2_files) > 0:
172  first_file = get_max_version(c1_files)
173  second_file = get_max_version(c2_files)
174  print('%s\n%s\n' % (first_file, second_file))
175  pairs.append((first_file, second_file))
176 
177  print("Got %d pairs." % (len(pairs)))
178  if pairs:
179  return pairs
180  print('Found no file pairs. Exiting..\n')
181  exit()
182 
def make_file_pairs(files1, files2)
Definition: utils_v2.py:106
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def is_relvaldata(files)
--------------------— Make file pairs -----------------------—
Definition: utils_v2.py:102
#define str(s)
def utils_v2.recursive_search_online (   url,
  rel1,
  frags1,
  rel2,
  frags2 
)
Recursively searches for files that match the pattern.

Definition at line 214 of file utils_v2.py.

References Vispa.Plugins.EdmBrowser.EdmDataAccessor.all(), auth_wget(), cmsPerfStripChart.dict, join(), and edm.print().

Referenced by ValidationMatrix_v2.ReleaseComparison.compare().

214 def recursive_search_online(url, rel1, frags1, rel2, frags2):
215  """Recursively searches for files, that matches the pattern."""
216  if not url:
217  url = 'https://cmsweb.cern.ch/dqm/relval/data/browse/ROOT/RelValData/'
218  g1, g2 = recursive_search_online(url, rel1, frags1, rel2, frags2)
219  url = 'https://cmsweb.cern.ch/dqm/relval/data/browse/ROOT/RelVal/'
220  g3, g4 = recursive_search_online(url, rel1, frags1, rel2, frags2)
221  g1.update(g3), g2.update(g4)
222  return g1, g2
223 
224  domain = '://'.join(urlparse(url)[:2])
225 
226  ## Compile regular expressions
227  href_re = re.compile(r"<a href='([-./\w]*)'>([-./\w]*)<")
228 
229  def compile_res(rel, frags):
230  frags = frags.split(',')
231  regexps = [s for s in frags if not s.startswith('!')]
232  regexps += ['^((?%s).)*$' % s for s in frags if s.startswith('!')]
233  regexps += [rel + '-', '.root']
234  return [re.compile(r) for r in regexps]
235 
236  res1 = compile_res(rel1, frags1)
237  res2 = compile_res(rel2, frags2)
238 
239  ## Recursively find files that matches regular expressions
240  hrefs = [(name, path) for path, name in href_re.findall(auth_wget(url))[1:]]
241  files_with_urls1, files_with_urls2 = dict(), dict()
242  for name, path in hrefs:
243  if splitext(name)[1]: # If file
244  if all([r.search(name) for r in res1]):
245  files_with_urls1[name] = domain + path
246  if all([r.search(name) for r in res2]):
247  files_with_urls2[name] = domain + path
248  else:
249  print(domain + path)
250  new_hrefs = href_re.findall(auth_wget(domain + path))[1:]
251  hrefs.extend([(name, path) for path, name in new_hrefs])
252  return files_with_urls1, files_with_urls2
253 
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def recursive_search_online(url, rel1, frags1, rel2, frags2)
Definition: utils_v2.py:214
def auth_wget(url)
-----------------— Recursive file downloader --------------------—
Definition: utils_v2.py:184
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def utils_v2.search_on_disk (   work_path,
  rel1,
  frags1,
  rel2,
  frags2 
)

Definition at line 254 of file utils_v2.py.

References Vispa.Plugins.EdmBrowser.EdmDataAccessor.all(), cmsRelvalreport.exit, and edm.print().

Referenced by ValidationMatrix_v2.ReleaseComparison.compare().

254 def search_on_disk(work_path, rel1, frags1, rel2, frags2):
255  if not work_path:
256  print('No working directory specified. Use "--dir DIR" option to ' +\
257  'specify working directory. Exiting...')
258  exit()
259  ## Compile regular expressions
260  def compile_res(rel, frags):
261  frags = frags.split(',')
262  regexps = [s for s in frags if not s.startswith('!')]
263  regexps += ['^((?%s).)*$' % s for s in frags if s.startswith('!')]
264  regexps += [rel + '-', '.root']
265  return [re.compile(r) for r in regexps]
266 
267  res1 = compile_res(rel1, frags1)
268  res2 = compile_res(rel2, frags2)
269 
270  ## Recursively find files that matches regular expressions
271  files = listdir(work_path)
272  files1, files2 = [], []
273  for name in files:
274  if splitext(name)[1]:
275  if all([r.search(name) for r in res1]):
276  files1.append(name)
277  if all([r.search(name) for r in res2]):
278  files2.append(name)
279  return files1, files2
280 
281 
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def search_on_disk(work_path, rel1, frags1, rel2, frags2)
Definition: utils_v2.py:254
def utils_v2.show_status_bar (   total_size)
Shows download status.

Definition at line 478 of file utils_v2.py.

478 def show_status_bar(total_size):
479  """Shows download status."""
480  q = show_status_bar.q
481  total_size = total_size / (1024*1024)
482  downloaded = 0
483  while downloaded < total_size:
484  try:
485  o = q.get(timeout=20)
486  downloaded += 1
487  print('\r %d/%d MB %d%% ' % (downloaded, total_size, 100*downloaded/total_size), end=' ')
488  sys.stdout.flush()
489  except Empty:
490  time.sleep(1)
491  break
492 
def show_status_bar(total_size)
Definition: utils_v2.py:478
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66

Variable Documentation

utils_v2.comparison_errors

Exception definitions.

Definition at line 283 of file utils_v2.py.

utils_v2.tests