3 Help functions for ValidationMatrix_v2.py. 5 Author: Albertas Gimbutas, Vilnius University (LT) 6 e-mail: albertasgim@gmail.com 8 from __future__
import print_function
9 from __future__
import absolute_import
10 from builtins
import range
15 from datetime
import datetime
16 from multiprocessing
import Pool, Queue, Process
18 from optparse
import OptionParser, OptionGroup
19 from os
import makedirs, listdir
20 from os.path
import basename, dirname, isfile, splitext, join, exists, getsize
21 from Queue
import Empty
22 from urllib2
import build_opener, Request, HTTPError
23 from urlparse
import urlparse
24 from httplib
import BadStatusLine
27 from Utilities.RelMon.authentication
import X509CertOpen
29 from .authentication
import X509CertOpen
33 """Returns unique relvaldata ID for a given file.""" 34 run_id = re.search(
'R\d{9}', file)
35 run = re.search(
'_RelVal_([\w\d]*)-v\d__', file)
37 run = re.search(
'GR_R_\d*_V\d*C?_([\w\d]*)-v\d__', file)
39 return (run_id.group(), run.group(1))
43 """Returns tuple (CMSSW release, GR_R version) for specified RelValData file.""" 44 cmssw_release = re.findall(
'(CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?)-', file)
45 gr_r_version = re.findall(
'-(GR_R_\d*_V\d*\w?)(?:_RelVal)?_', file)
47 gr_r_version = re.findall(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-(\w*)_RelVal_', file)
48 if cmssw_release
and gr_r_version:
49 return (cmssw_release[0], gr_r_version[0])
52 """Returns tuple (CMSSW version, run version) for specified file.""" 53 cmssw_version = re.findall(
'DQM_V(\d*)_', file)
54 run_version = re.findall(
'_RelVal_[\w\d]*-v(\d)__', file)
56 run_version = re.findall(
'GR_R_\d*_V\d*C?_[\w\d]*-v(\d)__', file)
57 if cmssw_version
and run_version:
58 return (
int(cmssw_version[0]),
int(run_version[0]))
61 """Returns file with maximum version at a) beggining of the file, 62 e.g. DQM_V000M b) at the end of run, e.g. _run2012-vM. M has to be max.""" 67 if file_v[1] > max_v[1]
or ((file_v[1] == max_v[1])
and (file_v[0] > max_v[0])):
74 """Returns tuple (CMSSW version, run version) for specified file.""" 75 cmssw_version = re.findall(
'DQM_V(\d*)_', file)
76 run_version = re.findall(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-[\w\d]*_V\d*\w?(?:_[\w\d]*)?-v(\d*)__', file)
77 if cmssw_version
and run_version:
78 return (
int(cmssw_version[0]),
int(run_version[0]))
81 """Returns file with maximum version at a) beggining of the file, 82 e.g. DQM_V000M b) at the end of run, e.g. _run2012-vM. M has to be max.""" 87 if file_v[1] > max_v[1]
or ((file_v[1] == max_v[1])
and (file_v[0] > max_v[0])):
93 cmssw_release = re.findall(
'(CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?)-', file)
94 gr_r_version = re.findall(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-([\w\d]*)_V\d*\w?(_[\w\d]*)?-v', file)
95 if cmssw_release
and gr_r_version:
96 return (cmssw_release[0], gr_r_version[0])
99 """Returns unique relval ID (dataset name) for a given file.""" 100 dataset_name = re.findall(
'R\d{9}__([\w\d]*)__CMSSW_', file)
101 return dataset_name[0]
105 is_relvaldata_re = re.compile(
'_RelVal_')
106 return any([is_relvaldata_re.search(filename)
for filename
in files])
109 print(
'\n################# Analyzing files ###################')
112 is_relval_data =
True 113 get_cmssw_version = get_relvaldata_cmssw_version
114 get_id = get_relvaldata_id
115 get_max_version = get_relvaldata_max_version
117 is_relval_data =
False 118 get_cmssw_version = get_relval_cmssw_version
119 get_id = get_relval_id
120 get_max_version = get_relval_max_version
123 versions1, versions2 = dict(), dict()
124 for files, versions
in (files1, versions1), (files2, versions2):
126 version = get_cmssw_version(file)
128 if version
in versions:
129 versions[version].
append(file)
131 versions[version] = [file]
134 print(
'For RELEASE1 found file groups:')
135 for version
in versions1:
136 print(
' %s: %d files' % (
str(version), len(versions1[version])))
140 print(
'\nFor RELEASE2 found file groups:')
141 for version
in versions2:
142 print(
' %s: %d files' % (
str(version), len(versions2[version])))
146 if not len(versions1)
or not len(versions2):
147 print(
'\nNot enough file groups. Exiting...\n')
152 for v1
in sorted(versions1, key=
lambda x: len(versions1[x]), reverse=
True):
153 for v2
in sorted(versions2, key=
lambda x: len(versions2[x]), reverse=
True):
157 print(
'\n################# Pairing the files ###################')
158 print(
'%s (%d files) VS %s (%d files):\n' % (
str(v1),
159 len(versions1[v1]),
str(v2), len(versions2[v2])))
162 for unique_id
in set([get_id(file)
for file
in versions1[v1]]):
164 dataset_re = re.compile(unique_id[0] +
'_')
165 run_re = re.compile(unique_id[1])
166 c1_files = [file
for file
in versions1[v1]
if dataset_re.search(file)
and run_re.search(file)]
167 c2_files = [file
for file
in versions2[v2]
if dataset_re.search(file)
and run_re.search(file)]
169 dataset_re = re.compile(unique_id +
'_')
170 c1_files = [file
for file
in versions1[v1]
if dataset_re.search(file)]
171 c2_files = [file
for file
in versions2[v2]
if dataset_re.search(file)]
173 if len(c1_files) > 0
and len(c2_files) > 0:
174 first_file = get_max_version(c1_files)
175 second_file = get_max_version(c2_files)
176 print(
'%s\n%s\n' % (first_file, second_file))
177 pairs.append((first_file, second_file))
179 print(
"Got %d pairs." % (len(pairs)))
182 print(
'Found no file pairs. Exiting..\n')
189 return opener.open(Request(url)).
read()
190 except HTTPError
as e:
191 print(
'\nError: DQM GUI is temporarily unavailable. Probably maintainance hours. '+\
192 'Please try again later. Original error message: ``%s``. \nExiting...\n' % (e,))
194 except BadStatusLine
as e:
195 print(
'\nYou do not have permissions to access DQM GUI. Please check if your certificates '+\
196 'in ``~/.globus`` directory are configured correctly. Exitting...')
201 filename = basename(url)
202 file_path =
join(auth_download_file.work_dir, filename)
204 file = open(file_path,
'wb')
206 url_file = opener.open(Request(url))
207 chunk = url_file.read(chunk_size)
210 auth_download_file.q.put((1,))
211 chunk = url_file.read(chunk_size)
212 print(
'\rDownloaded: %s ' % (filename,))
217 """Recursively searches for files, that matches the pattern.""" 219 url =
'https://cmsweb.cern.ch/dqm/relval/data/browse/ROOT/RelValData/' 221 url =
'https://cmsweb.cern.ch/dqm/relval/data/browse/ROOT/RelVal/' 223 g1.update(g3), g2.update(g4)
226 domain =
'://'.
join(urlparse(url)[:2])
229 href_re = re.compile(
r"<a href='([-./\w]*)'>([-./\w]*)<")
def compile_res(rel, frags):
    """Compile the list of regex filters for one release.

    Args:
        rel: release string; the pattern ``rel + '-'`` is added to the
            filters unchanged (it is NOT escaped, so regex syntax in it
            is honoured).
        frags: comma-separated regex fragments. A fragment that starts
            with ``!`` is negated (see below); any other fragment is
            used as a pattern as-is.

    Returns:
        A list of compiled patterns, in this order: the plain fragments,
        the negated fragments, ``rel + '-'`` and the ``.root`` extension
        filter.
    """
    frags = frags.split(',')
    # Plain fragments are used verbatim as patterns.
    regexps = [s for s in frags if not s.startswith('!')]
    # '!frag' is spliced in after '(?' and therefore becomes the negative
    # lookahead '^((?!frag).)*$', which only matches a string that does not
    # contain 'frag' at any position.
    regexps += ['^((?%s).)*$' % s for s in frags if s.startswith('!')]
    # The dot is escaped so that only a literal '.root' matches; the
    # unescaped '.root' also accepted any other character before 'root'.
    regexps += [rel + '-', r'\.root']
    return [re.compile(r) for r in regexps]
238 res1 = compile_res(rel1, frags1)
239 res2 = compile_res(rel2, frags2)
242 hrefs = [(name, path)
for path, name
in href_re.findall(
auth_wget(url))[1:]]
243 files_with_urls1, files_with_urls2 = dict(), dict()
244 for name, path
in hrefs:
245 if splitext(name)[1]:
246 if all([r.search(name)
for r
in res1]):
247 files_with_urls1[name] = domain + path
248 if all([r.search(name)
for r
in res2]):
249 files_with_urls2[name] = domain + path
252 new_hrefs = href_re.findall(
auth_wget(domain + path))[1:]
253 hrefs.extend([(name, path)
for path, name
in new_hrefs])
254 return files_with_urls1, files_with_urls2
258 print(
'No working directory specified. Use "--dir DIR" option to ' +\
259 'specify working directory. Exiting...')
def compile_res(rel, frags):
    """Compile the list of regex filters for one release.

    Args:
        rel: release string; the pattern ``rel + '-'`` is added to the
            filters unchanged (it is NOT escaped, so regex syntax in it
            is honoured).
        frags: comma-separated regex fragments. A fragment that starts
            with ``!`` is negated (see below); any other fragment is
            used as a pattern as-is.

    Returns:
        A list of compiled patterns, in this order: the plain fragments,
        the negated fragments, ``rel + '-'`` and the ``.root`` extension
        filter.
    """
    frags = frags.split(',')
    # Plain fragments are used verbatim as patterns.
    regexps = [s for s in frags if not s.startswith('!')]
    # '!frag' is spliced in after '(?' and therefore becomes the negative
    # lookahead '^((?!frag).)*$', which only matches a string that does not
    # contain 'frag' at any position.
    regexps += ['^((?%s).)*$' % s for s in frags if s.startswith('!')]
    # The dot is escaped so that only a literal '.root' matches; the
    # unescaped '.root' also accepted any other character before 'root'.
    regexps += [rel + '-', r'\.root']
    return [re.compile(r) for r in regexps]
269 res1 = compile_res(rel1, frags1)
270 res2 = compile_res(rel2, frags2)
273 files = listdir(work_path)
274 files1, files2 = [], []
276 if splitext(name)[1]:
277 if all([r.search(name)
for r
in res1]):
279 if all([r.search(name)
for r
in res2]):
281 return files1, files2
285 comparison_errors = {
286 'Missing histogram': -1,
287 'Histograms have different types': -2,
288 'Object is not a histogram': -3,
289 'Ranges of histograms are different': -4
293 def __init__(self, error_message, *args, **kwargs):
298 return 'Comparison Error: %d' % self.
error_code 311 return (x + 1) * (y + 1) * (z + 1)
315 if h.GetBinContent(i) != 0:
322 if not isinstance(h1, type(h2)):
324 if not h1.InheritsFrom(
'TH1'):
337 p_value = super(KolmogorovTest, self).
do_test(h1, h2)
338 if p_value
is not None:
342 if h.GetSumw2().GetSize() == 0:
344 return h1.KolmogorovTest(h2)
351 for i
in range(1, bin_count):
352 content = h.GetBinContent(i)
354 h.SetBinContent(i, -1 * content)
355 if h.GetBinError(i) == 0
and content != 0:
356 h.SetBinContent(i, 0)
360 for i
in range(1, bin_count):
361 if h.GetBinContent(i) > 0:
363 if filled_bins > more_than:
368 p_value = super(Chi2Test, self).
do_test(h1, h2)
369 if p_value
is not None:
383 if h1.InheritsFrom(
"TProfile")
or (h1.GetEntries() != h1.GetSumOfWeights()):
384 return h1.Chi2Test(h2,
'WW')
385 return h1.Chi2Test(h2,
'UU')
388 tests = {KolmogorovTest.name: KolmogorovTest, Chi2Test.name: Chi2Test}
392 print(
'Initialising DB: %s...' % basename(db_path), end=
' ')
393 conn = sqlite3.connect(db_path)
397 c.execute(
"""CREATE TABLE IF NOT EXISTS ReleaseComparison ( 398 id INTEGER PRIMARY KEY, 402 statistical_test TEXT 404 c.execute(
"""CREATE TABLE IF NOT EXISTS Directory ( 405 id INTEGER PRIMARY KEY, 408 from_histogram_id INTEGER, 409 till_histogram_id INTEGER, 410 FOREIGN KEY (parent_id) REFERENCES Directory(id) 411 FOREIGN KEY (from_histogram_id) REFERENCES HistogramComparison(id) 412 FOREIGN KEY (till_histogram_id) REFERENCES HistogramComparison(id) 414 c.execute(
"""CREATE TABLE IF NOT EXISTS RootFileComparison ( 415 id INTEGER PRIMARY KEY, 418 release_comparison_id INTEGER, 419 directory_id INTEGER, 420 FOREIGN KEY (release_comparison_id) REFERENCES ReleaseComparison(id), 421 FOREIGN KEY (directory_id) REFERENCES Directory(id) 423 c.execute(
"""CREATE TABLE IF NOT EXISTS HistogramComparison ( 424 id INTEGER PRIMARY KEY, 427 directory_id INTEGER, 428 FOREIGN KEY (directory_id) REFERENCES Directory(id) 436 """Returns CMSSW and GR_R versions for the given filename.""" 441 version_elems = (relval_version[0], relval_version[1][0], relval_version[1][1])
442 version_elems = [elem.strip(
'_').
strip(
'RelVal_')
for elem
in version_elems]
443 return '___'.
join([elem
for elem
in version_elems
if elem])
447 """Returns file list to download and total size to download.""" 450 files_to_download = []
451 for filename, url
in files_with_urls:
452 url_file = opener.open(Request(url))
453 size =
int(url_file.headers[
"Content-Length"])
454 file_path =
join(work_path, filename)
455 if exists(file_path)
and getsize(file_path) / 1024 == size / 1024:
456 print(
"Exists on disk %s." % filename)
458 size_to_download += size
459 files_to_download.append(url)
460 return size_to_download, files_to_download
463 '''Checks afs file system for space.''' 481 """Shows download status.""" 482 q = show_status_bar.q
483 total_size = total_size / (1024*1024)
485 while downloaded < total_size:
487 o = q.get(timeout=20)
489 print(
'\r %d/%d MB %d%% ' % (downloaded, total_size, 100*downloaded/total_size), end=
' ')
def enough_filled_bins(self, h, bin_count, more_than=3)
def get_version(filename)
def make_file_pairs(files1, files2)
bool any(const std::vector< T > &v, const T &what)
def __init__(self, error_message, args, kwargs)
def do_test(self, h1, h2)
def show_status_bar(total_size)
def auth_download_file(url, chunk_size=1048576)
def do_test(self, h1, h2)
def recursive_search_online(url, rel1, frags1, rel2, frags2)
def get_relval_max_version(files)
def get_size_to_download(work_path, files_with_urls)
def auth_wget(url)
-----------------— Recursive file downloader --------------------—
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
def get_relval_cmssw_version(file)
def search_on_disk(work_path, rel1, frags1, rel2, frags2)
static std::string join(char **cmd)
def is_relvaldata(files)
--------------------— Make file pairs -----------------------—
def get_relvaldata_cmssw_version(file)
def get_relvaldata_max_version(files)
def get_relvaldata_version(file)
def check_disk_for_space(work_path, size_needed)
def init_database(db_path)
Utils.
def get_relvaldata_id(file)
-----------—— Make files pairs: RelValData utils --------------——
def get_relval_version(file)
-------------—— Make files pairs: RelVal utils ---------------——
def do_test(self, h1, h2)
def make_absolute(self, h, bin_count)