3 Help functions for ValidationMatrix_v2.py. 5 Author: Albertas Gimbutas, Vilnius University (LT) 6 e-mail: albertasgim@gmail.com 8 from __future__
import print_function
13 from datetime
import datetime
14 from multiprocessing
import Pool, Queue, Process
16 from optparse
import OptionParser, OptionGroup
17 from os
import makedirs, listdir
18 from os.path
import basename, dirname, isfile, splitext, join, exists, getsize
19 from Queue
import Empty
20 from urllib2
import build_opener, Request, HTTPError
21 from urlparse
import urlparse
22 from httplib
import BadStatusLine
25 from Utilities.RelMon.authentication
import X509CertOpen
27 from authentication
import X509CertOpen
31 """Returns unique relvaldata ID for a given file.""" 32 run_id = re.search(
'R\d{9}', file)
33 run = re.search(
'_RelVal_([\w\d]*)-v\d__', file)
35 run = re.search(
'GR_R_\d*_V\d*C?_([\w\d]*)-v\d__', file)
37 return (run_id.group(), run.group(1))
41 """Returns tuple (CMSSW release, GR_R version) for specified RelValData file.""" 42 cmssw_release = re.findall(
'(CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?)-', file)
43 gr_r_version = re.findall(
'-(GR_R_\d*_V\d*\w?)(?:_RelVal)?_', file)
45 gr_r_version = re.findall(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-(\w*)_RelVal_', file)
46 if cmssw_release
and gr_r_version:
47 return (cmssw_release[0], gr_r_version[0])
50 """Returns tuple (CMSSW version, run version) for specified file.""" 51 cmssw_version = re.findall(
'DQM_V(\d*)_', file)
52 run_version = re.findall(
'_RelVal_[\w\d]*-v(\d)__', file)
54 run_version = re.findall(
'GR_R_\d*_V\d*C?_[\w\d]*-v(\d)__', file)
55 if cmssw_version
and run_version:
56 return (
int(cmssw_version[0]),
int(run_version[0]))
59 """Returns file with maximum version at a) beginning of the file, 60 e.g. DQM_V000M b) at the end of run, e.g. _run2012-vM. M has to be max.""" 65 if file_v[1] > max_v[1]
or ((file_v[1] == max_v[1])
and (file_v[0] > max_v[0])):
72 """Returns tuple (CMSSW version, run version) for specified file.""" 73 cmssw_version = re.findall(
'DQM_V(\d*)_', file)
74 run_version = re.findall(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-[\w\d]*_V\d*\w?(?:_[\w\d]*)?-v(\d*)__', file)
75 if cmssw_version
and run_version:
76 return (
int(cmssw_version[0]),
int(run_version[0]))
79 """Returns file with maximum version at a) beginning of the file, 80 e.g. DQM_V000M b) at the end of run, e.g. _run2012-vM. M has to be max.""" 85 if file_v[1] > max_v[1]
or ((file_v[1] == max_v[1])
and (file_v[0] > max_v[0])):
91 cmssw_release = re.findall(
'(CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?)-', file)
92 gr_r_version = re.findall(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-([\w\d]*)_V\d*\w?(_[\w\d]*)?-v', file)
93 if cmssw_release
and gr_r_version:
94 return (cmssw_release[0], gr_r_version[0])
97 """Returns unique relval ID (dataset name) for a given file.""" 98 dataset_name = re.findall(
'R\d{9}__([\w\d]*)__CMSSW_', file)
99 return dataset_name[0]
103 is_relvaldata_re = re.compile(
'_RelVal_')
104 return any([is_relvaldata_re.search(filename)
for filename
in files])
107 print(
'\n################# Analyzing files ###################')
110 is_relval_data =
True 111 get_cmssw_version = get_relvaldata_cmssw_version
112 get_id = get_relvaldata_id
113 get_max_version = get_relvaldata_max_version
115 is_relval_data =
False 116 get_cmssw_version = get_relval_cmssw_version
117 get_id = get_relval_id
118 get_max_version = get_relval_max_version
121 versions1, versions2 =
dict(),
dict()
122 for files, versions
in (files1, versions1), (files2, versions2):
124 version = get_cmssw_version(file)
126 if version
in versions:
127 versions[version].
append(file)
129 versions[version] = [file]
132 print(
'For RELEASE1 found file groups:')
133 for version
in versions1:
134 print(
' %s: %d files' % (
str(version), len(versions1[version])))
138 print(
'\nFor RELEASE2 found file groups:')
139 for version
in versions2:
140 print(
' %s: %d files' % (
str(version), len(versions2[version])))
144 if not len(versions1)
or not len(versions2):
145 print(
'\nNot enough file groups. Exiting...\n')
150 for v1
in sorted(versions1, key=
lambda x: len(versions1[x]), reverse=
True):
151 for v2
in sorted(versions2, key=
lambda x: len(versions2[x]), reverse=
True):
155 print(
'\n################# Pairing the files ###################')
156 print(
'%s (%d files) VS %s (%d files):\n' % (
str(v1),
157 len(versions1[v1]),
str(v2), len(versions2[v2])))
160 for unique_id
in set([get_id(file)
for file
in versions1[v1]]):
162 dataset_re = re.compile(unique_id[0] +
'_')
163 run_re = re.compile(unique_id[1])
164 c1_files = [file
for file
in versions1[v1]
if dataset_re.search(file)
and run_re.search(file)]
165 c2_files = [file
for file
in versions2[v2]
if dataset_re.search(file)
and run_re.search(file)]
167 dataset_re = re.compile(unique_id +
'_')
168 c1_files = [file
for file
in versions1[v1]
if dataset_re.search(file)]
169 c2_files = [file
for file
in versions2[v2]
if dataset_re.search(file)]
171 if len(c1_files) > 0
and len(c2_files) > 0:
172 first_file = get_max_version(c1_files)
173 second_file = get_max_version(c2_files)
174 print(
'%s\n%s\n' % (first_file, second_file))
175 pairs.append((first_file, second_file))
177 print(
"Got %d pairs." % (len(pairs)))
180 print(
'Found no file pairs. Exiting..\n')
187 return opener.open(Request(url)).read()
188 except HTTPError
as e:
189 print(
'\nError: DQM GUI is temporarily unavailable. Probably maintainance hours. '+\
190 'Please try again later. Original error message: ``%s``. \nExiting...\n' % (e,))
192 except BadStatusLine
as e:
193 print(
'\nYou do not have permissions to access DQM GUI. Please check if your certificates '+\
194 'in ``~/.globus`` directory are configured correctly. Exitting...')
200 file_path =
join(auth_download_file.work_dir, filename)
202 file = open(file_path,
'wb')
204 url_file = opener.open(Request(url))
205 chunk = url_file.read(chunk_size)
208 auth_download_file.q.put((1,))
209 chunk = url_file.read(chunk_size)
210 print(
'\rDownloaded: %s ' % (filename,))
215 """Recursively searches for files, that matches the pattern.""" 217 url =
'https://cmsweb.cern.ch/dqm/relval/data/browse/ROOT/RelValData/' 219 url =
'https://cmsweb.cern.ch/dqm/relval/data/browse/ROOT/RelVal/' 221 g1.update(g3), g2.update(g4)
224 domain =
'://'.
join(urlparse(url)[:2])
227 href_re = re.compile(
r"<a href='([-./\w]*)'>([-./\w]*)<")
def compile_res(rel, frags):
    """Build the list of compiled regexps a file name has to satisfy.

    ``frags`` is a comma separated string of regexp fragments. A fragment
    is used as a pattern as is, unless it starts with ``!``; such a
    fragment is turned into a pattern that matches only names which do
    NOT contain it. Two more patterns are always appended: ``rel``
    followed by a dash, and the literal ``.root`` extension.

    Returns a list of compiled pattern objects.
    """
    frags = frags.split(',')
    regexps = [s for s in frags if not s.startswith('!')]
    # ``s`` still carries its leading ``!``, so ``(?%s)`` expands to the
    # negative lookahead ``(?!fragment)``.
    regexps += ['^((?%s).)*$' % s for s in frags if s.startswith('!')]
    # The dot is escaped on purpose: a bare ``.root`` would also accept
    # names such as ``..._root`` that merely contain "root".
    regexps += [rel + '-', r'\.root']
    return [re.compile(r) for r in regexps]
236 res1 = compile_res(rel1, frags1)
237 res2 = compile_res(rel2, frags2)
240 hrefs = [(name, path)
for path, name
in href_re.findall(
auth_wget(url))[1:]]
241 files_with_urls1, files_with_urls2 =
dict(),
dict()
242 for name, path
in hrefs:
243 if splitext(name)[1]:
244 if all([r.search(name)
for r
in res1]):
245 files_with_urls1[name] = domain + path
246 if all([r.search(name)
for r
in res2]):
247 files_with_urls2[name] = domain + path
250 new_hrefs = href_re.findall(
auth_wget(domain + path))[1:]
251 hrefs.extend([(name, path)
for path, name
in new_hrefs])
252 return files_with_urls1, files_with_urls2
256 print(
'No working directory specified. Use "--dir DIR" option to ' +\
257 'specify working directory. Exiting...')
def compile_res(rel, frags):
    """Compile every pattern a local file name must match to be selected.

    ``frags`` is split on commas. Each piece is taken as a regexp as is,
    except pieces starting with ``!``, which are converted into a pattern
    that matches only names NOT containing the piece. The release prefix
    (``rel`` plus a dash) and the literal ``.root`` extension are always
    added to the list.

    Returns a list of compiled pattern objects.
    """
    frags = frags.split(',')
    regexps = [s for s in frags if not s.startswith('!')]
    # The leading ``!`` is kept in ``s``; together with ``(?`` it forms
    # the negative lookahead ``(?!piece)``.
    regexps += ['^((?%s).)*$' % s for s in frags if s.startswith('!')]
    # Escaped dot: an unescaped ``.root`` matches any character before
    # "root", so e.g. ``foo_root`` would wrongly pass.
    regexps += [rel + '-', r'\.root']
    return [re.compile(r) for r in regexps]
267 res1 = compile_res(rel1, frags1)
268 res2 = compile_res(rel2, frags2)
271 files = listdir(work_path)
272 files1, files2 = [], []
274 if splitext(name)[1]:
275 if all([r.search(name)
for r
in res1]):
277 if all([r.search(name)
for r
in res2]):
279 return files1, files2
283 comparison_errors = {
284 'Missing histogram': -1,
285 'Histograms have different types': -2,
286 'Object is not a histogram': -3,
287 'Ranges of histograms are different': -4
291 def __init__(self, error_message, *args, **kwargs):
296 return 'Comparison Error: %d' % self.
error_code 309 return (x + 1) * (y + 1) * (z + 1)
313 if h.GetBinContent(i) != 0:
320 if not isinstance(h1, type(h2)):
322 if not h1.InheritsFrom(
'TH1'):
335 p_value = super(KolmogorovTest, self).
do_test(h1, h2)
336 if p_value
is not None:
340 if h.GetSumw2().GetSize() == 0:
342 return h1.KolmogorovTest(h2)
349 for i
in xrange(1, bin_count):
350 content = h.GetBinContent(i)
352 h.SetBinContent(i, -1 * content)
353 if h.GetBinError(i) == 0
and content != 0:
354 h.SetBinContent(i, 0)
358 for i
in xrange(1, bin_count):
359 if h.GetBinContent(i) > 0:
361 if filled_bins > more_than:
366 p_value = super(Chi2Test, self).
do_test(h1, h2)
367 if p_value
is not None:
381 if h1.InheritsFrom(
"TProfile")
or (h1.GetEntries() != h1.GetSumOfWeights()):
382 return h1.Chi2Test(h2,
'WW')
383 return h1.Chi2Test(h2,
'UU')
386 tests = {KolmogorovTest.name: KolmogorovTest, Chi2Test.name: Chi2Test}
391 conn = sqlite3.connect(db_path)
395 c.execute(
"""CREATE TABLE IF NOT EXISTS ReleaseComparison ( 396 id INTEGER PRIMARY KEY, 400 statistical_test TEXT 402 c.execute(
"""CREATE TABLE IF NOT EXISTS Directory ( 403 id INTEGER PRIMARY KEY, 406 from_histogram_id INTEGER, 407 till_histogram_id INTEGER, 408 FOREIGN KEY (parent_id) REFERENCES Directory(id) 409 FOREIGN KEY (from_histogram_id) REFERENCES HistogramComparison(id) 410 FOREIGN KEY (till_histogram_id) REFERENCES HistogramComparison(id) 412 c.execute(
"""CREATE TABLE IF NOT EXISTS RootFileComparison ( 413 id INTEGER PRIMARY KEY, 416 release_comparison_id INTEGER, 417 directory_id INTEGER, 418 FOREIGN KEY (release_comparison_id) REFERENCES ReleaseComparison(id), 419 FOREIGN KEY (directory_id) REFERENCES Directory(id) 421 c.execute(
"""CREATE TABLE IF NOT EXISTS HistogramComparison ( 422 id INTEGER PRIMARY KEY, 425 directory_id INTEGER, 426 FOREIGN KEY (directory_id) REFERENCES Directory(id) 434 """Returns CMSSW and GR_R versions for the given filename.""" 439 version_elems = (relval_version[0], relval_version[1][0], relval_version[1][1])
440 version_elems = [elem.strip(
'_').
strip(
'RelVal_')
for elem
in version_elems]
441 return '___'.
join([elem
for elem
in version_elems
if elem])
445 """Returns file list to download and total size to download.""" 448 files_to_download = []
449 for filename, url
in files_with_urls:
450 url_file = opener.open(Request(url))
451 size =
int(url_file.headers[
"Content-Length"])
452 file_path =
join(work_path, filename)
453 if exists(file_path)
and getsize(file_path) / 1024 == size / 1024:
454 print(
"Exists on disk %s." % filename)
456 size_to_download += size
457 files_to_download.append(url)
458 return size_to_download, files_to_download
461 '''Checks afs file system for space.''' 479 """Shows download status.""" 480 q = show_status_bar.q
481 total_size = total_size / (1024*1024)
483 while downloaded < total_size:
485 o = q.get(timeout=20)
487 print(
'\r %d/%d MB %d%% ' % (downloaded, total_size, 100*downloaded/total_size), end=
' ')
def enough_filled_bins(self, h, bin_count, more_than=3)
def get_version(filename)
def make_file_pairs(files1, files2)
bool any(const std::vector< T > &v, const T &what)
def __init__(self, error_message, args, kwargs)
def do_test(self, h1, h2)
def show_status_bar(total_size)
def auth_download_file(url, chunk_size=1048576)
S & print(S &os, JobReport::InputFile const &f)
def do_test(self, h1, h2)
def recursive_search_online(url, rel1, frags1, rel2, frags2)
def get_relval_max_version(files)
def get_size_to_download(work_path, files_with_urls)
def auth_wget(url)
-----------------— Recursive file downloader --------------------—
def get_relval_cmssw_version(file)
def search_on_disk(work_path, rel1, frags1, rel2, frags2)
static std::string join(char **cmd)
def is_relvaldata(files)
--------------------— Make file pairs -----------------------—
def get_relvaldata_cmssw_version(file)
def get_relvaldata_max_version(files)
def get_relvaldata_version(file)
def check_disk_for_space(work_path, size_needed)
def init_database(db_path)
Utils.
def get_relvaldata_id(file)
-----------—— Make files pairs: RelValData utils --------------——
def get_relval_version(file)
-------------—— Make files pairs: RelVal utils ---------------——
def do_test(self, h1, h2)
def make_absolute(self, h, bin_count)