1 from __future__
import print_function
2 from __future__
import absolute_import
14 from builtins
import range
19 from cPickle
import load
20 from os.path
import dirname,basename,join,isfile
21 from threading
import Thread
22 from time
import asctime
28 ROOT.gErrorIgnoreLevel=1001
29 ROOT.gROOT.SetBatch(
True)
32 from urllib2
import Request,build_opener,urlopen
34 if "RELMON_SA" in os.environ:
35 from .definitions
import *
36 from .authentication
import X509CertOpen
37 from .utils
import __file__
as this_module_name
40 from Utilities.RelMon.authentication
import X509CertOpen
41 from Utilities.RelMon.utils
import __file__
as this_module_name
48 if msg_level>=_log_level:
49 print(
"[%s] %s" %(asctime(),message))
53 this_dir=
dirname(this_module_name)
54 this_dir_one_up=this_dir[:this_dir.rfind(
"/")+1]
57 if "RELMON_SA" in os.environ:
58 style_file=this_dir_one_up+
"data/tdrstyle_mod.C" 60 style_file=
"%s/src/Utilities/RelMon/data/tdrstyle_mod.C"%(os.environ[
"CMSSW_BASE"])
62 gROOT.ProcessLine(
".L %s" %style_file)
63 gROOT.ProcessLine(
"setTDRStyle()")
65 "Print could not set the TDR style. File %s not found?" %style_file
70 bitsarray = array.array(
'B')
71 bitsarray.fromstring(literal.decode(
'hex'))
75 tbuffer = TBufferFile(TBufferFile.kRead, len(bitsarray), bitsarray,
False,0)
77 print(
"could not transform to object array:")
78 print([ i
for i
in bitsarray ])
81 if rootType ==
'TPROF':
83 if rootType ==
'TPROF2D':
84 rootType =
'TProfile2D' 86 root_class=eval(rootType+
'.Class()')
88 return tbuffer.ReadObject(root_class)
94 To be used in loops on bin number with range() 95 For each dimension there are GetNbinsX()+2 bins including underflow 96 and overflow, and range() loops starts from 0. So the total number 97 of bins as upper limit of a range() loop already includes the next 104 return (h.GetNbinsX()+2)*(biny)*(binz)
128 logger(1,
"*** ERROR: object types in comparison don't match: %s!=%s" %(type1,type2))
129 self.
rank=test_codes[
"DIFF_TYPES"]
130 elif not self.h2.InheritsFrom(
"TH1"):
131 logger(1,
"*** ERROR: object type is not histogram but a %s" %(type1))
132 self.
rank=test_codes[
"NO_HIST"]
140 are_empty=is_empty1
and is_empty2
141 one_empty=is_empty1
or is_empty2
156 return test_codes[
"DIFF_BIN"]
166 logger(0,
"+++ Test %s FAILED: rank is %s and threshold is %s ==> %s" %(self.
name, self.
rank, self.
threshold, status))
179 if h.GetBinContent(i)!=0:
return False 188 for ibin
in range(0,nbins+2):
189 if h.GetBinContent(ibin)>0:
192 if filled_bins/nbins < .5:
201 StatisticalTest.__init__(self,threshold)
207 for h
in self.
h1,self.
h2:
222 return self.h1.KolmogorovTest(self.
h2)
227 if not profile.InheritsFrom(
"TH1"):
231 n_bins=profile.GetNbinsX()
233 for ibin
in range(1,n_bins+2):
234 bin_low_edges.append(profile.GetBinLowEdge(ibin))
235 bin_low_edges=array.array(
'f',bin_low_edges)
236 histo=TH1F(profile.GetName(),profile.GetTitle(),n_bins,bin_low_edges)
237 for ibin
in range(0,n_bins+2):
238 histo.SetBinContent(ibin,profile.GetBinContent(ibin))
239 histo.SetBinError(ibin,profile.GetBinError(ibin))
246 StatisticalTest.__init__(self,threshold)
250 nbins=self.h1.GetNbinsX()
252 for h
in self.
h1,self.
h2:
254 for ibin
in range(0,nbins+2):
255 if h.GetBinContent(ibin)>0:
257 n_filled_l.append(nfilled)
258 return len([x
for x
in n_filled_l
if x>=min_filled] )>0
263 for i
in range(0,nbins):
264 for h
in self.
h1,self.
h2:
265 binc=h.GetBinContent(i)
267 h.SetBinContent(i,-1*binc)
268 if h.GetBinError(i)==0
and binc!=0:
273 if histogram.InheritsFrom(
"TProfile")
or (histogram.GetEntries()!=histogram.GetSumOfWeights()):
289 if hist1 ==
'W' and hist2 ==
'W':
290 chi2 = self.h1.Chi2Test(self.
h2,
'WW')
292 elif hist1 ==
'U' and hist2 == 'U': 293 chi2 = self.h1.Chi2Test(self.h2,'UU')
295 elif hist1 ==
'U' and hist2 == 'W':
296 chi2 = self.h1.Chi2Test(self.
h2,
'UW')
298 elif hist1 ==
'W' and hist2 ==
'U': 299 chi2 = self.h2.Chi2Test(self.h1,'UW')
308 """The bin to bin comparison builds a fake pvalue. It is 0 if the number of 309 bins is different. It is % of corresponding bins otherwise. 310 A threshold of 1 is needed to require a 1 to 1 correspondence between 314 StatisticalTest.__init__(self, threshold)
319 if self.h1.GetNbinsX() != self.h2.GetNbinsX() \
320 or self.h1.GetNbinsY() != self.h2.GetNbinsY() \
321 or self.h1.GetNbinsZ() != self.h2.GetNbinsZ() \
322 or abs(self.h1.GetXaxis().GetXmin() - self.h2.GetXaxis().GetXmin()) >self.
epsilon \
323 or abs(self.h1.GetYaxis().GetXmin() - self.h2.GetYaxis().GetXmin()) >self.
epsilon \
324 or abs(self.h1.GetZaxis().GetXmin() - self.h2.GetZaxis().GetXmin()) >self.
epsilon \
325 or abs(self.h1.GetXaxis().GetXmax() - self.h2.GetXaxis().GetXmax()) >self.
epsilon \
326 or abs(self.h1.GetYaxis().GetXmax() - self.h2.GetYaxis().GetXmax()) >self.
epsilon \
327 or abs(self.h1.GetZaxis().GetXmax() - self.h2.GetZaxis().GetXmax()) >self.
epsilon:
334 return test_codes[
"DIFF_BIN"]
339 for ibin
in range(0, nbins):
340 h1bin=self.h1.GetBinContent(ibin)
341 h2bin=self.h2.GetBinContent(ibin)
344 binavg=.5*(h1bin+h2bin)
350 print(
"Bin %ibin: bindiff %s" %(ibin,bindiff))
358 print(
"Histogram %s differs: nok: %s ntot: %s" %(self.h1.GetName(),n_ok_bins,nbins))
365 """The bin to bin comparison builds a fake pvalue. It is 0 if the number of 366 bins is different. It is % of corresponding bins otherwise. 367 A threshold of 1 is needed to require a 1 to 1 correspondence between 371 StatisticalTest.__init__(self, threshold)
377 if self.h1.GetNbinsX() != self.h2.GetNbinsX() \
378 or self.h1.GetNbinsY() != self.h2.GetNbinsY() \
379 or self.h1.GetNbinsZ() != self.h2.GetNbinsZ() \
380 or abs(self.h1.GetXaxis().GetXmin() - self.h2.GetXaxis().GetXmin()) >self.
epsilon \
381 or abs(self.h1.GetYaxis().GetXmin() - self.h2.GetYaxis().GetXmin()) >self.
epsilon \
382 or abs(self.h1.GetZaxis().GetXmin() - self.h2.GetZaxis().GetXmin()) >self.
epsilon \
383 or abs(self.h1.GetXaxis().GetXmax() - self.h2.GetXaxis().GetXmax()) >self.
epsilon \
384 or abs(self.h1.GetYaxis().GetXmax() - self.h2.GetYaxis().GetXmax()) >self.
epsilon \
385 or abs(self.h1.GetZaxis().GetXmax() - self.h2.GetZaxis().GetXmax()) >self.
epsilon:
392 return test_codes[
"DIFF_BIN"]
397 for ibin
in range(0,nbins):
399 h1bin=self.h1.GetBinContent(ibin)
400 h2bin=self.h2.GetBinContent(ibin)
403 binavg=.5*(h1bin+h2bin)
405 if binavg==0
or 100*
abs(bindiff)/binavg < self.
tolerance:
409 print(
"-->Bin %i bin: bindiff %s (%s - %s )" %(ibin,bindiff,h1bin,h2bin))
417 print(
"%s nok: %s ntot: %s" %(self.h1.GetName(),n_ok_bins,nbins))
421 Statistical_Tests={
"KS":KS,
424 "BinToBin1percent":BinToBin1percent,
429 def ask_ok(prompt, retries=4, complaint='yes or no'):
431 ok = raw_input(prompt)
432 if ok
in (
'y',
'ye',
'yes'):
434 if ok
in (
'n',
'no'):
436 retries = retries - 1
438 raise IOError(
'refusenik user')
445 Thread.__init__(self)
458 """ Fetch the WHOLE file, not in bunches... To be optimised. 461 datareq = Request(url)
462 datareq.add_header(
'authenticated_wget',
"The ultimate wgetter")
466 print(
"Checking existence of file %s on disk..."%filename)
467 if not isfile(
"./%s"%filename):
468 bin_content=opener.open(datareq).read()
470 print(
"File %s exists, skipping.." %filename)
472 print(
"Error: Unknown url %s" %url)
474 if bin_content!=
None:
475 ofile = open(filename,
'wb')
476 ofile.write(bin_content)
483 """Returns unique relvaldata ID for a given file.""" 484 run_id = re.search(
'R\d{9}', file)
485 run = re.search(
'_RelVal_([\w\d]*)-v\d__', file)
487 run = re.search(
'GR_R_\d*_V\d*C?_([\w\d]*)-v\d__', file)
489 return (run_id.group(), run.group(1))
493 """Returns tuple (CMSSW release, GR_R version) for specified RelValData file.""" 494 cmssw_release = re.findall(
'(CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?)-', file)
495 gr_r_version = re.findall(
'-(GR_R_\d*_V\d*\w?)(?:_RelVal)?_', file)
497 gr_r_version = re.findall(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-(\w*)_RelVal_', file)
498 if cmssw_release
and gr_r_version:
499 return (cmssw_release[0], gr_r_version[0])
502 """Returns tuple (CMSSW version, run version) for specified file.""" 503 cmssw_version = re.findall(
'DQM_V(\d*)_', file)
504 run_version = re.findall(
'_RelVal_[\w\d]*-v(\d)__', file)
506 run_version = re.findall(
'GR_R_\d*_V\d*C?_[\w\d]*-v(\d)__', file)
507 if cmssw_version
and run_version:
508 return (
int(cmssw_version[0]),
int(run_version[0]))
511 """Returns file with maximum version at a) beginning of the file, 512 e.g. DQM_V000M b) at the end of run, e.g. _run2012-vM. M has to be max.""" 517 if file_v[1] > max_v[1]
or ((file_v[1] == max_v[1])
and (file_v[0] > max_v[0])):
524 """Returns tuple (CMSSW version, run version) for specified file.""" 525 cmssw_version = re.findall(
'DQM_V(\d*)_', file)
526 run_version = re.findall(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-[\w\d]*_V\d*\w?(?:_[\w\d]*)?-v(\d*)__', file)
527 if cmssw_version
and run_version:
528 return (
int(cmssw_version[0]),
int(run_version[0]))
531 """Returns file with maximum version at a) beginning of the file, 532 e.g. DQM_V000M b) at the end of run, e.g. _run2012-vM. M has to be max.""" 537 if file_v[1] > max_v[1]
or ((file_v[1] == max_v[1])
and (file_v[0] > max_v[0])):
543 cmssw_release = re.findall(
'(CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?)-', file)
544 gr_r_version = re.findall(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-([\w\d]*)_V\d*\w?(_[\w\d]*)?-v', file)
545 if cmssw_release
and gr_r_version:
546 if "PU" in gr_r_version[0][0]
and not "FastSim" in file:
547 __gt = re.sub(
'^[^_]*_',
"", gr_r_version[0][0])
548 __process_string = gr_r_version[0][1]
549 return (__gt, __process_string)
550 elif "PU" in gr_r_version[0][0]
and "FastSim" in file:
551 return (cmssw_release[0],
"PU_")
552 return (cmssw_release[0], gr_r_version[0])
555 """Returns unique relval ID (dataset name) for a given file.""" 556 dataset_name = re.findall(
'R\d{9}__([\w\D]*)__CMSSW_', file)
557 __process_string = re.search(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-([\w\d]*)_V\d*\w?(_[\w\d]*)?-v', file)
560 if "PU" in __process_string.group(1)
and not "FastSim" in file:
561 _ps = re.search(
'^[^_]*_', __process_string.group(1)).
group()
562 elif "PU" in __process_string.group(1)
and "FastSim" in file:
563 return dataset_name[0]+
"_", _ps
564 return dataset_name[0], _ps
568 is_relvaldata_re = re.compile(
'_RelVal_')
569 return any([is_relvaldata_re.search(filename)
for filename
in files])
574 is_relval_data =
True 575 get_cmssw_version = get_relvaldata_cmssw_version
576 get_id = get_relvaldata_id
577 get_max_version = get_relvaldata_max_version
580 is_relval_data =
False 581 get_cmssw_version = get_relval_cmssw_version
582 get_id = get_relval_id
583 get_max_version = get_relval_max_version
587 versions_files =
dict()
589 version = get_cmssw_version(file)
590 if version
in versions_files:
591 versions_files[version].
append(file)
593 versions_files[version] = [file]
597 print(
'\nFound versions:')
598 for version
in versions_files:
599 print(
'%s: %d files' % (
str(version), len(versions_files[version])))
601 if len(versions_files) <= 1:
602 print(
'\nFound too little versions, there is nothing to pair. Exiting...\n')
606 versions = versions_files.keys()
607 sizes = [len(value)
for value
in versions_files.values()]
608 v1 = versions[sizes.index(
max(sizes))]
610 sizes.remove(
max(sizes))
611 v2 = versions[sizes.index(
max(sizes))]
615 print(
'\nPairing %s (%d files) and %s (%d files)' % (
str(v1),
616 len(versions_files[v1]),
str(v2), len(versions_files[v2])))
619 print(
'\nGot pairs:')
621 for unique_id
in set([get_id(file)
for file
in versions_files[v1]]):
623 dataset_re = re.compile(unique_id[0]+
'_')
624 run_re = re.compile(unique_id[1])
625 c1_files = [file
for file
in versions_files[v1]
if dataset_re.search(file)
and run_re.search(file)]
626 c2_files = [file
for file
in versions_files[v2]
if dataset_re.search(file)
and run_re.search(file)]
628 dataset_re = re.compile(unique_id[0]+
'_')
629 ps_re = re.compile(unique_id[1])
631 c1_files = [file
for file
in versions_files[v1]
if dataset_re.search(file)
and ps_re.search(file)]
632 c2_files = [file
for file
in versions_files[v2]
if dataset_re.search(file)
and ps_re.search(file)]
634 if len(c1_files) > 0
and len(c2_files) > 0:
635 first_file = get_max_version(c1_files)
636 second_file = get_max_version(c2_files)
637 print(
'%s\n%s\n' % (first_file, second_file))
638 pairs.extend((first_file, second_file))
640 print(
"Paired and got %d files.\n" % len(pairs))
def checkBinningMatches(self)
def checkBinningMatches(self)
bool any(const std::vector< T > &v, const T &what)
def get_relvaldata_version(file)
def __init__(self, threshold)
S & print(S &os, JobReport::InputFile const &f)
def check_filled_bins(self, min_filled)
def get_relval_version(file)
-------------—— Make files pairs: RelVal utils ---------------——
def ask_ok(prompt, retries=4, complaint='yes or no')
def set_operands(self, h1, h2)
Abs< T >::type abs(const T &t)
def profile2histo(profile)
def __init__(self, threshold)
def __init__(self, filename)
def __init__(self, threshold=1)
def is_relvaldata(files)
----------------------— Make files pairs -----------------------—
def make_files_pairs(files, verbose=True)
rank
2D! return test_codes["2D"]
def check_histograms(self, histogram)
def get_relvaldata_id(file)
-----------—— Make files pairs: RelValData utils --------------——
def __init__(self, threshold)
def __init__(self, threshold=1)
def get_relval_max_version(files)
def literal2root(literal, rootType)
def logger(msg_level, message)
def get_relvaldata_cmssw_version(file)
def get_relval_cmssw_version(file)
def get_relvaldata_max_version(files)