19 from cPickle
import load
20 from os.path
import dirname,basename,join,isfile
21 from threading
import Thread
22 from time
import asctime
28 ROOT.gErrorIgnoreLevel=1001
29 ROOT.gROOT.SetBatch(
True)
32 from urllib2
import Request,build_opener,urlopen
34 if os.environ.has_key(
"RELMON_SA"):
35 from definitions
import *
36 from authentication
import X509CertOpen
37 from utils
import __file__
as this_module_name
40 from Utilities.RelMon.authentication
import X509CertOpen
41 from Utilities.RelMon.utils
import __file__
as this_module_name
48 if msg_level>=_log_level:
49 print "[%s] %s" %(asctime(),message)
53 this_dir=
dirname(this_module_name)
54 this_dir_one_up=this_dir[:this_dir.rfind(
"/")+1]
57 if os.environ.has_key(
"RELMON_SA"):
58 style_file=this_dir_one_up+
"data/tdrstyle_mod.C"
60 style_file=
"%s/src/Utilities/RelMon/data/tdrstyle_mod.C"%(os.environ[
"CMSSW_BASE"])
62 gROOT.ProcessLine(
".L %s" %style_file)
63 gROOT.ProcessLine(
"setTDRStyle()")
65 "Print could not set the TDR style. File %s not found?" %style_file
70 bitsarray = array.array(
'B')
71 bitsarray.fromstring(literal.decode(
'hex'))
75 tbuffer = TBufferFile(TBufferFile.kRead, len(bitsarray), bitsarray,
False,0)
77 print "could not transform to object array:"
78 print [ i
for i
in bitsarray ]
81 if rootType ==
'TPROF':
83 if rootType ==
'TPROF2D':
84 rootType =
'TProfile2D'
86 root_class=eval(rootType+
'.Class()')
88 return tbuffer.ReadObject(root_class)
97 return (h.GetNbinsX()+1)*(biny)*(binz)
121 logger(1,
"*** ERROR: object types in comparison don't match: %s!=%s" %(type1,type2))
122 self.
rank=test_codes[
"DIFF_TYPES"]
123 elif not self.h2.InheritsFrom(
"TH1"):
124 logger(1,
"*** ERROR: object type is not histogram but a %s" %(type1))
125 self.
rank=test_codes[
"NO_HIST"]
133 are_empty=is_empty1
and is_empty2
134 one_empty=is_empty1
or is_empty2
150 return test_codes[
"DIFF_BIN"]
160 logger(0,
"+++ Test %s FAILED: rank is %s and threshold is %s ==> %s" %(self.
name, self.
rank, self.
threshold, status))
173 if h.GetBinContent(i)!=0:
return False
182 for ibin
in xrange(nbins):
183 if h.GetBinContent(ibin)>0:
186 if filled_bins/nbins < .5:
195 StatisticalTest.__init__(self,threshold)
201 for h
in self.
h1,self.
h2:
216 return self.h1.KolmogorovTest(self.
h2)
221 if not profile.InheritsFrom(
"TH1"):
225 n_bins=profile.GetNbinsX()
227 for ibin
in xrange(1,n_bins+2):
228 bin_low_edges.append(profile.GetBinLowEdge(ibin))
229 bin_low_edges=array.array(
'f',bin_low_edges)
230 histo=TH1F(profile.GetName(),profile.GetTitle(),n_bins,bin_low_edges)
231 for ibin
in xrange(0,n_bins+1):
232 histo.SetBinContent(ibin,profile.GetBinContent(ibin))
233 histo.SetBinError(ibin,profile.GetBinError(ibin))
240 StatisticalTest.__init__(self,threshold)
244 nbins=self.h1.GetNbinsX()
246 for h
in self.
h1,self.
h2:
248 for ibin
in xrange(1,nbins+1):
249 if h.GetBinContent(ibin)>0:
251 n_filled_l.append(nfilled)
252 return len(filter (
lambda x:x>=min_filled,n_filled_l) )>0
257 for i
in xrange(1,nbins):
258 for h
in self.
h1,self.
h2:
259 binc=h.GetBinContent(i)
261 h.SetBinContent(i,-1*binc)
262 if h.GetBinError(i)==0
and binc!=0:
267 if histogram.InheritsFrom(
"TProfile")
or (histogram.GetEntries()!=histogram.GetSumOfWeights()):
283 if hist1 ==
'W' and hist2 ==
'W':
284 chi2 = self.h1.Chi2Test(self.
h2,
'WW')
286 elif hist1 ==
'U' and hist2 == 'U':
287 chi2 = self.h1.Chi2Test(self.h2,'UU')
289 elif hist1 ==
'U' and hist2 == 'W':
290 chi2 = self.h1.Chi2Test(self.
h2,
'UW')
292 elif hist1 ==
'W' and hist2 ==
'U':
293 chi2 = self.h2.Chi2Test(self.h1,'UW')
302 """The bin to bin comparison builds a fake pvalue. It is 0 if the number of
303 bins is different. It is % of corresponding bins otherwhise.
304 A threshold of 1 is needed to require a 1 to 1 correspondance between
308 StatisticalTest.__init__(self, threshold)
313 if self.h1.GetNbinsX() != self.h2.GetNbinsX() \
314 or self.h1.GetNbinsY() != self.h2.GetNbinsY() \
315 or self.h1.GetNbinsZ() != self.h2.GetNbinsZ() \
316 or abs(self.h1.GetXaxis().GetXmin() - self.h2.GetXaxis().GetXmin()) >self.
epsilon \
317 or abs(self.h1.GetYaxis().GetXmin() - self.h2.GetYaxis().GetXmin()) >self.
epsilon \
318 or abs(self.h1.GetZaxis().GetXmin() - self.h2.GetZaxis().GetXmin()) >self.
epsilon \
319 or abs(self.h1.GetXaxis().GetXmax() - self.h2.GetXaxis().GetXmax()) >self.
epsilon \
320 or abs(self.h1.GetYaxis().GetXmax() - self.h2.GetYaxis().GetXmax()) >self.
epsilon \
321 or abs(self.h1.GetZaxis().GetXmax() - self.h2.GetZaxis().GetXmax()) >self.
epsilon:
328 return test_codes[
"DIFF_BIN"]
333 for ibin
in xrange(0,nbins):
334 h1bin=self.h1.GetBinContent(ibin)
335 h2bin=self.h2.GetBinContent(ibin)
338 binavg=.5*(h1bin+h2bin)
344 print "Bin %ibin: bindiff %s" %(ibin,bindiff)
352 print "Histogram %s differs: nok: %s ntot: %s" %(self.h1.GetName(),n_ok_bins,nbins)
359 """The bin to bin comparison builds a fake pvalue. It is 0 if the number of
360 bins is different. It is % of corresponding bins otherwhise.
361 A threshold of 1 is needed to require a 1 to 1 correspondance between
365 StatisticalTest.__init__(self, threshold)
371 if self.h1.GetNbinsX() != self.h2.GetNbinsX() \
372 or self.h1.GetNbinsY() != self.h2.GetNbinsY() \
373 or self.h1.GetNbinsZ() != self.h2.GetNbinsZ() \
374 or abs(self.h1.GetXaxis().GetXmin() - self.h2.GetXaxis().GetXmin()) >self.
epsilon \
375 or abs(self.h1.GetYaxis().GetXmin() - self.h2.GetYaxis().GetXmin()) >self.
epsilon \
376 or abs(self.h1.GetZaxis().GetXmin() - self.h2.GetZaxis().GetXmin()) >self.
epsilon \
377 or abs(self.h1.GetXaxis().GetXmax() - self.h2.GetXaxis().GetXmax()) >self.
epsilon \
378 or abs(self.h1.GetYaxis().GetXmax() - self.h2.GetYaxis().GetXmax()) >self.
epsilon \
379 or abs(self.h1.GetZaxis().GetXmax() - self.h2.GetZaxis().GetXmax()) >self.
epsilon:
386 return test_codes[
"DIFF_BIN"]
391 for ibin
in xrange(0,nbins):
393 h1bin=self.h1.GetBinContent(ibin)
394 h2bin=self.h2.GetBinContent(ibin)
397 binavg=.5*(h1bin+h2bin)
399 if binavg==0
or 100*
abs(bindiff)/binavg < self.
tolerance:
403 print "-->Bin %i bin: bindiff %s (%s - %s )" %(ibin,bindiff,h1bin,h2bin)
411 print "%s nok: %s ntot: %s" %(self.h1.GetName(),n_ok_bins,nbins)
415 Statistical_Tests={
"KS":KS,
418 "BinToBin1percent":BinToBin1percent,
423 def ask_ok(prompt, retries=4, complaint='yes or no'):
425 ok = raw_input(prompt)
426 if ok
in (
'y',
'ye',
'yes'):
428 if ok
in (
'n',
'no'):
430 retries = retries - 1
432 raise IOError(
'refusenik user')
439 Thread.__init__(self)
444 print "Reading directory from %s" %(self.
filename)
452 """ Fetch the WHOLE file, not in bunches... To be optimised.
455 datareq = Request(url)
456 datareq.add_header(
'authenticated_wget',
"The ultimate wgetter")
459 filename=basename(url)
460 print "Checking existence of file %s on disk..."%filename
461 if not isfile(
"./%s"%filename):
462 bin_content=opener.open(datareq).
read()
464 print "File %s exists, skipping.." %filename
466 print "Error: Unknown url %s" %url
468 if bin_content!=
None:
469 ofile = open(filename,
'wb')
470 ofile.write(bin_content)
477 """Returns unique relvaldata ID for a given file."""
478 run_id = re.search(
'R\d{9}', file)
479 run = re.search(
'_RelVal_([\w\d]*)-v\d__', file)
481 run = re.search(
'GR_R_\d*_V\d*C?_([\w\d]*)-v\d__', file)
483 return (run_id.group(), run.group(1))
487 """Returns tuple (CMSSW release, GR_R version) for specified RelValData file."""
488 cmssw_release = re.findall(
'(CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?)-', file)
489 gr_r_version = re.findall(
'-(GR_R_\d*_V\d*\w?)(?:_RelVal)?_', file)
491 gr_r_version = re.findall(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-(\w*)_RelVal_', file)
492 if cmssw_release
and gr_r_version:
493 return (cmssw_release[0], gr_r_version[0])
496 """Returns tuple (CMSSW version, run version) for specified file."""
497 cmssw_version = re.findall(
'DQM_V(\d*)_', file)
498 run_version = re.findall(
'_RelVal_[\w\d]*-v(\d)__', file)
500 run_version = re.findall(
'GR_R_\d*_V\d*C?_[\w\d]*-v(\d)__', file)
501 if cmssw_version
and run_version:
502 return (int(cmssw_version[0]), int(run_version[0]))
505 """Returns file with maximum version at a) beggining of the file,
506 e.g. DQM_V000M b) at the end of run, e.g. _run2012-vM. M has to be max."""
511 if file_v[1] > max_v[1]
or ((file_v[1] == max_v[1])
and (file_v[0] > max_v[0])):
518 """Returns tuple (CMSSW version, run version) for specified file."""
519 cmssw_version = re.findall(
'DQM_V(\d*)_', file)
520 run_version = re.findall(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-[\w\d]*_V\d*\w?(?:_[\w\d]*)?-v(\d*)__', file)
521 if cmssw_version
and run_version:
522 return (int(cmssw_version[0]), int(run_version[0]))
525 """Returns file with maximum version at a) beggining of the file,
526 e.g. DQM_V000M b) at the end of run, e.g. _run2012-vM. M has to be max."""
531 if file_v[1] > max_v[1]
or ((file_v[1] == max_v[1])
and (file_v[0] > max_v[0])):
537 cmssw_release = re.findall(
'(CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?)-', file)
538 gr_r_version = re.findall(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-([\w\d]*)_V\d*\w?(_[\w\d]*)?-v', file)
539 if cmssw_release
and gr_r_version:
540 return (cmssw_release[0], gr_r_version[0])
543 """Returns unique relval ID (dataset name) for a given file."""
544 dataset_name = re.findall(
'R\d{9}__([\w\d]*)__CMSSW_', file)
545 return dataset_name[0]
549 is_relvaldata_re = re.compile(
'_RelVal_')
550 return any([is_relvaldata_re.search(filename)
for filename
in files])
555 is_relval_data =
True
556 get_cmssw_version = get_relvaldata_cmssw_version
557 get_id = get_relvaldata_id
558 get_max_version = get_relvaldata_max_version
561 is_relval_data =
False
562 get_cmssw_version = get_relval_cmssw_version
563 get_id = get_relval_id
564 get_max_version = get_relval_max_version
568 versions_files =
dict()
570 version = get_cmssw_version(file)
571 if versions_files.has_key(version):
572 versions_files[version].
append(file)
574 versions_files[version] = [file]
578 print '\nFound versions:'
579 for version
in versions_files:
580 print '%s: %d files' % (str(version), len(versions_files[version]))
582 if len(versions_files.keys()) <= 1:
583 print '\nFound too little versions, there is nothing to pair. Exiting...\n'
587 versions = versions_files.keys()
588 sizes = [len(value)
for value
in versions_files.values()]
589 v1 = versions[sizes.index(
max(sizes))]
591 sizes.remove(
max(sizes))
592 v2 = versions[sizes.index(
max(sizes))]
596 print '\nPairing %s (%d files) and %s (%d files)' % (str(v1),
597 len(versions_files[v1]), str(v2), len(versions_files[v2]))
602 for unique_id
in set([get_id(file)
for file
in versions_files[v1]]):
604 dataset_re = re.compile(unique_id[0]+
'_')
605 run_re = re.compile(unique_id[1])
606 c1_files = [file
for file
in versions_files[v1]
if dataset_re.search(file)
and run_re.search(file)]
607 c2_files = [file
for file
in versions_files[v2]
if dataset_re.search(file)
and run_re.search(file)]
609 dataset_re = re.compile(unique_id+
'_')
610 c1_files = [file
for file
in versions_files[v1]
if dataset_re.search(file)]
611 c2_files = [file
for file
in versions_files[v2]
if dataset_re.search(file)]
613 if len(c1_files) > 0
and len(c2_files) > 0:
614 first_file = get_max_version(c1_files)
615 second_file = get_max_version(c2_files)
616 print '%s\n%s\n' % (first_file, second_file)
617 pairs.extend((first_file, second_file))
619 print "Paired and got %d files.\n" % len(pairs)
def get_relval_version
-------------—— Make files pairs: RelVal utils ---------------——
const T & max(const T &a, const T &b)
def is_relvaldata
----------------------— Make files pairs -----------------------—
def get_relvaldata_cmssw_version
def get_relvaldata_id
-----------—— Make files pairs: RelValData utils --------------——
def get_relval_cmssw_version
def get_relvaldata_version
rank
2D! return test_codes["2D"]
def get_relvaldata_max_version
def get_relval_max_version
void set(const std::string &name, int value)
set the flag, with a run-time name