16 from cPickle
import load
17 from os.path
import dirname,basename,join,isfile
18 from threading
import Thread
19 from time
import asctime
25 ROOT.gErrorIgnoreLevel=1001
26 ROOT.gROOT.SetBatch(
True)
29 from urllib2
import Request,build_opener,urlopen
31 if os.environ.has_key(
"RELMON_SA"):
32 from definitions
import *
33 from authentication
import X509CertOpen
34 from utils
import __file__
as this_module_name
37 from Utilities.RelMon.authentication
import X509CertOpen
38 from Utilities.RelMon.utils
import __file__
as this_module_name
45 if msg_level>=_log_level:
46 print "[%s] %s" %(asctime(),message)
50 this_dir=
dirname(this_module_name)
51 this_dir_one_up=this_dir[:this_dir.rfind(
"/")+1]
54 if os.environ.has_key(
"RELMON_SA"):
55 style_file=this_dir_one_up+
"data/tdrstyle_mod.C"
57 style_file=
"%s/src/Utilities/RelMon/data/tdrstyle_mod.C"%(os.environ[
"CMSSW_BASE"])
59 gROOT.ProcessLine(
".L %s" %style_file)
60 gROOT.ProcessLine(
"setTDRStyle()")
62 "Print could not set the TDR style. File %s not found?" %style_file
67 bitsarray = array.array(
'B')
68 bitsarray.fromstring(literal.decode(
'hex'))
72 tbuffer = TBufferFile(TBufferFile.kRead, len(bitsarray), bitsarray,
False,0)
74 print "could not transform to object array:"
75 print [ i
for i
in bitsarray ]
78 if rootType ==
'TPROF':
80 if rootType ==
'TPROF2D':
81 rootType =
'TProfile2D'
83 root_class=eval(rootType+
'.Class()')
85 return tbuffer.ReadObject(root_class)
94 return (h.GetNbinsX()+1)*(biny)*(binz)
118 logger(1,
"*** ERROR: object types in comparison don't match: %s!=%s" %(type1,type2))
119 self.
rank=test_codes[
"DIFF_TYPES"]
120 elif not self.h2.InheritsFrom(
"TH1"):
121 logger(1,
"*** ERROR: object type is not histogram but a %s" %(type1))
122 self.
rank=test_codes[
"NO_HIST"]
130 are_empty=is_empty1
and is_empty2
131 one_empty=is_empty1
or is_empty2
146 return test_codes[
"DIFF_BIN"]
156 logger(0,
"+++ Test %s FAILED: rank is %s and threshold is %s ==> %s" %(self.
name, self.
rank, self.
threshold, status))
169 if h.GetBinContent(i)!=0:
return False
178 for ibin
in xrange(nbins):
179 if h.GetBinContent(ibin)>0:
182 if filled_bins/nbins < .5:
191 StatisticalTest.__init__(self,threshold)
197 for h
in self.
h1,self.
h2:
212 return self.h1.KolmogorovTest(self.
h2)
217 if not profile.InheritsFrom(
"TH1"):
221 n_bins=profile.GetNbinsX()
223 for ibin
in xrange(1,n_bins+2):
224 bin_low_edges.append(profile.GetBinLowEdge(ibin))
225 bin_low_edges=array.array(
'f',bin_low_edges)
226 histo=TH1F(profile.GetName(),profile.GetTitle(),n_bins,bin_low_edges)
227 for ibin
in xrange(0,n_bins+1):
228 histo.SetBinContent(ibin,profile.GetBinContent(ibin))
229 histo.SetBinError(ibin,profile.GetBinError(ibin))
236 StatisticalTest.__init__(self,threshold)
240 nbins=self.h1.GetNbinsX()
242 for h
in self.
h1,self.
h2:
244 for ibin
in xrange(1,nbins+1):
245 if h.GetBinContent(ibin)>0:
247 n_filled_l.append(nfilled)
248 return len(filter (
lambda x:x>=min_filled,n_filled_l) )>0
253 for i
in xrange(1,nbins):
254 for h
in self.
h1,self.
h2:
255 binc=h.GetBinContent(i)
257 h.SetBinContent(i,-1*binc)
258 if h.GetBinError(i)==0
and binc!=0:
263 if histogram.InheritsFrom(
"TProfile")
or (histogram.GetEntries()!=histogram.GetSumOfWeights()):
279 if hist1 ==
'W' and hist2 ==
'W':
280 chi2 = self.h1.Chi2Test(self.
h2,
'WW')
282 elif hist1 ==
'U' and hist2 == 'U':
283 chi2 = self.h1.Chi2Test(self.h2,'UU')
285 elif hist1 ==
'U' and hist2 == 'W':
286 chi2 = self.h1.Chi2Test(self.
h2,
'UW')
288 elif hist1 ==
'W' and hist2 ==
'U':
289 chi2 = self.h2.Chi2Test(self.h1,'UW')
298 """The bin to bin comparison builds a fake pvalue. It is 0 if the number of
299 bins is different. It is % of corresponding bins otherwise.
300 A threshold of 1 is needed to require a 1 to 1 correspondence between
304 StatisticalTest.__init__(self, threshold)
309 if self.h1.GetNbinsX() != self.h2.GetNbinsX() \
310 or self.h1.GetNbinsY() != self.h2.GetNbinsY() \
311 or self.h1.GetNbinsZ() != self.h2.GetNbinsZ() \
312 or abs(self.h1.GetXaxis().GetXmin() - self.h2.GetXaxis().GetXmin()) >self.
epsilon \
313 or abs(self.h1.GetYaxis().GetXmin() - self.h2.GetYaxis().GetXmin()) >self.
epsilon \
314 or abs(self.h1.GetZaxis().GetXmin() - self.h2.GetZaxis().GetXmin()) >self.
epsilon \
315 or abs(self.h1.GetXaxis().GetXmax() - self.h2.GetXaxis().GetXmax()) >self.
epsilon \
316 or abs(self.h1.GetYaxis().GetXmax() - self.h2.GetYaxis().GetXmax()) >self.
epsilon \
317 or abs(self.h1.GetZaxis().GetXmax() - self.h2.GetZaxis().GetXmax()) >self.
epsilon:
324 return test_codes[
"DIFF_BIN"]
329 for ibin
in xrange(0, nbins+2):
330 h1bin=self.h1.GetBinContent(ibin)
331 h2bin=self.h2.GetBinContent(ibin)
334 binavg=.5*(h1bin+h2bin)
340 print "Bin %ibin: bindiff %s" %(ibin,bindiff)
348 print "Histogram %s differs: nok: %s ntot: %s" %(self.h1.GetName(),n_ok_bins,nbins)
355 """The bin to bin comparison builds a fake pvalue. It is 0 if the number of
356 bins is different. It is % of corresponding bins otherwise.
357 A threshold of 1 is needed to require a 1 to 1 correspondence between
361 StatisticalTest.__init__(self, threshold)
367 if self.h1.GetNbinsX() != self.h2.GetNbinsX() \
368 or self.h1.GetNbinsY() != self.h2.GetNbinsY() \
369 or self.h1.GetNbinsZ() != self.h2.GetNbinsZ() \
370 or abs(self.h1.GetXaxis().GetXmin() - self.h2.GetXaxis().GetXmin()) >self.
epsilon \
371 or abs(self.h1.GetYaxis().GetXmin() - self.h2.GetYaxis().GetXmin()) >self.
epsilon \
372 or abs(self.h1.GetZaxis().GetXmin() - self.h2.GetZaxis().GetXmin()) >self.
epsilon \
373 or abs(self.h1.GetXaxis().GetXmax() - self.h2.GetXaxis().GetXmax()) >self.
epsilon \
374 or abs(self.h1.GetYaxis().GetXmax() - self.h2.GetYaxis().GetXmax()) >self.
epsilon \
375 or abs(self.h1.GetZaxis().GetXmax() - self.h2.GetZaxis().GetXmax()) >self.
epsilon:
382 return test_codes[
"DIFF_BIN"]
387 for ibin
in xrange(0,nbins):
389 h1bin=self.h1.GetBinContent(ibin)
390 h2bin=self.h2.GetBinContent(ibin)
393 binavg=.5*(h1bin+h2bin)
395 if binavg==0
or 100*
abs(bindiff)/binavg < self.
tolerance:
399 print "-->Bin %i bin: bindiff %s (%s - %s )" %(ibin,bindiff,h1bin,h2bin)
407 print "%s nok: %s ntot: %s" %(self.h1.GetName(),n_ok_bins,nbins)
411 Statistical_Tests={
"KS":KS,
414 "BinToBin1percent":BinToBin1percent,
419 def ask_ok(prompt, retries=4, complaint='yes or no'):
421 ok = raw_input(prompt)
422 if ok
in (
'y',
'ye',
'yes'):
424 if ok
in (
'n',
'no'):
426 retries = retries - 1
428 raise IOError(
'refusenik user')
435 Thread.__init__(self)
440 print "Reading directory from %s" %(self.
filename)
448 """ Fetch the WHOLE file, not in bunches... To be optimised.
451 datareq = Request(url)
452 datareq.add_header(
'authenticated_wget',
"The ultimate wgetter")
455 filename=basename(url)
456 print "Checking existence of file %s on disk..."%filename
457 if not isfile(
"./%s"%filename):
458 bin_content=opener.open(datareq).
read()
460 print "File %s exists, skipping.." %filename
462 print "Error: Unknown url %s" %url
464 if bin_content!=
None:
465 ofile = open(filename,
'wb')
466 ofile.write(bin_content)
473 """Returns unique relvaldata ID for a given file."""
474 run_id = re.search(
'R\d{9}', file)
475 run = re.search(
'_RelVal_([\w\d]*)-v\d__', file)
477 run = re.search(
'GR_R_\d*_V\d*C?_([\w\d]*)-v\d__', file)
479 return (run_id.group(), run.group(1))
483 """Returns tuple (CMSSW release, GR_R version) for specified RelValData file."""
484 cmssw_release = re.findall(
'(CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?)-', file)
485 gr_r_version = re.findall(
'-(GR_R_\d*_V\d*\w?)(?:_RelVal)?_', file)
487 gr_r_version = re.findall(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-(\w*)_RelVal_', file)
488 if cmssw_release
and gr_r_version:
489 return (cmssw_release[0], gr_r_version[0])
492 """Returns tuple (CMSSW version, run version) for specified file."""
493 cmssw_version = re.findall(
'DQM_V(\d*)_', file)
494 run_version = re.findall(
'_RelVal_[\w\d]*-v(\d)__', file)
496 run_version = re.findall(
'GR_R_\d*_V\d*C?_[\w\d]*-v(\d)__', file)
497 if cmssw_version
and run_version:
498 return (int(cmssw_version[0]), int(run_version[0]))
501 """Returns file with maximum version at a) beginning of the file,
502 e.g. DQM_V000M b) at the end of run, e.g. _run2012-vM. M has to be max."""
507 if file_v[1] > max_v[1]
or ((file_v[1] == max_v[1])
and (file_v[0] > max_v[0])):
514 """Returns tuple (CMSSW version, run version) for specified file."""
515 cmssw_version = re.findall(
'DQM_V(\d*)_', file)
516 run_version = re.findall(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-[\w\d]*_V\d*\w?(?:_[\w\d]*)?-v(\d*)__', file)
517 if cmssw_version
and run_version:
518 return (int(cmssw_version[0]), int(run_version[0]))
521 """Returns file with maximum version at a) beginning of the file,
522 e.g. DQM_V000M b) at the end of run, e.g. _run2012-vM. M has to be max."""
527 if file_v[1] > max_v[1]
or ((file_v[1] == max_v[1])
and (file_v[0] > max_v[0])):
533 cmssw_release = re.findall(
'(CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?)-', file)
534 gr_r_version = re.findall(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-([\w\d]*)_V\d*\w?(_[\w\d]*)?-v', file)
535 if cmssw_release
and gr_r_version:
536 if "PU" in gr_r_version[0][0]
and not "FastSim" in file:
537 __gt = re.sub(
'^[^_]*_',
"", gr_r_version[0][0])
538 __process_string = gr_r_version[0][1]
539 return (__gt, __process_string)
540 elif "PU" in gr_r_version[0][0]
and "FastSim" in file:
541 return (cmssw_release[0],
"PU_")
542 return (cmssw_release[0], gr_r_version[0])
545 """Returns unique relval ID (dataset name) for a given file."""
546 dataset_name = re.findall(
'R\d{9}__([\w\D]*)__CMSSW_', file)
547 __process_string = re.search(
'CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-([\w\d]*)_V\d*\w?(_[\w\d]*)?-v', file)
550 if "PU" in __process_string.group(1)
and not "FastSim" in file:
551 _ps = re.search(
'^[^_]*_', __process_string.group(1)).
group()
552 elif "PU" in __process_string.group(1)
and "FastSim" in file:
553 return dataset_name[0]+
"_", _ps
554 return dataset_name[0], _ps
558 is_relvaldata_re = re.compile(
'_RelVal_')
559 return any([is_relvaldata_re.search(filename)
for filename
in files])
564 is_relval_data =
True
565 get_cmssw_version = get_relvaldata_cmssw_version
566 get_id = get_relvaldata_id
567 get_max_version = get_relvaldata_max_version
570 is_relval_data =
False
571 get_cmssw_version = get_relval_cmssw_version
572 get_id = get_relval_id
573 get_max_version = get_relval_max_version
577 versions_files =
dict()
579 version = get_cmssw_version(file)
580 if versions_files.has_key(version):
581 versions_files[version].
append(file)
583 versions_files[version] = [file]
587 print '\nFound versions:'
588 for version
in versions_files:
589 print '%s: %d files' % (str(version), len(versions_files[version]))
591 if len(versions_files.keys()) <= 1:
592 print '\nFound too little versions, there is nothing to pair. Exiting...\n'
596 versions = versions_files.keys()
597 sizes = [len(value)
for value
in versions_files.values()]
598 v1 = versions[sizes.index(
max(sizes))]
600 sizes.remove(
max(sizes))
601 v2 = versions[sizes.index(
max(sizes))]
605 print '\nPairing %s (%d files) and %s (%d files)' % (str(v1),
606 len(versions_files[v1]), str(v2), len(versions_files[v2]))
611 for unique_id
in set([get_id(file)
for file
in versions_files[v1]]):
613 dataset_re = re.compile(unique_id[0]+
'_')
614 run_re = re.compile(unique_id[1])
615 c1_files = [file
for file
in versions_files[v1]
if dataset_re.search(file)
and run_re.search(file)]
616 c2_files = [file
for file
in versions_files[v2]
if dataset_re.search(file)
and run_re.search(file)]
618 dataset_re = re.compile(unique_id[0]+
'_')
619 ps_re = re.compile(unique_id[1])
621 c1_files = [file
for file
in versions_files[v1]
if dataset_re.search(file)
and ps_re.search(file)]
622 c2_files = [file
for file
in versions_files[v2]
if dataset_re.search(file)
and ps_re.search(file)]
624 if len(c1_files) > 0
and len(c2_files) > 0:
625 first_file = get_max_version(c1_files)
626 second_file = get_max_version(c2_files)
627 print '%s\n%s\n' % (first_file, second_file)
628 pairs.extend((first_file, second_file))
630 print "Paired and got %d files.\n" % len(pairs)
bool any(const std::vector< T > &v, const T &what)
def get_relval_version
-------------—— Make files pairs: RelVal utils ---------------——
def is_relvaldata
----------------------— Make files pairs -----------------------—
Abs< T >::type abs(const T &t)
def get_relvaldata_cmssw_version
def get_relvaldata_id
-----------—— Make files pairs: RelValData utils --------------——
def get_relval_cmssw_version
def get_relvaldata_version
rank
2D! return test_codes["2D"]
def get_relvaldata_max_version
def get_relval_max_version