1 from __future__
import print_function
2 from __future__
import absolute_import
13 from builtins
import range
14 from copy
import deepcopy
15 from os
import chdir,getcwd,makedirs
16 from os.path
import abspath,exists,join, basename
17 from re
import sub,search
18 from re
import compile
as recompile
19 from sys
import exit,stderr,version_info
20 from threading
import Thread,activeCount
21 from time
import sleep
22 if version_info[0]==2:
23 from urllib2
import Request,build_opener,urlopen
25 from urllib.request
import Request,build_opener,urlopen
32 ROOT.gROOT.SetBatch(
True)
34 from .authentication
import X509CertOpen
35 from .dirstructure
import Comparison,Directory,tcanvas_print_processes
36 from .utils
import Chi2,KS,BinToBin,Statistical_Tests,literal2root
41 """Base class for exceptions in this module."""
45 """Exception occurs in case of problems of communication with the server.
59 """Communicate with the DQM Document server"""
63 base_dir=
'/data/json/archive/'
68 ident="DQMToJson/1.0 python/%d.%d.%d" % version_info[:3]):
72 self.
DQMpwd=DQMcommunicator.base_dir
80 url=url.replace(
' ',
'%20')
81 datareq = Request(url)
82 datareq.add_header(
'User-agent', self.
ident)
85 url_obj=self.
opener.open(datareq)
88 url_obj=urlopen(datareq)
# Replace textual infinities and NaN in `data` with 0.
# Raw strings are used so that "\s" reaches the regex engine as a regex
# escape instead of being an invalid escape sequence in a normal string
# literal (which newer Python versions warn about).
data = sub(r"-inf", '0', data)
data = sub(r"\s+inf", '0', data)
data = sub(r"\s+nan", '0', data)
# Collapse doubled quotes around CMSSW strings: ""CMSSW..."" -> "CMSSW...".
data = sub(r'""(CMSSW.*?)""', r'"\1"', data)
108 url=url.replace(
" ",
"%20")
117 for ntrials
in range(5):
125 print(
"Could not fetch %s. Retrying" %url)
128 for content_dict
in raw_folder[
"contents"]:
129 if "subdir" in content_dict:
130 form_folder[content_dict[
"subdir"]]={
"type":
'dir'}
131 elif "obj" in content_dict:
132 properties=content_dict[
"properties"]
133 obj_name=content_dict[
"obj"]
134 obj_type=properties[
"type"]
135 obj_kind=properties[
"kind"]
137 if "rootobj" in content_dict:
138 obj_as_string=content_dict[
"rootobj"]
139 form_folder[obj_name]={
'type':obj_type,
'obj_as_string':obj_as_string,
"kind":obj_kind}
147 def ls(self, url='', fetch_root=False):
154 url=
'%s?rootcontent=1'%url
155 form_folder=self.
ls_url(url)
164 if len_args!=1
and len_args!=3:
165 raise InvalidNumberOfArguments
167 dataset, run, folder = args
168 full_url=
'%s/data/json/archive/%s/%s/%s' % (self.
server, dataset, run, folder)
178 full_url=DQMcommunicator.base_dir
180 full_url=self.
DQMpwd+
"/"+folder
182 full_url=full_url.replace(
' ',
'%20')
193 A sample contains, among the other things, a data type, a dataset name
196 full_url=
'%s/data/json/samples?match=%s' % (self.
server, samples_string)
# NOTE(review): eval() executes the text returned by get_data() as Python code.
# full_url is built from self.server, so this is presumably a remote response
# body -- confirm the server is trusted, or parse with a non-executing parser.
197 samples_dict=eval(self.
get_data(full_url))
198 return samples_dict[
"samples"]
# Append every dataset name found in the samples to `datasets_list`,
# skipping names that are already present (first-seen order is kept).
for sample in samples_list:
    for item in sample['items']:
        temp_dataset = item["dataset"]
        if temp_dataset not in datasets_list:
            datasets_list.append(temp_dataset)
215 """Get the available cmssw versions for the relvals.
219 cmssw_versions_with_duplicates=map (
lambda x: x.split(
"/")[2],relvals_list)
220 return list(set(cmssw_versions_with_duplicates))
# Remove leading and trailing "/" characters from the dataset name.
# Fix: str has no `beginswith` method, so the original condition raised
# AttributeError whenever `endswith` returned False; the correct method
# is `startswith`.
# NOTE(review): `slash` is defined outside this view; presumably it is "/".
while dataset_string.endswith(slash) or dataset_string.startswith(slash):
    dataset_string = dataset_string.strip("/")
# Collect the 'run' value of every item of every sample into `runlist`.
# Fix: the original used nested map() calls purely for their side effect.
# On Python 3 map() returns a lazy iterator that was never consumed, so
# nothing was appended. Explicit loops behave the same on Python 2 and 3.
for sample in samples_list:
    for item in sample['items']:
        runlist.append(item['run'])
247 set1.intersection_update(set2)
257 url = url.replace(
" ",
"%20")
259 for name,description
in self.
ls(url,
True).
items():
260 if "dir" not in description[
"type"]
and "ROOT" in description[
"kind"]:
261 objects.append(
literal2root(description[
"obj_as_string"],description[
"type"]))
271 url = url.replace(
" ",
"%20")
273 for name,description
in self.
ls(url,
True).
items():
274 if "dir" not in description[
"type"]
and "ROOT" in description[
"kind"]:
275 objects[name]=
literal2root(description[
"obj_as_string"],description[
"type"])
281 null_url = (len(url)==0)
286 url = url.replace(
" ",
"%20")
290 for name,description
in self.
ls(
"",
True).
items():
291 if "dir" in description[
"type"]:
294 elif "ROOT" in description[
"kind"]:
295 objects.append(
literal2root(description[
"obj_as_string"],description[
"type"]))
303 null_url = (len(url)==0)
305 if len(present_url)==0:
308 present_url+=
"_%s"%url
313 url = url.replace(
" ",
"%20")
317 for name,description
in self.
ls(
"",
False).
items():
318 if "dir" in description[
"type"]:
321 elif "ROOT" in description[
"kind"]:
322 objects_names.append(
"%s_%s"%(present_url,name))
330 null_url = (len(url)==0)
332 if len(present_url)==0:
335 present_url+=
"_%s"%url
340 url = url.replace(
" ",
"%20")
344 for name,description
in self.
ls(
"",
True).
items():
345 if "dir" in description[
"type"]:
348 elif "ROOT" in description[
"kind"]:
349 objects[
"%s_%s"%(present_url,name)]=
literal2root(description[
"obj_as_string"],description[
"type"])
357 """Structure used to identify a directory in the walked tree,
358 It carries the name and depth information.
367 compname2=dirid.compname
372 is_equal = self.
depth*depth2 <0
or self.
depth==depth2
373 if len(self.
mother)*len(dirid.mother)>0:
374 is_equal = is_equal
and self.
mother==dirid.mother
378 return "Directory %s at level %s" %(self.
name,self.
depth)
382 """ Fetch the content of the single "directory" in the dqm.
385 Thread.__init__(self)
395 """An interface to the DQM document db. It is threaded to compensate for the
396 latency introduced by the finite response time of the server.
398 def __init__ (self,comm1,comm2,base1,base2,directory,depth=0,do_pngs=True,stat_test="KS",test_threshold=.5,black_list=[]):
399 Thread.__init__(self)
410 self.name+=
"_%s" %directory.name
416 print(
"Skipping %s since blacklisted!" %this_dir)
428 for fetcher
in fetchers:
430 for fetcher
in fetchers:
433 contents1 = fetchers[0].contents
434 contents2 = fetchers[1].contents
435 set1= set(contents1.keys())
436 set2= set(contents2.keys())
439 self_directory_directories=self.
directory.subdirs
440 self_directory_comparisons=self.
directory.comparisons
441 contents_names=list(set1.intersection(set2))
443 for name
in contents_names:
444 content = contents1[name]
445 if "dir" in content[
"type"]:
451 walkers.append(dirwalker)
452 n_threads=activeCount()
456 elif content[
"kind"]==
"ROOT":
461 literal2root(contents2[name][
"obj_as_string"],content[
"type"]),
464 self_directory_comparisons.append(comparison)
467 for walker
in walkers:
469 walker_directory=walker.directory
470 if not walker_directory.is_empty():
471 self_directory_directories.append(walker_directory)
476 """ Class acting as interface between the user and the harvested DQMRootFile.
477 It skips the directories created by the DQM infrastructure so to provide an
478 interface as similar as possible to a real directory structure and to the
479 directory structure provided by the db interface.
487 print(
"Directory %s does not exist: skipping. Is this a custom rootfile?" %dqmdatadir)
492 is_null =
not directory
494 print(
"Directory %s does not exist!" %name, file=stderr)
497 def ls(self,directory_name=""):
500 if len(directory_name)==0:
503 directory=self.
rootfilepwd.GetDirectory(directory_name)
504 if self.
__is_null(directory,directory_name):
507 for key
in directory.GetListOfKeys():
508 contents[key.GetName()]=key.GetClassName()
511 def cd(self,directory_name):
512 """Change the current TDirectoryFile. The familiar "-" and ".." directories
513 can be accessed as well.
515 if directory_name==
"-":
519 if directory_name==
"..":
531 new_directory=self.
rootfilepwd.GetDirectory(directory_name)
532 if not self.
__is_null(new_directory,directory_name):
537 """Get a TObject from the rootfile.
546 def __init__(self, name, topdirname,rootfilename1, rootfilename2, run=-1, black_list=[], stat_test="KS", test_threshold=.5,draw_success=True,do_pngs=False, black_list_histos=[]):
551 self.
stat_test=Statistical_Tests[stat_test](test_threshold)
566 def cd(self,directory_name, on_disk=False, regexp=False,):
568 if len(directory_name)!=0:
570 if not exists(directory_name):
571 makedirs(directory_name)
572 chdir(directory_name)
584 if len(directory_name)!=0:
586 if not exists(directory_name):
587 makedirs(directory_name)
588 chdir(directory_name)
592 def ls(self,directory_name=""):
593 """Return common objects to the 2 files.
602 keys = [key
for key
in contents2.keys()
if key
in contents1]
605 if contents1[key]!=contents2[key]:
606 diff_file1 = set(contents1.keys()) - set(contents2.keys())
607 diff_file2 = set(contents2.keys()) - set(contents1.keys())
608 for key1
in diff_file1:
609 obj_type = contents1[key1]
610 if obj_type ==
"TDirectoryFile":
614 if obj_type[:2]!=
"TH" and obj_type[:3]!=
"TPr" :
617 for key1
in diff_file2:
618 obj_type = contents2[key1]
619 if obj_type ==
"TDirectoryFile":
623 if obj_type[:2]!=
"TH" and obj_type[:3]!=
"TPr" :
626 contents[key]=contents1[key]
639 this_dir=DirID(dir_name,depth)
641 if this_dir
in self.black_list:
655 n_top_contents=len(contents)
660 for name,obj_type
in contents.items():
661 if obj_type==
"TDirectoryFile":
665 print(
"Studying directory %s, %s/%s" %(name,cont_counter,n_top_contents))
671 subdir.draw_success=directory.draw_success
672 subdir.do_pngs=directory.do_pngs
673 self.__fill_single_dir(name,subdir,
join(mother_name,dir_name),depth)
674 if not subdir.is_empty():
676 print(
" ->Appending %s..." %name, end=
' ')
677 directory.subdirs.append(subdir)
682 if obj_type[:2]!=
"TH" and obj_type[:3]!=
"TPr" :
684 h1,h2=self.getObjs(name)
686 path =
join(mother_name,dir_name,name)
687 if path
in self.black_list_histos:
688 print(
" Skipping %s" %(path))
689 directory.comparisons.append(Comparison(name,
690 join(mother_name,dir_name),
692 deepcopy(self.stat_test),
693 draw_success=directory.draw_success,
694 do_pngs=directory.do_pngs, skip=
True))
696 directory.comparisons.append(Comparison(name,
697 join(mother_name,dir_name),
699 deepcopy(self.stat_test),
700 draw_success=directory.draw_success,
701 do_pngs=directory.do_pngs, skip=
False))
702 directory.filename1 = self.filename1
703 directory.filename2 = self.filename2
704 directory.different_histograms[
'file1'] = self.different_histograms[
'file1']
705 directory.different_histograms[
'file2'] = self.different_histograms[
'file2']
717 first_run_dir = list(
filter(
lambda k:
"Run " in k, self.
ls().
keys()))[0]
719 print(
"\nRundir not there: Is this a generic rootfile?\n")
722 self.
run=
int(rundir.split(
" ")[1])
724 print(
"Setting run number to 0")
727 rundir=
"Run %s"%self.
run
730 self.
cd(rundir,
False,
True)
732 print(
"\nRundir not there: Is this a generic rootfile?\n")
737 n_left_threads=len(tcanvas_print_processes)
739 print(
"Waiting for %s threads to finish..." %n_left_threads)
740 for p
in tcanvas_print_processes:
747 Thread.__init__(self)
757 black_list_str=black_list_str.replace(
"__",
" ")
758 if len(black_list_str)>0:
759 for ele
in black_list_str.split(
","):
760 dirname,level=ele.split(
"@")
763 if "/" not in dirname:
764 dirid=
DirID(dirname,level)
766 mother,daughter=dirname.split(
"/")
767 dirid=
DirID(daughter,level,mother)
768 if not dirid
in black_list:
769 black_list.append(dirid)