00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 from copy import deepcopy
00015 from os import chdir,getcwd,makedirs
00016 from os.path import abspath,exists,join
00017 from re import sub,search
00018 from re import compile as recompile
00019 from sys import exit,stderr,version_info
00020 from threading import Thread,activeCount
00021 from time import sleep
00022 from urllib2 import Request,build_opener,urlopen
00023
00024 import sys
00025 argv=sys.argv
00026 from ROOT import *
00027 import ROOT
00028 sys.argv=argv
00029
00030 gROOT.SetBatch(True)
00031
00032 from authentication import X509CertOpen
00033 from dirstructure import Comparison,Directory,tcanvas_print_processes
00034 from utils import Chi2,KS,BinToBin,Statistical_Tests,literal2root
00035
00036
00037
class Error(Exception):
    """Common ancestor of the exceptions defined in this module.

    It adds nothing to ``Exception``; it only gives callers a single type
    to catch for every module-specific failure.
    """
00041
class DQM_DB_Communication(Error):
    """Signal a problem in the communication with the server.

    Attributes:
        msg: the description handed to the constructor.
    """

    def __init__(self, msg):
        # Keep the description reachable as a plain attribute.
        self.msg = msg
00047
class InvalidNumberOfArguments(Error):
    """Signal that a call received an unsupported number of arguments.

    Attributes:
        msg: the description handed to the constructor.
    """

    def __init__(self, msg):
        # Keep the description reachable as a plain attribute.
        self.msg = msg
00052
00053
00054
class DQMcommunicator(object):
    """Communicate with the DQM Document server.

    The object keeps a "current directory" on the server (``DQMpwd``) that
    ``cd`` changes and that ``ls`` and the ``get_root_objects*`` helpers use
    when they are called without an explicit url.

    Attributes:
        ident: value sent as the ``User-agent`` header of every request.
        server: string prepended to the relative urls handed to ``ls_url``.
        is_private: if true requests go through plain ``urlopen``, otherwise
            through an opener built around ``X509CertOpen``.
        DQMpwd: the current directory.
        oldDQMpwd: the directory that was current before the last ``cd``
            (this is where ``cd("-")`` goes back to).
        prevDQMpwd: the initial directory; only set in the constructor and
            kept for backward compatibility.
        opener: the opener used for non-private servers, else ``None``.
    """

    base_dir = '/data/json/archive/'

    def __init__(self,
                 server,
                 is_private=False,
                 ident="DQMToJson/1.0 python/%d.%d.%d" % version_info[:3]):
        """Store the connection parameters and start in ``base_dir``."""
        self.ident = ident
        self.server = server
        self.is_private = is_private
        self.DQMpwd = DQMcommunicator.base_dir
        self.prevDQMpwd = self.DQMpwd
        # cd("-") reads oldDQMpwd: define it up front so that it also works
        # before any other cd() call (it used to raise AttributeError).
        self.oldDQMpwd = self.DQMpwd
        self.opener = None
        if not self.is_private:
            self.opener = build_opener(X509CertOpen())

    def open_url(self, url):
        """Open *url* and return the response object.

        Blanks in the url are escaped as ``%20`` and ``self.ident`` is sent
        as the ``User-agent`` header.
        """
        url = url.replace(' ', '%20')
        datareq = Request(url)
        datareq.add_header('User-agent', self.ident)
        if self.is_private:
            return urlopen(datareq)
        return self.opener.open(datareq)

    def get_data(self, full_url):
        """Return the content served at *full_url* after some clean-up.

        ``-inf`` and any ``inf``/``nan`` preceded by whitespace are replaced
        by ``0`` (the whitespace is consumed too), and doubled double-quotes
        around a ``CMSSW...`` token are collapsed into single ones.
        Presumably this makes the text digestible by ``eval`` -- the callers
        in this class all evaluate the returned string.
        """
        data = self.open_url(full_url).read()

        data = sub("-inf", '0', data)
        data = sub(r"\s+inf", '0', data)
        data = sub(r"\s+nan", '0', data)
        data = sub('""(CMSSW.*?)""', '"\\1"', data)

        return data

    def _fetch_folder(self, url, max_retries=5, pause=2):
        """Fetch and evaluate the listing at *url*, retrying on failure.

        One initial attempt is followed by up to *max_retries* more, with
        *pause* seconds of sleep between two consecutive retries (none
        before the first retry).

        Raises:
            DQM_DB_Communication: if every attempt failed.
        """
        for attempt in range(max_retries + 1):
            if attempt == 1:
                print("Retrying..")
            elif attempt > 1:
                sleep(pause)
            try:
                # SECURITY: eval() executes whatever the server sends back.
                # Only point this class at trusted servers.
                return eval(self.get_data(url))
            except Exception:
                # "except Exception" rather than a bare except, so that
                # KeyboardInterrupt/SystemExit are not swallowed.
                if attempt > 0:
                    print("Could not fetch %s. Retrying" % url)
        raise DQM_DB_Communication("Could not fetch %s" % url)

    def ls_url(self, url):
        """List the folder at the server-relative *url*.

        Returns:
            A dict mapping each entry name to its description:
            ``{"type": 'dir'}`` for a subdirectory, or a dict with the keys
            ``type``, ``kind`` and ``obj_as_string`` for an object
            (``obj_as_string`` is ``''`` when the entry has no ``rootobj``).

        Raises:
            DQM_DB_Communication: if the listing could not be fetched.
        """
        url = self.server + url.replace(" ", "%20")
        raw_folder = self._fetch_folder(url)

        form_folder = {}
        for content_dict in raw_folder["contents"]:
            if "subdir" in content_dict:
                form_folder[content_dict["subdir"]] = {"type": 'dir'}
            elif "obj" in content_dict:
                properties = content_dict["properties"]
                form_folder[content_dict["obj"]] = {
                    'type': properties["type"],
                    'obj_as_string': content_dict.get("rootobj", ''),
                    "kind": properties["kind"],
                }
        return form_folder

    def ls(self, url='', fetch_root=False):
        """List *url*, or the current directory when *url* is empty.

        With *fetch_root* set, ``?rootcontent=1`` is appended to the request.
        The result has the format described in ``ls_url``.
        """
        if len(url) == 0:
            url = join(self.DQMpwd, url)

        if fetch_root:
            url = '%s?rootcontent=1' % url
        return self.ls_url(url)

    def cd(self, *args):
        """Change the current directory.

        Accepts either one argument, ``cd(folder)``, or three,
        ``cd(dataset, run, folder)``.  In the one-argument form ``".."``
        drops everything from the last ``/`` on, ``"-"`` goes back to the
        previous directory, ``""`` goes to ``base_dir``, and anything else
        is appended to the current directory after a ``/``.

        Raises:
            InvalidNumberOfArguments: if neither 1 nor 3 arguments are given.
        """
        len_args = len(args)
        if len_args not in (1, 3):
            raise InvalidNumberOfArguments("3 or 1 args expected!")

        if len_args == 3:
            dataset, run, folder = args
            # NOTE(review): this path embeds self.server in DQMpwd, while
            # ls_url() prepends self.server again -- confirm which one is
            # intended before relying on ls() after a 3-argument cd().
            full_url = '%s/data/json/archive/%s/%s/%s' % (self.server, dataset, run, folder)
        else:
            folder = args[0]
            if folder == self.DQMpwd:
                full_url = self.DQMpwd
            elif folder == "..":
                full_url = self.DQMpwd[:self.DQMpwd.rfind("/")]
            elif folder == "-":
                full_url = self.oldDQMpwd
            elif folder == "":
                full_url = DQMcommunicator.base_dir
            else:
                full_url = self.DQMpwd + "/" + folder

        full_url = full_url.replace(' ', '%20')

        self.oldDQMpwd = self.DQMpwd
        self.DQMpwd = full_url

    def get_samples(self, samples_string="*"):
        """Return the samples matching *samples_string*.

        A sample contains, among the other things, a data type, a dataset
        name and a run.
        """
        full_url = '%s/data/json/samples?match=%s' % (self.server, samples_string)
        # SECURITY: eval() executes whatever the server sends back.
        samples_dict = eval(self.get_data(full_url))
        return samples_dict["samples"]

    def get_datasets_list(self, dataset_string=""):
        """Return the distinct dataset names matching *dataset_string*.

        Names are listed in order of first appearance.
        """
        datasets_list = []
        seen = set()
        for sample in self.get_samples(dataset_string):
            for item in sample['items']:
                dataset = item["dataset"]
                if dataset not in seen:
                    seen.add(dataset)
                    datasets_list.append(dataset)
        return datasets_list

    def get_RelVal_CMSSW_versions(self, query):
        """Get the available cmssw versions for the relvals.

        The version is taken to be the third ``/``-separated field of each
        dataset name matching *query*; duplicates are removed.
        """
        relvals_list = self.get_datasets_list(query)
        return list(set(relval.split("/")[2] for relval in relvals_list))

    def get_runs_list(self, dataset_string):
        """Return the runs of all the samples matching *dataset_string*.

        Leading and trailing slashes are removed from *dataset_string*
        before the query.
        """
        # The original loop called the non-existent str.beginswith() and
        # therefore always raised AttributeError; strip() alone does the job.
        dataset_string = dataset_string.strip("/")
        return [item['run']
                for sample in self.get_samples(dataset_string)
                for item in sample['items']]

    def get_dataset_runs(self, dataset_string):
        """Map every dataset matching *dataset_string* to its list of runs."""
        dataset_runs = {}
        for dataset in self.get_datasets_list(dataset_string):
            dataset_runs[dataset] = self.get_runs_list(dataset)
        return dataset_runs

    def get_common_runs(self, dataset_string1, dataset_string2):
        """Return the runs present in both datasets (in no particular order)."""
        runs1 = set(self.get_runs_list(dataset_string1))
        runs2 = set(self.get_runs_list(dataset_string2))
        # The intersection, not the second set as was returned before.
        return list(runs1 & runs2)

    def get_root_objects_list(self, url=""):
        """Return the ROOT objects found directly in *url* as a list.

        An empty *url* means the current directory.
        """
        if len(url) == 0:
            url = self.DQMpwd
        else:
            url = "/" + url
        url = url.replace(" ", "%20")
        objects = []
        for name, description in self.ls(url, True).items():
            if "dir" not in description["type"] and "ROOT" in description["kind"]:
                objects.append(literal2root(description["obj_as_string"], description["type"]))
        return objects

    def get_root_objects(self, url=""):
        """Return the ROOT objects found directly in *url*, keyed by name.

        An empty *url* means the current directory.
        """
        if len(url) == 0:
            url = self.DQMpwd
        else:
            # ls_url() already prepends self.server: adding it here as well
            # (as was done before) produced a url with the server twice.
            url = "/" + url
        url = url.replace(" ", "%20")
        objects = {}
        for name, description in self.ls(url, True).items():
            if "dir" not in description["type"] and "ROOT" in description["kind"]:
                objects[name] = literal2root(description["obj_as_string"], description["type"])
        return objects

    def get_root_objects_list_recursive(self, url=""):
        """Return the ROOT objects found in *url* and below as a list.

        An empty *url* means the current directory.  The walk is done by
        moving the current directory with ``cd``.
        """
        null_url = (len(url) == 0)
        if null_url:
            url = self.DQMpwd
        else:
            url = "/" + url
        url = url.replace(" ", "%20")
        if not null_url:
            self.cd(url)
        objects = []
        for name, description in self.ls("", True).items():
            if "dir" in description["type"]:
                objects += self.get_root_objects_list_recursive(name)
                # Not redundant: cd() joins with "/" a name that already
                # starts with "/", so the callee's own cd("..") only removes
                # the name; this one removes the extra slash.
                self.cd("..")
            elif "ROOT" in description["kind"]:
                objects.append(literal2root(description["obj_as_string"], description["type"]))
        if not null_url:
            self.cd("..")
        return objects

    def get_root_objects_names_list_recursive(self, url="", present_url=""):
        """Return the names of the ROOT objects found in *url* and below.

        Each name is prefixed with the traversed directory names, all
        joined by ``_``.  *present_url* carries that prefix through the
        recursion.  An empty *url* means the current directory.
        """
        null_url = (len(url) == 0)
        if not null_url:
            if len(present_url) == 0:
                present_url = url
            else:
                present_url += "_%s" % url
        if null_url:
            url = self.DQMpwd
        else:
            url = "/" + url
        url = url.replace(" ", "%20")
        if not null_url:
            self.cd(url)
        objects_names = []
        for name, description in self.ls("", False).items():
            if "dir" in description["type"]:
                objects_names += self.get_root_objects_names_list_recursive(name, present_url)
                # See get_root_objects_list_recursive: removes the extra "/".
                self.cd("..")
            elif "ROOT" in description["kind"]:
                objects_names.append("%s_%s" % (present_url, name))
        if not null_url:
            self.cd("..")
        return objects_names

    def get_root_objects_recursive(self, url="", present_url=""):
        """Return the ROOT objects found in *url* and below, keyed by name.

        Keys are built as in ``get_root_objects_names_list_recursive``.
        An empty *url* means the current directory.
        """
        null_url = (len(url) == 0)
        if not null_url:
            if len(present_url) == 0:
                present_url = url
            else:
                present_url += "_%s" % url
        if null_url:
            url = self.DQMpwd
        else:
            url = "/" + url
        url = url.replace(" ", "%20")
        # Guarded like in the two sibling methods: without the guard a call
        # with an empty url ended with a cd("..") that was never matched by
        # a descent, leaving the current directory one level up.
        if not null_url:
            self.cd(url)
        objects = {}
        for name, description in self.ls("", True).items():
            if "dir" in description["type"]:
                objects.update(self.get_root_objects_recursive(name, present_url))
                # See get_root_objects_list_recursive: removes the extra "/".
                self.cd("..")
            elif "ROOT" in description["kind"]:
                objects["%s_%s" % (present_url, name)] = literal2root(description["obj_as_string"], description["type"])
        if not null_url:
            self.cd("..")
        return objects
00351
00352
00353
class DirID(object):
    """Structure used to identify a directory in the walked tree.

    It carries the name and depth information, plus an optional mother
    directory name.  The name doubles as a regular expression.

    Attributes:
        name: the directory name / pattern.
        compname: *name* compiled as a regular expression.
        mother: name of the mother directory, ``""`` if unspecified.
        depth: level of the directory in the tree.
    """

    def __init__(self, name, depth, mother=""):
        self.name = name
        self.compname = recompile(name)
        self.mother = mother
        self.depth = depth

    def __eq__(self, dirid):
        """Tell whether the two identifiers designate the same directory.

        They do when
          * the pattern of one is found in the name of the other, and
          * the depths are equal or have opposite signs, and
          * the mothers are equal -- checked only if both are specified.
        """
        names_match = (self.compname.search(dirid.name) is not None or
                       dirid.compname.search(self.name) is not None)
        is_equal = False
        if names_match:
            is_equal = self.depth * dirid.depth < 0 or self.depth == dirid.depth
        # Both lengths are needed here: the former int*str product was
        # always "greater than 0" on Python 2, so the mothers were compared
        # even when one of them was not specified.
        if len(self.mother) * len(dirid.mother) > 0:
            is_equal = is_equal and self.mother == dirid.mother
        return is_equal

    def __ne__(self, dirid):
        """Negation of ``__eq__`` (Python 2 does not derive it)."""
        return not self.__eq__(dirid)

    def __repr__(self):
        return "Directory %s at level %s" % (self.name, self.depth)
00377
00378
class DirFetcher(Thread):
    """Thread retrieving the content of one "directory" of the dqm.

    Once the thread has run, the listing returned by ``comm.ls`` is
    available in ``contents``; until then ``contents`` is ``None``.
    """

    def __init__(self, comm, directory):
        super(DirFetcher, self).__init__()
        self.contents = None
        self.directory = directory
        self.comm = comm

    def run(self):
        # Second argument: ask for the ROOT content of the entries as well.
        listing = self.comm.ls(self.directory, True)
        self.contents = listing
00389
00390
00391
class DirWalkerDB(Thread):
    """An interface to the DQM document db. It is threaded to compensate the
    latency introduced by the finite response time of the server.

    Each instance handles one directory: it fetches its content from the
    two sources, creates a ``Comparison`` for each common ROOT object and
    spawns another ``DirWalkerDB`` for each common subdirectory.  Results
    are appended to ``directory.comparisons`` and ``directory.subdirs``.
    """

    def __init__(self, comm1, comm2, base1, base2, directory, depth=0, do_pngs=True, stat_test="KS", test_threshold=.5, black_list=[]):
        """
        Args:
            comm1, comm2: communicators for the two sources; each walker
                works on its own deep copy.
            base1, base2: base paths prepended to the directory path.
            directory: the ``Directory`` to fill.
            depth: level of *directory* in the tree.
            do_pngs: forwarded to every ``Comparison``.
            stat_test: key of the test in ``Statistical_Tests``.
            test_threshold: argument of the statistical test constructor.
            black_list: ``DirID`` objects to skip (only read, never
                modified here).
        """
        Thread.__init__(self)
        self.comm1 = deepcopy(comm1)
        self.comm2 = deepcopy(comm2)
        self.base1, self.base2 = base1, base2
        self.directory = directory
        self.depth = depth
        self.do_pngs = do_pngs
        self.test_threshold = test_threshold
        self.stat_test = stat_test
        self.black_list = black_list
        # Make the thread recognisable by the directory it works on.
        self.name += "_%s" % directory.name

    def run(self):
        """Fill ``self.directory`` with comparisons and subdirectories."""
        this_dir = DirID(self.directory.name, self.depth)
        if this_dir in self.black_list:
            print("Skipping %s since blacklisted!" % this_dir)
            return 0

        self.depth += 1

        the_test = Statistical_Tests[self.stat_test](self.test_threshold)

        relative_path = "/" + self.directory.mother_dir + "/" + self.directory.name
        directory1 = self.base1 + relative_path
        directory2 = self.base2 + relative_path

        # Fetch the two listings concurrently.
        fetchers = (DirFetcher(self.comm1, directory1), DirFetcher(self.comm2, directory2))
        for fetcher in fetchers:
            fetcher.start()
        for fetcher in fetchers:
            fetcher.join()

        contents1 = fetchers[0].contents
        contents2 = fetchers[1].contents
        # A fetcher that died leaves its contents to None: give up on this
        # directory with a message instead of an AttributeError below.
        if contents1 is None or contents2 is None:
            stderr.write("Could not fetch %s and/or %s: skipping.\n" % (directory1, directory2))
            return 0

        walkers = []
        mother_path = join(self.directory.mother_dir, self.directory.name)
        # Only what is present on both sides can be compared.
        common_names = set(contents1.keys()).intersection(contents2.keys())

        for name in common_names:
            content = contents1[name]
            if "dir" in content["type"]:
                subdir = Directory(name, mother_path)
                dirwalker = DirWalkerDB(self.comm1, self.comm2, self.base1, self.base2, subdir, self.depth,
                                        self.do_pngs, self.stat_test, self.test_threshold, self.black_list)
                dirwalker.start()
                walkers.append(dirwalker)
                # Throttle: with more than 5 threads alive, wait for the
                # one just started before spawning another.
                if activeCount() > 5:
                    dirwalker.join()
            elif content["kind"] == "ROOT":
                comparison = Comparison(name,
                                        mother_path,
                                        literal2root(content["obj_as_string"], content["type"]),
                                        literal2root(contents2[name]["obj_as_string"], content["type"]),
                                        deepcopy(the_test),
                                        do_pngs=self.do_pngs)
                self.directory.comparisons.append(comparison)

        for walker in walkers:
            walker.join()
            if not walker.directory.is_empty():
                self.directory.subdirs.append(walker.directory)
00470
00471
00472
class DQMRootFile(object):
    """Class acting as interface between the user and the harvested DQMRootFile.

    It skips the directories created by the DQM infrastructure so to provide
    an interface as similar as possible to a real directory structure and to
    the directory structure provided by the db interface.

    Attributes:
        rootfile: the opened ``TFile``.
        rootfilepwd: the current directory inside the file.
        rootfileprevpwd: the directory that was current before the last
            change (target of ``cd("-")``).
    """

    def __init__(self, rootfilename):
        """Open *rootfilename* and enter its ``DQMData`` directory, if any."""
        dqmdatadir = "DQMData"
        self.rootfile = TFile(rootfilename)
        self.rootfilepwd = self.rootfile.GetDirectory(dqmdatadir)
        self.rootfileprevpwd = self.rootfile.GetDirectory(dqmdatadir)
        # "== None" is kept on purpose: GetDirectory is a ROOT call and its
        # "not found" result is presumably not the None singleton -- TODO
        # confirm before turning this into "is None".
        if self.rootfilepwd == None:
            print("Directory %s does not exist: skipping. Is this a custom rootfile?" % dqmdatadir)
            self.rootfilepwd = self.rootfile
            self.rootfileprevpwd = self.rootfile

    def __is_null(self, directory, name):
        """Return True, after a message on stderr, if *directory* is null."""
        is_null = not directory
        if is_null:
            stderr.write("Directory %s does not exist!\n" % name)
        return is_null

    def ls(self, directory_name=""):
        """Map the name of each key of a directory to its class name.

        An empty *directory_name* designates the current directory.  An
        empty dict is returned if the directory does not exist.
        """
        contents = {}
        if len(directory_name) == 0:
            directory = self.rootfilepwd
        else:
            # Formerly executed for the empty name as well, which threw away
            # the shortcut above.
            directory = self.rootfilepwd.GetDirectory(directory_name)
        if self.__is_null(directory, directory_name):
            return contents

        for key in directory.GetListOfKeys():
            contents[key.GetName()] = key.GetClassName()
        return contents

    def cd(self, directory_name):
        """Change the current TDirectoryFile. The familiar "-" and ".." directories
        can be accessed as well.
        """
        if directory_name == "-":
            self.rootfilepwd, self.rootfileprevpwd = self.rootfileprevpwd, self.rootfilepwd
        # "elif", not "if": otherwise "-" also ran the generic branch below
        # and looked for a directory literally called "-".
        elif directory_name == "..":
            self.rootfileprevpwd = self.rootfilepwd
            mom = self.rootfilepwd.GetMotherDir()
            # The directory being left is deleted, unless it is a "Run "
            # one (presumably to release the memory it holds).
            if "Run " not in self.rootfilepwd.GetName():
                self.rootfilepwd.Delete()
            self.rootfilepwd = mom
        else:
            new_directory = self.rootfilepwd.GetDirectory(directory_name)
            if not self.__is_null(new_directory, directory_name):
                self.rootfileprevpwd = self.rootfilepwd
                self.rootfilepwd = new_directory

    def getObj(self, objname):
        """Get a TObject from the rootfile.

        Returns ``None``, after a message on stderr, if it does not exist.
        """
        obj = self.rootfilepwd.Get(objname)
        if self.__is_null(obj, objname):
            return None
        return obj
00540
00541
00542
class DirWalkerFile(object):
    """Walk two harvested rootfiles in parallel and compare their content.

    The common directories of the two files are visited recursively; for
    every common object whose class name starts with ``TH`` or ``TPr`` a
    ``Comparison`` is added to the ``Directory`` tree rooted in
    ``self.directory``.
    """

    def __init__(self, name, topdirname, rootfilename1, rootfilename2, run=-1, black_list=[], stat_test="KS", test_threshold=.5, draw_success=True, do_pngs=False):
        """
        Args:
            name: name of the walker.
            topdirname: name of the directory the walk starts from.
            rootfilename1, rootfilename2: the two files to compare.
            run: run number; if negative the first ``Run `` directory found
                in the files is used.
            black_list: ``DirID`` objects to skip (only read, never
                modified here).
            stat_test: key of the test in ``Statistical_Tests``.
            test_threshold: argument of the statistical test constructor.
            draw_success, do_pngs: stored on the top ``Directory`` and
                propagated to subdirectories and comparisons.
        """
        self.name = name
        self.dqmrootfile1 = DQMRootFile(abspath(rootfilename1))
        self.dqmrootfile2 = DQMRootFile(abspath(rootfilename2))
        self.run = run
        self.stat_test = Statistical_Tests[stat_test](test_threshold)
        self.workdir = getcwd()
        self.black_list = black_list
        self.directory = Directory(topdirname)
        self.directory.draw_success = draw_success
        self.directory.do_pngs = do_pngs

    def __del__(self):
        """Go back to the directory the walker was created in."""
        # workdir is missing if the constructor failed early on.
        workdir = getattr(self, "workdir", None)
        if workdir is not None:
            chdir(workdir)

    def cd(self, directory_name, on_disk=False):
        """Enter *directory_name* in both files.

        With *on_disk* set, the directory is also entered (and created if
        missing) on the filesystem.  An empty name is a no-op.
        """
        if len(directory_name) == 0:
            return
        if on_disk:
            if not exists(directory_name):
                makedirs(directory_name)
            chdir(directory_name)
        self.dqmrootfile2.cd(directory_name)
        self.dqmrootfile1.cd(directory_name)

    def ls(self, directory_name=""):
        """Return common objects to the 2 files.

        The result maps each common name to its class name in the first
        file.
        """
        contents1 = self.dqmrootfile1.ls(directory_name)
        contents2 = self.dqmrootfile2.ls(directory_name)
        return dict((key, contents1[key]) for key in contents2 if key in contents1)

    def getObjs(self, name):
        """Return the pair of objects called *name* in the two files."""
        h1 = self.dqmrootfile1.getObj(name)
        h2 = self.dqmrootfile2.getObj(name)
        return h1, h2

    def __fill_single_dir(self, dir_name, directory, mother_name="", depth=0):
        """Fill *directory* with the content of *dir_name*, recursively.

        Args:
            dir_name: name of the directory to enter in the two files.
            directory: the ``Directory`` receiving subdirs and comparisons.
            mother_name: path of the mother of *dir_name*.
            depth: level of *dir_name* in the tree.

        Returns:
            0 if the directory is blacklisted, ``None`` otherwise.
        """
        this_dir = DirID(dir_name, depth)
        if this_dir in self.black_list:
            return 0

        depth += 1
        self.cd(dir_name)

        contents = self.ls()
        # Progress is reported only for the directories right below the top.
        report_progress = (depth == 1)
        n_top_contents = len(contents)
        cont_counter = 1
        mother_path = join(mother_name, dir_name)

        for name, obj_type in contents.items():
            if obj_type == "TDirectoryFile":
                if report_progress:
                    print("Studying directory %s, %s/%s" % (name, cont_counter, n_top_contents))
                    cont_counter += 1

                subdir = Directory(name)
                subdir.draw_success = directory.draw_success
                subdir.do_pngs = directory.do_pngs
                self.__fill_single_dir(name, subdir, mother_path, depth)
                if not subdir.is_empty():
                    if report_progress:
                        # No newline: "Appended." completes the line.
                        sys.stdout.write(" ->Appending %s... " % name)
                    directory.subdirs.append(subdir)
                    if report_progress:
                        print("Appended.")
            else:
                # Only the classes named TH* and TPr* are compared.
                if obj_type[:2] != "TH" and obj_type[:3] != "TPr":
                    continue
                h1, h2 = self.getObjs(name)
                directory.comparisons.append(Comparison(name,
                                                        mother_path,
                                                        h1, h2,
                                                        deepcopy(self.stat_test),
                                                        draw_success=directory.draw_success,
                                                        do_pngs=directory.do_pngs))

        self.cd("..")

    def walk(self):
        """Enter the run directory, if any, and fill ``self.directory``.

        When no run was given (``run < 0``) the first ``Run `` directory
        common to the two files is used and ``self.run`` is set from its
        name.  If there is none the file is treated as a generic rootfile
        and the walk starts from where the files currently are.
        """
        rundir = ""
        if self.run < 0:
            run_dirs = [key for key in self.ls().keys() if "Run " in key]
            if run_dirs:
                rundir = run_dirs[0]
                self.run = int(rundir.split(" ")[1])
            else:
                # Formerly the run number was parsed out of the empty
                # string here, which raised IndexError.
                print("\nRundir not there: Is this a generic rootfile?\n")
        else:
            rundir = "Run %s" % self.run

        try:
            self.cd(rundir, False)
        except Exception:
            print("\nRundir not there: Is this a generic rootfile?\n")

        self.__fill_single_dir(self.directory.name, self.directory)
        print("Finished")
        n_left_threads = len(tcanvas_print_processes)
        if n_left_threads > 0:
            print("Waiting for %s threads to finish..." % n_left_threads)
            for p in tcanvas_print_processes:
                p.join()
00676
00677
00678
class DirWalkerFile_thread_wrapper(Thread):
    """Thread whose only job is to call ``walk()`` on the wrapped walker."""

    def __init__(self, walker):
        super(DirWalkerFile_thread_wrapper, self).__init__()
        self.walker = walker

    def run(self):
        wrapped = self.walker
        wrapped.walk()
00685
00686
00687
def string2blacklist(black_list_str):
    """Build a list of ``DirID`` out of its string representation.

    The string is a comma separated list of ``name@level`` or
    ``mother/name@level`` entries.  A double underscore stands for a blank.
    Empty entries (e.g. after a trailing comma) are skipped, and an entry
    equal -- in the ``DirID`` sense -- to one already in the list is not
    added again.

    Raises:
        ValueError: if an entry has no ``@``, a non-integer level or more
            than one ``/`` in its name.
    """
    black_list = []
    black_list_str = black_list_str.replace("__", " ")
    for ele in black_list_str.split(","):
        if not ele:
            continue
        # The level is what follows the last "@".
        dirname, level = ele.rsplit("@", 1)
        level = int(level)
        if "/" not in dirname:
            dirid = DirID(dirname, level)
        else:
            mother, daughter = dirname.split("/")
            dirid = DirID(daughter, level, mother)
        if dirid not in black_list:
            black_list.append(dirid)
    return black_list
00706
00707
00708