00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 from copy import deepcopy
00015 from os import chdir,getcwd,makedirs
00016 from os.path import abspath,exists,join
00017 from re import sub,search
00018 from re import compile as recompile
00019 from sys import exit,stderr,version_info
00020 from threading import Thread,activeCount
00021 from time import sleep
00022 from urllib2 import Request,build_opener,urlopen
00023
00024 import sys
00025 argv=sys.argv
00026 from ROOT import *
00027 import ROOT
00028 sys.argv=argv
00029
00030 gROOT.SetBatch(True)
00031
00032 from authentication import X509CertOpen
00033 from dirstructure import Comparison,Directory,tcanvas_print_processes
00034 from utils import Chi2,KS,BinToBin,Statistical_Tests,literal2root
00035
00036
00037
class Error(Exception):
    """Base class for exceptions in this module."""


class DQM_DB_Communication(Error):
    """Exception raised in case of problems communicating with the server.

    The explanation is available both as ``exc.msg`` and through
    ``str(exc)``.
    """

    def __init__(self, msg):
        # Forward the message to Exception so that str(exc) and tracebacks
        # actually show it; ``msg`` is kept for existing callers.
        Error.__init__(self, msg)
        self.msg = msg


class InvalidNumberOfArguments(Error):
    """Exception raised when a call receives an unsupported number of
    positional arguments.

    The explanation is available both as ``exc.msg`` and through
    ``str(exc)``.
    """

    def __init__(self, msg):
        Error.__init__(self, msg)
        self.msg = msg
00052
00053
00054
class DQMcommunicator(object):
    """Communicate with the DQM Document server.

    The object keeps a "current directory" (``DQMpwd``) expressed as a
    server-relative path; ``ls_url`` is the single place where the server
    address is prepended.
    """

    # Server-relative path of the JSON archive; initial value of DQMpwd.
    base_dir = '/data/json/archive/'

    def __init__(self,
                 server,
                 is_private=False,
                 ident="DQMToJson/1.0 python/%d.%d.%d" % version_info[:3]):
        """Store the connection parameters.

        server     -- base address of the DQM server, prepended to paths.
        is_private -- when False an opener built on X509CertOpen is used,
                      when True plain ``urlopen`` is used.
        ident      -- value sent in the ``User-agent`` header.
        """
        self.ident = ident
        self.server = server
        self.is_private = is_private
        self.DQMpwd = DQMcommunicator.base_dir
        self.prevDQMpwd = self.DQMpwd
        # cd() records the previous location in ``oldDQMpwd``; initialise it
        # so that cd("-") works before any other cd() call.
        self.oldDQMpwd = self.DQMpwd
        self.opener = None
        if not self.is_private:
            self.opener = build_opener(X509CertOpen())

    def open_url(self, url):
        """Open ``url`` (spaces escaped as %20) and return the response
        object, sending ``self.ident`` as User-agent."""
        url = url.replace(' ', '%20')
        datareq = Request(url)
        datareq.add_header('User-agent', self.ident)
        if not self.is_private:
            return self.opener.open(datareq)
        return urlopen(datareq)

    def get_data(self, full_url):
        """Read the document at ``full_url`` and return it after replacing
        the ``inf``/``nan`` tokens with ``0`` and collapsing doubled quotes
        around ``CMSSW...`` tokens."""
        data = self.open_url(full_url).read()

        # The order matters: "-inf" must go before the whitespace+"inf" rule.
        data = sub(r"-inf", '0', data)
        data = sub(r"\s+inf", '0', data)
        data = sub(r"\s+nan", '0', data)
        data = sub(r'""(CMSSW.*?)""', r'"\1"', data)

        return data

    def ls_url(self, url):
        """List the folder at the server-relative path ``url``.

        Returns a dict mapping each entry name to a description dict:
        ``{"type": "dir"}`` for sub-directories, or
        ``{"type", "kind", "obj_as_string"}`` for objects.

        Raises DQM_DB_Communication if the folder cannot be fetched after
        the initial attempt plus five retries.
        """
        url = url.replace(" ", "%20")
        url = self.server + url

        # NOTE(security): the server reply is passed to eval(); this is only
        # acceptable as long as the server is trusted.
        raw_folder = None
        try:
            raw_folder = eval(self.get_data(url))
        except Exception:
            print("Retrying..")
            for ntrials in range(5):
                try:
                    if ntrials != 0:
                        sleep(2)
                    raw_folder = eval(self.get_data(url))
                    break
                except Exception:
                    print("Could not fetch %s. Retrying" % url)

        if raw_folder is None:
            # Every attempt failed: report it explicitly instead of failing
            # below with an unrelated TypeError on None.
            raise DQM_DB_Communication("Could not fetch %s" % url)

        form_folder = {}
        for content_dict in raw_folder["contents"]:
            if "subdir" in content_dict:
                form_folder[content_dict["subdir"]] = {"type": 'dir'}
            elif "obj" in content_dict:
                properties = content_dict["properties"]
                obj_name = content_dict["obj"]
                obj_type = properties["type"]
                obj_kind = properties["kind"]
                # "rootobj" is only present when root content was requested.
                obj_as_string = content_dict.get("rootobj", '')
                form_folder[obj_name] = {'type': obj_type,
                                         'obj_as_string': obj_as_string,
                                         "kind": obj_kind}
        return form_folder

    def ls(self, url='', fetch_root=False):
        """List ``url`` (default: the current directory).

        With ``fetch_root`` the ``rootcontent=1`` query is added so that the
        serialised objects are returned as well.
        """
        if len(url) == 0:
            url = join(self.DQMpwd, url)

        if fetch_root:
            url = '%s?rootcontent=1' % url
        return self.ls_url(url)

    def cd(self, *args):
        """Change the current directory.

        Accepts either one argument (a folder name, "..", "-", "" or the
        current directory itself) or three arguments
        (dataset, run, folder).

        Raises InvalidNumberOfArguments for any other argument count.
        """
        len_args = len(args)
        full_url = ""
        if len_args != 1 and len_args != 3:
            raise InvalidNumberOfArguments("3 or 1 args expected!")
        if len_args == 3:
            dataset, run, folder = args
            # Kept server-relative: ls_url() prepends self.server, so adding
            # it here as well would duplicate the server address.
            full_url = '/data/json/archive/%s/%s/%s' % (dataset, run, folder)
        if len_args == 1:
            folder = args[0]
            if folder == self.DQMpwd:
                full_url = self.DQMpwd
            elif folder == "..":
                full_url = self.DQMpwd[:self.DQMpwd.rfind("/")]
            elif folder == "-":
                full_url = self.oldDQMpwd
            elif folder == "":
                full_url = DQMcommunicator.base_dir
            else:
                full_url = self.DQMpwd + "/" + folder

        full_url = full_url.replace(' ', '%20')

        self.oldDQMpwd = self.DQMpwd
        self.DQMpwd = full_url

    def get_samples(self, samples_string="*"):
        """Return the list of samples matching ``samples_string``.

        A sample contains, among the other things, a data type, a dataset
        name and a run.
        """
        full_url = '%s/data/json/samples?match=%s' % (self.server, samples_string)
        # NOTE(security): eval() of the server reply, see ls_url().
        samples_dict = eval(self.get_data(full_url))
        return samples_dict["samples"]

    def get_datasets_list(self, dataset_string=""):
        """Return the distinct dataset names of the matching samples, in
        order of first appearance."""
        datasets_list = []
        seen = set()
        for sample in self.get_samples(dataset_string):
            for item in sample['items']:
                dataset = item["dataset"]
                if dataset not in seen:
                    seen.add(dataset)
                    datasets_list.append(dataset)
        return datasets_list

    def get_RelVal_CMSSW_versions(self, query):
        """Get the available cmssw versions for the relvals.

        The version is taken as the third "/"-separated field of each
        dataset name; duplicates are removed.
        """
        relvals_list = self.get_datasets_list(query)
        return list(set(relval.split("/")[2] for relval in relvals_list))

    def get_runs_list(self, dataset_string):
        """Return the runs of all samples matching ``dataset_string``
        (leading and trailing slashes are ignored)."""
        dataset_string = dataset_string.strip("/")
        samples_list = self.get_samples(dataset_string)
        return [item['run']
                for sample in samples_list
                for item in sample['items']]

    def get_dataset_runs(self, dataset_string):
        """Return a dict mapping each matching dataset to its run list."""
        dataset_runs = {}
        for dataset in self.get_datasets_list(dataset_string):
            dataset_runs[dataset] = self.get_runs_list(dataset)
        return dataset_runs

    def get_common_runs(self, dataset_string1, dataset_string2):
        """Return the runs present in both datasets (unordered list)."""
        set1 = set(self.get_runs_list(dataset_string1))
        set2 = set(self.get_runs_list(dataset_string2))
        return list(set1.intersection(set2))

    def get_root_objects_list(self, url=""):
        """Return the ROOT objects found directly in ``url`` (default: the
        current directory) as a list."""
        if len(url) == 0:
            url = self.DQMpwd
        else:
            url = "/" + url
        url = url.replace(" ", "%20")
        objects = []
        for name, description in self.ls(url, True).items():
            if "dir" not in description["type"] and "ROOT" in description["kind"]:
                objects.append(literal2root(description["obj_as_string"],
                                            description["type"]))
        return objects

    def get_root_objects(self, url=""):
        """Return the ROOT objects found directly in ``url`` (default: the
        current directory) as a dict keyed by object name."""
        if len(url) == 0:
            url = self.DQMpwd
        else:
            # Server-relative like get_root_objects_list(): ls_url() already
            # prepends self.server.
            url = "/" + url
        url = url.replace(" ", "%20")
        objects = {}
        for name, description in self.ls(url, True).items():
            if "dir" not in description["type"] and "ROOT" in description["kind"]:
                objects[name] = literal2root(description["obj_as_string"],
                                             description["type"])
        return objects

    def get_root_objects_list_recursive(self, url=""):
        """Return a list with the ROOT objects found in ``url`` (default:
        the current directory) and in all its sub-directories.

        The current directory is changed while walking.
        """
        null_url = (len(url) == 0)
        if null_url:
            url = self.DQMpwd
        else:
            url = "/" + url
        url = url.replace(" ", "%20")
        if not null_url:
            self.cd(url)
        objects = []
        for name, description in self.ls("", True).items():
            if "dir" in description["type"]:
                objects += self.get_root_objects_list_recursive(name)
                # The callee's own cd("..") strips the directory name; this
                # one strips the extra "/" left by the leading slash it
                # prepended before cd().
                self.cd("..")
            elif "ROOT" in description["kind"]:
                objects.append(literal2root(description["obj_as_string"],
                                            description["type"]))
        if not null_url:
            self.cd("..")
        return objects

    def get_root_objects_names_list_recursive(self, url="", present_url=""):
        """Return the names of the ROOT objects found in ``url`` and below.

        Each name is prefixed with the "_"-joined chain of directories
        walked so far (``present_url``). The current directory is changed
        while walking.
        """
        null_url = (len(url) == 0)
        if not null_url:
            if len(present_url) == 0:
                present_url = url
            else:
                present_url += "_%s" % url
        if null_url:
            url = self.DQMpwd
        else:
            url = "/" + url
        url = url.replace(" ", "%20")
        if not null_url:
            self.cd(url)
        objects_names = []
        for name, description in self.ls("", False).items():
            if "dir" in description["type"]:
                objects_names += self.get_root_objects_names_list_recursive(name, present_url)
                # Second step up: see get_root_objects_list_recursive().
                self.cd("..")
            elif "ROOT" in description["kind"]:
                objects_names.append("%s_%s" % (present_url, name))
        if not null_url:
            self.cd("..")
        return objects_names

    def get_root_objects_recursive(self, url="", present_url=""):
        """Return a dict with the ROOT objects found in ``url`` and below.

        Keys are "<dir chain joined by _>_<object name>". The current
        directory is changed while walking.
        """
        null_url = (len(url) == 0)
        if not null_url:
            if len(present_url) == 0:
                present_url = url
            else:
                present_url += "_%s" % url
        if null_url:
            url = self.DQMpwd
        else:
            url = "/" + url
        url = url.replace(" ", "%20")

        # NOTE(review): unlike the two sibling methods, cd() is called here
        # even when ``url`` was empty, so the trailing cd("..") moves the
        # current directory one level up in that case -- confirm whether
        # this is intended before aligning it with the others.
        self.cd(url)
        objects = {}
        for name, description in self.ls("", True).items():
            if "dir" in description["type"]:
                objects.update(self.get_root_objects_recursive(name, present_url))
                self.cd("..")
            elif "ROOT" in description["kind"]:
                objects["%s_%s" % (present_url, name)] = literal2root(
                    description["obj_as_string"], description["type"])

        self.cd("..")
        return objects
00351
00352
00353
class DirID(object):
    """Structure used to identify a directory in the walked tree.

    It carries the name and depth information, plus an optional mother
    directory name. ``name`` is also compiled as a regular expression and
    used for matching in comparisons.
    """

    def __init__(self, name, depth, mother=""):
        self.name = name
        self.compname = recompile(name)
        self.mother = mother
        self.depth = depth

    def __eq__(self, dirid):
        """Two DirIDs are equal when

        * the name of one matches (regex search) the name of the other,
        * their depths are equal, or exactly one of them is negative
          (a negative depth acts as a wildcard), and
        * their mothers are equal, if both specify a non-empty mother.
        """
        if not isinstance(dirid, DirID):
            return NotImplemented

        names_match = (self.compname.search(dirid.name) is not None or
                       dirid.compname.search(self.name) is not None)
        if not names_match:
            return False

        is_equal = self.depth * dirid.depth < 0 or self.depth == dirid.depth
        # Compare the mothers only when both sides specify one. The product
        # of the two lengths is non-zero exactly in that case.
        if len(self.mother) * len(dirid.mother) > 0:
            is_equal = is_equal and self.mother == dirid.mother
        return is_equal

    def __ne__(self, dirid):
        # Python 2 does not derive != from ==; keep the two consistent.
        result = self.__eq__(dirid)
        if result is NotImplemented:
            return result
        return not result

    def __repr__(self):
        return "Directory %s at level %s" % (self.name, self.depth)
00377
00378
class DirFetcher(Thread):
    """Thread fetching the content of a single "directory" in the dqm.

    After the thread has run, the listing returned by ``comm.ls`` (with
    the root content requested) is available in ``contents``; until then
    ``contents`` is None.
    """

    def __init__(self, comm, directory):
        super(DirFetcher, self).__init__()
        self.contents = None
        self.directory = directory
        self.comm = comm

    def run(self):
        listing = self.comm.ls(self.directory, True)
        self.contents = listing
00389
00390
00391
class DirWalkerDB(Thread):
    """An interface to the DQM document db. It is threaded to compensate the
    latency introduced by the finite response time of the server.

    Each walker lists the same directory under ``base1`` and ``base2``,
    appends a Comparison to ``directory.comparisons`` for every common ROOT
    object and spawns a child walker for every common sub-directory.
    """

    def __init__(self, comm1, comm2, base1, base2, directory, depth=0,
                 do_pngs=True, stat_test="KS", test_threshold=.5,
                 black_list=None):
        """
        comm1, comm2   -- communicators; each walker works on its own
                          deep copy.
        base1, base2   -- base paths of the two samples to compare.
        directory      -- Directory object filled in by run().
        depth          -- depth of ``directory`` in the walked tree.
        do_pngs        -- forwarded to the Comparison objects.
        stat_test      -- key into Statistical_Tests.
        test_threshold -- argument of the statistical test constructor.
        black_list     -- DirIDs to skip (default: none).
        """
        Thread.__init__(self)
        self.comm1 = deepcopy(comm1)
        self.comm2 = deepcopy(comm2)
        self.base1, self.base2 = base1, base2
        self.directory = directory
        self.depth = depth
        self.do_pngs = do_pngs
        self.test_threshold = test_threshold
        self.stat_test = stat_test
        # A fresh list per instance instead of a shared mutable default.
        self.black_list = [] if black_list is None else black_list
        # Make the thread name tell which directory is being walked.
        self.name += "_%s" % directory.name

    def run(self):
        this_dir = DirID(self.directory.name, self.depth)
        if this_dir in self.black_list:
            print("Skipping %s since blacklisted!" % this_dir)
            return 0

        self.depth += 1

        the_test = Statistical_Tests[self.stat_test](self.test_threshold)

        directory1 = self.base1 + "/" + self.directory.mother_dir + "/" + self.directory.name
        directory2 = self.base2 + "/" + self.directory.mother_dir + "/" + self.directory.name

        # Fetch both listings in parallel.
        fetchers = (DirFetcher(self.comm1, directory1),
                    DirFetcher(self.comm2, directory2))
        for fetcher in fetchers:
            fetcher.start()
        for fetcher in fetchers:
            fetcher.join()

        contents1 = fetchers[0].contents
        contents2 = fetchers[1].contents
        if contents1 is None or contents2 is None:
            # A fetcher died before storing its result; nothing can be
            # compared for this directory.
            stderr.write("Could not fetch the contents of %s: skipping.\n"
                         % self.directory.name)
            return 0

        walkers = []
        self_directory_directories = self.directory.subdirs
        self_directory_comparisons = self.directory.comparisons
        # Only the entries present in both samples are considered.
        contents_names = list(set(contents1).intersection(contents2))

        for name in contents_names:
            content = contents1[name]
            if "dir" in content["type"]:
                subdir = Directory(name, join(self.directory.mother_dir, self.directory.name))
                dirwalker = DirWalkerDB(self.comm1, self.comm2, self.base1, self.base2,
                                        subdir, self.depth, self.do_pngs,
                                        self.stat_test, self.test_threshold,
                                        self.black_list)
                dirwalker.start()
                walkers.append(dirwalker)
                # Throttle: with more than 5 live threads wait for the
                # walker just started before spawning another one.
                n_threads = activeCount()
                if n_threads > 5:
                    dirwalker.join()
            elif content["kind"] == "ROOT":
                comparison = Comparison(name,
                                        join(self.directory.mother_dir, self.directory.name),
                                        literal2root(content["obj_as_string"], content["type"]),
                                        literal2root(contents2[name]["obj_as_string"], content["type"]),
                                        deepcopy(the_test),
                                        do_pngs=self.do_pngs)
                self_directory_comparisons.append(comparison)

        # Collect the sub-directories that turned out to be non-empty.
        for walker in walkers:
            walker.join()
            walker_directory = walker.directory
            if not walker_directory.is_empty():
                self_directory_directories.append(walker_directory)
00470
00471
00472
class DQMRootFile(object):
    """Class acting as interface between the user and the harvested DQMRootFile.

    It skips the directories created by the DQM infrastructure so to provide
    an interface as similar as possible to a real directory structure and to
    the directory structure provided by the db interface.
    """

    def __init__(self, rootfilename):
        """Open ``rootfilename`` and position the current directory on its
        "DQMData" directory, or on the file itself if that is missing."""
        dqmdatadir = "DQMData"
        self.rootfile = TFile(rootfilename)
        self.rootfilepwd = self.rootfile.GetDirectory(dqmdatadir)
        self.rootfileprevpwd = self.rootfile.GetDirectory(dqmdatadir)
        # NOTE(review): the comparison is deliberately kept as ``== None``;
        # a missing directory is presumably a null ROOT proxy rather than
        # the None singleton -- confirm before changing to ``is None``.
        if self.rootfilepwd == None:
            print("Directory %s does not exist: skipping. Is this a custom rootfile?" % dqmdatadir)
            self.rootfilepwd = self.rootfile
            self.rootfileprevpwd = self.rootfile

    def __is_null(self, directory, name):
        """Return True (and complain on stderr) if ``directory`` is null."""
        is_null = not directory
        if is_null:
            stderr.write("Directory %s does not exist!\n" % name)
        return is_null

    def ls(self, directory_name=""):
        """Return a dict {key name: class name} for the content of
        ``directory_name`` (default: the current directory).

        An empty dict is returned if the directory does not exist.
        """
        contents = {}
        if len(directory_name) == 0:
            directory = self.rootfilepwd
        else:
            directory = self.rootfilepwd.GetDirectory(directory_name)
        if self.__is_null(directory, directory_name):
            return contents

        for key in directory.GetListOfKeys():
            contents[key.GetName()] = key.GetClassName()
        return contents

    def cd(self, directory_name):
        """Change the current TDirectoryFile. The familiar "-" and ".."
        directories can be accessed as well.
        """
        if directory_name == "-":
            # Swap current and previous directory; nothing else to do.
            self.rootfilepwd, self.rootfileprevpwd = \
                self.rootfileprevpwd, self.rootfilepwd
        elif directory_name == "..":
            self.rootfileprevpwd = self.rootfilepwd

            mom = self.rootfilepwd.GetMotherDir()

            # The directory being left is deleted, unless it is a "Run "
            # directory.
            if "Run " not in self.rootfilepwd.GetName():
                self.rootfilepwd.Delete()

            self.rootfilepwd = mom
        else:
            new_directory = self.rootfilepwd.GetDirectory(directory_name)
            if not self.__is_null(new_directory, directory_name):
                self.rootfileprevpwd = self.rootfilepwd
                self.rootfilepwd = new_directory

    def getObj(self, objname):
        """Get a TObject from the rootfile.

        Returns None (after complaining on stderr) if it does not exist.
        """
        obj = self.rootfilepwd.Get(objname)
        if self.__is_null(obj, objname):
            return None
        return obj
00540
00541
00542
class DirWalkerFile(object):
    """Walk two DQM root files in parallel and fill a Directory tree with
    the Comparison of every histogram/profile they have in common."""

    def __init__(self, name, topdirname, rootfilename1, rootfilename2, run=-1,
                 black_list=None, stat_test="KS", test_threshold=.5,
                 draw_success=True, do_pngs=False, black_list_histos=None):
        """
        name              -- name of the walker.
        topdirname        -- name of the top Directory to fill.
        rootfilename1/2   -- paths of the two files to compare.
        run               -- run number; if negative, walk() looks for the
                             first "Run " directory.
        black_list        -- DirIDs of the directories to skip.
        stat_test         -- key into Statistical_Tests.
        test_threshold    -- argument of the statistical test constructor.
        draw_success, do_pngs -- copied onto every Directory created.
        black_list_histos -- paths of the histograms whose Comparison is
                             created with skip=True.
        """
        self.name = name
        self.dqmrootfile1 = DQMRootFile(abspath(rootfilename1))
        self.dqmrootfile2 = DQMRootFile(abspath(rootfilename2))
        self.run = run
        self.stat_test = Statistical_Tests[stat_test](test_threshold)
        self.workdir = getcwd()
        # Fresh lists per instance instead of shared mutable defaults.
        self.black_list = [] if black_list is None else black_list
        self.directory = Directory(topdirname)

        self.directory.draw_success = draw_success
        self.directory.do_pngs = do_pngs
        self.black_list_histos = [] if black_list_histos is None else black_list_histos

    def __del__(self):
        # Go back to the directory the walker was created in.
        chdir(self.workdir)

    def cd(self, directory_name, on_disk=False, regexp=False,):
        """Change directory in both root files.

        Nothing is done for an empty ``directory_name``. With ``on_disk``
        the directory is also created (if needed) and entered on the file
        system. With ``regexp`` the name is not used literally: each file
        enters its own directory whose name contains "Run"; LookupError is
        raised if none is found.
        """
        if len(directory_name) == 0:
            return

        if on_disk:
            if not exists(directory_name):
                makedirs(directory_name)
            chdir(directory_name)

        if regexp == True:
            # File 2 is handled first; if file 1 has no "Run" directory of
            # its own, the one found in file 2 is tried there as well.
            next_dir = None
            for root_file in (self.dqmrootfile2, self.dqmrootfile1):
                for elem in root_file.ls():
                    if "Run" in elem:
                        next_dir = elem
                if next_dir is None:
                    raise LookupError("No directory containing 'Run' found")
                root_file.cd(next_dir)
        else:
            self.dqmrootfile2.cd(directory_name)
            self.dqmrootfile1.cd(directory_name)

    def ls(self, directory_name=""):
        """Return common objects to the 2 files.

        The class names are the ones found in the first file.
        """
        contents1 = self.dqmrootfile1.ls(directory_name)
        contents2 = self.dqmrootfile2.ls(directory_name)
        return dict((key, contents1[key]) for key in contents2 if key in contents1)

    def getObjs(self, name):
        """Return the pair of objects called ``name`` in the two files."""
        h1 = self.dqmrootfile1.getObj(name)
        h2 = self.dqmrootfile2.getObj(name)
        return h1, h2

    def __fill_single_dir(self, dir_name, directory, mother_name="", depth=0):
        """Recursively fill ``directory`` with the content of ``dir_name``.

        Returns 0 without doing anything if the directory is blacklisted.
        """
        this_dir = DirID(dir_name, depth)
        if this_dir in self.black_list:
            return 0

        depth += 1

        self.cd(dir_name)

        contents = self.ls()
        # Progress is only reported for the first level of directories.
        n_top_contents = len(contents)
        cont_counter = 1
        for name, obj_type in contents.items():
            if obj_type == "TDirectoryFile":
                if depth == 1:
                    print("Studying directory %s, %s/%s" % (name, cont_counter, n_top_contents))
                    cont_counter += 1

                subdir = Directory(name)
                subdir.draw_success = directory.draw_success
                subdir.do_pngs = directory.do_pngs
                self.__fill_single_dir(name, subdir, join(mother_name, dir_name), depth)
                if not subdir.is_empty():
                    if depth == 1:
                        # No newline: "Appended." completes the line below.
                        sys.stdout.write(" ->Appending %s... " % name)
                    directory.subdirs.append(subdir)
                    if depth == 1:
                        print("Appended.")
            else:
                # Only histograms (TH*) and profiles (TPr*) are compared.
                if obj_type[:2] != "TH" and obj_type[:3] != "TPr":
                    continue
                h1, h2 = self.getObjs(name)

                path = join(mother_name, dir_name, name)
                skip = path in self.black_list_histos
                if skip:
                    print(" Skipping %s" % (path))
                directory.comparisons.append(Comparison(name,
                                                        join(mother_name, dir_name),
                                                        h1, h2,
                                                        deepcopy(self.stat_test),
                                                        draw_success=directory.draw_success,
                                                        do_pngs=directory.do_pngs,
                                                        skip=skip))

        self.cd("..")

    def walk(self):
        """Enter the run directory, fill the Directory tree and wait for
        the pending canvas-printing processes."""
        rundir = ""
        if self.run < 0:
            # No run given: take the first "Run " directory, if any.
            first_run_dir = ""
            try:
                first_run_dir = [k for k in self.ls() if "Run " in k][0]
            except Exception:
                print("\nRundir not there: Is this a generic rootfile?\n")
            rundir = first_run_dir
            try:
                self.run = int(rundir.split(" ")[1])
            except (IndexError, ValueError):
                print("Setting run number to 0")
                self.run = 0
        else:
            rundir = "Run %s" % self.run

        try:
            self.cd(rundir, False, True)
        except Exception:
            # Best effort: a file without run directory is walked from
            # where it is.
            print("\nRundir not there: Is this a generic rootfile?\n")

        self.__fill_single_dir(self.directory.name, self.directory)
        print("Finished")
        n_left_threads = len(tcanvas_print_processes)
        if n_left_threads > 0:
            print("Waiting for %s threads to finish..." % n_left_threads)
            for p in tcanvas_print_processes:
                p.join()
00707
00708
00709
class DirWalkerFile_thread_wrapper(Thread):
    """Thread whose only job is to call ``walk()`` on the wrapped walker."""

    def __init__(self, walker):
        super(DirWalkerFile_thread_wrapper, self).__init__()
        self.walker = walker

    def run(self):
        wrapped = self.walker
        wrapped.walk()
00716
00717
00718
def string2blacklist(black_list_str):
    """Build a list of DirID objects from a blacklist string.

    The string is a comma separated list of ``name@level`` or
    ``mother/name@level`` elements; a double underscore stands for a space.
    An element equal to one already collected is not added again.
    """
    entries = []

    spec = black_list_str.replace("__", " ")
    if not spec:
        return entries

    for token in spec.split(","):
        dirname, level_text = token.split("@")
        level = int(level_text)
        if "/" in dirname:
            mother, daughter = dirname.split("/")
            candidate = DirID(daughter, level, mother)
        else:
            candidate = DirID(dirname, level)
        if candidate not in entries:
            entries.append(candidate)

    return entries
00737
00738
00739