CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_10_patch1/src/Utilities/RelMon/scripts/ValidationMatrix.py

Go to the documentation of this file.
00001 #! /usr/bin/env python
00002 ################################################################################
00003 # RelMon: a tool for automatic Release Comparison                              
00004 # https://twiki.cern.ch/twiki/bin/view/CMSPublic/RelMon
00005 #
00006 # $Author: anorkus $
00007 # $Date: 2012/10/25 16:10:22 $
00008 # $Revision: 1.8 $
00009 #
00010 #                                                                              
00011 # Danilo Piparo CERN - danilo.piparo@cern.ch                                   
00012 #                                                                              
00013 ################################################################################
00014 
00015 from optparse import OptionParser
00016 
00017 import os
00018 import cPickle
00019 import glob
00020 from re import search
00021 from subprocess import call,PIPE
00022 from multiprocessing import Pool
00023 from sys import exit
00024 
00025 import sys
00026 argv=sys.argv
00027 sys.argv=[]
00028 if os.environ.has_key("RELMON_SA"):
00029   import definitions as definitions
00030   from dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
00031   from dirstructure import Directory
00032   from directories2html import directory2html,make_summary_table
00033   from utils import ask_ok, unpickler, make_files_pairs
00034 else:
00035   import Utilities.RelMon.definitions as definitions
00036   from Utilities.RelMon.dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
00037   from Utilities.RelMon.dirstructure import Directory
00038   from Utilities.RelMon.directories2html import directory2html,make_summary_table
00039   from Utilities.RelMon.utils import ask_ok, unpickler, make_files_pairs
00040 sys.argv=argv
00041 
00042 #-------------------------------------------------------------------------------
00043 
def name2sample(filename):
  """Return the sample field encoded in a file name.

  The base name of *filename* is split on the double underscore ("__")
  and the second field is returned. An IndexError is raised when the
  base name holds fewer than two such fields.
  """
  fields=os.path.basename(filename).split("__")
  return fields[1]
00047 
def name2version(filename):
  """Return the version field encoded in a file name.

  The base name of *filename* is split on the double underscore ("__")
  and the third field is returned. An IndexError is raised when the
  base name holds fewer than three such fields.
  """
  fields=os.path.basename(filename).split("__")
  return fields[2]
00051   
def name2run(filename):
  """Return the run token encoded in a file name.

  Only the part of the base name that precedes the first "__" is
  considered; it is split on single underscores and the third token is
  returned. An IndexError is raised when that prefix holds fewer than
  three tokens.
  """
  prefix=os.path.basename(filename).split("__",1)[0]
  tokens=prefix.split("_")
  return tokens[2]
00055 
def name2runskim(filename):
  """Return "<run>_<skim>" for a file name.

  The run comes from name2run(). The skim is the last "_"-separated
  token of the version returned by name2version(), with everything from
  its last "-v" onwards (the skim version) removed when present.
  """
  run=name2run(filename)
  last_token=name2version(filename).split("_")[-1]
  # rpartition yields an empty separator when "-v" does not occur at all
  head,separator,_=last_token.rpartition("-v")
  skim=head if separator else last_token
  return "%s_%s"%(run,skim)
00063 
00064 #-------------------------------------------------------------------------------  
00065 
def guess_params(ref_filenames,test_filenames):
  """Derive the sample names and the two versions from paired file names.

  The reference and test files are walked in lockstep (extra entries of
  the longer list are ignored). For every pair the sample and version
  fields are extracted from the base names with name2sample() and
  name2version().

  Parameters:
    ref_filenames:  list of reference file paths.
    test_filenames: list of test file paths, in the same order.

  Returns:
    A tuple (samples, ref_version, test_version). `samples` holds one
    entry per pair. The versions are those of the FIRST pair, so the
    result is deterministic even if the files carry several versions.
    ([], "", "") is returned when either input list is empty.

  Exits the interpreter with status 2 when the two files of a pair do
  not refer to the same sample.
  """
  if len(ref_filenames)==0 or len(test_filenames)==0:
    print("Empty reference and test filenames lists!")
    return [],"",""

  samples=[]
  ref_versions=[]
  test_versions=[]

  for ref_path,test_path in zip(ref_filenames,test_filenames):
    ref=os.path.basename(ref_path)
    test=os.path.basename(test_path)

    ref_sample=name2sample(ref)
    ref_version=name2version(ref)
    test_sample=name2sample(test)
    test_version=name2version(test)

    if ref_sample!=test_sample:
      print("Files %s and %s do not seem to be relative to the same sample." %(ref, test))
      exit(2)

    # Slightly modify for data: a version matching "20[01]" gets its last
    # "_"-separated token appended to the sample name.
    if search("20[01]",ref_version) is not None:
      ref_sample+=ref_version.split("_")[-1]
    samples.append(ref_sample)

    ref_versions.append(ref_version)
    test_versions.append(test_version)

  # Several versions may legitimately be present (no check is enforced);
  # the first one seen is used instead of an arbitrary element of a set.
  cmssw_version1=ref_versions[0]
  cmssw_version2=test_versions[0]

  return samples,cmssw_version1,cmssw_version2
00109   
00110 
00111 #-------------------------------------------------------------------------------
00112 
def check_root_files(names_list):
  """Check that every name in *names_list* ends with ".root".

  Returns True when all names pass (including for an empty list). On the
  first offending name a message naming that file is printed and False
  is returned.
  """
  for name in names_list:
    if not name.endswith(".root"):
      # the placeholder was previously printed verbatim, never filled in
      print("File %s does not seem to be a rootfile. Please check." %name)
      return False
  return True
00119 
00120 #-------------------------------------------------------------------------------
00121 
def add_to_blacklist(blacklist, pattern, target, blist_piece):
  """Append *blist_piece* to *blacklist* when *pattern* selects *target*.

  Parameters:
    blacklist:   comma separated blacklist built so far (may be "").
    pattern:     regular expression searched in *target*; surrounding
                 whitespace is ignored and a leading "!" inverts the
                 outcome (the piece is added when there is NO match).
    target:      string the pattern is searched in.
    blist_piece: text appended to the blacklist when the rule applies.

  Returns:
    The (possibly extended) blacklist string. A comma is inserted
    before the piece unless the blacklist was empty.

  An empty pattern is a valid regular expression that matches any
  target; it no longer raises an IndexError.
  """
  int_pattern=pattern.strip()
  # startswith() is safe on an empty string, unlike indexing [0]
  flip_condition=int_pattern.startswith('!')
  if flip_condition:
    int_pattern=int_pattern[1:]

  condition = search(int_pattern,target) is not None
  if flip_condition:
    condition = not condition

  if condition:
    if blacklist!="": # if not the first, add a comma
      blacklist+=","
    blacklist+=blist_piece
  return blacklist
00142 
00143 #-------------------------------------------------------------------------------
00144 
def guess_blacklists(samples,ver1,ver2,hlt):
  """Build one blacklist string per sample from a fixed set of rules.

  Every sample starts from a common base blacklist. Depending on *hlt*
  and on whether *ver1* matches the pattern "2010+|2011+" (treated as
  data, anything else as MC), extra entries are appended and one of the
  pattern/blacklist pair lists of the `definitions` module is applied
  through add_to_blacklist(). MC rules are matched against the sample
  name, data rules against *ver1*. *ver2* is not used.

  Returns a dictionary mapping each sample to its blacklist string.
  """
  # NOTE(review): guess_params() recognises data with "20[01]", a
  # different pattern from the one used here - confirm this is intended.
  blacklists={}
  for sample in samples:
    entry="FED@1,AlcaBeamMonitor@1,Physics@1,Info@-1,HLT@1,AlCaReco@1"
    is_data=search("2010+|2011+",ver1) is not None

    rules=()
    target=sample
    if hlt:
      entry+=",AlCaEcalPi0@2"
      if is_data:
        # at the moment no extra rule applies: hlt is ran already
        print("We are treating Data files for the HLT")
      else:
        print("We are treating MC files for the HLT")
        rules=definitions.hlt_mc_pattern_blist_pairs
    elif is_data:
      print("We are treating Data files:")
      entry+=",By__Lumi__Section@-1,AlCaReco@1"
      rules=definitions.data_pattern_blist_pairs
      target=ver1
    else:
      print("We are treating MC files")
      rules=definitions.mc_pattern_blist_pairs

    for pattern,blist in rules:
      entry=add_to_blacklist(entry,pattern,target,blist)
    blacklists[sample]=entry

  return blacklists
00186 
00187 #-------------------------------------------------------------------------------  
00188 
def get_roofiles_in_dir(directory):
  """List the ".root" files found directly inside *directory*.

  The directory is printed first. The returned list holds each matching
  entry of os.listdir() joined with *directory*; sub-directories are
  not descended into.
  """
  print(directory)
  return [os.path.join(directory,entry)
          for entry in os.listdir(directory)
          if entry.endswith(".root")]
00195   
00196 #-------------------------------------------------------------------------------  
00197 
def get_filenames_from_pool(all_samples):
  """Guess the reference/test pairs among the root files of a directory.

  The ".root" files of *all_samples* are listed, printed and handed to
  make_files_pairs(). In the list it returns, even positions are taken
  as reference files and odd positions as test files. The guessed pairs
  are printed for inspection.

  Returns (ref_filenames, test_filenames), or ([], []) when the
  directory holds no root file.
  """
  files_list=get_roofiles_in_dir(all_samples)

  if not files_list:
    print("Zero files found in directory %s!" %all_samples)
    return [],[]

  for name in files_list:
    print("%s %s" %("* ",name))
  # An odd number of files is only reported, not treated as fatal
  if len(files_list)%2:
    print("The numbuer of file is not even... Trying to recover a catastrophe.")

  files_list=make_files_pairs(files_list)

  # Alternate entries: reference first, test second
  ref_filenames=list(files_list[0::2])
  test_filenames=list(files_list[1::2])

  print("The guess would be the following:")
  for ref,test in zip(ref_filenames,test_filenames):
    refbasedir,refname=os.path.split(ref)
    testbasedir,testname=os.path.split(test)
    if refbasedir==testbasedir:
      dir_to_print=refbasedir
    else:
      dir_to_print="%s and %s" %(refbasedir,testbasedir)
    print("* Directory: %s " %dir_to_print)
    print("  o %s" %refname)
    print("  o %s" %testname)

  return ref_filenames,test_filenames
00248   
00249 
00250 #-------------------------------------------------------------------------------
00251 
def get_clean_fileanames(ref_samples,test_samples):
  """Turn two comma separated strings into lists of file names.

  Each string is split on "," and every item is stripped of surrounding
  whitespace. The interpreter exits with status 2 when the two lists
  differ in length or when check_root_files() rejects one of them.

  Returns (ref_filenames, test_filenames).
  """
  ref_filenames=[name.strip() for name in ref_samples.split(",")]
  test_filenames=[name.strip() for name in test_samples.split(",")]

  if len(ref_filenames)!=len(test_filenames):
    print("The numebr of reference and test files does not seem to be the same. Please check.")
    exit(2)

  # The test list is only inspected when the reference list is fine
  if not check_root_files(ref_filenames) or not check_root_files(test_filenames):
    exit(2)
  return ref_filenames,test_filenames
00264 
00265 #-------------------------------------------------------------------------------
00266 
def count_alive_processes(p_list):
  """Count the items of *p_list* whose `returncode` attribute is None."""
  alive=[proc for proc in p_list if proc.returncode==None]
  return len(alive)
00269 
00270 #-------------------------------------------------------------------------------
00271 
def call_compare_using_files(args):
  """Run compare_using_files.py on one reference/test pair.

  Parameters:
    args: sequence (sample, ref_filename, test_filename, options). It is
          a single argument so the function can be used with Pool.map().
          `options` must provide the attributes hlt, do_pngs,
          test_threshold, stat_test, hash_name and blacklist_file.

  Returns:
    The return code of the compare_using_files.py process.

  The argument list is assembled directly rather than by splitting a
  command string on spaces, so file or sample names that contain spaces
  reach the script as single arguments.
  """
  sample, ref_filename, test_filename, options = args
  blacklists=guess_blacklists([sample],name2version(ref_filename),name2version(test_filename),options.hlt)

  command=["compare_using_files.py",ref_filename,test_filename,"-C","-R"]
  if options.do_pngs:
    command.append("-p")
  command+=["-o",sample,"--specify_run"]
  # Threshold and statistical test are taken from the options
  command+=["-t",str(options.test_threshold)]
  command+=["-s",str(options.stat_test)]

  # Inspect the HLT directories
  if options.hlt:
    command+=["-d","HLT"]

  if options.hash_name:
    command.append("--hash_name")

  if options.blacklist_file:
    command.append("--use_black_file")

  if len(blacklists[sample])>0:
    command+=["-B",blacklists[sample]]

  # Empty arguments were never passed on by the string based version
  command=[arg for arg in command if len(arg)>0]
  print("\nExecuting --  %s" %" ".join(command))

  return call(command)
00304   
00305 
00306 #--------------------------------------------------------------------------------
00307 
def do_comparisons_threaded(options):
  """Compare all reference/test pairs with a pool of worker processes.

  The file pairs come either from the directory options.all_samples or
  from the comma separated lists options.ref_samples and
  options.test_samples. The comparisons run inside the output directory
  (options.out_dir, or "<version1>VS<version2>" when that is empty),
  which is created if missing. When options.input_dir is empty it is
  set to the output directory.

  Parameters:
    options: parsed command line options; besides the attributes named
             above, n_processes and those read by
             call_compare_using_files() are used.

  Returns:
    0 when no sample could be identified, None otherwise.

  The working directory in effect at call time is restored on exit,
  also when a comparison raises.
  """
  n_processes=int(options.n_processes)

  if len(options.all_samples)>0:
    ref_filenames,test_filenames=get_filenames_from_pool(options.all_samples)
  else:
    ref_filenames,test_filenames=get_clean_fileanames(options.ref_samples,options.test_samples)

  # make the paths absolute: the comparisons run from another directory
  ref_filenames=[os.path.abspath(name) for name in ref_filenames]
  test_filenames=[os.path.abspath(name) for name in test_filenames]

  samples,cmssw_version1,cmssw_version2=guess_params(ref_filenames,test_filenames)

  if len(samples)==0:
    print("No Samples found... Quitting")
    return 0

  outdir=options.out_dir
  if len(outdir)==0:
    outdir="%sVS%s" %(cmssw_version1,cmssw_version2)
    print("Creating automatic outdir: %s" %outdir)
  if len(options.input_dir)==0:
    options.input_dir=outdir
    print("Creating automatic indir: %s" %options.input_dir)

  # never start more workers than there are comparisons
  n_comparisons=len(ref_filenames)
  if n_comparisons < n_processes:
    n_processes=n_comparisons
    print("Less comparisons than possible processes: reducing n processes to %s" %n_processes)

  original_dir=os.getcwd()
  if not os.path.exists(outdir):
    # makedirs also copes with a nested output directory
    os.makedirs(outdir)
  os.chdir(outdir)
  try:
    ## Compare all pairs of root files
    pool=Pool(n_processes)
    try:
      args_iterable=[list(args)+[options] for args in zip(samples,ref_filenames,test_filenames)]
      pool.map(call_compare_using_files,args_iterable)
    finally:
      # release the worker processes whatever happened
      pool.close()
      pool.join()
    # move the pickles on the top, hack
    os.system("mv */*pkl .")
  finally:
    # go back where we started; ".." is wrong for nested or absolute outdirs
    os.chdir(original_dir)
00385 #-------------------------------------------------------------------------------
def do_reports(indir):
  """Run compare_using_files.py in report mode on every pickle of *indir*.

  One process is started per file whose name ends with ".pkl"; the
  output name passed with -o is the file name without that suffix. At
  most options.n_processes processes run at the same time: once that
  many have been started they are all waited for before the next batch
  begins.

  NOTE: this function reads the module level `options` object (created
  when the file runs as a script) for n_processes and do_pngs.

  The working directory in effect at call time is restored on exit.
  """
  # Only call and PIPE are imported at file level. Popen is needed here
  # because call() blocks and returns an int, which cannot be waited on.
  from subprocess import Popen

  n_processes=int(options.n_processes)
  original_dir=os.getcwd()
  os.chdir(indir)
  try:
    pkl_list=[name for name in os.listdir("./") if name.endswith(".pkl")]
    running_subprocesses=[]
    for pklfilename in pkl_list:
      command=["compare_using_files.py","-R"]
      if options.do_pngs:
        command.append("-p")
      command+=["-P",pklfilename]
      command+=["-o",pklfilename[:-4]]
      print("Executing %s" %" ".join(command))
      running_subprocesses.append(Popen(command))
      # batch complete: wait for all of it before starting more
      if len(running_subprocesses)>=n_processes:
        for process in running_subprocesses:
          process.wait()
        running_subprocesses=[]
    # wait for the last, possibly incomplete, batch
    for process in running_subprocesses:
      process.wait()
  finally:
    os.chdir(original_dir)
00412   
00413 #-------------------------------------------------------------------------------
def do_html(options, hashing_flag):
  """Write the summary page "RelMonSummary.html" in the current directory.

  Parameters:
    options:      parsed command line options; the attributes reports,
                  input_dir and hlt are read.
    hashing_flag: forwarded unchanged to make_summary_table().

  When options.reports is set, do_reports() is first run on
  options.input_dir. The aggregation rules are taken from the 'HLT' or
  'reco' entries of the `definitions` dictionaries depending on
  options.hlt.
  """
  if options.reports:
    print("Preparing reports for the single files...")
    do_reports(options.input_dir)

  # check which aggregation rules are to be used
  if options.hlt:
    print("Aggregating directories according to HLT rules")
    rules_key='HLT'
  else:
    rules_key='reco'
  aggregation_rules=definitions.aggr_pairs_dict[rules_key]
  aggregation_rules_twiki=definitions.aggr_pairs_twiki_dict[rules_key]

  # Do the summary page
  table_html = make_summary_table(options.input_dir,aggregation_rules,aggregation_rules_twiki, hashing_flag)

  # the with block closes the file even if the write fails
  with open("RelMonSummary.html","w") as ofile:
    ofile.write(table_html)
00436 
00437 #-------------------------------------------------------------------------------
00438 
if __name__ == "__main__":
  # Command line entry point: parse the options, run the comparisons
  # when samples are given and build the html summary when an input
  # directory is known.
  #
  # NOTE: `options` must stay a module level name, do_reports() reads it
  # as a global.

  #-----------------------------------------------------------------------------
  # Default values of the options
  ref_samples=""
  test_samples=""
  all_samples=""
  n_processes=1
  out_dir=""
  in_dir=""
  n_threads=1 # do not change this
  run=-1
  stat_test="Chi2"
  test_threshold=0.00001
  hlt=False
  #-----------------------------------------------------------------------------


  parser = OptionParser(usage="usage: %prog [options]")

  # The long name used to carry a trailing blank and was only reachable
  # through optparse abbreviation matching.
  parser.add_option("-R","--ref_samples",
                    action="store",
                    dest="ref_samples",
                    default=ref_samples,
                    help="The samples that act as reference (comma separated list)")

  parser.add_option("-T","--test_samples",
                    action="store",
                    dest="test_samples",
                    default=test_samples,
                    help="The samples to be tested (comma separated list)")

  parser.add_option("-a","--all_samples",
                    action="store",
                    dest="all_samples",
                    default=all_samples,
                    help="EXPERIMENTAL: Try to sort all samples selected (wildcards) and organise a comparison")

  parser.add_option("-o","--out_dir",
                    action="store",
                    dest="out_dir",
                    default=out_dir,
                    help="The outdir other than <Version1>VS<Version2>")

  parser.add_option("-p","--do_pngs",
                    action="store_true",
                    dest="do_pngs",
                    default=False,
                    help="EXPERIMENTAL!!! Do the pngs of the comparison (takes 50%% of the total running time) \n(default is %s)" %False)

  # Same trailing blank problem as for --ref_samples
  parser.add_option("-r","--run",
                    action="store",
                    dest="run",
                    default=run,
                    help="The run to be checked \n(default is %s)" %run)

  parser.add_option("-t","--test_threshold",
                    action="store",
                    dest="test_threshold",
                    default=test_threshold,
                    help="Threshold for the statistical test \n(default is %s)" %test_threshold)

  parser.add_option("-s","--stat_test",
                    action="store",
                    dest="stat_test",
                    default=stat_test,
                    help="Statistical test (KS or Chi2) \n(default is %s)" %stat_test)

  parser.add_option("-N","--numberOfProcesses",
                    action="store",
                    dest="n_processes",
                    default=n_processes,
                    help="Number of parallel processes to be run. Be Polite! \n(default is %s)" %n_processes)

  parser.add_option("--HLT",
                    action="store_true",
                    dest="hlt",
                    default=False,
                    help="Analyse HLT histograms\n(default is %s)" %hlt)

  parser.add_option("-i","--input_dir",
                    action="store",
                    dest="input_dir",
                    default=in_dir,
                    help="Input directory for html creation \n(default is %s)" %in_dir)

  # The help used to show the input directory default instead of False
  parser.add_option("--reports",
                    action="store_true",
                    dest="reports",
                    default=False,
                    help="Do the reports for the pickles \n(default is %s)" %False)
##---HASHING---##
  parser.add_option("--hash_name",
                    action="store_true",
                    dest="hash_name",
                    default=False,
                    help="Set if you want to minimize & hash the output HTML files.")
##--Blacklist File --##
  parser.add_option("--use_black_file",
                    action="store_true",
                    dest="blacklist_file",
                    default=False,
                    help="Use a black list file of histograms located @ /RelMon/data")

  (options, args) = parser.parse_args()

  # Something to work on is needed: a ref/test couple, a pool of samples
  # or an input directory with the results of a previous run.
  if len(options.test_samples)*len(options.ref_samples)+len(options.all_samples)==0 and len(options.input_dir)==0:
    print("No samples given as input.")
    parser.print_help()
    exit(2)

  if len(options.all_samples)>0 or (len(options.ref_samples)*len(options.test_samples)>0):
    do_comparisons_threaded(options)
  # do_comparisons_threaded() fills input_dir when it was left empty
  if len(options.input_dir)>0:
    do_html(options, options.hash_name)
00553 
00554 
00555 
00556 
00557 
00558 
00559 
00560 
00561 
00562 
00563 
00564