CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_2_9/src/Utilities/RelMon/scripts/ValidationMatrix.py

Go to the documentation of this file.
00001 #! /usr/bin/env python
00002 ################################################################################
00003 # RelMon: a tool for automatic Release Comparison                              
00004 # https://twiki.cern.ch/twiki/bin/view/CMSPublic/RelMon
00005 #
00006 # $Author: agimbuta $
00007 # $Date: 2012/07/17 14:56:58 $
00008 # $Revision: 1.5 $
00009 #
00010 #                                                                              
00011 # Danilo Piparo CERN - danilo.piparo@cern.ch                                   
00012 #                                                                              
00013 ################################################################################
00014 
00015 from optparse import OptionParser
00016 
00017 import os
00018 import cPickle
00019 import glob
00020 from re import search
00021 from subprocess import Popen,PIPE
00022 from multiprocessing import Pool
00023 from sys import exit
00024 
import sys
# The command line is hidden while the RelMon modules are imported and put
# back right afterwards.
# NOTE(review): presumably this keeps the imported modules (or anything they
# load) from parsing this script's own options - confirm before changing it.
argv=sys.argv
sys.argv=[]
if "RELMON_SA" in os.environ:
  # RELMON_SA is set: import the modules by their bare names
  import definitions as definitions
  from dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
  from dirstructure import Directory
  from directories2html import directory2html,make_summary_table
  from utils import ask_ok, unpickler, make_files_pairs
else:
  # otherwise import them from the Utilities.RelMon package
  import Utilities.RelMon.definitions as definitions
  from Utilities.RelMon.dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
  from Utilities.RelMon.dirstructure import Directory
  from Utilities.RelMon.directories2html import directory2html,make_summary_table
  from Utilities.RelMon.utils import ask_ok, unpickler, make_files_pairs
sys.argv=argv
00041 
00042 #-------------------------------------------------------------------------------
00043 
def name2sample(filename):
  """Return the sample field of a file name.

  The base name is split on double underscores ("__") and the second field
  is returned. IndexError is raised when the name has no such field.
  """
  fields=os.path.basename(filename).split("__")
  return fields[1]
00047 
def name2version(filename):
  """Return the version field of a file name.

  The base name is split on double underscores ("__") and the third field
  is returned. IndexError is raised when the name has no such field.
  """
  fields=os.path.basename(filename).split("__")
  return fields[2]
00051   
def name2run(filename):
  """Return the run field of a file name.

  Only the part of the base name before the first double underscore is
  considered; it is split on single underscores and its third piece is
  returned. IndexError is raised when that piece does not exist.
  """
  leading_field=os.path.basename(filename).partition("__")[0]
  pieces=leading_field.split("_")
  return pieces[2]
00055 
def name2runskim(filename):
  """Return "<run>_<skim>" for a file name.

  The run is the third "_"-separated piece of the first "__"-separated field
  of the base name. The skim is the last "_"-separated piece of the third
  "__"-separated field, cut at its last "-v" when it contains one.
  IndexError is raised when the name lacks one of these fields.
  """
  fields=os.path.basename(filename).split("__")
  run=fields[0].split("_")[2]
  skim=fields[2].split("_")[-1]
  # drop the skim version, i.e. everything from the last "-v" on
  version_pos=skim.rfind("-v")
  if version_pos!=-1:
    skim=skim[:version_pos]
  return "%s_%s" %(run,skim)
00063 
00064 #-------------------------------------------------------------------------------  
00065 
def guess_params(ref_filenames,test_filenames):
  """Derive the sample names and the two versions from paired file names.

  The two lists are walked in lockstep (extra entries of the longer list are
  ignored). For every pair the sample and version fields are extracted with
  name2sample/name2version from the base names.

  Returns a tuple (samples, version1, version2):
    samples  - one entry per pair: the reference sample name, with the last
               "_"-separated piece of the reference version appended when the
               version matches the regular expression "20[01]"
    version1 - version of the first reference file
    version2 - version of the first test file
  When one of the two lists is empty ([], "", "") is returned.

  The script exits with status 2 when the two files of a pair carry
  different sample names.
  """
  if len(ref_filenames)==0 or len(test_filenames)==0:
    print("Empty reference and test filenames lists!")
    return [],"",""

  samples=[]
  ref_versions=[]
  test_versions=[]

  for ref, test in zip(map(os.path.basename,ref_filenames),map(os.path.basename,test_filenames)):

    ref_sample=name2sample(ref)
    ref_version=name2version(ref)
    test_sample=name2sample(test)
    test_version=name2version(test)

    if ref_sample!=test_sample:
      print("Files %s and %s do not seem to be relative to the same sample." %(ref, test))
      exit(2)

    # Slightly modify for data
    if search("20[01]",ref_version) is not None:
      ref_sample+=ref_version.split("_")[-1]
    samples.append(ref_sample)

    # keep the versions in the order of the input files
    ref_versions.append(ref_version)
    test_versions.append(test_version)

  # Several versions may be present (this is deliberately not treated as an
  # error). The versions of the first pair are reported: picking the first
  # element of a set, as done previously, made the choice depend on the
  # arbitrary iteration order of the set.
  cmssw_version1=ref_versions[0]
  cmssw_version2=test_versions[0]

  return samples,cmssw_version1,cmssw_version2
00109   
00110 
00111 #-------------------------------------------------------------------------------
00112 
def check_root_files(names_list):
  """Tell whether every name in names_list ends with ".root".

  Only the file name suffix is inspected, the files are not opened.
  Returns True when all the names pass (also for an empty list). At the
  first name that fails, a message naming it is printed and False is
  returned.
  """
  for name in names_list:
    if not name.endswith(".root"):
      # the name was never interpolated before: the message showed a bare "%s"
      print("File %s does not seem to be a rootfile. Please check." %name)
      return False
  return True
00119 
00120 #-------------------------------------------------------------------------------
00121 
def add_to_blacklist(blacklist, pattern, target, blist_piece):
  """Return blacklist, extended by blist_piece when pattern applies to target.

  blacklist   - comma separated string built so far (may be empty)
  pattern     - regular expression searched for in target; surrounding
                blanks are ignored and a leading "!" inverts the outcome
  target      - string the pattern is searched in
  blist_piece - text appended (after a comma, unless blacklist is empty)

  An empty pattern raises IndexError.
  """
  regex=pattern.strip()
  negated=regex[0]=='!'
  if negated:
    regex=regex[1:]

  found=search(regex,target) is not None
  # the piece is added when exactly one of "found" and "negated" holds
  if found==negated:
    return blacklist

  separator=","
  if blacklist=="": # the first piece takes no comma
    separator=""
  return blacklist+separator+blist_piece
00142 
00143 #-------------------------------------------------------------------------------
00144 
def guess_blacklists(samples,ver1,ver2,hlt):
  """Build a blacklist for each sample according to a set of rules.

  samples - names of the samples; they become the keys of the result
  ver1    - reference version; it is treated as data when it matches the
            regular expression "2010+|2011+", as Monte Carlo otherwise
  ver2    - not used
  hlt     - when true the HLT rules are applied instead of the RECO ones

  Every blacklist starts from a fixed list of entries and is extended with
  the pieces of the matching pattern/blacklist pairs found in the
  definitions module (the patterns are applied to the sample name for Monte
  Carlo and to ver1 for data).

  Returns a dictionary sample -> comma separated blacklist string.
  """
  # NOTE(review): guess_params and do_comparisons_threaded recognise data
  # with the looser expression "20[01]"; the two may disagree - confirm
  # which one is intended.
  blacklists={}
  for sample in samples:
    is_data=search("2010+|2011+",ver1) is not None
    blist="FED@1,AlcaBeamMonitor@1,Physics@1,Info@-1,HLT@1,AlCaReco@1"

    if hlt:
      blist+=",AlCaEcalPi0@2"
      if is_data:
        # nothing is added for data: the HLT was already run on them
        print("We are treating Data files for the HLT")
      else:
        print("We are treating MC files for the HLT")
        for pattern,piece in definitions.hlt_mc_pattern_blist_pairs:
          blist=add_to_blacklist(blist,pattern,sample,piece)
    elif is_data:
      # RECO, data
      print("We are treating Data files:")
      blist+=",By__Lumi__Section@-1,AlCaReco@1"
      for pattern,piece in definitions.data_pattern_blist_pairs:
        blist=add_to_blacklist(blist,pattern,ver1,piece)
    else:
      # RECO, Monte Carlo
      print("We are treating MC files")
      for pattern,piece in definitions.mc_pattern_blist_pairs:
        blist=add_to_blacklist(blist,pattern,sample,piece)

    blacklists[sample]=blist

  return blacklists
00186 
00187 #-------------------------------------------------------------------------------  
00188 
def get_roofiles_in_dir(directory):
  """Print directory and list the entries in it whose name ends with ".root".

  Every returned entry is prefixed with directory; the order is the one
  given by os.listdir. Subdirectories are not visited.
  """
  print(directory)
  return [os.path.join(directory,entry)
          for entry in os.listdir(directory)
          if entry.endswith(".root")]
00195   
00196 #-------------------------------------------------------------------------------  
00197 
def get_filenames_from_pool(all_samples):
  """Split the rootfiles found in a directory into reference and test files.

  all_samples - directory scanned with get_roofiles_in_dir

  The files found are printed, handed to make_files_pairs, and the sequence
  it returns is split by position: entries at even positions become
  references, entries at odd positions become tests. The resulting pairing
  is printed as well.

  Returns (ref_filenames, test_filenames), or ([], []) when the directory
  holds no rootfile.
  """
  files_list=get_roofiles_in_dir(all_samples)

  if len(files_list)==0:
    print("Zero files found in directory %s!" %all_samples)
    return [],[]

  for name in files_list:
    print("*  %s" %name)
  # an odd number of files only triggers a warning
  if len(files_list)%2!=0:
    print("The numbuer of file is not even... Trying to recover a catastrophe.")

  files_list=make_files_pairs(files_list)

  # even positions hold the references, odd positions the tests
  ref_filenames=[name for position,name in enumerate(files_list) if position%2==0]
  test_filenames=[name for position,name in enumerate(files_list) if position%2!=0]

  print("The guess would be the following:")
  for ref,test in zip(ref_filenames,test_filenames):
    ref_dir=os.path.dirname(ref)
    test_dir=os.path.dirname(test)
    if ref_dir==test_dir:
      dir_to_print=ref_dir
    else:
      dir_to_print="%s and %s" %(ref_dir,test_dir)
    print("* Directory: %s " %dir_to_print)
    print("  o %s" %os.path.basename(ref))
    print("  o %s" %os.path.basename(test))

  return ref_filenames,test_filenames
00248   
00249 
00250 #-------------------------------------------------------------------------------
00251 
def get_clean_fileanames(ref_samples,test_samples):
  """Turn two comma separated strings into two lists of file names.

  Blanks around every name are removed. The script exits with status 2 when
  the two lists differ in length or when check_root_files rejects one of
  them (the reference list is checked first).

  Returns (ref_filenames, test_filenames).
  """
  ref_filenames=[name.strip() for name in ref_samples.split(",")]
  test_filenames=[name.strip() for name in test_samples.split(",")]

  if len(ref_filenames)!=len(test_filenames):
    print("The numebr of reference and test files does not seem to be the same. Please check.")
    exit(2)

  for names in (ref_filenames,test_filenames):
    if not check_root_files(names):
      exit(2)
  return ref_filenames,test_filenames
00264 
00265 #-------------------------------------------------------------------------------
00266 
def count_alive_processes(p_list):
  """Count the entries of p_list whose returncode attribute is None."""
  return sum(1 for process in p_list if process.returncode is None)
00269 
00270 #-------------------------------------------------------------------------------
00271 
def call_compare_using_files(args):
  """Run compare_using_files.py on one pair of files and wait for it to end.

  args - sequence (sample, ref_filename, test_filename, options); options
         must provide hlt, do_pngs, test_threshold and stat_test

  The blacklist of the sample is obtained from guess_blacklists and handed
  over with -B; the sample name is used as output directory (-o).

  Returns the exit status of the child process.
  """
  sample, ref_filename, test_filename, options = args
  blacklists=guess_blacklists([sample],name2version(ref_filename),name2version(test_filename),options.hlt)

  # The arguments are kept in a list instead of being glued into a string
  # and split again, so that paths containing blanks survive.
  command=["compare_using_files.py",ref_filename,test_filename,"-C","-R"]
  if options.do_pngs:
    command.append("-p")
  command+=["-o",sample]
  command.append("--specify_run")
  command+=["-t",str(options.test_threshold)]
  command+=["-s",str(options.stat_test)]

  # Inspect the HLT directories
  if options.hlt:
    command+=["-d","HLT"]

  if len(blacklists[sample])>0:
    command+=["-B",blacklists[sample]]
  print("\nExecuting --  %s" %" ".join(command))

  process=Popen(command)
  # Wait for the comparison to finish. Without this the function returned as
  # soon as the child was spawned: the caller's process pool could neither
  # limit the number of simultaneous comparisons nor tell when they were done.
  return process.wait()
00298 
00299 #--------------------------------------------------------------------------------
00300 
def do_comparisons_threaded(options):
  """Compare all the (reference, test) pairs using a pool of processes.

  options - parsed command line options; all_samples, ref_samples,
            test_samples, out_dir, input_dir and n_processes are read here,
            the object is also handed to call_compare_using_files

  The file pairs come from get_filenames_from_pool when all_samples is
  given, from get_clean_fileanames otherwise. The comparisons are run from
  inside the output directory ("<version1>VS<version2>" when out_dir is
  empty), which is created when missing. options.input_dir is set to the
  output directory when it is empty. Once the comparisons are over, the
  pickles found one level below the output directory are moved into it and
  the working directory of the caller is restored.

  Returns 0 when no sample is found, None otherwise.
  """
  n_processes=int(options.n_processes)

  if len(options.all_samples)>0:
    ref_filenames,test_filenames=get_filenames_from_pool(options.all_samples)
  else:
    ref_filenames,test_filenames=get_clean_fileanames(options.ref_samples,options.test_samples)

  # make the paths absolute: the working directory is changed further down
  ref_filenames=[os.path.abspath(name) for name in ref_filenames]
  test_filenames=[os.path.abspath(name) for name in test_filenames]

  samples,cmssw_version1,cmssw_version2=guess_params(ref_filenames,test_filenames)

  if len(samples)==0:
    print("No Samples found... Quitting")
    return 0

  outdir=options.out_dir
  if len(outdir)==0:
    outdir="%sVS%s" %(cmssw_version1,cmssw_version2)
    print("Creating automatic outdir: %s" %outdir)
  if len(options.input_dir)==0:
    options.input_dir=outdir
    print("Creating automatic indir: %s" %options.input_dir)

  # makedirs also copes with an output directory nested in missing parents
  if not os.path.exists(outdir):
    os.makedirs(outdir)

  # never start more processes than there are comparisons
  n_comparisons=len(ref_filenames)
  if n_comparisons < n_processes:
    n_processes=n_comparisons
    print("Less comparisons than possible processes: reducing n processes to %s" %n_processes)
  else:
    print(n_processes)

  print(ref_filenames)

  original_dir=os.getcwd()
  os.chdir(outdir)
  try:
    ## Compare all pairs of root files
    pool=Pool(n_processes)
    try:
      args_iterable=[[sample,ref,test,options]
                     for sample,ref,test in zip(samples,ref_filenames,test_filenames)]
      pool.map(call_compare_using_files,args_iterable)
    finally:
      # release the worker processes in any case
      pool.close()
      pool.join()

    # move the pickles on the top, hack
    os.system("mv */*pkl .")
  finally:
    # Go back to where we started: a plain chdir("..") ends up in the wrong
    # place when the output directory is nested or absolute.
    os.chdir(original_dir)
00379 #-------------------------------------------------------------------------------
def do_reports(indir):
  """Run compare_using_files.py on every pickle found in indir.

  indir - directory scanned for files ending with ".pkl"; the commands are
          run from inside it

  One child process is started per pickle, with the pickle name stripped of
  its extension as output directory. The children are started in batches of
  n_processes and every batch is waited for, the last one included. The
  working directory of the caller is restored at the end.

  NOTE: n_processes and do_pngs are read from the module level name
  "options", which only exists once the __main__ section has parsed the
  command line.
  """
  print(indir)
  original_dir=os.getcwd()
  os.chdir(indir)
  try:
    # endswith: a name merely containing ".pkl" is not a pickle, and the
    # extension is sliced off below
    pkl_list=[name for name in os.listdir("./") if name.endswith(".pkl")]
    n_processes=int(options.n_processes)
    running_subprocesses=[]
    for pklfilename in pkl_list:
      command=["compare_using_files.py","-R"]
      if options.do_pngs:
        command.append("-p")
      command+=["-P",pklfilename]
      command+=["-o",pklfilename[:-len(".pkl")]]
      print("Executing %s" %" ".join(command))
      running_subprocesses.append(Popen(command))
      # batch complete: wait for all its members before going on
      if len(running_subprocesses)>=n_processes:
        for process in running_subprocesses:
          process.wait()
        running_subprocesses=[]

    # the last batch may be incomplete: it was never waited for before
    for process in running_subprocesses:
      process.wait()
  finally:
    # a plain chdir("..") is wrong when indir is nested or absolute
    os.chdir(original_dir)
00406   
00407 #-------------------------------------------------------------------------------
def do_html(options):
  """Write the summary page RelMonSummary.html in the working directory.

  options - parsed command line options; reports, hlt and input_dir are read

  When options.reports is set, do_reports is first run on options.input_dir.
  The summary table is produced by make_summary_table from options.input_dir
  and the aggregation rules of the definitions module: the 'HLT' ones when
  options.hlt is set, the 'reco' ones otherwise.
  """
  if options.reports:
    print("Preparing reports for the single files...")
    do_reports(options.input_dir)

  # check which aggregation rules are to be used
  if options.hlt:
    print("Aggregating directories according to HLT rules")
    rules_key='HLT'
  else:
    rules_key='reco'
  aggregation_rules=definitions.aggr_pairs_dict[rules_key]
  aggregation_rules_twiki=definitions.aggr_pairs_twiki_dict[rules_key]

  # Do the summary page
  table_html=make_summary_table(options.input_dir,aggregation_rules,aggregation_rules_twiki)

  # create summary html file; "with" closes it even when the write fails
  with open("RelMonSummary.html","w") as ofile:
    ofile.write(table_html)
00430 
00431 #-------------------------------------------------------------------------------
00432 
if __name__ == "__main__":

  #-----------------------------------------------------------------------------
  # Default values of the command line options
  ref_samples=""
  test_samples=""
  all_samples=""
  n_processes=1
  out_dir=""
  in_dir=""
  n_threads=1 # do not change this
  run=-1
  stat_test="Chi2"
  test_threshold=0.00001
  hlt=False
  #-----------------------------------------------------------------------------


  parser = OptionParser(usage="usage: %prog [options]")

  # Long option names must not end with a blank: "--ref_samples " and
  # "--run " were only reachable through the abbreviation matching of optparse.
  parser.add_option("-R","--ref_samples",
                    action="store",
                    dest="ref_samples",
                    default=ref_samples,
                    help="The samples that act as reference (comma separated list)")

  parser.add_option("-T","--test_samples",
                    action="store",
                    dest="test_samples",
                    default=test_samples,
                    help="The samples to be tested (comma separated list)")

  parser.add_option("-a","--all_samples",
                    action="store",
                    dest="all_samples",
                    default=all_samples,
                    help="EXPERIMENTAL: Try to sort all samples selected (wildacrds) and organise a comparison")

  parser.add_option("-o","--out_dir",
                    action="store",
                    dest="out_dir",
                    default=out_dir,
                    help="The outdir other than <Version1>VS<Version2>")

  parser.add_option("-p","--do_pngs",
                    action="store_true",
                    dest="do_pngs",
                    default=False,
                    help="EXPERIMENTAL!!! Do the pngs of the comparison (takes 50%% of the total running time) \n(default is %s)" %False)

  parser.add_option("-r","--run",
                    action="store",
                    dest="run",
                    default=run,
                    help="The run to be checked \n(default is %s)" %run)

  parser.add_option("-t","--test_threshold",
                    action="store",
                    dest="test_threshold",
                    default=test_threshold,
                    help="Threshold for the statistical test \n(default is %s)" %test_threshold)

  parser.add_option("-s","--stat_test",
                    action="store",
                    dest="stat_test",
                    default=stat_test,
                    help="Statistical test (KS or Chi2) \n(default is %s)" %stat_test)

  parser.add_option("-N","--numberOfProcesses",
                    action="store",
                    dest="n_processes",
                    default=n_processes,
                    help="Number of parallel processes to be run. Be Polite! \n(default is %s)" %n_processes)

  parser.add_option("--HLT",
                    action="store_true",
                    dest="hlt",
                    default=False,
                    help="Analyse HLT histograms\n(default is %s)" %hlt)

  parser.add_option("-i","--input_dir",
                    action="store",
                    dest="input_dir",
                    default=in_dir,
                    help="Input directory for html creation \n(default is %s)" %in_dir)

  # the default shown here is the one of this flag (it used to be in_dir)
  parser.add_option("--reports",
                    action="store_true",
                    dest="reports",
                    default=False,
                    help="Do the reports for the pickles \n(default is %s)" %False)

  # NOTE: "options" has to stay a module level name, do_reports reads it as
  # a global.
  (options, args) = parser.parse_args()

  # comparisons need either a pool of samples or both a reference and a test list
  comparison_requested = len(options.all_samples)>0 or (len(options.ref_samples)*len(options.test_samples)>0)

  if not comparison_requested and len(options.input_dir)==0:
    print("No samples given as input.")
    parser.print_help()
    exit(2)

  if comparison_requested:
    do_comparisons_threaded(options)
  # input_dir is tested only now: do_comparisons_threaded fills it in when
  # it was not given
  if len(options.input_dir)>0:
    do_html(options)
00535 
00536 
00537 
00538 
00539 
00540 
00541 
00542 
00543 
00544 
00545 
00546