00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 from optparse import OptionParser
00016
00017 import os
00018 import cPickle
00019 import glob
00020 from re import search
00021 from subprocess import call,PIPE
00022 from multiprocessing import Pool
00023 from sys import exit
00024
00025 import sys
00026 argv=sys.argv
00027 sys.argv=[]
00028 if os.environ.has_key("RELMON_SA"):
00029 import definitions as definitions
00030 from dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
00031 from dirstructure import Directory
00032 from directories2html import directory2html,make_summary_table
00033 from utils import ask_ok, unpickler, make_files_pairs
00034 else:
00035 import Utilities.RelMon.definitions as definitions
00036 from Utilities.RelMon.dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
00037 from Utilities.RelMon.dirstructure import Directory
00038 from Utilities.RelMon.directories2html import directory2html,make_summary_table
00039 from Utilities.RelMon.utils import ask_ok, unpickler, make_files_pairs
00040 sys.argv=argv
00041
00042
00043
def name2sample(filename):
    """Return the sample field of a DQM root file name.

    The directory part of *filename* is dropped, the remaining base name
    is split on double underscores and the second field is returned.
    Raises IndexError when the base name contains no "__" separator.
    """
    fields = os.path.basename(filename).split("__")
    return fields[1]
00047
def name2version(filename):
    """Return the version field of a DQM root file name.

    The directory part of *filename* is dropped, the remaining base name
    is split on double underscores and the third field is returned.
    Raises IndexError when the base name has fewer than three fields.
    """
    fields = os.path.basename(filename).split("__")
    return fields[2]
00051
def name2run(filename):
    """Return the run token of a DQM root file name.

    The first double-underscore separated field of the base name is split
    again on single underscores and its third piece is returned.
    Raises IndexError when that first field has fewer than three pieces.
    """
    leading_field = os.path.basename(filename).split("__")[0]
    pieces = leading_field.split("_")
    return pieces[2]
00055
def name2runskim(filename):
    """Return "<run>_<skim>" for a DQM root file name.

    The run comes from name2run(); the skim is the last underscore
    separated piece of name2version(), cut at its last "-v" when present.
    """
    run_label = name2run(filename)
    skim_label = name2version(filename).split("_")[-1]

    # Drop a trailing "-v<N>" style suffix, if there is one.
    cut_at = skim_label.rfind("-v")
    if cut_at != -1:
        skim_label = skim_label[:cut_at]
    return "%s_%s" % (run_label, skim_label)
00063
00064
00065
def guess_params(ref_filenames,test_filenames):
    """Derive the sample list and the two release versions from file names.

    Reference and test files are paired positionally (extra entries of the
    longer list are ignored).  For every pair the sample name must agree,
    otherwise the program exits with status 2.  When the reference version
    matches "20[01]" its last underscore separated piece is appended to the
    sample name.

    Returns (samples, ref_version, test_version).  The versions are those of
    the first pair: the previous implementation took element 0 of a set,
    which is an arbitrary choice as soon as several versions are mixed.
    Returns ([], "", "") when either input list is empty.
    """
    if len(ref_filenames) == 0 or len(test_filenames) == 0:
        print("Empty reference and test filenames lists!")
        return [], "", ""

    samples = []
    ref_versions = []
    test_versions = []

    for ref_path, test_path in zip(ref_filenames, test_filenames):
        ref = os.path.basename(ref_path)
        test = os.path.basename(test_path)

        ref_sample = name2sample(ref)
        ref_version = name2version(ref)
        test_sample = name2sample(test)
        test_version = name2version(test)

        if ref_sample != test_sample:
            print("Files %s and %s do not seem to be relative to the same sample." % (ref, test))
            exit(2)

        # A version matching "20[01]" gets its last piece appended to the sample name.
        if search("20[01]", ref_version) is not None:
            ref_sample += ref_version.split("_")[-1]
        samples.append(ref_sample)

        ref_versions.append(ref_version)
        test_versions.append(test_version)

    # Both input lists are non-empty here, so at least one pair was recorded.
    # Use the first pair's versions so the result is deterministic.
    cmssw_version1 = ref_versions[0]
    cmssw_version2 = test_versions[0]

    return samples, cmssw_version1, cmssw_version2
00109
00110
00111
00112
def check_root_files(names_list):
    """Tell whether every name in *names_list* ends with ".root".

    The first offending name is reported on stdout and False is returned;
    an empty list yields True.
    """
    for name in names_list:
        if not name.endswith(".root"):
            # The file name used to be missing from this message because the
            # format argument had been left out.
            print("File %s does not seem to be a rootfile. Please check." % name)
            return False
    return True
00119
00120
00121
def add_to_blacklist(blacklist, pattern, target, blist_piece):
    """Append *blist_piece* to the comma separated *blacklist* when a rule fires.

    *pattern* is a regular expression searched in *target*; surrounding
    whitespace is ignored and a leading "!" negates the outcome.  When the
    rule fires, *blist_piece* is appended (preceded by a comma unless the
    blacklist is still empty).  The resulting string is returned; the
    input string is returned unchanged when the rule does not fire.

    An empty pattern no longer raises IndexError: it is handed to the
    regular expression engine as is, so it matches any target ("" fires,
    "!" never fires).
    """
    int_pattern = pattern.strip()
    # startswith() is safe on an empty string, unlike indexing [0].
    flip_condition = int_pattern.startswith("!")
    if flip_condition:
        int_pattern = int_pattern[1:]

    condition = search(int_pattern, target) is not None
    if flip_condition:
        condition = not condition

    if condition:
        if blacklist != "":  # not the first entry: separate with a comma
            blacklist += ","
        blacklist += blist_piece

    return blacklist
00142
00143
00144
def guess_blacklists(samples,ver1,ver2,hlt):
    """Build the blacklist string of every sample from a fixed set of rules.

    Every sample starts from the same base list.  *ver1* is treated as a
    data release when it matches "2010+|2011+" and as Monte Carlo
    otherwise; together with the *hlt* flag this selects which extra
    entries and which pattern/blacklist pairs of the `definitions` module
    are applied through add_to_blacklist().  *ver2* is not used.

    Returns a dict mapping each sample to its comma separated blacklist.
    """
    result = {}
    for sample in samples:
        entries = "FED@1,AlcaBeamMonitor@1,Physics@1,Info@-1,HLT@1,AlCaReco@1"
        is_mc = not search("2010+|2011+", ver1)

        if hlt and is_mc:
            entries += ",AlCaEcalPi0@2"
            print("We are treating MC files for the HLT")
            for pattern, blist in definitions.hlt_mc_pattern_blist_pairs:
                entries = add_to_blacklist(entries, pattern, sample, blist)
        elif hlt:
            # Data with HLT: only the common HLT entry is added.
            entries += ",AlCaEcalPi0@2"
            print("We are treating Data files for the HLT")
        elif is_mc:
            print("We are treating MC files")
            for pattern, blist in definitions.mc_pattern_blist_pairs:
                entries = add_to_blacklist(entries, pattern, sample, blist)
        else:
            print("We are treating Data files:")
            entries += ",By__Lumi__Section@-1,AlCaReco@1"
            # Data rules are matched against the version, not the sample name.
            for pattern, blist in definitions.data_pattern_blist_pairs:
                entries = add_to_blacklist(entries, pattern, ver1, blist)

        result[sample] = entries

    return result
00186
00187
00188
def get_roofiles_in_dir(directory):
    """List the ".root" entries of *directory*, each joined to *directory*.

    The directory is echoed on stdout first.  Entries come in the order
    os.listdir() reports them.
    """
    print(directory)
    return [os.path.join(directory, entry)
            for entry in os.listdir(directory)
            if entry.endswith(".root")]
00195
00196
00197
def get_filenames_from_pool(all_samples):
    """Guess reference/test pairs from the root files of one directory.

    The files found by get_roofiles_in_dir() are listed on stdout, passed
    through make_files_pairs() and then dealt out alternately: entries at
    even positions become references, entries at odd positions become
    tests.  The guessed pairing is printed before being returned.

    Returns (ref_filenames, test_filenames); both are empty when the
    directory holds no root file.
    """
    candidates = get_roofiles_in_dir(all_samples)

    if len(candidates) == 0:
        print("Zero files found in directory %s!" % all_samples)
        return [], []

    for name in candidates:
        # Same output as the two-item print statement: items joined by a blank.
        print(" ".join(("* ", name)))
    if len(candidates) % 2 != 0:
        print("The numbuer of file is not even... Trying to recover a catastrophe.")

    paired = make_files_pairs(candidates)

    ref_filenames = []
    test_filenames = []
    for position, filename in enumerate(paired):
        bucket = ref_filenames if position % 2 == 0 else test_filenames
        bucket.append(filename)

    print("The guess would be the following:")
    for ref, test in zip(ref_filenames, test_filenames):
        ref_dir = os.path.dirname(ref)
        test_dir = os.path.dirname(test)
        if ref_dir != test_dir:
            shown_dir = "%s and %s" % (ref_dir, test_dir)
        else:
            shown_dir = ref_dir
        print("* Directory: %s " % shown_dir)
        print(" o %s" % os.path.basename(ref))
        print(" o %s" % os.path.basename(test))

    return ref_filenames, test_filenames
00248
00249
00250
00251
def get_clean_fileanames(ref_samples,test_samples):
    """Split two comma separated file lists and validate them.

    Whitespace around every entry is stripped.  The program exits with
    status 2 when the two lists differ in length or when
    check_root_files() rejects one of them.

    Returns (ref_filenames, test_filenames) as two lists of strings.
    """
    ref_filenames = [entry.strip() for entry in ref_samples.split(",")]
    test_filenames = [entry.strip() for entry in test_samples.split(",")]

    if len(ref_filenames) != len(test_filenames):
        print("The numebr of reference and test files does not seem to be the same. Please check.")
        exit(2)

    # Short-circuits like the original: the test list is only checked when
    # the reference list passed.
    all_root = check_root_files(ref_filenames) and check_root_files(test_filenames)
    if not all_root:
        exit(2)
    return ref_filenames, test_filenames
00264
00265
00266
def count_alive_processes(p_list):
    """Count the entries of *p_list* whose `returncode` attribute is None.

    Counting with a generator avoids calling len() on the result of
    filter(), which is only possible where filter() returns a list.
    """
    return sum(1 for p in p_list if p.returncode is None)
00269
00270
00271
def call_compare_using_files(args):
    """Run compare_using_files.py on one reference/test pair.

    *args* is a 4-item sequence (sample, ref_filename, test_filename,
    options) so that the function can be used with Pool.map().  The
    blacklist for the sample is obtained from guess_blacklists().

    The command is assembled as an argument list instead of a string that
    is later split on blanks, so file or sample names containing spaces
    reach the script intact.

    Returns the value of subprocess.call(), i.e. the exit status.
    """
    sample, ref_filename, test_filename, options = args
    blacklists = guess_blacklists([sample],
                                  name2version(ref_filename),
                                  name2version(test_filename),
                                  options.hlt)

    command = ["compare_using_files.py", ref_filename, test_filename, "-C", "-R"]
    if options.do_pngs:
        command.append("-p")
    command += ["-o", sample]

    command.append("--specify_run")
    command += ["-t", str(options.test_threshold)]
    command += ["-s", str(options.stat_test)]

    # Restrict the comparison to the HLT directory.
    if options.hlt:
        command += ["-d", "HLT"]

    if options.hash_name:
        command.append("--hash_name")

    if options.blacklist_file:
        command.append("--use_black_file")

    if len(blacklists[sample]) > 0:
        command += ["-B", blacklists[sample]]
    print("\nExecuting -- %s" % " ".join(command))

    return call(command)
00304
00305
00306
00307
def do_comparisons_threaded(options):
    """Compare all reference/test pairs with a pool of worker processes.

    The file pairs come either from the directory in options.all_samples
    or from the comma separated options.ref_samples / options.test_samples.
    The comparisons run inside the output directory (options.out_dir, or
    "<ver1>VS<ver2>" when that is empty), which is created when missing.

    Side effects: options.input_dir is set to the output directory when it
    was empty; the working directory is changed for the duration of the
    comparisons and restored afterwards, also on error.

    Returns 0 when no sample could be determined, None otherwise.
    """
    n_processes = int(options.n_processes)

    if len(options.all_samples) > 0:
        ref_filenames, test_filenames = get_filenames_from_pool(options.all_samples)
    else:
        ref_filenames, test_filenames = get_clean_fileanames(options.ref_samples, options.test_samples)

    # Make the paths absolute: the working directory changes below.
    ref_filenames = [os.path.abspath(name) for name in ref_filenames]
    test_filenames = [os.path.abspath(name) for name in test_filenames]

    samples, cmssw_version1, cmssw_version2 = guess_params(ref_filenames, test_filenames)

    if len(samples) == 0:
        print("No Samples found... Quitting")
        return 0

    original_dir = os.getcwd()

    outdir = options.out_dir
    if len(outdir) == 0:
        outdir = "%sVS%s" % (cmssw_version1, cmssw_version2)
        print("Creating automatic outdir: %s" % outdir)
    if len(options.input_dir) == 0:
        options.input_dir = outdir
        print("Creating automatic indir: %s" % options.input_dir)

    if not os.path.exists(outdir):
        # makedirs also copes with an output directory nested in missing parents.
        os.makedirs(outdir)
    os.chdir(outdir)

    try:
        # Do not start more workers than there are comparisons.
        n_comparisons = len(ref_filenames)
        if n_comparisons < n_processes:
            n_processes = n_comparisons
            print("Less comparisons than possible processes: reducing n processes to %s" % n_processes)
        # Pool() rejects a worker count below one.
        n_processes = max(1, n_processes)

        args_iterable = [list(args) + [options]
                         for args in zip(samples, ref_filenames, test_filenames)]

        pool = Pool(n_processes)
        try:
            pool.map(call_compare_using_files, args_iterable)
        finally:
            # Release the worker processes whatever happened.
            pool.close()
            pool.join()

        # Move the pickles written in the sample sub-directories to the top.
        os.system("mv */*pkl .")
    finally:
        # Go back to where we started; ".." is wrong for nested or absolute
        # output directories.
        os.chdir(original_dir)
00385
def do_reports(indir):
    """Run compare_using_files.py on every pickle found in *indir*.

    Reads the module-level `options` object (its `do_pngs` flag), which is
    only defined when the file is executed as a script.

    subprocess.call() blocks until the command has finished and returns
    its integer exit status, so the reports are produced one after the
    other.  The former bookkeeping called .wait() on those integers and
    raised AttributeError as soon as the batch limit was reached; it has
    been removed.

    Only names ending in ".pkl" are processed, in line with the output
    name being the file name minus its last four characters.  The working
    directory is restored on exit, also on error.
    """
    original_dir = os.getcwd()
    os.chdir(indir)
    try:
        for pklfilename in os.listdir("./"):
            if not pklfilename.endswith(".pkl"):
                continue
            command = ["compare_using_files.py", "-R"]
            if options.do_pngs:
                command.append("-p")
            command += ["-P", pklfilename]
            command += ["-o", pklfilename[:-4]]  # strip the ".pkl" suffix
            print("Executing %s" % " ".join(command))
            call(command)
    finally:
        os.chdir(original_dir)
00412
00413
def do_html(options, hashing_flag):
    """Write the summary page "RelMonSummary.html" in the working directory.

    When options.reports is set, do_reports() is run on options.input_dir
    first.  The aggregation rules are taken from the 'HLT' or the 'reco'
    entry of the `definitions` dictionaries depending on options.hlt, and
    handed to make_summary_table() together with *hashing_flag*.
    """
    if options.reports:
        print("Preparing reports for the single files...")
        do_reports(options.input_dir)

    # Pick the aggregation rules to be used for the summary page.
    if options.hlt:
        print("Aggregating directories according to HLT rules")
        rules_key = 'HLT'
    else:
        rules_key = 'reco'
    aggregation_rules = definitions.aggr_pairs_dict[rules_key]
    aggregation_rules_twiki = definitions.aggr_pairs_twiki_dict[rules_key]

    table_html = make_summary_table(options.input_dir,
                                    aggregation_rules,
                                    aggregation_rules_twiki,
                                    hashing_flag)

    # The with-block closes the file even when the write fails.
    with open("RelMonSummary.html", "w") as ofile:
        ofile.write(table_html)
00436
00437
00438
if __name__ == "__main__":
    # Command-line entry point: parse the options, run the comparisons when
    # samples were given and build the html summary when an input directory
    # is known (do_comparisons_threaded fills it in when left empty).
    #
    # NOTE: `options` must stay a module-level name, do_reports() reads it.

    # Default values of the options.
    ref_samples = ""
    test_samples = ""
    all_samples = ""
    n_processes = 1
    out_dir = ""
    in_dir = ""
    run = -1
    stat_test = "Chi2"
    test_threshold = 0.00001
    hlt = False

    parser = OptionParser(usage="usage: %prog [options]")

    # The long names of -R and -r used to be registered with a trailing
    # blank ("--ref_samples ", "--run "); they now match the documented names.
    parser.add_option("-R", "--ref_samples",
                      action="store",
                      dest="ref_samples",
                      default=ref_samples,
                      help="The samples that act as reference (comma separated list)")

    parser.add_option("-T", "--test_samples",
                      action="store",
                      dest="test_samples",
                      default=test_samples,
                      help="The samples to be tested (comma separated list)")

    parser.add_option("-a", "--all_samples",
                      action="store",
                      dest="all_samples",
                      default=all_samples,
                      help="EXPERIMENTAL: Try to sort all samples selected (wildacrds) and organise a comparison")

    parser.add_option("-o", "--out_dir",
                      action="store",
                      dest="out_dir",
                      default=out_dir,
                      help="The outdir other than <Version1>VS<Version2>")

    parser.add_option("-p", "--do_pngs",
                      action="store_true",
                      dest="do_pngs",
                      default=False,
                      help="EXPERIMENTAL!!! Do the pngs of the comparison (takes 50%% of the total running time) \n(default is %s)" % False)

    parser.add_option("-r", "--run",
                      action="store",
                      dest="run",
                      default=run,
                      help="The run to be checked \n(default is %s)" % run)

    parser.add_option("-t", "--test_threshold",
                      action="store",
                      dest="test_threshold",
                      default=test_threshold,
                      help="Threshold for the statistical test \n(default is %s)" % test_threshold)

    parser.add_option("-s", "--stat_test",
                      action="store",
                      dest="stat_test",
                      default=stat_test,
                      help="Statistical test (KS or Chi2) \n(default is %s)" % stat_test)

    parser.add_option("-N", "--numberOfProcesses",
                      action="store",
                      dest="n_processes",
                      default=n_processes,
                      help="Number of parallel processes to be run. Be Polite! \n(default is %s)" % n_processes)

    parser.add_option("--HLT",
                      action="store_true",
                      dest="hlt",
                      default=False,
                      help="Analyse HLT histograms\n(default is %s)" % hlt)

    parser.add_option("-i", "--input_dir",
                      action="store",
                      dest="input_dir",
                      default=in_dir,
                      help="Input directory for html creation \n(default is %s)" % in_dir)

    # The help text used to interpolate in_dir (an empty string) instead of
    # the actual default of this flag.
    parser.add_option("--reports",
                      action="store_true",
                      dest="reports",
                      default=False,
                      help="Do the reports for the pickles \n(default is %s)" % False)

    parser.add_option("--hash_name",
                      action="store_true",
                      dest="hash_name",
                      default=False,
                      help="Set if you want to minimize & hash the output HTML files.")

    parser.add_option("--use_black_file",
                      action="store_true",
                      dest="blacklist_file",
                      default=False,
                      help="Use a black list file of histograms located @ /RelMon/data")

    (options, args) = parser.parse_args()

    # Comparisons need either a pool directory or both explicit lists.
    have_pool = len(options.all_samples) > 0
    have_pairs = len(options.ref_samples) > 0 and len(options.test_samples) > 0

    if not (have_pool or have_pairs) and len(options.input_dir) == 0:
        print("No samples given as input.")
        parser.print_help()
        exit(2)

    if have_pool or have_pairs:
        do_comparisons_threaded(options)
    if len(options.input_dir) > 0:
        do_html(options, options.hash_name)
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564