CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
ValidationMatrix.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 ################################################################################
3 # RelMon: a tool for automatic Release Comparison
4 # https://twiki.cern.ch/twiki/bin/view/CMSPublic/RelMon
5 #
6 # $Author: anorkus $
7 # $Date: 2013/07/10 14:37:45 $
8 # $Revision: 1.11 $
9 #
10 #
11 # Danilo Piparo CERN - danilo.piparo@cern.ch
12 #
13 ################################################################################
14 
15 from optparse import OptionParser
16 
17 import os
18 import cPickle
19 import glob
20 from re import search
21 from subprocess import call,PIPE
22 from multiprocessing import Pool
23 from sys import exit
24 
25 import sys
26 argv=sys.argv
27 sys.argv=[]
28 if os.environ.has_key("RELMON_SA"):
29  import definitions as definitions
30  from dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
31  from dirstructure import Directory
32  from directories2html import directory2html,make_summary_table
33  from utils import ask_ok, unpickler, make_files_pairs
34 else:
35  import Utilities.RelMon.definitions as definitions
36  from Utilities.RelMon.dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
37  from Utilities.RelMon.dirstructure import Directory
38  from Utilities.RelMon.directories2html import directory2html,make_summary_table
39  from Utilities.RelMon.utils import ask_ok, unpickler, make_files_pairs
40 sys.argv=argv
41 
42 #-------------------------------------------------------------------------------
43 
def name2sample(filename):
    """Return the sample name encoded in a file name.

    The base name of *filename* is split on double underscores and the
    second field is returned. An IndexError is raised when the base name
    contains no ``__`` separator.
    """
    fields = os.path.basename(filename).split("__")
    return fields[1]
47 
def name2version(filename):
    """Return the version string encoded in a file name.

    The base name of *filename* is split on double underscores and the
    third field is returned. An IndexError is raised when the base name
    has fewer than three ``__``-separated fields.
    """
    fields = os.path.basename(filename).split("__")
    return fields[2]
51 
def name2run(filename):
    """Return the run token encoded in a file name.

    Only the part of the base name before the first ``__`` is considered;
    its third ``_``-separated field is returned. An IndexError is raised
    when that leading part has fewer than three fields.
    """
    leading_part = os.path.basename(filename).partition("__")[0]
    return leading_part.split("_")[2]
55 
def name2runskim(filename):
    """Return ``"<run>_<skim>"`` for a file name.

    The run comes from name2run(); the skim is the last ``_``-separated
    field of name2version() with everything from the last ``-v`` onwards
    (the skim version) removed.
    """
    run_token = name2run(filename)
    skim_token = name2version(filename).split("_")[-1]
    # Remove the skim version, if there is one.
    version_pos = skim_token.rfind("-v")
    if version_pos != -1:
        skim_token = skim_token[:version_pos]
    return "%s_%s" % (run_token, skim_token)
63 
64 #-------------------------------------------------------------------------------
65 
def guess_params(ref_filenames,test_filenames):
    """Guess the sample names and the two versions from the file names.

    The reference and test files are walked pairwise (extra entries of the
    longer list are ignored). For every pair the reference sample name is
    recorded; when the reference version matches the regex ``20[01]`` the
    last ``_``-separated field of that version is appended to the sample
    name.

    Returns a tuple ``(samples, version1, version2)`` where the versions
    are those of the first reference/test pair. When either input list is
    empty, a message is printed and ``([], "", "")`` is returned.
    """
    if len(ref_filenames) == 0 or len(test_filenames) == 0:
        print("Empty reference and test filenames lists!")
        return [],"",""

    samples=[]
    ref_versions=[]
    test_versions=[]

    for ref_path, test_path in zip(ref_filenames, test_filenames):
        ref = os.path.basename(ref_path)
        test = os.path.basename(test_path)

        ref_sample=name2sample(ref)
        ref_version=name2version(ref)
        test_sample=name2sample(test)
        test_version=name2version(test)

        print(" ## sample 1: %s vs sample 2: %s" % (ref_sample, test_sample))

        # A sample mismatch is reported but deliberately not fatal.
        if ref_sample!=test_sample:
            print("Files %s and %s do not seem to be relative to the same sample." % (ref, test))

        # Slightly modify for data
        if search("20[01]",ref_version) is not None:
            ref_sample+=ref_version.split("_")[-1]
        samples.append(ref_sample)

        ref_versions.append(ref_version)
        test_versions.append(test_version)

    # Mixed versions are tolerated. Report the versions of the first pair:
    # taking element 0 of a set, as was done before, picks an arbitrary
    # version whenever the inputs are not all the same.
    cmssw_version1=ref_versions[0]
    cmssw_version2=test_versions[0]

    return samples,cmssw_version1,cmssw_version2
111 
112 
113 #-------------------------------------------------------------------------------
114 
def check_root_files(names_list):
    """Check that every name in *names_list* ends with ``.root``.

    Returns True when all names pass (also for an empty list). At the
    first name that does not, a diagnostic naming that file is printed and
    False is returned.
    """
    for name in names_list:
        if not name.endswith(".root"):
            # The file name has to be interpolated, otherwise the user
            # sees a literal "%s" instead of the offending file.
            print("File %s does not seem to be a rootfile. Please check." % name)
            return False
    return True
121 
122 #-------------------------------------------------------------------------------
123 
def add_to_blacklist(blacklist, pattern, target, blist_piece):
    """Append *blist_piece* to *blacklist* when *pattern* applies to *target*.

    *pattern* is stripped of surrounding whitespace and used as a regular
    expression searched in *target*. A leading ``!`` negates the rule: the
    piece is then added only when the rest of the pattern is NOT found.
    A blank pattern is searched as the empty regex, which matches every
    target.

    Returns the resulting comma-separated blacklist string; the piece is
    joined with a comma unless *blacklist* is empty.
    """
    int_pattern = pattern.strip()
    # startswith() rather than [0]: a blank pattern must not raise IndexError.
    flip_condition = int_pattern.startswith('!')
    if flip_condition:
        int_pattern = int_pattern[1:]

    condition = search(int_pattern, target) is not None
    if flip_condition:
        condition = not condition

    if condition:
        if blacklist != "":  # if not the first, add a comma
            blacklist += ","
        blacklist += blist_piece
    return blacklist
144 
145 #-------------------------------------------------------------------------------
146 
def guess_blacklists(samples,ver1,ver2,hlt):
    """Build a blacklist for each sample according to a set of rules.

    Every sample starts from a fixed base blacklist. *ver1* is treated as
    a data version when it matches the regex ``2010+|2011+`` and as Monte
    Carlo otherwise; *hlt* selects the HLT rules instead of the RECO ones.
    The pattern/blacklist pairs come from the ``definitions`` module and
    are applied through add_to_blacklist(). *ver2* is accepted but unused.

    Returns a dict mapping each sample to its comma-separated blacklist.
    """
    blacklists = {}
    for sample in samples:
        entry = "FED@1,AlcaBeamMonitor@1,HLT@1,AlCaReco@1"
        is_mc = not search("2010+|2011+", ver1)

        if hlt:
            entry += ",AlCaEcalPi0@2"
            if is_mc:
                print("We are treating MC files for the HLT")
                for pattern, blist in definitions.hlt_mc_pattern_blist_pairs:
                    entry = add_to_blacklist(entry, pattern, sample, blist)
            else:
                # No extra rules here: at the moment it does not make
                # sense since hlt is ran already.
                print("We are treating Data files for the HLT")
        elif is_mc:
            # RECO, Monte Carlo: patterns are matched against the sample.
            print("We are treating MC files")
            for pattern, blist in definitions.mc_pattern_blist_pairs:
                entry = add_to_blacklist(entry, pattern, sample, blist)
        else:
            # RECO, data: patterns are matched against the version.
            print("We are treating Data files:")
            entry += ",By__Lumi__Section@-1,AlCaReco@1"
            for pattern, blist in definitions.data_pattern_blist_pairs:
                entry = add_to_blacklist(entry, pattern, ver1, blist)

        blacklists[sample] = entry

    return blacklists
185 
186 #-------------------------------------------------------------------------------
187 
def get_roofiles_in_dir(directory):
    """Return the paths of the ``.root`` files directly inside *directory*.

    The directory is printed first. Each returned path is *directory*
    joined with the entry name; the order is the one given by os.listdir().
    """
    print(directory)
    return [os.path.join(directory, entry)
            for entry in os.listdir(directory)
            if entry.endswith(".root")]
194 
195 #-------------------------------------------------------------------------------
196 
def get_filenames_from_pool(all_samples):
    """Guess reference/test file pairs from the root files of a directory.

    The root files found in *all_samples* are listed, reordered with
    make_files_pairs() and then dealt out alternately: entries at even
    positions become references, entries at odd positions become tests.
    The guessed pairs are printed.

    Returns ``(ref_filenames, test_filenames)``, or ``([], [])`` when the
    directory holds no root files.
    """
    # get a list of the files
    files_list = get_roofiles_in_dir(all_samples)

    if len(files_list) == 0:
        print("Zero files found in directory %s!" % all_samples)
        return [],[]

    # Are they an even number?
    for name in files_list:
        print("*  %s" % (name,))
    if len(files_list) % 2 != 0:
        print("The numbuer of file is not even... Trying to recover a catastrophe.")

    # NOTE(review): the pairing is applied for every input, not only for
    # an odd number of files -- confirm against the upstream sources.
    files_list = make_files_pairs(files_list)

    # Try to couple them according to their sample
    ref_filenames = []
    test_filenames = []
    for position, filename in enumerate(files_list):
        if position % 2 == 0:
            ref_filenames.append(filename)
        else:
            test_filenames.append(filename)

    print("The guess would be the following:")
    for ref, test in zip(ref_filenames, test_filenames):
        refbasedir = os.path.dirname(ref)
        testbasedir = os.path.dirname(test)
        if refbasedir != testbasedir:
            dir_to_print = "%s and %s" % (refbasedir, testbasedir)
        else:
            dir_to_print = refbasedir
        print("* Directory: %s " % dir_to_print)
        print(" o %s" % os.path.basename(ref))
        print(" o %s" % os.path.basename(test))

    # The guess is accepted as is: no interactive confirmation is asked.
    return ref_filenames,test_filenames
247 
248 
249 #-------------------------------------------------------------------------------
250 
def get_clean_fileanames(ref_samples,test_samples):
    """Turn two comma separated strings into lists of file names.

    Each string is split on commas and every piece is stripped of
    surrounding whitespace. The process exits with status 2 when the two
    lists differ in length or when check_root_files() rejects one of them.

    Returns ``(ref_filenames, test_filenames)``.
    """
    # Process the samples starting from the names
    ref_filenames = [piece.strip() for piece in ref_samples.split(",")]
    test_filenames = [piece.strip() for piece in test_samples.split(",")]

    if len(ref_filenames) != len(test_filenames):
        print("The numebr of reference and test files does not seem to be the same. Please check.")
        exit(2)

    if not check_root_files(ref_filenames) or not check_root_files(test_filenames):
        exit(2)
    return ref_filenames,test_filenames
263 
264 #-------------------------------------------------------------------------------
265 
267  return len(filter(lambda p: p.returncode==None,p_list))
268 
269 #-------------------------------------------------------------------------------
270 
272  """Creates shell command to compare two files using compare_using_files.py
273  script and calls it."""
274  sample, ref_filename, test_filename, options = args
275  blacklists=guess_blacklists([sample],name2version(ref_filename),name2version(test_filename),options.hlt)
276  command = " compare_using_files.py "
277  command+= "%s %s " %(ref_filename,test_filename)
278  command+= " -C -R "
279  if options.do_pngs:
280  command+= " -p "
281  command+= " -o %s " %sample
282  # Change threshold to an experimental and empirical value of 10^-5
283  command+= " --specify_run "
284  command+= " -t %s " %options.test_threshold
285  command+= " -s %s " %options.stat_test
286 
287  # Inspect the HLT directories
288  if options.hlt:
289  command+=" -d HLT "
290 
291  if options.hash_name:
292  command += " --hash_name "
293 
294  if options.blacklist_file:
295  command += " --use_black_file "
296 
297  if options.standalone:
298  command += " --standalone "
299  if len(blacklists[sample]) >0:
300  command+= '-B %s ' %blacklists[sample]
301  print "\nExecuting -- %s" %command
302 
303  process=call(filter(lambda x: len(x)>0,command.split(" ")))
304  return process
305 
306 
307 #--------------------------------------------------------------------------------
308 
310 
311  n_processes= int(options.n_processes)
312 
313  ref_filenames=[]
314  test_filenames=[]
315 
316  if len(options.all_samples)>0:
317  ref_filenames,test_filenames=get_filenames_from_pool(options.all_samples)
318  else:
319  ref_filenames,test_filenames=get_clean_fileanames(options.ref_samples,options.test_samples)
320 
321  # make the paths absolute
322  ref_filenames=map(os.path.abspath,ref_filenames)
323  test_filenames=map(os.path.abspath,test_filenames)
324 
325  samples,cmssw_version1,cmssw_version2=guess_params(ref_filenames,test_filenames)
326 
327  if len(samples)==0:
328  print "No Samples found... Quitting"
329  return 0
330 
331 # blacklists=guess_blacklists(samples,cmssw_version1,cmssw_version2,options.hlt)
332 
333  # Launch the single comparisons
334  original_dir=os.getcwd()
335 
336  outdir=options.out_dir
337  if len(outdir)==0:
338  print "Creating automatic outdir:",
339  outdir="%sVS%s" %(cmssw_version1,cmssw_version2)
340  print outdir
341  if len(options.input_dir)==0:
342  print "Creating automatic indir:",
343  options.input_dir=outdir
344  print options.input_dir
345 
346  if not os.path.exists(outdir):
347  os.mkdir(outdir)
348  os.chdir(outdir)
349 
350  # adjust the number of threads
351  n_comparisons=len(ref_filenames)
352  if n_comparisons < n_processes:
353  print "Less comparisons than possible processes: reducing n processes to",
354  n_processes=n_comparisons
355  #elif n_processes/n_comparisons == 0:
356  #print "More comparisons than possible processes, can be done in N rounds: reducing n processes to",
357  #original_nprocesses=n_processes
358  #first=True
359  #n_bunches=0
360  #while first or n_processes > original_nprocesses:
361  #n_processes=n_comparisons/2
362  #if n_comparisons%2 !=0:
363  #n_processes+=1
364  #first=False
365 
366  #print n_processes
367  #print n_processes
368 
369  # Test if we treat data
370  skim_name=""
371  if search("20[01]",cmssw_version1)!=None:
372  skim_name=cmssw_version1.split("_")[-1]
373 
374  running_subprocesses=[]
375  process_counter=0
376  #print ref_filenames
377 
378  ## Compare all pairs of root files
379  pool = Pool(n_processes)
380  args_iterable = [list(args) + [options] for args in zip(samples, ref_filenames, test_filenames)]
381  pool.map(call_compare_using_files, args_iterable)
382  # move the pickles on the top, hack
383  os.system("mv */*pkl .")
384 
385  os.chdir("..")
386 #-------------------------------------------------------------------------------
def do_reports(indir):
    """Run compare_using_files.py on every pickle found in *indir*.

    The working directory is changed to *indir*; for each entry whose name
    contains ``.pkl`` a ``compare_using_files.py -R -P <pickle> -o <name>``
    command is started (with ``-p`` when ``options.do_pngs`` is set), where
    ``<name>`` is the pickle name without its last four characters. The
    commands run in batches of at most ``options.n_processes`` and every
    batch is waited for before the next one starts. Finally the working
    directory is moved one level up.

    Note: this function reads the module level ``options`` object, which is
    only created when the file is executed as a script.
    NOTE(review): ``os.chdir("..")`` only returns to the starting directory
    when *indir* is a direct subdirectory of it -- confirm the callers.
    """
    # Imported locally: the module itself only imports ``call`` and ``PIPE``.
    from subprocess import Popen

    n_processes = int(options.n_processes)

    os.chdir(indir)
    pkl_list = [name for name in os.listdir("./") if ".pkl" in name]
    running_subprocesses = []
    for pklfilename in pkl_list:
        command = "compare_using_files.py "
        command += "-R "
        if options.do_pngs:
            command += " -p "
        command += "-P %s " % pklfilename
        command += "-o %s " % pklfilename[:-4]
        print("Executing %s" % command)
        # Popen gives a process handle that can be waited for; call() only
        # returns an integer exit status, on which .wait() cannot be used.
        arguments = [token for token in command.split(" ") if len(token) > 0]
        running_subprocesses.append(Popen(arguments))
        if len(running_subprocesses) >= n_processes:
            for process in running_subprocesses:
                process.wait()
            running_subprocesses = []

    # Wait for the last, possibly incomplete, batch.
    for process in running_subprocesses:
        process.wait()

    os.chdir("..")
413 
414 #-------------------------------------------------------------------------------
def do_html(options, hashing_flag, standalone):
    """Write the ``RelMonSummary.html`` summary page.

    When ``options.reports`` is set, the per-pickle reports are produced
    first with do_reports(options.input_dir). The aggregation rules are
    taken from the ``definitions`` module, under the key ``'HLT'`` when
    ``options.hlt`` is set and ``'reco'`` otherwise. The table built by
    make_summary_table() is written to ``RelMonSummary.html`` in the
    current working directory. *hashing_flag* and *standalone* are passed
    through to make_summary_table().
    """
    if options.reports:
        print("Preparing reports for the single files...")
        do_reports(options.input_dir)

    # Do the summary page: check which aggregation rules are to be used.
    if options.hlt:
        print("Aggregating directories according to HLT rules")
        rules_key = 'HLT'
    else:
        rules_key = 'reco'
    aggregation_rules = definitions.aggr_pairs_dict[rules_key]
    aggregation_rules_twiki = definitions.aggr_pairs_twiki_dict[rules_key]

    table_html = make_summary_table(options.input_dir,aggregation_rules,aggregation_rules_twiki, hashing_flag, standalone)

    # create summary html file; "with" closes it even when write() fails
    with open("RelMonSummary.html","w") as ofile:
        ofile.write(table_html)
437 
438 #-------------------------------------------------------------------------------
439 
if __name__ == "__main__":
    # Script entry point: parse the command line, run the comparisons when
    # samples are given and build the html summary when an input directory
    # is known. Everything stays at module level on purpose: do_reports()
    # reads the module level name "options" created below.

    #-----------------------------------------------------------------------------
    # Defaults of the command line options
    ref_samples=""
    test_samples=""
    all_samples=""
    n_processes=1
    out_dir=""
    in_dir=""
    n_threads=1 # do not change this
    run=-1
    stat_test="Chi2"
    test_threshold=0.00001
    hlt=False
    #-----------------------------------------------------------------------------

    parser = OptionParser(usage="usage: %prog [options]")

    # The long option strings must not carry trailing blanks: with a blank
    # "--ref_samples" and "--run" are only accepted through optparse's
    # abbreviation matching and show up misspelled in the help.
    parser.add_option("-R","--ref_samples",
                      action="store",
                      dest="ref_samples",
                      default=ref_samples,
                      help="The samples that act as reference (comma separated list)")

    parser.add_option("-T","--test_samples",
                      action="store",
                      dest="test_samples",
                      default=test_samples,
                      help="The samples to be tested (comma separated list)")

    parser.add_option("-a","--all_samples",
                      action="store",
                      dest="all_samples",
                      default=all_samples,
                      help="EXPERIMENTAL: Try to sort all samples selected (wildacrds) and organise a comparison")

    parser.add_option("-o","--out_dir",
                      action="store",
                      dest="out_dir",
                      default=out_dir,
                      help="The outdir other than <Version1>VS<Version2>")

    parser.add_option("-p","--do_pngs",
                      action="store_true",
                      dest="do_pngs",
                      default=False,
                      help="EXPERIMENTAL!!! Do the pngs of the comparison (takes 50%% of the total running time) \n(default is %s)" %False)

    parser.add_option("-r","--run",
                      action="store",
                      dest="run",
                      default=run,
                      help="The run to be checked \n(default is %s)" %run)

    parser.add_option("-t","--test_threshold",
                      action="store",
                      dest="test_threshold",
                      default=test_threshold,
                      help="Threshold for the statistical test \n(default is %s)" %test_threshold)

    parser.add_option("-s","--stat_test",
                      action="store",
                      dest="stat_test",
                      default=stat_test,
                      help="Statistical test (KS or Chi2) \n(default is %s)" %stat_test)

    parser.add_option("-N","--numberOfProcesses",
                      action="store",
                      dest="n_processes",
                      default=n_processes,
                      help="Number of parallel processes to be run. Be Polite! \n(default is %s)" %n_processes)

    parser.add_option("--HLT",
                      action="store_true",
                      dest="hlt",
                      default=False,
                      help="Analyse HLT histograms\n(default is %s)" %hlt)

    parser.add_option("-i","--input_dir",
                      action="store",
                      dest="input_dir",
                      default=in_dir,
                      help="Input directory for html creation \n(default is %s)" %in_dir)

    # The default of this flag is False; the help used to interpolate the
    # (empty) input directory default instead.
    parser.add_option("--reports",
                      action="store_true",
                      dest="reports",
                      default=False,
                      help="Do the reports for the pickles \n(default is %s)" %False)

    ##---HASHING---##
    parser.add_option("--hash_name",
                      action="store_true",
                      dest="hash_name",
                      default=False,
                      help="Set if you want to minimize & hash the output HTML files.")

    ##--Blacklist File --##
    parser.add_option("--use_black_file",
                      action="store_true",
                      dest="blacklist_file",
                      default=False,
                      help="Use a black list file of histograms located @ /RelMon/data")

    ##-- USE CSS files in web access, for stand-alone usage --##
    parser.add_option("--standalone",
                      action="store_true",
                      dest="standalone",
                      default=False,
                      help="Define that using RelMon in standalone method. Makes CSS files accessible over HTTP")

    (options, args) = parser.parse_args()

    # Nothing to do without samples (a ref/test pair or a pool directory)
    # and without an input directory for the html creation.
    if len(options.test_samples)*len(options.ref_samples)+len(options.all_samples)==0 and len(options.input_dir)==0:
        print("No samples given as input.")
        parser.print_help()
        exit(2)

    if len(options.all_samples)>0 or (len(options.ref_samples)*len(options.test_samples)>0):
        do_comparisons_threaded(options)
    # do_comparisons_threaded() may have filled options.input_dir itself.
    if len(options.input_dir)>0:
        do_html(options, options.hash_name, options.standalone)
560 
561 
562 
563 
564 
565 
566 
567 
568 
569 
570 
571 
dictionary map
Definition: Association.py:205
def make_files_pairs
Definition: utils.py:552
double split
Definition: MVATrainer.cc:139
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run