CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
ValidationMatrix.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 ################################################################################
3 # RelMon: a tool for automatic Release Comparison
4 # https://twiki.cern.ch/twiki/bin/view/CMSPublic/RelMon
5 #
6 #
7 #
8 # Danilo Piparo CERN - danilo.piparo@cern.ch
9 #
10 ################################################################################
11 
12 from optparse import OptionParser
13 
14 import os
15 import cPickle
16 import glob
17 from re import search
18 from subprocess import call,PIPE
19 from multiprocessing import Pool
20 from sys import exit
21 
22 import sys
23 argv=sys.argv
24 sys.argv=[]
25 if os.environ.has_key("RELMON_SA"):
26  import definitions as definitions
27  from dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
28  from dirstructure import Directory
29  from directories2html import directory2html,make_summary_table
30  from utils import ask_ok, unpickler, make_files_pairs
31 else:
32  import Utilities.RelMon.definitions as definitions
33  from Utilities.RelMon.dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
34  from Utilities.RelMon.dirstructure import Directory
35  from Utilities.RelMon.directories2html import directory2html,make_summary_table
36  from Utilities.RelMon.utils import ask_ok, unpickler, make_files_pairs
37 sys.argv=argv
38 
39 #-------------------------------------------------------------------------------
40 
def name2sample(filename):
    """Return the sample field encoded in a root file name.

    The base name of *filename* is split on double underscores ("__") and
    the second field is returned.

    Raises IndexError when the base name contains no "__" separator.
    """
    return os.path.basename(filename).split("__")[1]
44 
def name2version(filename):
    """Return the version field encoded in a root file name.

    The base name of *filename* is split on double underscores ("__") and
    the third field is returned.

    Raises IndexError when the base name has fewer than three "__"
    separated fields.
    """
    return os.path.basename(filename).split("__")[2]
48 
def name2run(filename):
    """Return the run field encoded in a root file name.

    The first "__"-separated field of the base name is split again on
    single underscores and its third piece is returned.

    Raises IndexError when that first field has fewer than three "_"
    separated pieces.
    """
    leading_field = os.path.basename(filename).split("__")[0]
    return leading_field.split("_")[2]
52 
def name2runskim(filename):
    """Return "<run>_<skim>" built from a root file name.

    The run comes from name2run(); the skim is the last "_"-separated piece
    of the version returned by name2version(), with everything from the last
    "-v" onwards removed when such a marker is present.
    """
    run = name2run(filename)
    skim = name2version(filename).split("_")[-1]
    # remove skim version
    if "-v" in skim:
        skim = skim[:skim.rfind('-v')]
    return "%s_%s" % (run, skim)
60 
61 #-------------------------------------------------------------------------------
62 
def guess_params(ref_filenames, test_filenames):
    """Derive sample names and the two versions from paired file names.

    The files are paired positionally (zip), so extra entries in the longer
    list are ignored.  For every pair the sample and version are extracted
    with name2sample() / name2version().  A mismatch between the two sample
    names is only reported, it does not stop the processing.

    Returns a tuple (samples, version1, version2):
      * samples  -- one entry per pair, taken from the reference file; when
                    the reference version matches the regex "20[01]" the
                    last "_"-separated piece of that version is appended to
                    the sample name.
      * version1 -- version of the first reference file.
      * version2 -- version of the first test file.
    ([], "", "") is returned when either input list is empty.
    """
    if len(ref_filenames) == 0 or len(test_filenames) == 0:
        print("Empty reference and test filenames lists!")
        return [], "", ""

    samples = []
    ref_versions = []
    test_versions = []

    ref_names = map(os.path.basename, ref_filenames)
    test_names = map(os.path.basename, test_filenames)
    for ref, test in zip(ref_names, test_names):

        ref_sample = name2sample(ref)
        ref_version = name2version(ref)
        test_sample = name2sample(test)
        test_version = name2version(test)

        print(" ## sample 1: %s vs sample 2: %s" % (ref_sample, test_sample))

        if ref_sample != test_sample:
            print("Files %s and %s do not seem to be relative to the same sample." % (ref, test))
            # exit(2)

        # Slightly modify for data
        if search("20[01]", ref_version) is not None:
            ref_sample += ref_version.split("_")[-1]
        samples.append(ref_sample)

        # append the versions
        ref_versions.append(ref_version)
        test_versions.append(test_version)

    # The versions of the first pair are reported.  De-duplicating through
    # set() (as done previously) made the choice arbitrary whenever more
    # than one version was present, because set ordering is unspecified.
    cmssw_version1 = ref_versions[0]
    cmssw_version2 = test_versions[0]

    return samples, cmssw_version1, cmssw_version2
108 
109 
110 #-------------------------------------------------------------------------------
111 
def check_root_files(names_list):
    """Tell whether every name in *names_list* ends with ".root".

    Returns False at the first offending name, after printing a message
    that identifies it, and True otherwise (including for an empty list).
    """
    for name in names_list:
        if not name.endswith(".root"):
            # The file name is interpolated so the user knows what to check.
            print("File %s does not seem to be a rootfile. Please check." % name)
            return False
    return True
118 
119 #-------------------------------------------------------------------------------
120 
def add_to_blacklist(blacklist, pattern, target, blist_piece):
    """Append *blist_piece* to *blacklist* when *pattern* applies to *target*.

    *pattern* is stripped of surrounding whitespace and used as a regular
    expression searched in *target*.  A leading "!" negates the rule: the
    piece is then added only when the remaining pattern is NOT found.

    Pieces are comma separated; no comma is emitted when *blacklist* is
    still empty.  The (possibly extended) blacklist string is returned.

    An empty or whitespace-only pattern no longer raises IndexError; as a
    regular expression it matches every target.
    """
    int_pattern = pattern.strip()
    # startswith() is safe on an empty string, unlike indexing [0].
    flip_condition = int_pattern.startswith('!')
    if flip_condition:
        int_pattern = int_pattern[1:]

    condition = search(int_pattern, target) is not None
    if flip_condition:
        condition = not condition

    if condition:
        if blacklist != "":  # if not the first, add a comma
            blacklist += ","
        blacklist += blist_piece
    return blacklist
141 
142 #-------------------------------------------------------------------------------
143 
def guess_blacklists(samples, ver1, ver2, hlt):
    """Build a blacklist for each sample according to a set of rules.

    Every sample starts from a fixed base blacklist.  Further pieces are
    appended through add_to_blacklist() using the pattern/piece pairs found
    in the ``definitions`` module:
      * HLT, MC    -- hlt_mc_pattern_blist_pairs, matched against the sample
      * HLT, data  -- nothing beyond the HLT specific piece
      * RECO, MC   -- mc_pattern_blist_pairs, matched against the sample
      * RECO, data -- data_pattern_blist_pairs, matched against *ver1*

    Files are considered data when *ver1* matches the regular expression
    "2010+|2011+".  *ver2* is accepted but not used.

    Returns a dict mapping each sample to its comma separated blacklist.
    """
    # The data/MC decision depends only on ver1, so take it once.
    is_data = search("2010+|2011+", ver1) is not None

    blacklists = {}
    for sample in samples:
        blacklist = "FED@1,AlcaBeamMonitor@1,HLT@1,AlCaReco@1"

        if hlt:  # HLT
            blacklist += ",AlCaEcalPi0@2"
            if is_data:
                print("We are treating Data files for the HLT")
                # at the moment it does not make sense since hlt is ran already
            else:
                print("We are treating MC files for the HLT")
                for pattern, blist in definitions.hlt_mc_pattern_blist_pairs:
                    blacklist = add_to_blacklist(blacklist, pattern, sample, blist)

        elif is_data:  # RECO, data
            print("We are treating Data files:")
            blacklist += ",By__Lumi__Section@-1,AlCaReco@1"
            for pattern, blist in definitions.data_pattern_blist_pairs:
                # data rules are matched against the version, not the sample
                blacklist = add_to_blacklist(blacklist, pattern, ver1, blist)

        else:  # RECO, Monte Carlo
            print("We are treating MC files")
            for pattern, blist in definitions.mc_pattern_blist_pairs:
                blacklist = add_to_blacklist(blacklist, pattern, sample, blist)

        blacklists[sample] = blacklist

    return blacklists
182 
183 #-------------------------------------------------------------------------------
184 
def get_roofiles_in_dir(directory):
    """Return the paths of the ".root" files directly inside *directory*.

    The directory is echoed to standard output first.  Each returned path is
    *directory* joined with the file name; the order is the one given by
    os.listdir().
    """
    print(directory)
    return [os.path.join(directory, entry)
            for entry in os.listdir(directory)
            if entry.endswith(".root")]
191 
192 #-------------------------------------------------------------------------------
193 
def get_filenames_from_pool(all_samples):
    """Split the root files found in a directory into reference/test lists.

    *all_samples* is a directory; its ".root" files are collected with
    get_roofiles_in_dir(), arranged by make_files_pairs() and then dealt
    alternately: even positions become reference files, odd positions test
    files.  The resulting guess is printed for the user.

    Returns (ref_filenames, test_filenames), or ([], []) when the directory
    holds no root file.
    """
    # get a list of the files
    files_list = get_roofiles_in_dir(all_samples)

    if len(files_list) == 0:
        print("Zero files found in directory %s!" % all_samples)
        return [], []

    # Are they an even number?
    for name in files_list:
        print("*  %s" % name)
    if len(files_list) % 2 != 0:
        print("The numbuer of file is not even... Trying to recover a catastrophe.")

    # NOTE(review): the listing lost its indentation; the pairing is taken to
    # be unconditional (it is separated from the warning above by a blank
    # line) -- confirm against the original file.
    files_list = make_files_pairs(files_list)

    # Try to couple them according to their sample: the files are expected
    # to come as consecutive (reference, test) pairs.
    ref_filenames = list(files_list[0::2])
    test_filenames = list(files_list[1::2])

    print("The guess would be the following:")
    for ref, test in zip(ref_filenames, test_filenames):
        refbasedir = os.path.dirname(ref)
        testbasedir = os.path.dirname(test)
        dir_to_print = refbasedir
        if refbasedir != testbasedir:
            dir_to_print = "%s and %s" % (refbasedir, testbasedir)
        print("* Directory: %s " % dir_to_print)
        print(" o %s" % os.path.basename(ref))
        print(" o %s" % os.path.basename(test))

    return ref_filenames, test_filenames
244 
245 
246 #-------------------------------------------------------------------------------
247 
def get_clean_fileanames(ref_samples, test_samples):
    """Turn two comma separated strings into lists of root file names.

    Each string is split on "," and every item is stripped of surrounding
    whitespace.  The process exits with status 2 when the two lists differ
    in length or when check_root_files() rejects one of them.

    Returns (ref_filenames, test_filenames).
    """
    # Process the samples starting from the names
    ref_filenames = [item.strip() for item in ref_samples.split(",")]
    test_filenames = [item.strip() for item in test_samples.split(",")]

    if len(ref_filenames) != len(test_filenames):
        print("The numebr of reference and test files does not seem to be the same. Please check.")
        exit(2)

    if not (check_root_files(ref_filenames) and check_root_files(test_filenames)):
        exit(2)
    return ref_filenames, test_filenames
260 
261 #-------------------------------------------------------------------------------
262 
264  return len(filter(lambda p: p.returncode==None,p_list))
265 
266 #-------------------------------------------------------------------------------
267 
def call_compare_using_files(args):
    """Creates shell command to compare two files using compare_using_files.py
    script and calls it.

    *args* is a 4-item sequence (sample, ref_filename, test_filename,
    options); a single argument is used so the function can be handed to
    Pool.map().  The blacklist for the sample is computed with
    guess_blacklists() from the versions encoded in the two file names.

    The command is assembled as an argument list rather than a string that
    is split on blanks, so file names containing spaces stay intact.

    Returns the exit status reported by subprocess.call().
    """
    sample, ref_filename, test_filename, options = args
    blacklists = guess_blacklists([sample],
                                  name2version(ref_filename),
                                  name2version(test_filename),
                                  options.hlt)

    command = ["compare_using_files.py", ref_filename, test_filename, "-C", "-R"]
    if options.do_pngs:
        command.append("-p")
    command += ["-o", "%s" % sample]
    command.append("--specify_run")
    # Change threshold to an experimental and empirical value of 10^-5
    command += ["-t", "%s" % options.test_threshold]
    command += ["-s", "%s" % options.stat_test]

    # Inspect the HLT directories
    if options.hlt:
        command += ["-d", "HLT"]

    if options.hash_name:
        command.append("--hash_name")

    if options.blacklist_file:
        command.append("--use_black_file")

    if options.standalone:
        command.append("--standalone")

    if len(blacklists[sample]) > 0:
        command += ["-B", "%s" % blacklists[sample]]

    print("\nExecuting -- %s" % " ".join(command))

    return call(command)
302 
303 
304 #--------------------------------------------------------------------------------
305 
307 
def do_comparisons_threaded(options):
    """Run every reference/test comparison through a pool of processes.

    The file pairs come either from the directory in options.all_samples or
    from the comma separated options.ref_samples / options.test_samples.
    Comparisons are executed inside the output directory (options.out_dir,
    or "<version1>VS<version2>" when empty), which is created if missing.
    When options.input_dir is empty it is set to the output directory.

    Afterwards the pickles produced in the sub-directories are moved to the
    top of the output directory and the working directory in force at call
    time is restored.

    Returns 0 when no sample could be identified, None otherwise.
    """
    n_processes = int(options.n_processes)

    if len(options.all_samples) > 0:
        ref_filenames, test_filenames = get_filenames_from_pool(options.all_samples)
    else:
        ref_filenames, test_filenames = get_clean_fileanames(options.ref_samples,
                                                             options.test_samples)

    # make the paths absolute: the comparisons run from another directory
    ref_filenames = [os.path.abspath(path) for path in ref_filenames]
    test_filenames = [os.path.abspath(path) for path in test_filenames]

    samples, cmssw_version1, cmssw_version2 = guess_params(ref_filenames, test_filenames)

    if len(samples) == 0:
        print("No Samples found... Quitting")
        return 0

    # Launch the single comparisons
    original_dir = os.getcwd()

    outdir = options.out_dir
    if len(outdir) == 0:
        outdir = "%sVS%s" % (cmssw_version1, cmssw_version2)
        print("Creating automatic outdir: %s" % outdir)
    # NOTE(review): the listing lost its indentation; this test is taken to
    # be independent of the previous one -- confirm against the original.
    if len(options.input_dir) == 0:
        options.input_dir = outdir
        print("Creating automatic indir: %s" % options.input_dir)

    if not os.path.exists(outdir):
        os.mkdir(outdir)
    os.chdir(outdir)
    try:
        # adjust the number of processes
        n_comparisons = len(ref_filenames)
        if n_comparisons < n_processes:
            n_processes = n_comparisons
            print("Less comparisons than possible processes: reducing n processes to %s" % n_processes)

        ## Compare all pairs of root files
        args_iterable = [[sample, ref, test, options]
                         for sample, ref, test in zip(samples, ref_filenames, test_filenames)]
        pool = Pool(n_processes)
        try:
            pool.map(call_compare_using_files, args_iterable)
        finally:
            # release the worker processes even if a comparison failed
            pool.close()
            pool.join()

        # move the pickles on the top, hack
        os.system("mv */*pkl .")
    finally:
        # Go back to where we started; a plain chdir("..") is wrong as soon
        # as outdir is absolute or has more than one path component.
        os.chdir(original_dir)
383 #-------------------------------------------------------------------------------
def do_reports(indir):
    """Run compare_using_files.py on every pickle found in *indir*.

    For each file of *indir* whose name ends with ".pkl" the script is
    invoked with "-R -P <pickle> -o <pickle name without .pkl>", plus "-p"
    when options.do_pngs is set.  The invocations are sequential:
    subprocess.call() returns only when the child has finished, so there is
    nothing left to wait for afterwards.

    The module-level ``options`` object is read; in this file it is bound
    only when the file is executed as a script.

    The working directory in force at call time is restored on exit.
    """
    start_dir = os.getcwd()
    os.chdir(indir)
    try:
        # endswith() rather than a substring test: the output name below is
        # obtained by cutting the last four characters (".pkl").
        pkl_list = [name for name in os.listdir("./") if name.endswith(".pkl")]
        for pklfilename in pkl_list:
            command = ["compare_using_files.py", "-R"]
            if options.do_pngs:
                command.append("-p")
            command += ["-P", pklfilename]
            command += ["-o", pklfilename[:-4]]
            print("Executing %s" % " ".join(command))
            call(command)
    finally:
        os.chdir(start_dir)
410 
411 #-------------------------------------------------------------------------------
def do_html(options, hashing_flag, standalone):
    """Write the summary page "RelMonSummary.html" in the current directory.

    When options.reports is set, the per-pickle reports are produced first
    through do_reports(options.input_dir).  The summary table is built by
    make_summary_table() from options.input_dir, using the 'HLT' or 'reco'
    aggregation rules of the ``definitions`` module depending on
    options.hlt; *hashing_flag* and *standalone* are forwarded to it as is.
    """
    if options.reports:
        print("Preparing reports for the single files...")
        do_reports(options.input_dir)

    # Do the summary page
    # check which aggregation rules are to be used
    if options.hlt:
        print("Aggregating directories according to HLT rules")
        rules_key = 'HLT'
    else:
        rules_key = 'reco'
    aggregation_rules = definitions.aggr_pairs_dict[rules_key]
    aggregation_rules_twiki = definitions.aggr_pairs_twiki_dict[rules_key]

    table_html = make_summary_table(options.input_dir,
                                    aggregation_rules,
                                    aggregation_rules_twiki,
                                    hashing_flag,
                                    standalone)

    # create summary html file; "with" closes it even if the write fails
    with open("RelMonSummary.html", "w") as ofile:
        ofile.write(table_html)
434 
435 #-------------------------------------------------------------------------------
436 
if __name__ == "__main__":

    #-----------------------------------------------------------------------------
    # Default values of the command line options
    ref_samples = ""
    test_samples = ""
    all_samples = ""
    n_processes = 1
    out_dir = ""
    in_dir = ""
    n_threads = 1  # do not change this
    run = -1
    stat_test = "Chi2"
    test_threshold = 0.00001
    hlt = False
    #-----------------------------------------------------------------------------

    parser = OptionParser(usage="usage: %prog [options]")

    # The long option strings carry no trailing blank: with one, the option
    # was reachable only through optparse's abbreviation matching.
    parser.add_option("-R", "--ref_samples",
                      action="store",
                      dest="ref_samples",
                      default=ref_samples,
                      help="The samples that act as reference (comma separated list)")

    parser.add_option("-T", "--test_samples",
                      action="store",
                      dest="test_samples",
                      default=test_samples,
                      help="The samples to be tested (comma separated list)")

    parser.add_option("-a", "--all_samples",
                      action="store",
                      dest="all_samples",
                      default=all_samples,
                      help="EXPERIMENTAL: Try to sort all samples selected (wildacrds) and organise a comparison")

    parser.add_option("-o", "--out_dir",
                      action="store",
                      dest="out_dir",
                      default=out_dir,
                      help="The outdir other than <Version1>VS<Version2>")

    parser.add_option("-p", "--do_pngs",
                      action="store_true",
                      dest="do_pngs",
                      default=False,
                      help="EXPERIMENTAL!!! Do the pngs of the comparison (takes 50%% of the total running time) \n(default is %s)" % False)

    parser.add_option("-r", "--run",
                      action="store",
                      dest="run",
                      default=run,
                      help="The run to be checked \n(default is %s)" % run)

    parser.add_option("-t", "--test_threshold",
                      action="store",
                      dest="test_threshold",
                      default=test_threshold,
                      help="Threshold for the statistical test \n(default is %s)" % test_threshold)

    parser.add_option("-s", "--stat_test",
                      action="store",
                      dest="stat_test",
                      default=stat_test,
                      help="Statistical test (KS or Chi2) \n(default is %s)" % stat_test)

    parser.add_option("-N", "--numberOfProcesses",
                      action="store",
                      dest="n_processes",
                      default=n_processes,
                      help="Number of parallel processes to be run. Be Polite! \n(default is %s)" % n_processes)

    parser.add_option("--HLT",
                      action="store_true",
                      dest="hlt",
                      default=False,
                      help="Analyse HLT histograms\n(default is %s)" % hlt)

    parser.add_option("-i", "--input_dir",
                      action="store",
                      dest="input_dir",
                      default=in_dir,
                      help="Input directory for html creation \n(default is %s)" % in_dir)

    # The help text reports the option's own default (False), not in_dir.
    parser.add_option("--reports",
                      action="store_true",
                      dest="reports",
                      default=False,
                      help="Do the reports for the pickles \n(default is %s)" % False)
    ##---HASHING---##
    parser.add_option("--hash_name",
                      action="store_true",
                      dest="hash_name",
                      default=False,
                      help="Set if you want to minimize & hash the output HTML files.")
    ##--Blacklist File --##
    parser.add_option("--use_black_file",
                      action="store_true",
                      dest="blacklist_file",
                      default=False,
                      help="Use a black list file of histograms located @ /RelMon/data")
    ##-- USE CSS files in web access, for stand-alone usage --##
    parser.add_option("--standalone",
                      action="store_true",
                      dest="standalone",
                      default=False,
                      help="Define that using RelMon in standalone method. Makes CSS files accessible over HTTP")

    (options, args) = parser.parse_args()

    # Comparisons need either a directory of samples or both explicit lists.
    has_explicit_pairs = len(options.ref_samples) > 0 and len(options.test_samples) > 0
    has_sample_pool = len(options.all_samples) > 0
    has_input_dir = len(options.input_dir) > 0

    if not (has_explicit_pairs or has_sample_pool) and not has_input_dir:
        print("No samples given as input.")
        parser.print_help()
        exit(2)

    if has_sample_pool or has_explicit_pairs:
        do_comparisons_threaded(options)
    # Read input_dir again: do_comparisons_threaded() fills it in when empty.
    if len(options.input_dir) > 0:
        do_html(options, options.hash_name, options.standalone)
557 
558 
559 
560 
561 
562 
563 
564 
565 
566 
567 
568 
std::vector< T >::const_iterator search(const cond::Time_t &val, const std::vector< T > &container)
Definition: IOVProxy.cc:229
tuple zip
Definition: archive.py:476
def make_files_pairs
Definition: utils.py:549
double split
Definition: MVATrainer.cc:139
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run