CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
ValidationMatrix.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 ################################################################################
3 # RelMon: a tool for automatic Release Comparison
4 # https://twiki.cern.ch/twiki/bin/view/CMSPublic/RelMon
5 #
6 #
7 #
8 # Danilo Piparo CERN - danilo.piparo@cern.ch
9 #
10 ################################################################################
11 
12 from optparse import OptionParser
13 
14 import os
15 import cPickle
16 import glob
17 from re import search
18 from subprocess import call,PIPE
19 from multiprocessing import Pool
20 from sys import exit
21 
22 import sys
23 argv=sys.argv
24 sys.argv=[]
25 if "RELMON_SA" in os.environ:
26  import definitions as definitions
27  from dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
28  from dirstructure import Directory
29  from directories2html import directory2html,make_summary_table
30  from utils import ask_ok, unpickler, make_files_pairs
31 else:
32  import Utilities.RelMon.definitions as definitions
33  from Utilities.RelMon.dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
34  from Utilities.RelMon.dirstructure import Directory
35  from Utilities.RelMon.directories2html import directory2html,make_summary_table
36  from Utilities.RelMon.utils import ask_ok, unpickler, make_files_pairs
37 sys.argv=argv
38 
39 #-------------------------------------------------------------------------------
40 
def name2sample(filename):
    """Return the sample field of a file name.

    The basename of *filename* is split on double underscores ("__") and the
    second field is returned.  An IndexError is raised when the basename has
    fewer than two such fields.
    """
    fields = os.path.basename(filename).split("__")
    return fields[1]
44 
def name2version(filename):
    """Return the version field of a file name.

    The basename of *filename* is split on double underscores ("__") and the
    third field is returned.  An IndexError is raised when the basename has
    fewer than three such fields.
    """
    fields = os.path.basename(filename).split("__")
    return fields[2]
48 
def name2run(filename):
    """Return the run token of a file name.

    The first "__"-separated field of the basename is split on single
    underscores and its third token is returned (e.g. "R000000001" for a
    basename starting with "DQM_V0001_R000000001__").
    """
    leading_field = os.path.basename(filename).split("__")[0]
    tokens = leading_field.split("_")
    return tokens[2]
52 
def name2runskim(filename):
    """Return "<run>_<skim>" for a file name.

    The run comes from name2run().  The skim is the last underscore-separated
    token of the version field (see name2version()), with everything from the
    last "-v" onwards removed when such a suffix is present.
    """
    run = name2run(filename)
    skim = name2version(filename).split("_")[-1]
    # Drop the skim version suffix ("-v<N>"), if any.
    suffix_start = skim.rfind('-v')
    if suffix_start != -1:
        skim = skim[:suffix_start]
    return "%s_%s" % (run, skim)
60 
def name2globaltag(filename):
    """Return the global tag (GT) encoded in a file basename.

    The third "__"-separated field of the basename is split on "-" and its
    second piece is returned.
    """
    version_field = os.path.basename(filename).split("__")[2]
    pieces = version_field.split("-")
    return pieces[1]
64 
65 #-------------------------------------------------------------------------------
66 
def guess_params(ref_filenames,test_filenames):
    """Derive the sample names and the two versions from paired file lists.

    Reference and test files are paired positionally with zip(), so surplus
    entries of the longer list are ignored.  A mismatch between the sample
    names of a pair is only reported, not treated as an error.

    Returns a tuple (samples, ref_version, test_version).  When either input
    list is empty, ([], "", "") is returned.  If the inputs mix several
    versions, the versions of the first pair are returned; the previous
    implementation picked an arbitrary element of a set.
    """
    if not ref_filenames or not test_filenames:
        print("Empty reference and test filenames lists!")
        return [],"",""

    samples = []
    ref_versions = []
    test_versions = []

    for ref_path, test_path in zip(ref_filenames, test_filenames):
        ref = os.path.basename(ref_path)
        test = os.path.basename(test_path)

        ref_sample = name2sample(ref)
        ref_version = name2version(ref)
        test_sample = name2sample(test)
        test_version = name2version(test)

        print(" ## sample 1: %s vs sample 2: %s" % (ref_sample, test_sample))

        if ref_sample != test_sample:
            print("Files %s and %s do not seem to be relative to the same sample." % (ref, test))

        # Slightly modify for data: when the version matches "20[01]" the
        # last underscore-separated token of the version is appended.
        if search("20[01]", ref_version) is not None:
            ref_sample += ref_version.split("_")[-1]
        samples.append(ref_sample)

        ref_versions.append(ref_version)
        test_versions.append(test_version)

    # Mixed versions are tolerated; report the ones of the first pair so the
    # result does not depend on set iteration order.
    return samples, ref_versions[0], test_versions[0]
112 
113 
114 #-------------------------------------------------------------------------------
115 
def check_root_files(names_list):
    """Check that every name in *names_list* ends with ".root".

    Returns True when all names pass (including for an empty list).  On the
    first name that fails, a message naming that file is printed and False is
    returned.
    """
    for name in names_list:
        if not name.endswith(".root"):
            # The original message left the "%s" placeholder unfilled.
            print("File %s does not seem to be a rootfile. Please check." % name)
            return False
    return True
122 
123 #-------------------------------------------------------------------------------
124 
def add_to_blacklist(blacklist, pattern, target, blist_piece):
    """Append *blist_piece* to *blacklist* when *pattern* applies to *target*.

    *pattern* is stripped of surrounding whitespace and used as a regular
    expression searched in *target*.  A leading "!" negates the condition,
    i.e. the piece is added when the rest of the pattern is NOT found.
    Pieces are joined with commas.

    Returns the (possibly extended) blacklist string.  An empty pattern no
    longer raises IndexError; as a regular expression it matches any target.
    """
    int_pattern = pattern.strip()

    # startswith() is safe on an empty string, unlike indexing [0].
    flip_condition = int_pattern.startswith('!')
    if flip_condition:
        int_pattern = int_pattern[1:]

    condition = search(int_pattern, target) is not None
    if flip_condition:
        condition = not condition

    if condition:
        if blacklist != "":  # if not the first, add a comma
            blacklist += ","
        blacklist += blist_piece
    return blacklist
145 
146 #-------------------------------------------------------------------------------
147 
def guess_blacklists(samples,ver1,ver2,hlt):
    """Build a blacklist string for each sample according to a set of rules.

    Every sample starts from a fixed base blacklist.  Depending on *hlt* and
    on whether *ver1* matches the year pattern (treated as data) or not
    (treated as Monte Carlo), further pieces are appended via
    add_to_blacklist() using the pattern/blacklist pairs from `definitions`.

    *ver2* is accepted but not used.  Returns a dict mapping each sample to
    its comma-separated blacklist string.
    """
    blacklists = {}
    for sample in samples:
        entry = "FED@1,AlcaBeamMonitor@1,HLT@1,AlCaReco@1"

        if hlt:
            # HLT
            entry += ",AlCaEcalPi0@2"
            if search("2010+|2011+|2012+|2015+", ver1):
                print("We are treating Data files for the HLT")
                # at the moment it does not make sense since hlt is ran already
            else:
                print("We are treating MC files for the HLT")
                for pattern, blist in definitions.hlt_mc_pattern_blist_pairs:
                    entry = add_to_blacklist(entry, pattern, sample, blist)
        else:
            # RECO
            # NOTE(review): this year pattern lacks the "2015+" alternative
            # used in the HLT branch, and "AlCaReco@1" below duplicates the
            # base entry -- confirm whether both are intended.
            if search("2010+|2011+|2012+", ver1):
                # Data: patterns are matched against the version, not the sample.
                print("We are treating Data files:")
                entry += ",By__Lumi__Section@-1,AlCaReco@1"
                for pattern, blist in definitions.data_pattern_blist_pairs:
                    entry = add_to_blacklist(entry, pattern, ver1, blist)
            else:
                # Monte Carlo
                print("We are treating MC files")
                for pattern, blist in definitions.mc_pattern_blist_pairs:
                    entry = add_to_blacklist(entry, pattern, sample, blist)

        blacklists[sample] = entry

    return blacklists
186 
187 #-------------------------------------------------------------------------------
188 
def get_roofiles_in_dir(directory):
    """List the ".root" files found directly inside *directory*.

    The directory name is printed first.  Returns a list of paths, each being
    *directory* joined with an entry whose name ends with ".root", in the
    order given by os.listdir().
    """
    print(directory)
    return [os.path.join(directory, entry)
            for entry in os.listdir(directory)
            if entry.endswith(".root")]
195 
196 #-------------------------------------------------------------------------------
197 
def get_filenames_from_pool(all_samples):
    """Guess reference/test file pairs from the root files of a directory.

    The ".root" files of *all_samples* are listed, passed through
    make_files_pairs(), and then split by position: even indices become
    reference files, odd indices become test files.  The guessed pairing is
    printed.

    Returns (ref_filenames, test_filenames); ([], []) when the directory
    holds no root files.
    """
    root_files = get_roofiles_in_dir(all_samples)

    if not root_files:
        print("Zero files found in directory %s!" % all_samples)
        return [],[]

    for path in root_files:
        print("*  %s" % path)

    # Are they an even number?
    if len(root_files) % 2:
        print("The numbuer of file is not even... Trying to recover a catastrophe.")

    # NOTE(review): the pairing is applied unconditionally here; confirm
    # against the original indentation.
    paired = make_files_pairs(root_files)

    # Couple them by position: (ref, test), (ref, test), ...
    ref_filenames = [paired[idx] for idx in range(0, len(paired), 2)]
    test_filenames = [paired[idx] for idx in range(1, len(paired), 2)]

    print("The guess would be the following:")
    for ref, test in zip(ref_filenames, test_filenames):
        refbasedir, refname = os.path.split(ref)
        testbasedir, testname = os.path.split(test)
        if refbasedir == testbasedir:
            dir_to_print = refbasedir
        else:
            dir_to_print = "%s and %s" % (refbasedir, testbasedir)
        print("* Directory: %s " % dir_to_print)
        print(" o %s" % refname)
        print(" o %s" % testname)

    return ref_filenames, test_filenames
248 
249 
250 #-------------------------------------------------------------------------------
251 
def get_clean_fileanames(ref_samples,test_samples):
    """Turn two comma-separated strings into validated lists of file names.

    Each string is split on commas and every item is stripped of surrounding
    whitespace.  The process exits with status 2 when the two lists differ in
    length or when check_root_files() rejects either list.

    Returns (ref_filenames, test_filenames).
    """
    ref_filenames = [item.strip() for item in ref_samples.split(",")]
    test_filenames = [item.strip() for item in test_samples.split(",")]

    if len(ref_filenames) != len(test_filenames):
        print("The numebr of reference and test files does not seem to be the same. Please check.")
        exit(2)

    # The test list is only checked when the reference list passed.
    if not check_root_files(ref_filenames) or not check_root_files(test_filenames):
        exit(2)

    return ref_filenames, test_filenames
264 
265 #-------------------------------------------------------------------------------
266 
268  return len(filter(lambda p: p.returncode==None,p_list))
269 
270 #-------------------------------------------------------------------------------
271 
def call_compare_using_files(args):
    """Build the compare_using_files.py command for one file pair and run it.

    *args* is a sequence (sample, ref_filename, test_filename, options), the
    form used by Pool.map in this file.  The command is assembled as an
    argument list rather than a string split on spaces, so file names and
    blacklists containing spaces reach the script intact.

    For the "Bin2Bin"/"BinToBin" statistical tests a threshold of 0.9999 is
    passed instead of options.test_threshold; *options* itself is no longer
    modified.

    Returns the exit code reported by subprocess.call().
    """
    sample, ref_filename, test_filename, options = args
    gt = name2globaltag(ref_filename)
    blacklists = guess_blacklists([sample],
                                  name2version(ref_filename),
                                  name2version(test_filename),
                                  options.hlt)

    test_threshold = options.test_threshold
    if options.stat_test in ("Bin2Bin", "BinToBin"):
        test_threshold = 0.9999

    command = ["compare_using_files.py", ref_filename, test_filename, "-C", "-R"]
    if options.do_pngs:
        command.append("-p")
    command += ["-o", "%s_%s" % (sample, gt)]
    command.append("--specify_run")
    command += ["-t", "%s" % test_threshold]
    command += ["-s", "%s" % options.stat_test]

    # Inspect the HLT directories
    if options.hlt:
        command += ["-d", "HLT"]

    if options.hash_name:
        command.append("--hash_name")

    if options.blacklist_file:
        command.append("--use_black_file")

    if options.standalone:
        command.append("--standalone")

    if len(blacklists[sample]) > 0:
        command += ["-B", blacklists[sample]]

    print("\nExecuting -- %s" % " ".join(command))

    return call(command)
309 
310 
311 #--------------------------------------------------------------------------------
312 
314 
def do_comparisons_threaded(options):
    """Run all reference/test comparisons in a pool of worker processes.

    The file pairs come either from the directory options.all_samples or
    from the comma-separated options.ref_samples / options.test_samples.
    The comparisons run inside the output directory (options.out_dir, or
    "<version1>VS<version2>" when empty), which is created if missing.  When
    options.input_dir is empty it is set to the output directory.

    The working directory is restored to where the function started, also on
    failure.  The previous os.chdir("..") was only right for a
    single-level relative output directory.

    Returns 0 when no samples were found, None otherwise.
    """
    n_processes = int(options.n_processes)

    if len(options.all_samples) > 0:
        ref_filenames, test_filenames = get_filenames_from_pool(options.all_samples)
    else:
        ref_filenames, test_filenames = get_clean_fileanames(options.ref_samples, options.test_samples)

    # Make the paths absolute: the working directory changes below.
    ref_filenames = [os.path.abspath(name) for name in ref_filenames]
    test_filenames = [os.path.abspath(name) for name in test_filenames]

    samples, cmssw_version1, cmssw_version2 = guess_params(ref_filenames, test_filenames)

    if len(samples) == 0:
        print("No Samples found... Quitting")
        return 0

    original_dir = os.getcwd()

    outdir = options.out_dir
    if len(outdir) == 0:
        outdir = "%sVS%s" % (cmssw_version1, cmssw_version2)
        print("Creating automatic outdir: %s" % outdir)
    if len(options.input_dir) == 0:
        options.input_dir = outdir
        print("Creating automatic indir: %s" % options.input_dir)

    if not os.path.exists(outdir):
        # makedirs also handles nested output directories.
        os.makedirs(outdir)
    os.chdir(outdir)

    try:
        # Adjust the number of processes: never more than there are
        # comparisons, never fewer than one (Pool rejects 0).
        n_comparisons = len(samples)
        if n_comparisons < n_processes:
            n_processes = n_comparisons
            print("Less comparisons than possible processes: reducing n processes to %s" % n_processes)
        n_processes = max(1, n_processes)

        # Compare all pairs of root files
        args_iterable = [list(args) + [options]
                         for args in zip(samples, ref_filenames, test_filenames)]
        pool = Pool(n_processes)
        try:
            pool.map(call_compare_using_files, args_iterable)
        finally:
            pool.close()
            pool.join()

        # move the pickles on the top, hack
        os.system("mv */*pkl .")
    finally:
        os.chdir(original_dir)
390 #-------------------------------------------------------------------------------
def do_reports(indir):
    """Run compare_using_files.py on every pickle found in *indir*.

    One report command is launched for each file of *indir* whose name ends
    with ".pkl"; the output name is the file name without that extension.
    At most options.n_processes commands run at the same time.

    Reads the module-level `options` (n_processes, do_pngs).  The working
    directory is changed to *indir* for the duration of the call and restored
    afterwards, also on failure.

    The previous implementation stored the integer returned by
    subprocess.call() and then invoked .wait() on it, which raised
    AttributeError as soon as a batch was complete.
    """
    # Only `call` and `PIPE` are imported from subprocess at file level.
    from subprocess import Popen

    n_processes = max(1, int(options.n_processes))
    start_dir = os.getcwd()
    os.chdir(indir)
    try:
        running_subprocesses = []
        for pklfilename in os.listdir("./"):
            if not pklfilename.endswith(".pkl"):
                continue
            command = ["compare_using_files.py", "-R"]
            if options.do_pngs:
                command.append("-p")
            command += ["-P", pklfilename]
            command += ["-o", pklfilename[:-4]]
            print("Executing %s" % " ".join(command))
            running_subprocesses.append(Popen(command))

            # Batch full: wait for all of it before launching more.
            if len(running_subprocesses) >= n_processes:
                for process in running_subprocesses:
                    process.wait()
                running_subprocesses = []

        # Wait for the last, possibly incomplete, batch.
        for process in running_subprocesses:
            process.wait()
    finally:
        os.chdir(start_dir)
417 
418 #-------------------------------------------------------------------------------
def do_html(options, hashing_flag, standalone):
    """Write the RelMon summary page "RelMonSummary.html".

    When options.reports is set, the per-pickle reports are produced first
    via do_reports(options.input_dir).  The aggregation rules are taken from
    `definitions` under the key 'HLT' when options.hlt is set and 'reco'
    otherwise, and handed to make_summary_table() together with
    options.input_dir, *hashing_flag* and *standalone*.  The returned HTML is
    written to the current working directory.
    """
    if options.reports:
        print("Preparing reports for the single files...")
        do_reports(options.input_dir)

    # Check which aggregation rules are to be used for the summary page.
    if options.hlt:
        print("Aggregating directories according to HLT rules")
        rules_key = 'HLT'
    else:
        rules_key = 'reco'
    aggregation_rules = definitions.aggr_pairs_dict[rules_key]
    aggregation_rules_twiki = definitions.aggr_pairs_twiki_dict[rules_key]

    table_html = make_summary_table(options.input_dir, aggregation_rules,
                                    aggregation_rules_twiki, hashing_flag, standalone)

    # The context manager closes the file even if the write fails.
    with open("RelMonSummary.html", "w") as ofile:
        ofile.write(table_html)
441 
442 #-------------------------------------------------------------------------------
443 
if __name__ == "__main__":

    #-----------------------------------------------------------------------------
    # Default values of the command line options
    ref_samples=""
    test_samples=""
    all_samples=""
    n_processes=1
    out_dir=""
    in_dir=""
    n_threads=1 # do not change this
    run=-1
    stat_test="Chi2"
    test_threshold=0.00001
    hlt=False
    #-----------------------------------------------------------------------------

    parser = OptionParser(usage="usage: %prog [options]")

    # The long option previously carried a trailing space ("--ref_samples ")
    # and was only reachable through optparse's abbreviation matching.
    parser.add_option("-R","--ref_samples",
                      action="store",
                      dest="ref_samples",
                      default=ref_samples,
                      help="The samples that act as reference (comma separated list)")

    parser.add_option("-T","--test_samples",
                      action="store",
                      dest="test_samples",
                      default=test_samples,
                      help="The samples to be tested (comma separated list)")

    parser.add_option("-a","--all_samples",
                      action="store",
                      dest="all_samples",
                      default=all_samples,
                      help="EXPERIMENTAL: Try to sort all samples selected (wildcards) and organise a comparison")

    parser.add_option("-o","--out_dir",
                      action="store",
                      dest="out_dir",
                      default=out_dir,
                      help="The outdir other than <Version1>VS<Version2>")

    parser.add_option("-p","--do_pngs",
                      action="store_true",
                      dest="do_pngs",
                      default=False,
                      help="EXPERIMENTAL!!! Do the pngs of the comparison (takes 50%% of the total running time) \n(default is %s)" %False)

    # Same trailing-space fix as for --ref_samples.
    parser.add_option("-r","--run",
                      action="store",
                      dest="run",
                      default=run,
                      help="The run to be checked \n(default is %s)" %run)

    parser.add_option("-t","--test_threshold",
                      action="store",
                      dest="test_threshold",
                      default=test_threshold,
                      help="Threshold for the statistical test \n(default is %s)" %test_threshold)

    parser.add_option("-s","--stat_test",
                      action="store",
                      dest="stat_test",
                      default=stat_test,
                      help="Statistical test (KS or Chi2) \n(default is %s)" %stat_test)

    parser.add_option("-N","--numberOfProcesses",
                      action="store",
                      dest="n_processes",
                      default=n_processes,
                      help="Number of parallel processes to be run. Be Polite! \n(default is %s)" %n_processes)

    parser.add_option("--HLT",
                      action="store_true",
                      dest="hlt",
                      default=False,
                      help="Analyse HLT histograms\n(default is %s)" %hlt)

    parser.add_option("-i","--input_dir",
                      action="store",
                      dest="input_dir",
                      default=in_dir,
                      help="Input directory for html creation \n(default is %s)" %in_dir)

    # The help text used to interpolate in_dir (an empty string) instead of
    # this flag's own default.
    parser.add_option("--reports",
                      action="store_true",
                      dest="reports",
                      default=False,
                      help="Do the reports for the pickles \n(default is %s)" %False)
    ##---HASHING---##
    parser.add_option("--hash_name",
                      action="store_true",
                      dest="hash_name",
                      default=False,
                      help="Set if you want to minimize & hash the output HTML files.")
    ##--Blacklist File --##
    parser.add_option("--use_black_file",
                      action="store_true",
                      dest="blacklist_file",
                      default=False,
                      help="Use a black list file of histograms located @ /RelMon/data")
    ##-- USE CSS files in web access, for stand-alone usage --##
    parser.add_option("--standalone",
                      action="store_true",
                      dest="standalone",
                      default=False,
                      help="Define that using RelMon in standalone method. Makes CSS files accessible over HTTP")

    # `options` stays a module-level name: do_reports() reads it as a global.
    (options, args) = parser.parse_args()

    # Comparisons need either a directory of samples or both explicit lists.
    have_samples = len(options.all_samples)>0 or (len(options.ref_samples)*len(options.test_samples)>0)

    if not have_samples and len(options.input_dir)==0:
        print("No samples given as input.")
        parser.print_help()
        exit(2)

    if have_samples:
        do_comparisons_threaded(options)
    # do_comparisons_threaded fills in input_dir when it was left empty, so
    # the html step also runs after a comparison.
    if len(options.input_dir)>0:
        do_html(options, options.hash_name, options.standalone)
564 
565 
566 
567 
568 
569 
570 
571 
572 
573 
574 
575 
Definition: search.py:1
def make_files_pairs
Definition: utils.py:561
double split
Definition: MVATrainer.cc:139
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run