CMS 3D CMS Logo

ValidationMatrix.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 ################################################################################
3 # RelMon: a tool for automatic Release Comparison
4 # https://twiki.cern.ch/twiki/bin/view/CMSPublic/RelMon
5 #
6 #
7 #
8 # Danilo Piparo CERN - danilo.piparo@cern.ch
9 #
10 ################################################################################
11 
12 from __future__ import print_function
13 from optparse import OptionParser
14 
15 import os
16 import cPickle
17 import glob
18 from re import search
19 from subprocess import call,PIPE
20 from multiprocessing import Pool
21 from sys import exit
22 
23 import sys
24 argv=sys.argv
25 sys.argv=[]
26 if "RELMON_SA" in os.environ:
27  import definitions as definitions
28  from dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
29  from dirstructure import Directory
30  from directories2html import directory2html,make_summary_table
31  from utils import ask_ok, unpickler, make_files_pairs
32 else:
33  import Utilities.RelMon.definitions as definitions
34  from Utilities.RelMon.dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
35  from Utilities.RelMon.dirstructure import Directory
36  from Utilities.RelMon.directories2html import directory2html,make_summary_table
37  from Utilities.RelMon.utils import ask_ok, unpickler, make_files_pairs
38 sys.argv=argv
39 
40 #-------------------------------------------------------------------------------
41 
def name2sample(filename):
    """Return the sample name encoded in a file name.

    The base name of ``filename`` is split on ``"__"`` and the second field
    is returned.

    Raises:
        IndexError: if the base name has fewer than two ``"__"``-separated
            fields.
    """
    return os.path.basename(filename).split("__")[1]
45 
def name2version(filename):
    """Return the version string encoded in a file name.

    The base name of ``filename`` is split on ``"__"`` and the third field
    is returned.

    Raises:
        IndexError: if the base name has fewer than three ``"__"``-separated
            fields.
    """
    return os.path.basename(filename).split("__")[2]
49 
def name2run(filename):
    """Return the run token encoded in a file name.

    The first ``"__"``-separated field of the base name is split on ``"_"``
    and its third token is returned.

    Raises:
        IndexError: if that first field has fewer than three ``"_"``-separated
            tokens.
    """
    first_field = os.path.basename(filename).split("__")[0]
    return first_field.split("_")[2]
53 
def name2runskim(filename):
    """Return ``"<run>_<skim>"`` for a file name.

    The run comes from ``name2run``. The skim is the last ``"_"``-separated
    token of ``name2version(filename)``; when it contains ``"-v"``, everything
    from the last ``"-v"`` onwards is dropped.
    """
    run = name2run(filename)
    skim = name2version(filename).split("_")[-1]
    # remove skim version
    if "-v" in skim:
        skim = skim[:skim.rfind("-v")]
    return "%s_%s" % (run, skim)
61 
def name2globaltag(filename):
    """Return the global tag (GT) encoded in a file's base name.

    The third ``"__"``-separated field of the base name is split on ``"-"``
    and its second token is returned.

    Raises:
        IndexError: if the base name does not contain enough fields.
    """
    version_field = os.path.basename(filename).split("__")[2]
    return version_field.split("-")[1]
65 
66 #-------------------------------------------------------------------------------
67 
def guess_params(ref_filenames, test_filenames):
    """Derive sample names and versions from paired file names.

    Reference and test files are walked in lockstep. For every pair the sample
    and version are extracted with ``name2sample`` / ``name2version``.

    Args:
        ref_filenames: sequence of reference file paths.
        test_filenames: sequence of test file paths.

    Returns:
        A tuple ``(samples, version1, version2)``. ``samples`` holds one entry
        per pair, taken from the reference file. ``version1`` / ``version2``
        are the versions of the first reference / test file. If either input
        is empty, ``([], "", "")`` is returned.
    """
    if len(ref_filenames) * len(test_filenames) == 0:
        print("Empty reference and test filenames lists!")
        return [], "", ""

    samples = []
    ref_versions = []
    test_versions = []

    for ref_path, test_path in zip(ref_filenames, test_filenames):
        ref = os.path.basename(ref_path)
        test = os.path.basename(test_path)

        ref_sample = name2sample(ref)
        ref_version = name2version(ref)
        test_sample = name2sample(test)
        test_version = name2version(test)

        print(" ## sample 1: %s vs sample 2: %s" % (ref_sample, test_sample))

        # A mismatch is only reported; the comparison goes on regardless.
        if ref_sample != test_sample:
            print("Files %s and %s do not seem to be relative to the same sample." % (ref, test))

        # Slightly modify for data: append the last "_" token of the version
        if search("20[01]", ref_version) is not None:
            ref_sample += ref_version.split("_")[-1]
        samples.append(ref_sample)

        # append the versions
        ref_versions.append(ref_version)
        test_versions.append(test_version)

    # Use the first version seen on each side. Going through set() would pick
    # an arbitrary one whenever several versions are present.
    cmssw_version1 = ref_versions[0]
    cmssw_version2 = test_versions[0]

    return samples, cmssw_version1, cmssw_version2
113 
114 
115 #-------------------------------------------------------------------------------
116 
def check_root_files(names_list):
    """Check that every name in ``names_list`` ends with ``".root"``.

    The first offending name is reported on stdout.

    Returns:
        ``True`` if all names end with ``".root"`` (also for an empty list),
        ``False`` as soon as one does not.
    """
    for name in names_list:
        if not name.endswith(".root"):
            # The original message had a "%s" placeholder but no argument.
            print("File %s does not seem to be a rootfile. Please check." % name)
            return False
    return True
123 
124 #-------------------------------------------------------------------------------
125 
def add_to_blacklist(blacklist, pattern, target, blist_piece):
    """Append ``blist_piece`` to ``blacklist`` when ``pattern`` applies to ``target``.

    Args:
        blacklist: current comma-separated blacklist string (may be empty).
        pattern: regular expression searched in ``target``. Surrounding
            whitespace is ignored. A leading ``"!"`` inverts the condition,
            so the piece is added when the pattern is NOT found.
        target: string the pattern is searched in.
        blist_piece: text appended when the condition holds.

    Returns:
        The blacklist string, extended when the condition holds. A comma is
        inserted first if the blacklist was not empty.
    """
    int_pattern = pattern.strip()
    # startswith() is safe on an empty pattern, unlike indexing [0].
    flip_condition = int_pattern.startswith('!')
    if flip_condition:
        int_pattern = int_pattern[1:]

    condition = search(int_pattern, target) is not None
    if flip_condition:
        condition = not condition

    if condition:
        if blacklist != "":  # if not the first, add a comma
            blacklist += ","
        blacklist += blist_piece
    return blacklist
146 
147 #-------------------------------------------------------------------------------
148 
def guess_blacklists(samples, ver1, ver2, hlt):
    """Build a blacklist for each sample according to a set of rules.

    Every sample starts from a fixed base blacklist, which is then extended
    depending on ``hlt`` and on whether ``ver1`` matches the year patterns
    that mark data (as opposed to Monte Carlo).

    Args:
        samples: iterable of sample names.
        ver1: version string of the reference file; decides data vs MC.
        ver2: version string of the test file (not used by the rules).
        hlt: truthy to apply the HLT rules, falsy for the RECO rules.

    Returns:
        dict mapping each sample to its comma-separated blacklist string.
    """
    blacklists = {}
    for sample in samples:
        blist_str = "FED@1,AlcaBeamMonitor@1,HLT@1,AlCaReco@1"

        if hlt:  # HLT
            blist_str += ",AlCaEcalPi0@2"
            if not search("2010+|2011+|2012+|2015+", ver1):
                print("We are treating MC files for the HLT")
                for pattern, blist in definitions.hlt_mc_pattern_blist_pairs:
                    blist_str = add_to_blacklist(blist_str, pattern, sample, blist)
            else:
                # at the moment it does not make sense since hlt is ran already
                print("We are treating Data files for the HLT")

        else:  # RECO
            if not search("2010+|2011+|2012+", ver1):
                # Monte Carlo: patterns are matched against the sample name
                print("We are treating MC files")
                for pattern, blist in definitions.mc_pattern_blist_pairs:
                    blist_str = add_to_blacklist(blist_str, pattern, sample, blist)
            else:
                # Data: patterns are matched against the version string
                print("We are treating Data files:")
                blist_str += ",By__Lumi__Section@-1,AlCaReco@1"
                for pattern, blist in definitions.data_pattern_blist_pairs:
                    blist_str = add_to_blacklist(blist_str, pattern, ver1, blist)

        blacklists[sample] = blist_str

    return blacklists
187 
188 #-------------------------------------------------------------------------------
189 
def get_roofiles_in_dir(directory):
    """List the ``.root`` files found directly inside ``directory``.

    The directory name is echoed on stdout first.

    Returns:
        A list of paths (``directory`` joined with each entry name ending in
        ``".root"``), in the order given by ``os.listdir``.
    """
    print(directory)
    # Return a real list: on Python 3 map() yields an iterator, and callers
    # take len() of the result.
    return [os.path.join(directory, entry)
            for entry in os.listdir(directory)
            if entry.endswith(".root")]
196 
197 #-------------------------------------------------------------------------------
198 
def get_filenames_from_pool(all_samples):
    """Pair up the ``.root`` files of a directory as (reference, test).

    The files come from ``get_roofiles_in_dir`` and are ordered by
    ``make_files_pairs``. Entries at even positions of that ordering become
    references, entries at odd positions become tests. The resulting guess is
    printed.

    Args:
        all_samples: directory containing the files.

    Returns:
        ``(ref_filenames, test_filenames)``; ``([], [])`` when the directory
        holds no ``.root`` file.
    """
    # get a list of the files (list(): the helper may hand back an iterator)
    files_list = list(get_roofiles_in_dir(all_samples))

    if len(files_list) == 0:
        print("Zero files found in directory %s!" % all_samples)
        return [], []

    # Are they an even number?
    for name in files_list:
        print("* ", name)
    if len(files_list) % 2 != 0:
        print("The numbuer of file is not even... Trying to recover a catastrophe.")

    files_list = list(make_files_pairs(files_list))

    # Even positions are references, odd positions are tests.
    ref_filenames = files_list[0::2]
    test_filenames = files_list[1::2]

    print("The guess would be the following:")
    for ref, test in zip(ref_filenames, test_filenames):
        refbasedir = os.path.dirname(ref)
        testbasedir = os.path.dirname(test)
        dir_to_print = refbasedir
        if refbasedir != testbasedir:
            dir_to_print = "%s and %s" % (refbasedir, testbasedir)
        print("* Directory: %s " % dir_to_print)
        print(" o %s" % os.path.basename(ref))
        print(" o %s" % os.path.basename(test))

    return ref_filenames, test_filenames
249 
250 
251 #-------------------------------------------------------------------------------
252 
def get_clean_fileanames(ref_samples, test_samples):
    """Split two comma-separated file lists and validate them.

    Args:
        ref_samples: comma-separated reference file names.
        test_samples: comma-separated test file names.

    Returns:
        ``(ref_filenames, test_filenames)`` as lists of whitespace-stripped
        names.

    Exits the program with status 2 when the two lists differ in length or
    when ``check_root_files`` rejects one of them.
    """
    # Process the samples starting from the names. Real lists are needed
    # because len() is taken below.
    ref_filenames = [name.strip() for name in ref_samples.split(",")]
    test_filenames = [name.strip() for name in test_samples.split(",")]

    if len(ref_filenames) != len(test_filenames):
        print("The numebr of reference and test files does not seem to be the same. Please check.")
        exit(2)

    if not (check_root_files(ref_filenames) and check_root_files(test_filenames)):
        exit(2)
    return ref_filenames, test_filenames
265 
266 #-------------------------------------------------------------------------------
267 
def count_alive_processes(p_list):
    """Return how many objects in ``p_list`` have ``returncode`` set to ``None``.

    NOTE(review): the ``def`` line was missing from this listing and has been
    restored from the file's function index; confirm against the original.
    """
    return sum(1 for p in p_list if p.returncode is None)
270 
271 #-------------------------------------------------------------------------------
def call_compare_using_files(args):
    """Create the command to compare two files with compare_using_files.py
    and run it.

    NOTE(review): the ``def`` line was missing from this listing and has been
    restored from the file's function index; confirm against the original.

    Args:
        args: a 4-item sequence ``(sample, ref_filename, test_filename,
            options)``. It is a single argument so the function can be used
            with ``Pool.map``. ``options`` is read for ``hlt``, ``do_pngs``,
            ``stat_test``, ``test_threshold``, ``hash_name``,
            ``blacklist_file`` and ``standalone``.

    Returns:
        The return code of the spawned command.
    """
    sample, ref_filename, test_filename, options = args
    gt = name2globaltag(ref_filename)
    blacklists = guess_blacklists([sample],
                                  name2version(ref_filename),
                                  name2version(test_filename),
                                  options.hlt)

    # Bin-to-bin tests use a fixed threshold. Keep it local so the caller's
    # options object is not modified.
    test_threshold = options.test_threshold
    if options.stat_test in ("Bin2Bin", "BinToBin"):
        test_threshold = 0.9999

    # Build the argument list directly, so paths containing spaces are not
    # broken apart by a later split on spaces.
    command = ["compare_using_files.py", ref_filename, test_filename, "-C", "-R"]
    if options.do_pngs:
        command.append("-p")
    command += ["-o", "%s_%s" % (sample, gt)]
    command.append("--specify_run")
    command += ["-t", "%s" % test_threshold]
    command += ["-s", "%s" % options.stat_test]

    # Inspect the HLT directories
    if options.hlt:
        command += ["-d", "HLT"]

    if options.hash_name:
        command.append("--hash_name")

    if options.blacklist_file:
        command.append("--use_black_file")

    if options.standalone:
        command.append("--standalone")

    if len(blacklists[sample]) > 0:
        command += ["-B", blacklists[sample]]

    print("\nExecuting -- %s" % " ".join(command))

    return call(command)
310 
311 
312 #--------------------------------------------------------------------------------
313 
315 
def do_comparisons_threaded(options):
    """Run all file-pair comparisons in a pool of worker processes.

    NOTE(review): the ``def`` line was missing from this listing and has been
    restored from the file's function index; confirm against the original.

    The file pairs come either from ``options.all_samples`` (a directory) or
    from ``options.ref_samples`` / ``options.test_samples``. The comparisons
    run inside the output directory, which is created when missing. The
    working directory is restored afterwards.

    Side effects:
        ``options.input_dir`` is set to the output directory when it was
        empty.

    Returns:
        ``0`` when no samples were found, ``None`` otherwise.
    """
    n_processes = int(options.n_processes)

    if len(options.all_samples) > 0:
        ref_filenames, test_filenames = get_filenames_from_pool(options.all_samples)
    else:
        ref_filenames, test_filenames = get_clean_fileanames(options.ref_samples,
                                                             options.test_samples)

    # make the paths absolute (real lists: len() is taken below)
    ref_filenames = [os.path.abspath(name) for name in ref_filenames]
    test_filenames = [os.path.abspath(name) for name in test_filenames]

    samples, cmssw_version1, cmssw_version2 = guess_params(ref_filenames, test_filenames)

    if len(samples) == 0:
        print("No Samples found... Quitting")
        return 0

    # Remembered so the working directory can be restored at the end.
    original_dir = os.getcwd()

    outdir = options.out_dir
    if len(outdir) == 0:
        print("Creating automatic outdir:", end=' ')
        outdir = "%sVS%s" % (cmssw_version1, cmssw_version2)
        print(outdir)
    if len(options.input_dir) == 0:
        print("Creating automatic indir:", end=' ')
        options.input_dir = outdir
        print(options.input_dir)

    if not os.path.exists(outdir):
        os.makedirs(outdir)
    os.chdir(outdir)

    try:
        # adjust the number of processes
        n_comparisons = len(ref_filenames)
        if n_comparisons < n_processes:
            print("Less comparisons than possible processes: reducing n processes to", end=' ')
            n_processes = n_comparisons
            print(n_processes)
        # Pool() refuses a size below one.
        n_processes = max(1, n_processes)

        ## Compare all pairs of root files
        args_iterable = [list(args) + [options]
                         for args in zip(samples, ref_filenames, test_filenames)]
        pool = Pool(n_processes)
        try:
            pool.map(call_compare_using_files, args_iterable)
        finally:
            pool.close()
            pool.join()

        # move the pickles on the top, hack
        os.system("mv */*pkl .")
    finally:
        # Go back to where we started. ".." is wrong when outdir is nested
        # or absolute.
        os.chdir(original_dir)
391 #-------------------------------------------------------------------------------
def do_reports(indir):
    """Run ``compare_using_files.py`` on every ``.pkl`` file in ``indir``.

    At most ``options.n_processes`` commands run at the same time: once that
    many have been started, all of them are awaited before the next batch.
    The working directory is changed to ``indir`` for the duration and then
    restored.

    Reads the module-level ``options`` object (``n_processes``, ``do_pngs``).

    Args:
        indir: directory containing the pickle files.
    """
    # Local import: only `call` and `PIPE` are imported at file level.
    from subprocess import Popen

    n_processes = max(1, int(options.n_processes))
    previous_dir = os.getcwd()
    os.chdir(indir)
    try:
        running_subprocesses = []
        for pklfilename in os.listdir("./"):
            # The output name strips the last 4 characters, so require the
            # ".pkl" suffix rather than ".pkl" anywhere in the name.
            if not pklfilename.endswith(".pkl"):
                continue

            command = ["compare_using_files.py", "-R"]
            if options.do_pngs:
                command.append("-p")
            command += ["-P", pklfilename, "-o", pklfilename[:-4]]
            print("Executing %s" % " ".join(command))

            # Popen returns a handle that can be waited on; call() would
            # block and return only an integer exit status.
            running_subprocesses.append(Popen(command))
            if len(running_subprocesses) >= n_processes:
                for process in running_subprocesses:
                    process.wait()
                running_subprocesses = []

        # Wait for the last, possibly incomplete, batch.
        for process in running_subprocesses:
            process.wait()
    finally:
        os.chdir(previous_dir)
418 
419 #-------------------------------------------------------------------------------
def do_html(options, hashing_flag, standalone):
    """Write the summary page ``RelMonSummary.html`` in the current directory.

    When ``options.reports`` is set, the per-pickle reports are produced first
    through ``do_reports``. The aggregation rules are the ``'HLT'`` or
    ``'reco'`` entries of ``definitions``, depending on ``options.hlt``.

    Args:
        options: parsed options; ``reports``, ``hlt`` and ``input_dir`` are
            read.
        hashing_flag: forwarded to ``make_summary_table``.
        standalone: forwarded to ``make_summary_table``.
    """
    if options.reports:
        print("Preparing reports for the single files...")
        do_reports(options.input_dir)

    # check which aggregation rules are to be used
    if options.hlt:
        print("Aggregating directories according to HLT rules")
        rules_key = 'HLT'
    else:
        rules_key = 'reco'
    aggregation_rules = definitions.aggr_pairs_dict[rules_key]
    aggregation_rules_twiki = definitions.aggr_pairs_twiki_dict[rules_key]

    # Do the summary page
    table_html = make_summary_table(options.input_dir,
                                    aggregation_rules,
                                    aggregation_rules_twiki,
                                    hashing_flag,
                                    standalone)

    # create summary html file; `with` closes it even if the write fails
    with open("RelMonSummary.html", "w") as ofile:
        ofile.write(table_html)
442 
443 #-------------------------------------------------------------------------------
444 
if __name__ == "__main__":

    #---------------------------------------------------------------------------
    # Default values of the command line options
    ref_samples = ""
    test_samples = ""
    all_samples = ""
    n_processes = 1
    out_dir = ""
    in_dir = ""
    n_threads = 1  # do not change this
    run = -1
    stat_test = "Chi2"
    test_threshold = 0.00001
    hlt = False
    #---------------------------------------------------------------------------

    parser = OptionParser(usage="usage: %prog [options]")

    # The long option had a trailing space and only matched through
    # optparse's abbreviation lookup.
    parser.add_option("-R", "--ref_samples",
                      action="store",
                      dest="ref_samples",
                      default=ref_samples,
                      help="The samples that act as reference (comma separated list)")

    parser.add_option("-T", "--test_samples",
                      action="store",
                      dest="test_samples",
                      default=test_samples,
                      help="The samples to be tested (comma separated list)")

    parser.add_option("-a", "--all_samples",
                      action="store",
                      dest="all_samples",
                      default=all_samples,
                      help="EXPERIMENTAL: Try to sort all samples selected (wildacrds) and organise a comparison")

    parser.add_option("-o", "--out_dir",
                      action="store",
                      dest="out_dir",
                      default=out_dir,
                      help="The outdir other than <Version1>VS<Version2>")

    parser.add_option("-p", "--do_pngs",
                      action="store_true",
                      dest="do_pngs",
                      default=False,
                      help="EXPERIMENTAL!!! Do the pngs of the comparison (takes 50%% of the total running time) \n(default is %s)" % False)

    # Same trailing-space problem as --ref_samples.
    parser.add_option("-r", "--run",
                      action="store",
                      dest="run",
                      default=run,
                      help="The run to be checked \n(default is %s)" % run)

    parser.add_option("-t", "--test_threshold",
                      action="store",
                      dest="test_threshold",
                      default=test_threshold,
                      help="Threshold for the statistical test \n(default is %s)" % test_threshold)

    parser.add_option("-s", "--stat_test",
                      action="store",
                      dest="stat_test",
                      default=stat_test,
                      help="Statistical test (KS or Chi2) \n(default is %s)" % stat_test)

    parser.add_option("-N", "--numberOfProcesses",
                      action="store",
                      dest="n_processes",
                      default=n_processes,
                      help="Number of parallel processes to be run. Be Polite! \n(default is %s)" % n_processes)

    parser.add_option("--HLT",
                      action="store_true",
                      dest="hlt",
                      default=False,
                      help="Analyse HLT histograms\n(default is %s)" % hlt)

    parser.add_option("-i", "--input_dir",
                      action="store",
                      dest="input_dir",
                      default=in_dir,
                      help="Input directory for html creation \n(default is %s)" % in_dir)

    # The help used to show `in_dir` as the default of this boolean flag.
    parser.add_option("--reports",
                      action="store_true",
                      dest="reports",
                      default=False,
                      help="Do the reports for the pickles \n(default is %s)" % False)

    ##---HASHING---##
    parser.add_option("--hash_name",
                      action="store_true",
                      dest="hash_name",
                      default=False,
                      help="Set if you want to minimize & hash the output HTML files.")

    ##--Blacklist File --##
    parser.add_option("--use_black_file",
                      action="store_true",
                      dest="blacklist_file",
                      default=False,
                      help="Use a black list file of histograms located @ /RelMon/data")

    ##-- USE CSS files in web access, for stand-alone usage --##
    parser.add_option("--standalone",
                      action="store_true",
                      dest="standalone",
                      default=False,
                      help="Define that using RelMon in standalone method. Makes CSS files accessible over HTTP")

    (options, args) = parser.parse_args()

    # Comparisons need either a directory of samples or both a reference and
    # a test list.
    have_samples = (len(options.all_samples) > 0 or
                    len(options.ref_samples) * len(options.test_samples) > 0)

    if not have_samples and len(options.input_dir) == 0:
        print("No samples given as input.")
        parser.print_help()
        exit(2)

    if have_samples:
        do_comparisons_threaded(options)
    # do_comparisons_threaded fills input_dir when it was empty, so the html
    # step also runs after a plain comparison.
    if len(options.input_dir) > 0:
        do_html(options, options.hash_name, options.standalone)
565 
566 
567 
568 
569 
570 
571 
572 
573 
574 
575 
576 
def guess_params(ref_filenames, test_filenames)
def name2globaltag(filename)
std::vector< T >::const_iterator search(const cond::Time_t &val, const std::vector< T > &container)
Definition: IOVProxy.cc:314
def name2run(filename)
def name2sample(filename)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def get_roofiles_in_dir(directory)
def do_html(options, hashing_flag, standalone)
def make_summary_table(indir, aggregation_rules, aggregation_rules_twiki, hashing_flag, standalone_flag)
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
def get_filenames_from_pool(all_samples)
def check_root_files(names_list)
def get_clean_fileanames(ref_samples, test_samples)
def call_compare_using_files(args)
def make_files_pairs(files, verbose=True)
Definition: utils.py:562
def do_comparisons_threaded(options)
def name2version(filename)
def guess_blacklists(samples, ver1, ver2, hlt)
def add_to_blacklist(blacklist, pattern, target, blist_piece)
def name2runskim(filename)
double split
Definition: MVATrainer.cc:139
def count_alive_processes(p_list)
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run