CMS 3D CMS Logo

ValidationMatrix.py
Go to the documentation of this file.
1 #! /usr/bin/env python3
2 
11 
12 from __future__ import print_function
13 from builtins import range
14 from optparse import OptionParser
15 
16 import os
17 import pickle
18 import glob
19 from re import search
20 from subprocess import call,PIPE
21 from multiprocessing import Pool
22 from sys import exit
23 
24 import sys
25 argv=sys.argv
26 sys.argv=[]
27 if "RELMON_SA" in os.environ:
28  import definitions as definitions
29  from dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
30  from dirstructure import Directory
31  from directories2html import directory2html,make_summary_table
32  from utils import ask_ok, unpickler, make_files_pairs
33 else:
34  import Utilities.RelMon.definitions as definitions
35  from Utilities.RelMon.dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
36  from Utilities.RelMon.dirstructure import Directory
37  from Utilities.RelMon.directories2html import directory2html,make_summary_table
38  from Utilities.RelMon.utils import ask_ok, unpickler, make_files_pairs
39 sys.argv=argv
40 
41 #-------------------------------------------------------------------------------
42 
def name2sample(filename):
    """Return the sample field of a DQM file name.

    The directory part of ``filename`` is discarded, the remaining basename
    is split on double underscores and the second field is returned.
    Raises IndexError when the basename has no second field.
    """
    fields = os.path.basename(filename).split("__")
    return fields[1]
46 
def name2version(filename):
    """Return the version field of a DQM file name.

    The basename of ``filename`` is split on double underscores and the
    third field is returned. Raises IndexError when that field is absent.
    """
    fields = os.path.basename(filename).split("__")
    return fields[2]
50 
def name2run(filename):
    """Return the run field of a DQM file name.

    Only the part of the basename before the first double underscore is
    considered; it is split on single underscores and the third piece is
    returned. Raises IndexError when there are fewer than three pieces.
    """
    head = os.path.basename(filename).partition("__")[0]
    pieces = head.split("_")
    return pieces[2]
54 
def name2runskim(filename):
    """Return ``"<run>_<skim>"`` for a DQM file name.

    The run comes from ``name2run``. The skim is the last
    underscore-separated piece of the version field (``name2version``);
    if it contains ``-v``, everything from the last ``-v`` onwards is
    dropped so that the skim version suffix is not part of the result.
    """
    skim = name2version(filename).split("_")[-1]
    if "-v" in skim:
        # Cut at the last "-v" to strip the skim version.
        skim = skim.rpartition("-v")[0]
    return "%s_%s" % (name2run(filename), skim)
62 
def name2globaltag(filename):
    """Return the global tag encoded in a DQM file name.

    The third double-underscore-separated field of the basename is split
    on ``-`` and its second piece is returned. Raises IndexError when the
    name does not have that structure.
    """
    version_field = os.path.basename(filename).split("__")[2]
    pieces = version_field.split("-")
    return pieces[1]
66 
67 #-------------------------------------------------------------------------------
68 
def guess_params(ref_filenames, test_filenames):
    """Derive sample names and release versions from paired file names.

    The two lists are walked in lockstep (extra entries in the longer list
    are ignored). For each pair the sample and version fields are extracted
    with ``name2sample`` / ``name2version``.

    Returns a tuple ``(samples, ref_version, test_version)`` where
    ``samples`` holds one entry per pair (taken from the reference file)
    and the two versions are those of the first pair. If either input list
    is empty, ``([], "", "")`` is returned.
    """
    if len(ref_filenames) == 0 or len(test_filenames) == 0:
        print("Empty reference and test filenames lists!")
        return [], "", ""

    samples = []
    ref_versions = []
    test_versions = []

    ref_names = map(os.path.basename, ref_filenames)
    test_names = map(os.path.basename, test_filenames)
    for ref, test in zip(ref_names, test_names):
        ref_sample = name2sample(ref)
        ref_version = name2version(ref)
        test_sample = name2sample(test)
        test_version = name2version(test)

        print(" ## sample 1: %s vs sample 2: %s" % (ref_sample, test_sample))

        # A mismatch is only reported; the comparison still goes ahead.
        if ref_sample != test_sample:
            print("Files %s and %s do not seem to be relative to the same sample." % (ref, test))

        # Slightly modify for data: append the last "_" piece of the version
        # when the version string matches "20[01]".
        if search("20[01]", ref_version) is not None:
            ref_sample += ref_version.split("_")[-1]
        samples.append(ref_sample)

        ref_versions.append(ref_version)
        test_versions.append(test_version)

    # Use the versions of the first pair. Picking element 0 of a set, as was
    # done before, selects an arbitrary version when several are present and
    # can change from run to run.
    return samples, ref_versions[0], test_versions[0]
114 
115 
116 #-------------------------------------------------------------------------------
117 
def check_root_files(names_list):
    """Check that every name in ``names_list`` ends with ``.root``.

    Returns True when all names pass (including for an empty list). On the
    first name that fails, a message naming that file is printed and False
    is returned.
    """
    for name in names_list:
        if not name.endswith(".root"):
            # The file name must be interpolated, otherwise a literal "%s"
            # is printed and the user cannot tell which file is wrong.
            print("File %s does not seem to be a rootfile. Please check." % name)
            return False
    return True
124 
125 #-------------------------------------------------------------------------------
126 
def add_to_blacklist(blacklist, pattern, target, blist_piece):
    """Append ``blist_piece`` to ``blacklist`` if ``pattern`` selects ``target``.

    ``pattern`` is stripped of surrounding whitespace and used as a regular
    expression searched in ``target``. A leading ``!`` negates the result.
    When the (possibly negated) condition holds, ``blist_piece`` is appended
    to the comma-separated ``blacklist`` string; otherwise ``blacklist`` is
    returned unchanged.

    An empty pattern is handled like any other regular expression (it is
    found in every target) instead of raising IndexError.
    """
    int_pattern = pattern.strip()

    # startswith() is safe on an empty string, unlike indexing [0].
    flip_condition = int_pattern.startswith('!')
    if flip_condition:
        int_pattern = int_pattern[1:]

    condition = search(int_pattern, target) is not None
    if flip_condition:
        condition = not condition

    if condition:
        if blacklist != "":  # if not the first, add a comma
            blacklist += ","
        blacklist += blist_piece
    return blacklist
147 
148 #-------------------------------------------------------------------------------
149 
def guess_blacklists(samples, ver1, ver2, hlt):
    """Build a blacklist string for each sample according to a set of rules.

    Every sample starts from a fixed base blacklist. Depending on ``hlt``
    and on whether ``ver1`` matches the year patterns used to recognise
    data, further entries are appended, either directly or through
    ``add_to_blacklist`` with the pattern/blacklist pairs in ``definitions``.

    ``ver2`` is accepted but not used. Returns a dict mapping each sample
    to its comma-separated blacklist string.
    """
    base = "FED@1,AlcaBeamMonitor@1,HLT@1,AlCaReco@1"
    result = {}
    for sample in samples:
        entry = base

        if hlt:
            # HLT
            entry += ",AlCaEcalPi0@2"
            # NOTE(review): this year pattern includes 2015 while the RECO
            # branch below does not - confirm whether that is intended.
            if search("2010+|2011+|2012+|2015+", ver1):
                # Nothing extra is added for data in the HLT case.
                print("We are treating Data files for the HLT")
            else:
                print("We are treating MC files for the HLT")
                for pattern, blist in definitions.hlt_mc_pattern_blist_pairs:
                    entry = add_to_blacklist(entry, pattern, sample, blist)
        elif search("2010+|2011+|2012+", ver1):
            # RECO, data: patterns are matched against the version string.
            print("We are treating Data files:")
            entry += ",By__Lumi__Section@-1,AlCaReco@1"
            for pattern, blist in definitions.data_pattern_blist_pairs:
                entry = add_to_blacklist(entry, pattern, ver1, blist)
        else:
            # RECO, Monte Carlo: patterns are matched against the sample name.
            print("We are treating MC files")
            for pattern, blist in definitions.mc_pattern_blist_pairs:
                entry = add_to_blacklist(entry, pattern, sample, blist)

        result[sample] = entry

    return result
188 
189 #-------------------------------------------------------------------------------
190 
def get_roofiles_in_dir(directory):
    """Return the paths of the ``.root`` files directly inside ``directory``.

    The directory name is printed, then every entry of ``os.listdir`` whose
    name ends with ``.root`` is joined with ``directory``. A real list is
    returned (not a lazy ``map`` object) because the caller takes its
    ``len()`` and iterates over it more than once.
    """
    print(directory)
    return [os.path.join(directory, name)
            for name in os.listdir(directory)
            if name.endswith(".root")]
197 
198 #-------------------------------------------------------------------------------
199 
def get_filenames_from_pool(all_samples):
    """Pair up the root files found in the directory ``all_samples``.

    The files are listed with ``get_roofiles_in_dir``, arranged by
    ``make_files_pairs`` and then split alternately: entries at even
    positions become reference files, entries at odd positions become
    test files. The proposed pairing is printed.

    Returns ``(ref_filenames, test_filenames)``; both are empty lists when
    no root file is found.
    """
    # get a list of the files
    files_list = get_roofiles_in_dir(all_samples)

    if len(files_list) == 0:
        print("Zero files found in directory %s!" % all_samples)
        return [], []

    for name in files_list:
        print("* ", name)

    # An odd number of files is only reported; pairing is attempted anyway.
    if len(files_list) % 2 != 0:
        print("The numbuer of file is not even... Trying to recover a catastrophe.")

    files_list = make_files_pairs(files_list)

    # Even positions are references, odd positions are tests.
    ref_filenames = []
    test_filenames = []
    for position, filename in enumerate(files_list):
        bucket = ref_filenames if position % 2 == 0 else test_filenames
        bucket.append(filename)

    print("The guess would be the following:")
    for ref, test in zip(ref_filenames, test_filenames):
        refbasedir = os.path.dirname(ref)
        testbasedir = os.path.dirname(test)
        if refbasedir == testbasedir:
            dir_to_print = refbasedir
        else:
            dir_to_print = "%s and %s" % (refbasedir, testbasedir)
        print("* Directory: %s " % dir_to_print)
        print(" o %s" % os.path.basename(ref))
        print(" o %s" % os.path.basename(test))

    return ref_filenames, test_filenames
250 
251 
252 #-------------------------------------------------------------------------------
253 
def get_clean_fileanames(ref_samples, test_samples):
    """Turn two comma-separated strings into lists of file names.

    Each string is split on commas and every item is stripped of
    surrounding whitespace. The process exits with status 2 if the two
    lists differ in length or if ``check_root_files`` rejects either list.

    Returns ``(ref_filenames, test_filenames)``.
    """
    ref_filenames = [item.strip() for item in ref_samples.split(",")]
    test_filenames = [item.strip() for item in test_samples.split(",")]

    if len(ref_filenames) != len(test_filenames):
        print("The numebr of reference and test files does not seem to be the same. Please check.")
        exit(2)

    # The test list is only checked when the reference list passed.
    all_root = check_root_files(ref_filenames) and check_root_files(test_filenames)
    if not all_root:
        exit(2)
    return ref_filenames, test_filenames
266 
267 #-------------------------------------------------------------------------------
268 
def count_alive_processes(p_list):
    """Return how many entries of ``p_list`` have ``returncode`` set to None.

    Each entry must expose a ``returncode`` attribute.
    """
    return len([p for p in p_list if p.returncode is None])
271 
272 #-------------------------------------------------------------------------------
273 
def call_compare_using_files(args):
    """Build the compare_using_files.py command line for one pair and run it.

    ``args`` is a sequence ``(sample, ref_filename, test_filename, options)``
    where ``options`` carries the parsed command-line options of this
    script. The blacklist for the sample is computed with
    ``guess_blacklists`` and the output name is ``<sample>_<global tag>``.

    Note that ``options.test_threshold`` is overwritten with 0.9999 when the
    statistical test is ``Bin2Bin`` or ``BinToBin``.

    Returns the exit status returned by ``subprocess.call``.
    """
    sample, ref_filename, test_filename, options = args
    gt = name2globaltag(ref_filename)
    blacklists = guess_blacklists([sample],
                                  name2version(ref_filename),
                                  name2version(test_filename),
                                  options.hlt)

    # The command is assembled as an argument list rather than a string that
    # is later split on spaces, so file names containing spaces stay intact.
    command = ["compare_using_files.py", ref_filename, test_filename, "-C", "-R"]
    if options.do_pngs:
        command.append("-p")
    command += ["-o", "%s_%s" % (sample, gt)]
    command.append("--specify_run")

    # Bin-to-bin comparisons use a fixed threshold of 0.9999.
    if options.stat_test in ["Bin2Bin", "BinToBin"]:
        options.test_threshold = 0.9999
    command += ["-t", "%s" % options.test_threshold]
    command += ["-s", "%s" % options.stat_test]

    # Inspect the HLT directories
    if options.hlt:
        command += ["-d", "HLT"]

    if options.hash_name:
        command.append("--hash_name")

    if options.blacklist_file:
        command.append("--use_black_file")

    if options.standalone:
        command.append("--standalone")

    if len(blacklists[sample]) > 0:
        command += ["-B", blacklists[sample]]

    print("\nExecuting -- %s" % " ".join(command))

    return call(command)
311 
312 
313 #--------------------------------------------------------------------------------
314 
def do_comparisons_threaded(options):
    """Run all reference/test comparisons in a pool of worker processes.

    The file pairs come either from the directory ``options.all_samples``
    or from the comma-separated lists ``options.ref_samples`` and
    ``options.test_samples``. The work is done inside the output directory
    (``options.out_dir`` or, if empty, ``<version1>VS<version2>``), which is
    created when missing. ``options.input_dir`` is set to the output
    directory when it was left empty.

    Returns 0 when no samples are found, otherwise None. The working
    directory in effect at call time is restored before returning.
    """
    n_processes = int(options.n_processes)

    if len(options.all_samples) > 0:
        ref_filenames, test_filenames = get_filenames_from_pool(options.all_samples)
    else:
        ref_filenames, test_filenames = get_clean_fileanames(options.ref_samples, options.test_samples)

    # make the paths absolute (the working directory changes below)
    ref_filenames = list(map(os.path.abspath, ref_filenames))
    test_filenames = list(map(os.path.abspath, test_filenames))

    samples, cmssw_version1, cmssw_version2 = guess_params(ref_filenames, test_filenames)

    if len(samples) == 0:
        print("No Samples found... Quitting")
        return 0

    original_dir = os.getcwd()

    outdir = options.out_dir
    if len(outdir) == 0:
        print("Creating automatic outdir:", end=' ')
        outdir = "%sVS%s" % (cmssw_version1, cmssw_version2)
        print(outdir)
    if len(options.input_dir) == 0:
        print("Creating automatic indir:", end=' ')
        options.input_dir = outdir
        print(options.input_dir)

    if not os.path.exists(outdir):
        # makedirs also handles an output directory given as a nested path.
        os.makedirs(outdir)
    os.chdir(outdir)

    try:
        # adjust the number of processes
        n_comparisons = len(ref_filenames)
        if n_comparisons < n_processes:
            print("Less comparisons than possible processes: reducing n processes to", end=' ')
            n_processes = n_comparisons
            print(n_processes)

        # Launch the single comparisons
        args_iterable = [list(args) + [options]
                         for args in zip(samples, ref_filenames, test_filenames)]
        pool = Pool(n_processes)
        try:
            pool.map(call_compare_using_files, args_iterable)
        finally:
            # Release the worker processes even if a comparison failed.
            pool.close()
            pool.join()

        # move the pickles on the top, hack
        os.system("mv */*pkl .")
    finally:
        # Go back to where we started; a plain chdir("..") is wrong when the
        # output directory is nested or absolute.
        os.chdir(original_dir)
392 #-------------------------------------------------------------------------------
def do_reports(indir):
    """Produce a report for every pickle file found in ``indir``.

    For each directory entry whose name contains ``.pkl`` a
    ``compare_using_files.py -R -P <pickle>`` process is started, with the
    output name being the pickle name minus its last four characters.
    Processes are started in batches of ``options.n_processes`` and each
    batch is waited for before the next one begins.

    Reads the module-level ``options`` object (``n_processes``,
    ``do_pngs``). The working directory in effect at call time is restored
    before returning.
    """
    # Local import: the file-level import only provides call and PIPE, and
    # call() blocks and returns an int, which has no wait() method.
    from subprocess import Popen

    original_dir = os.getcwd()
    os.chdir(indir)
    try:
        pkl_list = [x for x in os.listdir("./") if ".pkl" in x]
        n_processes = int(options.n_processes)
        running_subprocesses = []
        for pklfilename in pkl_list:
            command = ["compare_using_files.py", "-R"]
            if options.do_pngs:
                command.append("-p")
            command += ["-P", pklfilename]
            command += ["-o", pklfilename[:-4]]
            print("Executing %s" % " ".join(command))
            running_subprocesses.append(Popen(command))

            # Batch is full: wait for all of it, then start a new batch.
            if len(running_subprocesses) >= n_processes:
                for p in running_subprocesses:
                    p.wait()
                running_subprocesses = []

        # Wait for the last, possibly incomplete, batch.
        for p in running_subprocesses:
            p.wait()
    finally:
        os.chdir(original_dir)
419 
420 #-------------------------------------------------------------------------------
def do_html(options, hashing_flag, standalone):
    """Write the summary page ``RelMonSummary.html`` in the current directory.

    When ``options.reports`` is set, the per-pickle reports are produced
    first with ``do_reports(options.input_dir)``. The aggregation rules are
    taken from ``definitions`` under the key ``'HLT'`` when ``options.hlt``
    is set and ``'reco'`` otherwise, and passed to ``make_summary_table``
    together with ``hashing_flag`` and ``standalone``.
    """
    if options.reports:
        print("Preparing reports for the single files...")
        do_reports(options.input_dir)

    # check which aggregation rules are to be used
    if options.hlt:
        print("Aggregating directories according to HLT rules")
        rules_key = 'HLT'
    else:
        rules_key = 'reco'
    aggregation_rules = definitions.aggr_pairs_dict[rules_key]
    aggregation_rules_twiki = definitions.aggr_pairs_twiki_dict[rules_key]

    # Do the summary page
    table_html = make_summary_table(options.input_dir,
                                    aggregation_rules,
                                    aggregation_rules_twiki,
                                    hashing_flag,
                                    standalone)

    # create summary html file; "with" closes it even if the write fails
    with open("RelMonSummary.html", "w") as ofile:
        ofile.write(table_html)
443 
444 #-------------------------------------------------------------------------------
445 
if __name__ == "__main__":
    # Script entry point: parse the command line, run the comparisons when
    # samples are given, then build the html summary when an input directory
    # is known (do_comparisons_threaded fills it in when left empty).

    #-----------------------------------------------------------------------------
    # Default values of the options
    ref_samples=""
    test_samples=""
    all_samples=""
    n_processes=1
    out_dir=""
    in_dir=""
    n_threads=1 # do not change this
    run=-1
    stat_test="Chi2"
    test_threshold=0.00001
    hlt=False
    #-----------------------------------------------------------------------------

    parser = OptionParser(usage="usage: %prog [options]")

    # Option strings must not carry trailing spaces: with one, the long
    # option is only reachable through optparse's abbreviation matching.
    parser.add_option("-R","--ref_samples",
                      action="store",
                      dest="ref_samples",
                      default=ref_samples,
                      help="The samples that act as reference (comma separated list)")

    parser.add_option("-T","--test_samples",
                      action="store",
                      dest="test_samples",
                      default=test_samples,
                      help="The samples to be tested (comma separated list)")

    parser.add_option("-a","--all_samples",
                      action="store",
                      dest="all_samples",
                      default=all_samples,
                      help="EXPERIMENTAL: Try to sort all samples selected (wildacrds) and organise a comparison")

    parser.add_option("-o","--out_dir",
                      action="store",
                      dest="out_dir",
                      default=out_dir,
                      help="The outdir other than <Version1>VS<Version2>")

    parser.add_option("-p","--do_pngs",
                      action="store_true",
                      dest="do_pngs",
                      default=False,
                      help="EXPERIMENTAL!!! Do the pngs of the comparison (takes 50%% of the total running time) \n(default is %s)" %False)

    parser.add_option("-r","--run",
                      action="store",
                      dest="run",
                      default=run,
                      help="The run to be checked \n(default is %s)" %run)

    parser.add_option("-t","--test_threshold",
                      action="store",
                      dest="test_threshold",
                      default=test_threshold,
                      help="Threshold for the statistical test \n(default is %s)" %test_threshold)

    parser.add_option("-s","--stat_test",
                      action="store",
                      dest="stat_test",
                      default=stat_test,
                      help="Statistical test (KS or Chi2) \n(default is %s)" %stat_test)

    parser.add_option("-N","--numberOfProcesses",
                      action="store",
                      dest="n_processes",
                      default=n_processes,
                      help="Number of parallel processes to be run. Be Polite! \n(default is %s)" %n_processes)

    parser.add_option("--HLT",
                      action="store_true",
                      dest="hlt",
                      default=False,
                      help="Analyse HLT histograms\n(default is %s)" %hlt)

    parser.add_option("-i","--input_dir",
                      action="store",
                      dest="input_dir",
                      default=in_dir,
                      help="Input directory for html creation \n(default is %s)" %in_dir)

    # The default shown in the help is the real default of this flag (False),
    # not the default of the input directory.
    parser.add_option("--reports",
                      action="store_true",
                      dest="reports",
                      default=False,
                      help="Do the reports for the pickles \n(default is %s)" %False)

    parser.add_option("--hash_name",
                      action="store_true",
                      dest="hash_name",
                      default=False,
                      help="Set if you want to minimize & hash the output HTML files.")

    parser.add_option("--use_black_file",
                      action="store_true",
                      dest="blacklist_file",
                      default=False,
                      help="Use a black list file of histograms located @ /RelMon/data")

    parser.add_option("--standalone",
                      action="store_true",
                      dest="standalone",
                      default=False,
                      help="Define that using RelMon in standalone method. Makes CSS files accessible over HTTP")

    # "options" stays a module-level name: do_reports() reads it directly.
    (options, args) = parser.parse_args()

    # Nothing to do: neither a ref/test pair, nor a pool directory, nor an
    # input directory for the html step was given.
    if len(options.test_samples)*len(options.ref_samples)+len(options.all_samples)==0 and len(options.input_dir)==0:
        print("No samples given as input.")
        parser.print_help()
        exit(2)

    if len(options.all_samples)>0 or (len(options.ref_samples)*len(options.test_samples)>0):
        do_comparisons_threaded(options)
    if len(options.input_dir)>0:
        do_html(options, options.hash_name, options.standalone)
566 
567 
568 
569 
570 
571 
572 
573 
574 
575 
576 
577 
def guess_params(ref_filenames, test_filenames)
def name2globaltag(filename)
std::vector< T >::const_iterator search(const cond::Time_t &val, const std::vector< T > &container)
Definition: IOVProxy.cc:21
def name2run(filename)
def name2sample(filename)
def get_roofiles_in_dir(directory)
def do_html(options, hashing_flag, standalone)
def make_summary_table(indir, aggregation_rules, aggregation_rules_twiki, hashing_flag, standalone_flag)
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def get_filenames_from_pool(all_samples)
def check_root_files(names_list)
def get_clean_fileanames(ref_samples, test_samples)
def call_compare_using_files(args)
def make_files_pairs(files, verbose=True)
Definition: utils.py:573
def do_comparisons_threaded(options)
def name2version(filename)
def guess_blacklists(samples, ver1, ver2, hlt)
def add_to_blacklist(blacklist, pattern, target, blist_piece)
def name2runskim(filename)
def count_alive_processes(p_list)
def exit(msg="")