CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
ValidationMatrix.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 ################################################################################
3 # RelMon: a tool for automatic Release Comparison
4 # https://twiki.cern.ch/twiki/bin/view/CMSPublic/RelMon
5 #
6 # $Author: anorkus $
7 # $Date: 2012/10/25 16:10:22 $
8 # $Revision: 1.8 $
9 #
10 #
11 # Danilo Piparo CERN - danilo.piparo@cern.ch
12 #
13 ################################################################################
14 
15 from optparse import OptionParser
16 
17 import os
18 import cPickle
19 import glob
20 from re import search
21 from subprocess import call,PIPE
22 from multiprocessing import Pool
23 from sys import exit
24 
import sys

# The command line is hidden while the RelMon modules are imported and
# restored right afterwards.
# NOTE(review): presumably this keeps an imported dependency from parsing
# this script's own options -- confirm.
argv = sys.argv
sys.argv = []
if "RELMON_SA" in os.environ:
    # RELMON_SA is set: the RelMon modules are imported as top-level modules.
    import definitions as definitions
    from dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
    from dirstructure import Directory
    from directories2html import directory2html,make_summary_table
    from utils import ask_ok, unpickler, make_files_pairs
else:
    # Otherwise they are imported from the Utilities.RelMon package.
    import Utilities.RelMon.definitions as definitions
    from Utilities.RelMon.dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
    from Utilities.RelMon.dirstructure import Directory
    from Utilities.RelMon.directories2html import directory2html,make_summary_table
    from Utilities.RelMon.utils import ask_ok, unpickler, make_files_pairs
sys.argv = argv
41 
42 #-------------------------------------------------------------------------------
43 
def name2sample(filename):
    """Return the second "__"-separated field of the file's base name.

    The directory part of *filename* is ignored.  Raises IndexError when the
    base name contains no "__" separator.
    """
    fields = os.path.basename(filename).split("__")
    return fields[1]
47 
def name2version(filename):
    """Return the third "__"-separated field of the file's base name.

    The directory part of *filename* is ignored.  Raises IndexError when the
    base name has fewer than three "__"-separated fields.
    """
    _, tail = os.path.split(filename)
    return tail.split("__")[2]
51 
def name2run(filename):
    """Return the third "_"-separated token of the leading "__" field.

    Only the base name of *filename* is inspected: the text before the first
    "__" is split on "_" and its third token is returned.  Raises IndexError
    when that prefix has fewer than three tokens.
    """
    prefix = os.path.basename(filename).split("__", 1)[0]
    return prefix.split("_")[2]
55 
def name2runskim(filename):
    """Return "<run>_<skim>" built from the fields of *filename*.

    The run comes from name2run(); the skim is the last "_"-separated token
    of name2version(), truncated at its last "-v" when one is present.
    """
    run_label = name2run(filename)
    skim_label = name2version(filename).split("_")[-1]
    # Drop the trailing skim version ("-v...") if there is one.
    cut = skim_label.rfind("-v")
    if cut != -1:
        skim_label = skim_label[:cut]
    return "%s_%s" % (run_label, skim_label)
63 
64 #-------------------------------------------------------------------------------
65 
def guess_params(ref_filenames,test_filenames):
    """Derive the sample names and the two versions from paired file names.

    *ref_filenames* and *test_filenames* are walked in lockstep.  For every
    pair the sample (name2sample) must be identical, otherwise the program
    exits with status 2.  When the reference version matches "20[01]" the
    last "_"-separated token of that version is appended to the sample name.

    Returns a tuple ``(samples, ref_version, test_version)``.  The versions
    are those of the first pair, so the result is deterministic even when
    several versions are present.  Returns ``([], "", "")`` if either input
    list is empty.
    """
    if not ref_filenames or not test_filenames:
        print("Empty reference and test filenames lists!")
        return [], "", ""

    samples = []
    ref_versions = []
    test_versions = []

    for ref_path, test_path in zip(ref_filenames, test_filenames):
        ref = os.path.basename(ref_path)
        test = os.path.basename(test_path)

        ref_sample = name2sample(ref)
        ref_version = name2version(ref)
        test_sample = name2sample(test)
        test_version = name2version(test)

        if ref_sample != test_sample:
            print("Files %s and %s do not seem to be relative to the same sample." % (ref, test))
            exit(2)

        # Slightly modify for data
        if search("20[01]", ref_version) is not None:
            ref_sample += ref_version.split("_")[-1]
        samples.append(ref_sample)

        # Versions are kept in encounter order; mixed versions are tolerated
        # and the first one seen is reported.
        ref_versions.append(ref_version)
        test_versions.append(test_version)

    cmssw_version1 = ref_versions[0]
    cmssw_version2 = test_versions[0]

    return samples, cmssw_version1, cmssw_version2
109 
110 
111 #-------------------------------------------------------------------------------
112 
def check_root_files(names_list):
    """Return True when every name in *names_list* ends with ".root".

    On the first name that does not, a message naming that file is printed
    and False is returned.  An empty list yields True.
    """
    for name in names_list:
        if not name.endswith(".root"):
            # The offending file name is interpolated into the message.
            print("File %s does not seem to be a rootfile. Please check." % name)
            return False
    return True
119 
120 #-------------------------------------------------------------------------------
121 
def add_to_blacklist(blacklist, pattern, target, blist_piece):
    """Append *blist_piece* to *blacklist* when *pattern* applies to *target*.

    *pattern* is stripped of surrounding whitespace and used as a regular
    expression searched in *target*.  A leading "!" negates the condition:
    the piece is added when the rest of the pattern is NOT found.  Pieces are
    comma separated; no comma is emitted when *blacklist* is still empty.

    Returns the (possibly extended) blacklist string.
    """
    int_pattern = pattern.strip()
    # startswith() is safe on an empty pattern, unlike indexing [0].
    flip_condition = int_pattern.startswith("!")
    if flip_condition:
        int_pattern = int_pattern[1:]

    condition = search(int_pattern, target) is not None
    if flip_condition:
        condition = not condition

    if condition:
        if blacklist != "":  # if not the first, add a comma
            blacklist += ","
        blacklist += blist_piece
    return blacklist
142 
143 #-------------------------------------------------------------------------------
144 
def guess_blacklists(samples,ver1,ver2,hlt):
    """Build a blacklist for each sample according to a set of rules.

    Every sample starts from a fixed base blacklist.  Pattern/blacklist pairs
    from ``definitions`` are then applied through add_to_blacklist():

    * HLT, MC:   ``hlt_mc_pattern_blist_pairs`` matched against the sample;
    * HLT, data: nothing further is added;
    * RECO, MC:  ``mc_pattern_blist_pairs`` matched against the sample;
    * RECO, data: ``data_pattern_blist_pairs`` matched against *ver1*.

    Files count as data when *ver1* matches "2010+|2011+".  *ver2* is
    accepted but not used.  Returns a dict mapping sample -> blacklist string.
    """
    # NOTE(review): only 2010/2011 are recognised as data here, whereas
    # guess_params uses the looser "20[01]" -- confirm which one is intended.
    is_data = search("2010+|2011+", ver1) is not None

    blacklists = {}
    for sample in samples:
        blist = "FED@1,AlcaBeamMonitor@1,Physics@1,Info@-1,HLT@1,AlCaReco@1"

        if hlt:  # HLT
            blist += ",AlCaEcalPi0@2"
            if is_data:
                print("We are treating Data files for the HLT")
                # at the moment it does not make sense since hlt is ran already
            else:
                print("We are treating MC files for the HLT")
                for pattern, piece in definitions.hlt_mc_pattern_blist_pairs:
                    blist = add_to_blacklist(blist, pattern, sample, piece)
        elif is_data:  # RECO, data
            print("We are treating Data files:")
            blist += ",By__Lumi__Section@-1,AlCaReco@1"
            for pattern, piece in definitions.data_pattern_blist_pairs:
                blist = add_to_blacklist(blist, pattern, ver1, piece)
        else:  # RECO, Monte Carlo
            print("We are treating MC files")
            for pattern, piece in definitions.mc_pattern_blist_pairs:
                blist = add_to_blacklist(blist, pattern, sample, piece)

        blacklists[sample] = blist

    return blacklists
186 
187 #-------------------------------------------------------------------------------
188 
def get_roofiles_in_dir(directory):
    """Return the paths of the "*.root" entries directly inside *directory*.

    The directory name is printed first.  Each returned path is *directory*
    joined with the entry name, in the order given by os.listdir().  A real
    list is returned so that callers can take its length and index it.
    """
    print(directory)
    return [os.path.join(directory, entry)
            for entry in os.listdir(directory)
            if entry.endswith(".root")]
195 
196 #-------------------------------------------------------------------------------
197 
def get_filenames_from_pool(all_samples):
    """Pair up the root files found in the directory *all_samples*.

    The files are listed with get_roofiles_in_dir(), reordered by
    make_files_pairs(), and then split alternately: even positions become
    reference files, odd positions become test files.  The proposed pairing
    is printed.

    Returns ``(ref_filenames, test_filenames)``, or ``([], [])`` when the
    directory holds no root file.
    """
    # get a list of the files
    files_list = get_roofiles_in_dir(all_samples)

    if len(files_list) == 0:
        print("Zero files found in directory %s!" % all_samples)
        return [], []

    # Are they an even number?
    for name in files_list:
        print("*  %s" % name)
    if len(files_list) % 2 != 0:
        print("The numbuer of file is not even... Trying to recover a catastrophe.")

    files_list = make_files_pairs(files_list)

    # Couple them by position: (0,1), (2,3), ...
    ref_filenames = list(files_list[0::2])
    test_filenames = list(files_list[1::2])

    print("The guess would be the following:")
    for ref, test in zip(ref_filenames, test_filenames):
        refbasedir = os.path.dirname(ref)
        testbasedir = os.path.dirname(test)
        dir_to_print = refbasedir
        if refbasedir != testbasedir:
            dir_to_print = "%s and %s" % (refbasedir, testbasedir)
        print("* Directory: %s " % dir_to_print)
        print(" o %s" % os.path.basename(ref))
        print(" o %s" % os.path.basename(test))

    return ref_filenames, test_filenames
248 
249 
250 #-------------------------------------------------------------------------------
251 
def get_clean_fileanames(ref_samples,test_samples):
    """Split two comma separated strings into lists of root file names.

    Each name is stripped of surrounding whitespace.  The program exits with
    status 2 when the two lists differ in length or when check_root_files()
    rejects either of them.

    Returns ``(ref_filenames, test_filenames)`` as lists.
    """
    # Process the samples starting from the names
    ref_filenames = [name.strip() for name in ref_samples.split(",")]
    test_filenames = [name.strip() for name in test_samples.split(",")]

    if len(ref_filenames) != len(test_filenames):
        print("The numebr of reference and test files does not seem to be the same. Please check.")
        exit(2)

    if not (check_root_files(ref_filenames) and check_root_files(test_filenames)):
        exit(2)
    return ref_filenames, test_filenames
264 
265 #-------------------------------------------------------------------------------
266 
268  return len(filter(lambda p: p.returncode==None,p_list))
269 
270 #-------------------------------------------------------------------------------
271 
def call_compare_using_files(args):
    """Run compare_using_files.py on one reference/test pair.

    *args* is a sequence ``(sample, ref_filename, test_filename, options)``;
    a single argument is used so the function can be handed to Pool.map().
    The blacklist for the sample is obtained from guess_blacklists().

    Returns the exit status reported by subprocess.call().
    """
    sample, ref_filename, test_filename, options = args
    blacklists = guess_blacklists([sample], name2version(ref_filename),
                                  name2version(test_filename), options.hlt)

    # The argument vector is built directly as a list, so file names that
    # contain blanks are passed through intact.
    command = ["compare_using_files.py", ref_filename, test_filename, "-C", "-R"]
    if options.do_pngs:
        command.append("-p")
    command += ["-o", sample]
    command.append("--specify_run")
    command += ["-t", "%s" % options.test_threshold]
    command += ["-s", "%s" % options.stat_test]

    # Inspect the HLT directories
    if options.hlt:
        command += ["-d", "HLT"]

    if options.hash_name:
        command.append("--hash_name")

    if options.blacklist_file:
        command.append("--use_black_file")

    if len(blacklists[sample]) > 0:
        command += ["-B", blacklists[sample]]
    print("\nExecuting -- %s" % " ".join(command))

    return call(command)
304 
305 
306 #--------------------------------------------------------------------------------
307 
309 
def do_comparisons_threaded(options):
    """Compare all reference/test pairs in a pool of worker processes.

    The file pairs come from ``options.all_samples`` (a directory) when it is
    set, otherwise from ``options.ref_samples`` / ``options.test_samples``.
    The comparisons run inside the output directory, which defaults to
    "<version1>VS<version2>" and is created if missing.  An empty
    ``options.input_dir`` is set to the output directory.

    Returns 0 when no sample is found, otherwise None.  The working directory
    in effect on entry is restored before returning.
    """
    n_processes = int(options.n_processes)

    if len(options.all_samples) > 0:
        ref_filenames, test_filenames = get_filenames_from_pool(options.all_samples)
    else:
        ref_filenames, test_filenames = get_clean_fileanames(options.ref_samples, options.test_samples)

    # make the paths absolute
    ref_filenames = [os.path.abspath(name) for name in ref_filenames]
    test_filenames = [os.path.abspath(name) for name in test_filenames]

    samples, cmssw_version1, cmssw_version2 = guess_params(ref_filenames, test_filenames)

    if len(samples) == 0:
        print("No Samples found... Quitting")
        return 0

    # Launch the single comparisons
    original_dir = os.getcwd()

    outdir = options.out_dir
    if len(outdir) == 0:
        outdir = "%sVS%s" % (cmssw_version1, cmssw_version2)
        print("Creating automatic outdir: %s" % outdir)
    if len(options.input_dir) == 0:
        options.input_dir = outdir
        print("Creating automatic indir: %s" % options.input_dir)

    if not os.path.exists(outdir):
        # makedirs also handles an output directory given as a nested path.
        os.makedirs(outdir)
    os.chdir(outdir)

    try:
        # Never start more workers than there are comparisons.
        n_comparisons = len(ref_filenames)
        if n_comparisons < n_processes:
            n_processes = n_comparisons
            print("Less comparisons than possible processes: reducing n processes to %s" % n_processes)

        ## Compare all pairs of root files
        args_iterable = [list(args) + [options]
                         for args in zip(samples, ref_filenames, test_filenames)]
        pool = Pool(n_processes)
        try:
            pool.map(call_compare_using_files, args_iterable)
        finally:
            pool.close()
            pool.join()

        # move the pickles on the top, hack
        os.system("mv */*pkl .")
    finally:
        # Go back to where we started, whatever the depth of outdir.
        os.chdir(original_dir)
385 #-------------------------------------------------------------------------------
def do_reports(indir):
    """Run compare_using_files.py in report mode on every pickle in *indir*.

    For each "*.pkl" file in *indir* the script is invoked with "-R -P <pkl>"
    and an output name equal to the pickle name without its extension ("-p"
    is added when pngs are requested).  The commands run one after the other
    because subprocess.call() blocks until the child exits.

    Reads the module-level ``options`` object (``options.do_pngs``), which
    the script's main section creates.  The working directory in effect on
    entry is restored before returning.
    """
    original_dir = os.getcwd()
    os.chdir(indir)
    try:
        pkl_list = [name for name in os.listdir("./") if name.endswith(".pkl")]
        for pklfilename in pkl_list:
            command = ["compare_using_files.py", "-R"]
            if options.do_pngs:
                command.append("-p")
            command += ["-P", pklfilename]
            command += ["-o", pklfilename[:-4]]
            print("Executing %s" % " ".join(command))
            # call() returns the exit status only after the child has
            # finished, so there is nothing left to wait for.
            call(command)
    finally:
        os.chdir(original_dir)
412 
413 #-------------------------------------------------------------------------------
def do_html(options, hashing_flag):
    """Write the summary page "RelMonSummary.html" in the current directory.

    When ``options.reports`` is set, the per-pickle reports are produced
    first through do_reports().  The aggregation rules are taken from
    ``definitions`` under the key 'HLT' or 'reco' depending on
    ``options.hlt``, and handed to make_summary_table() together with
    ``options.input_dir`` and *hashing_flag*.
    """
    if options.reports:
        print("Preparing reports for the single files...")
        do_reports(options.input_dir)

    # check which aggregation rules are to be used
    if options.hlt:
        print("Aggregating directories according to HLT rules")
        rules_key = 'HLT'
    else:
        rules_key = 'reco'
    aggregation_rules = definitions.aggr_pairs_dict[rules_key]
    aggregation_rules_twiki = definitions.aggr_pairs_twiki_dict[rules_key]

    # Do the summary page
    table_html = make_summary_table(options.input_dir, aggregation_rules,
                                    aggregation_rules_twiki, hashing_flag)

    # create summary html file; the handle is closed even if write() fails
    with open("RelMonSummary.html", "w") as ofile:
        ofile.write(table_html)
436 
437 #-------------------------------------------------------------------------------
438 
if __name__ == "__main__":

    #-----------------------------------------------------------------------------
    # Default values of the command line options
    ref_samples=""
    test_samples=""
    all_samples=""
    n_processes=1
    out_dir=""
    in_dir=""
    n_threads=1 # do not change this
    run=-1
    stat_test="Chi2"
    test_threshold=0.00001
    hlt=False
    #-----------------------------------------------------------------------------

    parser = OptionParser(usage="usage: %prog [options]")

    # Option names carry no trailing blank, so the long forms match exactly.
    parser.add_option("-R","--ref_samples",
                      action="store",
                      dest="ref_samples",
                      default=ref_samples,
                      help="The samples that act as reference (comma separated list)")

    parser.add_option("-T","--test_samples",
                      action="store",
                      dest="test_samples",
                      default=test_samples,
                      help="The samples to be tested (comma separated list)")

    parser.add_option("-a","--all_samples",
                      action="store",
                      dest="all_samples",
                      default=all_samples,
                      help="EXPERIMENTAL: Try to sort all samples selected (wildacrds) and organise a comparison")

    parser.add_option("-o","--out_dir",
                      action="store",
                      dest="out_dir",
                      default=out_dir,
                      help="The outdir other than <Version1>VS<Version2>")

    parser.add_option("-p","--do_pngs",
                      action="store_true",
                      dest="do_pngs",
                      default=False,
                      help="EXPERIMENTAL!!! Do the pngs of the comparison (takes 50%% of the total running time) \n(default is %s)" %False)

    parser.add_option("-r","--run",
                      action="store",
                      dest="run",
                      default=run,
                      help="The run to be checked \n(default is %s)" %run)

    parser.add_option("-t","--test_threshold",
                      action="store",
                      dest="test_threshold",
                      default=test_threshold,
                      help="Threshold for the statistical test \n(default is %s)" %test_threshold)

    parser.add_option("-s","--stat_test",
                      action="store",
                      dest="stat_test",
                      default=stat_test,
                      help="Statistical test (KS or Chi2) \n(default is %s)" %stat_test)

    parser.add_option("-N","--numberOfProcesses",
                      action="store",
                      dest="n_processes",
                      default=n_processes,
                      help="Number of parallel processes to be run. Be Polite! \n(default is %s)" %n_processes)

    parser.add_option("--HLT",
                      action="store_true",
                      dest="hlt",
                      default=False,
                      help="Analyse HLT histograms\n(default is %s)" %hlt)

    parser.add_option("-i","--input_dir",
                      action="store",
                      dest="input_dir",
                      default=in_dir,
                      help="Input directory for html creation \n(default is %s)" %in_dir)

    # The help text reports the real default of this flag (False).
    parser.add_option("--reports",
                      action="store_true",
                      dest="reports",
                      default=False,
                      help="Do the reports for the pickles \n(default is %s)" %False)
    ##---HASHING---##
    parser.add_option("--hash_name",
                      action="store_true",
                      dest="hash_name",
                      default=False,
                      help="Set if you want to minimize & hash the output HTML files.")
    ##--Blacklist File --##
    parser.add_option("--use_black_file",
                      action="store_true",
                      dest="blacklist_file",
                      default=False,
                      help="Use a black list file of histograms located @ /RelMon/data")

    (options, args) = parser.parse_args()

    # Something to compare: either a pool directory or both sample lists.
    have_samples = (len(options.all_samples)>0 or
                    len(options.ref_samples)*len(options.test_samples)>0)

    if not have_samples and len(options.input_dir)==0:
        print("No samples given as input.")
        parser.print_help()
        exit(2)

    if have_samples:
        do_comparisons_threaded(options)
    # do_comparisons_threaded fills in input_dir when it was left empty.
    if len(options.input_dir)>0:
        do_html(options, options.hash_name)
553 
554 
555 
556 
557 
558 
559 
560 
561 
562 
563 
564 
def make_files_pairs
Definition: utils.py:552
double split
Definition: MVATrainer.cc:139
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run
void set(const std::string &name, int value)
set the flag, with a run-time name