CMS 3D CMS Logo

compareDQMOutput.py
Go to the documentation of this file.
1 #!/bin/env python3
2 
3 import os
4 import sys
5 import glob
6 import argparse
7 import subprocess
8 from threading import Thread
9 
# Module-wide accumulator of per-file comparison results.
# compare() appends one dict per successfully compared file,
# collect_and_compare_files() sorts it in place by workflow number and
# generate_summary_html() reads it to build the summary table.
COMPARISON_RESULTS = []
11 
def collect_and_compare_files(base_dir, pr_dir, output_dir, num_procs, pr_number, test_number, release_format):
    """Compare every DQM file present in both directories using worker threads.

    The list of relative file paths returned by get_file_pairs() is shared by
    all workers: each worker runs compare(), which pops names off the list
    until it is empty and appends its results to COMPARISON_RESULTS. After all
    workers have finished, COMPARISON_RESULTS is sorted in place by the numeric
    value of each entry's 'workflow' field.

    Args:
        base_dir: Directory holding the baseline workflow directories.
        pr_dir: Directory holding the PR workflow directories.
        output_dir: Forwarded to compare().
        num_procs: Upper bound on the number of worker threads.
        pr_number: Forwarded to compare().
        test_number: Forwarded to compare().
        release_format: Forwarded to compare().

    Raises:
        ValueError: If a collected 'workflow' value cannot be converted to
            float while sorting.
    """
    files = get_file_pairs(base_dir, pr_dir)

    # Never start more workers than there are files to process
    # (upload_to_gui applies the same cap).
    worker_count = min(num_procs, len(files))

    threads = []
    for _ in range(worker_count):
        thread = Thread(target=compare, args=(base_dir, pr_dir, output_dir, files, pr_number, test_number, release_format))
        thread.start()
        threads.append(thread)

    # Wait for every worker before touching the shared results
    for thread in threads:
        thread.join()

    COMPARISON_RESULTS.sort(key=lambda k: float(k['workflow']))
24 
def _dataset_from_filename(filename):
    """Turn an output file name into a '/A/B/C' dataset path.

    The name is split on '__'; the first piece is dropped and the remaining
    pieces are joined with '/'. A trailing '.root' extension is removed as a
    suffix (str.rstrip('.root') would strip any trailing '.', 'r', 'o', 't'
    characters and could eat into the dataset name).
    """
    suffix = '.root'
    if filename.endswith(suffix):
        filename = filename[:-len(suffix)]
    return '/' + '/'.join(filename.split('__')[1:])


def compare(base_dir, pr_dir, output_dir, files, pr_number, test_number, release_format):
    """Worker loop: run compareHistograms.py on files popped from a shared list.

    For each file name the external 'compareHistograms.py' command is run and
    its standard output is parsed. The first output line is skipped; the next
    three lines are read as the base output file name, the PR output file name
    and three space-separated integers (changed, removed, added). One result
    dict per file is appended to COMPARISON_RESULTS.

    A failure while comparing one file is printed and does not stop the loop.

    Args:
        base_dir: Directory holding the baseline files.
        pr_dir: Directory holding the PR files.
        output_dir: Passed to compareHistograms.py as '-o'.
        files: Shared list of paths relative to base_dir/pr_dir; consumed
            (popped) by this function, possibly concurrently by other workers.
        pr_number: Passed to compareHistograms.py as '-n'.
        test_number: Passed to compareHistograms.py as '-t'.
        release_format: Passed to compareHistograms.py as '-r'; also used to
            derive the release directory name used in the GUI file paths.
    """
    while files:
        try:
            file_name = files.pop()
        except IndexError:
            # Another worker took the last file between the loop check and
            # the pop; there is nothing left to do and nothing went wrong.
            break

        try:
            command = ['compareHistograms.py', '-b', os.path.join(base_dir, file_name),
                       '-p', os.path.join(pr_dir, file_name), '-o', output_dir,
                       '-n', pr_number, '-t', test_number, '-r', release_format]
            print('Running comparison:')
            print(' '.join(command))

            output = subprocess.check_output(command).decode()

            # Skip the first output line, then: base name, pr name, counters
            output_elements = output.split('\n')[1:]
            base_output_filename = output_elements[0]
            pr_output_filename = output_elements[1]
            # Third '_'-separated token is the run token: drop 'R' and zero padding
            run_nr = base_output_filename.split('_')[2].lstrip('R').lstrip('0')
            output_numbers = output_elements[2].split(' ')

            # Workflow id is the part of the containing directory name before '_'
            workflow = os.path.basename(os.path.dirname(os.path.join(base_dir, file_name))).split('_')[0]
            base_dataset = _dataset_from_filename(base_output_filename)
            pr_dataset = _dataset_from_filename(pr_output_filename)

            # Keep the first four '_'-separated tokens and replace the last
            # character with 'x' (e.g. CMSSW_10_5_X... -> CMSSW_10_5_x)
            cmssw_version = '_'.join(release_format.split('_')[:4])
            cmssw_version = cmssw_version[:-1] + 'x'
            root_file_dir_in_gui = 'ROOT/RelValData/%s/' % cmssw_version
            if 'R000000001__RelVal' in base_output_filename:
                root_file_dir_in_gui = 'ROOT/RelVal/%s/' % cmssw_version

            base_file_path_in_gui = root_file_dir_in_gui + base_output_filename
            pr_file_path_in_gui = root_file_dir_in_gui + pr_output_filename

            COMPARISON_RESULTS.append({
                'workflow': workflow,
                'base_dataset': base_dataset,
                'pr_dataset': pr_dataset,
                'run_nr': run_nr,
                'changed_elements': int(output_numbers[0]),
                'removed_elements': int(output_numbers[1]),
                'added_elements': int(output_numbers[2]),
                'base_file_path_in_gui': base_file_path_in_gui,
                'pr_file_path_in_gui': pr_file_path_in_gui,
            })
        except Exception as ex:
            # Best effort: one broken file must not stop the remaining comparisons
            print('Exception comparing two root files: %s' % ex)
60 
def get_file_pairs(base_dir, pr_dir):
    """Return the DQM root files that exist in both base_dir and pr_dir.

    Both directories are searched with the pattern '*.*_*/DQM_*.root'. The
    matches are made relative to their own root directory and only relative
    paths found under both roots are kept.

    Args:
        base_dir: Baseline directory to search.
        pr_dir: PR directory to search.

    Returns:
        List of relative paths (e.g. '<workflow dir>/DQM_....root') present in
        both directories, in the order glob reported them for base_dir.
    """
    base_files = glob.glob(os.path.join(base_dir, '*.*_*/DQM_*.root'))
    pr_files = glob.glob(os.path.join(pr_dir, '*.*_*/DQM_*.root'))

    # Remove base directories and leave
    # only parts of paths that are same
    base_files = [os.path.relpath(x, base_dir) for x in base_files]
    # A set gives constant-time membership tests for the intersection below
    pr_files = {os.path.relpath(x, pr_dir) for x in pr_files}

    # Find intersection
    return [value for value in base_files if value in pr_files]
72 
def upload_to_gui(output_dir, num_procs):
    """Start background threads that upload the comparison output files.

    Collects '*.root' files from the 'base' and 'pr' subdirectories of
    output_dir (base files first), prints the list and starts at most
    num_procs threads running upload() on that shared list. The threads are
    started but not waited for here.

    Args:
        output_dir: Directory containing the 'base' and 'pr' subdirectories.
        num_procs: Maximum number of upload threads to start.
    """
    pending = []
    for pattern in ('base/*.root', 'pr/*.root'):
        pending.extend(glob.glob(os.path.join(output_dir, pattern)))

    print('Files to be uploaded:')
    print(pending)

    # One worker per file at most; all workers drain the same list
    worker_count = min(num_procs, len(pending))
    started = 0
    while started < worker_count:
        Thread(target=upload, args=(pending,)).start()
        started += 1
85 
def upload(files):
    """Worker loop: upload files popped from a shared list with visDQMUpload.py.

    Each file is passed to the external 'visDQMUpload.py' command together
    with the 'https://cmsweb.cern.ch/dqm/dev' URL. A failure to upload one
    file is printed and does not stop the loop.

    Args:
        files: Shared list of file paths; consumed (popped) by this function,
            possibly concurrently by other upload threads.
    """
    while files:
        try:
            file_name = files.pop()
        except IndexError:
            # Another thread popped the last file name right after this one
            # passed the loop condition. Nothing is left to upload and this
            # is not an error, so leave quietly.
            break

        try:
            command = ['visDQMUpload.py', 'https://cmsweb.cern.ch/dqm/dev', file_name]
            print('Uploading output:')
            print(' '.join(command))

            exit_code = subprocess.call(command)
            if exit_code != 0:
                # Do not silently ignore a failed upload
                print('Upload command exited with code %s for file: %s' % (exit_code, file_name))
            print('')
        except Exception as ex:
            # Best effort: keep uploading the remaining files
            print('Exception uploading a file: %s' % ex)
100 
def _summary_table_row(comp):
    """Return the HTML table row for one entry of COMPARISON_RESULTS."""
    baseline_count = comp['changed_elements'] + comp['removed_elements']
    pr_count = comp['changed_elements'] + comp['added_elements']
    overlay_count = baseline_count

    # Make urls
    base_url = 'https://cmsweb.cern.ch/dqm/dev/start?runnr=%s;dataset%%3D%s;sampletype%%3Doffline_relval;workspace%%3DEverything;' % (comp['run_nr'], comp['base_dataset'])
    pr_url = 'https://cmsweb.cern.ch/dqm/dev/start?runnr=%s;dataset%%3D%s;sampletype%%3Doffline_relval;workspace%%3DEverything;' % (comp['run_nr'], comp['pr_dataset'])
    overlay_url = 'https://cmsweb.cern.ch/dqm/dev/start?runnr=%s;dataset%%3D%s;referenceshow%%3Dall;referencenorm=False;referenceobj1%%3Dother::%s::;sampletype%%3Doffline_relval;workspace%%3DEverything;' \
        % (comp['run_nr'], comp['pr_dataset'], comp['base_dataset'])
    base_raw_url = 'https://cmsweb.cern.ch/dqm/dev/jsroot/index.htm?file=https://cmsweb.cern.ch/dqm/dev/data/browse/%s' % comp['base_file_path_in_gui']
    pr_raw_url = 'https://cmsweb.cern.ch/dqm/dev/jsroot/index.htm?file=https://cmsweb.cern.ch/dqm/dev/data/browse/%s' % comp['pr_file_path_in_gui']

    cells = [
        ' <tr>\n',
        ' <td><a href="%s" target="_blank">%s baseline GUI</a><span> (%s)</span></td>\n' % (base_url, comp['workflow'], baseline_count),
        ' <td><a href="%s" target="_blank">%s pr GUI</a><span> (%s)</span></td>\n' % (pr_url, comp['workflow'], pr_count),
        ' <td><a href="%s" target="_blank">%s overlay GUI</a><span> (%s)</span></td>\n' % (overlay_url, comp['workflow'], overlay_count),
        ' <td><a href="%s" target="_blank">%s baseline rootjs</a><span> (%s)</span></td>\n' % (base_raw_url, comp['workflow'], baseline_count),
        ' <td><a href="%s" target="_blank">%s pr rootjs</a><span> (%s)</span></td>\n' % (pr_raw_url, comp['workflow'], pr_count),
        ' <td><span class="removed">-%s</span><span class="added">+%s</span><span class="changed">%s</span></td>\n'
        % (comp['removed_elements'], comp['added_elements'], comp['changed_elements']),
        ' </tr>\n',
    ]
    return ''.join(cells)


def generate_summary_html(output_dir, pr_list, summary_dir):
    """Write 'dqm-histo-comparison-summary.html' built from COMPARISON_RESULTS.

    The template 'dqm-histo-comparison-summary-template.html' is looked up
    under src/DQMServices/FileIO/scripts of $CMSSW_BASE first and of
    $CMSSW_RELEASE_BASE second. The placeholders $PR_LIST$, $TOTAL_CHANGES$,
    $NUMBER_OF_WORKFLOWS$ and $PER_WORKFLOW_LIST$ are substituted and the
    result is written into summary_dir (created if missing).

    Args:
        output_dir: Not used by this function; kept so the call signature
            stays unchanged.
        pr_list: Text substituted for the $PR_LIST$ placeholder.
        summary_dir: Directory for the summary file; an empty string means
            the current working directory.

    Raises:
        FileNotFoundError: If the template is found in neither release area
            (including the case where both environment variables are unset).
    """
    template_parts = ('src', 'DQMServices', 'FileIO', 'scripts', 'dqm-histo-comparison-summary-template.html')

    # Prefer the local work area, fall back to the release area. Unset
    # variables are skipped instead of crashing inside os.path.join.
    template_file_path = None
    for env_name in ('CMSSW_BASE', 'CMSSW_RELEASE_BASE'):
        area = os.getenv(env_name)
        if not area:
            continue
        candidate = os.path.join(area, *template_parts)
        if os.path.isfile(candidate):
            template_file_path = candidate
            break
    if template_file_path is None:
        raise FileNotFoundError('Summary template %s not found under CMSSW_BASE or CMSSW_RELEASE_BASE'
                                % os.path.join(*template_parts))

    with open(template_file_path, 'r') as template_file:
        result = template_file.read()

    result = result.replace('$PR_LIST$', pr_list)

    rows = []
    total_changes = 0
    for comp in COMPARISON_RESULTS:
        total_changes += comp['removed_elements'] + comp['added_elements'] + comp['changed_elements']
        rows.append(_summary_table_row(comp))
    table_items = ''.join(rows)

    result = result.replace('$TOTAL_CHANGES$', str(total_changes))
    result = result.replace('$NUMBER_OF_WORKFLOWS$', str(len(COMPARISON_RESULTS)))
    result = result.replace('$PER_WORKFLOW_LIST$', table_items)

    # Write output
    result_file_path = os.path.join(summary_dir, 'dqm-histo-comparison-summary.html')
    result_dir = os.path.dirname(result_file_path)
    if result_dir:
        # exist_ok avoids the check-then-create race
        os.makedirs(result_dir, exist_ok=True)
    with open(result_file_path, 'w') as summary_file:
        summary_file.write(result)
150 
def main():
    """Command-line entry point: compare DQM files, upload results, write summary.

    Parses the command-line options, derives the PR number from the first
    entry of --pr-list and the release format from --release-format (or the
    CMSSW_VERSION environment variable), then runs
    collect_and_compare_files(), upload_to_gui() and generate_summary_html().
    Exits with status 1 when no release format can be determined.
    """
    parser = argparse.ArgumentParser(description="This tool compares DQM monitor elements within DQM files found in base-dir with the ones found in in pr-dir. "
        "All workflow directories are searched for correctly named DQM root files. "
        "Comparison is done bin by bin and output is written to a root files containing only the changes.")
    parser.add_argument('-b', '--base-dir', help='Baseline IB directory', default='basedata/')
    parser.add_argument('-p', '--pr-dir', help='PR directory', default='prdata/')
    parser.add_argument('-o', '--output-dir', help='Comparison root files output directory', default='dqmHistoComparisonOutput')
    parser.add_argument('-j', '--nprocs', help='Number of processes', default=1, type=int)
    parser.add_argument('-n', '--pr-number', help='This is obsolete and should NOT be used.', required=False)
    parser.add_argument('-t', '--test-number', help='Unique test number to distinguish different comparisons of the same PR.', default='1')
    parser.add_argument('-r', '--release-format', help='Release format in this format: CMSSW_10_5_X_2019-02-17-0000')
    parser.add_argument('-s', '--summary-dir', help='Directory where summary with all links will be saved', default='')
    parser.add_argument('-l', '--pr-list', help='A list of PRs participating in the comparison', default='')
    args = parser.parse_args()

    # Get the number of the PR which triggered the comparison: take the first
    # space-separated entry, keep what follows the first '/' and turn '#' into '_'.
    # NOTE(review): presumably entries look like 'org/repo#123' - confirm with the caller.
    pr_number = 'Unknown'
    try:
        pr_number = args.pr_list.split(' ')[0].split('/')[1].replace('#', '_')
    except IndexError:
        # Empty list or an entry without '/': keep 'Unknown'
        pass

    release_format = args.release_format
    if not release_format:
        release_format = os.environ.get('CMSSW_VERSION')
        if not release_format:
            print('You are not in a CMSSW release. Please provide a valid release-format (-r option)')
            # sys.exit (unlike os._exit) flushes stdio, so the message above is not lost
            sys.exit(1)

    collect_and_compare_files(args.base_dir, args.pr_dir, args.output_dir, args.nprocs, pr_number, args.test_number, release_format)
    upload_to_gui(args.output_dir, args.nprocs)
    generate_summary_html(args.output_dir, args.pr_list, args.summary_dir)


if __name__ == '__main__':
    main()
def collect_and_compare_files(base_dir, pr_dir, output_dir, num_procs, pr_number, test_number, release_format)
def replace(string, replacements)
def upload_to_gui(output_dir, num_procs)
def get_file_pairs(base_dir, pr_dir)
def generate_summary_html(output_dir, pr_list, summary_dir)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def compare(base_dir, pr_dir, output_dir, files, pr_number, test_number, release_format)
static std::string join(char **cmd)
Definition: RemoteFile.cc:19
bool decode(bool &, std::string_view)
Definition: types.cc:72
#define str(s)