CMS 3D CMS Logo

compareHistograms.py
Go to the documentation of this file.
1 #!/bin/env python
2 
3 from __future__ import print_function
4 import ROOT
5 ROOT.PyConfig.IgnoreCommandLineOptions = True
6 import os
7 import sys
8 import argparse
9 import root_numpy
10 import numpy as np
11 from blacklist import get_blacklist
12 
def create_dif(base_file_path, pr_file_path, pr_number, test_number, cmssw_version, output_dir_path):
    """Compare two DQM ROOT files and write the elements that differ.

    Both files are flattened into ``{path tuple: object}`` dictionaries.
    Paths present in both files are handed to ``compare``; paths present in
    only one file are saved for that side. The selected objects are written
    to ``<output_dir_path>/base/<name>`` and ``<output_dir_path>/pr/<name>``.
    The two output file names and the changed / removed / added counts are
    printed to stdout; status messages go to stderr.

    Args:
        base_file_path: Path of the baseline ROOT file.
        pr_file_path: Path of the PR ROOT file. The run number and the output
            file names are derived from this path.
        pr_number: PR number, used in the output file names.
        test_number: Test number, used in the output file names.
        cmssw_version: Release string, used in the output file names.
        output_dir_path: Directory that receives the ``base`` and ``pr``
            output sub-directories.

    Returns:
        None. Returns early, after reporting on stderr, when either input
        file cannot be opened.
    """
    base_file = ROOT.TFile(base_file_path, 'read')
    ROOT.gROOT.GetListOfFiles().Remove(base_file)

    pr_file = ROOT.TFile(pr_file_path, 'read')
    ROOT.gROOT.GetListOfFiles().Remove(pr_file)

    try:
        if base_file.IsOpen():
            print('Baseline file successfully opened', file=sys.stderr)
        else:
            print('Unable to open base file', file=sys.stderr)
            return

        if pr_file.IsOpen():
            print('PR file successfully opened', file=sys.stderr)
        else:
            print('Unable to open PR file', file=sys.stderr)
            return

        run_nr = get_run_nr(pr_file_path)

        # Get list of paths (lists of directories)
        base_flat_dict = flatten_file(base_file, run_nr)
        pr_flat_dict = flatten_file(pr_file, run_nr)

        base_keys = set(base_flat_dict)
        pr_keys = set(pr_flat_dict)

        # Sorted so that the order in which objects are compared and written
        # does not depend on set iteration order.
        # Paths that appear in both baseline and PR data. (Intersection)
        shared_paths = sorted(pr_keys & base_keys)

        # Paths that appear only in PR data. (Except)
        only_pr_paths = sorted(pr_keys - base_keys)

        # Paths that appear only in baseline data. (Except)
        only_base_paths = sorted(base_keys - pr_keys)

        # Histograms pointed to by these paths will be written to baseline output
        paths_to_save_in_base = []

        # Histograms pointed to by these paths will be written to pr output
        paths_to_save_in_pr = []

        # Make comparison
        compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)

        # NOTE(review): `== None` is kept instead of `is None` below; PyROOT
        # null-pointer proxies are expected to compare equal to None without
        # being the None object - confirm before changing.

        # Collect paths that have to be written to baseline output file
        for path in only_base_paths:
            if base_flat_dict[path] == None:  # noqa: E711
                continue
            paths_to_save_in_base.append(path)

        # Collect paths that have to be written to PR output file
        for path in only_pr_paths:
            if pr_flat_dict[path] == None:  # noqa: E711
                continue
            paths_to_save_in_pr.append(path)

        base_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, False)
        pr_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, True)

        # Write baseline output
        save_paths(base_flat_dict, paths_to_save_in_base, os.path.join(output_dir_path, 'base', base_output_filename))

        # Write PR output
        save_paths(pr_flat_dict, paths_to_save_in_pr, os.path.join(output_dir_path, 'pr', pr_output_filename))
    finally:
        # Close the inputs on every exit path, including the early returns
        # above and any exception raised while comparing or writing.
        for root_file in (pr_file, base_file):
            if root_file.IsOpen():
                root_file.Close()

    # Info about changed, added and removed elements
    nr_of_changed_elements = len(set(paths_to_save_in_base).intersection(set(paths_to_save_in_pr)))
    nr_of_removed_elements = len(paths_to_save_in_base) - nr_of_changed_elements
    nr_of_added_elements = len(paths_to_save_in_pr) - nr_of_changed_elements

    print('Base output file. PR output file. Changed elements, removed elements, added elements:')
    print(base_output_filename)
    print(pr_output_filename)
    print('%s %s %s' % (nr_of_changed_elements, nr_of_removed_elements, nr_of_added_elements))
95 
def compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base):
    """Append every shared path whose PR and baseline objects differ.

    Args:
        shared_paths: Iterable of paths that are keys of both dictionaries.
        pr_flat_dict: Mapping from path to the object read from the PR file.
        base_flat_dict: Mapping from path to the object read from the
            baseline file.
        paths_to_save_in_pr: List that is extended in place with each
            differing path.
        paths_to_save_in_base: List that is extended in place with the same
            differing paths.

    Returns:
        None. The two output lists are modified in place.
    """
    # Collect paths that have to be written to both output files
    for path in shared_paths:
        pr_item = pr_flat_dict[path]
        base_item = base_flat_dict[path]

        # NOTE(review): `== None` is kept instead of `is None`; PyROOT
        # null-pointer proxies are expected to compare equal to None without
        # being the None object - confirm before changing.
        if pr_item == None or base_item == None:  # noqa: E711
            continue

        both_profile_2d = pr_item.InheritsFrom('TProfile2D') and base_item.InheritsFrom('TProfile2D')
        both_profile = pr_item.InheritsFrom('TProfile') and base_item.InheritsFrom('TProfile')

        # The profile checks come before the generic TH1 check so that
        # profiles are compared on content, entries and errors.
        if both_profile_2d or both_profile:
            are_different = not compare_TProfile(pr_item, base_item)

        elif pr_item.InheritsFrom('TH1') and base_item.InheritsFrom('TH1'):
            # Compare bin by bin
            pr_array = root_numpy.hist2array(hist=pr_item, include_overflow=True, copy=False)
            base_array = root_numpy.hist2array(hist=base_item, include_overflow=True, copy=False)

            # The shape test runs first so allclose only sees equal shapes.
            are_different = (pr_array.shape != base_array.shape
                             or not np.allclose(pr_array, base_array, equal_nan=True))
        else:
            # Compare non histograms
            are_different = pr_item != base_item

        if are_different:
            paths_to_save_in_pr.append(path)
            paths_to_save_in_base.append(path)
130 
def compare_TProfile(pr_item, base_item):
    """Return True when two profiles agree in every bin, False otherwise.

    The objects are different when ``GetSize()`` differs, or when for any
    index in ``range(GetSize())`` the bin content, bin entries or bin error
    are not close according to ``np.isclose`` with ``equal_nan=True``.

    Args:
        pr_item: Profile read from the PR file.
        base_item: Profile read from the baseline file.

    Returns:
        bool: False if different, True otherwise.
    """
    size = pr_item.GetSize()
    if size != base_item.GetSize():
        return False

    # Checked in this order for every bin; the first mismatch ends the scan.
    getters = ('GetBinContent', 'GetBinEntries', 'GetBinError')

    for i in range(size):
        for getter in getters:
            pr_value = getattr(pr_item, getter)(i)
            base_value = getattr(base_item, getter)(i)
            if not np.isclose(pr_value, base_value, equal_nan=True):
                return False

    return True
156 
def flatten_file(file, run_nr):
    """Flatten a ROOT file into a ``{path tuple: object}`` dictionary.

    Every key returned by ``file.GetListOfKeys()`` is read and passed to
    ``traverse_till_end``, which fills the result dictionary.

    Args:
        file: Object exposing ``GetListOfKeys()``, whose keys expose
            ``ReadObj()`` and ``GetName()``.
        run_nr: Run number forwarded to ``traverse_till_end``.

    Returns:
        dict: The entries collected by ``traverse_till_end``.
    """
    result = {}
    for key in file.GetListOfKeys():
        try:
            node = key.ReadObj()
            traverse_till_end(node, [], result, run_nr)
        except Exception as error:
            # Best effort: a key that cannot be read or traversed is reported
            # and skipped rather than aborting the whole comparison.
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            print('Skipping key %s: %s' % (key.GetName(), error), file=sys.stderr)

    return result
166 
def traverse_till_end(node, dirs_list, result, run_nr, blacklist=None):
    """Recursively record every leaf object below ``node`` in ``result``.

    A node that has a ``GetListOfKeys`` attribute is treated as a container
    and each of its keys is read and traversed. Any other node is a leaf and
    is stored under the tuple of node names leading to it, unless that tuple
    is in the blacklist.

    Args:
        node: Object to traverse.
        dirs_list: List of node names leading to ``node`` (not modified).
        result: Dictionary filled in place, mapping path tuple to leaf object.
        run_nr: Run number passed to ``get_blacklist``.
        blacklist: Optional result of ``get_blacklist(run_nr)``. When omitted
            it is looked up once here and reused for the whole subtree
            instead of being looked up again for every leaf.

    Returns:
        None. ``result`` is modified in place.
    """
    if blacklist is None:
        blacklist = get_blacklist(run_nr)

    new_dir_list = dirs_list + [get_node_name(node)]

    if hasattr(node, 'GetListOfKeys'):
        for key in node.GetListOfKeys():
            traverse_till_end(key.ReadObj(), new_dir_list, result, run_nr, blacklist)
    else:
        path = tuple(new_dir_list)
        if path not in blacklist:
            result[path] = node
176 
def get_node_name(node):
    """Return the name used for ``node`` as one element of a path.

    Args:
        node: Object exposing ``GetName()`` and ``InheritsFrom(class_name)``.

    Returns:
        For a ``TObjString``, the text between the first character and the
        first ``>`` of ``GetName()``; for anything else, ``GetName()`` as is.
    """
    name = node.GetName()
    if node.InheritsFrom('TObjString'):
        # Strip out just the name from a tag (<name>value</name>)
        return name.split('>')[0][1:]
    return name
183 
def save_paths(flat_dict, paths, result_file_path):
    """Write the objects selected by ``paths`` into a new ROOT file.

    Nothing is written, and no directory is created, when ``paths`` is empty.

    Args:
        flat_dict: Mapping from path tuple to the object to write.
        paths: Paths (keys of ``flat_dict``) to write.
        result_file_path: Path of the ROOT file to (re)create.

    Returns:
        None. Status messages are printed to stderr.
    """
    if not paths:
        print('No differences were observed - output will not be written', file=sys.stderr)
        return

    # Make sure output dir exists. A bare file name has an empty dirname,
    # which os.makedirs would reject, so it is skipped.
    result_dir = os.path.dirname(result_file_path)
    if result_dir and not os.path.exists(result_dir):
        os.makedirs(result_dir)

    result_file = ROOT.TFile(result_file_path, 'recreate')
    ROOT.gROOT.GetListOfFiles().Remove(result_file)

    if not result_file.IsOpen():
        print('Unable to open %s output file' % result_file_path, file=sys.stderr)
        return

    try:
        for path in paths:
            save_to_file(flat_dict, path, result_file)
    finally:
        # Close the output file even if writing one of the objects fails.
        result_file.Close()

    print('Output written to %s file' % result_file_path, file=sys.stderr)
206 
def save_to_file(flat_dict, path, output_file):
    """Write ``flat_dict[path]`` into ``output_file`` under the path's directories.

    Every element of ``path`` except the last is looked up or created as a
    nested directory via ``create_dir``. The innermost directory is then made
    current and the object's ``Write()`` is called.

    Args:
        flat_dict: Mapping from path tuple to the object to write.
        path: Tuple of names; the last element is the object's own name.
        output_file: Open output file (the top-level directory).

    Returns:
        None.
    """
    histogram = flat_dict[path]

    current = output_file

    # Last item is filename. No need to create dir for it
    for directory in path[:-1]:
        current = create_dir(current, directory)

    # cd once, after the loop, so that a single-element path is written to
    # the top level of output_file and not to whichever directory an earlier
    # call left current.
    current.cd()

    histogram.Write()
219 
def create_dir(parent_dir, name):
    """Return the ``name`` entry of ``parent_dir``, creating it if missing.

    Args:
        parent_dir: Object exposing ``Get(name)`` and ``mkdir(name)``.
        name: Name of the sub-directory.

    Returns:
        The result of ``parent_dir.Get(name)`` when it is truthy, otherwise
        the result of ``parent_dir.mkdir(name)``.
    """
    existing = parent_dir.Get(name)
    if existing:
        return existing
    return parent_dir.mkdir(name)
226 
def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr):
    """Build the output file name for one side of the comparison.

    The run is the third ``_``-separated field of the input file's base name.
    The workflow is taken from the name of the input file's parent directory:
    the part before the first ``_``, with ``.`` replaced by ``_``, or
    ``Unknown`` when that is empty.

    Args:
        input_file_path: Path of the input ROOT file.
        pr_number: PR number placed after ``-PR``.
        test_number: Test number placed after the PR number.
        cmssw_version: Release string placed before ``-PR``.
        isPr: True for the PR output name (``pr``), False for the baseline
            output name (``base``).

    Returns:
        str: The output file name.

    Raises:
        IndexError: If the input file's base name has fewer than three
            ``_``-separated fields.
    """
    # Samples of correct output file format:
    # DQM_V0001_R000320822__wf136_892_pr__CMSSW_10_4_0_pre3-PR25518-1234__DQMIO.root
    # When run number is 1 we have to use RelVal naming pattern:
    # DQM_V0002_R000000001__RelVal_wf136_892_pr__CMSSW_10_4_0_pre3-PR25518-1234__DQMIO.root
    # NOTE(review): the RelVal sample above shows V0002, but the format string
    # below always produces V0001 - confirm which one is intended.

    input_file_name = os.path.basename(input_file_path)
    run = input_file_name.split('_')[2]

    parent_dir_name = os.path.basename(os.path.dirname(input_file_path))
    workflow = parent_dir_name.split('_')[0].replace('.', '_') or 'Unknown'

    relval_prefix = 'RelVal_' if run == 'R000000001' else ''
    base_or_pr = 'pr' if isPr else 'base'

    return 'DQM_V0001_%s__%swf%s_%s__%s-PR%s-%s__DQMIO.root' % (
        run, relval_prefix, workflow, base_or_pr, cmssw_version, pr_number, test_number)
249 
def get_run_nr(file_path):
    """Return the run number encoded in a DQM file name, as a string.

    The run is the third ``_``-separated field of the file's base name, with
    leading ``R`` characters and then leading zeros removed.

    Args:
        file_path: Path of the DQM ROOT file.

    Returns:
        str: The run number without padding (empty if the field is all zeros).

    Raises:
        IndexError: If the base name has fewer than three ``_``-separated
            fields.
    """
    run_field = os.path.basename(file_path).split('_')[2]
    return run_field.lstrip('R').lstrip('0')
252 
if __name__ == '__main__':
    # Command-line entry point: parse the options and run the comparison.
    parser = argparse.ArgumentParser(description="This tool compares DQM monitor elements found in base-file with the ones found in pr-file. "
                                                 "Comparison is done bin by bin and output is written to a root file containing only the changes.")
    parser.add_argument('-b', '--base-file', help='Baseline IB DQM root file', required=True)
    parser.add_argument('-p', '--pr-file', help='PR DQM root file', required=True)
    parser.add_argument('-n', '--pr-number', help='PR number under test', default='00001')
    parser.add_argument('-t', '--test-number', help='Unique test number to distinguish different comparisons of the same PR.', default='1')

    # CMSSW_VERSION is only a default: when it is not set the option becomes
    # mandatory, instead of the script failing with KeyError before the
    # arguments are even parsed.
    default_release = os.environ.get('CMSSW_VERSION')
    parser.add_argument('-r', '--release-format', help='Release format in this format: CMSSW_10_5_X_2019-02-17-0000',
                        default=default_release, required=default_release is None)
    parser.add_argument('-o', '--output-dir', help='Comparison root files output directory', default='dqmHistoComparisonOutput')
    args = parser.parse_args()

    # Keep only the first four '_'-separated fields of the release string.
    cmssw_version = '_'.join(args.release_format.split('_')[:4])

    create_dif(args.base_file, args.pr_file, args.pr_number, args.test_number, cmssw_version, args.output_dir)
def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr)
def save_to_file(flat_dict, path, output_file)
def get_blacklist(RUN_NR)
Definition: blacklist.py:2
def replace(string, replacements)
def flatten_file(file, run_nr)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def traverse_till_end(node, dirs_list, result, run_nr)
def create_dif(base_file_path, pr_file_path, pr_number, test_number, cmssw_version, output_dir_path)
def create_dir(parent_dir, name)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def compare_TProfile(pr_item, base_item)
def save_paths(flat_dict, paths, result_file_path)
def compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)
double split
Definition: MVATrainer.cc:139
def get_run_nr(file_path)
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run