CMS 3D CMS Logo

compareHistograms.py
Go to the documentation of this file.
1 #!/bin/env python3
2 
3 from __future__ import print_function
4 import ROOT
5 ROOT.PyConfig.IgnoreCommandLineOptions = True
6 import os
7 import sys
8 import argparse
9 import numpy as np
10 from DQMServices.FileIO.blacklist import get_blacklist
11 
def create_dif(base_file_path, pr_file_path, pr_number, test_number, cmssw_version, output_dir_path):
    """Compare two DQM ROOT files and write the elements that differ.

    Both files are flattened into ``path -> object`` dictionaries. Paths
    present in both files are compared with compare(); paths present in only
    one file are kept unconditionally. The selected objects are written with
    save_paths() to ``<output_dir_path>/base/...`` and
    ``<output_dir_path>/pr/...``, and a short summary is printed to stdout.

    Args:
        base_file_path: Path of the baseline ROOT file.
        pr_file_path: Path of the PR ROOT file. The run number and the output
            file names are derived from this path.
        pr_number: PR number, embedded in the output file names.
        test_number: Test number, embedded in the output file names.
        cmssw_version: Release string, embedded in the output file names.
        output_dir_path: Directory under which the 'base' and 'pr' output
            sub-directories are created.

    Returns:
        None. If either input cannot be opened, a message is printed to
        stderr and nothing is written.
    """
    base_file = ROOT.TFile(base_file_path, 'read')
    # NOTE(review): presumably detaches the file from ROOT's global
    # bookkeeping so that only this function manages it - confirm.
    ROOT.gROOT.GetListOfFiles().Remove(base_file)

    pr_file = ROOT.TFile(pr_file_path, 'read')
    ROOT.gROOT.GetListOfFiles().Remove(pr_file)

    try:
        if base_file.IsOpen():
            print('Baseline file successfully opened', file=sys.stderr)
        else:
            print('Unable to open base file', file=sys.stderr)
            return

        if pr_file.IsOpen():
            print('PR file successfully opened', file=sys.stderr)
        else:
            print('Unable to open PR file', file=sys.stderr)
            return

        run_nr = get_run_nr(pr_file_path)

        # Get list of paths (lists of directories)
        base_flat_dict = flatten_file(base_file, run_nr)
        pr_flat_dict = flatten_file(pr_file, run_nr)

        pr_paths = set(pr_flat_dict)
        base_paths = set(base_flat_dict)

        # Paths that appear in both baseline and PR data. (Intersection)
        shared_paths = list(pr_paths & base_paths)

        # Paths that appear only in PR data. (Except)
        only_pr_paths = list(pr_paths - base_paths)

        # Paths that appear only in baseline data. (Except)
        only_base_paths = list(base_paths - pr_paths)

        # Histograms pointed to by these paths will be written to baseline output
        paths_to_save_in_base = []

        # Histograms pointed to by these paths will be written to pr output
        paths_to_save_in_pr = []

        # Make comparison
        compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)

        # NOTE(review): `== None` (not `is None`) is kept on purpose below;
        # PyROOT null-pointer proxies presumably compare equal to None
        # without being the None singleton - confirm before changing.

        # Collect paths that have to be written to baseline output file
        for path in only_base_paths:
            if base_flat_dict[path] == None:
                continue
            paths_to_save_in_base.append(path)

        # Collect paths that have to be written to PR output file
        for path in only_pr_paths:
            if pr_flat_dict[path] == None:
                continue
            paths_to_save_in_pr.append(path)

        base_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, False)
        pr_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, True)

        # Write baseline output
        save_paths(base_flat_dict, paths_to_save_in_base, os.path.join(output_dir_path, 'base', base_output_filename))

        # Write PR output
        save_paths(pr_flat_dict, paths_to_save_in_pr, os.path.join(output_dir_path, 'pr', pr_output_filename))
    finally:
        # Close whatever was opened, also on early return or on an exception.
        for root_file in (pr_file, base_file):
            if root_file.IsOpen():
                root_file.Close()

    # Info about changed, added and removed elements. A path counts as
    # "changed" when it was selected for both outputs.
    nr_of_changed_elements = len(set(paths_to_save_in_base).intersection(set(paths_to_save_in_pr)))
    nr_of_removed_elements = len(paths_to_save_in_base) - nr_of_changed_elements
    nr_of_added_elements = len(paths_to_save_in_pr) - nr_of_changed_elements

    print('Base output file. PR output file. Changed elements, removed elements, added elements:')
    print(base_output_filename)
    print(pr_output_filename)
    print('%s %s %s' % (nr_of_changed_elements, nr_of_removed_elements, nr_of_added_elements))
94 
def compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base):
    """Append every shared path whose PR and baseline objects differ.

    Args:
        shared_paths: Iterable of paths that are keys of both dictionaries.
        pr_flat_dict: Mapping path -> object read from the PR file.
        base_flat_dict: Mapping path -> object read from the baseline file.
        paths_to_save_in_pr: List extended in place with differing paths.
        paths_to_save_in_base: List extended in place with the same paths.

    Returns:
        None; the two output lists are modified in place.
    """
    for path in shared_paths:
        pr_item = pr_flat_dict[path]
        base_item = base_flat_dict[path]

        # NOTE(review): `== None` is kept rather than `is None`; PyROOT
        # null-pointer proxies presumably compare equal to None - confirm.
        if pr_item == None or base_item == None:
            continue

        both_profile_2d = pr_item.InheritsFrom('TProfile2D') and base_item.InheritsFrom('TProfile2D')
        both_profile = pr_item.InheritsFrom('TProfile') and base_item.InheritsFrom('TProfile')

        if both_profile_2d or both_profile:
            # Compare TProfile (content, entries and errors)
            are_different = not compare_TProfile(pr_item, base_item)
        elif pr_item.InheritsFrom('TH1') and base_item.InheritsFrom('TH1'):
            # Compare bin by bin
            pr_array = np.array(pr_item)
            base_array = np.array(base_item)
            are_different = (pr_array.shape != base_array.shape
                             or not np.allclose(pr_array, base_array, equal_nan=True))
        else:
            # Compare non histograms
            are_different = bool(pr_item != base_item)

        if are_different:
            paths_to_save_in_pr.append(path)
            paths_to_save_in_base.append(path)
129 
# Returns False if different, True otherwise
def compare_TProfile(pr_item, base_item):
    """Compare two profile objects bin by bin.

    For every index below GetSize() the bin content, bin entries and bin
    error of both objects are compared with numpy.isclose (default
    tolerances, NaN treated as equal to NaN).

    Args:
        pr_item: Profile from the PR file.
        base_item: Profile from the baseline file.

    Returns:
        False if the sizes differ or any compared value differs, True
        otherwise.
    """
    size = pr_item.GetSize()
    if size != base_item.GetSize():
        return False

    for i in range(size):
        value_pairs = (
            (pr_item.GetBinContent(i), base_item.GetBinContent(i)),
            (pr_item.GetBinEntries(i), base_item.GetBinEntries(i)),
            (pr_item.GetBinError(i), base_item.GetBinError(i)),
        )
        for pr_value, base_value in value_pairs:
            if not np.isclose(pr_value, base_value, equal_nan=True):
                return False

    return True
155 
def flatten_file(file, run_nr):
    """Flatten a ROOT file into a dictionary keyed by path tuples.

    Every top-level key of the file is read and handed to
    traverse_till_end(), which fills the result dictionary.

    Args:
        file: Object exposing GetListOfKeys(); each key must expose ReadObj().
        run_nr: Run number forwarded to traverse_till_end().

    Returns:
        dict filled by traverse_till_end(). Keys whose traversal raised an
        exception are skipped.
    """
    result = {}
    for key in file.GetListOfKeys():
        try:
            traverse_till_end(key.ReadObj(), [], result, run_nr)
        except Exception as error:
            # Best effort: one unreadable key must not abort the whole
            # comparison, but report it instead of hiding it. Interrupts
            # (KeyboardInterrupt, SystemExit) are no longer swallowed.
            print('Skipping key %r: %s' % (key, error), file=sys.stderr)

    return result
165 
def traverse_till_end(node, dirs_list, result, run_nr):
    """Recursively walk a node and record every non-blacklisted leaf.

    A node that has a GetListOfKeys attribute is treated as a container and
    each of its keys is read and visited. Any other node is a leaf and is
    stored in ``result`` under the tuple of names leading to it, unless
    is_blacklisted() rejects that path.

    Args:
        node: Current object.
        dirs_list: List of names of the ancestors of ``node``; not modified.
        result: Dictionary filled in place with ``path tuple -> leaf``.
        run_nr: Run number forwarded to is_blacklisted().
    """
    # Build a new list so sibling branches do not see each other's names.
    new_dir_list = dirs_list + [get_node_name(node)]

    if hasattr(node, 'GetListOfKeys'):
        for key in node.GetListOfKeys():
            traverse_till_end(key.ReadObj(), new_dir_list, result, run_nr)
    elif not is_blacklisted(new_dir_list, run_nr):
        result[tuple(new_dir_list)] = node
175 
def get_node_name(node):
    """Return the name under which a node is stored in a path.

    For objects inheriting from TObjString the name is the tag name taken
    from a ``<name>value</name>`` string, with get_string_suffix() appended.
    For everything else it is simply node.GetName().
    """
    if node.InheritsFrom('TObjString'):
        # Strip out just the name from a tag (<name>value</name>):
        # take the text before the first '>' and drop the leading '<'.
        name = node.GetName().split('>')[0][1:]
        return name + get_string_suffix()

    return node.GetName()
183 
def get_string_suffix():
    """Return the suffix appended to the names of string monitor elements."""
    return '_string_monitor_element'
186 
def is_blacklisted(dirs_list, run_nr):
    """Tell whether a path is contained in the blacklist of a run.

    Args:
        dirs_list: Non-empty list of path components; not modified.
        run_nr: Run number passed to get_blacklist().

    Returns:
        True if the path, with the string suffix removed from its last
        component, is in get_blacklist(run_nr).
    """
    # Copy the list
    dirs_list = dirs_list[:]

    # Remove string suffix. Only the trailing occurrence is cut off, so a
    # name that happens to contain the suffix text elsewhere stays intact.
    suffix = get_string_suffix()
    if dirs_list[-1].endswith(suffix):
        dirs_list[-1] = dirs_list[-1][:-len(suffix)]

    return tuple(dirs_list) in get_blacklist(run_nr)
195 
def save_paths(flat_dict, paths, result_file_path):
    """Write the objects selected by ``paths`` into a new ROOT file.

    Args:
        flat_dict: Mapping path -> object, as produced by flatten_file().
        paths: Paths (keys of ``flat_dict``) to write.
        result_file_path: Output file location; missing parent directories
            are created and an existing file is recreated.

    Returns:
        None. Nothing is written when ``paths`` is empty or the output file
        cannot be opened; both cases are reported on stderr.
    """
    if not paths:
        print('No differences were observed - output will not be written', file=sys.stderr)
        return

    # Make sure output dir exists. An empty dirname means the current
    # directory, which needs no creation.
    result_dir = os.path.dirname(result_file_path)
    if result_dir:
        os.makedirs(result_dir, exist_ok=True)

    result_file = ROOT.TFile(result_file_path, 'recreate')
    ROOT.gROOT.GetListOfFiles().Remove(result_file)

    if not result_file.IsOpen():
        print('Unable to open %s output file' % result_file_path, file=sys.stderr)
        return

    for path in paths:
        save_to_file(flat_dict, path, result_file)

    result_file.Close()
    print('Output written to %s file' % result_file_path, file=sys.stderr)
218 
# Saves object from flat_dict into output_file, under the directories named by path
def save_to_file(flat_dict, path, output_file):
    """Write one object into ``output_file`` at the location given by ``path``.

    Args:
        flat_dict: Mapping path -> object.
        path: Tuple of names; all but the last are directories, which are
            created in the output file when missing.
        output_file: ROOT file opened for writing.
    """
    histogram = flat_dict[path]

    current = output_file

    # Last item is the object name. No need to create dir for it
    for directory in path[:-1]:
        current = create_dir(current, directory)

    # Select the target directory once, after the walk. This also covers a
    # single-element path, which must land in the root of the output file
    # rather than in whatever directory a previous write left selected.
    current.cd()

    histogram.Write()
231 
# Create dir in root file if it doesn't exist
def create_dir(parent_dir, name):
    """Return the sub-directory ``name`` of ``parent_dir``, creating it if needed.

    Args:
        parent_dir: Object exposing Get(name) and mkdir(name).
        name: Name of the sub-directory.

    Returns:
        The result of parent_dir.Get(name) when it is truthy, otherwise the
        result of parent_dir.mkdir(name).
    """
    directory = parent_dir.Get(name)
    if not directory:
        directory = parent_dir.mkdir(name)
    return directory
238 
def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr):
    """Build the name of a comparison output file.

    Args:
        input_file_path: Path of the input file. The run is the third
            '_'-separated field of its base name; the workflow is the part
            of its parent directory name before the first '_', with '.'
            replaced by '_'.
        pr_number: PR number placed after 'PR'.
        test_number: Test number placed after the PR number.
        cmssw_version: Release string.
        isPr: True for the PR output name, False for the baseline one.

    Returns:
        The output file name (no directory part).
    """
    # Samples of correct output file format:
    # DQM_V0001_R000320822__wf136_892_pr__CMSSW_10_4_0_pre3-PR25518-1234__DQMIO.root
    # When run number is 1 we have to use RelVal naming pattern:
    # DQM_V0002_R000000001__RelVal_wf136_892_pr__CMSSW_10_4_0_pre3-PR25518-1234__DQMIO.root
    # NOTE(review): the RelVal sample above shows V0002, but the code below
    # always emits V0001 - confirm which one is intended.

    input_file_name = os.path.basename(input_file_path)
    run = input_file_name.split('_')[2]

    parent_dir_name = os.path.basename(os.path.dirname(input_file_path))
    workflow = parent_dir_name.split('_')[0].replace('.', '_') or 'Unknown'

    relval_prefix = 'RelVal_' if run == 'R000000001' else ''
    baseOrPr = 'pr' if isPr else 'base'

    return 'DQM_V0001_%s__%swf%s_%s__%s-PR%s-%s__DQMIO.root' % (run, relval_prefix, workflow, baseOrPr, cmssw_version, pr_number, test_number)
261 
def get_run_nr(file_path):
    """Extract the run number from a DQM file path.

    The third '_'-separated field of the file's base name is taken and its
    leading 'R' characters and then leading zeros are stripped.

    Returns:
        The run number as a string (empty if the field held only zeros).
    """
    run_field = os.path.basename(file_path).split('_')[2]
    return run_field.lstrip('R').lstrip('0')
264 
if __name__ == '__main__':
    # Command line entry point: parse the options and run the comparison.
    parser = argparse.ArgumentParser(
        description="This tool compares DQM monitor elements found in base-file with the ones found in pr-file. "
                    "Comparison is done bin by bin and output is written to a root file containing only the changes.")
    parser.add_argument('-b', '--base-file', help='Baseline IB DQM root file', required=True)
    parser.add_argument('-p', '--pr-file', help='PR DQM root file', required=True)
    parser.add_argument('-n', '--pr-number', help='PR number under test', default='00001')
    parser.add_argument('-t', '--test-number', help='Unique test number to distinguish different comparisons of the same PR.', default='1')
    # Look the environment variable up leniently: a hard os.environ[...]
    # lookup would fail before parsing even when -r is given explicitly.
    parser.add_argument('-r', '--release-format', help='Release format in this format: CMSSW_10_5_X_2019-02-17-0000', default=os.environ.get('CMSSW_VERSION'))
    parser.add_argument('-o', '--output-dir', help='Comparison root files output directory', default='dqmHistoComparisonOutput')
    args = parser.parse_args()

    if args.release_format is None:
        parser.error('CMSSW_VERSION is not set in the environment; use -r/--release-format')

    # Keep only the first four '_'-separated fields,
    # e.g. CMSSW_10_5_X_2019-02-17-0000 -> CMSSW_10_5_X
    cmssw_version = '_'.join(args.release_format.split('_')[:4])

    create_dif(args.base_file, args.pr_file, args.pr_number, args.test_number, cmssw_version, args.output_dir)
compareHistograms.create_dir
def create_dir(parent_dir, name)
Definition: compareHistograms.py:233
FastTimerService_cff.range
range
Definition: FastTimerService_cff.py:34
compareHistograms.compare
def compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)
Definition: compareHistograms.py:95
compareHistograms.flatten_file
def flatten_file(file, run_nr)
Definition: compareHistograms.py:156
compareHistograms.save_to_file
def save_to_file(flat_dict, path, output_file)
Definition: compareHistograms.py:220
compareHistograms.get_node_name
def get_node_name(node)
Definition: compareHistograms.py:176
join
static std::string join(char **cmd)
Definition: RemoteFile.cc:17
blacklist.get_blacklist
def get_blacklist(RUN_NR)
Definition: blacklist.py:2
compareHistograms.get_run_nr
def get_run_nr(file_path)
Definition: compareHistograms.py:262
compareHistograms.get_output_filename
def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr)
Definition: compareHistograms.py:239
compareHistograms.get_string_suffix
def get_string_suffix()
Definition: compareHistograms.py:184
submitPVValidationJobs.split
def split(sequence, size)
Definition: submitPVValidationJobs.py:352
compareHistograms.create_dif
def create_dif(base_file_path, pr_file_path, pr_number, test_number, cmssw_version, output_dir_path)
Definition: compareHistograms.py:12
print
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:46
compareHistograms.compare_TProfile
def compare_TProfile(pr_item, base_item)
Definition: compareHistograms.py:131
compareHistograms.traverse_till_end
def traverse_till_end(node, dirs_list, result, run_nr)
Definition: compareHistograms.py:166
compareHistograms.save_paths
def save_paths(flat_dict, paths, result_file_path)
Definition: compareHistograms.py:196
compareHistograms.is_blacklisted
def is_blacklisted(dirs_list, run_nr)
Definition: compareHistograms.py:187
compare
Definition: compare.py:1
reco::helper::VirtualJetProducerHelper::intersection
double intersection(double r12)
Definition: VirtualJetProducerHelper.h:14
python.rootplot.root2matplotlib.replace
def replace(string, replacements)
Definition: root2matplotlib.py:444