CMS 3D CMS Logo

compareHistograms.py
Go to the documentation of this file.
1 #!/bin/env python
2 
3 from __future__ import print_function
4 import ROOT
5 ROOT.PyConfig.IgnoreCommandLineOptions = True
6 import os
7 import sys
8 import argparse
9 import root_numpy
10 import numpy as np
11 from DQMServices.FileIO.blacklist import get_blacklist
12 
def create_dif(base_file_path, pr_file_path, pr_number, test_number, cmssw_version, output_dir_path):
    """Compare two DQM ROOT files and write the differing elements to new files.

    Both inputs are flattened into ``path -> object`` dictionaries. Elements
    that differ between the two files, plus elements present in only one of
    them, are written to a 'base' and a 'pr' output file below
    ``output_dir_path``. Progress messages go to stderr; the two output file
    names and the changed/removed/added counts are printed to stdout.

    Args:
        base_file_path: Path of the baseline ROOT file.
        pr_file_path: Path of the PR ROOT file. The run number and both output
            file names are derived from this path.
        pr_number: PR number embedded in the output file names.
        test_number: Test number embedded in the output file names.
        cmssw_version: Release label embedded in the output file names.
        output_dir_path: Directory that receives the 'base' and 'pr' outputs.

    Returns:
        None. If either input cannot be opened, a message is printed to stderr
        and the function returns without comparing anything.
    """
    base_file = ROOT.TFile(base_file_path, 'read')
    ROOT.gROOT.GetListOfFiles().Remove(base_file)

    pr_file = ROOT.TFile(pr_file_path, 'read')
    ROOT.gROOT.GetListOfFiles().Remove(pr_file)

    try:
        if base_file.IsOpen():
            print('Baseline file successfully opened', file=sys.stderr)
        else:
            print('Unable to open base file', file=sys.stderr)
            return

        if pr_file.IsOpen():
            print('PR file successfully opened', file=sys.stderr)
        else:
            print('Unable to open PR file', file=sys.stderr)
            return

        run_nr = get_run_nr(pr_file_path)

        # Flatten both files into {path tuple: object} dictionaries
        base_flat_dict = flatten_file(base_file, run_nr)
        pr_flat_dict = flatten_file(pr_file, run_nr)

        base_paths = set(base_flat_dict)
        pr_paths = set(pr_flat_dict)

        # Paths that appear in both baseline and PR data. (Intersection)
        shared_paths = list(pr_paths.intersection(base_paths))

        # Paths that appear only in PR data. (Except)
        only_pr_paths = list(pr_paths.difference(base_paths))

        # Paths that appear only in baseline data. (Except)
        only_base_paths = list(base_paths.difference(pr_paths))

        # Histograms pointed to by these paths will be written to baseline output
        paths_to_save_in_base = []

        # Histograms pointed to by these paths will be written to pr output
        paths_to_save_in_pr = []

        # Shared paths whose objects differ are appended to both lists
        compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)

        # NOTE(review): '== None' is kept on purpose below. Presumably items
        # can be PyROOT null-pointer proxies, which compare equal to None
        # without being the None singleton; confirm before using 'is None'.

        # Collect paths that have to be written to baseline output file
        for path in only_base_paths:
            if base_flat_dict[path] == None:
                continue
            paths_to_save_in_base.append(path)

        # Collect paths that have to be written to PR output file
        for path in only_pr_paths:
            if pr_flat_dict[path] == None:
                continue
            paths_to_save_in_pr.append(path)

        base_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, False)
        pr_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, True)

        # Write baseline output
        save_paths(base_flat_dict, paths_to_save_in_base, os.path.join(output_dir_path, 'base', base_output_filename))

        # Write PR output
        save_paths(pr_flat_dict, paths_to_save_in_pr, os.path.join(output_dir_path, 'pr', pr_output_filename))
    finally:
        # Close both inputs on every exit path (early return or exception),
        # not only after a successful comparison.
        pr_file.Close()
        base_file.Close()

    # Info about changed, added and removed elements. A path saved to both
    # outputs is a changed element; the rest exist on one side only.
    nr_of_changed_elements = len(set(paths_to_save_in_base).intersection(set(paths_to_save_in_pr)))
    nr_of_removed_elements = len(paths_to_save_in_base) - nr_of_changed_elements
    nr_of_added_elements = len(paths_to_save_in_pr) - nr_of_changed_elements

    print('Base output file. PR output file. Changed elements, removed elements, added elements:')
    print(base_output_filename)
    print(pr_output_filename)
    print('%s %s %s' % (nr_of_changed_elements, nr_of_removed_elements, nr_of_added_elements))
95 
def compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base):
    """Append every shared path whose PR and baseline objects differ to both output lists.

    Args:
        shared_paths: Paths present in both dictionaries.
        pr_flat_dict: Mapping from path to the object read from the PR file.
        base_flat_dict: Mapping from path to the object read from the baseline file.
        paths_to_save_in_pr: List extended in place with the differing paths.
        paths_to_save_in_base: List extended in place with the differing paths.
    """
    for shared_path in shared_paths:
        pr_obj = pr_flat_dict[shared_path]
        base_obj = base_flat_dict[shared_path]

        # Nothing to compare when either side compares equal to None
        if pr_obj == None or base_obj == None:
            continue

        if _objects_differ(pr_obj, base_obj):
            paths_to_save_in_pr.append(shared_path)
            paths_to_save_in_base.append(shared_path)


def _objects_differ(pr_obj, base_obj):
    """Tell whether two objects differ, choosing the comparison by their ROOT class."""
    # Profiles: compare content, entries and errors. TProfile2D is tried
    # first, then TProfile, each only when both objects inherit from it.
    for profile_class in ('TProfile2D', 'TProfile'):
        if pr_obj.InheritsFrom(profile_class) and base_obj.InheritsFrom(profile_class):
            return not compare_TProfile(pr_obj, base_obj)

    # Other histograms: compare bin by bin, overflow bins included
    if pr_obj.InheritsFrom('TH1') and base_obj.InheritsFrom('TH1'):
        pr_bins = root_numpy.hist2array(hist=pr_obj, include_overflow=True, copy=False)
        base_bins = root_numpy.hist2array(hist=base_obj, include_overflow=True, copy=False)
        return pr_bins.shape != base_bins.shape or not np.allclose(pr_bins, base_bins, equal_nan=True)

    # Non histograms: rely on the objects' own inequality
    return pr_obj != base_obj
130 
# Returns True when both profiles match, False as soon as a difference is found
def compare_TProfile(pr_item, base_item):
    """Compare two profile objects index by index.

    The profiles match when GetSize() agrees and, for every index below that
    size, the bin content, bin entries and bin error are all close according
    to np.isclose (NaN is treated as equal to NaN).

    Args:
        pr_item: Profile read from the PR file.
        base_item: Profile read from the baseline file.

    Returns:
        True if the profiles match, False otherwise.
    """
    size = pr_item.GetSize()
    if size != base_item.GetSize():
        return False

    for bin_index in range(size):
        value_pairs = (
            (pr_item.GetBinContent(bin_index), base_item.GetBinContent(bin_index)),
            (pr_item.GetBinEntries(bin_index), base_item.GetBinEntries(bin_index)),
            (pr_item.GetBinError(bin_index), base_item.GetBinError(bin_index)),
        )
        if not all(np.isclose(pr_value, base_value, equal_nan=True) for pr_value, base_value in value_pairs):
            return False

    return True
156 
def flatten_file(file, run_nr):
    """Flatten a ROOT file into a dictionary filled by traverse_till_end.

    Every top-level key of ``file`` is read and handed to traverse_till_end
    together with an empty path and the shared result dictionary.

    Args:
        file: Opened file object providing GetListOfKeys().
        run_nr: Run number forwarded to traverse_till_end.

    Returns:
        The dictionary populated by traverse_till_end. A top-level key that
        fails to read or traverse is reported on stderr and skipped.
    """
    result = {}
    for key in file.GetListOfKeys():
        try:
            top_level_object = key.ReadObj()
            traverse_till_end(top_level_object, [], result, run_nr)
        except Exception as error:
            # Best effort: one broken key must not abort the comparison, but
            # the failure is reported instead of silently dropped. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            print('Skipping top-level key that could not be processed: %s' % error, file=sys.stderr)

    return result
166 
def traverse_till_end(node, dirs_list, result, run_nr):
    """Recursively walk ``node`` and store every non-blacklisted leaf in ``result``.

    Args:
        node: Object to visit; it is treated as a container when it has a
            GetListOfKeys attribute and as a leaf otherwise.
        dirs_list: Names of the ancestors of ``node`` (not modified).
        result: Dictionary filled in place, keyed by the tuple of names
            leading to each leaf.
        run_nr: Run number forwarded to is_blacklisted.
    """
    node_path = dirs_list + [get_node_name(node)]

    if not hasattr(node, 'GetListOfKeys'):
        # Leaf: keep it unless its path is blacklisted
        if is_blacklisted(node_path, run_nr):
            return
        result[tuple(node_path)] = node
        return

    # Container: descend into every child
    for child_key in node.GetListOfKeys():
        traverse_till_end(child_key.ReadObj(), node_path, result, run_nr)
176 
def get_node_name(node):
    """Return the name under which ``node`` is stored in the flattened dictionary.

    For objects inheriting from TObjString the name is taken from the opening
    tag of the stored text (<name>value</name>) and get_string_suffix() is
    appended; every other object keeps its GetName() unchanged.
    """
    if not node.InheritsFrom('TObjString'):
        return node.GetName()

    # Text before the first '>' is '<name'; drop the leading '<'
    tag_name = node.GetName().partition('>')[0][1:]
    return tag_name + get_string_suffix()
184 
def get_string_suffix():
    """Return the suffix that get_node_name appends to names of TObjString nodes."""
    return '_string_monitor_element'
187 
def is_blacklisted(dirs_list, run_nr):
    """Tell whether the path described by ``dirs_list`` is in the blacklist for ``run_nr``.

    Args:
        dirs_list: List of path components; it is copied, never modified.
        run_nr: Run number passed to get_blacklist.

    Returns:
        True if the path, with the string suffix removed from its last
        component, is contained in get_blacklist(run_nr).
    """
    suffix = get_string_suffix()

    # Work on a copy so the caller's list stays untouched
    path = list(dirs_list)

    # Cut the suffix off the end only. str.replace would also delete
    # occurrences of the same text elsewhere in the name.
    if path and path[-1].endswith(suffix):
        path[-1] = path[-1][:-len(suffix)]

    return tuple(path) in get_blacklist(run_nr)
196 
def save_paths(flat_dict, paths, result_file_path):
    """Write the objects stored under ``paths`` into a new ROOT file.

    Args:
        flat_dict: Mapping from path to the object to write.
        paths: Paths to write; when empty nothing is created.
        result_file_path: Path of the ROOT file to (re)create. Its parent
            directory is created when missing.

    Returns:
        None. Status messages are printed to stderr.
    """
    if not paths:
        print('No differences were observed - output will not be written', file=sys.stderr)
        return

    # Make sure output dir exists. dirname is '' for a bare file name, and
    # os.makedirs('') would raise, so only create a non-empty directory.
    result_dir = os.path.dirname(result_file_path)
    if result_dir and not os.path.exists(result_dir):
        os.makedirs(result_dir)

    result_file = ROOT.TFile(result_file_path, 'recreate')
    ROOT.gROOT.GetListOfFiles().Remove(result_file)

    if not result_file.IsOpen():
        print('Unable to open %s output file' % result_file_path, file=sys.stderr)
        return

    try:
        for path in paths:
            save_to_file(flat_dict, path, result_file)
    finally:
        # Close the output even when writing one of the objects fails
        result_file.Close()

    print('Output written to %s file' % result_file_path, file=sys.stderr)
219 
# Writes one object from flat_dict into output_file, under the directories named by its path
def save_to_file(flat_dict, path, output_file):
    """Write ``flat_dict[path]`` into ``output_file``.

    Args:
        flat_dict: Mapping from path to the object to write.
        path: Sequence of names; all but the last are directory names.
        output_file: Output file that receives the object.
    """
    obj = flat_dict[path]

    # The last component names the object itself, so only the leading
    # components are turned into directories.
    target_dir = output_file
    for dir_name in path[:-1]:
        target_dir = create_dir(target_dir, dir_name)

    target_dir.cd()
    obj.Write()
232 
# Returns the directory called name inside parent_dir, creating it when missing
def create_dir(parent_dir, name):
    """Fetch ``name`` from ``parent_dir``, or create it with mkdir when Get() returns a falsy value."""
    existing = parent_dir.Get(name)
    if existing:
        return existing
    return parent_dir.mkdir(name)
239 
def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr):
    """Build the name of a comparison output file.

    Samples of the expected output file format:
        DQM_V0001_R000320822__wf136_892_pr__CMSSW_10_4_0_pre3-PR25518-1234__DQMIO.root
    When the run field is R000000001 the RelVal naming pattern is used:
        DQM_V0002_R000000001__RelVal_wf136_892_pr__CMSSW_10_4_0_pre3-PR25518-1234__DQMIO.root

    Args:
        input_file_path: Input file path. The third '_'-separated field of
            its file name is the run, and the parent directory name up to its
            first '_' is the workflow.
        pr_number: PR number placed after '-PR'.
        test_number: Test number placed after the PR number.
        cmssw_version: Release label placed before '-PR'.
        isPr: Truthy for the 'pr' file name, falsy for the 'base' one.

    Returns:
        The output file name.
    """
    run = os.path.basename(input_file_path).split('_')[2]

    # Workflow label, dots turned into underscores; 'Unknown' when the path
    # gives no usable parent directory name.
    parent_dir_name = os.path.basename(os.path.dirname(input_file_path))
    workflow = parent_dir_name.split('_')[0].replace('.', '_') or 'Unknown'

    relval_prefix = 'RelVal_' if run == 'R000000001' else ''
    kind = 'pr' if isPr else 'base'

    name_fields = (run, relval_prefix, workflow, kind, cmssw_version, pr_number, test_number)
    return 'DQM_V0001_%s__%swf%s_%s__%s-PR%s-%s__DQMIO.root' % name_fields
262 
def get_run_nr(file_path):
    """Return the run number from a file name, without the leading 'R' and leading zeros.

    The run is the third '_'-separated field of the file name.
    """
    run_field = os.path.basename(file_path).split('_')[2]
    without_prefix = run_field.lstrip('R')
    return without_prefix.lstrip('0')
265 
if __name__ == '__main__':
    # Command line entry point: parse the options and run the comparison.
    parser = argparse.ArgumentParser(description="This tool compares DQM monitor elements found in base-file with the ones found in pr-file. "
        "Comparison is done bin by bin and output is written to a root file containing only the changes.")
    parser.add_argument('-b', '--base-file', help='Baseline IB DQM root file', required=True)
    parser.add_argument('-p', '--pr-file', help='PR DQM root file', required=True)
    parser.add_argument('-n', '--pr-number', help='PR number under test', default='00001')
    parser.add_argument('-t', '--test-number', help='Unique test number to distinguish different comparisons of the same PR.', default='1')
    # .get() instead of indexing: a missing CMSSW_VERSION must not raise
    # KeyError while the parser is being built, because -r may supply the
    # release on the command line.
    parser.add_argument('-r', '--release-format', help='Release format in this format: CMSSW_10_5_X_2019-02-17-0000', default=os.environ.get('CMSSW_VERSION'))
    parser.add_argument('-o', '--output-dir', help='Comparison root files output directory', default='dqmHistoComparisonOutput')
    args = parser.parse_args()

    if not args.release_format:
        parser.error('no release given: set CMSSW_VERSION or pass -r/--release-format')

    # Keep only the first four '_'-separated fields of the release format
    cmssw_version = '_'.join(args.release_format.split('_')[:4])

    create_dif(args.base_file, args.pr_file, args.pr_number, args.test_number, cmssw_version, args.output_dir)
compareHistograms.create_dir
def create_dir(parent_dir, name)
Definition: compareHistograms.py:234
FastTimerService_cff.range
range
Definition: FastTimerService_cff.py:34
compareHistograms.compare
def compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)
Definition: compareHistograms.py:96
compareHistograms.flatten_file
def flatten_file(file, run_nr)
Definition: compareHistograms.py:157
compareHistograms.save_to_file
def save_to_file(flat_dict, path, output_file)
Definition: compareHistograms.py:221
compareHistograms.get_node_name
def get_node_name(node)
Definition: compareHistograms.py:177
join
static std::string join(char **cmd)
Definition: RemoteFile.cc:17
cms::dd::split
std::vector< std::string_view > split(std::string_view, const char *)
blacklist.get_blacklist
def get_blacklist(RUN_NR)
Definition: blacklist.py:2
compareHistograms.get_run_nr
def get_run_nr(file_path)
Definition: compareHistograms.py:263
compareHistograms.get_output_filename
def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr)
Definition: compareHistograms.py:240
compareHistograms.get_string_suffix
def get_string_suffix()
Definition: compareHistograms.py:185
compareHistograms.create_dif
def create_dif(base_file_path, pr_file_path, pr_number, test_number, cmssw_version, output_dir_path)
Definition: compareHistograms.py:13
edm::print
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
compareHistograms.compare_TProfile
def compare_TProfile(pr_item, base_item)
Definition: compareHistograms.py:132
compareHistograms.traverse_till_end
def traverse_till_end(node, dirs_list, result, run_nr)
Definition: compareHistograms.py:167
compareHistograms.save_paths
def save_paths(flat_dict, paths, result_file_path)
Definition: compareHistograms.py:197
list
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION. A wildcarded negative criterion that matches more than one trigger in the trigger list ("!*", "!HLTx*" if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL. It will reject the event if any of the triggers are PASS or EXCEPTION (this matches the behavior of "!*" before the partial wildcard feature was incorporated). Triggers which are in the READY state are completely ignored. (READY should never be returned since the trigger paths have been run.)
compareHistograms.is_blacklisted
def is_blacklisted(dirs_list, run_nr)
Definition: compareHistograms.py:188
compare
Definition: compare.py:1
reco::helper::VirtualJetProducerHelper::intersection
double intersection(double r12)
Definition: VirtualJetProducerHelper.h:14
python.rootplot.root2matplotlib.replace
def replace(string, replacements)
Definition: root2matplotlib.py:444