CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
Functions | Variables
compareHistograms Namespace Reference

Functions

def compare
 
def compare_TProfile
 
def compareMP
 
def create_dif
 
def create_dir
 
def flatten_file
 
def get_node_name
 
def get_output_filename
 
def get_run_nr
 
def get_string_suffix
 
def is_blacklisted
 
def save_paths
 
def save_to_file
 
def traverse_till_end
 

Variables

tuple args = parser.parse_args()
 
string cmssw_version = '_'
 
tuple parser
 

Function Documentation

def compareHistograms.compare (   shared_paths,
  pr_flat_dict,
  base_flat_dict,
  paths_to_save_in_pr,
  paths_to_save_in_base 
)

Definition at line 162 of file compareHistograms.py.

References compare_TProfile().

def compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base):
    """Record every shared path whose PR object differs from its baseline object.

    Each path in ``shared_paths`` is looked up in both flat dictionaries.
    Paths whose objects differ are appended to BOTH ``paths_to_save_in_pr``
    and ``paths_to_save_in_base`` (the lists are modified in place; nothing
    is returned).

    How two objects are compared depends on what both of them inherit from:
      * 'TProfile2D' or 'TProfile' -> compare_TProfile()
      * 'TH1'                      -> numpy arrays built from the objects,
                                      compared by shape and np.allclose
      * anything else              -> plain ``!=``
    """
    for path in shared_paths:
        pr_obj = pr_flat_dict[path]
        base_obj = base_flat_dict[path]

        # NOTE(review): '== None' (not 'is None') is kept exactly as written;
        # presumably the values can be ROOT proxies - confirm before changing.
        if pr_obj == None or base_obj == None:
            continue

        def both_inherit(class_name):
            # True only when the PR object and the baseline object both derive from class_name
            return pr_obj.InheritsFrom(class_name) and base_obj.InheritsFrom(class_name)

        if both_inherit('TProfile2D') or both_inherit('TProfile'):
            # Profiles: content, entries and errors are all checked
            differs = not compare_TProfile(pr_obj, base_obj)
        elif both_inherit('TH1'):
            # Histograms: bin by bin
            pr_bins = np.array(pr_obj)
            base_bins = np.array(base_obj)
            differs = pr_bins.shape != base_bins.shape or not np.allclose(pr_bins, base_bins, equal_nan=True)
        else:
            # Non histograms
            differs = pr_obj != base_obj

        if differs:
            paths_to_save_in_pr.append(path)
            paths_to_save_in_base.append(path)
197 
# Returns False if different, True otherwise
def compareHistograms.compare_TProfile (   pr_item,
  base_item 
)

Definition at line 198 of file compareHistograms.py.

References sistrip::SpyUtilities.range().

Referenced by compare(), and compareMP().

def compare_TProfile(pr_item, base_item):
    """Return True when two profile objects agree cell by cell, False otherwise.

    The objects must report the same GetSize(). For every index below that
    size, the bin content, bin entries and bin error are compared with
    np.isclose (NaN is treated as equal to NaN).
    """
    if pr_item.GetSize() != base_item.GetSize():
        return False

    for idx in range(pr_item.GetSize()):
        # (PR value, baseline value) for each quantity, in the order they are checked
        value_pairs = (
            (pr_item.GetBinContent(idx), base_item.GetBinContent(idx)),
            (pr_item.GetBinEntries(idx), base_item.GetBinEntries(idx)),
            (pr_item.GetBinError(idx), base_item.GetBinError(idx)),
        )

        if not all(np.isclose(pr_value, base_value, equal_nan=True) for pr_value, base_value in value_pairs):
            return False

    return True
const uint16_t range(const Frame &aFrame)
def compareHistograms.compareMP (   shared_paths,
  pr_flat_dict,
  base_flat_dict,
  iProc,
  return_dict 
)

Definition at line 123 of file compareHistograms.py.

References bitset_utilities.append(), and compare_TProfile().

def compareMP(shared_paths, pr_flat_dict, base_flat_dict, iProc, return_dict):
    """Worker variant of the comparison: report differing paths through ``return_dict``.

    Every path in ``shared_paths`` is looked up in both flat dictionaries and
    the two objects are compared (profiles via compare_TProfile(), 'TH1'
    objects via numpy arrays, everything else via ``!=``). The differing
    paths are stored as ``return_dict[iProc] = {'pr': [...], 'base': [...]}``;
    both lists receive the same paths. Nothing is returned.
    """
    differing_pr = []
    differing_base = []

    for path in shared_paths:
        pr_obj = pr_flat_dict[path]
        base_obj = base_flat_dict[path]

        # NOTE(review): '== None' (not 'is None') is kept exactly as written;
        # presumably the values can be ROOT proxies - confirm before changing.
        if pr_obj == None or base_obj == None:
            continue

        def both_inherit(class_name):
            # True only when the PR object and the baseline object both derive from class_name
            return pr_obj.InheritsFrom(class_name) and base_obj.InheritsFrom(class_name)

        if both_inherit('TProfile2D') or both_inherit('TProfile'):
            # Profiles: content, entries and errors are all checked
            differs = not compare_TProfile(pr_obj, base_obj)
        elif both_inherit('TH1'):
            # Histograms: bin by bin
            pr_bins = np.array(pr_obj)
            base_bins = np.array(base_obj)
            differs = pr_bins.shape != base_bins.shape or not np.allclose(pr_bins, base_bins, equal_nan=True)
        else:
            # Non histograms
            differs = pr_obj != base_obj

        if differs:
            differing_pr.append(path)
            differing_base.append(path)

    # Publish this worker's findings under its own process index
    return_dict[iProc] = {'pr': differing_pr, 'base': differing_base}
boost::dynamic_bitset append(const boost::dynamic_bitset<> &bs1, const boost::dynamic_bitset<> &bs2)
This method takes two bitsets, bs1 and bs2, and returns the result of appending bs2 to the end of bs1 ...
def compareHistograms.create_dif (   base_file_path,
  pr_file_path,
  pr_number,
  test_number,
  cmssw_version,
  num_processes,
  output_dir_path 
)

Definition at line 13 of file compareHistograms.py.

References flatten_file(), get_output_filename(), get_run_nr(), reco::helper::VirtualJetProducerHelper.intersection(), join(), print(), sistrip::SpyUtilities.range(), and save_paths().

13 
def create_dif(base_file_path, pr_file_path, pr_number, test_number, cmssw_version, num_processes, output_dir_path):
    """Compare a baseline ROOT file with a PR ROOT file and write out the differences.

    Steps, in order:
      1. Open both input files read-only (returning early, after a message on
         stderr, if either cannot be opened).
      2. Flatten both files into ``{path tuple: object}`` dictionaries.
      3. Compare the objects found under paths present in both files, either
         in this process (compare) or in worker processes (compareMP) when
         ``num_processes > 1``.
      4. Add the paths that exist in only one of the files to that file's list.
      5. Write the collected objects to ``<output_dir_path>/base/...`` and
         ``<output_dir_path>/pr/...`` via save_paths().
      6. Print the two output file names and the changed / removed / added
         counts on stdout.

    Parameters:
      base_file_path, pr_file_path -- paths of the two input ROOT files
      pr_number, test_number, cmssw_version -- only used to build the output file names
      num_processes -- values above 1 enable the multiprocessing comparison
      output_dir_path -- directory that receives the 'base' and 'pr' sub-directories

    Returns None.
    """
    base_file = ROOT.TFile(base_file_path, 'read')
    ROOT.gROOT.GetListOfFiles().Remove(base_file)

    pr_file = ROOT.TFile(pr_file_path, 'read')
    ROOT.gROOT.GetListOfFiles().Remove(pr_file)

    if base_file.IsOpen():
        print('Baseline file successfully opened', file=sys.stderr)
    else:
        print('Unable to open base file', file=sys.stderr)
        # The PR file may have opened fine; do not leave it open on this early exit
        if pr_file.IsOpen():
            pr_file.Close()
        return

    if pr_file.IsOpen():
        print('PR file successfully opened', file=sys.stderr)
    else:
        print('Unable to open PR file', file=sys.stderr)
        # The baseline file is known to be open here; release it before bailing out
        base_file.Close()
        return

    run_nr = get_run_nr(pr_file_path)

    # Get list of paths (lists of directories)
    base_flat_dict = flatten_file(base_file, run_nr)
    pr_flat_dict = flatten_file(pr_file, run_nr)

    # Paths that appear in both baseline and PR data. (Intersection)
    shared_paths = list(set(pr_flat_dict).intersection(set(base_flat_dict)))

    # Paths that appear only in PR data. (Except)
    only_pr_paths = list(set(pr_flat_dict).difference(set(base_flat_dict)))

    # Paths that appear only in baseline data. (Except)
    only_base_paths = list(set(base_flat_dict).difference(set(pr_flat_dict)))

    # Histograms pointed to by these paths will be written to baseline output
    paths_to_save_in_base = []

    # Histograms pointed to by these paths will be written to pr output
    paths_to_save_in_pr = []

    # Make comparison
    if num_processes > 1:
        print("starting comparison using %d process(es)" % num_processes)
        manager = multiprocessing.Manager()
        return_dict = manager.dict()
        proc = []
        iProc = 0

        # Equal-sized slices for the first num_processes workers ...
        block = len(shared_paths)//num_processes
        for i in range(num_processes):
            p = multiprocessing.Process(target=compareMP, args=(shared_paths[i*block:(i+1)*block], pr_flat_dict, base_flat_dict, i, return_dict))
            proc.append(p)
            p.start()
            iProc += 1
        # ... plus one extra worker for whatever the integer division left over
        p = multiprocessing.Process(target=compareMP, args=(shared_paths[(i+1)*block:len(shared_paths)], pr_flat_dict, base_flat_dict, num_processes, return_dict))
        proc.append(p)
        p.start()
        iProc += 1

        for i in range(iProc):
            proc[i].join()
            paths_to_save_in_pr.extend(return_dict[i]['pr'])
            paths_to_save_in_base.extend(return_dict[i]['base'])

        paths_to_save_in_pr.sort()
        paths_to_save_in_base.sort()
        print("Done")
    else:
        compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)

    # Collect paths that have to be written to baseline output file
    for path in only_base_paths:
        item = base_flat_dict[path]

        if item == None:
            continue

        paths_to_save_in_base.append(path)

    # Collect paths that have to be written to PR output file
    for path in only_pr_paths:
        item = pr_flat_dict[path]

        if item == None:
            continue

        paths_to_save_in_pr.append(path)

    base_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, False)
    pr_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, True)

    # Write baseline output
    save_paths(base_flat_dict, paths_to_save_in_base, os.path.join(output_dir_path, 'base', base_output_filename))

    # Write PR output
    save_paths(pr_flat_dict, paths_to_save_in_pr, os.path.join(output_dir_path, 'pr', pr_output_filename))

    pr_file.Close()
    base_file.Close()

    # Info about changed, added and removed elements
    nr_of_changed_elements = len(set(paths_to_save_in_base).intersection(set(paths_to_save_in_pr)))
    nr_of_removed_elements = len(paths_to_save_in_base) - nr_of_changed_elements
    nr_of_added_elements = len(paths_to_save_in_pr) - nr_of_changed_elements

    print('Base output file. PR output file. Changed elements, removed elements, added elements:')
    print(base_output_filename)
    print(pr_output_filename)
    print('%s %s %s' % (nr_of_changed_elements, nr_of_removed_elements, nr_of_added_elements))
const uint16_t range(const Frame &aFrame)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
static std::string join(char **cmd)
Definition: RemoteFile.cc:19
def compareHistograms.create_dir (   parent_dir,
  name 
)

Definition at line 300 of file compareHistograms.py.

Referenced by save_to_file().

def create_dir(parent_dir, name):
    """Return the entry called ``name`` inside ``parent_dir``, creating it with mkdir() if Get() finds nothing."""
    existing = parent_dir.Get(name)
    if existing:
        return existing
    return parent_dir.mkdir(name)
def compareHistograms.flatten_file (   file,
  run_nr 
)

Definition at line 223 of file compareHistograms.py.

References traverse_till_end().

Referenced by create_dif().

def flatten_file(file, run_nr):
    """Flatten the contents of ``file`` into a ``{path tuple: object}`` dictionary.

    Every key returned by ``file.GetListOfKeys()`` is read and handed to
    traverse_till_end(), which fills the result dictionary.

    Flattening is best effort: if reading or traversing one top-level key
    raises an Exception, a note is printed on stderr and the remaining keys
    are still processed (entries added before the failure are kept).
    KeyboardInterrupt and SystemExit are no longer swallowed.

    Returns the dictionary (possibly empty).
    """
    result = {}
    for key in file.GetListOfKeys():
        try:
            traverse_till_end(key.ReadObj(), [], result, run_nr)
        except Exception as err:
            # Keep going with the other keys, but leave a trace of what was skipped
            print('Skipping a top-level key that could not be flattened: %s' % err, file=sys.stderr)

    return result
def compareHistograms.get_node_name (   node)

Definition at line 243 of file compareHistograms.py.

References get_string_suffix(), and submitPVValidationJobs.split().

Referenced by traverse_till_end().

def get_node_name(node):
    """Return the name used for ``node`` when building a path.

    For nodes inheriting from 'TObjString' the name is taken from the opening
    tag of ``<name>value</name>`` and the string suffix is appended; every
    other node simply reports GetName().
    """
    if not node.InheritsFrom('TObjString'):
        return node.GetName()

    # Everything before the first '>' is '<name'; drop the leading '<'
    opening_tag = node.GetName().partition('>')[0]
    return opening_tag[1:] + get_string_suffix()
def compareHistograms.get_output_filename (   input_file_path,
  pr_number,
  test_number,
  cmssw_version,
  isPr 
)

Definition at line 306 of file compareHistograms.py.

References python.rootplot.root2matplotlib.replace(), and submitPVValidationJobs.split().

Referenced by create_dif().

def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr):
    """Build the name of the output file that corresponds to ``input_file_path``.

    The run is the third '_'-separated token of the input file name. The
    workflow is the part of the parent directory name before its first '_',
    with '.' replaced by '_' ('Unknown' when that is empty).

    Samples of correct output file format:
      DQM_V0001_R000320822__wf136_892_pr__CMSSW_10_4_0_pre3-PR25518-1234__DQMIO.root
    When the run is R000000001 the 'RelVal_' prefix is inserted before 'wf'.

    ``isPr`` selects the 'pr' (truthy) or 'base' (falsy) marker in the name.
    """
    file_name = os.path.basename(input_file_path)
    run = file_name.split('_')[2]

    parent_dir_name = os.path.basename(os.path.dirname(input_file_path))
    workflow = parent_dir_name.split('_')[0].replace('.', '_') or 'Unknown'

    relval_prefix = 'RelVal_' if run == 'R000000001' else ''
    base_or_pr = 'pr' if isPr else 'base'

    name_parts = (run, relval_prefix, workflow, base_or_pr, cmssw_version, pr_number, test_number)
    return 'DQM_V0001_%s__%swf%s_%s__%s-PR%s-%s__DQMIO.root' % name_parts
def compareHistograms.get_run_nr (   file_path)

Definition at line 329 of file compareHistograms.py.

References submitPVValidationJobs.split().

Referenced by create_dif().

def get_run_nr(file_path):
    """Return the run number string taken from the third '_'-separated token of the file name.

    Leading 'R' characters and then leading zeros are stripped, so
    'R000320822' becomes '320822'.
    """
    run_token = os.path.basename(file_path).split('_')[2]
    without_prefix = run_token.lstrip('R')
    return without_prefix.lstrip('0')
def compareHistograms.get_string_suffix ( )

Definition at line 251 of file compareHistograms.py.

Referenced by get_node_name(), and is_blacklisted().

def get_string_suffix():
    """Return the fixed suffix appended to the names of string nodes."""
    suffix = '_string_monitor_element'
    return suffix
def compareHistograms.is_blacklisted (   dirs_list,
  run_nr 
)

Definition at line 254 of file compareHistograms.py.

References blacklist.get_blacklist(), get_string_suffix(), and python.rootplot.root2matplotlib.replace().

Referenced by traverse_till_end().

def is_blacklisted(dirs_list, run_nr):
    """Tell whether the path described by ``dirs_list`` is in the blacklist for ``run_nr``.

    The string suffix is removed from the last path element before the
    lookup. ``dirs_list`` itself is left untouched; the lookup key is a
    tuple built from it.
    """
    suffix = get_string_suffix()

    leaf = dirs_list[-1]
    if leaf.endswith(suffix):
        leaf = leaf.replace(suffix, '')

    lookup_key = tuple(dirs_list[:-1]) + (leaf,)
    return lookup_key in get_blacklist(run_nr)
def get_blacklist
Definition: blacklist.py:2
def compareHistograms.save_paths (   flat_dict,
  paths,
  result_file_path 
)

Definition at line 263 of file compareHistograms.py.

References print(), and save_to_file().

Referenced by create_dif().

def save_paths(flat_dict, paths, result_file_path):
    """Write the objects found under ``paths`` in ``flat_dict`` to a new ROOT file.

    Nothing is written when ``paths`` is empty. Otherwise the directory of
    ``result_file_path`` is created if it does not exist, the file is
    (re)created, and every path is stored with save_to_file(). Progress and
    failures are reported on stderr. Returns None.
    """
    if len(paths) == 0:
        print('No differences were observed - output will not be written', file=sys.stderr)
        return

    # Make sure output dir exists
    output_dir = os.path.dirname(result_file_path)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    out_file = ROOT.TFile(result_file_path, 'recreate')
    ROOT.gROOT.GetListOfFiles().Remove(out_file)

    if not out_file.IsOpen():
        print('Unable to open %s output file' % result_file_path, file=sys.stderr)
        return

    for single_path in paths:
        save_to_file(flat_dict, single_path, out_file)

    out_file.Close()
    print('Output written to %s file' % result_file_path, file=sys.stderr)
286 
# Saves file from flat_dict in the same dir of currently open file for writing
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def compareHistograms.save_to_file (   flat_dict,
  path,
  output_file 
)

Definition at line 287 of file compareHistograms.py.

References create_dir().

Referenced by save_paths().

def save_to_file(flat_dict, path, output_file):
    """Write ``flat_dict[path]`` into ``output_file`` under the directories named by ``path``.

    All elements of ``path`` except the last one are treated as nested
    directory names and are created on demand with create_dir(); the last
    element is the object's own name, so no directory is made for it.
    """
    histogram = flat_dict[path]

    # NOTE(review): the rendered listing lost its indentation; cd() is taken to
    # be inside the loop - confirm against compareHistograms.py.
    target_dir = output_file
    for dir_name in path[:-1]:
        target_dir = create_dir(target_dir, dir_name)
        target_dir.cd()

    histogram.Write()
299 
# Create dir in root file if it doesn't exist
def compareHistograms.traverse_till_end (   node,
  dirs_list,
  result,
  run_nr 
)

Definition at line 233 of file compareHistograms.py.

References get_node_name(), and is_blacklisted().

Referenced by flatten_file().

def traverse_till_end(node, dirs_list, result, run_nr):
    """Walk ``node`` recursively and record every leaf in ``result``.

    ``dirs_list`` holds the names of the nodes above ``node``. A node that
    has a ``GetListOfKeys`` attribute is descended into; any other node is a
    leaf and is stored as ``result[tuple of names] = node`` unless
    is_blacklisted() rejects its path for ``run_nr``.
    """
    current_path = dirs_list + [get_node_name(node)]

    if not hasattr(node, 'GetListOfKeys'):
        # Leaf: keep it unless the path is blacklisted
        if not is_blacklisted(current_path, run_nr):
            result[tuple(current_path)] = node
        return

    for child_key in node.GetListOfKeys():
        traverse_till_end(child_key.ReadObj(), current_path, result, run_nr)

Variable Documentation

tuple compareHistograms.args = parser.parse_args()

Definition at line 342 of file compareHistograms.py.

string compareHistograms.cmssw_version = '_'

Definition at line 344 of file compareHistograms.py.

tuple compareHistograms.parser
Initial value:
1 = argparse.ArgumentParser(description="This tool compares DQM monitor elements found in base-file with the ones found in pr-file."
2  "Comparison is done bin by bin and output is written to a root file containing only the changes.")

Definition at line 333 of file compareHistograms.py.