CMS 3D CMS Logo

Functions | Variables
compareHistograms Namespace Reference

Functions

def compare (shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)
 
def compare_TProfile (pr_item, base_item)
 
def compareMP (shared_paths, pr_flat_dict, base_flat_dict, iProc, return_dict)
 
def create_dif (base_file_path, pr_file_path, pr_number, test_number, cmssw_version, num_processes, output_dir_path)
 
def create_dir (parent_dir, name)
 
def flatten_file (file, run_nr)
 
def get_node_name (node)
 
def get_output_filename (input_file_path, pr_number, test_number, cmssw_version, isPr)
 
def get_run_nr (file_path)
 
def get_string_suffix ()
 
def is_blacklisted (dirs_list, run_nr)
 
def save_paths (flat_dict, paths, result_file_path)
 
def save_to_file (flat_dict, path, output_file)
 
def traverse_till_end (node, dirs_list, result, run_nr)
 

Variables

 args
 
 cmssw_version
 
 default
 
 description
 
 help
 
 IgnoreCommandLineOptions
 
 parser
 
 required
 
 type
 

Function Documentation

◆ compare()

def compareHistograms.compare (   shared_paths,
  pr_flat_dict,
  base_flat_dict,
  paths_to_save_in_pr,
  paths_to_save_in_base 
)

Definition at line 162 of file compareHistograms.py.

References compare_TProfile().

def compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base):
    """Collect the shared paths whose PR and baseline objects differ.

    Every differing path is appended to both ``paths_to_save_in_pr`` and
    ``paths_to_save_in_base``. The two lists are mutated in place and
    nothing is returned. A path is skipped when either of its objects
    compares equal to ``None``.
    """
    for path in shared_paths:
        pr_item, base_item = pr_flat_dict[path], base_flat_dict[path]

        # NOTE(review): the equality test against None (rather than `is None`)
        # is kept as written; PyROOT proxies for null pointers presumably
        # compare equal to None without being the None singleton - confirm
        # before changing it.
        if pr_item == None or base_item == None:
            continue

        # TProfile2D and TProfile are probed separately, in that order.
        is_profile_pair = any(
            pr_item.InheritsFrom(class_name) and base_item.InheritsFrom(class_name)
            for class_name in ('TProfile2D', 'TProfile')
        )

        if is_profile_pair:
            # Profiles: content, entries and errors are all compared
            differs = not compare_TProfile(pr_item, base_item)
        elif pr_item.InheritsFrom('TH1') and base_item.InheritsFrom('TH1'):
            # Plain histograms: bin-by-bin comparison of the array views
            pr_bins = np.array(pr_item)
            base_bins = np.array(base_item)
            differs = pr_bins.shape != base_bins.shape or not np.allclose(pr_bins, base_bins, equal_nan=True)
        else:
            # Anything that is not a histogram: rely on the objects' own `!=`
            differs = pr_item != base_item

        if differs:
            paths_to_save_in_pr.append(path)
            paths_to_save_in_base.append(path)
196 
197 # Returns False if different, True otherwise
bool compare(const P &i, const P &j)
def compare_TProfile(pr_item, base_item)

◆ compare_TProfile()

def compareHistograms.compare_TProfile (   pr_item,
  base_item 
)

Definition at line 198 of file compareHistograms.py.

References FastTimerService_cff.range.

Referenced by compare(), and compareMP().

def compare_TProfile(pr_item, base_item):
    """Return True when two profile objects agree, False otherwise.

    The objects must report the same ``GetSize()``. For every index below
    that size, bin content, bin entries and bin error are each compared
    with ``np.isclose`` (NaN is treated as equal to NaN).
    """
    size = pr_item.GetSize()
    if size != base_item.GetSize():
        return False

    for index in range(size):
        value_pairs = (
            (pr_item.GetBinContent(index), base_item.GetBinContent(index)),
            (pr_item.GetBinEntries(index), base_item.GetBinEntries(index)),
            (pr_item.GetBinError(index), base_item.GetBinError(index)),
        )
        # A single mismatching quantity is enough to call the profiles different
        if not all(np.isclose(pr_value, base_value, equal_nan=True) for pr_value, base_value in value_pairs):
            return False

    return True
222 
def compare_TProfile(pr_item, base_item)

◆ compareMP()

def compareHistograms.compareMP (   shared_paths,
  pr_flat_dict,
  base_flat_dict,
  iProc,
  return_dict 
)

Definition at line 123 of file compareHistograms.py.

References mps_setup.append, and compare_TProfile().

def compareMP(shared_paths, pr_flat_dict, base_flat_dict, iProc, return_dict):
    """Worker variant of the comparison: report differing paths via ``return_dict``.

    Walks ``shared_paths`` and gathers every path whose PR and baseline
    objects differ. The outcome is stored as
    ``return_dict[iProc] = {'pr': [...], 'base': [...]}``, with both lists
    holding the same differing paths. Nothing is returned.
    """
    changed = []

    for path in shared_paths:
        pr_item = pr_flat_dict[path]
        base_item = base_flat_dict[path]

        # NOTE(review): `== None` kept on purpose instead of `is None`;
        # presumably needed for PyROOT null-pointer proxies - confirm.
        if pr_item == None or base_item == None:
            continue

        both_profile2d = pr_item.InheritsFrom('TProfile2D') and base_item.InheritsFrom('TProfile2D')
        if both_profile2d or (pr_item.InheritsFrom('TProfile') and base_item.InheritsFrom('TProfile')):
            # Profiles: content, entries and errors
            differs = not compare_TProfile(pr_item, base_item)
        elif pr_item.InheritsFrom('TH1') and base_item.InheritsFrom('TH1'):
            # Histograms: bin by bin
            pr_bins, base_bins = np.array(pr_item), np.array(base_item)
            same_shape = pr_bins.shape == base_bins.shape
            differs = not same_shape or not np.allclose(pr_bins, base_bins, equal_nan=True)
        else:
            # Non-histogram objects
            differs = pr_item != base_item

        if differs:
            changed.append(path)

    # Two independent lists, one per output file
    return_dict[iProc] = {'pr': list(changed), 'base': list(changed)}
161 
def compare_TProfile(pr_item, base_item)
def compareMP(shared_paths, pr_flat_dict, base_flat_dict, iProc, return_dict)

◆ create_dif()

def compareHistograms.create_dif (   base_file_path,
  pr_file_path,
  pr_number,
  test_number,
  cmssw_version,
  num_processes,
  output_dir_path 
)

Definition at line 13 of file compareHistograms.py.

References flatten_file(), get_output_filename(), get_run_nr(), reco::helper::VirtualJetProducerHelper.intersection(), join(), print(), FastTimerService_cff.range, and save_paths().

def create_dif(base_file_path, pr_file_path, pr_number, test_number, cmssw_version, num_processes, output_dir_path):
    """Compare a baseline and a PR ROOT file and write out what differs.

    Both files are flattened into ``{path tuple: object}`` dictionaries.
    Objects found in both files are compared (in ``num_processes`` worker
    processes when that is greater than 1, otherwise in-process). Objects
    that differ, plus objects present in only one of the files, are written
    to ``<output_dir_path>/base/...`` and ``<output_dir_path>/pr/...``.

    A summary (both output file names and the changed / removed / added
    counts) is printed to stdout; progress and errors go to stderr.
    Returns None, also when either input file cannot be opened.
    """
    base_file = ROOT.TFile(base_file_path, 'read')
    ROOT.gROOT.GetListOfFiles().Remove(base_file)

    pr_file = ROOT.TFile(pr_file_path, 'read')
    ROOT.gROOT.GetListOfFiles().Remove(pr_file)

    if base_file.IsOpen():
        print('Baseline file successfully opened', file=sys.stderr)
    else:
        print('Unable to open base file', file=sys.stderr)
        # Do not leave the PR file open when bailing out early
        if pr_file.IsOpen():
            pr_file.Close()
        return

    if pr_file.IsOpen():
        print('PR file successfully opened', file=sys.stderr)
    else:
        print('Unable to open PR file', file=sys.stderr)
        # The baseline file was opened successfully above; release it
        base_file.Close()
        return

    run_nr = get_run_nr(pr_file_path)

    # Get list of paths (lists of directories)
    base_flat_dict = flatten_file(base_file, run_nr)
    pr_flat_dict = flatten_file(pr_file, run_nr)

    # Paths that appear in both baseline and PR data. (Intersection)
    shared_paths = list(set(pr_flat_dict).intersection(set(base_flat_dict)))

    # Paths that appear only in PR data. (Except)
    only_pr_paths = list(set(pr_flat_dict).difference(set(base_flat_dict)))

    # Paths that appear only in baseline data. (Except)
    only_base_paths = list(set(base_flat_dict).difference(set(pr_flat_dict)))

    # Histograms pointed to by these paths will be written to baseline output
    paths_to_save_in_base = []

    # Histograms pointed to by these paths will be written to pr output
    paths_to_save_in_pr = []

    # Make comparison
    if num_processes > 1:
        print("starting comparison using %d process(es)" % num_processes)
        manager = multiprocessing.Manager()
        return_dict = manager.dict()

        # Ceiling division: exactly num_processes slices cover every shared
        # path, so no extra "remainder" worker is needed on top of the
        # requested number of processes.
        block = -(-len(shared_paths) // num_processes)
        workers = []
        for i in range(num_processes):
            chunk = shared_paths[i * block:(i + 1) * block]
            p = multiprocessing.Process(target=compareMP, args=(chunk, pr_flat_dict, base_flat_dict, i, return_dict))
            workers.append(p)
            p.start()

        for i, p in enumerate(workers):
            p.join()
            paths_to_save_in_pr.extend(return_dict[i]['pr'])
            paths_to_save_in_base.extend(return_dict[i]['base'])

        # Worker results arrive chunk by chunk; sort both lists
        paths_to_save_in_pr.sort()
        paths_to_save_in_base.sort()
        print("Done")
    else:
        compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)

    # Collect paths that have to be written to baseline output file
    for path in only_base_paths:
        # NOTE(review): `== None` kept instead of `is None`; presumably
        # needed for PyROOT null-pointer proxies - confirm.
        if base_flat_dict[path] == None:
            continue
        paths_to_save_in_base.append(path)

    # Collect paths that have to be written to PR output file
    for path in only_pr_paths:
        if pr_flat_dict[path] == None:
            continue
        paths_to_save_in_pr.append(path)

    # Both output names are derived from the PR input path
    base_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, False)
    pr_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, True)

    # Write baseline output
    save_paths(base_flat_dict, paths_to_save_in_base, os.path.join(output_dir_path, 'base', base_output_filename))

    # Write PR output
    save_paths(pr_flat_dict, paths_to_save_in_pr, os.path.join(output_dir_path, 'pr', pr_output_filename))

    pr_file.Close()
    base_file.Close()

    # Info about changed, added and removed elements
    nr_of_changed_elements = len(set(paths_to_save_in_base).intersection(set(paths_to_save_in_pr)))
    nr_of_removed_elements = len(paths_to_save_in_base) - nr_of_changed_elements
    nr_of_added_elements = len(paths_to_save_in_pr) - nr_of_changed_elements

    print('Base output file. PR output file. Changed elements, removed elements, added elements:')
    print(base_output_filename)
    print(pr_output_filename)
    print('%s %s %s' % (nr_of_changed_elements, nr_of_removed_elements, nr_of_added_elements))
122 
def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr)
def flatten_file(file, run_nr)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
static std::string join(char **cmd)
Definition: RemoteFile.cc:19
def create_dif(base_file_path, pr_file_path, pr_number, test_number, cmssw_version, num_processes, output_dir_path)
def save_paths(flat_dict, paths, result_file_path)
def get_run_nr(file_path)

◆ create_dir()

def compareHistograms.create_dir (   parent_dir,
  name 
)

Definition at line 300 of file compareHistograms.py.

Referenced by save_to_file().

def create_dir(parent_dir, name):
    """Return the entry called ``name`` under ``parent_dir``, creating it if needed.

    ``parent_dir.Get(name)`` is tried first; when that yields a falsy
    result the value of ``parent_dir.mkdir(name)`` is returned instead.
    """
    existing = parent_dir.Get(name)
    return existing if existing else parent_dir.mkdir(name)
305 
def create_dir(parent_dir, name)

◆ flatten_file()

def compareHistograms.flatten_file (   file,
  run_nr 
)

Definition at line 223 of file compareHistograms.py.

References traverse_till_end().

Referenced by create_dif().

def flatten_file(file, run_nr):
    """Flatten an opened ROOT file into a ``{path tuple: object}`` dictionary.

    Every top-level key of ``file`` is read and handed to
    ``traverse_till_end``, which fills the result dictionary. A key whose
    reading or traversal raises an ordinary exception is skipped, so one
    problematic entry does not abort the whole comparison.

    Returns the dictionary that was filled.
    """
    result = {}
    for key in file.GetListOfKeys():
        try:
            node = key.ReadObj()
            traverse_till_end(node, [], result, run_nr)
        except Exception:
            # Deliberately best effort. Only Exception is caught so that
            # KeyboardInterrupt / SystemExit still stop the program instead
            # of being silently swallowed.
            pass

    return result
232 
def flatten_file(file, run_nr)
def traverse_till_end(node, dirs_list, result, run_nr)

◆ get_node_name()

def compareHistograms.get_node_name (   node)

Definition at line 243 of file compareHistograms.py.

References get_string_suffix(), and submitPVValidationJobs.split().

Referenced by traverse_till_end().

def get_node_name(node):
    """Return the name under which ``node`` is stored in a flattened path.

    For nodes inheriting from ``TObjString`` the name is taken from the
    opening tag of ``<name>value</name>`` and the string suffix from
    ``get_string_suffix()`` is appended. Any other node is identified by
    ``node.GetName()`` unchanged.
    """
    if not node.InheritsFrom('TObjString'):
        return node.GetName()

    # Text before the first '>' is '<name'; drop the leading '<'
    opening_tag = node.GetName().partition('>')[0]
    return opening_tag[1:] + get_string_suffix()
250 

◆ get_output_filename()

def compareHistograms.get_output_filename (   input_file_path,
  pr_number,
  test_number,
  cmssw_version,
  isPr 
)

Definition at line 306 of file compareHistograms.py.

References python.rootplot.root2matplotlib.replace(), and submitPVValidationJobs.split().

Referenced by create_dif().

def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr):
    """Build the output file name for the base or PR result file.

    The run token is the third ``_``-separated field of the input file
    name. The workflow is the part of the parent directory name before
    its first ``_``, with dots turned into underscores (``Unknown`` when
    that is empty).

    Sample of the produced format:
        DQM_V0001_R000320822__wf136_892_pr__CMSSW_10_4_0_pre3-PR25518-1234__DQMIO.root
    When the run token is ``R000000001`` a ``RelVal_`` prefix is added:
        DQM_V0001_R000000001__RelVal_wf136_892_pr__CMSSW_10_4_0_pre3-PR25518-1234__DQMIO.root

    NOTE(review): an earlier comment showed a ``DQM_V0002_`` prefix for the
    RelVal case, but the format string always emits ``DQM_V0001_`` -
    confirm which one is intended.
    """
    run = os.path.basename(input_file_path).split('_')[2]

    parent_dir_name = os.path.basename(os.path.dirname(input_file_path))
    workflow = parent_dir_name.split('_')[0].replace('.', '_') or 'Unknown'

    relval_prefix = 'RelVal_' if run == 'R000000001' else ''
    baseOrPr = 'pr' if isPr else 'base'

    fields = (run, relval_prefix, workflow, baseOrPr, cmssw_version, pr_number, test_number)
    return 'DQM_V0001_%s__%swf%s_%s__%s-PR%s-%s__DQMIO.root' % fields
328 
def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr)
def replace(string, replacements)

◆ get_run_nr()

def compareHistograms.get_run_nr (   file_path)

Definition at line 329 of file compareHistograms.py.

References submitPVValidationJobs.split().

Referenced by create_dif().

def get_run_nr(file_path):
    """Extract the run number, as a string, from a DQM file path.

    The third ``_``-separated field of the file name is taken, and leading
    ``R`` characters followed by leading zeros are stripped, e.g.
    ``R000320822`` gives ``'320822'``.
    """
    run_token = os.path.basename(file_path).split('_')[2]
    without_prefix = run_token.lstrip('R')
    return without_prefix.lstrip('0')
331 
def get_run_nr(file_path)

◆ get_string_suffix()

def compareHistograms.get_string_suffix ( )

Definition at line 251 of file compareHistograms.py.

Referenced by get_node_name(), and is_blacklisted().

def get_string_suffix():
    """Return the fixed suffix ``'_string_monitor_element'``."""
    suffix = '_string_monitor_element'
    return suffix
253 

◆ is_blacklisted()

def compareHistograms.is_blacklisted (   dirs_list,
  run_nr 
)

Definition at line 254 of file compareHistograms.py.

References blacklist.get_blacklist(), get_string_suffix(), and python.rootplot.root2matplotlib.replace().

Referenced by traverse_till_end().

def is_blacklisted(dirs_list, run_nr):
    """Tell whether the path ``dirs_list`` is in the blacklist for ``run_nr``.

    The last path element is compared without the string suffix returned
    by ``get_string_suffix()``. The caller's list is not modified.

    Returns the result of the membership test against
    ``get_blacklist(run_nr)``.
    """
    suffix = get_string_suffix()
    leaf = dirs_list[-1]

    # Cut off only the trailing suffix; str.replace would also remove any
    # earlier occurrence of the same text inside the name.
    if leaf.endswith(suffix):
        leaf = leaf[:-len(suffix)]

    return tuple(dirs_list[:-1]) + (leaf,) in get_blacklist(run_nr)
262 
def get_blacklist(RUN_NR)
Definition: blacklist.py:2
def replace(string, replacements)
def is_blacklisted(dirs_list, run_nr)

◆ save_paths()

def compareHistograms.save_paths (   flat_dict,
  paths,
  result_file_path 
)

Definition at line 263 of file compareHistograms.py.

References print(), and save_to_file().

Referenced by create_dif().

def save_paths(flat_dict, paths, result_file_path):
    """Write the objects referenced by ``paths`` into a new ROOT file.

    Nothing is written when ``paths`` is empty. Otherwise the output
    directory is created if it does not exist, the file at
    ``result_file_path`` is (re)created, and every path is stored through
    ``save_to_file``. Progress and errors are reported on stderr.

    Returns None in every case.
    """
    if not paths:
        print('No differences were observed - output will not be written', file=sys.stderr)
        return

    # Make sure output dir exists. dirname is '' for a bare file name, and
    # os.makedirs('') would raise, so only create a directory when one is
    # actually named.
    result_dir = os.path.dirname(result_file_path)
    if result_dir and not os.path.exists(result_dir):
        os.makedirs(result_dir)

    result_file = ROOT.TFile(result_file_path, 'recreate')
    ROOT.gROOT.GetListOfFiles().Remove(result_file)

    if not result_file.IsOpen():
        print('Unable to open %s output file' % result_file_path, file=sys.stderr)
        return

    for path in paths:
        save_to_file(flat_dict, path, result_file)

    result_file.Close()
    print('Output written to %s file' % result_file_path, file=sys.stderr)
285 
286 # Saves file from flat_dict in the same dir of currently open file for writing
def save_to_file(flat_dict, path, output_file)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def save_paths(flat_dict, paths, result_file_path)

◆ save_to_file()

def compareHistograms.save_to_file (   flat_dict,
  path,
  output_file 
)

Definition at line 287 of file compareHistograms.py.

References create_dir().

Referenced by save_paths().

def save_to_file(flat_dict, path, output_file):
    """Write ``flat_dict[path]`` into ``output_file`` under its directory path.

    All elements of ``path`` except the last are treated as nested
    directory names and are created on demand through ``create_dir``; the
    last element is the object's own name and needs no directory.
    """
    histogram = flat_dict[path]

    current = output_file

    # Last item is filename. No need to create dir for it
    for directory in path[:-1]:
        current = create_dir(current, directory)

    # Always make the target directory current before writing, so that a
    # path with no parent directories ends up in output_file itself rather
    # than in whatever directory happened to be current beforehand.
    current.cd()

    histogram.Write()
298 
299 # Create dir in root file if it doesn't exist
def save_to_file(flat_dict, path, output_file)
def create_dir(parent_dir, name)

◆ traverse_till_end()

def compareHistograms.traverse_till_end (   node,
  dirs_list,
  result,
  run_nr 
)

Definition at line 233 of file compareHistograms.py.

References get_node_name(), and is_blacklisted().

Referenced by flatten_file().

def traverse_till_end(node, dirs_list, result, run_nr):
    """Recursively walk ``node`` and record every leaf object in ``result``.

    A node exposing ``GetListOfKeys`` is descended into. Any other node is
    a leaf and is stored as ``result[path tuple] = node`` unless
    ``is_blacklisted`` rejects its path for ``run_nr``. ``dirs_list`` is
    left untouched; each level works on a new, extended list.
    """
    path_so_far = dirs_list + [get_node_name(node)]

    if not hasattr(node, 'GetListOfKeys'):
        # Leaf: keep it unless its path is blacklisted
        if not is_blacklisted(path_so_far, run_nr):
            result[tuple(path_so_far)] = node
        return

    for key in node.GetListOfKeys():
        traverse_till_end(key.ReadObj(), path_so_far, result, run_nr)
242 
def traverse_till_end(node, dirs_list, result, run_nr)
def is_blacklisted(dirs_list, run_nr)

Variable Documentation

◆ args

compareHistograms.args

Definition at line 342 of file compareHistograms.py.

◆ cmssw_version

compareHistograms.cmssw_version

Definition at line 344 of file compareHistograms.py.

◆ default

compareHistograms.default

Definition at line 337 of file compareHistograms.py.

◆ description

compareHistograms.description

Definition at line 333 of file compareHistograms.py.

◆ help

compareHistograms.help

Definition at line 335 of file compareHistograms.py.

◆ IgnoreCommandLineOptions

compareHistograms.IgnoreCommandLineOptions

Definition at line 5 of file compareHistograms.py.

◆ parser

compareHistograms.parser

Definition at line 333 of file compareHistograms.py.

◆ required

compareHistograms.required

Definition at line 335 of file compareHistograms.py.

◆ type

compareHistograms.type

Definition at line 340 of file compareHistograms.py.