CMS 3D CMS Logo

Functions
compareHistograms Namespace Reference

Functions

def compare (shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)
 
def compare_TProfile (pr_item, base_item)
 
def create_dif (base_file_path, pr_file_path, pr_number, test_number, cmssw_version, output_dir_path)
 
def create_dir (parent_dir, name)
 
def flatten_file (file, run_nr)
 
def get_node_name (node)
 
def get_output_filename (input_file_path, pr_number, test_number, cmssw_version, isPr)
 
def get_run_nr (file_path)
 
def save_paths (flat_dict, paths, result_file_path)
 
def save_to_file (flat_dict, path, output_file)
 
def traverse_till_end (node, dirs_list, result, run_nr)
 

Function Documentation

def compareHistograms.compare (   shared_paths,
  pr_flat_dict,
  base_flat_dict,
  paths_to_save_in_pr,
  paths_to_save_in_base 
)

Definition at line 96 of file compareHistograms.py.

References compare_TProfile().

def compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base):
    """Collect the shared paths whose PR and baseline objects differ.

    Every path found to differ is appended to both ``paths_to_save_in_pr``
    and ``paths_to_save_in_base``. Nothing is returned; the two lists are
    modified in place.

    Comparison strategy per pair of objects:
      * both inherit from TProfile2D, or both from TProfile:
        delegated to compare_TProfile()
      * both inherit from TH1: bin-by-bin comparison (overflow included)
      * anything else: plain ``!=`` comparison
    """
    profile_classes = ('TProfile2D', 'TProfile')

    for path in shared_paths:
        pr_obj = pr_flat_dict[path]
        base_obj = base_flat_dict[path]

        # Equality (not identity) test against None is kept as in the original.
        # NOTE(review): presumably this also skips null PyROOT proxies - confirm.
        if pr_obj == None or base_obj == None:
            continue

        if any(pr_obj.InheritsFrom(cls) and base_obj.InheritsFrom(cls) for cls in profile_classes):
            # Profiles: content, entries and errors are all checked
            differs = not compare_TProfile(pr_obj, base_obj)
        elif pr_obj.InheritsFrom('TH1') and base_obj.InheritsFrom('TH1'):
            # Regular histograms: compare the bin arrays
            pr_bins = root_numpy.hist2array(hist=pr_obj, include_overflow=True, copy=False)
            base_bins = root_numpy.hist2array(hist=base_obj, include_overflow=True, copy=False)
            differs = pr_bins.shape != base_bins.shape or not np.allclose(pr_bins, base_bins, equal_nan=True)
        else:
            # Non-histogram objects
            differs = pr_obj != base_obj

        if differs:
            paths_to_save_in_pr.append(path)
            paths_to_save_in_base.append(path)
130 
131 # Returns False if different, True otherwise
def compare_TProfile(pr_item, base_item)
def compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)
def compareHistograms.compare_TProfile (   pr_item,
  base_item 
)

Definition at line 132 of file compareHistograms.py.

Referenced by compare().

def compare_TProfile(pr_item, base_item):
    """Return True when both profiles agree in every cell, False otherwise.

    The two objects must report the same GetSize(); then, for each cell
    index, bin content, bin entries and bin error must all be close
    (np.isclose, NaN treated as equal to NaN).
    """
    n_cells = pr_item.GetSize()
    if n_cells != base_item.GetSize():
        return False

    getter_names = ('GetBinContent', 'GetBinEntries', 'GetBinError')

    for cell in range(n_cells):
        for getter_name in getter_names:
            pr_value = getattr(pr_item, getter_name)(cell)
            base_value = getattr(base_item, getter_name)(cell)
            if not np.isclose(pr_value, base_value, equal_nan=True):
                return False

    return True
156 
def compare_TProfile(pr_item, base_item)
def compareHistograms.create_dif (   base_file_path,
  pr_file_path,
  pr_number,
  test_number,
  cmssw_version,
  output_dir_path 
)

Definition at line 13 of file compareHistograms.py.

References flatten_file(), get_output_filename(), get_run_nr(), reco::helper::VirtualJetProducerHelper.intersection(), list(), edm.print(), and save_paths().

Referenced by get_run_nr().

def create_dif(base_file_path, pr_file_path, pr_number, test_number, cmssw_version, output_dir_path):
    """Write the objects that differ between a baseline and a PR ROOT file.

    Both input files are flattened into {path tuple: object} dictionaries.
    Objects that differ, plus objects present on only one side, are written
    to <output_dir_path>/base/<name> and <output_dir_path>/pr/<name>.
    A three-number summary (changed, removed, added) is printed to stdout;
    diagnostics go to stderr.

    Returns None. If either input file cannot be opened, a message is
    printed to stderr and nothing is written.
    """
    base_file = ROOT.TFile(base_file_path, 'read')
    ROOT.gROOT.GetListOfFiles().Remove(base_file)

    pr_file = ROOT.TFile(pr_file_path, 'read')
    ROOT.gROOT.GetListOfFiles().Remove(pr_file)

    # Both files are closed on every exit path (early return or exception);
    # previously a failed open or an error mid-way left them open.
    try:
        if base_file.IsOpen():
            print('Baseline file successfully opened', file=sys.stderr)
        else:
            print('Unable to open base file', file=sys.stderr)
            return

        if pr_file.IsOpen():
            print('PR file successfully opened', file=sys.stderr)
        else:
            print('Unable to open PR file', file=sys.stderr)
            return

        run_nr = get_run_nr(pr_file_path)

        # Flat dictionaries: path (tuple of directory names + object name) -> object
        base_flat_dict = flatten_file(base_file, run_nr)
        pr_flat_dict = flatten_file(pr_file, run_nr)

        pr_path_set = set(pr_flat_dict)
        base_path_set = set(base_flat_dict)

        # Paths that appear in both baseline and PR data
        shared_paths = list(pr_path_set.intersection(base_path_set))

        # Paths that appear only in PR data
        only_pr_paths = list(pr_path_set.difference(base_path_set))

        # Paths that appear only in baseline data
        only_base_paths = list(base_path_set.difference(pr_path_set))

        # Objects pointed to by these paths will be written to baseline output
        paths_to_save_in_base = []

        # Objects pointed to by these paths will be written to PR output
        paths_to_save_in_pr = []

        # Changed objects end up in both lists
        compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)

        # Objects that exist only in the baseline (i.e. removed by the PR).
        # The '== None' equality test is kept exactly as in the original code.
        for path in only_base_paths:
            if base_flat_dict[path] == None:
                continue
            paths_to_save_in_base.append(path)

        # Objects that exist only in the PR (i.e. added by the PR)
        for path in only_pr_paths:
            if pr_flat_dict[path] == None:
                continue
            paths_to_save_in_pr.append(path)

        # Both output names are derived from the PR input path
        base_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, False)
        pr_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, True)

        # Write baseline output
        save_paths(base_flat_dict, paths_to_save_in_base, os.path.join(output_dir_path, 'base', base_output_filename))

        # Write PR output
        save_paths(pr_flat_dict, paths_to_save_in_pr, os.path.join(output_dir_path, 'pr', pr_output_filename))
    finally:
        pr_file.Close()
        base_file.Close()

    # Info about changed, added and removed elements
    nr_of_changed_elements = len(set(paths_to_save_in_base).intersection(set(paths_to_save_in_pr)))
    nr_of_removed_elements = len(paths_to_save_in_base) - nr_of_changed_elements
    nr_of_added_elements = len(paths_to_save_in_pr) - nr_of_changed_elements

    print('Base output file. PR output file. Changed elements, removed elements, added elements:')
    print(base_output_filename)
    print(pr_output_filename)
    print('%s %s %s' % (nr_of_changed_elements, nr_of_removed_elements, nr_of_added_elements))
95 
def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr)
def flatten_file(file, run_nr)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def create_dif(base_file_path, pr_file_path, pr_number, test_number, cmssw_version, output_dir_path)
def save_paths(flat_dict, paths, result_file_path)
def get_run_nr(file_path)
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION. A wildcarded negative criterion that matches more than one trigger in the trigger list ("!*", "!HLTx*" if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL. It will reject the event if any of the triggers are PASS or EXCEPTION (this matches the behavior of "!*" before the partial wildcard feature was incorporated). Triggers which are in the READY state are completely ignored. (READY should never be returned since the trigger paths have been run
def compareHistograms.create_dir (   parent_dir,
  name 
)

Definition at line 221 of file compareHistograms.py.

Referenced by save_to_file().

def create_dir(parent_dir, name):
    """Return the entry called ``name`` under ``parent_dir``, creating it if absent.

    ``parent_dir.Get(name)`` is tried first; when it yields a falsy value,
    ``parent_dir.mkdir(name)`` is called and its result returned instead.
    """
    existing = parent_dir.Get(name)
    if existing:
        return existing
    return parent_dir.mkdir(name)
226 
def create_dir(parent_dir, name)
def compareHistograms.flatten_file (   file,
  run_nr 
)

Definition at line 157 of file compareHistograms.py.

References traverse_till_end().

Referenced by create_dif().

def flatten_file(file, run_nr):
    """Flatten a ROOT file into a {path tuple: object} dictionary.

    Every top-level key of ``file`` is read and handed to
    traverse_till_end(), which fills the dictionary. ``run_nr`` is passed
    through unchanged.

    Traversal is best-effort: a top-level key that fails is reported on
    stderr and skipped, and entries collected before the failure are kept.
    Only ``Exception`` is caught, so KeyboardInterrupt and SystemExit are
    no longer swallowed.
    """
    result = {}
    for key in file.GetListOfKeys():
        try:
            traverse_till_end(key.ReadObj(), [], result, run_nr)
        except Exception as error:
            print('Skipping top-level key that could not be traversed: %s' % error, file=sys.stderr)

    return result
166 
def flatten_file(file, run_nr)
def traverse_till_end(node, dirs_list, result, run_nr)
def compareHistograms.get_node_name (   node)

Definition at line 177 of file compareHistograms.py.

References split.

Referenced by traverse_till_end().

def get_node_name(node):
    """Return the name under which ``node`` is stored in a flattened path.

    For objects inheriting from TObjString the name has the form
    ``<name>value</name>``; only the tag name is returned. All other
    objects return GetName() unchanged.
    """
    if not node.InheritsFrom('TObjString'):
        return node.GetName()

    # Everything before the first '>' is '<name'; drop the leading character
    opening_tag = node.GetName().partition('>')[0]
    return opening_tag[1:]
183 
double split
Definition: MVATrainer.cc:139
def compareHistograms.get_output_filename (   input_file_path,
  pr_number,
  test_number,
  cmssw_version,
  isPr 
)

Definition at line 227 of file compareHistograms.py.

References python.rootplot.root2matplotlib.replace(), and split.

Referenced by create_dif().

def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr):
    """Build the name of a comparison output file.

    Example of a produced name:
      DQM_V0001_R000320822__wf136_892_pr__CMSSW_10_4_0_pre3-PR25518-1234__DQMIO.root
    When the run token is R000000001 the RelVal naming pattern is used:
      DQM_V0001_R000000001__RelVal_wf136_892_pr__CMSSW_10_4_0_pre3-PR25518-1234__DQMIO.root

    The run token is the third '_'-separated field of the input file name.
    The workflow is the part of the parent directory name before the first
    '_', with '.' replaced by '_' ('Unknown' when empty). ``isPr`` selects
    the 'pr' or 'base' marker.
    """
    run = os.path.basename(input_file_path).split('_')[2]

    parent_dir_name = os.path.basename(os.path.dirname(input_file_path))
    workflow = parent_dir_name.split('_')[0].replace('.', '_') or 'Unknown'

    relval_prefix = 'RelVal_' if run == 'R000000001' else ''
    baseOrPr = 'pr' if isPr else 'base'

    return 'DQM_V0001_%s__%swf%s_%s__%s-PR%s-%s__DQMIO.root' % (run, relval_prefix, workflow, baseOrPr, cmssw_version, pr_number, test_number)
249 
def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr)
def replace(string, replacements)
double split
Definition: MVATrainer.cc:139
def compareHistograms.get_run_nr (   file_path)

Definition at line 250 of file compareHistograms.py.

References create_dif(), join(), and split.

Referenced by create_dif().

def get_run_nr(file_path):
    """Extract the run number, as a string, from a DQM file path.

    The third '_'-separated field of the file name (e.g. ``R000320822``)
    has its leading 'R' characters and then its leading zeros stripped,
    giving e.g. ``'320822'``.
    """
    file_name = os.path.basename(file_path)
    run_token = file_name.split('_')[2]
    without_prefix = run_token.lstrip('R')
    return without_prefix.lstrip('0')
252 
double split
Definition: MVATrainer.cc:139
def get_run_nr(file_path)
def compareHistograms.save_paths (   flat_dict,
  paths,
  result_file_path 
)

Definition at line 184 of file compareHistograms.py.

References edm.print(), and save_to_file().

Referenced by create_dif().

def save_paths(flat_dict, paths, result_file_path):
    """Write the objects selected by ``paths`` into a new ROOT file.

    flat_dict        -- {path tuple: object} dictionary to take objects from
    paths            -- paths (keys of flat_dict) to write
    result_file_path -- output file; it is recreated if it already exists

    Nothing is written when ``paths`` is empty or the output file cannot
    be opened; a message is printed to stderr in both cases. Returns None.
    """
    if not paths:
        print('No differences were observed - output will not be written', file=sys.stderr)
        return

    # Make sure output dir exists. A path without a directory component
    # yields '', which os.makedirs() rejects, so it is skipped.
    result_dir = os.path.dirname(result_file_path)
    if result_dir and not os.path.exists(result_dir):
        os.makedirs(result_dir)

    result_file = ROOT.TFile(result_file_path, 'recreate')
    ROOT.gROOT.GetListOfFiles().Remove(result_file)

    if not result_file.IsOpen():
        print('Unable to open %s output file' % result_file_path, file=sys.stderr)
        return

    # Close the output file even if writing one of the objects fails
    try:
        for path in paths:
            save_to_file(flat_dict, path, result_file)
    finally:
        result_file.Close()

    print('Output written to %s file' % result_file_path, file=sys.stderr)
206 
207 # Saves file from flat_dict in the same dir of currently open file for writing
def save_to_file(flat_dict, path, output_file)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def save_paths(flat_dict, paths, result_file_path)
def compareHistograms.save_to_file (   flat_dict,
  path,
  output_file 
)

Definition at line 208 of file compareHistograms.py.

References create_dir().

Referenced by save_paths().

def save_to_file(flat_dict, path, output_file):
    """Write ``flat_dict[path]`` into ``output_file`` under the directories of ``path``.

    All elements of ``path`` except the last are treated as nested
    directory names and obtained through create_dir(); the last element
    is the object's own name and needs no directory. The innermost
    directory is made current before the object is written.
    """
    obj = flat_dict[path]

    target_dir = output_file
    for dir_name in path[:-1]:
        target_dir = create_dir(target_dir, dir_name)

    target_dir.cd()
    obj.Write()
219 
220 # Create dir in root file if it doesn't exist
def save_to_file(flat_dict, path, output_file)
def create_dir(parent_dir, name)
def compareHistograms.traverse_till_end (   node,
  dirs_list,
  result,
  run_nr 
)

Definition at line 167 of file compareHistograms.py.

References blacklist.get_blacklist(), and get_node_name().

Referenced by flatten_file().

def traverse_till_end(node, dirs_list, result, run_nr):
    """Recursively walk ``node`` and record every leaf object in ``result``.

    node      -- object to inspect; anything exposing GetListOfKeys is
                 treated as a container and descended into
    dirs_list -- names of the ancestors of ``node`` (not modified)
    result    -- dictionary filled in place: path tuple -> leaf object
    run_nr    -- passed to get_blacklist(); leaves whose path is in the
                 returned blacklist are left out
    """
    current_path = dirs_list + [get_node_name(node)]

    if not hasattr(node, 'GetListOfKeys'):
        # Leaf: store it unless its path is blacklisted
        leaf_path = tuple(current_path)
        if leaf_path not in get_blacklist(run_nr):
            result[leaf_path] = node
        return

    for child_key in node.GetListOfKeys():
        traverse_till_end(child_key.ReadObj(), current_path, result, run_nr)
176 
def get_blacklist(RUN_NR)
Definition: blacklist.py:2
def traverse_till_end(node, dirs_list, result, run_nr)