CMS 3D CMS Logo

Functions | Variables
compareHistograms Namespace Reference

Functions

def compare (shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)
 
def compare_TProfile (pr_item, base_item)
 
def create_dif (base_file_path, pr_file_path, pr_number, test_number, cmssw_version, output_dir_path)
 
def create_dir (parent_dir, name)
 
def flatten_file (file, run_nr)
 
def get_node_name (node)
 
def get_output_filename (input_file_path, pr_number, test_number, cmssw_version, isPr)
 
def get_run_nr (file_path)
 
def get_string_suffix ()
 
def is_blacklisted (dirs_list, run_nr)
 
def save_paths (flat_dict, paths, result_file_path)
 
def save_to_file (flat_dict, path, output_file)
 
def traverse_till_end (node, dirs_list, result, run_nr)
 

Variables

 args
 
 cmssw_version
 
 default
 
 description
 
 help
 
 IgnoreCommandLineOptions
 
 parser
 
 required
 

Function Documentation

◆ compare()

def compareHistograms.compare (   shared_paths,
  pr_flat_dict,
  base_flat_dict,
  paths_to_save_in_pr,
  paths_to_save_in_base 
)

Definition at line 96 of file compareHistograms.py.

96 def compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base):
97  # Collect paths that have to be written to both output files
98  for path in shared_paths:
99  pr_item = pr_flat_dict[path]
100  base_item = base_flat_dict[path]
101 
102  if pr_item == None or base_item == None:
103  continue
104 
105  are_different=False
106 
107  if pr_item.InheritsFrom('TProfile2D') and base_item.InheritsFrom('TProfile2D'):
108  # Compare TProfile2D (content, entries and errors)
109  are_different = not compare_TProfile(pr_item, base_item)
110 
111  elif pr_item.InheritsFrom('TProfile') and base_item.InheritsFrom('TProfile'):
112  # Compare TProfile (content, entries and errors)
113  are_different = not compare_TProfile(pr_item, base_item)
114 
115  elif pr_item.InheritsFrom('TH1') and base_item.InheritsFrom('TH1'):
116  # Compare bin by bin
117  pr_array = root_numpy.hist2array(hist=pr_item, include_overflow=True, copy=False)
118  base_array = root_numpy.hist2array(hist=base_item, include_overflow=True, copy=False)
119 
120  if pr_array.shape != base_array.shape or not np.allclose(pr_array, base_array, equal_nan=True):
121  are_different = True
122  else:
123  # Compare non histograms
124  if pr_item != base_item:
125  are_different = True
126 
127  if are_different:
128  paths_to_save_in_pr.append(path)
129  paths_to_save_in_base.append(path)
130 
131 # compare_TProfile (defined next): returns False if the two profiles differ, True otherwise

References compare_TProfile().

◆ compare_TProfile()

def compareHistograms.compare_TProfile (   pr_item,
  base_item 
)

Definition at line 132 of file compareHistograms.py.

132 def compare_TProfile(pr_item, base_item):
133  if pr_item.GetSize() != base_item.GetSize():
134  return False
135 
136  for i in range(pr_item.GetSize()):
137  pr_bin_content = pr_item.GetBinContent(i)
138  base_bin_content = base_item.GetBinContent(i)
139 
140  pr_bin_entries = pr_item.GetBinEntries(i)
141  base_bin_entries = base_item.GetBinEntries(i)
142 
143  pr_bin_error = pr_item.GetBinError(i)
144  base_bin_error = base_item.GetBinError(i)
145 
146  if not np.isclose(pr_bin_content, base_bin_content, equal_nan=True):
147  return False
148 
149  if not np.isclose(pr_bin_entries, base_bin_entries, equal_nan=True):
150  return False
151 
152  if not np.isclose(pr_bin_error, base_bin_error, equal_nan=True):
153  return False
154 
155  return True
156 

References FastTimerService_cff.range.

Referenced by compare().

◆ create_dif()

def compareHistograms.create_dif (   base_file_path,
  pr_file_path,
  pr_number,
  test_number,
  cmssw_version,
  output_dir_path 
)

Definition at line 13 of file compareHistograms.py.

13 def create_dif(base_file_path, pr_file_path, pr_number, test_number, cmssw_version, output_dir_path):
14  base_file = ROOT.TFile(base_file_path, 'read')
15  ROOT.gROOT.GetListOfFiles().Remove(base_file)
16 
17  pr_file = ROOT.TFile(pr_file_path, 'read')
18  ROOT.gROOT.GetListOfFiles().Remove(pr_file)
19 
20  if base_file.IsOpen():
21  print('Baseline file successfully opened', file=sys.stderr)
22  else:
23  print('Unable to open base file', file=sys.stderr)
24  return
25 
26  if pr_file.IsOpen():
27  print('PR file successfully opened', file=sys.stderr)
28  else:
29  print('Unable to open PR file', file=sys.stderr)
30  return
31 
32  run_nr = get_run_nr(pr_file_path)
33 
34  # Flatten both files into dicts keyed by path (a tuple of node names leading to each object)
35  base_flat_dict = flatten_file(base_file, run_nr)
36  pr_flat_dict = flatten_file(pr_file, run_nr)
37 
38  # Paths that appear in both baseline and PR data. (Intersection)
39  shared_paths = list(set(pr_flat_dict).intersection(set(base_flat_dict)))
40 
41  # Paths that appear only in PR data. (Set difference: PR minus baseline)
42  only_pr_paths = list(set(pr_flat_dict).difference(set(base_flat_dict)))
43 
44  # Paths that appear only in baseline data. (Set difference: baseline minus PR)
45  only_base_paths = list(set(base_flat_dict).difference(set(pr_flat_dict)))
46 
47  # Histograms pointed to by these paths will be written to baseline output
48  paths_to_save_in_base = []
49 
50  # Histograms pointed to by these paths will be written to pr output
51  paths_to_save_in_pr = []
52 
53  # Make comparison
54  compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)
55 
56  # Collect paths that have to be written to baseline output file
57  for path in only_base_paths:
58  item = base_flat_dict[path]
59 
60  if item == None:
61  continue
62 
63  paths_to_save_in_base.append(path)
64 
65  # Collect paths that have to be written to PR output file
66  for path in only_pr_paths:
67  item = pr_flat_dict[path]
68 
69  if item == None:
70  continue
71 
72  paths_to_save_in_pr.append(path)
73 
74  base_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, False)
75  pr_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, True)
76 
77  # Write baseline output
78  save_paths(base_flat_dict, paths_to_save_in_base, os.path.join(output_dir_path, 'base', base_output_filename))
79 
80  # Write PR output
81  save_paths(pr_flat_dict, paths_to_save_in_pr, os.path.join(output_dir_path, 'pr', pr_output_filename))
82 
83  pr_file.Close()
84  base_file.Close()
85 
86  # Info about changed, added and removed elements
87  nr_of_changed_elements = len(set(paths_to_save_in_base).intersection(set(paths_to_save_in_pr)))
88  nr_of_removed_elements = len(paths_to_save_in_base) - nr_of_changed_elements
89  nr_of_added_elements = len(paths_to_save_in_pr) - nr_of_changed_elements
90 
91  print('Base output file. PR output file. Changed elements, removed elements, added elements:')
92  print(base_output_filename)
93  print(pr_output_filename)
94  print('%s %s %s' % (nr_of_changed_elements, nr_of_removed_elements, nr_of_added_elements))
95 

References flatten_file(), get_output_filename(), get_run_nr(), reco::helper::VirtualJetProducerHelper.intersection(), print(), and save_paths().

◆ create_dir()

def compareHistograms.create_dir (   parent_dir,
  name 
)

Definition at line 234 of file compareHistograms.py.

234 def create_dir(parent_dir, name):
235  dir = parent_dir.Get(name)
236  if not dir:
237  dir = parent_dir.mkdir(name)
238  return dir
239 

Referenced by save_to_file().

◆ flatten_file()

def compareHistograms.flatten_file (   file,
  run_nr 
)

Definition at line 157 of file compareHistograms.py.

157 def flatten_file(file, run_nr):
158  result = {}
159  for key in file.GetListOfKeys():
160  try:
161  traverse_till_end(key.ReadObj(), [], result, run_nr)
162  except:
163  pass
164 
165  return result
166 

References traverse_till_end().

Referenced by create_dif().

◆ get_node_name()

def compareHistograms.get_node_name (   node)

Definition at line 177 of file compareHistograms.py.

177 def get_node_name(node):
178  if node.InheritsFrom('TObjString'):
179  # Strip out just the name from a tag (<name>value</name>)
180  name = node.GetName().split('>')[0][1:]
181  return name + get_string_suffix()
182  else:
183  return node.GetName()
184 

References get_string_suffix(), and submitPVValidationJobs.split().

Referenced by traverse_till_end().

◆ get_output_filename()

def compareHistograms.get_output_filename (   input_file_path,
  pr_number,
  test_number,
  cmssw_version,
  isPr 
)

Definition at line 240 of file compareHistograms.py.

240 def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr):
241  # Samples of correct output file format:
242  # DQM_V0001_R000320822__wf136_892_pr__CMSSW_10_4_0_pre3-PR25518-1234__DQMIO.root
243  # When run number is 1 we have to use RelVal naming pattern:
244  # DQM_V0002_R000000001__RelVal_wf136_892_pr__CMSSW_10_4_0_pre3-PR25518-1234__DQMIO.root
245 
246  input_file_name = os.path.basename(input_file_path)
247 
248  run = input_file_name.split('_')[2]
249  workflow = os.path.basename(os.path.dirname(input_file_path)).split('_')[0].replace('.', '_')
250  if not workflow:
251  workflow = 'Unknown'
252 
253  relval_prefix = ''
254  if run == 'R000000001':
255  relval_prefix = 'RelVal_'
256 
257  baseOrPr = 'base'
258  if isPr:
259  baseOrPr = 'pr'
260 
261  return 'DQM_V0001_%s__%swf%s_%s__%s-PR%s-%s__DQMIO.root' % (run, relval_prefix, workflow, baseOrPr, cmssw_version, pr_number, test_number)
262 

References python.rootplot.root2matplotlib.replace(), and submitPVValidationJobs.split().

Referenced by create_dif().

◆ get_run_nr()

def compareHistograms.get_run_nr (   file_path)

Definition at line 263 of file compareHistograms.py.

263 def get_run_nr(file_path):
264  return os.path.basename(file_path).split('_')[2].lstrip('R').lstrip('0')
265 

References submitPVValidationJobs.split().

Referenced by create_dif().

◆ get_string_suffix()

def compareHistograms.get_string_suffix ( )

Definition at line 185 of file compareHistograms.py.

185 def get_string_suffix():
186  return '_string_monitor_element'
187 

Referenced by get_node_name(), and is_blacklisted().

◆ is_blacklisted()

def compareHistograms.is_blacklisted (   dirs_list,
  run_nr 
)

Definition at line 188 of file compareHistograms.py.

188 def is_blacklisted(dirs_list, run_nr):
189  # Copy the list
190  dirs_list = dirs_list[:]
191  # Remove string suffix
192  if dirs_list[-1].endswith(get_string_suffix()):
193  dirs_list[-1] = dirs_list[-1].replace(get_string_suffix(), '')
194 
195  return tuple(dirs_list) in get_blacklist(run_nr)
196 

References blacklist.get_blacklist(), get_string_suffix(), and python.rootplot.root2matplotlib.replace().

Referenced by traverse_till_end().

◆ save_paths()

def compareHistograms.save_paths (   flat_dict,
  paths,
  result_file_path 
)

Definition at line 197 of file compareHistograms.py.

197 def save_paths(flat_dict, paths, result_file_path):
198  if len(paths) == 0:
199  print('No differences were observed - output will not be written', file=sys.stderr)
200  return
201 
202  # Make sure output dir exists
203  result_dir = os.path.dirname(result_file_path)
204  if not os.path.exists(result_dir):
205  os.makedirs(result_dir)
206 
207  result_file = ROOT.TFile(result_file_path, 'recreate')
208  ROOT.gROOT.GetListOfFiles().Remove(result_file)
209 
210  if not result_file.IsOpen():
211  print('Unable to open %s output file' % result_file_path, file=sys.stderr)
212  return
213 
214  for path in paths:
215  save_to_file(flat_dict, path, result_file)
216 
217  result_file.Close()
218  print('Output written to %s file' % result_file_path, file=sys.stderr)
219 
220 # save_to_file (defined next): writes the object stored at flat_dict[path] into the output file, creating the directories named in path as needed

References print(), and save_to_file().

Referenced by create_dif().

◆ save_to_file()

def compareHistograms.save_to_file (   flat_dict,
  path,
  output_file 
)

Definition at line 221 of file compareHistograms.py.

221 def save_to_file(flat_dict, path, output_file):
222  histogram = flat_dict[path]
223 
224  current = output_file
225 
226  # Last item is the object's own name, not a directory. No need to create dir for it
227  for directory in path[:-1]:
228  current = create_dir(current, directory)
229  current.cd()
230 
231  histogram.Write()
232 
233 # Create dir in root file if it doesn't exist

References create_dir().

Referenced by save_paths().

◆ traverse_till_end()

def compareHistograms.traverse_till_end (   node,
  dirs_list,
  result,
  run_nr 
)

Definition at line 167 of file compareHistograms.py.

167 def traverse_till_end(node, dirs_list, result, run_nr):
168  new_dir_list = dirs_list + [get_node_name(node)]
169  if hasattr(node, 'GetListOfKeys'):
170  for key in node.GetListOfKeys():
171  traverse_till_end(key.ReadObj(), new_dir_list, result, run_nr)
172  else:
173  if not is_blacklisted(new_dir_list, run_nr):
174  path = tuple(new_dir_list)
175  result[path] = node
176 

References get_node_name(), and is_blacklisted().

Referenced by flatten_file().

Variable Documentation

◆ args

compareHistograms.args

Definition at line 275 of file compareHistograms.py.

◆ cmssw_version

compareHistograms.cmssw_version

Definition at line 277 of file compareHistograms.py.

◆ default

compareHistograms.default

Definition at line 271 of file compareHistograms.py.

◆ description

compareHistograms.description

Definition at line 267 of file compareHistograms.py.

◆ help

compareHistograms.help

Definition at line 269 of file compareHistograms.py.

◆ IgnoreCommandLineOptions

compareHistograms.IgnoreCommandLineOptions

Definition at line 5 of file compareHistograms.py.

◆ parser

compareHistograms.parser

Definition at line 267 of file compareHistograms.py.

◆ required

compareHistograms.required

Definition at line 269 of file compareHistograms.py.

compareHistograms.create_dir
def create_dir(parent_dir, name)
Definition: compareHistograms.py:234
FastTimerService_cff.range
range
Definition: FastTimerService_cff.py:34
compareHistograms.flatten_file
def flatten_file(file, run_nr)
Definition: compareHistograms.py:157
compareHistograms.save_to_file
def save_to_file(flat_dict, path, output_file)
Definition: compareHistograms.py:221
compareHistograms.get_node_name
def get_node_name(node)
Definition: compareHistograms.py:177
blacklist.get_blacklist
def get_blacklist(RUN_NR)
Definition: blacklist.py:2
compareHistograms.get_run_nr
def get_run_nr(file_path)
Definition: compareHistograms.py:263
compareHistograms.get_output_filename
def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr)
Definition: compareHistograms.py:240
compareHistograms.get_string_suffix
def get_string_suffix()
Definition: compareHistograms.py:185
submitPVValidationJobs.split
def split(sequence, size)
Definition: submitPVValidationJobs.py:352
compareHistograms.create_dif
def create_dif(base_file_path, pr_file_path, pr_number, test_number, cmssw_version, output_dir_path)
Definition: compareHistograms.py:13
print
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:46
compareHistograms.compare_TProfile
def compare_TProfile(pr_item, base_item)
Definition: compareHistograms.py:132
compareHistograms.traverse_till_end
def traverse_till_end(node, dirs_list, result, run_nr)
Definition: compareHistograms.py:167
compareHistograms.save_paths
def save_paths(flat_dict, paths, result_file_path)
Definition: compareHistograms.py:197
compareHistograms.is_blacklisted
def is_blacklisted(dirs_list, run_nr)
Definition: compareHistograms.py:188
compare
Definition: compare.py:1
compare
bool compare(const P &i, const P &j)
Definition: BDHadronTrackMonitoringAnalyzer.cc:203
reco::helper::VirtualJetProducerHelper::intersection
double intersection(double r12)
Definition: VirtualJetProducerHelper.h:14
python.rootplot.root2matplotlib.replace
def replace(string, replacements)
Definition: root2matplotlib.py:444