CMS 3D CMS Logo

app_utils.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # coding: utf-8
3 '''
4 Helper functions for CherryPy application ``browse_db.py``.
5 
6 Author: Albertas Gimbutas, Vilnius University (LT)
7 e-mail: albertasgim@gmail.com
8 '''
9 from __future__ import print_function
10 
11 import sqlite3
12 import re
13 from os import getcwd, listdir
14 from os.path import join
15 from urllib import quote
16 from functools import reduce
17 
18 import six
19 
20 renaming = {
21  'MessageLogger': 'Miscellanea', 'FourVector': 'Generic',
22  'Castor': 'Castor Calorimeter', 'RPCDigisV': 'Resistive Plate Chambers',
23  'GlobalRecHitsV': 'Miscellanea: Sim.', 'Top': 'Top', 'HLTJETMET': 'JetMet',
24  'GlobalDigisV': 'Miscellanea: Sim.', 'L1TEMU': 'Level 1 Trigger',
25  'TrackerRecHitsV': 'Tracking System', 'MuonDTHitsV': 'Muon Objects',
26  'EcalDigisV': 'Ecal Calorimeter', 'EcalHitsV': 'Ecal Calorimeter',
27  'Muons': 'Muon Objects', 'DT': 'Drift Tubes', 'TrackerDigisV': 'Tracking System',
28  'Pixel': 'Tracking System', 'EcalPreshower': 'Ecal Calorimeter',
29  'EgammaV': 'Photons', 'AlCaEcalPi0': 'Alca', 'SusyExo': 'SusyExo',
30  'MuonDTDigisV': 'Muon Objects', 'TauRelVal': 'Tau',
31  'HcalHitsV': 'Hcal Calorimeter', 'RPC': 'Resistive Plate Chambers',
32  'EcalRecHitsV': 'Ecal Calorimeter', 'EgOffline': 'EGamma',
33  'MuonCSCDigisV': 'Muon Objects', 'ParticleFlow': 'Miscellanea',
34  'Info': 'Miscellanea', 'Tracking': 'Tracking',
35  'NoiseRatesV': 'Miscellanea: Sim.', 'Generator': 'Miscellanea: Sim.',
36  'Btag': 'B Tagging', 'Higgs': 'Higgs', 'GlobalHitsV': 'Miscellanea: Sim.',
37  'HcalRecHitsV': 'Hcal Calorimeter', 'TrackerHitsV': 'Tracking System',
38  'CSC': 'Cathode Strip Chambers', 'Muon,HLTMonMuon': 'Muon',
39  'Hcal': 'Hcal Calorimeter', 'TauOffline': 'Tau',
40  'HeavyFlavor': 'HeavyFlavor', 'JetMET': 'Jet', 'Physics': 'Miscellanea',
41  'CaloTowersV': 'Hcal Calorimeter', 'SiStrip': 'Tracking System',
42  'EcalClusterV': 'Ecal Calorimeter', 'HLTEgammaValidation': 'EGamma',
43  'EcalPhiSym': 'Alca', 'L1T': 'Level 1 Trigger', 'MixingV': 'Miscellanea: Sim.',
44  'FourVector_Val': 'Generic', 'EcalEndcap': 'Ecal Calorimeter',
45  'TauOnline': 'Tau', 'Egamma': 'Photons', 'HcalIsoTrack': 'Alca',
46  'EcalBarrel': 'Ecal Calorimeter'
47 }
48 
49 
50 def get_img_path(filename, path):
51  '''Returns image path for https://cmsweb.cern.ch/dqm histogram
52  visualisation service'''
53  run = int(re.findall('_R(\d*)__', filename)[0])
54  parts = [e.rstrip('.root') for e in filename.split('__')]
55  path = path.replace('Run summary/', '')
56  return 'archive/%s/%s/%s/%s/%s' % (run, parts[1], parts[2], parts[3], path)
57 
58 
59 def get_img_url(path, f1, f2=None, w=250, h=250):
60  '''Returns full URL of histogram (or histogram overlay) image for
61  https://cmsweb.cern.ch/dqm visualisation service.'''
62  base = 'https://cmsweb.cern.ch/dqm/relval/plotfairy'
63  if not f2:
64  return '%s/%s?w=%s;h=%s' % (base, get_img_path(f1, path), w, h)
65  return '%s/overlay?obj=%s;obj=%s;w=%s;h=%s' % (base,
66  get_img_path(f1, path), get_img_path(f2, path), w, h)
67 
68 
69 def get_dataset_name(name):
70  '''Returns extracted dataset name from the given ROOT filename.'''
71  if re.search('RelVal', name):
72  run = str(int(re.findall('_R(\d{9})_', name)[0]))
73  ds = re.findall('GR_R_\d*_V\d*C?_(?:RelVal)?_([\w\d]*-v\d+)_', name)[0]
74  else:
75  run, ds = re.findall('R(\d{9})__([\w\d]*)__CMSSW_', name)[0:1]
76  return '_'.join([ds, str(int(run))])
77 
78 
79 def get_release(name):
80  '''Returns extracted release from the given ROOT filename.'''
81  return re.findall('R\d{9}__([\w\d_-]*)__DQM.root', name)[0]
82 
83 
84 def get_stats(c, threshold, dir_ranges):
85  '''Returns ``successes``, ``fails``, ``nulls`` for the given dir_ranges.'''
86  successes, nulls, fails = 0, 0, 0
87  for from_id, till_id in dir_ranges:
88  c.execute('''SELECT count(*) FROM HistogramComparison
89  WHERE p_value >= 0 AND p_value > ? AND
90  id >= ? and id <= ?''', (threshold, from_id, till_id))
91  successes += c.fetchone()[0]
92  c.execute('''SELECT count(*) FROM HistogramComparison WHERE
93  p_value < 0 AND id >= ? AND id <= ?''', (from_id, till_id))
94  nulls += c.fetchone()[0]
95  c.execute('''SELECT count(*) FROM HistogramComparison
96  WHERE p_value >= 0 AND p_value <= ? AND
97  id >= ? AND id <= ?''', (threshold, from_id, till_id))
98  fails += c.fetchone()[0]
99  return successes, nulls, fails
100 
101 
102 def get_percentage(successes, nulls, fails):
103  '''Converts integers ``successes``, ``nulls`` and ``fails`` to percents.'''
104  if successes is None:
105  return None, None, None
106  total = successes + fails + nulls
107  if not total:
108  return None, None, None
109  success = round(100. * successes / total, 2)
110  null = round(100. * nulls / total, 2)
111  fail = round(100. * fails / total, 2)
112  return success, null, fail
113 
114 
115 def get_folders(c, file_id, filename, dir_id, threshold): # TODO: If folder [Egamma|JetMet] analyse their subdirs
116  '''Returns file folder stats for one "summary table" column.'''
117  ds_name = get_dataset_name(filename)
118  c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM
119  Directory WHERE parent_id=?''', (dir_id,))
120  dirs = c.fetchall()
121  file_folders = dict()
122  total_successes, total_nulls, total_fails = 0, 0, 0
123  for name, from_id, till_id in dirs:
124  successes, nulls, fails = get_stats(c, threshold, ((from_id, till_id),))
125  total_successes += successes
126  total_nulls += nulls
127  total_fails += fails
128  if name in file_folders:
129  file_folders[name].append([file_id, ds_name, successes, nulls, fails])
130  else:
131  file_folders[name] = [file_id, ds_name, successes, nulls, fails]
132  return [('Summary', [file_id, ds_name, total_successes, total_nulls, total_fails])] + file_folders.items()
133 
134 
135 def join_ranges(ranges, elem):
136  '''To do less DB calls, joins [(from_id, till_id), ...] ranges.'''
137  if isinstance(ranges, tuple):
138  ranges = [ranges]
139  if ranges[-1][-1] + 1 == elem[0]:
140  ranges[-1] = (ranges[-1][0], elem[1])
141  else:
142  ranges.append(elem)
143  return ranges
144 
145 
147  '''Returns all ``ReleaseComparisons`` found on database.'''
148  c.execute('SELECT title, statistical_test FROM ReleaseComparison')
149  return c.fetchall()
150 
151 
152 def db_list_with_releases(path='.'):
153  '''Returns available database list and their releases.'''
154  db_list = [db for db in listdir(path) if db.endswith('.db')]
155  db_list_with_releases = []
156  for db in db_list:
157  conn = sqlite3.connect(join(path, db))
158  releases = get_release_list(conn.cursor())
159  db_list_with_releases.append((db[:-3], releases))
160  conn.close()
161  return db_list_with_releases
162 
163 # ------------------- Template Context generators --------------------
164 
165 def get_release_summary_stats(c, release_title, st_test, threshold=1e-5):
166  '''Returns context for ``release_summary.html`` template.'''
167  ## Summary
168  context = dict()
169  c.execute('''SELECT release1, release2, id FROM ReleaseComparison
170  WHERE title = ? AND statistical_test = ?''', (release_title, st_test))
171  context['release1'], context['release2'], release_comp_id = c.fetchone()
172 
173  # All directory ranges
174  c.execute('''SELECT from_histogram_id, till_histogram_id FROM Directory
175  WHERE id IN (SELECT directory_id FROM RootFileComparison
176  WHERE release_comparison_id = ?)''', (release_comp_id,))
177  dir_ranges = c.fetchall()
178 
179  if len(dir_ranges) > 1:
180  dir_ranges = reduce(join_ranges, dir_ranges)
181 
182  context['successes'], context['nulls'], context['fails'], = get_stats(c, threshold, dir_ranges)
183 
184  context['total'] = context['successes'] + context['fails'] + context['nulls']
185  if context['total']:
186  context['success'], context['null'], context['fail'] = \
187  get_percentage(context['successes'], context['nulls'], context['fails'])
188 
189  ## Data needed for the all the statistics:
190  c.execute('''SELECT id, filename1, directory_id FROM RootFileComparison
191  WHERE release_comparison_id = ?''', (release_comp_id,))
192  files = c.fetchall()
193 
194  ## folders: [(folder_name, [folder: (file_id, filename, success, null, fail)]), ...]
195  folders = dict()
196  for file_id, filename, dir_id in files:
197  # file_folders: [(folder_name, [(file_id, file_name, success, null, fail)]), ...]
198  file_folders = get_folders(c, file_id, filename, dir_id, threshold)
199  for folder_name, file_folder_stats in file_folders:
200  if folder_name in folders:
201  # Add folder stats
202  folders[folder_name].append(file_folder_stats)
203  # Update folder summary
204  folders[folder_name][0][2] += file_folder_stats[2]
205  folders[folder_name][0][3] += file_folder_stats[3]
206  folders[folder_name][0][4] += file_folder_stats[4]
207  else:
208  folder_summary = [None, 'Summary', file_folder_stats[2],
209  file_folder_stats[3], file_folder_stats[4]]
210  folders[folder_name] = [folder_summary, file_folder_stats]
211 
212  ## Calculate ratios
213  folders = [('Summary', folders.pop('Summary'))] + sorted(folders.items(), key=lambda x: x[0])
214  for folder, file_stats in folders:
215  # Insert N/A if histo is missing
216  if len(file_stats) != len(files)+1:
217  for i, file_ in enumerate(files):
218  if file_[0] != file_stats[i][0]:
219  file_stats = file_stats[:i] + [[None, "N/A", None, None, None]] + file_stats[i:]
220  # Count the ratios
221  for i, stats in enumerate(file_stats):
222  stats[2], stats[3], stats[4] = get_percentage(*stats[2:5])
223  context['folders'] = folders
224 
225 
226  ## Select Summary Barchart, Detailed Barchart
227  for folder in folders:
228  print(folder)
229  # detailed_ratios: (name, success_ratio)
230  # summary_ratios: (name, success_ratio)
231 
232 
233  ## Summary Barchart
234  # TODO: optimise not to fetch from DB again.
235  c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM Directory
236  WHERE parent_id IN (SELECT directory_id FROM RootFileComparison
237  WHERE release_comparison_id = ?)''', (release_comp_id,))
238  lvl3_dir_ranges = c.fetchall()
239 
240  cum_lvl3_dir_ranges = dict()
241  for name, from_id, till_id in lvl3_dir_ranges:
242  if name in cum_lvl3_dir_ranges:
243  cum_lvl3_dir_ranges[name].append((from_id, till_id))
244  else:
245  cum_lvl3_dir_ranges[name] = [(from_id, till_id)]
246 
247  # Fetch stats
248  summary_stats = dict()
249  detailed_stats = dict()
250  for name, ranges in six.iteritems(cum_lvl3_dir_ranges):
251  successes, nulls, fails = get_stats(c, threshold, ranges)
252  if name in detailed_stats:
253  detailed_stats[name][0] += successes
254  detailed_stats[name][1] += nulls
255  detailed_stats[name][2] += fails
256  else:
257  detailed_stats[name] = [successes, nulls, fails]
258  if name in renaming:
259  if renaming[name] in summary_stats:
260  summary_stats[renaming[name]][0] += successes
261  summary_stats[renaming[name]][1] += nulls
262  summary_stats[renaming[name]][2] += fails
263  else:
264  summary_stats[renaming[name]] = [successes, nulls, fails]
265 
266  # Calculate ratio
267  summary_ratios = []
268  for name, stats in six.iteritems(summary_stats):
269  total = sum(stats)
270  if total:
271  ratio = float(stats[0]) / sum(stats)
272  summary_ratios.append((name, ratio))
273  detailed_ratios = []
274  for name, stats in six.iteritems(detailed_stats):
275  total = sum(stats)
276  if total:
277  ratio = float(stats[0]) / sum(stats)
278  detailed_ratios.append((name, ratio))
279 
280  context['summary_ratios'] = sorted(summary_ratios, key=lambda x: x[0])
281  context['detailed_ratios'] = sorted(detailed_ratios, key=lambda x: x[0])
282  return context
283 
284 
285 def get_directory_summary_stats(c, url_args, file_id, threshold):
286  '''Returns context for ``directory_summary.html`` template.'''
287  context = dict()
288  c.execute('''SELECT directory_id, filename1, filename2 FROM RootFileComparison
289  WHERE id = ?''', (file_id,))
290  dir_id, f1, f2 = c.fetchone()
291  context['release1'] = get_release(f1)
292  context['release2'] = get_release(f2)
293  if not url_args:
294  dir_name = get_dataset_name(f1)
295  else:
296  #### Select DQMData/Run directory.
297  directory_names = []
298 
299  for dir_name in url_args:
300  c.execute('''SELECT id, name FROM Directory WHERE name = ? AND
301  parent_id = ?''', (dir_name, dir_id))
302  dir_id, name = c.fetchone()
303  directory_names.append(name)
304  context['parent_name'] = '/'.join(directory_names)
305 
306  ## Select stats
307  c.execute('''SELECT from_histogram_id, till_histogram_id FROM
308  Directory WHERE id = ?''', (dir_id,))
309  ranges = c.fetchone()
310  successes, nulls, fails = get_stats(c, threshold, (ranges,))
311  success, null, fail = get_percentage(successes, nulls, fails)
312  context.update({
313  'successes': successes, 'nulls': nulls, 'fails': fails,
314  'success': success, 'null': null, 'fail': fail,
315  'total': successes + nulls + fails, 'dir_name': dir_name
316  })
317  # subdirs: name, total, success, fail, null
318  c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM Directory
319  WHERE parent_id = ?''', (dir_id,))
320  subdirs = c.fetchall()
321  subdir_stats = []
322  for name, from_id, till_id in subdirs:
323  successes, nulls, fails = get_stats(c, threshold, [(from_id, till_id,)])
324  success, null, fail = get_percentage(successes, nulls, fails)
325  subdir_stats.append((name, successes + nulls + fails, successes,
326  nulls, fails, success, null, fail))
327  context['subdirs'] = sorted(subdir_stats, key=lambda x: x[4], reverse=True)
328 
329  # histograms: name, p_value
330  c.execute('''SELECT name, p_value FROM HistogramComparison
331  WHERE directory_id = ?''', (dir_id,))
332  failed_histos = []
333  successful_histos = []
334  null_histos = []
335  for name, p_value in c.fetchall():
336  path = quote('%s/%s' % ('/'.join(url_args), name))
337  url1 = get_img_url(path, f1)
338  url2 = get_img_url(path, f2)
339  overlay = get_img_url(path, f1, f2)
340  if p_value < 0:
341  null_histos.append((name, p_value, url1, url2, overlay))
342  elif p_value <= threshold:
343  failed_histos.append((name, p_value, url1, url2, overlay))
344  else:
345  successful_histos.append((name, p_value, url1, url2, overlay))
346 
347  context['failed_histos'] = sorted(failed_histos, key=lambda x: x[1], reverse=True)
348  context['null_histos'] = null_histos
349  context['successful_histos'] = sorted(successful_histos, key=lambda x: x[1], reverse=True)
350  return context
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def get_percentage(successes, nulls, fails)
Definition: app_utils.py:102
def get_img_path(filename, path)
Definition: app_utils.py:50
def get_release_list(c)
Definition: app_utils.py:146
def get_folders(c, file_id, filename, dir_id, threshold)
Definition: app_utils.py:115
def db_list_with_releases(path='.')
Definition: app_utils.py:152
def get_dataset_name(name)
Definition: app_utils.py:69
static std::string join(char **cmd)
Definition: RemoteFile.cc:17
def get_release_summary_stats(c, release_title, st_test, threshold=1e-5)
Definition: app_utils.py:165
def get_directory_summary_stats(c, url_args, file_id, threshold)
Definition: app_utils.py:285
def get_img_url(path, f1, f2=None, w=250, h=250)
Definition: app_utils.py:59
def get_stats(c, threshold, dir_ranges)
Definition: app_utils.py:84
def join_ranges(ranges, elem)
Definition: app_utils.py:135
def get_release(name)
Definition: app_utils.py:79
#define str(s)