CMS 3D CMS Logo

app_utils.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # coding: utf-8
3 '''
4 Helper functions for CherryPy application ``browse_db.py``.
5 
6 Author: Albertas Gimbutas, Vilnius University (LT)
7 e-mail: albertasgim@gmail.com
8 '''
9 
10 import sqlite3
11 import re
12 from os import getcwd, listdir
13 from os.path import join
14 from urllib import quote
15 from functools import reduce
16 
17 
# Maps a DQM top-level folder name to the label under which its statistics
# are accumulated for the summary bar chart (several folders may share one
# label).  Entries are grouped by target label.
renaming = {
    # Alca
    'AlCaEcalPi0': 'Alca',
    'EcalPhiSym': 'Alca',
    'HcalIsoTrack': 'Alca',
    # B Tagging
    'Btag': 'B Tagging',
    # Castor Calorimeter
    'Castor': 'Castor Calorimeter',
    # Cathode Strip Chambers
    'CSC': 'Cathode Strip Chambers',
    # Drift Tubes
    'DT': 'Drift Tubes',
    # EGamma
    'EgOffline': 'EGamma',
    'HLTEgammaValidation': 'EGamma',
    # Ecal Calorimeter
    'EcalBarrel': 'Ecal Calorimeter',
    'EcalClusterV': 'Ecal Calorimeter',
    'EcalDigisV': 'Ecal Calorimeter',
    'EcalEndcap': 'Ecal Calorimeter',
    'EcalHitsV': 'Ecal Calorimeter',
    'EcalPreshower': 'Ecal Calorimeter',
    'EcalRecHitsV': 'Ecal Calorimeter',
    # Generic
    'FourVector': 'Generic',
    'FourVector_Val': 'Generic',
    # Hcal Calorimeter
    'CaloTowersV': 'Hcal Calorimeter',
    'Hcal': 'Hcal Calorimeter',
    'HcalHitsV': 'Hcal Calorimeter',
    'HcalRecHitsV': 'Hcal Calorimeter',
    # HeavyFlavor
    'HeavyFlavor': 'HeavyFlavor',
    # Higgs
    'Higgs': 'Higgs',
    # Jet / JetMet
    'JetMET': 'Jet',
    'HLTJETMET': 'JetMet',
    # Level 1 Trigger
    'L1T': 'Level 1 Trigger',
    'L1TEMU': 'Level 1 Trigger',
    # Miscellanea
    'Info': 'Miscellanea',
    'MessageLogger': 'Miscellanea',
    'ParticleFlow': 'Miscellanea',
    'Physics': 'Miscellanea',
    # Miscellanea: Sim.
    'Generator': 'Miscellanea: Sim.',
    'GlobalDigisV': 'Miscellanea: Sim.',
    'GlobalHitsV': 'Miscellanea: Sim.',
    'GlobalRecHitsV': 'Miscellanea: Sim.',
    'MixingV': 'Miscellanea: Sim.',
    'NoiseRatesV': 'Miscellanea: Sim.',
    # Muon
    'Muon,HLTMonMuon': 'Muon',
    # Muon Objects
    'MuonCSCDigisV': 'Muon Objects',
    'MuonDTDigisV': 'Muon Objects',
    'MuonDTHitsV': 'Muon Objects',
    'Muons': 'Muon Objects',
    # Photons
    'Egamma': 'Photons',
    'EgammaV': 'Photons',
    # Resistive Plate Chambers
    'RPC': 'Resistive Plate Chambers',
    'RPCDigisV': 'Resistive Plate Chambers',
    # SusyExo
    'SusyExo': 'SusyExo',
    # Tau
    'TauOffline': 'Tau',
    'TauOnline': 'Tau',
    'TauRelVal': 'Tau',
    # Top
    'Top': 'Top',
    # Tracking
    'Tracking': 'Tracking',
    # Tracking System
    'Pixel': 'Tracking System',
    'SiStrip': 'Tracking System',
    'TrackerDigisV': 'Tracking System',
    'TrackerHitsV': 'Tracking System',
    'TrackerRecHitsV': 'Tracking System',
}
46 
47 
def get_img_path(filename, path):
    '''Returns image path for https://cmsweb.cern.ch/dqm histogram
    visualisation service.

    The run number is read from the ``_R<digits>__`` marker of ``filename``;
    the 2nd, 3rd and 4th ``__``-separated parts of ``filename`` (with a
    trailing ``.root`` extension removed) and ``path`` (with every
    ``Run summary/`` occurrence removed) make up the rest of the result.

    Raises ``IndexError`` when ``filename`` has no run marker or fewer than
    four ``__``-separated parts.
    '''
    suffix = '.root'
    run = int(re.findall(r'_R(\d*)__', filename)[0])
    # Drop the extension as a *suffix*.  ``str.rstrip('.root')`` strips any
    # run of trailing '.', 'r', 'o', 't' characters instead, which mangles
    # names such as 'RelValTTbar' into 'RelValTTba'.
    parts = [part[:-len(suffix)] if part.endswith(suffix) else part
             for part in filename.split('__')]
    path = path.replace('Run summary/', '')
    return 'archive/%s/%s/%s/%s/%s' % (run, parts[1], parts[2], parts[3], path)
55 
56 
def get_img_url(path, f1, f2=None, w=250, h=250):
    '''Builds the full plotfairy URL (https://cmsweb.cern.ch/dqm
    visualisation service) of the histogram stored at ``path``.

    With only ``f1`` the URL shows that file's histogram; when ``f2`` is
    also given the URL shows an overlay of the histogram from both files.
    ``w`` and ``h`` are passed on as the ``w``/``h`` query parameters.
    '''
    base = 'https://cmsweb.cern.ch/dqm/relval/plotfairy'
    first_img = get_img_path(f1, path)
    if f2:
        second_img = get_img_path(f2, path)
        return '%s/overlay?obj=%s;obj=%s;w=%s;h=%s' % (
            base, first_img, second_img, w, h)
    return '%s/%s?w=%s;h=%s' % (base, first_img, w, h)
65 
66 
def get_dataset_name(name):
    '''Returns extracted dataset name from the given ROOT filename.

    The result is ``<dataset>_<run>`` where ``<run>`` is the nine-digit run
    number with leading zeros removed.  File names containing ``RelVal``
    take the dataset from the ``GR_R_..._<dataset>-v<N>_`` part; all other
    names take it from the part between the run number and ``__CMSSW_``.

    Raises ``IndexError`` when ``name`` does not match the expected pattern.
    '''
    if 'RelVal' in name:
        run = re.findall(r'_R(\d{9})_', name)[0]
        ds = re.findall(r'GR_R_\d*_V\d*C?_(?:RelVal)?_([\w\d]*-v\d+)_', name)[0]
    else:
        # With two groups ``findall`` yields (run, dataset) tuples; take the
        # first tuple itself.  Slicing with ``[0:1]`` gives a one-element
        # list, which cannot be unpacked into two names.
        run, ds = re.findall(r'R(\d{9})__([\w\d]*)__CMSSW_', name)[0]
    return '_'.join([ds, str(int(run))])
75 
76 
def get_release(name):
    '''Extracts the release part of a ROOT filename: the text between the
    ``R<nine digits>__`` run marker and the ``__DQM.root`` ending.

    Raises ``IndexError`` when ``name`` does not match that layout.
    '''
    found = re.findall(r'R\d{9}__([\w\d_-]*)__DQM.root', name)
    return found[0]
80 
81 
def get_stats(c, threshold, dir_ranges):
    '''Counts histogram comparisons whose ids fall into ``dir_ranges``.

    ``c`` is a DB cursor and ``dir_ranges`` an iterable of inclusive
    ``(from_id, till_id)`` pairs.  A comparison is a success when its
    p-value is non-negative and above ``threshold``, a fail when it is
    non-negative and at most ``threshold``, and a null when it is negative.

    Returns the tuple ``(successes, nulls, fails)``.
    '''
    success_sql = '''SELECT count(*) FROM HistogramComparison
                     WHERE p_value >= 0 AND p_value > ? AND
                     id >= ? and id <= ?'''
    null_sql = '''SELECT count(*) FROM HistogramComparison WHERE
                  p_value < 0 AND id >= ? AND id <= ?'''
    fail_sql = '''SELECT count(*) FROM HistogramComparison
                  WHERE p_value >= 0 AND p_value <= ? AND
                  id >= ? AND id <= ?'''

    def count(sql, params):
        c.execute(sql, params)
        return c.fetchone()[0]

    counts = [0, 0, 0]  # successes, nulls, fails
    for first_id, last_id in dir_ranges:
        counts[0] += count(success_sql, (threshold, first_id, last_id))
        counts[1] += count(null_sql, (first_id, last_id))
        counts[2] += count(fail_sql, (threshold, first_id, last_id))
    return tuple(counts)
98 
99 
def get_percentage(successes, nulls, fails):
    '''Turns the ``successes``, ``nulls`` and ``fails`` counts into
    percentages of their sum, each rounded to two decimals.

    Returns ``(success, null, fail)``; all three are ``None`` when
    ``successes`` is ``None`` or when the counts add up to zero.
    '''
    undefined = (None, None, None)
    if successes is None:
        return undefined
    total = successes + fails + nulls
    if not total:
        return undefined
    return tuple(round(100. * count / total, 2)
                 for count in (successes, nulls, fails))
111 
112 
def get_folders(c, file_id, filename, dir_id, threshold):  # TODO: If folder [Egamma|JetMet] analyse their subdirs
    '''Returns file folder stats for one "summary table" column.

    For every ``Directory`` row whose parent is ``dir_id`` the histogram
    comparison counts are fetched with ``get_stats``.

    Returns a list of ``(folder_name, [file_id, ds_name, successes, nulls,
    fails])`` pairs.  The first pair is named ``'Summary'`` and holds the
    totals over all folders; ``ds_name`` is the dataset name extracted from
    ``filename``.
    '''
    ds_name = get_dataset_name(filename)
    c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM
                 Directory WHERE parent_id=?''', (dir_id,))
    # Fetch everything first: get_stats() reuses the same cursor.
    dirs = c.fetchall()
    file_folders = dict()
    totals = [0, 0, 0]  # successes, nulls, fails
    for name, from_id, till_id in dirs:
        counts = get_stats(c, threshold, ((from_id, till_id),))
        for i, count in enumerate(counts):
            totals[i] += count
        if name in file_folders:
            # Same folder name seen again: accumulate into the existing row
            # so that it keeps the flat [file_id, ds_name, s, n, f] layout
            # (appending a nested list would hide these counts from callers).
            row = file_folders[name]
            for i, count in enumerate(counts):
                row[2 + i] += count
        else:
            file_folders[name] = [file_id, ds_name] + list(counts)
    # list(): dict.items() is not a list on Python 3.
    return [('Summary', [file_id, ds_name] + totals)] + list(file_folders.items())
131 
132 
def join_ranges(ranges, elem):
    '''To do less DB calls, joins [(from_id, till_id), ...] ranges.

    Meant to be used as ``reduce(join_ranges, list_of_ranges)``: ``ranges``
    is either the accumulated list of ranges or, on the very first call, a
    single ``(from_id, till_id)`` tuple.  ``elem`` is the next range.  When
    ``elem`` starts right after the last accumulated range ends, the two are
    merged; otherwise ``elem`` is appended.

    Returns the accumulated list (an accumulator list passed in is modified
    in place).
    '''
    if not ranges:
        # Empty accumulator, e.g. reduce(join_ranges, rows, []).
        return [elem]
    # isinstance() also recognises tuple subclasses (e.g. namedtuple rows),
    # which an exact ``type(...) == tuple`` comparison would miss.
    if isinstance(ranges, tuple):
        ranges = [ranges]
    last = ranges[-1]
    if last[-1] + 1 == elem[0]:
        ranges[-1] = (last[0], elem[1])
    else:
        ranges.append(elem)
    return ranges
142 
143 
def get_release_list(c):
    '''Returns all ``ReleaseComparisons`` found on database.

    ``c`` is a DB cursor; the result is the list of
    ``(title, statistical_test)`` rows of the ``ReleaseComparison`` table.
    '''
    c.execute('SELECT title, statistical_test FROM ReleaseComparison')
    return c.fetchall()
148 
149 
def db_list_with_releases(path='.'):
    '''Returns available database list and their releases.

    Every ``*.db`` file in directory ``path`` is opened with sqlite3 and
    queried with ``get_release_list``.  Returns a list of
    ``(db_name_without_extension, releases)`` pairs.
    '''
    result = []
    for db in listdir(path):
        if not db.endswith('.db'):
            continue
        conn = sqlite3.connect(join(path, db))
        try:
            releases = get_release_list(conn.cursor())
        finally:
            # Close the connection even when the query fails, so a broken
            # database file does not leak an open handle.
            conn.close()
        result.append((db[:-3], releases))
    return result
160 
161 # ------------------- Template Context generators --------------------
162 
def _aligned_file_rows(file_stats, files):
    '''Returns ``[folder_summary, row_for_files[0], row_for_files[1], ...]``,
    using a fresh N/A row for every file that has no stats in the folder.'''
    rows_by_file_id = dict((row[0], row) for row in file_stats[1:])
    aligned = [file_stats[0]]
    for file_ in files:
        row = rows_by_file_id.get(file_[0])
        if row is None:
            row = [None, 'N/A', None, None, None]
        aligned.append(row)
    return aligned


def _success_ratios(stats_by_name):
    '''Returns name-sorted ``(name, successes / total)`` pairs for every
    ``name -> [successes, nulls, fails]`` entry with a non-zero total.'''
    ratios = []
    for name, stats in stats_by_name.items():
        total = sum(stats)
        if total:
            ratios.append((name, float(stats[0]) / total))
    return sorted(ratios, key=lambda x: x[0])


def get_release_summary_stats(c, release_title, st_test, threshold=1e-5):
    '''Returns context for ``release_summary.html`` template.

    ``c`` is a DB cursor; ``release_title`` and ``st_test`` select the
    ``ReleaseComparison`` row; ``threshold`` is the p-value limit handed to
    ``get_stats``.

    Context keys:

    * ``release1``, ``release2`` - taken from the ``ReleaseComparison`` row;
    * ``successes``, ``nulls``, ``fails``, ``total`` - counts over all
      compared files; ``success``, ``null``, ``fail`` - the matching
      percentages (only set when ``total`` is non-zero);
    * ``folders`` - ``[(folder_name, rows), ...]`` with ``'Summary'`` first
      and the remaining folders sorted by name.  ``rows[0]`` is the folder
      summary and is followed by one ``[file_id, ds_name, success, null,
      fail]`` row per compared file (percentages), or an ``N/A`` row when
      the file does not have that folder;
    * ``summary_ratios``, ``detailed_ratios`` - name-sorted
      ``(name, success_ratio)`` pairs, per ``renaming`` label and per
      folder name respectively.

    Raises ``TypeError`` when no matching ``ReleaseComparison`` row exists.
    '''
    ## Summary
    context = dict()
    c.execute('''SELECT release1, release2, id FROM ReleaseComparison
                 WHERE title = ? AND statistical_test = ?''', (release_title, st_test))
    context['release1'], context['release2'], release_comp_id = c.fetchone()

    # All directory ranges
    c.execute('''SELECT from_histogram_id, till_histogram_id FROM Directory
                 WHERE id IN (SELECT directory_id FROM RootFileComparison
                 WHERE release_comparison_id = ?)''', (release_comp_id,))
    dir_ranges = c.fetchall()

    # Merge adjacent ranges so that fewer DB queries are needed.
    if len(dir_ranges) > 1:
        dir_ranges = reduce(join_ranges, dir_ranges)

    context['successes'], context['nulls'], context['fails'] = \
        get_stats(c, threshold, dir_ranges)

    context['total'] = context['successes'] + context['fails'] + context['nulls']
    if context['total']:
        context['success'], context['null'], context['fail'] = \
            get_percentage(context['successes'], context['nulls'], context['fails'])

    ## Data needed for the all the statistics:
    c.execute('''SELECT id, filename1, directory_id FROM RootFileComparison
                 WHERE release_comparison_id = ?''', (release_comp_id,))
    files = c.fetchall()

    ## folders: {folder_name: [folder_summary, file_row, file_row, ...]}
    ## where every row is [file_id, name, successes, nulls, fails].
    folders = dict()
    for file_id, filename, dir_id in files:
        for folder_name, file_folder_stats in get_folders(c, file_id, filename,
                                                          dir_id, threshold):
            if folder_name in folders:
                # Add folder stats and update the folder summary
                rows = folders[folder_name]
                rows.append(file_folder_stats)
                for i in (2, 3, 4):
                    rows[0][i] += file_folder_stats[i]
            else:
                folder_summary = [None, 'Summary', file_folder_stats[2],
                                  file_folder_stats[3], file_folder_stats[4]]
                folders[folder_name] = [folder_summary, file_folder_stats]

    ## Calculate ratios
    summary_rows = folders.pop('Summary', None)  # absent when there are no files
    ordered = sorted(folders.items(), key=lambda x: x[0])
    if summary_rows is not None:
        ordered.insert(0, ('Summary', summary_rows))
    folders = []
    for folder_name, file_stats in ordered:
        # Insert N/A if histo is missing.  The aligned list is stored back
        # into ``folders``; rebinding a loop variable alone would drop it.
        file_stats = _aligned_file_rows(file_stats, files)
        # Count the ratios (counts are replaced by percentages in place)
        for stats in file_stats:
            stats[2], stats[3], stats[4] = get_percentage(*stats[2:5])
        folders.append((folder_name, file_stats))
    context['folders'] = folders

    ## Summary Barchart
    # TODO: optimise not to fetch from DB again.
    c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM Directory
                 WHERE parent_id IN (SELECT directory_id FROM RootFileComparison
                 WHERE release_comparison_id = ?)''', (release_comp_id,))
    cum_lvl3_dir_ranges = dict()
    for name, from_id, till_id in c.fetchall():
        cum_lvl3_dir_ranges.setdefault(name, []).append((from_id, till_id))

    # Fetch stats: per folder name, and per ``renaming`` label
    summary_stats = dict()
    detailed_stats = dict()
    for name, ranges in cum_lvl3_dir_ranges.items():
        stats = list(get_stats(c, threshold, ranges))
        detailed_stats[name] = stats
        if name in renaming:
            label_stats = summary_stats.setdefault(renaming[name], [0, 0, 0])
            for i, count in enumerate(stats):
                label_stats[i] += count

    # Calculate ratio
    context['summary_ratios'] = _success_ratios(summary_stats)
    context['detailed_ratios'] = _success_ratios(detailed_stats)
    return context
281 
282 
def get_directory_summary_stats(c, url_args, file_id, threshold):
    '''Builds the context for the ``directory_summary.html`` template.

    ``c`` is a DB cursor, ``file_id`` the id of a ``RootFileComparison`` row
    and ``url_args`` the sequence of directory names leading from that
    file's directory to the requested one (empty for the file's own
    directory).  ``threshold`` is the p-value limit separating failed from
    successful comparisons.

    The context holds the two releases, the counts and percentages of the
    selected directory, per-subdirectory stats (``subdirs``, most fails
    first) and the ``failed_histos``, ``null_histos`` and
    ``successful_histos`` lists of
    ``(name, p_value, url1, url2, overlay_url)`` tuples.
    '''
    c.execute('''SELECT directory_id, filename1, filename2 FROM RootFileComparison
                 WHERE id = ?''', (file_id,))
    dir_id, f1, f2 = c.fetchone()
    context = {'release1': get_release(f1), 'release2': get_release(f2)}

    if url_args:
        # Walk down the directory tree, one URL argument per level.
        visited = []
        for dir_name in url_args:
            c.execute('''SELECT id, name FROM Directory WHERE name = ? AND
                         parent_id = ?''', (dir_name, dir_id))
            dir_id, name = c.fetchone()
            visited.append(name)
        context['parent_name'] = '/'.join(visited)
    else:
        dir_name = get_dataset_name(f1)

    # Stats of the selected directory itself
    c.execute('''SELECT from_histogram_id, till_histogram_id FROM
                 Directory WHERE id = ?''', (dir_id,))
    ranges = c.fetchone()
    successes, nulls, fails = get_stats(c, threshold, (ranges,))
    success, null, fail = get_percentage(successes, nulls, fails)
    context['successes'] = successes
    context['nulls'] = nulls
    context['fails'] = fails
    context['success'] = success
    context['null'] = null
    context['fail'] = fail
    context['total'] = successes + nulls + fails
    context['dir_name'] = dir_name

    # subdirs: name, total, successes, nulls, fails, success, null, fail
    c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM Directory
                 WHERE parent_id = ?''', (dir_id,))
    subdir_stats = []
    for name, from_id, till_id in c.fetchall():
        successes, nulls, fails = get_stats(c, threshold, [(from_id, till_id,)])
        success, null, fail = get_percentage(successes, nulls, fails)
        total = successes + nulls + fails
        subdir_stats.append((name, total, successes, nulls, fails,
                             success, null, fail))
    # Subdirectories with the most failed comparisons come first.
    context['subdirs'] = sorted(subdir_stats, key=lambda x: x[4], reverse=True)

    # histograms: name, p_value, url1, url2, overlay
    c.execute('''SELECT name, p_value FROM HistogramComparison
                 WHERE directory_id = ?''', (dir_id,))
    by_outcome = {'null': [], 'failed': [], 'successful': []}
    for name, p_value in c.fetchall():
        path = quote('%s/%s' % ('/'.join(url_args), name))
        entry = (name, p_value, get_img_url(path, f1), get_img_url(path, f2),
                 get_img_url(path, f1, f2))
        if p_value < 0:
            outcome = 'null'
        elif p_value <= threshold:
            outcome = 'failed'
        else:
            outcome = 'successful'
        by_outcome[outcome].append(entry)

    def by_p_value(entry):
        return entry[1]

    context['failed_histos'] = sorted(by_outcome['failed'], key=by_p_value,
                                      reverse=True)
    context['null_histos'] = by_outcome['null']
    context['successful_histos'] = sorted(by_outcome['successful'],
                                          key=by_p_value, reverse=True)
    return context
def get_percentage(successes, nulls, fails)
Definition: app_utils.py:100
def get_img_path(filename, path)
Definition: app_utils.py:48
def get_release_list(c)
Definition: app_utils.py:144
def get_folders(c, file_id, filename, dir_id, threshold)
Definition: app_utils.py:113
def db_list_with_releases(path='.')
Definition: app_utils.py:150
def get_dataset_name(name)
Definition: app_utils.py:67
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def get_release_summary_stats(c, release_title, st_test, threshold=1e-5)
Definition: app_utils.py:163
def get_directory_summary_stats(c, url_args, file_id, threshold)
Definition: app_utils.py:283
def get_img_url(path, f1, f2=None, w=250, h=250)
Definition: app_utils.py:57
def get_stats(c, threshold, dir_ranges)
Definition: app_utils.py:82
def join_ranges(ranges, elem)
Definition: app_utils.py:133
def get_release(name)
Definition: app_utils.py:77