CMS 3D CMS Logo

app_utils.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # coding: utf-8
3 '''
4 Helper functions for CherryPy application ``browse_db.py``.
5 
6 Author: Albertas Gimbutas, Vilnius University (LT)
7 e-mail: albertasgim@gmail.com
8 '''
9 
10 import sqlite3
11 import re
12 from os import getcwd, listdir
13 from os.path import join
14 from urllib import quote
15 from functools import reduce
16 
17 import six
18 
19 renaming = {
20  'MessageLogger': 'Miscellanea', 'FourVector': 'Generic',
21  'Castor': 'Castor Calorimeter', 'RPCDigisV': 'Resistive Plate Chambers',
22  'GlobalRecHitsV': 'Miscellanea: Sim.', 'Top': 'Top', 'HLTJETMET': 'JetMet',
23  'GlobalDigisV': 'Miscellanea: Sim.', 'L1TEMU': 'Level 1 Trigger',
24  'TrackerRecHitsV': 'Tracking System', 'MuonDTHitsV': 'Muon Objects',
25  'EcalDigisV': 'Ecal Calorimeter', 'EcalHitsV': 'Ecal Calorimeter',
26  'Muons': 'Muon Objects', 'DT': 'Drift Tubes', 'TrackerDigisV': 'Tracking System',
27  'Pixel': 'Tracking System', 'EcalPreshower': 'Ecal Calorimeter',
28  'EgammaV': 'Photons', 'AlCaEcalPi0': 'Alca', 'SusyExo': 'SusyExo',
29  'MuonDTDigisV': 'Muon Objects', 'TauRelVal': 'Tau',
30  'HcalHitsV': 'Hcal Calorimeter', 'RPC': 'Resistive Plate Chambers',
31  'EcalRecHitsV': 'Ecal Calorimeter', 'EgOffline': 'EGamma',
32  'MuonCSCDigisV': 'Muon Objects', 'ParticleFlow': 'Miscellanea',
33  'Info': 'Miscellanea', 'Tracking': 'Tracking',
34  'NoiseRatesV': 'Miscellanea: Sim.', 'Generator': 'Miscellanea: Sim.',
35  'Btag': 'B Tagging', 'Higgs': 'Higgs', 'GlobalHitsV': 'Miscellanea: Sim.',
36  'HcalRecHitsV': 'Hcal Calorimeter', 'TrackerHitsV': 'Tracking System',
37  'CSC': 'Cathode Strip Chambers', 'Muon,HLTMonMuon': 'Muon',
38  'Hcal': 'Hcal Calorimeter', 'TauOffline': 'Tau',
39  'HeavyFlavor': 'HeavyFlavor', 'JetMET': 'Jet', 'Physics': 'Miscellanea',
40  'CaloTowersV': 'Hcal Calorimeter', 'SiStrip': 'Tracking System',
41  'EcalClusterV': 'Ecal Calorimeter', 'HLTEgammaValidation': 'EGamma',
42  'EcalPhiSym': 'Alca', 'L1T': 'Level 1 Trigger', 'MixingV': 'Miscellanea: Sim.',
43  'FourVector_Val': 'Generic', 'EcalEndcap': 'Ecal Calorimeter',
44  'TauOnline': 'Tau', 'Egamma': 'Photons', 'HcalIsoTrack': 'Alca',
45  'EcalBarrel': 'Ecal Calorimeter'
46 }
47 
48 
49 def get_img_path(filename, path):
50  '''Returns image path for https://cmsweb.cern.ch/dqm histogram
51  visualisation service'''
52  run = int(re.findall('_R(\d*)__', filename)[0])
53  parts = [e.rstrip('.root') for e in filename.split('__')]
54  path = path.replace('Run summary/', '')
55  return 'archive/%s/%s/%s/%s/%s' % (run, parts[1], parts[2], parts[3], path)
56 
57 
58 def get_img_url(path, f1, f2=None, w=250, h=250):
59  '''Returns full URL of histogram (or histogram overlay) image for
60  https://cmsweb.cern.ch/dqm visualisation service.'''
61  base = 'https://cmsweb.cern.ch/dqm/relval/plotfairy'
62  if not f2:
63  return '%s/%s?w=%s;h=%s' % (base, get_img_path(f1, path), w, h)
64  return '%s/overlay?obj=%s;obj=%s;w=%s;h=%s' % (base,
65  get_img_path(f1, path), get_img_path(f2, path), w, h)
66 
67 
68 def get_dataset_name(name):
69  '''Returns extracted dataset name from the given ROOT filename.'''
70  if re.search('RelVal', name):
71  run = str(int(re.findall('_R(\d{9})_', name)[0]))
72  ds = re.findall('GR_R_\d*_V\d*C?_(?:RelVal)?_([\w\d]*-v\d+)_', name)[0]
73  else:
74  run, ds = re.findall('R(\d{9})__([\w\d]*)__CMSSW_', name)[0:1]
75  return '_'.join([ds, str(int(run))])
76 
77 
78 def get_release(name):
79  '''Returns extracted release from the given ROOT filename.'''
80  return re.findall('R\d{9}__([\w\d_-]*)__DQM.root', name)[0]
81 
82 
83 def get_stats(c, threshold, dir_ranges):
84  '''Returns ``successes``, ``fails``, ``nulls`` for the given dir_ranges.'''
85  successes, nulls, fails = 0, 0, 0
86  for from_id, till_id in dir_ranges:
87  c.execute('''SELECT count(*) FROM HistogramComparison
88  WHERE p_value >= 0 AND p_value > ? AND
89  id >= ? and id <= ?''', (threshold, from_id, till_id))
90  successes += c.fetchone()[0]
91  c.execute('''SELECT count(*) FROM HistogramComparison WHERE
92  p_value < 0 AND id >= ? AND id <= ?''', (from_id, till_id))
93  nulls += c.fetchone()[0]
94  c.execute('''SELECT count(*) FROM HistogramComparison
95  WHERE p_value >= 0 AND p_value <= ? AND
96  id >= ? AND id <= ?''', (threshold, from_id, till_id))
97  fails += c.fetchone()[0]
98  return successes, nulls, fails
99 
100 
101 def get_percentage(successes, nulls, fails):
102  '''Converts integers ``successes``, ``nulls`` and ``fails`` to percents.'''
103  if successes is None:
104  return None, None, None
105  total = successes + fails + nulls
106  if not total:
107  return None, None, None
108  success = round(100. * successes / total, 2)
109  null = round(100. * nulls / total, 2)
110  fail = round(100. * fails / total, 2)
111  return success, null, fail
112 
113 
114 def get_folders(c, file_id, filename, dir_id, threshold): # TODO: If folder [Egamma|JetMet] analyse their subdirs
115  '''Returns file folder stats for one "summary table" column.'''
116  ds_name = get_dataset_name(filename)
117  c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM
118  Directory WHERE parent_id=?''', (dir_id,))
119  dirs = c.fetchall()
120  file_folders = dict()
121  total_successes, total_nulls, total_fails = 0, 0, 0
122  for name, from_id, till_id in dirs:
123  successes, nulls, fails = get_stats(c, threshold, ((from_id, till_id),))
124  total_successes += successes
125  total_nulls += nulls
126  total_fails += fails
127  if name in file_folders:
128  file_folders[name].append([file_id, ds_name, successes, nulls, fails])
129  else:
130  file_folders[name] = [file_id, ds_name, successes, nulls, fails]
131  return [('Summary', [file_id, ds_name, total_successes, total_nulls, total_fails])] + file_folders.items()
132 
133 
134 def join_ranges(ranges, elem):
135  '''To do less DB calls, joins [(from_id, till_id), ...] ranges.'''
136  if isinstance(ranges, tuple):
137  ranges = [ranges]
138  if ranges[-1][-1] + 1 == elem[0]:
139  ranges[-1] = (ranges[-1][0], elem[1])
140  else:
141  ranges.append(elem)
142  return ranges
143 
144 
146  '''Returns all ``ReleaseComparisons`` found on database.'''
147  c.execute('SELECT title, statistical_test FROM ReleaseComparison')
148  return c.fetchall()
149 
150 
151 def db_list_with_releases(path='.'):
152  '''Returns available database list and their releases.'''
153  db_list = [db for db in listdir(path) if db.endswith('.db')]
154  db_list_with_releases = []
155  for db in db_list:
156  conn = sqlite3.connect(join(path, db))
157  releases = get_release_list(conn.cursor())
158  db_list_with_releases.append((db[:-3], releases))
159  conn.close()
160  return db_list_with_releases
161 
162 # ------------------- Template Context generators --------------------
163 
164 def get_release_summary_stats(c, release_title, st_test, threshold=1e-5):
165  '''Returns context for ``release_summary.html`` template.'''
166  ## Summary
167  context = dict()
168  c.execute('''SELECT release1, release2, id FROM ReleaseComparison
169  WHERE title = ? AND statistical_test = ?''', (release_title, st_test))
170  context['release1'], context['release2'], release_comp_id = c.fetchone()
171 
172  # All directory ranges
173  c.execute('''SELECT from_histogram_id, till_histogram_id FROM Directory
174  WHERE id IN (SELECT directory_id FROM RootFileComparison
175  WHERE release_comparison_id = ?)''', (release_comp_id,))
176  dir_ranges = c.fetchall()
177 
178  if len(dir_ranges) > 1:
179  dir_ranges = reduce(join_ranges, dir_ranges)
180 
181  context['successes'], context['nulls'], context['fails'], = get_stats(c, threshold, dir_ranges)
182 
183  context['total'] = context['successes'] + context['fails'] + context['nulls']
184  if context['total']:
185  context['success'], context['null'], context['fail'] = \
186  get_percentage(context['successes'], context['nulls'], context['fails'])
187 
188  ## Data needed for the all the statistics:
189  c.execute('''SELECT id, filename1, directory_id FROM RootFileComparison
190  WHERE release_comparison_id = ?''', (release_comp_id,))
191  files = c.fetchall()
192 
193  ## folders: [(folder_name, [folder: (file_id, filename, success, null, fail)]), ...]
194  folders = dict()
195  for file_id, filename, dir_id in files:
196  # file_folders: [(folder_name, [(file_id, file_name, success, null, fail)]), ...]
197  file_folders = get_folders(c, file_id, filename, dir_id, threshold)
198  for folder_name, file_folder_stats in file_folders:
199  if folder_name in folders:
200  # Add folder stats
201  folders[folder_name].append(file_folder_stats)
202  # Update folder summary
203  folders[folder_name][0][2] += file_folder_stats[2]
204  folders[folder_name][0][3] += file_folder_stats[3]
205  folders[folder_name][0][4] += file_folder_stats[4]
206  else:
207  folder_summary = [None, 'Summary', file_folder_stats[2],
208  file_folder_stats[3], file_folder_stats[4]]
209  folders[folder_name] = [folder_summary, file_folder_stats]
210 
211  ## Calculate ratios
212  folders = [('Summary', folders.pop('Summary'))] + sorted(folders.items(), key=lambda x: x[0])
213  for folder, file_stats in folders:
214  # Insert N/A if histo is missing
215  if len(file_stats) != len(files)+1:
216  for i, file_ in enumerate(files):
217  if file_[0] != file_stats[i][0]:
218  file_stats = file_stats[:i] + [[None, "N/A", None, None, None]] + file_stats[i:]
219  # Count the ratios
220  for i, stats in enumerate(file_stats):
221  stats[2], stats[3], stats[4] = get_percentage(*stats[2:5])
222  context['folders'] = folders
223 
224 
225  ## Select Summary Barchart, Detailed Barchart
226  for folder in folders:
227  print folder
228  # detailed_ratios: (name, success_ratio)
229  # summary_ratios: (name, success_ratio)
230 
231 
232  ## Summary Barchart
233  # TODO: optimise not to fetch from DB again.
234  c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM Directory
235  WHERE parent_id IN (SELECT directory_id FROM RootFileComparison
236  WHERE release_comparison_id = ?)''', (release_comp_id,))
237  lvl3_dir_ranges = c.fetchall()
238 
239  cum_lvl3_dir_ranges = dict()
240  for name, from_id, till_id in lvl3_dir_ranges:
241  if name in cum_lvl3_dir_ranges:
242  cum_lvl3_dir_ranges[name].append((from_id, till_id))
243  else:
244  cum_lvl3_dir_ranges[name] = [(from_id, till_id)]
245 
246  # Fetch stats
247  summary_stats = dict()
248  detailed_stats = dict()
249  for name, ranges in six.iteritems(cum_lvl3_dir_ranges):
250  successes, nulls, fails = get_stats(c, threshold, ranges)
251  if name in detailed_stats:
252  detailed_stats[name][0] += successes
253  detailed_stats[name][1] += nulls
254  detailed_stats[name][2] += fails
255  else:
256  detailed_stats[name] = [successes, nulls, fails]
257  if name in renaming:
258  if renaming[name] in summary_stats:
259  summary_stats[renaming[name]][0] += successes
260  summary_stats[renaming[name]][1] += nulls
261  summary_stats[renaming[name]][2] += fails
262  else:
263  summary_stats[renaming[name]] = [successes, nulls, fails]
264 
265  # Calculate ratio
266  summary_ratios = []
267  for name, stats in six.iteritems(summary_stats):
268  total = sum(stats)
269  if total:
270  ratio = float(stats[0]) / sum(stats)
271  summary_ratios.append((name, ratio))
272  detailed_ratios = []
273  for name, stats in six.iteritems(detailed_stats):
274  total = sum(stats)
275  if total:
276  ratio = float(stats[0]) / sum(stats)
277  detailed_ratios.append((name, ratio))
278 
279  context['summary_ratios'] = sorted(summary_ratios, key=lambda x: x[0])
280  context['detailed_ratios'] = sorted(detailed_ratios, key=lambda x: x[0])
281  return context
282 
283 
284 def get_directory_summary_stats(c, url_args, file_id, threshold):
285  '''Returns context for ``directory_summary.html`` template.'''
286  context = dict()
287  c.execute('''SELECT directory_id, filename1, filename2 FROM RootFileComparison
288  WHERE id = ?''', (file_id,))
289  dir_id, f1, f2 = c.fetchone()
290  context['release1'] = get_release(f1)
291  context['release2'] = get_release(f2)
292  if not url_args:
293  dir_name = get_dataset_name(f1)
294  else:
295  #### Select DQMData/Run directory.
296  directory_names = []
297 
298  for dir_name in url_args:
299  c.execute('''SELECT id, name FROM Directory WHERE name = ? AND
300  parent_id = ?''', (dir_name, dir_id))
301  dir_id, name = c.fetchone()
302  directory_names.append(name)
303  context['parent_name'] = '/'.join(directory_names)
304 
305  ## Select stats
306  c.execute('''SELECT from_histogram_id, till_histogram_id FROM
307  Directory WHERE id = ?''', (dir_id,))
308  ranges = c.fetchone()
309  successes, nulls, fails = get_stats(c, threshold, (ranges,))
310  success, null, fail = get_percentage(successes, nulls, fails)
311  context.update({
312  'successes': successes, 'nulls': nulls, 'fails': fails,
313  'success': success, 'null': null, 'fail': fail,
314  'total': successes + nulls + fails, 'dir_name': dir_name
315  })
316  # subdirs: name, total, success, fail, null
317  c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM Directory
318  WHERE parent_id = ?''', (dir_id,))
319  subdirs = c.fetchall()
320  subdir_stats = []
321  for name, from_id, till_id in subdirs:
322  successes, nulls, fails = get_stats(c, threshold, [(from_id, till_id,)])
323  success, null, fail = get_percentage(successes, nulls, fails)
324  subdir_stats.append((name, successes + nulls + fails, successes,
325  nulls, fails, success, null, fail))
326  context['subdirs'] = sorted(subdir_stats, key=lambda x: x[4], reverse=True)
327 
328  # histograms: name, p_value
329  c.execute('''SELECT name, p_value FROM HistogramComparison
330  WHERE directory_id = ?''', (dir_id,))
331  failed_histos = []
332  successful_histos = []
333  null_histos = []
334  for name, p_value in c.fetchall():
335  path = quote('%s/%s' % ('/'.join(url_args), name))
336  url1 = get_img_url(path, f1)
337  url2 = get_img_url(path, f2)
338  overlay = get_img_url(path, f1, f2)
339  if p_value < 0:
340  null_histos.append((name, p_value, url1, url2, overlay))
341  elif p_value <= threshold:
342  failed_histos.append((name, p_value, url1, url2, overlay))
343  else:
344  successful_histos.append((name, p_value, url1, url2, overlay))
345 
346  context['failed_histos'] = sorted(failed_histos, key=lambda x: x[1], reverse=True)
347  context['null_histos'] = null_histos
348  context['successful_histos'] = sorted(successful_histos, key=lambda x: x[1], reverse=True)
349  return context
def get_percentage(successes, nulls, fails)
Definition: app_utils.py:101
def get_img_path(filename, path)
Definition: app_utils.py:49
def get_release_list(c)
Definition: app_utils.py:145
def get_folders(c, file_id, filename, dir_id, threshold)
Definition: app_utils.py:114
def db_list_with_releases(path='.')
Definition: app_utils.py:151
def get_dataset_name(name)
Definition: app_utils.py:68
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def get_release_summary_stats(c, release_title, st_test, threshold=1e-5)
Definition: app_utils.py:164
def get_directory_summary_stats(c, url_args, file_id, threshold)
Definition: app_utils.py:284
def get_img_url(path, f1, f2=None, w=250, h=250)
Definition: app_utils.py:58
def get_stats(c, threshold, dir_ranges)
Definition: app_utils.py:83
def join_ranges(ranges, elem)
Definition: app_utils.py:134
def get_release(name)
Definition: app_utils.py:78
#define str(s)