4 Helper functions for CherryPy application ``browse_db.py``. 6 Author: Albertas Gimbutas, Vilnius University (LT) 7 e-mail: albertasgim@gmail.com 9 from __future__
import print_function
13 from os
import getcwd, listdir
14 from os.path
import join
15 from urllib
import quote
16 from functools
import reduce
21 'MessageLogger':
'Miscellanea',
'FourVector':
'Generic',
22 'Castor':
'Castor Calorimeter',
'RPCDigisV':
'Resistive Plate Chambers',
23 'GlobalRecHitsV':
'Miscellanea: Sim.',
'Top':
'Top',
'HLTJETMET':
'JetMet',
24 'GlobalDigisV':
'Miscellanea: Sim.',
'L1TEMU':
'Level 1 Trigger',
25 'TrackerRecHitsV':
'Tracking System',
'MuonDTHitsV':
'Muon Objects',
26 'EcalDigisV':
'Ecal Calorimeter',
'EcalHitsV':
'Ecal Calorimeter',
27 'Muons':
'Muon Objects',
'DT':
'Drift Tubes',
'TrackerDigisV':
'Tracking System',
28 'Pixel':
'Tracking System',
'EcalPreshower':
'Ecal Calorimeter',
29 'EgammaV':
'Photons',
'AlCaEcalPi0':
'Alca',
'SusyExo':
'SusyExo',
30 'MuonDTDigisV':
'Muon Objects',
'TauRelVal':
'Tau',
31 'HcalHitsV':
'Hcal Calorimeter',
'RPC':
'Resistive Plate Chambers',
32 'EcalRecHitsV':
'Ecal Calorimeter',
'EgOffline':
'EGamma',
33 'MuonCSCDigisV':
'Muon Objects',
'ParticleFlow':
'Miscellanea',
34 'Info':
'Miscellanea',
'Tracking':
'Tracking',
35 'NoiseRatesV':
'Miscellanea: Sim.',
'Generator':
'Miscellanea: Sim.',
36 'Btag':
'B Tagging',
'Higgs':
'Higgs',
'GlobalHitsV':
'Miscellanea: Sim.',
37 'HcalRecHitsV':
'Hcal Calorimeter',
'TrackerHitsV':
'Tracking System',
38 'CSC':
'Cathode Strip Chambers',
'Muon,HLTMonMuon':
'Muon',
39 'Hcal':
'Hcal Calorimeter',
'TauOffline':
'Tau',
40 'HeavyFlavor':
'HeavyFlavor',
'JetMET':
'Jet',
'Physics':
'Miscellanea',
41 'CaloTowersV':
'Hcal Calorimeter',
'SiStrip':
'Tracking System',
42 'EcalClusterV':
'Ecal Calorimeter',
'HLTEgammaValidation':
'EGamma',
43 'EcalPhiSym':
'Alca',
'L1T':
'Level 1 Trigger',
'MixingV':
'Miscellanea: Sim.',
44 'FourVector_Val':
'Generic',
'EcalEndcap':
'Ecal Calorimeter',
45 'TauOnline':
'Tau',
'Egamma':
'Photons',
'HcalIsoTrack':
'Alca',
46 'EcalBarrel':
'Ecal Calorimeter' 51 '''Returns image path for https://cmsweb.cern.ch/dqm histogram 52 visualisation service''' 53 run =
int(re.findall(
'_R(\d*)__', filename)[0])
54 parts = [e.rstrip(
'.root')
for e
in filename.split(
'__')]
55 path = path.replace(
'Run summary/',
'')
56 return 'archive/%s/%s/%s/%s/%s' % (run, parts[1], parts[2], parts[3], path)
60 '''Returns full URL of histogram (or histogram overlay) image for 61 https://cmsweb.cern.ch/dqm visualisation service.''' 62 base =
'https://cmsweb.cern.ch/dqm/relval/plotfairy' 64 return '%s/%s?w=%s;h=%s' % (base,
get_img_path(f1, path), w, h)
65 return '%s/overlay?obj=%s;obj=%s;w=%s;h=%s' % (base,
70 '''Returns extracted dataset name from the given ROOT filename.''' 71 if re.search(
'RelVal', name):
72 run =
str(
int(re.findall(
'_R(\d{9})_', name)[0]))
73 ds = re.findall(
'GR_R_\d*_V\d*C?_(?:RelVal)?_([\w\d]*-v\d+)_', name)[0]
75 run, ds = re.findall(
'R(\d{9})__([\w\d]*)__CMSSW_', name)[0:1]
80 '''Returns extracted release from the given ROOT filename.''' 81 return re.findall(
'R\d{9}__([\w\d_-]*)__DQM.root', name)[0]
85 '''Returns ``successes``, ``fails``, ``nulls`` for the given dir_ranges.''' 86 successes, nulls, fails = 0, 0, 0
87 for from_id, till_id
in dir_ranges:
88 c.execute(
'''SELECT count(*) FROM HistogramComparison 89 WHERE p_value >= 0 AND p_value > ? AND 90 id >= ? and id <= ?''', (threshold, from_id, till_id))
91 successes += c.fetchone()[0]
92 c.execute(
'''SELECT count(*) FROM HistogramComparison WHERE 93 p_value < 0 AND id >= ? AND id <= ?''', (from_id, till_id))
94 nulls += c.fetchone()[0]
95 c.execute(
'''SELECT count(*) FROM HistogramComparison 96 WHERE p_value >= 0 AND p_value <= ? AND 97 id >= ? AND id <= ?''', (threshold, from_id, till_id))
98 fails += c.fetchone()[0]
99 return successes, nulls, fails
103 '''Converts integers ``successes``, ``nulls`` and ``fails`` to percents.''' 104 if successes
is None:
105 return None,
None,
None 106 total = successes + fails + nulls
108 return None,
None,
None 109 success = round(100. * successes / total, 2)
110 null = round(100. * nulls / total, 2)
111 fail = round(100. * fails / total, 2)
112 return success, null, fail
116 '''Returns file folder stats for one "summary table" column.''' 118 c.execute(
'''SELECT name, from_histogram_id, till_histogram_id FROM 119 Directory WHERE parent_id=?''', (dir_id,))
121 file_folders =
dict()
122 total_successes, total_nulls, total_fails = 0, 0, 0
123 for name, from_id, till_id
in dirs:
124 successes, nulls, fails =
get_stats(c, threshold, ((from_id, till_id),))
125 total_successes += successes
128 if name
in file_folders:
129 file_folders[name].
append([file_id, ds_name, successes, nulls, fails])
131 file_folders[name] = [file_id, ds_name, successes, nulls, fails]
132 return [(
'Summary', [file_id, ds_name, total_successes, total_nulls, total_fails])] + file_folders.items()
136 '''To do less DB calls, joins [(from_id, till_id), ...] ranges.''' 137 if isinstance(ranges, tuple):
139 if ranges[-1][-1] + 1 == elem[0]:
140 ranges[-1] = (ranges[-1][0], elem[1])
147 '''Returns all ``ReleaseComparisons`` found on database.''' 148 c.execute(
'SELECT title, statistical_test FROM ReleaseComparison')
153 '''Returns available database list and their releases.''' 154 db_list = [db
for db
in listdir(path)
if db.endswith(
'.db')]
155 db_list_with_releases = []
157 conn = sqlite3.connect(
join(path, db))
159 db_list_with_releases.append((db[:-3], releases))
161 return db_list_with_releases
166 '''Returns context for ``release_summary.html`` template.''' 169 c.execute(
'''SELECT release1, release2, id FROM ReleaseComparison 170 WHERE title = ? AND statistical_test = ?''', (release_title, st_test))
171 context[
'release1'], context[
'release2'], release_comp_id = c.fetchone()
174 c.execute(
'''SELECT from_histogram_id, till_histogram_id FROM Directory 175 WHERE id IN (SELECT directory_id FROM RootFileComparison 176 WHERE release_comparison_id = ?)''', (release_comp_id,))
177 dir_ranges = c.fetchall()
179 if len(dir_ranges) > 1:
180 dir_ranges = reduce(join_ranges, dir_ranges)
182 context[
'successes'], context[
'nulls'], context[
'fails'], =
get_stats(c, threshold, dir_ranges)
184 context[
'total'] = context[
'successes'] + context[
'fails'] + context[
'nulls']
186 context[
'success'], context[
'null'], context[
'fail'] = \
187 get_percentage(context[
'successes'], context[
'nulls'], context[
'fails'])
190 c.execute(
'''SELECT id, filename1, directory_id FROM RootFileComparison 191 WHERE release_comparison_id = ?''', (release_comp_id,))
196 for file_id, filename, dir_id
in files:
198 file_folders =
get_folders(c, file_id, filename, dir_id, threshold)
199 for folder_name, file_folder_stats
in file_folders:
200 if folder_name
in folders:
202 folders[folder_name].
append(file_folder_stats)
204 folders[folder_name][0][2] += file_folder_stats[2]
205 folders[folder_name][0][3] += file_folder_stats[3]
206 folders[folder_name][0][4] += file_folder_stats[4]
208 folder_summary = [
None,
'Summary', file_folder_stats[2],
209 file_folder_stats[3], file_folder_stats[4]]
210 folders[folder_name] = [folder_summary, file_folder_stats]
213 folders = [(
'Summary', folders.pop(
'Summary'))] + sorted(folders.items(), key=
lambda x: x[0])
214 for folder, file_stats
in folders:
216 if len(file_stats) != len(files)+1:
217 for i, file_
in enumerate(files):
218 if file_[0] != file_stats[i][0]:
219 file_stats = file_stats[:i] + [[
None,
"N/A",
None,
None,
None]] + file_stats[i:]
221 for i, stats
in enumerate(file_stats):
223 context[
'folders'] = folders
227 for folder
in folders:
235 c.execute(
'''SELECT name, from_histogram_id, till_histogram_id FROM Directory 236 WHERE parent_id IN (SELECT directory_id FROM RootFileComparison 237 WHERE release_comparison_id = ?)''', (release_comp_id,))
238 lvl3_dir_ranges = c.fetchall()
240 cum_lvl3_dir_ranges =
dict()
241 for name, from_id, till_id
in lvl3_dir_ranges:
242 if name
in cum_lvl3_dir_ranges:
243 cum_lvl3_dir_ranges[name].
append((from_id, till_id))
245 cum_lvl3_dir_ranges[name] = [(from_id, till_id)]
248 summary_stats =
dict()
249 detailed_stats =
dict()
250 for name, ranges
in six.iteritems(cum_lvl3_dir_ranges):
251 successes, nulls, fails =
get_stats(c, threshold, ranges)
252 if name
in detailed_stats:
253 detailed_stats[name][0] += successes
254 detailed_stats[name][1] += nulls
255 detailed_stats[name][2] += fails
257 detailed_stats[name] = [successes, nulls, fails]
259 if renaming[name]
in summary_stats:
260 summary_stats[renaming[name]][0] += successes
261 summary_stats[renaming[name]][1] += nulls
262 summary_stats[renaming[name]][2] += fails
264 summary_stats[renaming[name]] = [successes, nulls, fails]
268 for name, stats
in six.iteritems(summary_stats):
271 ratio =
float(stats[0]) / sum(stats)
272 summary_ratios.append((name, ratio))
274 for name, stats
in six.iteritems(detailed_stats):
277 ratio =
float(stats[0]) / sum(stats)
278 detailed_ratios.append((name, ratio))
280 context[
'summary_ratios'] = sorted(summary_ratios, key=
lambda x: x[0])
281 context[
'detailed_ratios'] = sorted(detailed_ratios, key=
lambda x: x[0])
286 '''Returns context for ``directory_summary.html`` template.''' 288 c.execute(
'''SELECT directory_id, filename1, filename2 FROM RootFileComparison 289 WHERE id = ?''', (file_id,))
290 dir_id, f1, f2 = c.fetchone()
299 for dir_name
in url_args:
300 c.execute(
'''SELECT id, name FROM Directory WHERE name = ? AND 301 parent_id = ?''', (dir_name, dir_id))
302 dir_id, name = c.fetchone()
303 directory_names.append(name)
304 context[
'parent_name'] =
'/'.
join(directory_names)
307 c.execute(
'''SELECT from_histogram_id, till_histogram_id FROM 308 Directory WHERE id = ?''', (dir_id,))
309 ranges = c.fetchone()
310 successes, nulls, fails =
get_stats(c, threshold, (ranges,))
313 'successes': successes,
'nulls': nulls,
'fails': fails,
314 'success': success,
'null': null,
'fail': fail,
315 'total': successes + nulls + fails,
'dir_name': dir_name
318 c.execute(
'''SELECT name, from_histogram_id, till_histogram_id FROM Directory 319 WHERE parent_id = ?''', (dir_id,))
320 subdirs = c.fetchall()
322 for name, from_id, till_id
in subdirs:
323 successes, nulls, fails =
get_stats(c, threshold, [(from_id, till_id,)])
325 subdir_stats.append((name, successes + nulls + fails, successes,
326 nulls, fails, success, null, fail))
327 context[
'subdirs'] = sorted(subdir_stats, key=
lambda x: x[4], reverse=
True)
330 c.execute(
'''SELECT name, p_value FROM HistogramComparison 331 WHERE directory_id = ?''', (dir_id,))
333 successful_histos = []
335 for name, p_value
in c.fetchall():
336 path = quote(
'%s/%s' % (
'/'.
join(url_args), name))
341 null_histos.append((name, p_value, url1, url2, overlay))
342 elif p_value <= threshold:
343 failed_histos.append((name, p_value, url1, url2, overlay))
345 successful_histos.append((name, p_value, url1, url2, overlay))
347 context[
'failed_histos'] = sorted(failed_histos, key=
lambda x: x[1], reverse=
True)
348 context[
'null_histos'] = null_histos
349 context[
'successful_histos'] = sorted(successful_histos, key=
lambda x: x[1], reverse=
True)
S & print(S &os, JobReport::InputFile const &f)
def get_percentage(successes, nulls, fails)
def get_img_path(filename, path)
def get_folders(c, file_id, filename, dir_id, threshold)
def db_list_with_releases(path='.')
def get_dataset_name(name)
static std::string join(char **cmd)
def get_release_summary_stats(c, release_title, st_test, threshold=1e-5)
def get_directory_summary_stats(c, url_args, file_id, threshold)
def get_img_url(path, f1, f2=None, w=250, h=250)
def get_stats(c, threshold, dir_ranges)
def join_ranges(ranges, elem)