4 Helper functions for CherryPy application ``browse_db.py``. 6 Author: Albertas Gimbutas, Vilnius University (LT) 7 e-mail: albertasgim@gmail.com 12 from os
import getcwd, listdir
13 from os.path
import join
14 from urllib
import quote
15 from functools
import reduce
20 'MessageLogger':
'Miscellanea',
'FourVector':
'Generic',
21 'Castor':
'Castor Calorimeter',
'RPCDigisV':
'Resistive Plate Chambers',
22 'GlobalRecHitsV':
'Miscellanea: Sim.',
'Top':
'Top',
'HLTJETMET':
'JetMet',
23 'GlobalDigisV':
'Miscellanea: Sim.',
'L1TEMU':
'Level 1 Trigger',
24 'TrackerRecHitsV':
'Tracking System',
'MuonDTHitsV':
'Muon Objects',
25 'EcalDigisV':
'Ecal Calorimeter',
'EcalHitsV':
'Ecal Calorimeter',
26 'Muons':
'Muon Objects',
'DT':
'Drift Tubes',
'TrackerDigisV':
'Tracking System',
27 'Pixel':
'Tracking System',
'EcalPreshower':
'Ecal Calorimeter',
28 'EgammaV':
'Photons',
'AlCaEcalPi0':
'Alca',
'SusyExo':
'SusyExo',
29 'MuonDTDigisV':
'Muon Objects',
'TauRelVal':
'Tau',
30 'HcalHitsV':
'Hcal Calorimeter',
'RPC':
'Resistive Plate Chambers',
31 'EcalRecHitsV':
'Ecal Calorimeter',
'EgOffline':
'EGamma',
32 'MuonCSCDigisV':
'Muon Objects',
'ParticleFlow':
'Miscellanea',
33 'Info':
'Miscellanea',
'Tracking':
'Tracking',
34 'NoiseRatesV':
'Miscellanea: Sim.',
'Generator':
'Miscellanea: Sim.',
35 'Btag':
'B Tagging',
'Higgs':
'Higgs',
'GlobalHitsV':
'Miscellanea: Sim.',
36 'HcalRecHitsV':
'Hcal Calorimeter',
'TrackerHitsV':
'Tracking System',
37 'CSC':
'Cathode Strip Chambers',
'Muon,HLTMonMuon':
'Muon',
38 'Hcal':
'Hcal Calorimeter',
'TauOffline':
'Tau',
39 'HeavyFlavor':
'HeavyFlavor',
'JetMET':
'Jet',
'Physics':
'Miscellanea',
40 'CaloTowersV':
'Hcal Calorimeter',
'SiStrip':
'Tracking System',
41 'EcalClusterV':
'Ecal Calorimeter',
'HLTEgammaValidation':
'EGamma',
42 'EcalPhiSym':
'Alca',
'L1T':
'Level 1 Trigger',
'MixingV':
'Miscellanea: Sim.',
43 'FourVector_Val':
'Generic',
'EcalEndcap':
'Ecal Calorimeter',
44 'TauOnline':
'Tau',
'Egamma':
'Photons',
'HcalIsoTrack':
'Alca',
45 'EcalBarrel':
'Ecal Calorimeter' 50 '''Returns image path for https://cmsweb.cern.ch/dqm histogram 51 visualisation service''' 52 run =
int(re.findall(
'_R(\d*)__', filename)[0])
53 parts = [e.rstrip(
'.root')
for e
in filename.split(
'__')]
54 path = path.replace(
'Run summary/',
'')
55 return 'archive/%s/%s/%s/%s/%s' % (run, parts[1], parts[2], parts[3], path)
59 '''Returns full URL of histogram (or histogram overlay) image for 60 https://cmsweb.cern.ch/dqm visualisation service.''' 61 base =
'https://cmsweb.cern.ch/dqm/relval/plotfairy' 63 return '%s/%s?w=%s;h=%s' % (base,
get_img_path(f1, path), w, h)
64 return '%s/overlay?obj=%s;obj=%s;w=%s;h=%s' % (base,
69 '''Returns extracted dataset name from the given ROOT filename.''' 70 if re.search(
'RelVal', name):
71 run =
str(
int(re.findall(
'_R(\d{9})_', name)[0]))
72 ds = re.findall(
'GR_R_\d*_V\d*C?_(?:RelVal)?_([\w\d]*-v\d+)_', name)[0]
74 run, ds = re.findall(
'R(\d{9})__([\w\d]*)__CMSSW_', name)[0:1]
79 '''Returns extracted release from the given ROOT filename.''' 80 return re.findall(
'R\d{9}__([\w\d_-]*)__DQM.root', name)[0]
84 '''Returns ``successes``, ``fails``, ``nulls`` for the given dir_ranges.''' 85 successes, nulls, fails = 0, 0, 0
86 for from_id, till_id
in dir_ranges:
87 c.execute(
'''SELECT count(*) FROM HistogramComparison 88 WHERE p_value >= 0 AND p_value > ? AND 89 id >= ? and id <= ?''', (threshold, from_id, till_id))
90 successes += c.fetchone()[0]
91 c.execute(
'''SELECT count(*) FROM HistogramComparison WHERE 92 p_value < 0 AND id >= ? AND id <= ?''', (from_id, till_id))
93 nulls += c.fetchone()[0]
94 c.execute(
'''SELECT count(*) FROM HistogramComparison 95 WHERE p_value >= 0 AND p_value <= ? AND 96 id >= ? AND id <= ?''', (threshold, from_id, till_id))
97 fails += c.fetchone()[0]
98 return successes, nulls, fails
102 '''Converts integers ``successes``, ``nulls`` and ``fails`` to percents.''' 103 if successes
is None:
104 return None,
None,
None 105 total = successes + fails + nulls
107 return None,
None,
None 108 success = round(100. * successes / total, 2)
109 null = round(100. * nulls / total, 2)
110 fail = round(100. * fails / total, 2)
111 return success, null, fail
115 '''Returns file folder stats for one "summary table" column.''' 117 c.execute(
'''SELECT name, from_histogram_id, till_histogram_id FROM 118 Directory WHERE parent_id=?''', (dir_id,))
120 file_folders =
dict()
121 total_successes, total_nulls, total_fails = 0, 0, 0
122 for name, from_id, till_id
in dirs:
123 successes, nulls, fails =
get_stats(c, threshold, ((from_id, till_id),))
124 total_successes += successes
127 if name
in file_folders:
128 file_folders[name].
append([file_id, ds_name, successes, nulls, fails])
130 file_folders[name] = [file_id, ds_name, successes, nulls, fails]
131 return [(
'Summary', [file_id, ds_name, total_successes, total_nulls, total_fails])] + file_folders.items()
135 '''To do less DB calls, joins [(from_id, till_id), ...] ranges.''' 136 if isinstance(ranges, tuple):
138 if ranges[-1][-1] + 1 == elem[0]:
139 ranges[-1] = (ranges[-1][0], elem[1])
146 '''Returns all ``ReleaseComparisons`` found on database.''' 147 c.execute(
'SELECT title, statistical_test FROM ReleaseComparison')
152 '''Returns available database list and their releases.''' 153 db_list = [db
for db
in listdir(path)
if db.endswith(
'.db')]
154 db_list_with_releases = []
156 conn = sqlite3.connect(
join(path, db))
158 db_list_with_releases.append((db[:-3], releases))
160 return db_list_with_releases
165 '''Returns context for ``release_summary.html`` template.''' 168 c.execute(
'''SELECT release1, release2, id FROM ReleaseComparison 169 WHERE title = ? AND statistical_test = ?''', (release_title, st_test))
170 context[
'release1'], context[
'release2'], release_comp_id = c.fetchone()
173 c.execute(
'''SELECT from_histogram_id, till_histogram_id FROM Directory 174 WHERE id IN (SELECT directory_id FROM RootFileComparison 175 WHERE release_comparison_id = ?)''', (release_comp_id,))
176 dir_ranges = c.fetchall()
178 if len(dir_ranges) > 1:
179 dir_ranges = reduce(join_ranges, dir_ranges)
181 context[
'successes'], context[
'nulls'], context[
'fails'], =
get_stats(c, threshold, dir_ranges)
183 context[
'total'] = context[
'successes'] + context[
'fails'] + context[
'nulls']
185 context[
'success'], context[
'null'], context[
'fail'] = \
186 get_percentage(context[
'successes'], context[
'nulls'], context[
'fails'])
189 c.execute(
'''SELECT id, filename1, directory_id FROM RootFileComparison 190 WHERE release_comparison_id = ?''', (release_comp_id,))
195 for file_id, filename, dir_id
in files:
197 file_folders =
get_folders(c, file_id, filename, dir_id, threshold)
198 for folder_name, file_folder_stats
in file_folders:
199 if folder_name
in folders:
201 folders[folder_name].
append(file_folder_stats)
203 folders[folder_name][0][2] += file_folder_stats[2]
204 folders[folder_name][0][3] += file_folder_stats[3]
205 folders[folder_name][0][4] += file_folder_stats[4]
207 folder_summary = [
None,
'Summary', file_folder_stats[2],
208 file_folder_stats[3], file_folder_stats[4]]
209 folders[folder_name] = [folder_summary, file_folder_stats]
212 folders = [(
'Summary', folders.pop(
'Summary'))] + sorted(folders.items(), key=
lambda x: x[0])
213 for folder, file_stats
in folders:
215 if len(file_stats) != len(files)+1:
216 for i, file_
in enumerate(files):
217 if file_[0] != file_stats[i][0]:
218 file_stats = file_stats[:i] + [[
None,
"N/A",
None,
None,
None]] + file_stats[i:]
220 for i, stats
in enumerate(file_stats):
222 context[
'folders'] = folders
226 for folder
in folders:
234 c.execute(
'''SELECT name, from_histogram_id, till_histogram_id FROM Directory 235 WHERE parent_id IN (SELECT directory_id FROM RootFileComparison 236 WHERE release_comparison_id = ?)''', (release_comp_id,))
237 lvl3_dir_ranges = c.fetchall()
239 cum_lvl3_dir_ranges =
dict()
240 for name, from_id, till_id
in lvl3_dir_ranges:
241 if name
in cum_lvl3_dir_ranges:
242 cum_lvl3_dir_ranges[name].
append((from_id, till_id))
244 cum_lvl3_dir_ranges[name] = [(from_id, till_id)]
247 summary_stats =
dict()
248 detailed_stats =
dict()
249 for name, ranges
in six.iteritems(cum_lvl3_dir_ranges):
250 successes, nulls, fails =
get_stats(c, threshold, ranges)
251 if name
in detailed_stats:
252 detailed_stats[name][0] += successes
253 detailed_stats[name][1] += nulls
254 detailed_stats[name][2] += fails
256 detailed_stats[name] = [successes, nulls, fails]
258 if renaming[name]
in summary_stats:
259 summary_stats[renaming[name]][0] += successes
260 summary_stats[renaming[name]][1] += nulls
261 summary_stats[renaming[name]][2] += fails
263 summary_stats[renaming[name]] = [successes, nulls, fails]
267 for name, stats
in six.iteritems(summary_stats):
270 ratio =
float(stats[0]) / sum(stats)
271 summary_ratios.append((name, ratio))
273 for name, stats
in six.iteritems(detailed_stats):
276 ratio =
float(stats[0]) / sum(stats)
277 detailed_ratios.append((name, ratio))
279 context[
'summary_ratios'] = sorted(summary_ratios, key=
lambda x: x[0])
280 context[
'detailed_ratios'] = sorted(detailed_ratios, key=
lambda x: x[0])
285 '''Returns context for ``directory_summary.html`` template.''' 287 c.execute(
'''SELECT directory_id, filename1, filename2 FROM RootFileComparison 288 WHERE id = ?''', (file_id,))
289 dir_id, f1, f2 = c.fetchone()
298 for dir_name
in url_args:
299 c.execute(
'''SELECT id, name FROM Directory WHERE name = ? AND 300 parent_id = ?''', (dir_name, dir_id))
301 dir_id, name = c.fetchone()
302 directory_names.append(name)
303 context[
'parent_name'] =
'/'.
join(directory_names)
306 c.execute(
'''SELECT from_histogram_id, till_histogram_id FROM 307 Directory WHERE id = ?''', (dir_id,))
308 ranges = c.fetchone()
309 successes, nulls, fails =
get_stats(c, threshold, (ranges,))
312 'successes': successes,
'nulls': nulls,
'fails': fails,
313 'success': success,
'null': null,
'fail': fail,
314 'total': successes + nulls + fails,
'dir_name': dir_name
317 c.execute(
'''SELECT name, from_histogram_id, till_histogram_id FROM Directory 318 WHERE parent_id = ?''', (dir_id,))
319 subdirs = c.fetchall()
321 for name, from_id, till_id
in subdirs:
322 successes, nulls, fails =
get_stats(c, threshold, [(from_id, till_id,)])
324 subdir_stats.append((name, successes + nulls + fails, successes,
325 nulls, fails, success, null, fail))
326 context[
'subdirs'] = sorted(subdir_stats, key=
lambda x: x[4], reverse=
True)
329 c.execute(
'''SELECT name, p_value FROM HistogramComparison 330 WHERE directory_id = ?''', (dir_id,))
332 successful_histos = []
334 for name, p_value
in c.fetchall():
335 path = quote(
'%s/%s' % (
'/'.
join(url_args), name))
340 null_histos.append((name, p_value, url1, url2, overlay))
341 elif p_value <= threshold:
342 failed_histos.append((name, p_value, url1, url2, overlay))
344 successful_histos.append((name, p_value, url1, url2, overlay))
346 context[
'failed_histos'] = sorted(failed_histos, key=
lambda x: x[1], reverse=
True)
347 context[
'null_histos'] = null_histos
348 context[
'successful_histos'] = sorted(successful_histos, key=
lambda x: x[1], reverse=
True)
def get_percentage(successes, nulls, fails)
def get_img_path(filename, path)
def get_folders(c, file_id, filename, dir_id, threshold)
def db_list_with_releases(path='.')
def get_dataset_name(name)
static std::string join(char **cmd)
def get_release_summary_stats(c, release_title, st_test, threshold=1e-5)
def get_directory_summary_stats(c, url_args, file_id, threshold)
def get_img_url(path, f1, f2=None, w=250, h=250)
def get_stats(c, threshold, dir_ranges)
def join_ranges(ranges, elem)