4 Helper functions for CherryPy application ``browse_db.py``. 6 Author: Albertas Gimbutas, Vilnius University (LT) 7 e-mail: albertasgim@gmail.com 12 from os
import getcwd, listdir
13 from os.path
import join
14 from urllib
import quote
15 from functools
import reduce
# Maps a directory name to the title under which its statistics are grouped
# when summary ratios are accumulated (looked up as ``renaming[name]``).
renaming = {
    'MessageLogger': 'Miscellanea',
    'FourVector': 'Generic',
    'Castor': 'Castor Calorimeter',
    'RPCDigisV': 'Resistive Plate Chambers',
    'GlobalRecHitsV': 'Miscellanea: Sim.',
    'Top': 'Top',
    'HLTJETMET': 'JetMet',
    'GlobalDigisV': 'Miscellanea: Sim.',
    'L1TEMU': 'Level 1 Trigger',
    'TrackerRecHitsV': 'Tracking System',
    'MuonDTHitsV': 'Muon Objects',
    'EcalDigisV': 'Ecal Calorimeter',
    'EcalHitsV': 'Ecal Calorimeter',
    'Muons': 'Muon Objects',
    'DT': 'Drift Tubes',
    'TrackerDigisV': 'Tracking System',
    'Pixel': 'Tracking System',
    'EcalPreshower': 'Ecal Calorimeter',
    'EgammaV': 'Photons',
    'AlCaEcalPi0': 'Alca',
    'SusyExo': 'SusyExo',
    'MuonDTDigisV': 'Muon Objects',
    'TauRelVal': 'Tau',
    'HcalHitsV': 'Hcal Calorimeter',
    'RPC': 'Resistive Plate Chambers',
    'EcalRecHitsV': 'Ecal Calorimeter',
    'EgOffline': 'EGamma',
    'MuonCSCDigisV': 'Muon Objects',
    'ParticleFlow': 'Miscellanea',
    'Info': 'Miscellanea',
    'Tracking': 'Tracking',
    'NoiseRatesV': 'Miscellanea: Sim.',
    'Generator': 'Miscellanea: Sim.',
    'Btag': 'B Tagging',
    'Higgs': 'Higgs',
    'GlobalHitsV': 'Miscellanea: Sim.',
    'HcalRecHitsV': 'Hcal Calorimeter',
    'TrackerHitsV': 'Tracking System',
    'CSC': 'Cathode Strip Chambers',
    'Muon,HLTMonMuon': 'Muon',
    'Hcal': 'Hcal Calorimeter',
    'TauOffline': 'Tau',
    'HeavyFlavor': 'HeavyFlavor',
    'JetMET': 'Jet',
    'Physics': 'Miscellanea',
    'CaloTowersV': 'Hcal Calorimeter',
    'SiStrip': 'Tracking System',
    'EcalClusterV': 'Ecal Calorimeter',
    'HLTEgammaValidation': 'EGamma',
    'EcalPhiSym': 'Alca',
    'L1T': 'Level 1 Trigger',
    'MixingV': 'Miscellanea: Sim.',
    'FourVector_Val': 'Generic',
    'EcalEndcap': 'Ecal Calorimeter',
    'TauOnline': 'Tau',
    'Egamma': 'Photons',
    'HcalIsoTrack': 'Alca',
    'EcalBarrel': 'Ecal Calorimeter',
}


def get_img_path(filename, path):
    '''Returns image path for https://cmsweb.cern.ch/dqm histogram
    visualisation service.

    ``filename`` is split on ``__``; the run number is read from its
    ``_R<digits>__`` marker and the second, third and fourth parts are used
    as path components.  Every occurrence of ``Run summary/`` is removed
    from ``path`` before it is appended.

    Returns a string of the form ``archive/<run>/<part1>/<part2>/<part3>/<path>``.

    Raises ``IndexError`` if ``filename`` has no ``_R<digits>__`` marker or
    fewer than four ``__``-separated parts, and ``ValueError`` if the marker
    contains no digits.
    '''
    run = int(re.findall(r'_R(\d*)__', filename)[0])

    suffix = '.root'
    parts = []
    for part in filename.split('__'):
        # Remove only a literal ``.root`` suffix.  ``str.rstrip('.root')``
        # treats its argument as a set of characters and would also eat the
        # trailing letters of names such as ``RelValTTbar``.
        if part.endswith(suffix):
            part = part[:-len(suffix)]
        parts.append(part)

    path = path.replace('Run summary/', '')
    return 'archive/%s/%s/%s/%s/%s' % (run, parts[1], parts[2], parts[3], path)
58 '''Returns full URL of histogram (or histogram overlay) image for 59 https://cmsweb.cern.ch/dqm visualisation service.''' 60 base =
'https://cmsweb.cern.ch/dqm/relval/plotfairy' 62 return '%s/%s?w=%s;h=%s' % (base,
get_img_path(f1, path), w, h)
63 return '%s/overlay?obj=%s;obj=%s;w=%s;h=%s' % (base,
68 '''Returns extracted dataset name from the given ROOT filename.''' 69 if re.search(
'RelVal', name):
70 run =
str(
int(re.findall(
'_R(\d{9})_', name)[0]))
71 ds = re.findall(
'GR_R_\d*_V\d*C?_(?:RelVal)?_([\w\d]*-v\d+)_', name)[0]
73 run, ds = re.findall(
'R(\d{9})__([\w\d]*)__CMSSW_', name)[0:1]
78 '''Returns extracted release from the given ROOT filename.''' 79 return re.findall(
'R\d{9}__([\w\d_-]*)__DQM.root', name)[0]
def get_stats(c, threshold, dir_ranges):
    '''Returns ``successes``, ``nulls``, ``fails`` for the given dir_ranges.

    ``c`` is a database cursor (``execute`` / ``fetchone`` are used),
    ``threshold`` is the p-value limit and ``dir_ranges`` is an iterable of
    ``(from_id, till_id)`` pairs; both bounds are inclusive.

    For every ``HistogramComparison`` row whose ``id`` lies in a range:

    * ``p_value < 0``                      counts as a null,
    * ``0 <= p_value <= threshold``        counts as a fail,
    * ``p_value >= 0`` and ``> threshold`` counts as a success.

    Rows with a NULL ``p_value`` fall into none of the three groups.
    An empty ``dir_ranges`` yields ``(0, 0, 0)``.
    '''
    # One conditional-aggregate query per range instead of three separate
    # counts.  SUM() is NULL when no row matches, hence the COALESCE.
    query = '''SELECT
        COALESCE(SUM(CASE WHEN p_value >= 0 AND p_value > ?
                          THEN 1 ELSE 0 END), 0),
        COALESCE(SUM(CASE WHEN p_value < 0
                          THEN 1 ELSE 0 END), 0),
        COALESCE(SUM(CASE WHEN p_value >= 0 AND p_value <= ?
                          THEN 1 ELSE 0 END), 0)
        FROM HistogramComparison
        WHERE id >= ? AND id <= ?'''

    successes, nulls, fails = 0, 0, 0
    for from_id, till_id in dir_ranges:
        c.execute(query, (threshold, threshold, from_id, till_id))
        range_successes, range_nulls, range_fails = c.fetchone()
        successes += range_successes
        nulls += range_nulls
        fails += range_fails
    return successes, nulls, fails
def get_percentage(successes, nulls, fails):
    '''Converts integers ``successes``, ``nulls`` and ``fails`` to percents.

    Returns a ``(success, null, fail)`` tuple in which each value is the
    share of the corresponding count in the total, as a percentage rounded
    to two decimals.  Because each value is rounded on its own, the three
    numbers need not add up to exactly 100.

    Returns ``(None, None, None)`` when any count is ``None`` or when the
    total is zero, since no percentage can be computed in either case.
    '''
    # A missing count in any position makes the total meaningless; treat it
    # the same way as a missing ``successes`` instead of failing on the sum.
    if successes is None or nulls is None or fails is None:
        return None, None, None

    total = successes + fails + nulls
    if not total:
        # Nothing was compared: avoid dividing by zero.
        return None, None, None

    success = round(100. * successes / total, 2)
    null = round(100. * nulls / total, 2)
    fail = round(100. * fails / total, 2)
    return success, null, fail
114 '''Returns file folder stats for one "summary table" column.''' 116 c.execute(
'''SELECT name, from_histogram_id, till_histogram_id FROM 117 Directory WHERE parent_id=?''', (dir_id,))
119 file_folders =
dict()
120 total_successes, total_nulls, total_fails = 0, 0, 0
121 for name, from_id, till_id
in dirs:
122 successes, nulls, fails =
get_stats(c, threshold, ((from_id, till_id),))
123 total_successes += successes
126 if name
in file_folders:
127 file_folders[name].
append([file_id, ds_name, successes, nulls, fails])
129 file_folders[name] = [file_id, ds_name, successes, nulls, fails]
130 return [(
'Summary', [file_id, ds_name, total_successes, total_nulls, total_fails])] + file_folders.items()
134 '''To do less DB calls, joins [(from_id, till_id), ...] ranges.''' 135 if type(ranges) == tuple:
137 if ranges[-1][-1] + 1 == elem[0]:
138 ranges[-1] = (ranges[-1][0], elem[1])
145 '''Returns all ``ReleaseComparisons`` found on database.''' 146 c.execute(
'SELECT title, statistical_test FROM ReleaseComparison')
151 '''Returns available database list and their releases.''' 152 db_list = [db
for db
in listdir(path)
if db.endswith(
'.db')]
153 db_list_with_releases = []
155 conn = sqlite3.connect(
join(path, db))
157 db_list_with_releases.append((db[:-3], releases))
159 return db_list_with_releases
164 '''Returns context for ``release_summary.html`` template.''' 167 c.execute(
'''SELECT release1, release2, id FROM ReleaseComparison 168 WHERE title = ? AND statistical_test = ?''', (release_title, st_test))
169 context[
'release1'], context[
'release2'], release_comp_id = c.fetchone()
172 c.execute(
'''SELECT from_histogram_id, till_histogram_id FROM Directory 173 WHERE id IN (SELECT directory_id FROM RootFileComparison 174 WHERE release_comparison_id = ?)''', (release_comp_id,))
175 dir_ranges = c.fetchall()
177 if len(dir_ranges) > 1:
178 dir_ranges = reduce(join_ranges, dir_ranges)
180 context[
'successes'], context[
'nulls'], context[
'fails'], =
get_stats(c, threshold, dir_ranges)
182 context[
'total'] = context[
'successes'] + context[
'fails'] + context[
'nulls']
184 context[
'success'], context[
'null'], context[
'fail'] = \
185 get_percentage(context[
'successes'], context[
'nulls'], context[
'fails'])
188 c.execute(
'''SELECT id, filename1, directory_id FROM RootFileComparison 189 WHERE release_comparison_id = ?''', (release_comp_id,))
194 for file_id, filename, dir_id
in files:
196 file_folders =
get_folders(c, file_id, filename, dir_id, threshold)
197 for folder_name, file_folder_stats
in file_folders:
198 if folder_name
in folders:
200 folders[folder_name].
append(file_folder_stats)
202 folders[folder_name][0][2] += file_folder_stats[2]
203 folders[folder_name][0][3] += file_folder_stats[3]
204 folders[folder_name][0][4] += file_folder_stats[4]
206 folder_summary = [
None,
'Summary', file_folder_stats[2],
207 file_folder_stats[3], file_folder_stats[4]]
208 folders[folder_name] = [folder_summary, file_folder_stats]
211 folders = [(
'Summary', folders.pop(
'Summary'))] + sorted(folders.items(), key=
lambda x: x[0])
212 for folder, file_stats
in folders:
214 if len(file_stats) != len(files)+1:
215 for i, file_
in enumerate(files):
216 if file_[0] != file_stats[i][0]:
217 file_stats = file_stats[:i] + [[
None,
"N/A",
None,
None,
None]] + file_stats[i:]
219 for i, stats
in enumerate(file_stats):
221 context[
'folders'] = folders
225 for folder
in folders:
233 c.execute(
'''SELECT name, from_histogram_id, till_histogram_id FROM Directory 234 WHERE parent_id IN (SELECT directory_id FROM RootFileComparison 235 WHERE release_comparison_id = ?)''', (release_comp_id,))
236 lvl3_dir_ranges = c.fetchall()
238 cum_lvl3_dir_ranges =
dict()
239 for name, from_id, till_id
in lvl3_dir_ranges:
240 if name
in cum_lvl3_dir_ranges:
241 cum_lvl3_dir_ranges[name].
append((from_id, till_id))
243 cum_lvl3_dir_ranges[name] = [(from_id, till_id)]
246 summary_stats =
dict()
247 detailed_stats =
dict()
248 for name, ranges
in cum_lvl3_dir_ranges.iteritems():
249 successes, nulls, fails =
get_stats(c, threshold, ranges)
250 if name
in detailed_stats:
251 detailed_stats[name][0] += successes
252 detailed_stats[name][1] += nulls
253 detailed_stats[name][2] += fails
255 detailed_stats[name] = [successes, nulls, fails]
257 if renaming[name]
in summary_stats:
258 summary_stats[renaming[name]][0] += successes
259 summary_stats[renaming[name]][1] += nulls
260 summary_stats[renaming[name]][2] += fails
262 summary_stats[renaming[name]] = [successes, nulls, fails]
266 for name, stats
in summary_stats.iteritems():
269 ratio =
float(stats[0]) / sum(stats)
270 summary_ratios.append((name, ratio))
272 for name, stats
in detailed_stats.iteritems():
275 ratio =
float(stats[0]) / sum(stats)
276 detailed_ratios.append((name, ratio))
278 context[
'summary_ratios'] = sorted(summary_ratios, key=
lambda x: x[0])
279 context[
'detailed_ratios'] = sorted(detailed_ratios, key=
lambda x: x[0])
284 '''Returns context for ``directory_summary.html`` template.''' 286 c.execute(
'''SELECT directory_id, filename1, filename2 FROM RootFileComparison 287 WHERE id = ?''', (file_id,))
288 dir_id, f1, f2 = c.fetchone()
297 for dir_name
in url_args:
298 c.execute(
'''SELECT id, name FROM Directory WHERE name = ? AND 299 parent_id = ?''', (dir_name, dir_id))
300 dir_id, name = c.fetchone()
301 directory_names.append(name)
302 context[
'parent_name'] =
'/'.
join(directory_names)
305 c.execute(
'''SELECT from_histogram_id, till_histogram_id FROM 306 Directory WHERE id = ?''', (dir_id,))
307 ranges = c.fetchone()
308 successes, nulls, fails =
get_stats(c, threshold, (ranges,))
311 'successes': successes,
'nulls': nulls,
'fails': fails,
312 'success': success,
'null': null,
'fail': fail,
313 'total': successes + nulls + fails,
'dir_name': dir_name
316 c.execute(
'''SELECT name, from_histogram_id, till_histogram_id FROM Directory 317 WHERE parent_id = ?''', (dir_id,))
318 subdirs = c.fetchall()
320 for name, from_id, till_id
in subdirs:
321 successes, nulls, fails =
get_stats(c, threshold, [(from_id, till_id,)])
323 subdir_stats.append((name, successes + nulls + fails, successes,
324 nulls, fails, success, null, fail))
325 context[
'subdirs'] = sorted(subdir_stats, key=
lambda x: x[4], reverse=
True)
328 c.execute(
'''SELECT name, p_value FROM HistogramComparison 329 WHERE directory_id = ?''', (dir_id,))
331 successful_histos = []
333 for name, p_value
in c.fetchall():
334 path = quote(
'%s/%s' % (
'/'.
join(url_args), name))
339 null_histos.append((name, p_value, url1, url2, overlay))
340 elif p_value <= threshold:
341 failed_histos.append((name, p_value, url1, url2, overlay))
343 successful_histos.append((name, p_value, url1, url2, overlay))
345 context[
'failed_histos'] = sorted(failed_histos, key=
lambda x: x[1], reverse=
True)
346 context[
'null_histos'] = null_histos
347 context[
'successful_histos'] = sorted(successful_histos, key=
lambda x: x[1], reverse=
True)
def get_percentage(successes, nulls, fails)
def get_img_path(filename, path)
def get_folders(c, file_id, filename, dir_id, threshold)
def db_list_with_releases(path='.')
def get_dataset_name(name)
static std::string join(char **cmd)
def get_release_summary_stats(c, release_title, st_test, threshold=1e-5)
def get_directory_summary_stats(c, url_args, file_id, threshold)
def get_img_url(path, f1, f2=None, w=250, h=250)
def get_stats(c, threshold, dir_ranges)
def join_ranges(ranges, elem)