00001
00002
00003 '''
00004 Helper functions for CherryPy application ``browse_db.py``.
00005
00006 Author: Albertas Gimbutas, Vilnius University (LT)
00007 e-mail: albertasgim@gmail.com
00008 '''
00009
00010 import sqlite3
00011 import re
00012 from os import getcwd, listdir
00013 from os.path import join
00014 from urllib import quote
00015
00016
00017 renaming = {
00018 'MessageLogger': 'Miscellanea', 'FourVector': 'Generic',
00019 'Castor': 'Castor Calorimeter', 'RPCDigisV': 'Resistive Plate Chambers',
00020 'GlobalRecHitsV': 'Miscellanea: Sim.', 'Top': 'Top', 'HLTJETMET': 'JetMet',
00021 'GlobalDigisV': 'Miscellanea: Sim.', 'L1TEMU': 'Level 1 Trigger',
00022 'TrackerRecHitsV': 'Tracking System', 'MuonDTHitsV': 'Muon Objects',
00023 'EcalDigisV': 'Ecal Calorimeter', 'EcalHitsV': 'Ecal Calorimeter',
00024 'Muons': 'Muon Objects', 'DT': 'Drift Tubes', 'TrackerDigisV': 'Tracking System',
00025 'Pixel': 'Tracking System', 'EcalPreshower': 'Ecal Calorimeter',
00026 'EgammaV': 'Photons', 'AlCaEcalPi0': 'Alca', 'SusyExo': 'SusyExo',
00027 'MuonDTDigisV': 'Muon Objects', 'TauRelVal': 'Tau',
00028 'HcalHitsV': 'Hcal Calorimeter', 'RPC': 'Resistive Plate Chambers',
00029 'EcalRecHitsV': 'Ecal Calorimeter', 'EgOffline': 'EGamma',
00030 'MuonCSCDigisV': 'Muon Objects', 'ParticleFlow': 'Miscellanea',
00031 'Info': 'Miscellanea', 'Tracking': 'Tracking',
00032 'NoiseRatesV': 'Miscellanea: Sim.', 'Generator': 'Miscellanea: Sim.',
00033 'Btag': 'B Tagging', 'Higgs': 'Higgs', 'GlobalHitsV': 'Miscellanea: Sim.',
00034 'HcalRecHitsV': 'Hcal Calorimeter', 'TrackerHitsV': 'Tracking System',
00035 'CSC': 'Cathode Strip Chambers', 'Muon,HLTMonMuon': 'Muon',
00036 'Hcal': 'Hcal Calorimeter', 'TauOffline': 'Tau',
00037 'HeavyFlavor': 'HeavyFlavor', 'JetMET': 'Jet', 'Physics': 'Miscellanea',
00038 'CaloTowersV': 'Hcal Calorimeter', 'SiStrip': 'Tracking System',
00039 'EcalClusterV': 'Ecal Calorimeter', 'HLTEgammaValidation': 'EGamma',
00040 'EcalPhiSym': 'Alca', 'L1T': 'Level 1 Trigger', 'MixingV': 'Miscellanea: Sim.',
00041 'FourVector_Val': 'Generic', 'EcalEndcap': 'Ecal Calorimeter',
00042 'TauOnline': 'Tau', 'Egamma': 'Photons', 'HcalIsoTrack': 'Alca',
00043 'EcalBarrel': 'Ecal Calorimeter'
00044 }
00045
00046
00047 def get_img_path(filename, path):
00048 '''Returns image path for https://cmsweb.cern.ch/dqm histogram
00049 visualisation service'''
00050 run = int(re.findall('_R(\d*)__', filename)[0])
00051 parts = [e.rstrip('.root') for e in filename.split('__')]
00052 path = path.replace('Run summary/', '')
00053 return 'archive/%s/%s/%s/%s/%s' % (run, parts[1], parts[2], parts[3], path)
00054
00055
00056 def get_img_url(path, f1, f2=None, w=250, h=250):
00057 '''Returns full URL of histogram (or histogram overlay) image for
00058 https://cmsweb.cern.ch/dqm visualisation service.'''
00059 base = 'https://cmsweb.cern.ch/dqm/relval/plotfairy'
00060 if not f2:
00061 return '%s/%s?w=%s;h=%s' % (base, get_img_path(f1, path), w, h)
00062 return '%s/overlay?obj=%s;obj=%s;w=%s;h=%s' % (base,
00063 get_img_path(f1, path), get_img_path(f2, path), w, h)
00064
00065
00066 def get_dataset_name(name):
00067 '''Returns extracted dataset name from the given ROOT filename.'''
00068 if re.search('RelVal', name):
00069 run = str(int(re.findall('_R(\d{9})_', name)[0]))
00070 ds = re.findall('GR_R_\d*_V\d*C?_(?:RelVal)?_([\w\d]*-v\d+)_', name)[0]
00071 else:
00072 run, ds = re.findall('R(\d{9})__([\w\d]*)__CMSSW_', name)[0:1]
00073 return '_'.join([ds, str(int(run))])
00074
00075
00076 def get_release(name):
00077 '''Returns extracted release from the given ROOT filename.'''
00078 return re.findall('R\d{9}__([\w\d_-]*)__DQM.root', name)[0]
00079
00080
00081 def get_stats(c, threshold, dir_ranges):
00082 '''Returns ``successes``, ``fails``, ``nulls`` for the given dir_ranges.'''
00083 successes, nulls, fails = 0, 0, 0
00084 for from_id, till_id in dir_ranges:
00085 c.execute('''SELECT count(*) FROM HistogramComparison
00086 WHERE p_value >= 0 AND p_value > ? AND
00087 id >= ? and id <= ?''', (threshold, from_id, till_id))
00088 successes += c.fetchone()[0]
00089 c.execute('''SELECT count(*) FROM HistogramComparison WHERE
00090 p_value < 0 AND id >= ? AND id <= ?''', (from_id, till_id))
00091 nulls += c.fetchone()[0]
00092 c.execute('''SELECT count(*) FROM HistogramComparison
00093 WHERE p_value >= 0 AND p_value <= ? AND
00094 id >= ? AND id <= ?''', (threshold, from_id, till_id))
00095 fails += c.fetchone()[0]
00096 return successes, nulls, fails
00097
00098
00099 def get_percentage(successes, nulls, fails):
00100 '''Converts integers ``successes``, ``nulls`` and ``fails`` to percents.'''
00101 if successes is None:
00102 return None, None, None
00103 total = successes + fails + nulls
00104 if not total:
00105 return None, None, None
00106 success = round(100. * successes / total, 2)
00107 null = round(100. * nulls / total, 2)
00108 fail = round(100. * fails / total, 2)
00109 return success, null, fail
00110
00111
00112 def get_folders(c, file_id, filename, dir_id, threshold):
00113 '''Returns file folder stats for one "summary table" column.'''
00114 ds_name = get_dataset_name(filename)
00115 c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM
00116 Directory WHERE parent_id=?''', (dir_id,))
00117 dirs = c.fetchall()
00118 file_folders = dict()
00119 total_successes, total_nulls, total_fails = 0, 0, 0
00120 for name, from_id, till_id in dirs:
00121 successes, nulls, fails = get_stats(c, threshold, ((from_id, till_id),))
00122 total_successes += successes
00123 total_nulls += nulls
00124 total_fails += fails
00125 if file_folders.has_key(name):
00126 file_folders[name].append([file_id, ds_name, successes, nulls, fails])
00127 else:
00128 file_folders[name] = [file_id, ds_name, successes, nulls, fails]
00129 return [('Summary', [file_id, ds_name, total_successes, total_nulls, total_fails])] + file_folders.items()
00130
00131
00132 def join_ranges(ranges, elem):
00133 '''To do less DB calls, joins [(from_id, till_id), ...] ranges.'''
00134 if type(ranges) == tuple:
00135 ranges = [ranges]
00136 if ranges[-1][-1] + 1 == elem[0]:
00137 ranges[-1] = (ranges[-1][0], elem[1])
00138 else:
00139 ranges.append(elem)
00140 return ranges
00141
00142
00143 def get_release_list(c):
00144 '''Returns all ``ReleaseComparisons`` found on database.'''
00145 c.execute('SELECT title, statistical_test FROM ReleaseComparison')
00146 return c.fetchall()
00147
00148
00149 def db_list_with_releases(path='.'):
00150 '''Returns available database list and their releases.'''
00151 db_list = [db for db in listdir(path) if db.endswith('.db')]
00152 db_list_with_releases = []
00153 for db in db_list:
00154 conn = sqlite3.connect(join(path, db))
00155 releases = get_release_list(conn.cursor())
00156 db_list_with_releases.append((db[:-3], releases))
00157 conn.close()
00158 return db_list_with_releases
00159
00160
00161
00162 def get_release_summary_stats(c, release_title, st_test, threshold=1e-5):
00163 '''Returns context for ``release_summary.html`` template.'''
00164
00165 context = dict()
00166 c.execute('''SELECT release1, release2, id FROM ReleaseComparison
00167 WHERE title = ? AND statistical_test = ?''', (release_title, st_test))
00168 context['release1'], context['release2'], release_comp_id = c.fetchone()
00169
00170
00171 c.execute('''SELECT from_histogram_id, till_histogram_id FROM Directory
00172 WHERE id IN (SELECT directory_id FROM RootFileComparison
00173 WHERE release_comparison_id = ?)''', (release_comp_id,))
00174 dir_ranges = c.fetchall()
00175
00176 if len(dir_ranges) > 1:
00177 dir_ranges = reduce(join_ranges, dir_ranges)
00178
00179 context['successes'], context['nulls'], context['fails'], = get_stats(c, threshold, dir_ranges)
00180
00181 context['total'] = context['successes'] + context['fails'] + context['nulls']
00182 if context['total']:
00183 context['success'], context['null'], context['fail'] = \
00184 get_percentage(context['successes'], context['nulls'], context['fails'])
00185
00186
00187 c.execute('''SELECT id, filename1, directory_id FROM RootFileComparison
00188 WHERE release_comparison_id = ?''', (release_comp_id,))
00189 files = c.fetchall()
00190
00191
00192 folders = dict()
00193 for file_id, filename, dir_id in files:
00194
00195 file_folders = get_folders(c, file_id, filename, dir_id, threshold)
00196 for folder_name, file_folder_stats in file_folders:
00197 if folders.has_key(folder_name):
00198
00199 folders[folder_name].append(file_folder_stats)
00200
00201 folders[folder_name][0][2] += file_folder_stats[2]
00202 folders[folder_name][0][3] += file_folder_stats[3]
00203 folders[folder_name][0][4] += file_folder_stats[4]
00204 else:
00205 folder_summary = [None, 'Summary', file_folder_stats[2],
00206 file_folder_stats[3], file_folder_stats[4]]
00207 folders[folder_name] = [folder_summary, file_folder_stats]
00208
00209
00210 folders = [('Summary', folders.pop('Summary'))] + sorted(folders.items(), key=lambda x: x[0])
00211 for folder, file_stats in folders:
00212
00213 if len(file_stats) != len(files)+1:
00214 for i, file_ in enumerate(files):
00215 if file_[0] != file_stats[i][0]:
00216 file_stats = file_stats[:i] + [[None, "N/A", None, None, None]] + file_stats[i:]
00217
00218 for i, stats in enumerate(file_stats):
00219 stats[2], stats[3], stats[4] = get_percentage(*stats[2:5])
00220 context['folders'] = folders
00221
00222
00223
00224 for folder in folders:
00225 print folder
00226
00227
00228
00229
00230
00231
00232 c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM Directory
00233 WHERE parent_id IN (SELECT directory_id FROM RootFileComparison
00234 WHERE release_comparison_id = ?)''', (release_comp_id,))
00235 lvl3_dir_ranges = c.fetchall()
00236
00237 cum_lvl3_dir_ranges = dict()
00238 for name, from_id, till_id in lvl3_dir_ranges:
00239 if cum_lvl3_dir_ranges.has_key(name):
00240 cum_lvl3_dir_ranges[name].append((from_id, till_id))
00241 else:
00242 cum_lvl3_dir_ranges[name] = [(from_id, till_id)]
00243
00244
00245 summary_stats = dict()
00246 detailed_stats = dict()
00247 for name, ranges in cum_lvl3_dir_ranges.iteritems():
00248 successes, nulls, fails = get_stats(c, threshold, ranges)
00249 if detailed_stats.has_key(name):
00250 detailed_stats[name][0] += successes
00251 detailed_stats[name][1] += nulls
00252 detailed_stats[name][2] += fails
00253 else:
00254 detailed_stats[name] = [successes, nulls, fails]
00255 if renaming.has_key(name):
00256 if summary_stats.has_key(renaming[name]):
00257 summary_stats[renaming[name]][0] += successes
00258 summary_stats[renaming[name]][1] += nulls
00259 summary_stats[renaming[name]][2] += fails
00260 else:
00261 summary_stats[renaming[name]] = [successes, nulls, fails]
00262
00263
00264 summary_ratios = []
00265 for name, stats in summary_stats.iteritems():
00266 total = sum(stats)
00267 if total:
00268 ratio = float(stats[0]) / sum(stats)
00269 summary_ratios.append((name, ratio))
00270 detailed_ratios = []
00271 for name, stats in detailed_stats.iteritems():
00272 total = sum(stats)
00273 if total:
00274 ratio = float(stats[0]) / sum(stats)
00275 detailed_ratios.append((name, ratio))
00276
00277 context['summary_ratios'] = sorted(summary_ratios, key=lambda x: x[0])
00278 context['detailed_ratios'] = sorted(detailed_ratios, key=lambda x: x[0])
00279 return context
00280
00281
00282 def get_directory_summary_stats(c, url_args, file_id, threshold):
00283 '''Returns context for ``directory_summary.html`` template.'''
00284 context = dict()
00285 c.execute('''SELECT directory_id, filename1, filename2 FROM RootFileComparison
00286 WHERE id = ?''', (file_id,))
00287 dir_id, f1, f2 = c.fetchone()
00288 context['release1'] = get_release(f1)
00289 context['release2'] = get_release(f2)
00290 if not url_args:
00291 dir_name = get_dataset_name(f1)
00292 else:
00293
00294 directory_names = []
00295
00296 for dir_name in url_args:
00297 c.execute('''SELECT id, name FROM Directory WHERE name = ? AND
00298 parent_id = ?''', (dir_name, dir_id))
00299 dir_id, name = c.fetchone()
00300 directory_names.append(name)
00301 context['parent_name'] = '/'.join(directory_names)
00302
00303
00304 c.execute('''SELECT from_histogram_id, till_histogram_id FROM
00305 Directory WHERE id = ?''', (dir_id,))
00306 ranges = c.fetchone()
00307 successes, nulls, fails = get_stats(c, threshold, (ranges,))
00308 success, null, fail = get_percentage(successes, nulls, fails)
00309 context.update({
00310 'successes': successes, 'nulls': nulls, 'fails': fails,
00311 'success': success, 'null': null, 'fail': fail,
00312 'total': successes + nulls + fails, 'dir_name': dir_name
00313 })
00314
00315 c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM Directory
00316 WHERE parent_id = ?''', (dir_id,))
00317 subdirs = c.fetchall()
00318 subdir_stats = []
00319 for name, from_id, till_id in subdirs:
00320 successes, nulls, fails = get_stats(c, threshold, [(from_id, till_id,)])
00321 success, null, fail = get_percentage(successes, nulls, fails)
00322 subdir_stats.append((name, successes + nulls + fails, successes,
00323 nulls, fails, success, null, fail))
00324 context['subdirs'] = sorted(subdir_stats, key=lambda x: x[4], reverse=True)
00325
00326
00327 c.execute('''SELECT name, p_value FROM HistogramComparison
00328 WHERE directory_id = ?''', (dir_id,))
00329 failed_histos = []
00330 successful_histos = []
00331 null_histos = []
00332 for name, p_value in c.fetchall():
00333 path = quote('%s/%s' % ('/'.join(url_args), name))
00334 url1 = get_img_url(path, f1)
00335 url2 = get_img_url(path, f2)
00336 overlay = get_img_url(path, f1, f2)
00337 if p_value < 0:
00338 null_histos.append((name, p_value, url1, url2, overlay))
00339 elif p_value <= threshold:
00340 failed_histos.append((name, p_value, url1, url2, overlay))
00341 else:
00342 successful_histos.append((name, p_value, url1, url2, overlay))
00343
00344 context['failed_histos'] = sorted(failed_histos, key=lambda x: x[1], reverse=True)
00345 context['null_histos'] = null_histos
00346 context['successful_histos'] = sorted(successful_histos, key=lambda x: x[1], reverse=True)
00347 return context