CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_6_2_7/src/Utilities/RelMon/python/web/app_utils.py

Go to the documentation of this file.
00001 #! /usr/bin/python
00002 # coding: utf-8
00003 '''
00004 Helper functions for CherryPy application ``browse_db.py``.
00005 
00006 Author:  Albertas Gimbutas,  Vilnius University (LT)
00007 e-mail:  albertasgim@gmail.com
00008 '''
00009 
00010 import sqlite3
00011 import re
00012 from os import getcwd, listdir
00013 from os.path import join
00014 from urllib import quote
00015 
00016 
00017 renaming = {
00018         'MessageLogger': 'Miscellanea', 'FourVector': 'Generic',
00019         'Castor': 'Castor Calorimeter', 'RPCDigisV': 'Resistive Plate Chambers',
00020         'GlobalRecHitsV': 'Miscellanea: Sim.', 'Top': 'Top', 'HLTJETMET': 'JetMet',
00021         'GlobalDigisV': 'Miscellanea: Sim.', 'L1TEMU': 'Level 1 Trigger',
00022         'TrackerRecHitsV': 'Tracking System', 'MuonDTHitsV': 'Muon Objects',
00023         'EcalDigisV': 'Ecal Calorimeter', 'EcalHitsV': 'Ecal Calorimeter',
00024         'Muons': 'Muon Objects', 'DT': 'Drift Tubes', 'TrackerDigisV': 'Tracking System',
00025         'Pixel': 'Tracking System', 'EcalPreshower': 'Ecal Calorimeter',
00026         'EgammaV': 'Photons', 'AlCaEcalPi0': 'Alca', 'SusyExo': 'SusyExo',
00027         'MuonDTDigisV': 'Muon Objects', 'TauRelVal': 'Tau',
00028         'HcalHitsV': 'Hcal Calorimeter', 'RPC': 'Resistive Plate Chambers',
00029         'EcalRecHitsV': 'Ecal Calorimeter', 'EgOffline': 'EGamma',
00030         'MuonCSCDigisV': 'Muon Objects', 'ParticleFlow': 'Miscellanea',
00031         'Info': 'Miscellanea', 'Tracking': 'Tracking',
00032         'NoiseRatesV': 'Miscellanea: Sim.', 'Generator': 'Miscellanea: Sim.',
00033         'Btag': 'B Tagging', 'Higgs': 'Higgs', 'GlobalHitsV': 'Miscellanea: Sim.',
00034         'HcalRecHitsV': 'Hcal Calorimeter', 'TrackerHitsV': 'Tracking System',
00035         'CSC': 'Cathode Strip Chambers', 'Muon,HLTMonMuon': 'Muon',
00036         'Hcal': 'Hcal Calorimeter', 'TauOffline': 'Tau',
00037         'HeavyFlavor': 'HeavyFlavor', 'JetMET': 'Jet', 'Physics': 'Miscellanea',
00038         'CaloTowersV': 'Hcal Calorimeter', 'SiStrip': 'Tracking System',
00039         'EcalClusterV': 'Ecal Calorimeter', 'HLTEgammaValidation': 'EGamma',
00040         'EcalPhiSym': 'Alca', 'L1T': 'Level 1 Trigger', 'MixingV': 'Miscellanea: Sim.',
00041         'FourVector_Val': 'Generic', 'EcalEndcap': 'Ecal Calorimeter',
00042         'TauOnline': 'Tau', 'Egamma': 'Photons', 'HcalIsoTrack': 'Alca',
00043         'EcalBarrel': 'Ecal Calorimeter'
00044 }
00045 
00046 
00047 def get_img_path(filename, path):
00048     '''Returns image path for https://cmsweb.cern.ch/dqm histogram
00049     visualisation service'''
00050     run = int(re.findall('_R(\d*)__', filename)[0])
00051     parts = [e.rstrip('.root') for e in filename.split('__')]
00052     path = path.replace('Run summary/', '')
00053     return 'archive/%s/%s/%s/%s/%s' % (run, parts[1], parts[2], parts[3], path)
00054 
00055 
00056 def get_img_url(path, f1, f2=None, w=250, h=250):
00057     '''Returns full URL of histogram (or histogram overlay) image for
00058     https://cmsweb.cern.ch/dqm visualisation service.'''
00059     base = 'https://cmsweb.cern.ch/dqm/relval/plotfairy'
00060     if not f2:
00061         return '%s/%s?w=%s;h=%s' % (base, get_img_path(f1, path), w, h)
00062     return '%s/overlay?obj=%s;obj=%s;w=%s;h=%s' % (base,
00063                  get_img_path(f1, path), get_img_path(f2, path), w, h)
00064 
00065 
00066 def get_dataset_name(name):
00067     '''Returns extracted dataset name from the given ROOT filename.'''
00068     if re.search('RelVal', name):
00069         run = str(int(re.findall('_R(\d{9})_', name)[0]))
00070         ds = re.findall('GR_R_\d*_V\d*C?_(?:RelVal)?_([\w\d]*-v\d+)_', name)[0]
00071     else:
00072         run, ds = re.findall('R(\d{9})__([\w\d]*)__CMSSW_', name)[0:1]
00073     return '_'.join([ds, str(int(run))])
00074 
00075 
00076 def get_release(name):
00077     '''Returns extracted release from the given ROOT filename.'''
00078     return re.findall('R\d{9}__([\w\d_-]*)__DQM.root', name)[0]
00079 
00080 
00081 def get_stats(c, threshold, dir_ranges):
00082     '''Returns ``successes``, ``fails``, ``nulls`` for the given dir_ranges.'''
00083     successes, nulls, fails = 0, 0, 0
00084     for from_id, till_id in dir_ranges:
00085         c.execute('''SELECT count(*) FROM HistogramComparison
00086                      WHERE p_value >= 0 AND p_value > ? AND
00087                      id >= ? and id <= ?''', (threshold, from_id, till_id))
00088         successes += c.fetchone()[0]
00089         c.execute('''SELECT count(*) FROM HistogramComparison WHERE
00090                      p_value < 0 AND id >= ? AND id <= ?''', (from_id, till_id))
00091         nulls += c.fetchone()[0]
00092         c.execute('''SELECT count(*) FROM HistogramComparison
00093                      WHERE p_value >= 0 AND p_value <= ? AND
00094                      id >= ? AND id <= ?''', (threshold, from_id, till_id))
00095         fails += c.fetchone()[0]
00096     return successes, nulls, fails
00097 
00098 
00099 def get_percentage(successes, nulls, fails):
00100     '''Converts integers ``successes``, ``nulls`` and ``fails`` to percents.'''
00101     if successes is None:
00102         return None, None, None
00103     total = successes + fails + nulls
00104     if not total:
00105         return None, None, None
00106     success =  round(100. * successes / total, 2)
00107     null =  round(100. * nulls / total, 2)
00108     fail =  round(100. * fails / total, 2)
00109     return success, null, fail
00110 
00111 
00112 def get_folders(c, file_id, filename, dir_id, threshold):  # TODO: If folder [Egamma|JetMet] analyse their subdirs
00113     '''Returns file folder stats for one "summary table" column.'''
00114     ds_name = get_dataset_name(filename)
00115     c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM
00116                  Directory WHERE parent_id=?''', (dir_id,))
00117     dirs = c.fetchall()
00118     file_folders = dict()
00119     total_successes, total_nulls, total_fails = 0, 0, 0
00120     for name, from_id, till_id in dirs:
00121         successes, nulls, fails = get_stats(c, threshold, ((from_id, till_id),))
00122         total_successes += successes
00123         total_nulls += nulls
00124         total_fails += fails
00125         if file_folders.has_key(name):
00126             file_folders[name].append([file_id, ds_name, successes, nulls, fails])
00127         else:
00128             file_folders[name] = [file_id, ds_name, successes, nulls, fails]
00129     return [('Summary', [file_id, ds_name, total_successes, total_nulls, total_fails])] + file_folders.items()
00130 
00131 
00132 def join_ranges(ranges, elem):
00133     '''To do less DB calls, joins [(from_id, till_id), ...] ranges.'''
00134     if type(ranges) == tuple:
00135         ranges = [ranges]
00136     if ranges[-1][-1] + 1 == elem[0]:
00137         ranges[-1] = (ranges[-1][0], elem[1])
00138     else:
00139         ranges.append(elem)
00140     return ranges
00141 
00142 
00143 def get_release_list(c):
00144     '''Returns all ``ReleaseComparisons`` found on database.'''
00145     c.execute('SELECT title, statistical_test FROM ReleaseComparison')
00146     return c.fetchall()
00147 
00148 
00149 def db_list_with_releases(path='.'):
00150     '''Returns available database list and their releases.'''
00151     db_list = [db for db in listdir(path) if db.endswith('.db')]
00152     db_list_with_releases = []
00153     for db in db_list:
00154         conn = sqlite3.connect(join(path, db))
00155         releases = get_release_list(conn.cursor())
00156         db_list_with_releases.append((db[:-3], releases))
00157         conn.close()
00158     return db_list_with_releases
00159 
00160 # -------------------     Template Context generators     --------------------
00161 
00162 def get_release_summary_stats(c, release_title, st_test, threshold=1e-5):
00163     '''Returns context for ``release_summary.html`` template.'''
00164     ## Summary
00165     context = dict()
00166     c.execute('''SELECT release1, release2, id FROM ReleaseComparison
00167                  WHERE title = ? AND statistical_test = ?''', (release_title, st_test))
00168     context['release1'], context['release2'], release_comp_id = c.fetchone()
00169 
00170     # All directory ranges
00171     c.execute('''SELECT from_histogram_id, till_histogram_id FROM Directory
00172                    WHERE id IN (SELECT directory_id FROM RootFileComparison
00173                    WHERE release_comparison_id = ?)''', (release_comp_id,))
00174     dir_ranges = c.fetchall()
00175 
00176     if len(dir_ranges) > 1:
00177         dir_ranges = reduce(join_ranges, dir_ranges)
00178 
00179     context['successes'], context['nulls'], context['fails'], = get_stats(c, threshold, dir_ranges)
00180 
00181     context['total'] = context['successes'] + context['fails'] + context['nulls']
00182     if context['total']:
00183         context['success'], context['null'], context['fail'] = \
00184             get_percentage(context['successes'], context['nulls'], context['fails'])
00185 
00186     ## Data needed for the all the statistics:
00187     c.execute('''SELECT id, filename1, directory_id FROM RootFileComparison
00188                  WHERE release_comparison_id = ?''', (release_comp_id,))
00189     files = c.fetchall()
00190 
00191     ## folders: [(folder_name, [folder: (file_id, filename, success, null, fail)]), ...]
00192     folders = dict()
00193     for file_id, filename, dir_id in files:
00194         # file_folders: [(folder_name, [(file_id, file_name, success, null, fail)]), ...]
00195         file_folders = get_folders(c, file_id, filename, dir_id, threshold)
00196         for folder_name, file_folder_stats in file_folders:
00197             if folders.has_key(folder_name):
00198                 # Add folder stats
00199                 folders[folder_name].append(file_folder_stats)
00200                 # Update folder summary
00201                 folders[folder_name][0][2] += file_folder_stats[2]
00202                 folders[folder_name][0][3] += file_folder_stats[3]
00203                 folders[folder_name][0][4] += file_folder_stats[4]
00204             else:
00205                 folder_summary = [None, 'Summary', file_folder_stats[2],
00206                                     file_folder_stats[3], file_folder_stats[4]]
00207                 folders[folder_name] = [folder_summary, file_folder_stats]
00208 
00209     ## Calculate ratios
00210     folders = [('Summary', folders.pop('Summary'))] + sorted(folders.items(), key=lambda x: x[0])
00211     for folder, file_stats in folders:
00212         # Insert N/A if histo is missing
00213         if len(file_stats) != len(files)+1:
00214             for i, file_ in enumerate(files):
00215                 if file_[0] != file_stats[i][0]:
00216                     file_stats = file_stats[:i] + [[None, "N/A", None, None, None]] + file_stats[i:]
00217         # Count the ratios
00218         for i, stats in enumerate(file_stats):
00219             stats[2], stats[3], stats[4] = get_percentage(*stats[2:5])
00220     context['folders'] = folders
00221 
00222 
00223     ## Select Summary Barchart, Detailed Barchart
00224     for folder in folders:
00225         print folder
00226     #   detailed_ratios: (name, success_ratio)
00227     #   summary_ratios: (name, success_ratio)
00228 
00229 
00230     ## Summary Barchart
00231     # TODO: optimise not to fetch from DB again.
00232     c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM Directory
00233                  WHERE parent_id IN (SELECT directory_id FROM RootFileComparison
00234                  WHERE release_comparison_id = ?)''', (release_comp_id,))
00235     lvl3_dir_ranges = c.fetchall()
00236 
00237     cum_lvl3_dir_ranges = dict()
00238     for name, from_id, till_id in lvl3_dir_ranges:
00239         if cum_lvl3_dir_ranges.has_key(name):
00240             cum_lvl3_dir_ranges[name].append((from_id, till_id))
00241         else:
00242             cum_lvl3_dir_ranges[name] = [(from_id, till_id)]
00243 
00244     # Fetch stats
00245     summary_stats = dict()
00246     detailed_stats = dict()
00247     for name, ranges in cum_lvl3_dir_ranges.iteritems():
00248         successes, nulls, fails = get_stats(c, threshold, ranges)
00249         if detailed_stats.has_key(name):
00250             detailed_stats[name][0] += successes
00251             detailed_stats[name][1] += nulls
00252             detailed_stats[name][2] += fails
00253         else:
00254             detailed_stats[name] = [successes, nulls, fails]
00255         if renaming.has_key(name):
00256             if summary_stats.has_key(renaming[name]):
00257                 summary_stats[renaming[name]][0] += successes
00258                 summary_stats[renaming[name]][1] += nulls
00259                 summary_stats[renaming[name]][2] += fails
00260             else:
00261                 summary_stats[renaming[name]] = [successes, nulls, fails]
00262 
00263     # Calculate ratio
00264     summary_ratios = []
00265     for name, stats in summary_stats.iteritems():
00266         total = sum(stats)
00267         if total:
00268             ratio = float(stats[0]) / sum(stats)
00269             summary_ratios.append((name, ratio))
00270     detailed_ratios = []
00271     for name, stats in detailed_stats.iteritems():
00272         total = sum(stats)
00273         if total:
00274             ratio = float(stats[0]) / sum(stats)
00275             detailed_ratios.append((name, ratio))
00276 
00277     context['summary_ratios'] = sorted(summary_ratios, key=lambda x: x[0])
00278     context['detailed_ratios'] = sorted(detailed_ratios, key=lambda x: x[0])
00279     return context
00280 
00281 
00282 def get_directory_summary_stats(c, url_args, file_id, threshold):
00283     '''Returns context for ``directory_summary.html`` template.'''
00284     context = dict()
00285     c.execute('''SELECT directory_id, filename1, filename2 FROM RootFileComparison
00286                  WHERE id = ?''', (file_id,))
00287     dir_id, f1, f2 = c.fetchone()
00288     context['release1'] = get_release(f1)
00289     context['release2'] = get_release(f2)
00290     if not url_args:
00291         dir_name = get_dataset_name(f1)
00292     else:
00293         #### Select DQMData/Run directory.
00294         directory_names = []
00295 
00296         for dir_name in url_args:
00297             c.execute('''SELECT id, name FROM Directory WHERE name = ? AND
00298                     parent_id = ?''', (dir_name, dir_id))
00299             dir_id, name = c.fetchone()
00300             directory_names.append(name)
00301         context['parent_name'] = '/'.join(directory_names)
00302 
00303     ## Select stats
00304     c.execute('''SELECT from_histogram_id, till_histogram_id FROM
00305                  Directory WHERE id = ?''', (dir_id,))
00306     ranges = c.fetchone()
00307     successes, nulls, fails = get_stats(c, threshold, (ranges,))
00308     success, null, fail = get_percentage(successes, nulls, fails)
00309     context.update({
00310             'successes': successes, 'nulls': nulls, 'fails': fails,
00311             'success': success, 'null': null, 'fail': fail,
00312             'total': successes + nulls + fails, 'dir_name': dir_name
00313         })
00314     # subdirs: name, total, success, fail, null
00315     c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM Directory
00316                  WHERE parent_id = ?''', (dir_id,))
00317     subdirs = c.fetchall()
00318     subdir_stats = []
00319     for name, from_id, till_id in subdirs:
00320         successes, nulls, fails = get_stats(c, threshold, [(from_id, till_id,)])
00321         success, null, fail = get_percentage(successes, nulls, fails)
00322         subdir_stats.append((name, successes + nulls + fails, successes,
00323                              nulls, fails, success, null, fail))
00324     context['subdirs'] = sorted(subdir_stats, key=lambda x: x[4], reverse=True)
00325 
00326     # histograms: name, p_value
00327     c.execute('''SELECT name, p_value FROM HistogramComparison
00328                  WHERE directory_id = ?''', (dir_id,))
00329     failed_histos = []
00330     successful_histos = []
00331     null_histos = []
00332     for name, p_value in c.fetchall():
00333         path = quote('%s/%s' % ('/'.join(url_args), name))
00334         url1 = get_img_url(path, f1)
00335         url2 = get_img_url(path, f2)
00336         overlay = get_img_url(path, f1, f2)
00337         if p_value < 0:
00338             null_histos.append((name, p_value, url1, url2, overlay))
00339         elif p_value <= threshold:
00340             failed_histos.append((name, p_value, url1, url2, overlay))
00341         else:
00342             successful_histos.append((name, p_value, url1, url2, overlay))
00343 
00344     context['failed_histos'] = sorted(failed_histos, key=lambda x: x[1], reverse=True)
00345     context['null_histos'] = null_histos
00346     context['successful_histos'] = sorted(successful_histos, key=lambda x: x[1], reverse=True)
00347     return context