CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_9_patch3/src/Utilities/RelMon/scripts/ValidationMatrix_v2.py

Go to the documentation of this file.
00001 #! /usr/bin/env python
00002 """
00003 The script compares two releases, generates SQLite3 database file with release
00004 comparison information.
00005 
00006 Author:  Albertas Gimbutas,  Vilnius University (LT)
00007 e-mail:  albertasgim@gmail.com
00008 
00009 Note: default Pool size for file comparison is 7.
Note: static HTML generation is NOT fully implemented yet.
00011 """
00012 import sqlite3
00013 from datetime import datetime
00014 from multiprocessing import Pool, Queue, Process
00015 from subprocess import call
00016 from optparse import OptionParser, OptionGroup
00017 from os import makedirs, remove
00018 from os.path import basename, join, exists
00019 
00020 from Utilities.RelMon.utils_v2 import *
00021 from compare_using_files_v2 import RootFileComparison
00022 
00023 
##  Parse options
# Command-line interface of the script.  --re1/--re2 have no default and are
# validated in the __main__ section; every option in the 'Optional' group
# falls back to the default given here.
parser = OptionParser(
    usage='Usage: %prog --re1 RELEASE1 [--f1 FR,FR,..] '
          '--re2 RELEASE2 [--f2 FR,FR,..] [--st ST_TESTS] [options]')

# Releases to compare.
parser.add_option(
    '--re1', dest='release1', action='store', default=None,
    help='First CMSSW release for release comparison, e.g. CMSSW_5_3_2_pre7.')
parser.add_option(
    '--re2', dest='release2', action='store', default=None,
    help='Second CMSSW release for release comparison.')

# Filename fragment filters, one per release.
parser.add_option(
    '--f1', dest='fragments1', action='store', default='',
    help='Comma separated filename fragments that have or have not to be '
         'in RELEASE1 filenames. For "not include" use `!` before fragment, '
         'e.g. `--f1 FullSim,!2012`.')
parser.add_option(
    '--f2', dest='fragments2', action='store', default='',
    help='Comma separated filename fragments that have or have not to be '
         'in RELEASE2 filenames. For "not include" use `!` before fragment.')

# Everything below is optional and shown under its own help heading.
optional_group = OptionGroup(parser, 'Optional')
optional_group.add_option(
    '--st', dest='st_tests', action='store', default='KS',
    help='Comma separated statistical tests to use. \n'
         'Available: KS, Chi2. Default: %default.')
optional_group.add_option(
    '--title', dest='title', action='store', default=None,
    help='Release comparison title.')
optional_group.add_option(
    '--dir', dest='dir', action='store', default=None,
    help='Directory to download and compare files in.')
optional_group.add_option(
    '--url', dest='url', action='store', default=None,
    help='URL to fetch ROOT files from. File search is recursive '
         'for links in given URL.')
optional_group.add_option(
    '--no-url', dest='no_url', action='store_true', default=False,
    help='Search for files in DIR (specified by --dir option), '
         'do NOT browse for files online.')
optional_group.add_option(
    '--db', dest='db_name', action='store', default=None,
    help='SQLite3 .db filename to use for the comparison. '
         'Default: auto-generated SQLite3 .db file.')
optional_group.add_option(
    '--cl', dest='clear_db', action='store_true', default=False,
    help='Clean DB before comparison.')
optional_group.add_option(
    '--dry', dest='dry', action='store_true', default=False,
    help='Do not download or compare files, just show the progress.')
optional_group.add_option(
    '--html', dest='html', action='store_true', default=False,
    help='Generate static html. Default: %default.')
parser.add_option_group(optional_group)
00061 
00062 
def call_compare_using_files(args):
    """Run ``./compare_using_files_v2.py`` on one pair of files.

    ``args`` is a single 5-item sequence
    ``(file1, file2, work_path, db_name, clear_db)``.  Both file names are
    resolved relative to ``work_path``; ``--cl`` is appended to the command
    line when ``clear_db`` is true.

    Returns the value returned by ``subprocess.call`` for the command.
    """
    first_name, second_name, work_path, db_name, clear_db = args
    first_path = join(work_path, first_name)
    second_path = join(work_path, second_name)
    command = ['./compare_using_files_v2.py', first_path, second_path, '--db', db_name]
    command += ['--cl'] if clear_db else []
    return call(command)
00069 
def partial_db_name(db_name, i):
    """Generates temporary database name.

    Args:
        db_name: name (or path) of the main database file.
        i: zero-based index of the partial database.

    Returns:
        ``db_name`` without a trailing ``.db`` suffix, followed by
        ``___<i + 1>.db``.
    """
    suffix = '.db'
    # Remove the suffix explicitly: str.strip('.db') would instead strip any
    # of the characters '.', 'd' and 'b' from BOTH ends of the name
    # (e.g. 'bad.db' -> 'a').
    if db_name.endswith(suffix):
        stem = db_name[:-len(suffix)]
    else:
        stem = db_name
    return '%s___%d.db' % (stem, i + 1)
00073 
def merge_dbs(main_db, partial_db):
    """Append the rows of ``partial_db`` to ``main_db``.

    Rows of the ``ReleaseComparison``, ``RootFileComparison``, ``Directory``
    and ``HistogramComparison`` tables are copied from the partial database
    into the main one.  Ids that point at other rows are shifted by offsets
    read from the main database before the merge:

    * ``release_comparison_id`` by the number of ``ReleaseComparison`` rows,
    * ``Directory.id``, ``parent_id`` and every ``directory_id`` by the
      largest ``Directory.id``,
    * ``from_histogram_id`` / ``till_histogram_id`` by the largest
      ``HistogramComparison.id``.

    All offsets stay 0 when the main ``Directory`` table is empty.  The
    inserts are committed together; if any statement fails nothing is
    committed and the connection is still closed.

    Args:
        main_db: path of the database that receives the rows.
        partial_db: path of the database whose rows are copied.

    Returns:
        The largest ``RootFileComparison.id`` in the main database after the
        merge (``None`` if that table is empty).
    """
    conn = sqlite3.connect(main_db)
    try:
        c = conn.cursor()

        ## Test if database is empty
        c.execute('SELECT * FROM Directory LIMIT 1')
        main_has_directories = c.fetchone() is not None

        ## Select offsets
        rel_cmp_offset = directory_offset = hist_cmp_offset = 0
        if main_has_directories:
            c.execute('SELECT count(*) FROM ReleaseComparison')
            rel_cmp_offset = c.fetchone()[0]
            # max(id) is NULL (None) for an empty table; treat it as "no offset".
            c.execute('SELECT max(id) FROM Directory')
            directory_offset = c.fetchone()[0] or 0
            c.execute('SELECT max(id) FROM HistogramComparison')
            hist_cmp_offset = c.fetchone()[0] or 0

        ## Merge DBs
        # The path is bound as a parameter so that quotes in it cannot break
        # the statement.
        c.execute('ATTACH DATABASE ? AS partial', (partial_db,))

        c.execute('''INSERT INTO ReleaseComparison (title, release1, release2, statistical_test)
                     SELECT title, release1, release2, statistical_test
                     FROM partial.ReleaseComparison''')

        c.execute('''INSERT INTO RootFileComparison (filename1, filename2, release_comparison_id, directory_id)
                     SELECT filename1, filename2, release_comparison_id + ?, directory_id + ?
                     FROM partial.RootFileComparison''',
                  (rel_cmp_offset, directory_offset))

        c.execute('''INSERT INTO Directory (id, name, parent_id, from_histogram_id, till_histogram_id)
                     SELECT id + ?, name, parent_id + ?, from_histogram_id + ?, till_histogram_id + ?
                     FROM partial.Directory''',
                  (directory_offset, directory_offset, hist_cmp_offset, hist_cmp_offset))

        c.execute('''INSERT INTO HistogramComparison (name, p_value, directory_id)
                     SELECT name, p_value, directory_id + ?
                     FROM partial.HistogramComparison''',
                  (directory_offset,))

        conn.commit()

        ## Select Last RootFileComparison ID
        c.execute('SELECT max(id) FROM RootFileComparison')
        return c.fetchone()[0]
    finally:
        # Closing without a commit discards a half-finished merge.
        conn.close()
00118 
00119 
class ReleaseComparison(object):
    """Generates release comparison information and stores it on SQLite3 .db file."""

    def __init__(self, work_path=None, db_name=None, clear_db=False, dry=False, no_url=False, use_external=False):
        """Store the comparison settings.

        Args:
            work_path: directory the files are searched in / downloaded to.
                When empty and files are searched online, ``compare()``
                derives a directory name from the two release versions.
            db_name: database filename; ``compare()`` joins it with
                ``work_path`` and auto-generates a name when it is empty.
            clear_db: truncate the database file before comparing.
            dry: when searching online, exit right after the file search.
            no_url: search for files on disk instead of online.
            use_external: compare files by running
                ``compare_using_files_v2.py`` in a process pool instead of
                calling ``RootFileComparison`` in this process.
        """
        self.work_path = work_path
        self.db_name = db_name
        self.clear_db = clear_db
        self.dry = dry
        self.no_url = no_url
        self.use_external_script_to_compare_files = use_external

    def was_compared(self, release1, release2, st_test_name):
        """Look up a stored comparison of the two releases.

        Returns the ``ReleaseComparison.id`` of a row in ``self.db_name``
        matching both releases and the statistical test name, or ``False``
        when there is no such row.
        """
        conn = sqlite3.connect(self.db_name)
        try:
            c = conn.cursor()
            c.execute('''SELECT id FROM ReleaseComparison WHERE release1=? AND
                    release2=? AND statistical_test=?''', (release1, release2, st_test_name))
            release_comparison_id = c.fetchone()
        finally:
            conn.close()
        if release_comparison_id:
            return release_comparison_id[0]
        return False

    @staticmethod
    def _require_file_pairs(file_pairs):
        """Fail with a readable message when the file search matched nothing."""
        if not file_pairs:
            raise ValueError('No matching file pairs found for the given releases and fragments.')

    def compare(self, rel1, frags1, rel2, frags2, st_tests, url=None, title=None):
        """Find the files of two releases, compare them and store the results.

        Args:
            rel1, frags1, rel2, frags2: release names and filename fragments,
                passed unchanged to the file search helpers.
            st_tests: comma separated statistical test names; each one is
                looked up in ``tests``.
            url: start URL for the online search (unused with ``no_url``).
            title: title stored with a newly inserted ``ReleaseComparison``
                row; defaults to ``"<release1>__VS__<release2>"``.

        After the call ``self.db_name`` holds the value returned by
        ``init_database``.

        Raises:
            ValueError: if no file pairs were found.
            SystemExit: in dry mode, after the online file search.
        """
        print('\n#################     Searching for files     ###################')
        if self.no_url:
            # NOTE(review): self.dry is only honoured in the online branch
            # below; with no_url the comparison runs even in dry mode.
            print('Searching for files on disk at %s' % (self.work_path))
            files1, files2 = search_on_disk(self.work_path, rel1, frags1, rel2, frags2)
            file_pairs = make_file_pairs(files1, files2)
            self._require_file_pairs(file_pairs)
        else:
            print('Searching for files online at:')
            files_with_urls1, files_with_urls2 = recursive_search_online(url, rel1, frags1, rel2, frags2)
            file_pairs = make_file_pairs(files_with_urls1, files_with_urls2)
            self._require_file_pairs(file_pairs)
            files_with_urls1.update(files_with_urls2)
            files1, files2 = zip(*file_pairs)
            paired_files_with_urls = [(name, files_with_urls1[name]) for name in files1 + files2]

            if self.dry:
                print('DRY: nothing to do. Exiting.')
                raise SystemExit

            ## Create working directory if not given.
            if not self.work_path:
                # Only the directory is chosen here.  self.db_name is joined
                # with self.work_path exactly once, in the database section
                # below; joining it here as well produced a doubled path
                # (work_path/work_path/db_name).
                self.work_path = '%s___VS___%s' % (get_version(files1[0]), get_version(files2[0]))

            if not exists(self.work_path):
                print('\n###################      Preparing directory     ###################')
                # The trailing comma after print(...) keeps the Python 2
                # "no newline" behaviour, so 'Done.' ends up on the same line.
                print('Creating working directory: %s ...' % self.work_path),
                makedirs(self.work_path)
                print('Done.')

            print('\n#################     Downloading the files     ###################')
            total_size, files_to_download = get_size_to_download(self.work_path, paired_files_with_urls)
            check_disk_for_space(self.work_path, total_size)

            ## Download needed files.
            q = Queue()
            show_status_bar.q = q
            auth_download_file.q = q
            auth_download_file.work_dir = self.work_path

            Process(target=show_status_bar, args=(total_size,)).start()
            download_pool = Pool(2)
            try:
                download_pool.map(auth_download_file, files_to_download)
            finally:
                # Let the workers exit.  They are not joined: they share q
                # with the status bar process, and joining a process that
                # still has unflushed queue data could block.
                download_pool.close()
            if total_size:
                print("Done.")

        ## Create database
        print('\n#################     Preparing Database     ###################')
        release1 = get_version(file_pairs[0][0])
        release2 = get_version(file_pairs[0][1])
        if not self.db_name:
            self.db_name = '%s___VS___%s.db' % (release1, release2)
        db_path = join(self.work_path, self.db_name)

        if self.clear_db:
            print('Clearing DB: %s...' % self.db_name),
            open(db_path, 'w').close()
            print('Done.')

        ## Compare file pairs.
        self.db_name = init_database(db_path)

        # TODO: Use multiprocessing for this task.
        for st_test_name in st_tests.split(','):
            print('\n#################     Comparing Releases (%s)     ###################' % st_test_name)
            st_test = tests[st_test_name]()

            some_files_compared = False
            file_comparison_ids = []
            if self.use_external_script_to_compare_files:
                # Compare files using compare_using_files_v2.py; every pair
                # writes into its own partial database.
                arg_list = [list(pair) + [self.work_path, partial_db_name(self.db_name, i),
                                          self.clear_db] for i, pair in enumerate(file_pairs)]
                pool = Pool(7)
                try:
                    pool.map(call_compare_using_files, arg_list)
                finally:
                    # A new pool is created per statistical test; release its
                    # worker processes instead of keeping them until exit.
                    pool.close()
                    pool.join()

                # Merge databases
                print('\n#################     Merging DBs (%s)     ###################' % st_test_name)
                for i, _pair in enumerate(file_pairs):
                    tmp_db = partial_db_name(self.db_name, i)
                    print('Merging %s...' % (basename(tmp_db),)),
                    file_comparison_ids.append(merge_dbs(self.db_name, tmp_db))
                    remove(tmp_db)
                    print('Done.')
                    some_files_compared = True
            else:
                file_comparison = RootFileComparison(self.db_name)

                for file1, file2 in file_pairs:
                    # TODO: If files are not found display a nice message.
                    # TODO: Maybe a subprocess would control the unwanted reports of RootFileComparison.compare()
                    file1_path = join(self.work_path, file1)
                    file2_path = join(self.work_path, file2)

                    if not file_comparison.was_compared(file1, file2, st_test_name):
                        print("Comparing:\n%s\n%s\n" % (file1, file2))
                        file_comparison_id = file_comparison.compare(file1_path, file2_path, st_test)
                        file_comparison_ids.append(file_comparison_id)
                        some_files_compared = True
                    else:
                        print("Already compared:\n%s\n%s\n" % (file1, file2))

            ## Calculate statistics for the release.
            if some_files_compared:
                release_comparison_id = self.was_compared(release1, release2, st_test_name)
                conn = sqlite3.connect(self.db_name)
                try:
                    c = conn.cursor()
                    if not release_comparison_id:
                        print('Inserting release "%s  VS  %s" description.\n' % (release1, release2))
                        if not title:
                            title = "%s__VS__%s" % (release1, release2)
                        c.execute('''INSERT INTO ReleaseComparison(title, release1, release2,
                                       statistical_test) VALUES (?, ?, ?, ?)''', (title,
                                    release1, release2, st_test_name))
                        release_comparison_id = c.lastrowid
                    # Attach the file comparisons made above to the release row.
                    c.executemany('''UPDATE RootFileComparison SET release_comparison_id = ?
                            WHERE id == ?''', [(release_comparison_id, fid) for fid in file_comparison_ids])
                    conn.commit()
                finally:
                    conn.close()
00259 
if __name__ == '__main__':
    # Script entry point: parse the options, run the release comparison and
    # report the elapsed wall-clock time.
    started_at = datetime.now()
    opts, args = parser.parse_args()
    if not (opts.release1 and opts.release2):
        parser.error('Not all releases specified. Please check --re1 and --re2 options.')

    rel_cmp = ReleaseComparison(work_path=opts.dir, db_name=opts.db_name, clear_db=opts.clear_db,
                                dry=opts.dry, no_url=opts.no_url, use_external=True)
    rel_cmp.compare(opts.release1, opts.fragments1, opts.release2, opts.fragments2,
                    opts.st_tests, url=opts.url, title=opts.title)

    if opts.html:
        print('\n#################     Generating static HTML    #################')
        print('\n  Warrning!!!  Did NOT finished the implementation. \n')
        # Imported only when needed, so the web package is not required for
        # a plain comparison run.
        from Utilities.RelMon.web.dbfile2html import dbfile2html
        # NOTE(review): opts.dir is None when --dir was not given, while
        # compare() may have chosen rel_cmp.work_path itself -- confirm which
        # of the two dbfile2html expects.
        dbfile2html(rel_cmp.db_name, opts.dir)

    elapsed = datetime.now() - started_at
    print('#################     Execution time: %s    #################\n' % (elapsed,))