00001 #! /usr/bin/env python
00002 """
00003 The script compares two releases, generates SQLite3 database file with release
00004 comparison information.
00006 Author:  Albertas Gimbutas,  Vilnius University (LT)
00007 e-mail:
00009 Note: default Pool size for file comparison is 7.
Note: the static HTML generation implementation is NOT finished.
00011 """
00012 import sqlite3
00013 from datetime import datetime
00014 from multiprocessing import Pool, Queue, Process
00015 from subprocess import call
00016 from optparse import OptionParser, OptionGroup
00017 from os import makedirs, remove
00018 from os.path import basename, join, exists
00020 from Utilities.RelMon.utils_v2 import *
00021 from compare_using_files_v2 import RootFileComparison
##  Command line interface
def _build_option_parser():
    """Create the command line parser.

    Returns a ``(parser, optional_group)`` tuple: the OptionParser holding the
    release selection options and the 'Optional' OptionGroup attached to it.
    """
    cli = OptionParser(
        usage='Usage: %prog --re1 RELEASE1 [--f1 FR,FR,..] '
              '--re2 RELEASE2 [--f2 FR,FR,..] [--st ST_TESTS] [options]')

    # Release selection options: (flag, dest, default, help).
    release_options = (
        ('--re1', 'release1', None,
         'First CMSSW release for release comparison, e.g. CMSSW_5_3_2_pre7.'),
        ('--re2', 'release2', None,
         'Second CMSSW release for release comparison.'),
        ('--f1', 'fragments1', '',
         'Comma separated filename fragments that have or have not to be '
         'in RELEASE1 filenames. For "not include" use `!` before fragment, '
         'e.g. `--f1 FullSim,!2012`.'),
        ('--f2', 'fragments2', '',
         'Comma separated filename fragments that have or have not to be '
         'in RELEASE2 filenames. For "not include" use `!` before fragment.'),
    )
    for flag, dest, default, text in release_options:
        cli.add_option(flag, action='store', dest=dest, default=default, help=text)

    # Everything else is grouped under 'Optional':
    # (flag, action, dest, default, help).
    extra_options = (
        ('--st', 'store', 'st_tests', 'KS',
         'Comma separated statistical tests to use. \nAvailable: KS, Chi2. Default: %default.'),
        ('--title', 'store', 'title', None,
         'Release comparison title.'),
        ('--dir', 'store', 'dir', None,
         'Directory to download and compare files in.'),
        ('--url', 'store', 'url', None,
         'URL to fetch ROOT files from. File search is recursive '
         'for links in given URL.'),
        ('--no-url', 'store_true', 'no_url', False,
         'Search for files in DIR (specified by --dir option), '
         'do NOT browse for files online.'),
        ('--db', 'store', 'db_name', None,
         'SQLite3 .db filename to use for the comparison. Default: auto-generated SQLite3 .db file.'),
        ('--cl', 'store_true', 'clear_db', False,
         'Clean DB before comparison.'),
        ('--dry', 'store_true', 'dry', False,
         'Do not download or compare files, just show the progress.'),
        ('--html', 'store_true', 'html', False,
         'Generate static html. Default: %default.'),
    )
    extras = OptionGroup(cli, 'Optional')
    for flag, action, dest, default, text in extra_options:
        extras.add_option(flag, action=action, dest=dest, default=default, help=text)
    cli.add_option_group(extras)
    return cli, extras


parser, optional_group = _build_option_parser()
def call_compare_using_files(args):
    """Compare one pair of ROOT files by running the external comparison script.

    ``args`` is a single sequence (so the function can be used with
    ``Pool.map``) holding ``(file1, file2, work_path, db_name, clear_db)``:
    the two filenames relative to ``work_path``, the database the script
    should write to, and whether that database has to be cleared first.

    Returns the exit code of the script, as reported by ``subprocess.call``.
    """
    file1, file2, work_path, db_name, clear_db = args
    # The script name was missing from the command (only './' was left, which
    # cannot be executed). NOTE(review): 'compare_using_files_v2.py' is taken
    # from the module this file imports RootFileComparison from -- confirm.
    command = ['./compare_using_files_v2.py',
               join(work_path, file1), join(work_path, file2),
               '--db', db_name]
    if clear_db:
        command.append('--cl')
    return call(command)
def partial_db_name(db_name, i):
    """Generate the temporary database name for the ``i``-th file pair.

    A trailing '.db' extension of ``db_name`` is replaced by '___<i + 1>.db';
    names without that extension just get the suffix appended.
    """
    extension = '.db'
    # str.strip('.db') removes any leading/trailing '.', 'd' and 'b'
    # characters (e.g. 'data/bad.db' -> 'ata/ba'), so cut the suffix explicitly.
    if db_name.endswith(extension):
        db_name = db_name[:-len(extension)]
    return '%s___%d.db' % (db_name, i + 1)
def merge_dbs(main_db, partial_db):
    """Append the content of ``partial_db`` to ``main_db``.

    Rows of ReleaseComparison, RootFileComparison, Directory and
    HistogramComparison are copied from the partial database; the ids that
    reference other rows are shifted by the current size of the main database
    so they keep pointing to the copied rows.

    Returns the highest RootFileComparison id in ``main_db`` after the merge.
    """
    conn = sqlite3.connect(main_db)
    try:
        c = conn.cursor()

        ## Offsets stay 0 while the main database has no directories yet.
        c.execute('SELECT * FROM Directory LIMIT 1;')
        rel_cmp_offset = directory_offset = hist_cmp_offset = 0
        if c.fetchone() is not None:
            c.execute('SELECT count(*) FROM ReleaseComparison;')
            rel_cmp_offset = c.fetchone()[0]
            # max(id) is NULL for an empty table; treat that as "no offset"
            # instead of formatting 'None' into the SQL below.
            c.execute('SELECT max(id) FROM Directory;')
            directory_offset = c.fetchone()[0] or 0
            c.execute('SELECT max(id) FROM HistogramComparison;')
            hist_cmp_offset = c.fetchone()[0] or 0

        ## Merge DBs
        # executescript() cannot bind parameters, so the path is embedded as
        # an SQL string literal with its single quotes doubled.
        c.executescript("""
        ATTACH '{partial}' AS partial;
        BEGIN;

        INSERT INTO ReleaseComparison (title, release1, release2, statistical_test)
        SELECT title, release1, release2, statistical_test FROM partial.ReleaseComparison;

        INSERT INTO RootFileComparison (filename1, filename2, release_comparison_id, directory_id)
        SELECT filename1, filename2, release_comparison_id+{rel}, directory_id+{dir} FROM partial.RootFileComparison;

        INSERT INTO Directory (id, name, parent_id, from_histogram_id, till_histogram_id)
        SELECT id+{dir}, name, parent_id+{dir}, from_histogram_id+{hist}, till_histogram_id+{hist} FROM partial.Directory;

        INSERT INTO HistogramComparison (name, p_value, directory_id)
        SELECT name, p_value, directory_id+{dir} FROM partial.HistogramComparison;

        COMMIT;""".format(partial=partial_db.replace("'", "''"),
                          rel=int(rel_cmp_offset),
                          dir=int(directory_offset),
                          hist=int(hist_cmp_offset)))

        ## Select Last RootFileComparison ID
        c.execute('SELECT max(id) FROM RootFileComparison;')
        return c.fetchone()[0]
    finally:
        # Close the connection even when one of the statements fails.
        conn.close()
class ReleaseComparison(object):
    """Generates release comparison information and stores it in an SQLite3 .db file."""

    def __init__(self, work_path=None, db_name=None, clear_db=False, dry=False, no_url=False, use_external=False):
        """Store the comparison settings.

        work_path    -- directory the files are searched in / downloaded to;
                        generated from the compared versions when empty
                        (online search only).
        db_name      -- SQLite3 .db filename, relative to ``work_path``;
                        generated from the compared versions when empty.
        clear_db     -- truncate the database file before comparing.
        dry          -- exit right after the online file search.
        no_url       -- search for the files in ``work_path``, not online.
        use_external -- compare each file pair with the external script
                        (in a process pool) and merge the partial databases.
        """
        self.work_path = work_path
        self.db_name = db_name
        self.clear_db = clear_db
        self.dry = dry
        self.no_url = no_url
        self.use_external_script_to_compare_files = use_external

    @staticmethod
    def _progress(message):
        """Print ``message`` without a newline so 'Done.' can follow on the same line."""
        import sys
        sys.stdout.write(message + ' ')
        # Flush so the message is visible while the announced step is running.
        sys.stdout.flush()

    def was_compared(self, release1, release2, st_test_name):
        """Return the id of the stored comparison of the two releases, or False.

        Looks up ReleaseComparison in ``self.db_name`` for a row matching both
        releases and the statistical test name.
        """
        conn = sqlite3.connect(self.db_name)
        try:
            c = conn.cursor()
            c.execute('''SELECT id FROM ReleaseComparison WHERE release1=? AND
                    release2=? AND statistical_test=?''', (release1, release2, st_test_name))
            release_comparison_id = c.fetchone()
        finally:
            conn.close()
        if release_comparison_id:
            return release_comparison_id[0]
        return False

    def compare(self, rel1, frags1, rel2, frags2, st_tests, url=None, title=None):
        """Find, (optionally) download and compare the files of two releases.

        rel1, rel2     -- release names to look for.
        frags1, frags2 -- comma separated filename fragments, passed to the
                          file search helpers.
        st_tests       -- comma separated statistical test names; each must be
                          a key of ``tests``.
        url            -- where to search for files online (unused with no_url).
        title          -- title stored with a new ReleaseComparison row;
                          defaults to '<release1>__VS__<release2>'.

        The results end up in the SQLite3 database ``self.db_name``.
        """
        print('\n#################     Searching for files     ###################')
        if self.no_url:
            print('Searching for files on disk at %s' % (self.work_path))
            files1, files2 = search_on_disk(self.work_path, rel1, frags1, rel2, frags2)
            file_pairs = make_file_pairs(files1, files2)
        else:
            print('Searching for files online at:')
            files_with_urls1, files_with_urls2 = recursive_search_online(url, rel1, frags1, rel2, frags2)
            file_pairs = make_file_pairs(files_with_urls1, files_with_urls2)
            files_with_urls1.update(files_with_urls2)
            files1, files2 = zip(*file_pairs)
            paired_files_with_urls = [(name, files_with_urls1[name]) for name in files1 + files2]

            if self.dry:
                print('DRY: nothing to do. Exiting.')
                exit()

            self._download_files(files1, files2, paired_files_with_urls)

        ## Create database
        print('\n#################     Preparing Database     ###################')
        if not self.db_name:
            self.db_name = '%s___VS___%s.db' % (get_version(file_pairs[0][0]), get_version(file_pairs[0][1]))
        db_path = join(self.work_path, self.db_name)

        if self.clear_db:
            self._progress('Clearing DB: %s...' % self.db_name)
            open(db_path, 'w').close()
            print('Done.')

        self.db_name = init_database(db_path)

        release1 = get_version(file_pairs[0][0])
        release2 = get_version(file_pairs[0][1])

        ## Compare file pairs.
        # TODO: Use multiprocessing for this task.
        for st_test_name in st_tests.split(','):
            print('\n#################     Comparing Releases (%s)     ###################' % st_test_name)
            st_test = tests[st_test_name]()

            if self.use_external_script_to_compare_files:
                file_comparison_ids = self._compare_with_external_script(file_pairs, st_test_name)
            else:
                file_comparison_ids = self._compare_in_process(file_pairs, st_test_name, st_test)

            ## Calculate statistics for the release.
            # An id is collected for every pair compared in this pass, so an
            # empty list means there is nothing new to attach to the release.
            if file_comparison_ids:
                self._store_release_comparison(release1, release2, st_test_name,
                                               title, file_comparison_ids)

    def _download_files(self, files1, files2, paired_files_with_urls):
        """Prepare the working directory and download the files missing in it."""
        ## Create working directory if not given.
        if not self.work_path:
            # self.db_name stays relative here: compare() joins it with
            # work_path once; joining it here as well produced a
            # 'work_path/work_path/db_name' path.
            self.work_path = '%s___VS___%s' % (get_version(files1[0]), get_version(files2[0]))

        if not exists(self.work_path):
            print('\n###################      Preparing directory     ###################')
            self._progress('Creating working directory: %s ...' % self.work_path)
            makedirs(self.work_path)
            print('Done.')

        print('\n#################     Downloading the files     ###################')
        total_size, files_to_download = get_size_to_download(self.work_path, paired_files_with_urls)
        check_disk_for_space(self.work_path, total_size)

        ## Download needed files.
        # The helpers get the queue and target directory through function
        # attributes, set before the worker processes are created.
        # NOTE(review): presumably the queue carries download progress to
        # show_status_bar -- confirm in utils_v2.
        q = Queue()
        show_status_bar.q = q
        auth_download_file.q = q
        auth_download_file.work_dir = self.work_path

        Process(target=show_status_bar, args=(total_size,)).start()
        download_pool = Pool(2)
        try:
            download_pool.map(auth_download_file, files_to_download)
        finally:
            download_pool.close()
            download_pool.join()
        if total_size:
            print("Done.")

    def _compare_with_external_script(self, file_pairs, st_test_name):
        """Compare all pairs with the external script and merge the partial DBs.

        Returns the value ``merge_dbs`` reported for every merged partial
        database, one per file pair.
        """
        # Compare files using the external script, one partial DB per pair.
        arg_list = [list(pair) + [self.work_path, partial_db_name(self.db_name, i),
                                        self.clear_db] for i, pair in enumerate(file_pairs)]
        pool = Pool(7)
        try:
            pool.map(call_compare_using_files, arg_list)
        finally:
            pool.close()
            pool.join()

        # Merge databases
        print('\n#################     Merging DBs (%s)     ###################' % st_test_name)
        file_comparison_ids = []
        for i, pair in enumerate(file_pairs):
            tmp_db = partial_db_name(self.db_name, i)
            self._progress('Merging %s...' % (basename(tmp_db),))
            file_comparison_ids.append(merge_dbs(self.db_name, tmp_db))
            remove(tmp_db)
            print('Done.')
        return file_comparison_ids

    def _compare_in_process(self, file_pairs, st_test_name, st_test):
        """Compare the not yet compared pairs in this process; return the new ids."""
        file_comparison = RootFileComparison(self.db_name)
        file_comparison_ids = []
        for file1, file2 in file_pairs:
            # TODO: If files are not found display nice message.
            # TODO: Maybe subprocess would control the unwanted reports of
            file1_path = join(self.work_path, file1)
            file2_path = join(self.work_path, file2)

            if not file_comparison.was_compared(file1, file2, st_test_name):
                print("Comparing:\n%s\n%s\n" % (file1, file2))
                # NOTE(review): the method name was missing from this call;
                # 'compare' is assumed -- confirm against RootFileComparison.
                file_comparison_id = file_comparison.compare(file1_path, file2_path, st_test)
                file_comparison_ids.append(file_comparison_id)
            else:
                print("Already compared:\n%s\n%s\n" % (file1, file2))
        return file_comparison_ids

    def _store_release_comparison(self, release1, release2, st_test_name, title, file_comparison_ids):
        """Link the given file comparisons to the (possibly new) release comparison row."""
        release_comparison_id = self.was_compared(release1, release2, st_test_name)
        conn = sqlite3.connect(self.db_name)
        try:
            c = conn.cursor()
            if not release_comparison_id:
                print('Inserting release "%s  VS  %s" description.\n' % (release1, release2))
                if not title:
                    title = "%s__VS__%s" % (release1, release2)
                c.execute('''INSERT INTO ReleaseComparison(title, release1, release2,
                               statistical_test) VALUES (?, ?, ?, ?)''', (title,
                            release1, release2, st_test_name))
                release_comparison_id = c.lastrowid
            c.executemany('''UPDATE RootFileComparison SET release_comparison_id = ?
                    WHERE id == ?''', [(release_comparison_id, fid) for fid in file_comparison_ids])
            conn.commit()
        finally:
            conn.close()
if __name__ == '__main__':
    # Wall-clock start, reported as the execution time at the very end.
    start = datetime.now()
    opts, args = parser.parse_args()
    if not opts.release1 or not opts.release2:
        parser.error('Not all releases specified. Please check --re1 and --re2 options.')

    # File pairs are always compared through the external script here.
    rel_cmp = ReleaseComparison(opts.dir, opts.db_name, opts.clear_db, opts.dry, opts.no_url, use_external=True)
    rel_cmp.compare(opts.release1, opts.fragments1, opts.release2,
                    opts.fragments2, opts.st_tests, opts.url, opts.title)
    if opts.html:
        print('\n#################     Generating static HTML    #################')
        print('\n  Warrning!!!  Did NOT finished the implementation. \n')
        # Imported only on demand: the HTML generator is needed just for --html.
        from Utilities.RelMon.web.dbfile2html import dbfile2html
        dbfile2html(rel_cmp.db_name, opts.dir)
    print('#################     Execution time: %s    #################\n' % (datetime.now() - start,))