00001
00002 """
This script compares two releases and generates an SQLite3 database file with
the release comparison information.
00005
00006 Author: Albertas Gimbutas, Vilnius University (LT)
00007 e-mail: albertasgim@gmail.com
00008
00009 Note: default Pool size for file comparison is 7.
Note: the static HTML generation implementation is NOT finished.
00011 """
00012 import sqlite3
00013 from datetime import datetime
00014 from multiprocessing import Pool, Queue, Process
00015 from subprocess import call
00016 from optparse import OptionParser, OptionGroup
00017 from os import makedirs, remove
00018 from os.path import basename, join, exists
00019
00020 from Utilities.RelMon.utils_v2 import *
00021 from compare_using_files_v2 import RootFileComparison
00022
00023
00024
# Command line interface. The two releases are mandatory (checked in the
# ``__main__`` section); everything in the "Optional" group has a default.
parser = OptionParser(
    usage='Usage: %prog --re1 RELEASE1 [--f1 FR,FR,..] '
          '--re2 RELEASE2 [--f2 FR,FR,..] [--st ST_TESTS] [options]')

# Releases to compare and the filename fragments used to select their files.
parser.add_option(
    '--re1', dest='release1', default=None,
    help='First CMSSW release for release comparison, e.g. CMSSW_5_3_2_pre7.')
parser.add_option(
    '--re2', dest='release2', default=None,
    help='Second CMSSW release for release comparison.')
parser.add_option(
    '--f1', dest='fragments1', default='',
    help='Comma separated filename fragments that have or have not to be '
         'in RELEASE1 filenames. For "not include" use `!` before fragment, '
         'e.g. `--f1 FullSim,!2012`.')
parser.add_option(
    '--f2', dest='fragments2', default='',
    help='Comma separated filename fragments that have or have not to be '
         'in RELEASE2 filenames. For "not include" use `!` before fragment.')

# Options that tune where files come from, where results go and what is run.
optional_group = OptionGroup(parser, 'Optional')
optional_group.add_option(
    '--st', dest='st_tests', default='KS',
    help='Comma separated statistical tests to use. \nAvailable: KS, Chi2. Default: %default.')
optional_group.add_option(
    '--title', dest='title', default=None,
    help='Release comparison title.')
optional_group.add_option(
    '--dir', dest='dir', default=None,
    help='Directory to download and compare files in.')
optional_group.add_option(
    '--url', dest='url', default=None,
    help='URL to fetch ROOT files from. File search is recursive '
         'for links in given URL.')
optional_group.add_option(
    '--no-url', dest='no_url', action='store_true', default=False,
    help='Search for files in DIR (specified by --dir option), '
         'do NOT browse for files online.')
optional_group.add_option(
    '--db', dest='db_name', default=None,
    help='SQLite3 .db filename to use for the comparison. Default: auto-generated SQLite3 .db file.')
optional_group.add_option(
    '--cl', dest='clear_db', action='store_true', default=False,
    help='Clean DB before comparison.')
optional_group.add_option(
    '--dry', dest='dry', action='store_true', default=False,
    help='Do not download or compare files, just show the progress.')
optional_group.add_option(
    '--html', dest='html', action='store_true', default=False,
    help='Generate static html. Default: %default.')
parser.add_option_group(optional_group)
00061
00062
def call_compare_using_files(args):
    """Run the external file comparison script for one pair of files.

    ``args`` is a single 5-item sequence
    ``(file1, file2, work_path, db_name, clear_db)``. Both file names are
    joined with ``work_path`` before being passed to the script; ``--cl`` is
    appended when ``clear_db`` is true.

    Returns the value returned by ``subprocess.call`` for the command.
    """
    first, second, directory, database, wipe = args
    cmd = ['./compare_using_files_v2.py']
    cmd.extend(join(directory, name) for name in (first, second))
    cmd.extend(['--db', database])
    if wipe:
        cmd.append('--cl')
    return call(cmd)
00069
def partial_db_name(db_name, i):
    """Generates temporary database name.

    Builds ``<db_name without a trailing '.db'>___<i + 1>.db``.

    ``db_name`` -- name or path of the main database file.
    ``i``       -- zero-based index of the partial database.
    """
    # Only a trailing '.db' extension is removed. ``str.strip('.db')`` would
    # instead eat any leading/trailing '.', 'd' or 'b' characters and corrupt
    # names such as './data/bad.db'.
    suffix = '.db'
    if db_name.endswith(suffix):
        db_name = db_name[:-len(suffix)]
    return '%s___%d.db' % (db_name, i + 1)
00073
def merge_dbs(main_db, partial_db):
    """Append the rows of ``partial_db`` to ``main_db``.

    Rows of the ReleaseComparison, RootFileComparison, Directory and
    HistogramComparison tables are copied from the partial database. Ids that
    reference other rows are shifted by offsets taken from the current
    contents of the main database, so that they keep pointing at the copied
    rows.

    ``main_db``    -- path of the database that receives the rows.
    ``partial_db`` -- path of the database whose rows are copied.

    Returns the largest ``RootFileComparison.id`` in ``main_db`` after the
    merge.
    """
    conn = sqlite3.connect(main_db)
    try:
        c = conn.cursor()

        # An empty Directory table is taken to mean the main DB holds no data
        # yet; the partial rows are then copied with unchanged ids.
        c.execute('''SELECT * FROM Directory limit 1;''')
        rel_cmp_offset = directory_offset = hist_cmp_offset = 0
        if c.fetchall():
            c.execute('''SELECT count(*) FROM ReleaseComparison;''')
            rel_cmp_offset = c.fetchone()[0]
            # max() yields NULL (None) for an empty table; fall back to 0 so
            # the literal text "None" never ends up in the SQL below.
            c.execute('''SELECT max(id) FROM Directory;''')
            directory_offset = c.fetchone()[0] or 0
            c.execute('''SELECT max(id) FROM HistogramComparison;''')
            hist_cmp_offset = c.fetchone()[0] or 0

        # executescript() takes no bound parameters, so the path is embedded
        # as an SQL string literal with single quotes doubled. The offsets are
        # integers read from the database itself.
        c.executescript("""
            ATTACH '{partial}' AS partial;
            BEGIN;

            INSERT INTO ReleaseComparison (title, release1, release2, statistical_test)
            SELECT title, release1, release2, statistical_test FROM partial.ReleaseComparison;

            INSERT INTO RootFileComparison (filename1, filename2, release_comparison_id, directory_id)
            SELECT filename1, filename2, release_comparison_id+{rel}, directory_id+{dir} FROM partial.RootFileComparison;

            INSERT INTO Directory (id, name, parent_id, from_histogram_id, till_histogram_id)
            SELECT id+{dir}, name, parent_id+{dir}, from_histogram_id+{hist}, till_histogram_id+{hist} FROM partial.Directory;

            INSERT INTO HistogramComparison (name, p_value, directory_id)
            SELECT name, p_value, directory_id+{dir} FROM partial.HistogramComparison;

            COMMIT;""".format(partial=partial_db.replace("'", "''"),
                              rel=rel_cmp_offset,
                              dir=directory_offset,
                              hist=hist_cmp_offset))

        c.execute('''SELECT max(id) FROM RootFileComparison;''')
        return c.fetchone()[0]
    finally:
        # Release the connection even when one of the statements fails.
        conn.close()
00118
00119
class ReleaseComparison(object):
    """Generates release comparison information and stores it on SQLite3 .db file.

    Attributes (all set from the constructor arguments):
      work_path -- directory holding the ROOT files and the database; when
                   empty, ``compare`` derives one from the versions of the
                   first files found.
      db_name   -- database filename; ``compare`` replaces it with the value
                   returned by ``init_database``.
      clear_db  -- truncate the database file before comparing.
      dry       -- only search for files, then exit.
      no_url    -- search for files on disk in ``work_path`` instead of online.
      use_external_script_to_compare_files -- compare each file pair through
                   ``call_compare_using_files`` and merge the partial
                   databases, instead of using ``RootFileComparison`` directly.
    """

    def __init__(self, work_path=None, db_name=None, clear_db=False, dry=False, no_url=False, use_external=False):
        self.work_path = work_path
        self.db_name = db_name
        self.clear_db = clear_db
        self.dry = dry
        self.no_url = no_url
        self.use_external_script_to_compare_files = use_external

    def was_compared(self, release1, release2, st_test_name):
        """Look up a stored comparison of two releases for one statistical test.

        Returns the ``ReleaseComparison.id`` of the matching row in
        ``self.db_name``, or ``False`` when there is no such row.
        """
        conn = sqlite3.connect(self.db_name)
        try:
            c = conn.cursor()
            c.execute('''SELECT id FROM ReleaseComparison WHERE release1=? AND
                    release2=? AND statistical_test=?''', (release1, release2, st_test_name))
            release_comparison_id = c.fetchone()
        finally:
            # Close the connection even if the query fails.
            conn.close()
        if release_comparison_id:
            return release_comparison_id[0]
        return False

    def compare(self, rel1, frags1, rel2, frags2, st_tests, url=None, title=None):
        """Find, download and compare the files of two releases.

        rel1, rel2     -- release names handed to the file search helpers.
        frags1, frags2 -- filename fragments handed to the file search helpers.
        st_tests       -- comma separated statistical test names; each one is
                          looked up in ``tests``.
        url            -- where to search for files online (unused with
                          ``no_url``).
        title          -- title stored with a new release comparison; defaults
                          to ``<release1>__VS__<release2>``.

        Exits the interpreter after the search when ``self.dry`` is set.
        Raises ``ValueError`` when no file pairs were found.
        """
        # Local import: used for progress messages without a trailing newline.
        import sys

        print('\n################# Searching for files ###################')
        if self.no_url:
            print('Searching for files on disk at %s' % (self.work_path))
            files1, files2 = search_on_disk(self.work_path, rel1, frags1, rel2, frags2)
            file_pairs = make_file_pairs(files1, files2)
        else:
            print('Searching for files online at:')
            files_with_urls1, files_with_urls2 = recursive_search_online(url, rel1, frags1, rel2, frags2)
            file_pairs = make_file_pairs(files_with_urls1, files_with_urls2)
            files_with_urls1.update(files_with_urls2)
            paired_files_with_urls = []
            # zip(*[]) cannot be unpacked into two names, so only split the
            # pairs when there are any.
            if file_pairs:
                files1, files2 = zip(*file_pairs)
                paired_files_with_urls = [(name, files_with_urls1[name]) for name in files1 + files2]

        if self.dry:
            print('DRY: nothing to do. Exiting.')
            exit()

        if not file_pairs:
            raise ValueError('No matching file pairs found for releases %s and %s.' % (rel1, rel2))

        # Create a working directory name if none was given. ``db_name`` is
        # NOT joined with it here: it is joined with ``work_path`` below, and
        # joining twice would produce ``work_path/work_path/db_name``.
        if not self.work_path:
            self.work_path = '%s___VS___%s' % (get_version(files1[0]), get_version(files2[0]))

        if not exists(self.work_path):
            print('\n################### Preparing directory ###################')
            sys.stdout.write('Creating working directory: %s ... ' % self.work_path)
            makedirs(self.work_path)
            print('Done.')

        # With ``no_url`` the files were found on disk, so there is nothing to
        # download (and no URL mapping to download from).
        if not self.no_url:
            self._download_files(paired_files_with_urls)

        print('\n################# Preparing Database ###################')
        if not self.db_name:
            self.db_name = '%s___VS___%s.db' % (get_version(file_pairs[0][0]), get_version(file_pairs[0][1]))

        if self.clear_db:
            sys.stdout.write('Clearing DB: %s... ' % self.db_name)
            # Opening for writing truncates the file.
            open(join(self.work_path, self.db_name), 'w').close()
            print('Done.')

        self.db_name = init_database(join(self.work_path, self.db_name))

        # The release names do not depend on the statistical test.
        release1 = get_version(file_pairs[0][0])
        release2 = get_version(file_pairs[0][1])

        for st_test_name in st_tests.split(','):
            print('\n################# Comparing Releases (%s) ###################' % st_test_name)
            st_test = tests[st_test_name]()

            some_files_compared = False
            file_comparison_ids = []
            if self.use_external_script_to_compare_files:
                # Each pair is compared by a separate process writing to its
                # own partial database.
                # NOTE(review): the external script is not told which
                # statistical test to use -- confirm whether st_test_name
                # should be passed on.
                arg_list = [list(pair) + [self.work_path, partial_db_name(self.db_name, i),
                                          self.clear_db] for i, pair in enumerate(file_pairs)]
                pool = Pool(7)
                try:
                    pool.map(call_compare_using_files, arg_list)
                finally:
                    pool.close()
                    pool.join()

                print('\n################# Merging DBs (%s) ###################' % st_test_name)
                for i, pair in enumerate(file_pairs):
                    tmp_db = partial_db_name(self.db_name, i)
                    sys.stdout.write('Merging %s... ' % (basename(tmp_db),))
                    file_comparison_ids.append(merge_dbs(self.db_name, tmp_db))
                    remove(tmp_db)
                    print('Done.')
                    some_files_compared = True
            else:
                file_comparison = RootFileComparison(self.db_name)

                for file1, file2 in file_pairs:
                    file1_path = join(self.work_path, file1)
                    file2_path = join(self.work_path, file2)

                    if not file_comparison.was_compared(file1, file2, st_test_name):
                        print("Comparing:\n%s\n%s\n" % (file1, file2))
                        file_comparison_id = file_comparison.compare(file1_path, file2_path, st_test)
                        file_comparison_ids.append(file_comparison_id)
                        some_files_compared = True
                    else:
                        print("Already compared:\n%s\n%s\n" % (file1, file2))

            if some_files_compared:
                self._store_release_comparison(release1, release2, st_test_name,
                                               title, file_comparison_ids)

    def _download_files(self, paired_files_with_urls):
        """Download the files that are not yet in the working directory."""
        print('\n################# Downloading the files ###################')
        total_size, files_to_download = get_size_to_download(self.work_path, paired_files_with_urls)
        check_disk_for_space(self.work_path, total_size)

        # The queue and target directory are handed to the download helpers
        # as function attributes, so they must be set before any worker
        # process is started.
        q = Queue()
        show_status_bar.q = q
        auth_download_file.q = q
        auth_download_file.work_dir = self.work_path

        Process(target=show_status_bar, args=(total_size,)).start()
        pool = Pool(2)
        try:
            pool.map(auth_download_file, files_to_download)
        finally:
            pool.close()
            pool.join()
        if total_size:
            print("Done.")

    def _store_release_comparison(self, release1, release2, st_test_name, title, file_comparison_ids):
        """Insert the release comparison row if missing and link the file comparisons to it."""
        release_comparison_id = self.was_compared(release1, release2, st_test_name)
        conn = sqlite3.connect(self.db_name)
        try:
            c = conn.cursor()
            if not release_comparison_id:
                print('Inserting release "%s VS %s" description.\n' % (release1, release2))
                if not title:
                    title = "%s__VS__%s" % (release1, release2)
                c.execute('''INSERT INTO ReleaseComparison(title, release1, release2,
                           statistical_test) VALUES (?, ?, ?, ?)''', (title,
                           release1, release2, st_test_name))
                release_comparison_id = c.lastrowid
            c.executemany('''UPDATE RootFileComparison SET release_comparison_id = ?
                    WHERE id == ?''', [(release_comparison_id, fid) for fid in file_comparison_ids])
            conn.commit()
        finally:
            # Close the connection even if one of the statements fails.
            conn.close()
00258
00259
if __name__ == '__main__':
    # Script entry point: parse the command line, run the release comparison
    # and optionally generate static HTML from the resulting database.
    started_at = datetime.now()
    opts, args = parser.parse_args()
    if not (opts.release1 and opts.release2):
        parser.error('Not all releases specified. Please check --re1 and --re2 options.')

    # File pairs are always compared through the external script here.
    rel_cmp = ReleaseComparison(
        work_path=opts.dir,
        db_name=opts.db_name,
        clear_db=opts.clear_db,
        dry=opts.dry,
        no_url=opts.no_url,
        use_external=True,
    )
    rel_cmp.compare(opts.release1, opts.fragments1, opts.release2, opts.fragments2,
                    st_tests=opts.st_tests, url=opts.url, title=opts.title)

    if opts.html:
        print('\n################# Generating static HTML #################')
        print('\n Warrning!!! Did NOT finished the implementation. \n')
        # Imported only when needed, after the warning has been printed.
        from Utilities.RelMon.web.dbfile2html import dbfile2html
        dbfile2html(rel_cmp.db_name, opts.dir)

    elapsed = datetime.now() - started_at
    print('################# Execution time: %s #################\n' % (elapsed,))