CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
ValidationMatrix_v2.py
Go to the documentation of this file.
1 #! /usr/bin/env python3
2 """
3 The script compares two releases, generates SQLite3 database file with release
4 comparison information.
5 
6 Author: Albertas Gimbutas, Vilnius University (LT)
7 e-mail: albertasgim@gmail.com
8 
9 Note: default Pool size for file comparison is 7.
10 Note: did NOT finish static HTML generation implementation.
11 """
12 from __future__ import print_function
13 import sqlite3
14 from datetime import datetime
15 from multiprocessing import Pool, Queue, Process
16 from subprocess import call
17 from optparse import OptionParser, OptionGroup
18 from os import makedirs, remove
19 from os.path import basename, join, exists
20 
21 from Utilities.RelMon.utils_v2 import *
22 from compare_using_files_v2 import RootFileComparison
23 
24 
## Parse options
# Command-line interface of the script. Two releases are mandatory (checked
# in the ``__main__`` section); everything in the "Optional" group has a default.
parser = OptionParser(
    usage='Usage: %prog --re1 RELEASE1 [--f1 FR,FR,..] '
          '--re2 RELEASE2 [--f2 FR,FR,..] [--st ST_TESTS] [options]')
parser.add_option(
    '--re1', action='store', dest='release1', default=None,
    help='First CMSSW release for release comparison, e.g. CMSSW_5_3_2_pre7.')
parser.add_option(
    '--re2', action='store', dest='release2', default=None,
    help='Second CMSSW release for release comparison.')
parser.add_option(
    '--f1', action='store', dest='fragments1', default='',
    help='Comma separated filename fragments that have or have not to be '
         'in RELEASE1 filenames. For "not include" use `!` before fragment, '
         'e.g. `--f1 FullSim,!2012`.')
parser.add_option(
    '--f2', action='store', dest='fragments2', default='',
    help='Comma separated filename fragments that have or have not to be '
         'in RELEASE2 filenames. For "not include" use `!` before fragment.')

optional_group = OptionGroup(parser, 'Optional')
optional_group.add_option(
    '--st', action='store', dest='st_tests', default='KS',
    help='Comma separated statistical tests to use. \nAvailable: KS, Chi2. Default: %default.')
optional_group.add_option(
    '--title', action='store', dest='title', default=None,
    help='Release comparison title.')
optional_group.add_option(
    '--dir', action='store', dest='dir', default=None,
    help='Directory to download and compare files in.')
optional_group.add_option(
    '--url', action='store', dest='url', default=None,
    help='URL to fetch ROOT files from. File search is recursive '
         'for links in given URL.')
optional_group.add_option(
    '--no-url', action='store_true', dest='no_url', default=False,
    help='Search for files in DIR (specified by --dir option), '
         'do NOT browse for files online.')
optional_group.add_option(
    '--db', action='store', dest='db_name', default=None,
    help='SQLite3 .db filename to use for the comparison. Default: auto-generated SQLite3 .db file.')
optional_group.add_option(
    '--cl', action='store_true', dest='clear_db', default=False,
    help='Clean DB before comparison.')
optional_group.add_option(
    '--dry', action='store_true', dest='dry', default=False,
    help='Do not download or compare files, just show the progress.')
optional_group.add_option(
    '--html', action='store_true', dest='html', default=False,
    help='Generate static html. Default: %default.')
parser.add_option_group(optional_group)
62 
63 
def call_compare_using_files(args):
    """Run ``compare_using_files_v2.py`` on one file pair in a subprocess.

    Takes a single sequence so it can be used directly with ``Pool.map``.

    :param args: ``(file1, file2, work_path, db_name, clear_db)``; the two
        file names are joined with ``work_path``, ``db_name`` is passed to the
        script via ``--db`` and a truthy ``clear_db`` adds ``--cl``.
    :return: the return code of the subprocess, as returned by ``subprocess.call``.
    """
    file1, file2, work_path, db_name, clear_db = args
    # NOTE(review): the script path is relative, so this presumably has to be
    # run from the directory containing compare_using_files_v2.py -- confirm.
    command = ['./compare_using_files_v2.py', join(work_path, file1), join(work_path, file2), '--db', db_name]
    if clear_db:
        command.append('--cl')
    return call(command)
70 
def partial_db_name(db_name, i):
    """Generates temporary database name.

    :param db_name: name (or path) of the main database file.
    :param i: zero-based index of the partial database; the generated name
        uses ``i + 1``.
    :return: ``'<db_name without a trailing .db>___<i + 1>.db'``.
    """
    suffix = '.db'
    # Remove only a trailing '.db'. ``str.strip('.db')`` would instead strip
    # any of the characters '.', 'd', 'b' from BOTH ends and mangle names
    # such as 'bad.db' or 'db/x.db'.
    stem = db_name[:-len(suffix)] if db_name.endswith(suffix) else db_name
    return '%s___%d.db' % (stem, i + 1)
74 
def merge_dbs(main_db, partial_db):
    """Append the contents of ``partial_db`` to ``main_db``.

    Rows of the partial database are copied into the main one. Ids that
    reference other rows are shifted by offsets read from the main database,
    so that they do not collide with rows already stored there.

    :param main_db: path of the SQLite3 database that receives the rows.
    :param partial_db: path of the SQLite3 database to copy rows from.
    :return: the highest ``RootFileComparison.id`` in ``main_db`` after the merge.
    """
    conn = sqlite3.connect(main_db)
    try:
        c = conn.cursor()

        ## Test if database is empty
        c.execute('''SELECT * FROM Directory limit 1;''')
        directory_row = c.fetchall()

        ## Select offsets
        rel_cmp_offset, directory_offset, hist_cmp_offset = 0, 0, 0
        if directory_row:
            c.execute('''SELECT count(*) FROM ReleaseComparison;''')
            rel_cmp_offset = c.fetchone()[0]
            # COALESCE: max() over an empty table is NULL, which would
            # otherwise be formatted into the script below as 'None'.
            c.execute('''SELECT COALESCE(max(id), 0) FROM Directory;''')
            directory_offset = c.fetchone()[0]
            c.execute('''SELECT COALESCE(max(id), 0) FROM HistogramComparison;''')
            hist_cmp_offset = c.fetchone()[0]

        ## Merge DBs
        # ATTACH takes the file name as an SQL string literal, so single
        # quotes in the path have to be doubled.
        c.executescript("""
            ATTACH '{partial}' AS partial;
            BEGIN;

            INSERT INTO ReleaseComparison (title, release1, release2, statistical_test)
            SELECT title, release1, release2, statistical_test FROM partial.ReleaseComparison;

            INSERT INTO RootFileComparison (filename1, filename2, release_comparison_id, directory_id)
            SELECT filename1, filename2, release_comparison_id+{rel}, directory_id+{dir} FROM partial.RootFileComparison;

            INSERT INTO Directory (id, name, parent_id, from_histogram_id, till_histogram_id)
            SELECT id+{dir}, name, parent_id+{dir}, from_histogram_id+{hist}, till_histogram_id+{hist} FROM partial.Directory;

            INSERT INTO HistogramComparison (name, p_value, directory_id)
            SELECT name, p_value, directory_id+{dir} FROM partial.HistogramComparison;

            COMMIT;""".format(partial=partial_db.replace("'", "''"),
                              rel=rel_cmp_offset,
                              dir=directory_offset,
                              hist=hist_cmp_offset))

        ## Select Last RootFileComparison ID
        c.execute('''SELECT max(id) FROM RootFileComparison;''')
        max_file_cmp_id = c.fetchone()[0]
        return max_file_cmp_id
    finally:
        # Close the connection even if one of the statements above failed.
        conn.close()
119 
120 
class ReleaseComparison(object):
    """Generates release comparison information and stores it on SQLite3 .db file."""

    def __init__(self, work_path=None, db_name=None, clear_db=False, dry=False, no_url=False, use_external=False):
        """Store the comparison settings.

        :param work_path: directory the files are searched/downloaded/compared in.
        :param db_name: SQLite3 database file name; generated from the release
            versions in ``compare`` when not given.
        :param clear_db: truncate the database file before comparing.
        :param dry: stop after the file search, do not download or compare.
        :param no_url: search for files in ``work_path`` instead of online.
        :param use_external: compare each file pair with the external
            ``compare_using_files_v2.py`` script (in a process pool) instead
            of in-process ``RootFileComparison``.
        """
        self.work_path = work_path
        self.db_name = db_name
        self.clear_db = clear_db
        self.dry = dry
        self.no_url = no_url
        self.use_external = use_external

    def was_compared(self, release1, release2, st_test_name):
        """Look up an existing comparison of two releases.

        :return: the ``ReleaseComparison.id`` of the row matching both
            releases and the statistical test name, or ``False`` if none exists.
        """
        conn = sqlite3.connect(self.db_name)
        try:
            c = conn.cursor()
            c.execute('''SELECT id FROM ReleaseComparison WHERE release1=? AND
                         release2=? AND statistical_test=?''', (release1, release2, st_test_name))
            release_comparison_id = c.fetchone()
        finally:
            conn.close()
        if release_comparison_id:
            return release_comparison_id[0]
        return False

    def _download_files(self, files1, files2, paired_files_with_urls):
        """Prepare the working directory and download the files missing from it."""
        ## Create working directory if not given.
        if not self.work_path:
            self.work_path = '%s___VS___%s' % (get_version(files1[0]), get_version(files2[0]))

        if not exists(self.work_path):
            print('\n################### Preparing directory ###################')
            print('Creating working directory: %s ...' % self.work_path, end=' ')
            makedirs(self.work_path)
            print('Done.')

        print('\n################# Downloading the files ###################')
        total_size, files_to_download = get_size_to_download(self.work_path, paired_files_with_urls)
        check_disk_for_space(self.work_path, total_size)

        ## Download needed files.
        # The queue and target directory are handed to the helper functions
        # as function attributes.
        q = Queue()
        show_status_bar.q = q
        auth_download_file.q = q
        auth_download_file.work_dir = self.work_path

        Process(target=show_status_bar, args=(total_size,)).start()
        pool = Pool(2)
        try:
            pool.map(auth_download_file, files_to_download)
        finally:
            # Release the worker processes once the downloads are done.
            pool.close()
            pool.join()
        if total_size:
            print("Done.")

    def compare(self, rel1, frags1, rel2, frags2, st_tests, url=None, title=None):
        """Find, (optionally) download and compare the files of two releases.

        :param rel1: first release name.
        :param frags1: filename fragments for the first release's files.
        :param rel2: second release name.
        :param frags2: filename fragments for the second release's files.
        :param st_tests: comma separated names of statistical tests; every
            name is looked up in ``tests``.
        :param url: URL to search for files when ``no_url`` is not set.
        :param title: title stored with a newly inserted release comparison;
            defaults to ``'<release1>__VS__<release2>'``.

        Results are written to the SQLite3 database ``self.db_name``, which
        is updated to the value returned by ``init_database``.
        """
        print('\n################# Searching for files ###################')
        paired_files_with_urls = None
        if self.no_url:
            print('Searching for files on disk at %s' % (self.work_path))
            files1, files2 = search_on_disk(self.work_path, rel1, frags1, rel2, frags2)
            file_pairs = make_file_pairs(files1, files2)
        else:
            print('Searching for files online at:')
            files_with_urls1, files_with_urls2 = recursive_search_online(url, rel1, frags1, rel2, frags2)
            file_pairs = make_file_pairs(files_with_urls1, files_with_urls2)
            files_with_urls1.update(files_with_urls2)
            files1, files2 = list(zip(*file_pairs))
            paired_files_with_urls = [(name, files_with_urls1[name]) for name in files1 + files2]

        # A dry run stops before anything is created, downloaded or compared,
        # whichever way the files were searched for.
        if self.dry:
            print('DRY: nothing to do. Exiting.')
            raise SystemExit()

        # Only files that were found online have URLs to download from.
        if not self.no_url:
            self._download_files(files1, files2, paired_files_with_urls)

        ## Create database
        print('\n################# Preparing Database ###################')
        if not self.db_name:
            self.db_name = '%s___VS___%s.db' % (get_version(file_pairs[0][0]), get_version(file_pairs[0][1]))

        # The database lives in the working directory; join exactly once so a
        # relative work_path is not prepended twice.
        db_path = join(self.work_path, self.db_name)

        if self.clear_db:
            print('Clearing DB: %s...' % self.db_name, end=' ')
            # Opening for writing truncates the file.
            with open(db_path, 'w'):
                pass
            print('Done.')

        ## Compare file pairs.
        self.db_name = init_database(db_path)

        # TODO: Use multiprocessing for this task.
        for st_test_name in st_tests.split(','):
            print('\n################# Comparing Releases (%s) ###################' % st_test_name)
            st_test = tests[st_test_name]()

            some_files_compared = False
            file_comparison_ids = []
            if self.use_external:
                # Compare files using compare_using_files_v2.py
                # Every pair writes to its own partial database, which is
                # merged into the main one afterwards.
                arg_list = [list(pair) + [self.work_path, partial_db_name(self.db_name, i),
                                          self.clear_db] for i, pair in enumerate(file_pairs)]
                pool = Pool(7)
                try:
                    pool.map(call_compare_using_files, arg_list)
                finally:
                    pool.close()
                    pool.join()

                # Merge databases
                print('\n################# Merging DBs (%s) ###################' % st_test_name)
                for i, pair in enumerate(file_pairs):
                    tmp_db = partial_db_name(self.db_name, i)
                    print('Merging %s...' % (basename(tmp_db),), end=' ')
                    file_comparison_ids.append(merge_dbs(self.db_name, tmp_db))
                    remove(tmp_db)
                    print('Done.')
                    some_files_compared = True
            else:
                file_comparison = RootFileComparison(self.db_name)

                for file1, file2 in file_pairs:
                    # TODO: If files are not found display nice message.
                    # TODO: Maybe subprocess would control the unwanted reports of RootFileComparison.compare()
                    file1_path = join(self.work_path, file1)
                    file2_path = join(self.work_path, file2)

                    if not file_comparison.was_compared(file1, file2, st_test_name):
                        print("Comparing:\n%s\n%s\n" % (file1, file2))
                        file_comparison_id = file_comparison.compare(file1_path, file2_path, st_test)
                        file_comparison_ids.append(file_comparison_id)
                        some_files_compared = True
                    else:
                        print("Already compared:\n%s\n%s\n" % (file1, file2))

            ## Calculate statistics for the release.
            release1 = get_version(file_pairs[0][0])
            release2 = get_version(file_pairs[0][1])
            if some_files_compared:
                release_comparison_id = self.was_compared(release1, release2, st_test_name)
                conn = sqlite3.connect(self.db_name)
                try:
                    c = conn.cursor()
                    if not release_comparison_id:
                        print('Inserting release "%s VS %s" description.\n' % (release1, release2))
                        if not title:
                            title = "%s__VS__%s" % (release1, release2)
                        c.execute('''INSERT INTO ReleaseComparison(title, release1, release2,
                                     statistical_test) VALUES (?, ?, ?, ?)''', (title,
                                  release1, release2, st_test_name))
                        release_comparison_id = c.lastrowid
                    # Attach the newly compared files to the release comparison.
                    c.executemany('''UPDATE RootFileComparison SET release_comparison_id = ?
                                     WHERE id == ?''', [(release_comparison_id, fid) for fid in file_comparison_ids])
                    conn.commit()
                finally:
                    conn.close()
259 
260 
if __name__ == '__main__':
    # Script entry point: parse the options, run the release comparison and
    # optionally generate static HTML from the resulting database.
    start = datetime.now()
    opts, args = parser.parse_args()
    if not opts.release1 or not opts.release2:
        parser.error('Not all releases specified. Please check --re1 and --re2 options.')

    rel_cmp = ReleaseComparison(opts.dir, opts.db_name, opts.clear_db, opts.dry, opts.no_url, use_external=True)
    rel_cmp.compare(opts.release1, opts.fragments1, opts.release2,
                    opts.fragments2, opts.st_tests, opts.url, opts.title)
    if opts.html:
        print('\n################# Generating static HTML #################')
        print('\n Warrning!!! Did NOT finished the implementation. \n')
        from Utilities.RelMon.web.dbfile2html import dbfile2html
        # Pass the working directory actually used by the comparison: it
        # equals opts.dir when --dir was given, and is the auto-generated
        # directory otherwise (opts.dir would be None in that case).
        # NOTE(review): assumes the second argument of dbfile2html is the
        # working directory -- confirm against its definition.
        dbfile2html(rel_cmp.db_name, rel_cmp.work_path)
    print('################# Execution time: %s #################\n' % (datetime.now() - start,))
Definition: start.py:1
def search_on_disk
Definition: utils_v2.py:256
def check_disk_for_space
Definition: utils_v2.py:462
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
work_path
Create working directory if not given.
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
static std::string join(char **cmd)
Definition: RemoteFile.cc:19
def recursive_search_online
Definition: utils_v2.py:216
def make_file_pairs
Definition: utils_v2.py:108
def init_database
Utils.
Definition: utils_v2.py:391
def get_size_to_download
Definition: utils_v2.py:446