CMS 3D CMS Logo

Classes | Functions | Variables

utils_v2 Namespace Reference

Classes

class  Chi2Test
class  ComparisonError
class  KolmogorovTest
class  StatisticalTest
 StatisticalTests. More...

Functions

def auth_download_file
def auth_wget
 -------------------- Recursive file downloader -----------------------
def check_disk_for_space
def get_relval_cmssw_version
def get_relval_id
def get_relval_max_version
def get_relval_version
 ------------------- Make file pairs: RelVal utils ---------------------
def get_relvaldata_cmssw_version
def get_relvaldata_id
 ----------------- Make file pairs: RelValData utils --------------------
def get_relvaldata_max_version
def get_relvaldata_version
def get_size_to_download
def get_version
def init_database
 Utils.
def is_relvaldata
 ----------------------- Make file pairs --------------------------
def make_file_pairs
def recursive_search_online
def search_on_disk
def show_status_bar

Variables

dictionary comparison_errors
 Exception definitions.
dictionary tests = {KolmogorovTest.name: KolmogorovTest, Chi2Test.name: Chi2Test}

Function Documentation

def utils_v2::auth_download_file (   url,
  chunk_size = 1048576 
)

Definition at line 197 of file utils_v2.py.

00198                                                :
00199     filename = basename(url)
00200     file_path = join(auth_download_file.work_dir, filename)
00201 
00202     file = open(file_path, 'wb')
00203     opener = build_opener(X509CertOpen())
00204     url_file = opener.open(Request(url))
00205     chunk = url_file.read(chunk_size)
00206     while chunk:
00207         file.write(chunk)
00208         auth_download_file.q.put((1,))   # reports, that downloaded 1MB
00209         chunk = url_file.read(chunk_size)
00210     print '\rDownloaded: %s  ' % (filename,)
00211     file.close()
00212 

def utils_v2::auth_wget (   url)

-------------------- Recursive file downloader -----------------------

Definition at line 183 of file utils_v2.py.

00184                   :
00185     try:
00186         opener = build_opener(X509CertOpen())
00187         return opener.open(Request(url)).read()
00188     except HTTPError, e:
00189         print '\nError: DQM GUI is temporarily unavailable. Probably maintainance hours. '+\
00190                 'Please try again later. Original error message: ``%s``. \nExiting...\n' % (e,)
00191         exit()
00192     except BadStatusLine, e:
00193         print '\nYou do not have permissions to access DQM GUI. Please check if your certificates '+\
00194             'in ``~/.globus`` directory are configured correctly. Exitting...' 
00195         exit()
00196 

def utils_v2::check_disk_for_space (   work_path,
  size_needed 
)
Checks the AFS file system for free space. (Currently a no-op: the body is `pass` and the quota check is commented out.)

Definition at line 459 of file utils_v2.py.

00460                                                 :
00461     '''Checks afs file system for space.'''
00462     pass
00463     # try:
00464     #     fs_proc = subprocess.Popen(['fs', 'listquota', work_path], stdout=subprocess.PIPE)
00465     # except OSError:
00466     #     return
00467     # fs_response = fs_proc.communicate()[0]
00468     # quota, used = re.findall('([\d]+)', fs_response)[:2]
00469     # free_space = int(quota) - int(used)
00470     # if free_space * 1024 < size_needed:
00471     #     print '\nNot enougth free space on disk.',
00472     #     print 'Free space: %d MB. Need: %d MB. Exiting...\n' % (free_space / 1024, size_needed /1048576)
00473     #     exit()
00474     # elif size_needed:
00475     #     print 'Free space on disk: %d MB.\n' % (free_space / 1024,)
00476 

def utils_v2::get_relval_cmssw_version (   file)

Definition at line 89 of file utils_v2.py.

00090                                   :
00091     cmssw_release = re.findall('(CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?)-', file)
00092     gr_r_version = re.findall('CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-([\w\d]*)_V\d*\w?(_[\w\d]*)?-v', file)
00093     if cmssw_release and gr_r_version:
00094         return (cmssw_release[0], gr_r_version[0])

def utils_v2::get_relval_id (   file)
Returns unique relval ID (dataset name) for a given file.

Definition at line 95 of file utils_v2.py.

00096                        :
00097     """Returns unique relval ID (dataset name) for a given file."""
00098     dataset_name = re.findall('R\d{9}__([\w\d]*)__CMSSW_', file)
00099     return dataset_name[0]

def utils_v2::get_relval_max_version (   files)
Returns the file with the maximum version, where the version appears a) at the beginning of the file name,
e.g. DQM_V000M, and b) at the end of the run, e.g. _run2012-vM. M has to be max.

Definition at line 77 of file utils_v2.py.

00078                                  :
00079     """Returns file with maximum version at a) beggining of the file,
00080     e.g. DQM_V000M b) at the end of run, e.g. _run2012-vM. M has to be max."""
00081     max_file = files[0]
00082     max_v = get_relval_version(files[0])
00083     for file in files:
00084         file_v = get_relval_version(file)
00085         if file_v[1] > max_v[1] or ((file_v[1] == max_v[1]) and (file_v[0] > max_v[0])):
00086             max_file = file
00087             max_v = file_v
00088     return max_file

def utils_v2::get_relval_version (   file)

------------------- Make file pairs: RelVal utils ---------------------

Returns tuple (CMSSW version, run version) for specified file.

Definition at line 70 of file utils_v2.py.

00071                             :
00072     """Returns tuple (CMSSW version, run version) for specified file."""
00073     cmssw_version = re.findall('DQM_V(\d*)_', file)
00074     run_version = re.findall('CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-[\w\d]*_V\d*\w?(?:_[\w\d]*)?-v(\d*)__', file)
00075     if cmssw_version and run_version:
00076         return (int(cmssw_version[0]), int(run_version[0]))

def utils_v2::get_relvaldata_cmssw_version (   file)
Returns tuple (CMSSW release, GR_R version) for specified RelValData file.

Definition at line 39 of file utils_v2.py.

00040                                       :
00041     """Returns tuple (CMSSW release, GR_R version) for specified RelValData file."""
00042     cmssw_release = re.findall('(CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?)-', file)
00043     gr_r_version = re.findall('-(GR_R_\d*_V\d*\w?)(?:_RelVal)?_', file)
00044     if not gr_r_version:
00045         gr_r_version = re.findall('CMSSW_\d*_\d*_\d*(?:_[\w\d]*)?-(\w*)_RelVal_', file)
00046     if cmssw_release and gr_r_version:
00047         return (cmssw_release[0], gr_r_version[0])

def utils_v2::get_relvaldata_id (   file)

----------------- Make file pairs: RelValData utils --------------------

Returns unique relvaldata ID for a given file.

Definition at line 29 of file utils_v2.py.

00030                            :
00031     """Returns unique relvaldata ID for a given file."""
00032     run_id = re.search('R\d{9}', file)
00033     run = re.search('_RelVal_([\w\d]*)-v\d__', file)
00034     if not run:
00035         run = re.search('GR_R_\d*_V\d*C?_([\w\d]*)-v\d__', file)
00036     if run_id and run:
00037         return (run_id.group(), run.group(1))
00038     return None

def utils_v2::get_relvaldata_max_version (   files)
Returns the file with the maximum version, where the version appears a) at the beginning of the file name,
e.g. DQM_V000M, and b) at the end of the run, e.g. _run2012-vM. M has to be max.

Definition at line 57 of file utils_v2.py.

00058                                      :
00059     """Returns file with maximum version at a) beggining of the file,
00060     e.g. DQM_V000M b) at the end of run, e.g. _run2012-vM. M has to be max."""
00061     max_file = files[0]
00062     max_v = get_relvaldata_version(files[0])
00063     for file in files:
00064         file_v = get_relvaldata_version(file)
00065         if file_v[1] > max_v[1] or ((file_v[1] == max_v[1]) and (file_v[0] > max_v[0])):
00066             max_file = file
00067             max_v = file_v
00068     return max_file

def utils_v2::get_relvaldata_version (   file)
Returns tuple (CMSSW version, run version) for specified file.

Definition at line 48 of file utils_v2.py.

00049                                 :
00050     """Returns tuple (CMSSW version, run version) for specified file."""
00051     cmssw_version = re.findall('DQM_V(\d*)_', file)
00052     run_version = re.findall('_RelVal_[\w\d]*-v(\d)__', file)
00053     if not run_version:
00054         run_version = re.findall('GR_R_\d*_V\d*C?_[\w\d]*-v(\d)__', file)
00055     if cmssw_version and run_version:
00056         return (int(cmssw_version[0]), int(run_version[0]))

def utils_v2::get_size_to_download (   work_path,
  files_with_urls 
)
Returns file list to download and total size to download.

Definition at line 443 of file utils_v2.py.

00444                                                     :
00445     """Returns file list to download and total size to download."""
00446     opener = build_opener(X509CertOpen())
00447     size_to_download = 0
00448     files_to_download = []
00449     for filename, url in files_with_urls:
00450         url_file = opener.open(Request(url))
00451         size = int(url_file.headers["Content-Length"])
00452         file_path = join(work_path, filename)
00453         if exists(file_path) and getsize(file_path) / 1024 == size / 1024:
00454             print "Exists on disk %s." % filename
00455         else:
00456             size_to_download += size
00457             files_to_download.append(url)
00458     return size_to_download, files_to_download

def utils_v2::get_version (   filename)
Returns CMSSW and GR_R versions for the given filename.

Definition at line 432 of file utils_v2.py.

00433                          :
00434     """Returns CMSSW and GR_R versions for the given filename."""
00435     if is_relvaldata([filename]):
00436         version_elems = get_relvaldata_cmssw_version(filename)
00437     else:
00438         relval_version = get_relval_cmssw_version(filename)
00439         version_elems = (relval_version[0], relval_version[1][0], relval_version[1][1])
00440     version_elems = [elem.strip('_').strip('RelVal_') for elem in version_elems]
00441     return '___'.join([elem for elem in version_elems if elem])
00442 

def utils_v2::init_database (   db_path)

Utils.

Definition at line 388 of file utils_v2.py.

00389                           :
00390     print 'Initialising DB: %s...' % basename(db_path),
00391     conn = sqlite3.connect(db_path)
00392 
00393     ## Creates tables
00394     c = conn.cursor()
00395     c.execute("""CREATE TABLE IF NOT EXISTS ReleaseComparison (
00396                         id INTEGER PRIMARY KEY,
00397                         title TEXT,
00398                         release1 TEXT,
00399                         release2 TEXT,
00400                         statistical_test TEXT
00401                     );""")
00402     c.execute("""CREATE TABLE IF NOT EXISTS Directory (
00403                         id INTEGER PRIMARY KEY,
00404                         name TEXT,
00405                         parent_id INTEGER,
00406                         from_histogram_id INTEGER,
00407                         till_histogram_id INTEGER,
00408                         FOREIGN KEY (parent_id) REFERENCES Directory(id)
00409                         FOREIGN KEY (from_histogram_id) REFERENCES HistogramComparison(id)
00410                         FOREIGN KEY (till_histogram_id) REFERENCES HistogramComparison(id)
00411                     )""")
00412     c.execute("""CREATE TABLE IF NOT EXISTS RootFileComparison (
00413                         id INTEGER PRIMARY KEY,
00414                         filename1 TEXT,
00415                         filename2 TEXT,
00416                         release_comparison_id INTEGER,
00417                         directory_id INTEGER,
00418                         FOREIGN KEY (release_comparison_id) REFERENCES ReleaseComparison(id),
00419                         FOREIGN KEY (directory_id) REFERENCES Directory(id)
00420                     )""")
00421     c.execute("""CREATE TABLE IF NOT EXISTS HistogramComparison (
00422                         id INTEGER PRIMARY KEY,
00423                         name TEXT,
00424                         p_value REAL,
00425                         directory_id INTEGER,
00426                         FOREIGN KEY (directory_id) REFERENCES Directory(id)
00427                     )""")
00428 
00429     print 'Done.'
00430     return db_path
00431 

def utils_v2::is_relvaldata (   files)

----------------------- Make file pairs --------------------------

Definition at line 101 of file utils_v2.py.

00102                         :
00103     is_relvaldata_re = re.compile('_RelVal_')
00104     return any([is_relvaldata_re.search(filename) for filename in files])

def utils_v2::make_file_pairs (   files1,
  files2 
)

Definition at line 105 of file utils_v2.py.

00106                                    :
00107     print '\n#################       Analyzing files       ###################'
00108     ## Select functions to use
00109     if is_relvaldata(files1):
00110         is_relval_data = True
00111         get_cmssw_version = get_relvaldata_cmssw_version
00112         get_id = get_relvaldata_id
00113         get_max_version = get_relvaldata_max_version
00114     else:
00115         is_relval_data = False
00116         get_cmssw_version = get_relval_cmssw_version
00117         get_id = get_relval_id
00118         get_max_version = get_relval_max_version
00119 
00120     ## Divide files into groups
00121     versions1, versions2 = dict(), dict() # {version1: [file1, file2, ...], version2: [...], ...}
00122     for files, versions in (files1, versions1), (files2, versions2):
00123         for file in files:
00124             version = get_cmssw_version(file)
00125             if version:
00126                 if versions.has_key(version):
00127                     versions[version].append(file)
00128                 else:
00129                     versions[version] = [file]
00130 
00131     ## Print the division into groups
00132     print 'For RELEASE1 found file groups:'
00133     for version in versions1:
00134         print '   %s: %d files' % (str(version),  len(versions1[version]))
00135     if not versions1:
00136         print 'None.'
00137 
00138     print '\nFor RELEASE2 found file groups:'
00139     for version in versions2:
00140         print '   %s: %d files' % (str(version),  len(versions2[version]))
00141     if not versions2:
00142         print 'None.'
00143 
00144     if not len(versions1) or not len(versions2):
00145         print '\nNot enough file groups. Exiting...\n'
00146         exit()
00147 
00148     ## Pair till you find pairs.
00149     pairs = []
00150     for v1 in sorted(versions1, key=lambda x: len(versions1[x]), reverse=True):
00151         for v2 in sorted(versions2, key=lambda x: len(versions2[x]), reverse=True):
00152             if v1 == v2:
00153                 continue
00154             ## Print the groups.
00155             print '\n#################     Pairing the files     ###################'
00156             print '%s (%d files)   VS   %s (%d files):\n' % (str(v1),
00157                     len(versions1[v1]), str(v2), len(versions2[v2]))
00158 
00159             ## Pairing two versions
00160             for unique_id in set([get_id(file) for file in versions1[v1]]):
00161                 if is_relval_data:
00162                     dataset_re = re.compile(unique_id[0] + '_')
00163                     run_re = re.compile(unique_id[1])
00164                     c1_files = [file for file in versions1[v1] if dataset_re.search(file) and run_re.search(file)]
00165                     c2_files = [file for file in versions2[v2] if dataset_re.search(file) and run_re.search(file)]
00166                 else:
00167                     dataset_re = re.compile(unique_id + '_')
00168                     c1_files = [file for file in versions1[v1] if dataset_re.search(file)]
00169                     c2_files = [file for file in versions2[v2] if dataset_re.search(file)]
00170 
00171                 if len(c1_files) > 0 and len(c2_files) > 0:
00172                     first_file = get_max_version(c1_files)
00173                     second_file = get_max_version(c2_files)
00174                     print '%s\n%s\n' % (first_file, second_file)
00175                     pairs.append((first_file, second_file))
00176 
00177             print "Got %d pairs." % (len(pairs))
00178             if pairs:
00179                 return pairs
00180     print 'Found no file pairs. Exiting..\n'
00181     exit()

def utils_v2::recursive_search_online (   url,
  rel1,
  frags1,
  rel2,
  frags2 
)
Recursively searches for files that match the pattern.

Definition at line 213 of file utils_v2.py.

00214                                                             :
00215     """Recursively searches for files, that matches the pattern."""
00216     if not url:
00217         url = 'https://cmsweb.cern.ch/dqm/relval/data/browse/ROOT/RelValData/'
00218         g1, g2 = recursive_search_online(url, rel1, frags1, rel2, frags2)
00219         url = 'https://cmsweb.cern.ch/dqm/relval/data/browse/ROOT/RelVal/'
00220         g3, g4 = recursive_search_online(url, rel1, frags1, rel2, frags2)
00221         g1.update(g3), g2.update(g4)
00222         return g1, g2
00223 
00224     domain = '://'.join(urlparse(url)[:2])
00225 
00226     ## Compile regular expressions
00227     href_re = re.compile(r"<a href='([-./\w]*)'>([-./\w]*)<")
00228 
00229     def compile_res(rel, frags):
00230         frags = frags.split(',')
00231         regexps = [s for s in frags if not s.startswith('!')]
00232         regexps += ['^((?%s).)*$' % s for s in frags if s.startswith('!')]
00233         regexps += [rel + '-', '.root']
00234         return [re.compile(r) for r in regexps]
00235 
00236     res1 = compile_res(rel1, frags1)
00237     res2 = compile_res(rel2, frags2)
00238 
00239     ## Recursively find files that matches regular expressions
00240     hrefs = [(name, path) for path, name in href_re.findall(auth_wget(url))[1:]]
00241     files_with_urls1, files_with_urls2 = dict(), dict()
00242     for name, path in hrefs:
00243         if splitext(name)[1]: # If file
00244             if all([r.search(name) for r in res1]):
00245                 files_with_urls1[name] = domain + path
00246             if all([r.search(name) for r in res2]):
00247                 files_with_urls2[name] = domain + path
00248         else:
00249             print domain + path
00250             new_hrefs = href_re.findall(auth_wget(domain + path))[1:]
00251             hrefs.extend([(name, path) for path, name in new_hrefs])
00252     return files_with_urls1, files_with_urls2

def utils_v2::search_on_disk (   work_path,
  rel1,
  frags1,
  rel2,
  frags2 
)

Definition at line 253 of file utils_v2.py.

00254                                                          :
00255     if not work_path:
00256         print 'No working directory specified. Use "--dir DIR" option to ' +\
00257               'specify working directory. Exiting...'
00258         exit()
00259     ## Compile regular expressions
00260     def compile_res(rel, frags):
00261         frags = frags.split(',')
00262         regexps = [s for s in frags if not s.startswith('!')]
00263         regexps += ['^((?%s).)*$' % s for s in frags if s.startswith('!')]
00264         regexps += [rel + '-', '.root']
00265         return [re.compile(r) for r in regexps]
00266 
00267     res1 = compile_res(rel1, frags1)
00268     res2 = compile_res(rel2, frags2)
00269 
00270     ## Recursively find files that matches regular expressions
00271     files = listdir(work_path)
00272     files1, files2 = [], []
00273     for name in files:
00274         if splitext(name)[1]:
00275             if all([r.search(name) for r in res1]):
00276                 files1.append(name)
00277             if all([r.search(name) for r in res2]):
00278                 files2.append(name)
00279     return files1, files2
00280 

def utils_v2::show_status_bar (   total_size)
Shows download status.

Definition at line 477 of file utils_v2.py.

00478                                :
00479     """Shows download status."""
00480     q = show_status_bar.q
00481     total_size = total_size / (1024*1024)
00482     downloaded = 0
00483     while downloaded < total_size:
00484         try:
00485             o = q.get(timeout=20)
00486             downloaded += 1
00487             print '\r      %d/%d MB     %d%%     ' % (downloaded, total_size, 100*downloaded/total_size),
00488             sys.stdout.flush()
00489         except Empty:
00490             time.sleep(1)
00491             break

Variable Documentation

Initial value:
00001 {
00002         'Missing histogram': -1,
00003         'Histograms have different types': -2,
00004         'Object is not a histogram': -3,
00005         'Ranges of histograms are different': -4
00006     }

Exception definitions.

Definition at line 282 of file utils_v2.py.

dictionary utils_v2::tests = {KolmogorovTest.name: KolmogorovTest, Chi2Test.name: Chi2Test}

Definition at line 385 of file utils_v2.py.

Referenced by QTestHandle::attachTests(), and QTestConfigure::enableTests().