CMS 3D CMS Logo

parserEdmSize.py
Go to the documentation of this file.
1 from __future__ import absolute_import
2 import re, os
3 from .FileNamesHelper import *
4 
5 
# Recognises names that end in "_EdmSize" (case-insensitive); used to pick
# EdmSize report files out of a directory listing.
test_edm_file = re.compile("_EdmSize$", flags=re.IGNORECASE)


# NOTE: a float could instead be matched with:
#   [-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?

# One product line of an EdmSize report. Six groups are captured, in order:
# C++ type, module name, module label (may be empty), process name including
# its trailing dot, uncompressed size, compressed size.
edmSize_line_parsing_reg = re.compile(
    r"""
    # <C++ type>_<module_name>_[opt:_<module label>]_<process name which produced>.(dot)
    ^([^_]+)_([^_]+)_([^_]*)_([^.]+[.])
    # <plain_size> <compressed_size>
    \s([^\s]+)\s(.+)$
    """,
    flags=re.VERBOSE,
)
19 
20 
def parseEdmSize(lines):
    """
    Parse the product lines of an EdmSize report into a list of dictionaries.

    Each line matched by ``edmSize_line_parsing_reg`` produces one dictionary
    with the keys ``cpp_type``, ``module_name``, ``module_label``,
    ``size_uncompressed`` and ``size_compressed``. All values are the matched
    substrings; sizes are not converted to numbers. Lines that do not match
    (for instance the "File ... Events ..." header) are dropped, and the
    process name captured by the pattern is not included in the result.

    Example of data:
    >>> parseEdmSize(lines = ('File MINBIAS__RAW2DIGI,RECO.root Events 8000', 'TrackingRecHitsOwned_generalTracks__RECO. 407639 18448.4')) == [{'cpp_type': 'TrackingRecHitsOwned', 'module_name': 'generalTracks', 'module_label': '', 'size_uncompressed': '407639', 'size_compressed': '18448.4'}]
    True

    """
    products = []
    for line in lines:
        matched = edmSize_line_parsing_reg.search(line)
        if not matched:
            # not a product line: skip it
            continue

        # groups are (cpp_type, mod_name, mod_label, proc_name, size_uncomp, size_comp);
        # the process name is deliberately left out of the dictionary
        cpp_type, mod_name, mod_label, _proc_name, size_uncomp, size_comp = matched.groups()
        products.append({
            "cpp_type": cpp_type,
            "module_name": mod_name,
            "module_label": mod_label,
            "size_uncompressed": size_uncomp,
            "size_compressed": size_comp,
        })
    return products
41 
def getEdmReport(path, candle, step):
    """
    Find and parse the EdmSize report for the given candle and step in `path`.

    The directory `path` is scanned for regular files whose name matches
    ``test_edm_file``. Among those, the first one whose job ID (as extracted
    by ``getJobID_fromEdmSizeFileName``) is accepted by
    ``f_candle_and_step_inJobID`` for `candle` and `step` is read and handed
    to ``parseEdmSize``.

    Returns the list produced by ``parseEdmSize``, or ``False`` when no
    matching EdmSize report is found.
    """
    edm_files = []
    for name in os.listdir(path):
        full_name = os.path.join(path, name)
        if test_edm_file.search(name) and os.path.isfile(full_name):
            edm_files.append(full_name)

    # Pick the report belonging to the current candle and step.
    # TODO: a function candle, step --> file name
    try:
        # all files are in the same dir, so candle and step are enough to
        # identify the report
        edm_fn = [f for f in edm_files
                  if f_candle_and_step_inJobID(candle, step, getJobID_fromEdmSizeFileName(f))][0]
    except IndexError:
        # this happens if there is no EdmSize report for this candle/step
        return False

    # Read the whole report; the with-block closes the file even if the
    # read fails.
    with open(edm_fn) as edm_file:
        lines = edm_file.readlines()

    # return the parsed data
    return parseEdmSize(lines)
67 
def getEdmReport(path, candle, step)
def getJobID_fromEdmSizeFileName(logfile_name)
def parseEdmSize(lines)