CMS 3D CMS Logo

parserEdmSize.py
Go to the documentation of this file.
1 import re, os
2 from FileNamesHelper import *
3 
4 
# Matches strings that end in "_EdmSize" (case-insensitive); used to pick
# EdmSize report files out of a directory listing.
test_edm_file = re.compile(r"_EdmSize$", re.IGNORECASE)


# To match a float we could instead use: [-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?

# Splits one product line of an EdmSize report into six groups:
#   (cpp_type, module_name, module_label, process_name + ".",
#    plain_size, compressed_size)
# The module label may be empty; the sizes are captured as plain text.
edmSize_line_parsing_reg = re.compile(
    r"""
    # <C++ type>_<module_name>_[opt:_<module label>]_<process name which produced>.(dot)
    ^([^_]+)_([^_]+)_([^_]*)_([^.]+[.])
    # <plain_size> <compressed_size>
    \s([^\s]+)\s(.+)$
    """,
    re.VERBOSE,
)
18 
19 
def parseEdmSize(lines):
    """
    Parse the lines of an EdmSize report into a list of dictionaries.

    Every line is matched against ``edmSize_line_parsing_reg``. Lines that do
    not match (for instance the "File ... Events ..." header) are skipped.
    The process name captured by the expression is dropped, and the sizes are
    returned as the strings found in the report (no numeric conversion).

    Each dictionary has the keys ``cpp_type``, ``module_name``,
    ``module_label``, ``size_uncompressed`` and ``size_compressed``.

    Example of data:
    >>> report = (
    ...     'File MINBIAS__RAW2DIGI,RECO.root Events 8000',
    ...     'TrackingRecHitsOwned_generalTracks__RECO. 407639 18448.4',
    ...     'recoPreshowerClusterShapes_multi5x5PreshowerClusterShape_multi5x5PreshowerXClustersShape_RECO. 289.787 41.3311',
    ...     'recoPreshowerClusterShapes_multi5x5PreshowerClusterShape_multi5x5PreshowerYClustersShape_RECO. 289.767 47.2686',
    ...     'recoCaloClustersToOnerecoClusterShapesAssociation_hybridSuperClusters_hybridShapeAssoc_RECO. 272.111 65.4852')
    >>> products = parseEdmSize(lines = report)
    >>> len(products)
    4
    >>> products[0] == {'module_name': 'generalTracks', 'module_label': '', 'size_compressed': '18448.4', 'cpp_type': 'TrackingRecHitsOwned', 'size_uncompressed': '407639'}
    True
    >>> [p['module_label'] for p in products]
    ['', 'multi5x5PreshowerXClustersShape', 'multi5x5PreshowerYClustersShape', 'hybridShapeAssoc']

    """
    products = []
    for line in lines:
        match = edmSize_line_parsing_reg.search(line)
        if not match:
            # we filter out not matched lines
            continue

        # reg returns (cpp_type, mod_name, mod_label, proc_name, size_uncomp, size_comp);
        # the proc_name is deliberately left out of the resulting dictionary.
        cpp_type, mod_name, mod_label, _proc_name, size_uncomp, size_comp = match.groups()
        products.append({
            "cpp_type": cpp_type,
            "module_name": mod_name,
            "module_label": mod_label,
            "size_uncompressed": size_uncomp,
            "size_compressed": size_comp,
        })
    return products
40 
def getEdmReport(path, candle, step):
    """
    Get the parsed EdmSize report for the given candle and step in ``path``.

    The directory is scanned for regular files whose name matches
    ``test_edm_file``; the first one whose job ID matches the candle and step
    is read and handed to ``parseEdmSize``.

    Returns the list of product dictionaries built by ``parseEdmSize``, or
    ``False`` when no matching EdmSize report exists in ``path``.
    """
    edm_files = [os.path.join(path, f) for f in os.listdir(path)
                 if test_edm_file.search(f)
                 and os.path.isfile(os.path.join(path, f))]

    # Pick the report that belongs to the current candle and step.
    # TODO: a function candle, step --> file name
    try:
        # Everything is in the same dir, so candle and step are more than
        # enough to identify the report.
        # NOTE(review): the try also covers the helper calls (presumably
        # provided by FileNamesHelper), so an IndexError raised inside them is
        # reported as a missing report too -- kept as in the original.
        edm_fn = [f for f in edm_files
                  if f_candle_and_step_inJobID(candle, step, getJobID_fromEdmSizeFileName(f))][0]
    except IndexError:
        # this would happen if there's no EdmSize report existing !!!
        return False

    # Read the file into lines; the context manager closes the handle even if
    # reading fails.
    with open(edm_fn) as edm_file:
        lines = edm_file.readlines()

    # return the parsed data
    return parseEdmSize(lines)
66 
def getEdmReport(path, candle, step)
def getJobID_fromEdmSizeFileName(logfile_name)
def parseEdmSize(lines)