CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
Functions
parserTimingReport Namespace Reference

Functions

def calc_MinMaxAvgRMS
 
def calcRMS
 
def extractRSS_VSIZE
 
def loadTimeLog
 
def manual_run
 
def perf_profile
 
def processModuleTimeLogData
 mod_data["stats"] =calc_MinMaxAvgRMS(f_time = lambda x: x["time"], f_evt_num = lambda x: x["event_number"], items = times_bymod[mod_name]) More...
 

Function Documentation

def parserTimingReport.calc_MinMaxAvgRMS (   items,
  remove_first = True,
  f_time = lambda x: x[0],
  f_evt_num = lambda x: x[1] 
)
returns a dict of avg, min, max, rms 

Definition at line 126 of file parserTimingReport.py.

References calcRMS(), Association.map, max(), and min.

Referenced by processModuleTimeLogData().

def calc_MinMaxAvgRMS(items, remove_first = True, f_time = lambda x: x[0], f_evt_num = lambda x: x[1]):
    """Return min, max, average and RMS of the times found in ``items``.

    Args:
        items: non-empty list of records. NOTE: when ``remove_first`` is true
            and the list holds more than one record, the first record is
            deleted from this very list (in place), so the caller sees the
            shortened list afterwards.
        remove_first: drop the first record before computing the statistics
            (its time is still reported as ``cputime_first``).
        f_time: callable extracting the time value from a record
            (default: element 0).
        f_evt_num: callable extracting the event number from a record
            (default: element 1).

    Returns:
        dict with the keys "min", "max", "avg", "rms", "cputime_first" and
        "event_number_of_max".

    Raises:
        IndexError: if ``items`` is empty.
    """
    # Save the cpu time of the first event before removing the first result!
    cpu_time_first = f_time(items[0])

    # TODO: if there is only one event - we have a problem -> do we eliminate
    #       the whole module?
    # TODO: the record is removed from the caller's data as well, because the
    #       list is neither saved nor copied.
    if remove_first and len(items) > 1:
        del items[0]

    # Build a real list: the times are scanned several times below (min, max,
    # index, sum), which a one-shot iterator cannot support.
    items_time = [f_time(item) for item in items]
    min_value = min(items_time)
    max_value = max(items_time)
    max_index = items_time.index(max_value)
    avg_value = float(sum(items_time)) / float(len(items_time))
    rms_value = calcRMS(items_time, avg_value)

    return {"min": min_value, "max": max_value, "cputime_first": cpu_time_first,
            "rms": rms_value, "avg": avg_value,
            "event_number_of_max": f_evt_num(items[max_index])}
146 
#define min(a, b)
Definition: mlp_lapack.h:161
dictionary map
Definition: Association.py:160
const T & max(const T &a, const T &b)
def parserTimingReport.calcRMS (   items,
  avg 
)
returns RootMeanSquare  of items in a list 

Definition at line 119 of file parserTimingReport.py.

Referenced by calc_MinMaxAvgRMS().

def calcRMS(items,avg):
    """Return the spread of ``items`` around ``avg``.

    Despite the name this is the "physics" RMS, i.e. the standard deviation
    sqrt(sum((x - avg)**2) / N), and not sqrt(sum(x**2) / N).

    Args:
        items: sequence of numbers.
        avg: the value the deviations are measured from (normally the mean
            of ``items``).

    Returns:
        The square root of the mean squared deviation, as a float.

    Raises:
        ZeroDivisionError: if ``items`` is empty.
    """
    squared_deviations = sum([(x - avg) ** 2 for x in items])
    # Force float division: with all-integer input a plain "/" truncates the
    # mean under Python 2 before the square root is taken.
    return math.sqrt(float(squared_deviations) / len(items))
def parserTimingReport.extractRSS_VSIZE (   line1,
  line2 
)
>>> extractRSS_VSIZE("%MSG-w MemoryCheck:  PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1", \
             "MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0")
(('1', '760.25'), ('1', '923.07'))

Definition at line 29 of file parserTimingReport.py.

References split, and strip().

Referenced by loadTimeLog().

29 
def extractRSS_VSIZE(line1, line2):
    """Parse the two lines of a MemoryCheck report into RSS and VSIZE pairs.

    ``line1`` is the header line carrying "Run" and "Event:", ``line2`` the
    line carrying the "VSIZE" and "RSS" values. Returns
    ``((event_number, RSS), (event_number, VSIZE))`` with every element kept
    as a string, or ``False`` when either line lacks its markers.

    >>> extractRSS_VSIZE("%MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1",
    ...                  "MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0")
    (('1', '760.25'), ('1', '923.07'))
    """
    # The first line must identify the run and the event.
    if "Run" not in line1 or "Event" not in line1:
        return False
    event_number = line1.split('Event:')[1].strip()

    # The second line must carry both memory figures.
    if "VSIZE" not in line2 or "RSS" not in line2:
        return False

    # split() is used instead of partition() for backward compatibility with py2.3
    around_rss = line2.split("RSS")
    rss_value = around_rss[1].strip().split(" ")[0].strip()
    after_vsize = around_rss[0].strip().split("VSIZE")[1]
    vsize_value = after_vsize.strip().split(" ")[0].strip()

    return ((event_number, rss_value), (event_number, vsize_value))
47 
void strip(std::string &input, const std::string &blanks=" \n\t")
Definition: stringTools.cc:16
double split
Definition: MVATrainer.cc:139
def parserTimingReport.loadTimeLog (   log_filename,
  maxsize_rad = 0 
)
gets the timing data from the logfile
 returns 4 lists:

    * ModuleTime data (event_number, module_label, module_name, seconds) and
    * EventTime data
        - with granularity of event (initial - not processed data)
    * RSS per event
    * VSIZE per event

Definition at line 48 of file parserTimingReport.py.

References extractRSS_VSIZE().

Referenced by manual_run(), perf_profile(), and cmsPerfSuiteHarvest.process_timesize_dir().

48 
def loadTimeLog(log_filename, maxsize_rad = 0): #TODO: remove maxsize to read, used for debugging
    """Read the timing data out of a log file.

    Args:
        log_filename: path of the log file to parse.
        maxsize_rad: currently unused (kept for backward compatibility).

    Returns:
        A tuple of 4 lists:

        * ModuleTime data: (event_number, module_label, module_name, seconds)
        * EventTime data: (event_number, time_seconds), with the granularity
          of an event (initial - not processed data); the time is kept as a
          string
        * RSS per event: (event_number, RSS)
        * VSIZE per event: (event_number, VSIZE)
    """
    # ----- format of logfile ----
    # Report columns headings for modules: eventnum runnum modulelabel modulename timetaken
    # e.g. TimeModule> 1 1 csctfDigis CSCTFUnpacker 0.0624561

    mod_data = []
    evt_data = []
    rss_data = []
    vsize_data = []

    # Holds the first line of a two-line MemoryCheck report until its second
    # line shows up; False while no report is pending.
    memcheck_line1 = False

    logfile = open(log_filename, 'r')
    try:
        # Iterate the file directly so it is processed line by line.
        for raw_line in logfile:
            line = raw_line.strip()

            if 'TimeModule>' in line:
                fields = line.split(' ')

                event_number = int(fields[1])
                # module label and name were mixed up in the original doc
                module_label = str(fields[4])
                module_name = str(fields[3])
                seconds = float(fields[5])

                mod_data.append((event_number, module_label, module_name, seconds))

            if 'TimeEvent>' in line:
                fields = line.split(' ')

                event_number = int(fields[1])
                time_seconds = str(fields[3])

                #TODO: what are the other [last two] numbers? Real time? smf else? TimeEvent> 1 1 15.3982 13.451 13.451
                evt_data.append((event_number, time_seconds))

            # A memory report spans two consecutive lines:
            #   %MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1
            #   MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0
            if 'MemoryCheck:' in line:
                if not memcheck_line1:
                    # this is the first line out of two
                    memcheck_line1 = line
                else:
                    memory_info = extractRSS_VSIZE(memcheck_line1, line)
                    # extractRSS_VSIZE returns False for a pair it cannot
                    # parse; skip such a pair instead of failing to unpack it.
                    if memory_info:
                        (rss, vsize) = memory_info
                        rss_data.append(rss)
                        vsize_data.append(vsize)
            else:
                memcheck_line1 = False
    finally:
        # Release the file handle even when a malformed line raises.
        logfile.close()

    return (mod_data, evt_data, rss_data, vsize_data)
116 
117 
118 
def parserTimingReport.manual_run ( )

Definition at line 230 of file parserTimingReport.py.

References cmssw_exportdb_xml.export_xml(), FileNamesHelper.getJobID_fromTimeReportLogName(), loadTimeLog(), and processModuleTimeLogData().

def manual_run():
    """Process a hard-coded set of timing report logs and export them as XML.

    For every release in the table below, each listed log file is loaded
    with loadTimeLog(), its module times are grouped by module label with
    processModuleTimeLogData(), and the result is handed to export_xml().
    """
    #TODO: get STEP name from filename
    release_files = {

        "CMSSW_3_1_0_pre9":
            (
            "CMSSW_3_1_0_pre9/MINBIAS__RAW2DIGI,RECO_TimingReport.log",
            "CMSSW_3_1_0_pre9/TTBAR__RAW2DIGI,RECO_TimingReport.log")
        ## "CMSSW_3_1_0_pre10":
    }
    for release, files in release_files.items():
        print("Processing release: %s" % release)
        for timelog_f in files:
            print("Processing file: %s" % timelog_f)

            # TODO: automaticaly detect type of report file!!!
            (mod_timelog, evt_timelog, rss_data, vsize_data) = loadTimeLog(timelog_f)

            mod_timelog = processModuleTimeLogData(mod_timelog, groupBy = "module_label")
            print("Number of modules grouped by (module_label): %s" % len(mod_timelog))

            # The name jobID used to be passed below without ever being
            # assigned (NameError). Bind it to what the file-name helper
            # returns.
            # NOTE(review): confirm that export_xml expects this whole value
            # as its jobID argument.
            jobID = getJobID_fromTimeReportLogName(timelog_f)

            # We could get release from the path but that's quite ugly!
            export_xml(jobID = jobID, release=release, timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data))
258 
""" use to run performance profiling """
def getJobID_fromTimeReportLogName
def processModuleTimeLogData
mod_data["stats"] =calc_MinMaxAvgRMS(f_time = lambda x: x["time"], f_evt_num = lambda x: x["event_num...
def parserTimingReport.perf_profile ( )

Definition at line 259 of file parserTimingReport.py.

References cmssw_exportdb_xml.export_xml(), FileNamesHelper.getJobID_fromTimeReportLogName(), loadTimeLog(), processModuleTimeLogData(), and MCScenario_CRAFT1_22X.write_xml().

def perf_profile():
    """Run the whole parse-and-export chain once on a test log.

    Loads the test timing report, groups the module times by module label,
    exports the result into a fresh XML document and writes that document
    to "test_xml_output.xml". Intended as a target for performance profiling.
    """
    log_path = "test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log"

    loaded = loadTimeLog(log_path)
    (raw_module_times, event_times, rss_per_event, vsize_per_event) = loaded

    grouped_module_times = processModuleTimeLogData(raw_module_times, groupBy = "module_label")

    # Only candle and step are used further down; the unpacking still
    # requires the helper to return exactly five fields.
    job_fields = getJobID_fromTimeReportLogName(log_path)
    (candle, step, pileup_type, conditions, event_content) = job_fields

    document = minidom.Document()
    results = (grouped_module_times, event_times, rss_per_event, vsize_per_event)
    export_xml(step = step, candle = candle, release="test", timelog_result=results, xml_doc = document)
    write_xml(document, "test_xml_output.xml")
def getJobID_fromTimeReportLogName
def processModuleTimeLogData
mod_data["stats"] =calc_MinMaxAvgRMS(f_time = lambda x: x["time"], f_evt_num = lambda x: x["event_num...
def parserTimingReport.processModuleTimeLogData (   modules_timelog,
  groupBy = "module_name" 
)

mod_data["stats"] =calc_MinMaxAvgRMS(f_time = lambda x: x["time"], f_evt_num = lambda x: x["event_number"], items = times_bymod[mod_name])

Processes the timelog data grouping events by module and calculates min, max, avg, rms 
Returns data as a dict of dicts like:

 {
    <module_name>: 
        {name:, label:, 
            stats: {num_events, avg, min, max, rms} 
 } 

mod_data["stats"]["num_events"] = len(times_bymod[mod_name])

times_bymod[mod_name] = mod_data Let's rewrite this using the dictionary we now have without any logical change (could do with some...):

Definition at line 147 of file parserTimingReport.py.

References python.multivaluedict.append(), calc_MinMaxAvgRMS(), relativeConstraints.keys, and update.

Referenced by manual_run(), perf_profile(), and cmsPerfSuiteHarvest.process_timesize_dir().

def processModuleTimeLogData(modules_timelog, groupBy = "module_name"):
    """Group the module timing records and compute statistics per group.

    Args:
        modules_timelog: iterable of
            (event_number, module_label, module_name, seconds) tuples.
        groupBy: "module_label" groups by label, "name+label" by
            "<name>_<label>", any other value by module name.

    Returns:
        A dict like:

        {
            <group key>:
                {name:, label:,
                 stats: {num_events, avg, min, max, rms, ...}}
        }

        When several label/name combinations share one group key, only the
        combination iterated last is kept for that key.
    """
    # Intermediate layout, chosen to save memory by not repeating the label
    # and the name in every record:
    #   {group key: {module_label: {module_name: [(seconds, event_number), ...]}}}
    grouped = {}

    for (event_number, module_label, module_name, seconds) in modules_timelog:
        # group times of modules By label or name, TODO: maybe both
        if groupBy == "module_label":
            group_key = module_label
        elif groupBy == "name+label":
            group_key = module_name + "_" + module_label
        else:
            group_key = module_name

        # The nested containers are created the first time a key / label /
        # name is seen; afterwards the measurement is simply appended.
        per_label = grouped.setdefault(group_key, {})
        per_name = per_label.setdefault(module_label, {})
        per_name.setdefault(module_name, []).append((seconds, event_number))

    # Calculate Min, Max, Avg, RMS for each group and replace the raw
    # measurements by the summary, which is the final data to be imported.
    for group_key in grouped.keys():
        for label, per_name in grouped[group_key].items():
            summary = {'label': label}
            for name, samples in per_name.items():
                summary['name'] = name
                summary['stats'] = calc_MinMaxAvgRMS(items=samples, f_time=lambda x: x[0], f_evt_num=lambda x: x[1])
                # Counted after the statistics call on purpose: the count is
                # taken from the list as that call left it.
                summary['stats']['num_events'] = len(samples)
        grouped[group_key] = summary
    return grouped
def processModuleTimeLogData
mod_data["stats"] =calc_MinMaxAvgRMS(f_time = lambda x: x["time"], f_evt_num = lambda x: x["event_num...
#define update(a, b)