5 from cmssw_exportdb_xml
import *
6 from FileNamesHelper
import *
10 ncalls tottime percall cumtime percall filename:lineno(function)
11 1 0.000 0.000 71.618 71.618 <stdin>:1(foo)
12 1 0.315 0.315 71.933 71.933 <string>:1(<module>)
13 1 47.561 47.561 71.618 71.618 parserTimingReport.py:27(loadTimeLog)
14 8000 0.191 0.000 0.343 0.000 parserTimingReport.py:8(extractRSS_VSIZE)
15 1 0.000 0.000 0.000 0.000 {len}
16 2384000 3.239 0.000 3.239 0.000 {method 'append' of 'list' objects}
17 1 0.000 0.000 0.000 0.000 {method 'close' of 'file' objects}
18 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
19 24000 0.041 0.000 0.041 0.000 {method 'partition' of 'str' objects}
20 2392000 5.804 0.000 5.804 0.000 {method 'split' of 'str' objects}
21 10791332 14.782 0.000 14.782 0.000 {method 'strip' of 'str' objects}
22 1 0.000 0.000 0.000 0.000 {method 'xreadlines' of 'file' objects}
23 1 0.000 0.000 0.000 0.000 {open}
28 """ given two lines returns the VSIZE and RSS values along with event number """
31 >>> extractRSS_VSIZE("%MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1", \
32 "MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0")
33 (('1', '760.25'), ('1', '923.07'))
36 if (
"Run" in line1)
and (
"Event" in line1):
37 event_number = line1.split(
'Event:')[1].
strip()
40 """ it's first or second MemoryCheck line """
41 if (
"VSIZE" in line2)
and (
"RSS" in line2):
44 return ((event_number, RSS), (event_number, VSIZE))
49 """ gets the timing data from the logfile
52 * ModuleTime data (event_number, module_label, module_name, seconds) and
54 - with granularity of event (initial - not processed data)
67 logfile = open(log_filename,
'r')
72 memcheck_line1 =
False
75 for line
in logfile.xreadlines():
76 if 'TimeModule>' in line.strip():
78 line_content_list = line.split(
' ')[0:]
80 event_number = int(line_content_list[1])
82 module_label = str(line_content_list[4])
83 module_name = str(line_content_list[3])
84 seconds = float(line_content_list[5])
86 mod_data.append((event_number, module_label, module_name, seconds))
88 if 'TimeEvent>' in line.strip():
90 line_content_list = line.split(
' ')[0:]
92 event_number = int(line_content_list[1])
93 time_seconds = str(line_content_list[3])
96 evt_data.append((event_number, time_seconds))
98 %MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1
99 MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0
101 if 'MemoryCheck:' in line.strip():
103 if (
not memcheck_line1):
104 memcheck_line1 = line.strip()
108 vsize_data.append(vsize)
110 memcheck_line1 =
False
114 return (mod_data, evt_data, rss_data, vsize_data)
120 """ returns RootMeanSquare of items in a list """
124 return math.sqrt(sum([(x-avg)**2
for x
in items])/len(items))
126 def calc_MinMaxAvgRMS(items, remove_first = True, f_time = lambda x: x[0], f_evt_num =
lambda x: x[1],):
127 """ returns a dict of avg, min, max, rms """
129 cpu_time_first = f_time(items[0])
131 if len(items) > 1
and remove_first ==
True:
132 items.remove(items[0])
135 items_time =
map(f_time, items)
136 min_value =
min(items_time)
137 max_value =
max(items_time)
138 max_index = items_time.index(max_value)
139 avg_value = float(sum(items_time)) / float(len(items_time))
140 rms_value =
calcRMS(items_time,avg_value)
142 return {
"min": min_value,
"max": max_value,
"cputime_first": cpu_time_first,
143 "rms": rms_value,
"avg": avg_value,
144 "event_number_of_max": f_evt_num(items[max_index])}
148 """ Processes the timelog data grouping events by module and calculates min, max, avg, rms
149 Returns data as a list of dicts like: !
154 stats: {num_events, avg, min, max, rms}
163 for time_data
in modules_timelog:
164 (event_number, module_label, module_name, seconds) = time_data
167 if groupBy ==
"module_label":
170 if groupBy ==
"name+label":
171 key = module_name +
"_" + module_label
182 times_bymod[key] = {}
192 times_bymod[key][module_label]
194 times_bymod[key].
update({module_label:{}})
198 times_bymod[key][module_label][module_name]
200 times_bymod[key][module_label].
update({module_name:[]})
203 times_bymod[key][module_label][module_name].
append((seconds,event_number))
220 for key
in times_bymod.keys():
221 for label
in times_bymod[key].
keys():
222 mod_data={
'label':label}
223 for name
in times_bymod[key][label].
keys():
224 mod_data.update({
'name':name})
225 mod_data[
'stats']=
calc_MinMaxAvgRMS(f_time=
lambda x:x[0],f_evt_num=
lambda x:x[1],items=times_bymod[key][label][name])
226 mod_data[
'stats'][
'num_events']=len(times_bymod[key][label][name])
227 times_bymod[key]=mod_data
231 timelog_f =
"TTBAR__RAW2DIGI,RECO_TimingReport.log"
232 timelog_f =
"TTBAR__GEN,SIM,DIGI,L1,DIGI2RAW,HLT_TimingReport.log"
238 "CMSSW_3_1_0_pre9/MINBIAS__RAW2DIGI,RECO_TimingReport.log",
239 "CMSSW_3_1_0_pre9/TTBAR__RAW2DIGI,RECO_TimingReport.log")
242 for release, files
in release_files.items():
243 print "Processing release: %s" % release
244 for timelog_f
in files:
245 print "Processing file: %s" % timelog_f
248 (mod_timelog, evt_timelog, rss_data, vsize_data) =
loadTimeLog(timelog_f)
251 print "Number of modules grouped by (module_label): %s" % len(mod_timelog)
255 """ We could get release from the path but that's quite ugly! """
256 export_xml(jobID = jobID, release=release, timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data))
258 """ use to run performance profiling """
260 timelog_f =
"test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log"
261 (modules_timelog, evt_timelog, rss_data, vsize_data) =
loadTimeLog(timelog_f)
267 xmldoc = minidom.Document()
268 export_xml(step = step, candle = candle, release=
"test", timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data), xml_doc = xmldoc)
271 if (__name__ ==
"__main__"):
def getJobID_fromTimeReportLogName
def processModuleTimeLogData
mod_data["stats"] =calc_MinMaxAvgRMS(f_time = lambda x: x["time"], f_evt_num = lambda x: x["event_num...
const T & max(const T &a, const T &b)