5 from cmssw_exportdb_xml
import *
6 from FileNamesHelper
import *
10 ncalls tottime percall cumtime percall filename:lineno(function)
11 1 0.000 0.000 71.618 71.618 <stdin>:1(foo)
12 1 0.315 0.315 71.933 71.933 <string>:1(<module>)
13 1 47.561 47.561 71.618 71.618 parserTimingReport.py:27(loadTimeLog)
14 8000 0.191 0.000 0.343 0.000 parserTimingReport.py:8(extractRSS_VSIZE)
15 1 0.000 0.000 0.000 0.000 {len}
16 2384000 3.239 0.000 3.239 0.000 {method 'append' of 'list' objects}
17 1 0.000 0.000 0.000 0.000 {method 'close' of 'file' objects}
18 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
19 24000 0.041 0.000 0.041 0.000 {method 'partition' of 'str' objects}
20 2392000 5.804 0.000 5.804 0.000 {method 'split' of 'str' objects}
21 10791332 14.782 0.000 14.782 0.000 {method 'strip' of 'str' objects}
22 1 0.000 0.000 0.000 0.000 {method 'xreadlines' of 'file' objects}
23 1 0.000 0.000 0.000 0.000 {open}
28 """ given two lines returns the VSIZE and RSS values along with event number """
31 >>> extractRSS_VSIZE("%MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1", \
32 "MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0")
33 (('1', '760.25'), ('1', '923.07'))
36 if (
"Run" in line1)
and (
"Event" in line1):
37 event_number = line1.split(
'Event:')[1].strip()
40 """ it's first or second MemoryCheck line """
41 if (
"VSIZE" in line2)
and (
"RSS" in line2):
42 RSS = line2.split(
"RSS")[1].strip().
split(
" ")[0].strip()
43 VSIZE = line2.split(
"RSS")[0].strip().
split(
"VSIZE")[1].strip().
split(
" ")[0].strip()
46 return ((record_number, RSS), (record_number, VSIZE))
51 """ gets the timing data from the logfile
54 * ModuleTime data (event_number, module_label, module_name, seconds) and
56 - with granularity of event (initial - not processed data)
69 logfile = open(log_filename,
'r')
74 memcheck_line1 =
False
80 if 'TimeModule>' in line.strip():
82 line_content_list = line.split(
' ')[0:]
84 event_number = int(line_content_list[1])
85 if event_number != last_event:
86 record_number=record_number+1
87 last_event=event_number
89 module_label = str(line_content_list[4])
90 module_name = str(line_content_list[3])
91 seconds = float(line_content_list[5])
94 mod_data.append((record_number, module_label, module_name, seconds))
95 if 'TimeEvent>' in line.strip():
97 line_content_list = line.split(
' ')[0:]
99 event_number = int(line_content_list[1])
100 if event_number != last_event:
101 record_number=record_number+1
102 last_event=event_number
104 time_seconds = str(line_content_list[3])
109 evt_data.append((record_number, time_seconds))
111 %MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1
112 MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0
114 if 'MemoryCheck:' in line.strip():
116 if (
not memcheck_line1):
117 memcheck_line1 = line.strip()
121 (rss, vsize) =
extractRSS_VSIZE(memcheck_line1, line.strip(), record_number)
123 vsize_data.append(vsize)
125 memcheck_line1 =
False
129 return (mod_data, evt_data, rss_data, vsize_data)
135 """ returns RootMeanSquare of items in a list """
139 return math.sqrt(sum([(x-avg)**2
for x
in items])/len(items))
141 def calc_MinMaxAvgRMS(items, remove_first = True, f_time = lambda x: x[0], f_evt_num =
lambda x: x[1],):
142 """ returns a dict of avg, min, max, rms """
144 cpu_time_first = f_time(items[0])
146 if len(items) > 1
and remove_first ==
True:
147 items.remove(items[0])
150 items_time = map(f_time, items)
151 min_value =
min(items_time)
152 max_value =
max(items_time)
153 max_index = items_time.index(max_value)
154 avg_value = float(sum(items_time)) / float(len(items_time))
155 rms_value =
calcRMS(items_time,avg_value)
157 return {
"min": min_value,
"max": max_value,
"cputime_first": cpu_time_first,
158 "rms": rms_value,
"avg": avg_value,
159 "event_number_of_max": f_evt_num(items[max_index])}
163 """ Processes the timelog data grouping events by module and calculates min, max, avg, rms
164 Returns data as a list of dicts like: !
169 stats: {num_events, avg, min, max, rms}
178 for time_data
in modules_timelog:
179 (event_number, module_label, module_name, seconds) = time_data
182 if groupBy ==
"module_label":
185 if groupBy ==
"name+label":
186 key = module_name +
"_" + module_label
197 times_bymod[key] = {}
207 times_bymod[key][module_label]
209 times_bymod[key].
update({module_label:{}})
213 times_bymod[key][module_label][module_name]
215 times_bymod[key][module_label].
update({module_name:[]})
218 times_bymod[key][module_label][module_name].
append((seconds,event_number))
235 for key
in times_bymod.keys():
236 for label
in times_bymod[key].
keys():
237 mod_data={
'label':label}
238 for name
in times_bymod[key][label].
keys():
239 mod_data.update({
'name':name})
240 mod_data[
'stats']=
calc_MinMaxAvgRMS(f_time=
lambda x:x[0],f_evt_num=
lambda x:x[1],items=times_bymod[key][label][name])
241 mod_data[
'stats'][
'num_events']=len(times_bymod[key][label][name])
242 times_bymod[key]=mod_data
246 timelog_f =
"TTBAR__RAW2DIGI,RECO_TimingReport.log"
247 timelog_f =
"TTBAR__GEN,SIM,DIGI,L1,DIGI2RAW,HLT_TimingReport.log"
253 "CMSSW_3_1_0_pre9/MINBIAS__RAW2DIGI,RECO_TimingReport.log",
254 "CMSSW_3_1_0_pre9/TTBAR__RAW2DIGI,RECO_TimingReport.log")
257 for release, files
in release_files.items():
258 print "Processing release: %s" % release
259 for timelog_f
in files:
260 print "Processing file: %s" % timelog_f
263 (mod_timelog, evt_timelog, rss_data, vsize_data) =
loadTimeLog(timelog_f)
266 print "Number of modules grouped by (module_label): %s" % len(mod_timelog)
270 """ We could get release from the path but that's quite ugly! """
271 export_xml(jobID = jobID, release=release, timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data))
273 """ use to run performance profiling """
275 timelog_f =
"test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log"
276 (modules_timelog, evt_timelog, rss_data, vsize_data) =
loadTimeLog(timelog_f)
282 xmldoc = minidom.Document()
283 export_xml(step = step, candle = candle, release=
"test", timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data), xml_doc = xmldoc)
286 if (__name__ ==
"__main__"):
def getJobID_fromTimeReportLogName
def processModuleTimeLogData
mod_data["stats"] =calc_MinMaxAvgRMS(f_time = lambda x: x["time"], f_evt_num = lambda x: x["event_num...
boost::dynamic_bitset append(const boost::dynamic_bitset<> &bs1, const boost::dynamic_bitset<> &bs2)
this method takes two bitsets bs1 and bs2 and returns result of bs2 appended to the end of bs1 ...