5 from cmssw_exportdb_xml
import *
6 from FileNamesHelper
import *
10 ncalls tottime percall cumtime percall filename:lineno(function) 11 1 0.000 0.000 71.618 71.618 <stdin>:1(foo) 12 1 0.315 0.315 71.933 71.933 <string>:1(<module>) 13 1 47.561 47.561 71.618 71.618 parserTimingReport.py:27(loadTimeLog) 14 8000 0.191 0.000 0.343 0.000 parserTimingReport.py:8(extractRSS_VSIZE) 15 1 0.000 0.000 0.000 0.000 {len} 16 2384000 3.239 0.000 3.239 0.000 {method 'append' of 'list' objects} 17 1 0.000 0.000 0.000 0.000 {method 'close' of 'file' objects} 18 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} 19 24000 0.041 0.000 0.041 0.000 {method 'partition' of 'str' objects} 20 2392000 5.804 0.000 5.804 0.000 {method 'split' of 'str' objects} 21 10791332 14.782 0.000 14.782 0.000 {method 'strip' of 'str' objects} 22 1 0.000 0.000 0.000 0.000 {method 'xreadlines' of 'file' objects} 23 1 0.000 0.000 0.000 0.000 {open} 28 """ given two lines returns the VSIZE and RSS values along with event number """ 31 >>> extractRSS_VSIZE("%MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1", \ 32 "MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0") 33 (('1', '760.25'), ('1', '923.07')) 36 if (
"Run" in line1)
and (
"Event" in line1):
37 event_number = line1.split(
'Event:')[1].
strip()
40 """ it's first or second MemoryCheck line """ 41 if (
"VSIZE" in line2)
and (
"RSS" in line2):
46 return ((record_number, RSS), (record_number, VSIZE))
51 """ gets the timing data from the logfile 54 * ModuleTime data (event_number, module_label, module_name, seconds) and 56 - with granularity of event (initial - not processed data) 69 logfile = open(log_filename,
'r') 74 memcheck_line1 =
False 80 if 'TimeModule>' in line.strip():
82 line_content_list = line.split(
' ')[0:]
84 event_number =
int(line_content_list[1])
85 if event_number != last_event:
86 record_number=record_number+1
87 last_event=event_number
89 module_label =
str(line_content_list[4])
90 module_name =
str(line_content_list[3])
91 seconds =
float(line_content_list[5])
94 mod_data.append((record_number, module_label, module_name, seconds))
95 if 'TimeEvent>' in line.strip():
97 line_content_list = line.split(
' ')[0:]
99 event_number =
int(line_content_list[1])
100 if event_number != last_event:
101 record_number=record_number+1
102 last_event=event_number
104 time_seconds =
str(line_content_list[3])
109 evt_data.append((record_number, time_seconds))
111 %MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1 112 MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0 114 if 'MemoryCheck:' in line.strip():
116 if (
not memcheck_line1):
117 memcheck_line1 = line.strip()
121 (rss, vsize) =
extractRSS_VSIZE(memcheck_line1, line.strip(), record_number)
123 vsize_data.append(vsize)
125 memcheck_line1 =
False 129 return (mod_data, evt_data, rss_data, vsize_data)
135 """ returns RootMeanSquare of items in a list """ 139 return math.sqrt(sum([(x-avg)**2
for x
in items])/len(items))
141 def calc_MinMaxAvgRMS(items, remove_first = True, f_time = lambda x: x[0], f_evt_num =
lambda x: x[1],):
142 """ returns a dict of avg, min, max, rms """ 144 cpu_time_first = f_time(items[0])
146 if len(items) > 1
and remove_first ==
True:
147 items.remove(items[0])
150 items_time =
map(f_time, items)
151 min_value =
min(items_time)
152 max_value =
max(items_time)
153 max_index = items_time.index(max_value)
154 avg_value =
float(sum(items_time)) /
float(len(items_time))
155 rms_value =
calcRMS(items_time,avg_value)
157 return {
"min": min_value,
"max": max_value,
"cputime_first": cpu_time_first,
158 "rms": rms_value,
"avg": avg_value,
159 "event_number_of_max": f_evt_num(items[max_index])}
163 """ Processes the timelog data grouping events by module and calculates min, max, avg, rms 164 Returns data as a list of dicts like: ! 169 stats: {num_events, avg, min, max, rms} 178 for time_data
in modules_timelog:
179 (event_number, module_label, module_name, seconds) = time_data
182 if groupBy ==
"module_label":
185 if groupBy ==
"name+label":
186 key = module_name +
"_" + module_label
197 times_bymod[key] = {}
207 times_bymod[key][module_label]
209 times_bymod[key].
update({module_label:{}})
213 times_bymod[key][module_label][module_name]
215 times_bymod[key][module_label].
update({module_name:[]})
218 times_bymod[key][module_label][module_name].
append((seconds,event_number))
235 for key
in times_bymod.keys():
236 for label
in times_bymod[key].
keys():
237 mod_data={
'label':label}
238 for name
in times_bymod[key][label].
keys():
239 mod_data.update({
'name':name})
240 mod_data[
'stats']=
calc_MinMaxAvgRMS(f_time=
lambda x:x[0],f_evt_num=
lambda x:x[1],items=times_bymod[key][label][name])
241 mod_data[
'stats'][
'num_events']=len(times_bymod[key][label][name])
242 times_bymod[key]=mod_data
246 timelog_f =
"TTBAR__RAW2DIGI,RECO_TimingReport.log" 247 timelog_f =
"TTBAR__GEN,SIM,DIGI,L1,DIGI2RAW,HLT_TimingReport.log" 253 "CMSSW_3_1_0_pre9/MINBIAS__RAW2DIGI,RECO_TimingReport.log",
254 "CMSSW_3_1_0_pre9/TTBAR__RAW2DIGI,RECO_TimingReport.log")
257 for release, files
in release_files.items():
258 print "Processing release: %s" % release
259 for timelog_f
in files:
260 print "Processing file: %s" % timelog_f
263 (mod_timelog, evt_timelog, rss_data, vsize_data) =
loadTimeLog(timelog_f)
266 print "Number of modules grouped by (module_label): %s" % len(mod_timelog)
270 """ We could get release from the path but that's quite ugly! """ 271 export_xml(jobID = jobID, release=release, timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data))
273 """ use to run performance profiling """ 275 timelog_f =
"test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log" 276 (modules_timelog, evt_timelog, rss_data, vsize_data) =
loadTimeLog(timelog_f)
282 xmldoc = minidom.Document()
283 export_xml(step = step, candle = candle, release=
"test", timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data), xml_doc = xmldoc)
286 if (__name__ ==
"__main__"):
def export_xml(release, jobID, timelog_result, xml_doc, metadata=None, edmSize_result=None, parentNode=None)
def write_xml(scenario, fileName)
def loadTimeLog(log_filename, maxsize_rad=0)
def processModuleTimeLogData(modules_timelog, groupBy="module_name")
mod_data["stats"] =calc_MinMaxAvgRMS(f_time = lambda x: x["time"], f_evt_num = lambda x: x["event_num...
def calc_MinMaxAvgRMS(items, remove_first=True, f_time=lambda x:x[0], f_evt_num=lambda x:x[1])
def extractRSS_VSIZE(line1, line2, record_number)
def getJobID_fromTimeReportLogName(logfile_name)