2 from __future__
import print_function
6 from cmssw_exportdb_xml
import *
7 from FileNamesHelper
import *
10 Performance profiling: 11 ncalls tottime percall cumtime percall filename:lineno(function) 12 1 0.000 0.000 71.618 71.618 <stdin>:1(foo) 13 1 0.315 0.315 71.933 71.933 <string>:1(<module>) 14 1 47.561 47.561 71.618 71.618 parserTimingReport.py:27(loadTimeLog) 15 8000 0.191 0.000 0.343 0.000 parserTimingReport.py:8(extractRSS_VSIZE) 16 1 0.000 0.000 0.000 0.000 {len} 17 2384000 3.239 0.000 3.239 0.000 {method 'append' of 'list' objects} 18 1 0.000 0.000 0.000 0.000 {method 'close' of 'file' objects} 19 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} 20 24000 0.041 0.000 0.041 0.000 {method 'partition' of 'str' objects} 21 2392000 5.804 0.000 5.804 0.000 {method 'split' of 'str' objects} 22 10791332 14.782 0.000 14.782 0.000 {method 'strip' of 'str' objects} 23 1 0.000 0.000 0.000 0.000 {method 'xreadlines' of 'file' objects} 24 1 0.000 0.000 0.000 0.000 {open} 29 """ given two lines returns the VSIZE and RSS values along with event number """ 32 >>> extractRSS_VSIZE("%MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1", \ 33 "MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0") 34 (('1', '760.25'), ('1', '923.07')) 37 if (
"Run" in line1)
and (
"Event" in line1):
38 event_number = line1.split(
'Event:')[1].
strip()
41 """ it's first or second MemoryCheck line """ 42 if (
"VSIZE" in line2)
and (
"RSS" in line2):
47 return ((record_number, RSS), (record_number, VSIZE))
52 """ gets the timing data from the logfile 55 * ModuleTime data (event_number, module_label, module_name, seconds) and 57 - with granularity of event (initial - not processed data) 70 logfile = open(log_filename,
'r') 75 memcheck_line1 =
False 81 if 'TimeModule>' in line.strip():
83 line_content_list = line.split(
' ')[0:]
85 event_number =
int(line_content_list[1])
86 if event_number != last_event:
87 record_number=record_number+1
88 last_event=event_number
90 module_label =
str(line_content_list[4])
91 module_name =
str(line_content_list[3])
92 seconds =
float(line_content_list[5])
95 mod_data.append((record_number, module_label, module_name, seconds))
96 if 'TimeEvent>' in line.strip():
98 line_content_list = line.split(
' ')[0:]
100 event_number =
int(line_content_list[1])
101 if event_number != last_event:
102 record_number=record_number+1
103 last_event=event_number
105 time_seconds =
str(line_content_list[3])
110 evt_data.append((record_number, time_seconds))
112 %MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1 113 MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0 115 if 'MemoryCheck:' in line.strip():
117 if (
not memcheck_line1):
118 memcheck_line1 = line.strip()
122 (rss, vsize) =
extractRSS_VSIZE(memcheck_line1, line.strip(), record_number)
124 vsize_data.append(vsize)
126 memcheck_line1 =
False 130 return (mod_data, evt_data, rss_data, vsize_data)
136 """ returns RootMeanSquare of items in a list """ 140 return math.sqrt(sum([(x-avg)**2
for x
in items])/len(items))
142 def calc_MinMaxAvgRMS(items, remove_first = True, f_time = lambda x: x[0], f_evt_num =
lambda x: x[1],):
143 """ returns a dict of avg, min, max, rms """ 145 cpu_time_first = f_time(items[0])
147 if len(items) > 1
and remove_first ==
True:
148 items.remove(items[0])
151 items_time =
map(f_time, items)
152 min_value =
min(items_time)
153 max_value =
max(items_time)
154 max_index = items_time.index(max_value)
155 avg_value =
float(sum(items_time)) /
float(len(items_time))
156 rms_value =
calcRMS(items_time,avg_value)
158 return {
"min": min_value,
"max": max_value,
"cputime_first": cpu_time_first,
159 "rms": rms_value,
"avg": avg_value,
160 "event_number_of_max": f_evt_num(items[max_index])}
164 """ Processes the timelog data grouping events by module and calculates min, max, avg, rms 165 Returns data as a list of dicts like: ! 170 stats: {num_events, avg, min, max, rms} 179 for time_data
in modules_timelog:
180 (event_number, module_label, module_name, seconds) = time_data
183 if groupBy ==
"module_label":
186 if groupBy ==
"name+label":
187 key = module_name +
"_" + module_label
198 times_bymod[key] = {}
208 times_bymod[key][module_label]
210 times_bymod[key].
update({module_label:{}})
214 times_bymod[key][module_label][module_name]
216 times_bymod[key][module_label].
update({module_name:[]})
219 times_bymod[key][module_label][module_name].
append((seconds,event_number))
236 for key
in times_bymod.keys():
237 for label
in times_bymod[key].
keys():
238 mod_data={
'label':label}
239 for name
in times_bymod[key][label].
keys():
240 mod_data.update({
'name':name})
241 mod_data[
'stats']=
calc_MinMaxAvgRMS(f_time=
lambda x:x[0],f_evt_num=
lambda x:x[1],items=times_bymod[key][label][name])
242 mod_data[
'stats'][
'num_events']=len(times_bymod[key][label][name])
243 times_bymod[key]=mod_data
247 timelog_f =
"TTBAR__RAW2DIGI,RECO_TimingReport.log" 248 timelog_f =
"TTBAR__GEN,SIM,DIGI,L1,DIGI2RAW,HLT_TimingReport.log" 254 "CMSSW_3_1_0_pre9/MINBIAS__RAW2DIGI,RECO_TimingReport.log",
255 "CMSSW_3_1_0_pre9/TTBAR__RAW2DIGI,RECO_TimingReport.log")
258 for release, files
in release_files.items():
259 print(
"Processing release: %s" % release)
260 for timelog_f
in files:
261 print(
"Processing file: %s" % timelog_f)
264 (mod_timelog, evt_timelog, rss_data, vsize_data) =
loadTimeLog(timelog_f)
267 print(
"Number of modules grouped by (module_label): %s" % len(mod_timelog))
271 """ We could get release from the path but that's quite ugly! """ 272 export_xml(jobID = jobID, release=release, timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data))
274 """ use to run performance profiling """ 276 timelog_f =
"test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log" 277 (modules_timelog, evt_timelog, rss_data, vsize_data) =
loadTimeLog(timelog_f)
283 xmldoc = minidom.Document()
284 export_xml(step = step, candle = candle, release=
"test", timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data), xml_doc = xmldoc)
287 if (__name__ ==
"__main__"):
S & print(S &os, JobReport::InputFile const &f)
def export_xml(release, jobID, timelog_result, xml_doc, metadata=None, edmSize_result=None, parentNode=None)
def write_xml(scenario, fileName)
def loadTimeLog(log_filename, maxsize_rad=0)
def processModuleTimeLogData(modules_timelog, groupBy="module_name")
mod_data["stats"] =calc_MinMaxAvgRMS(f_time = lambda x: x["time"], f_evt_num = lambda x: x["event_num...
def calc_MinMaxAvgRMS(items, remove_first=True, f_time=lambda x:x[0], f_evt_num=lambda x:x[1])
def extractRSS_VSIZE(line1, line2, record_number)
def getJobID_fromTimeReportLogName(logfile_name)