2 from __future__
import print_function
3 from __future__
import absolute_import
7 from .cmssw_exportdb_xml
import *
8 from .FileNamesHelper
import *
11 Performance profiling: 12 ncalls tottime percall cumtime percall filename:lineno(function) 13 1 0.000 0.000 71.618 71.618 <stdin>:1(foo) 14 1 0.315 0.315 71.933 71.933 <string>:1(<module>) 15 1 47.561 47.561 71.618 71.618 parserTimingReport.py:27(loadTimeLog) 16 8000 0.191 0.000 0.343 0.000 parserTimingReport.py:8(extractRSS_VSIZE) 17 1 0.000 0.000 0.000 0.000 {len} 18 2384000 3.239 0.000 3.239 0.000 {method 'append' of 'list' objects} 19 1 0.000 0.000 0.000 0.000 {method 'close' of 'file' objects} 20 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} 21 24000 0.041 0.000 0.041 0.000 {method 'partition' of 'str' objects} 22 2392000 5.804 0.000 5.804 0.000 {method 'split' of 'str' objects} 23 10791332 14.782 0.000 14.782 0.000 {method 'strip' of 'str' objects} 24 1 0.000 0.000 0.000 0.000 {method 'xreadlines' of 'file' objects} 25 1 0.000 0.000 0.000 0.000 {open} 30 """ given two lines returns the VSIZE and RSS values along with event number """ 33 >>> extractRSS_VSIZE("%MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1", \ 34 "MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0") 35 (('1', '760.25'), ('1', '923.07')) 38 if (
"Run" in line1)
and (
"Event" in line1):
39 event_number = line1.split(
'Event:')[1].
strip()
42 """ it's first or second MemoryCheck line """ 43 if (
"VSIZE" in line2)
and (
"RSS" in line2):
48 return ((record_number, RSS), (record_number, VSIZE))
53 """ gets the timing data from the logfile 56 * ModuleTime data (event_number, module_label, module_name, seconds) and 58 - with granularity of event (initial - not processed data) 71 logfile = open(log_filename,
'r') 76 memcheck_line1 =
False 82 if 'TimeModule>' in line.strip():
84 line_content_list = line.split(
' ')[0:]
86 event_number =
int(line_content_list[1])
87 if event_number != last_event:
88 record_number=record_number+1
89 last_event=event_number
91 module_label =
str(line_content_list[4])
92 module_name =
str(line_content_list[3])
93 seconds =
float(line_content_list[5])
96 mod_data.append((record_number, module_label, module_name, seconds))
97 if 'TimeEvent>' in line.strip():
99 line_content_list = line.split(
' ')[0:]
101 event_number =
int(line_content_list[1])
102 if event_number != last_event:
103 record_number=record_number+1
104 last_event=event_number
106 time_seconds =
str(line_content_list[3])
111 evt_data.append((record_number, time_seconds))
113 %MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1 114 MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0 116 if 'MemoryCheck:' in line.strip():
118 if (
not memcheck_line1):
119 memcheck_line1 = line.strip()
123 (rss, vsize) =
extractRSS_VSIZE(memcheck_line1, line.strip(), record_number)
125 vsize_data.append(vsize)
127 memcheck_line1 =
False 131 return (mod_data, evt_data, rss_data, vsize_data)
137 """ returns RootMeanSquare of items in a list """ 141 return math.sqrt(sum([(x-avg)**2
for x
in items])/len(items))
143 def calc_MinMaxAvgRMS(items, remove_first = True, f_time = lambda x: x[0], f_evt_num =
lambda x: x[1],):
144 """ returns a dict of avg, min, max, rms """ 146 cpu_time_first = f_time(items[0])
148 if len(items) > 1
and remove_first ==
True:
149 items.remove(items[0])
152 items_time =
map(f_time, items)
153 min_value =
min(items_time)
154 max_value =
max(items_time)
155 max_index = items_time.index(max_value)
156 avg_value =
float(sum(items_time)) /
float(len(items_time))
157 rms_value =
calcRMS(items_time,avg_value)
159 return {
"min": min_value,
"max": max_value,
"cputime_first": cpu_time_first,
160 "rms": rms_value,
"avg": avg_value,
161 "event_number_of_max": f_evt_num(items[max_index])}
165 """ Processes the timelog data grouping events by module and calculates min, max, avg, rms 166 Returns data as a list of dicts like: ! 171 stats: {num_events, avg, min, max, rms} 180 for time_data
in modules_timelog:
181 (event_number, module_label, module_name, seconds) = time_data
184 if groupBy ==
"module_label":
187 if groupBy ==
"name+label":
188 key = module_name +
"_" + module_label
199 times_bymod[key] = {}
209 times_bymod[key][module_label]
211 times_bymod[key].
update({module_label:{}})
215 times_bymod[key][module_label][module_name]
217 times_bymod[key][module_label].
update({module_name:[]})
220 times_bymod[key][module_label][module_name].
append((seconds,event_number))
237 for key
in times_bymod.keys():
238 for label
in times_bymod[key].
keys():
239 mod_data={
'label':label}
240 for name
in times_bymod[key][label].
keys():
241 mod_data.update({
'name':name})
242 mod_data[
'stats']=
calc_MinMaxAvgRMS(f_time=
lambda x:x[0],f_evt_num=
lambda x:x[1],items=times_bymod[key][label][name])
243 mod_data[
'stats'][
'num_events']=len(times_bymod[key][label][name])
244 times_bymod[key]=mod_data
248 timelog_f =
"TTBAR__RAW2DIGI,RECO_TimingReport.log" 249 timelog_f =
"TTBAR__GEN,SIM,DIGI,L1,DIGI2RAW,HLT_TimingReport.log" 255 "CMSSW_3_1_0_pre9/MINBIAS__RAW2DIGI,RECO_TimingReport.log",
256 "CMSSW_3_1_0_pre9/TTBAR__RAW2DIGI,RECO_TimingReport.log")
259 for release, files
in release_files.items():
260 print(
"Processing release: %s" % release)
261 for timelog_f
in files:
262 print(
"Processing file: %s" % timelog_f)
265 (mod_timelog, evt_timelog, rss_data, vsize_data) =
loadTimeLog(timelog_f)
268 print(
"Number of modules grouped by (module_label): %s" % len(mod_timelog))
272 """ We could get release from the path but that's quite ugly! """ 273 export_xml(jobID = jobID, release=release, timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data))
275 """ use to run performance profiling """ 277 timelog_f =
"test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log" 278 (modules_timelog, evt_timelog, rss_data, vsize_data) =
loadTimeLog(timelog_f)
284 xmldoc = minidom.Document()
285 export_xml(step = step, candle = candle, release=
"test", timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data), xml_doc = xmldoc)
288 if (__name__ ==
"__main__"):
S & print(S &os, JobReport::InputFile const &f)
def export_xml(release, jobID, timelog_result, xml_doc, metadata=None, edmSize_result=None, parentNode=None)
def write_xml(scenario, fileName)
def loadTimeLog(log_filename, maxsize_rad=0)
def processModuleTimeLogData(modules_timelog, groupBy="module_name")
mod_data["stats"] =calc_MinMaxAvgRMS(f_time = lambda x: x["time"], f_evt_num = lambda x: x["event_num...
def calc_MinMaxAvgRMS(items, remove_first=True, f_time=lambda x:x[0], f_evt_num=lambda x:x[1])
def extractRSS_VSIZE(line1, line2, record_number)
def getJobID_fromTimeReportLogName(logfile_name)