00001
00002 import sys
00003 import math
00004 import re
00005 from cmssw_exportdb_xml import *
00006 from FileNamesHelper import *
00007
00008 """
00009 Performance profiling:
00010 ncalls tottime percall cumtime percall filename:lineno(function)
00011 1 0.000 0.000 71.618 71.618 <stdin>:1(foo)
00012 1 0.315 0.315 71.933 71.933 <string>:1(<module>)
00013 1 47.561 47.561 71.618 71.618 parserTimingReport.py:27(loadTimeLog)
00014 8000 0.191 0.000 0.343 0.000 parserTimingReport.py:8(extractRSS_VSIZE)
00015 1 0.000 0.000 0.000 0.000 {len}
00016 2384000 3.239 0.000 3.239 0.000 {method 'append' of 'list' objects}
00017 1 0.000 0.000 0.000 0.000 {method 'close' of 'file' objects}
00018 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
00019 24000 0.041 0.000 0.041 0.000 {method 'partition' of 'str' objects}
00020 2392000 5.804 0.000 5.804 0.000 {method 'split' of 'str' objects}
00021 10791332 14.782 0.000 14.782 0.000 {method 'strip' of 'str' objects}
00022 1 0.000 0.000 0.000 0.000 {method 'xreadlines' of 'file' objects}
00023 1 0.000 0.000 0.000 0.000 {open}
00024
00025 """
00026
00027
00028 """ given two lines returns the VSIZE and RSS values along with event number """
00029 def extractRSS_VSIZE(line1, line2, record_number):
00030 """
00031 >>> extractRSS_VSIZE("%MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1", \
00032 "MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0")
00033 (('1', '760.25'), ('1', '923.07'))
00034 """
00035
00036 if ("Run" in line1) and ("Event" in line1):
00037 event_number = line1.split('Event:')[1].strip()
00038 else: return False
00039
00040 """ it's first or second MemoryCheck line """
00041 if ("VSIZE" in line2) and ("RSS" in line2):
00042 RSS = line2.split("RSS")[1].strip().split(" ")[0].strip()
00043 VSIZE = line2.split("RSS")[0].strip().split("VSIZE")[1].strip().split(" ")[0].strip()
00044
00045
00046 return ((record_number, RSS), (record_number, VSIZE))
00047 else: return False
00048
00049
00050 def loadTimeLog(log_filename, maxsize_rad = 0):
00051 """ gets the timing data from the logfile
00052 returns 4 lists:
00053
00054 * ModuleTime data (event_number, module_label, module_name, seconds) and
00055 * EventTime data
00056 - with granularity of event (initial - not processed data)
00057 * RSS per event
00058 * VSIZE per event
00059 """
00060
00061
00062
00063
00064 mod_data = []
00065 evt_data = []
00066 rss_data = []
00067 vsize_data = []
00068
00069 logfile = open(log_filename, 'r')
00070
00071
00072
00073
00074 memcheck_line1 = False
00075
00076 record_number=0
00077 last_record=0
00078 last_event=0
00079 for line in logfile.xreadlines():
00080 if 'TimeModule>' in line.strip():
00081 line = line.strip()
00082 line_content_list = line.split(' ')[0:]
00083
00084 event_number = int(line_content_list[1])
00085 if event_number != last_event:
00086 record_number=record_number+1
00087 last_event=event_number
00088
00089 module_label = str(line_content_list[4])
00090 module_name = str(line_content_list[3])
00091 seconds = float(line_content_list[5])
00092
00093
00094 mod_data.append((record_number, module_label, module_name, seconds))
00095 if 'TimeEvent>' in line.strip():
00096 line = line.strip()
00097 line_content_list = line.split(' ')[0:]
00098
00099 event_number = int(line_content_list[1])
00100 if event_number != last_event:
00101 record_number=record_number+1
00102 last_event=event_number
00103
00104 time_seconds = str(line_content_list[3])
00105
00106
00107
00108
00109 evt_data.append((record_number, time_seconds))
00110 """
00111 %MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1
00112 MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0
00113 """
00114 if 'MemoryCheck:' in line.strip():
00115
00116 if (not memcheck_line1):
00117 memcheck_line1 = line.strip()
00118 else:
00119
00120
00121 (rss, vsize) = extractRSS_VSIZE(memcheck_line1, line.strip(), record_number)
00122 rss_data.append(rss)
00123 vsize_data.append(vsize)
00124 else:
00125 memcheck_line1 = False
00126
00127 logfile.close()
00128
00129 return (mod_data, evt_data, rss_data, vsize_data)
00130
00131
00132
00133
00134 def calcRMS(items,avg):
00135 """ returns RootMeanSquare of items in a list """
00136
00137
00138
00139 return math.sqrt(sum([(x-avg)**2 for x in items])/len(items))
00140
00141 def calc_MinMaxAvgRMS(items, remove_first = True, f_time = lambda x: x[0], f_evt_num = lambda x: x[1],):
00142 """ returns a dict of avg, min, max, rms """
00143
00144 cpu_time_first = f_time(items[0])
00145
00146 if len(items) > 1 and remove_first == True:
00147 items.remove(items[0])
00148
00149
00150 items_time = map(f_time, items)
00151 min_value = min(items_time)
00152 max_value = max(items_time)
00153 max_index = items_time.index(max_value)
00154 avg_value = float(sum(items_time)) / float(len(items_time))
00155 rms_value = calcRMS(items_time,avg_value)
00156
00157 return {"min": min_value, "max": max_value, "cputime_first": cpu_time_first,
00158 "rms": rms_value, "avg": avg_value,
00159 "event_number_of_max": f_evt_num(items[max_index])}
00160
00161
00162 def processModuleTimeLogData(modules_timelog, groupBy = "module_name"):
00163 """ Processes the timelog data grouping events by module and calculates min, max, avg, rms
00164 Returns data as a list of dicts like: !
00165
00166 {
00167 <module_name>:
00168 {name:, label:,
00169 stats: {num_events, avg, min, max, rms}
00170 }
00171
00172 """
00173
00174 times_bymod = {}
00175
00176
00177
00178 for time_data in modules_timelog:
00179 (event_number, module_label, module_name, seconds) = time_data
00180
00181
00182 if groupBy == "module_label":
00183 key = module_label
00184 else:
00185 if groupBy =="name+label":
00186 key = module_name + "_" + module_label
00187 else:
00188 key = module_name
00189
00190
00191 try:
00192
00193 times_bymod[key]
00194 except KeyError:
00195
00196
00197 times_bymod[key] = {}
00198
00199
00200
00201
00202
00203
00204
00205
00206 try:
00207 times_bymod[key][module_label]
00208 except KeyError:
00209 times_bymod[key].update({module_label:{}})
00210
00211
00212 try:
00213 times_bymod[key][module_label][module_name]
00214 except KeyError:
00215 times_bymod[key][module_label].update({module_name:[]})
00216
00217
00218 times_bymod[key][module_label][module_name].append((seconds,event_number))
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235 for key in times_bymod.keys():
00236 for label in times_bymod[key].keys():
00237 mod_data={'label':label}
00238 for name in times_bymod[key][label].keys():
00239 mod_data.update({'name':name})
00240 mod_data['stats']= calc_MinMaxAvgRMS(f_time= lambda x:x[0],f_evt_num=lambda x:x[1],items=times_bymod[key][label][name])
00241 mod_data['stats']['num_events']=len(times_bymod[key][label][name])
00242 times_bymod[key]=mod_data
00243 return times_bymod
00244
00245 def manual_run():
00246 timelog_f = "TTBAR__RAW2DIGI,RECO_TimingReport.log"
00247 timelog_f = "TTBAR__GEN,SIM,DIGI,L1,DIGI2RAW,HLT_TimingReport.log"
00248
00249 release_files = {
00250
00251 "CMSSW_3_1_0_pre9":
00252 (
00253 "CMSSW_3_1_0_pre9/MINBIAS__RAW2DIGI,RECO_TimingReport.log",
00254 "CMSSW_3_1_0_pre9/TTBAR__RAW2DIGI,RECO_TimingReport.log")
00255
00256 }
00257 for release, files in release_files.items():
00258 print "Processing release: %s" % release
00259 for timelog_f in files:
00260 print "Processing file: %s" % timelog_f
00261
00262
00263 (mod_timelog, evt_timelog, rss_data, vsize_data) =loadTimeLog(timelog_f)
00264
00265 mod_timelog= processModuleTimeLogData(mod_timelog, groupBy = "module_label")
00266 print "Number of modules grouped by (module_label): %s" % len(mod_timelog)
00267
00268 (candle, step, pileup_type, conditions, event_content) = getJobID_fromTimeReportLogName(timelog_f)
00269
00270 """ We could get release from the path but that's quite ugly! """
00271 export_xml(jobID = jobID, release=release, timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data))
00272
00273 """ use to run performance profiling """
00274 def perf_profile():
00275 timelog_f = "test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log"
00276 (modules_timelog, evt_timelog, rss_data, vsize_data) = loadTimeLog(timelog_f)
00277
00278 mod_timelog= processModuleTimeLogData(modules_timelog, groupBy = "module_label")
00279
00280 (candle, step, pileup_type, conditions, event_content) = getJobID_fromTimeReportLogName(timelog_f)
00281
00282 xmldoc = minidom.Document()
00283 export_xml(step = step, candle = candle, release="test", timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data), xml_doc = xmldoc)
00284 write_xml(xmldoc, "test_xml_output.xml")
00285
00286 if (__name__ == "__main__"):
00287 perf_profile()