00001
00002 import sys
00003 import math
00004 import re
00005 from cmssw_exportdb_xml import *
00006 from FileNamesHelper import *
00007
00008 """
00009 Performance profiling:
00010 ncalls tottime percall cumtime percall filename:lineno(function)
00011 1 0.000 0.000 71.618 71.618 <stdin>:1(foo)
00012 1 0.315 0.315 71.933 71.933 <string>:1(<module>)
00013 1 47.561 47.561 71.618 71.618 parserTimingReport.py:27(loadTimeLog)
00014 8000 0.191 0.000 0.343 0.000 parserTimingReport.py:8(extractRSS_VSIZE)
00015 1 0.000 0.000 0.000 0.000 {len}
00016 2384000 3.239 0.000 3.239 0.000 {method 'append' of 'list' objects}
00017 1 0.000 0.000 0.000 0.000 {method 'close' of 'file' objects}
00018 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
00019 24000 0.041 0.000 0.041 0.000 {method 'partition' of 'str' objects}
00020 2392000 5.804 0.000 5.804 0.000 {method 'split' of 'str' objects}
00021 10791332 14.782 0.000 14.782 0.000 {method 'strip' of 'str' objects}
00022 1 0.000 0.000 0.000 0.000 {method 'xreadlines' of 'file' objects}
00023 1 0.000 0.000 0.000 0.000 {open}
00024
00025 """
00026
00027
00028 """ given two lines returns the VSIZE and RSS values along with event number """
00029 def extractRSS_VSIZE(line1, line2):
00030 """
00031 >>> extractRSS_VSIZE("%MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1", \
00032 "MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0")
00033 (('1', '760.25'), ('1', '923.07'))
00034 """
00035
00036 if ("Run" in line1) and ("Event" in line1):
00037 event_number = line1.split('Event:')[1].strip()
00038 else: return False
00039
00040 """ it's first or second MemoryCheck line """
00041 if ("VSIZE" in line2) and ("RSS" in line2):
00042 RSS = line2.split("RSS")[1].strip().split(" ")[0].strip()
00043 VSIZE = line2.split("RSS")[0].strip().split("VSIZE")[1].strip().split(" ")[0].strip()
00044 return ((event_number, RSS), (event_number, VSIZE))
00045 else: return False
00046
00047
00048 def loadTimeLog(log_filename, maxsize_rad = 0):
00049 """ gets the timing data from the logfile
00050 returns 4 lists:
00051
00052 * ModuleTime data (event_number, module_label, module_name, seconds) and
00053 * EventTime data
00054 - with granularity of event (initial - not processed data)
00055 * RSS per event
00056 * VSIZE per event
00057 """
00058
00059
00060
00061
00062 mod_data = []
00063 evt_data = []
00064 rss_data = []
00065 vsize_data = []
00066
00067 logfile = open(log_filename, 'r')
00068
00069
00070
00071
00072 memcheck_line1 = False
00073
00074
00075 for line in logfile.xreadlines():
00076 if 'TimeModule>' in line.strip():
00077 line = line.strip()
00078 line_content_list = line.split(' ')[0:]
00079
00080 event_number = int(line_content_list[1])
00081
00082 module_label = str(line_content_list[4])
00083 module_name = str(line_content_list[3])
00084 seconds = float(line_content_list[5])
00085
00086 mod_data.append((event_number, module_label, module_name, seconds))
00087
00088 if 'TimeEvent>' in line.strip():
00089 line = line.strip()
00090 line_content_list = line.split(' ')[0:]
00091
00092 event_number = int(line_content_list[1])
00093 time_seconds = str(line_content_list[3])
00094
00095
00096 evt_data.append((event_number, time_seconds))
00097 """
00098 %MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1
00099 MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0
00100 """
00101 if 'MemoryCheck:' in line.strip():
00102
00103 if (not memcheck_line1):
00104 memcheck_line1 = line.strip()
00105 else:
00106 (rss, vsize) = extractRSS_VSIZE(memcheck_line1, line.strip())
00107 rss_data.append(rss)
00108 vsize_data.append(vsize)
00109 else:
00110 memcheck_line1 = False
00111
00112 logfile.close()
00113
00114 return (mod_data, evt_data, rss_data, vsize_data)
00115
00116
00117
00118
00119 def calcRMS(items,avg):
00120 """ returns RootMeanSquare of items in a list """
00121
00122
00123
00124 return math.sqrt(sum([(x-avg)**2 for x in items])/len(items))
00125
00126 def calc_MinMaxAvgRMS(items, remove_first = True, f_time = lambda x: x[0], f_evt_num = lambda x: x[1],):
00127 """ returns a dict of avg, min, max, rms """
00128
00129 cpu_time_first = f_time(items[0])
00130
00131 if len(items) > 1 and remove_first == True:
00132 items.remove(items[0])
00133
00134
00135 items_time = map(f_time, items)
00136 min_value = min(items_time)
00137 max_value = max(items_time)
00138 max_index = items_time.index(max_value)
00139 avg_value = float(sum(items_time)) / float(len(items_time))
00140 rms_value = calcRMS(items_time,avg_value)
00141
00142 return {"min": min_value, "max": max_value, "cputime_first": cpu_time_first,
00143 "rms": rms_value, "avg": avg_value,
00144 "event_number_of_max": f_evt_num(items[max_index])}
00145
00146
00147 def processModuleTimeLogData(modules_timelog, groupBy = "module_name"):
00148 """ Processes the timelog data grouping events by module and calculates min, max, avg, rms
00149 Returns data as a list of dicts like: !
00150
00151 {
00152 <module_name>:
00153 {name:, label:,
00154 stats: {num_events, avg, min, max, rms}
00155 }
00156
00157 """
00158
00159 times_bymod = {}
00160
00161
00162
00163 for time_data in modules_timelog:
00164 (event_number, module_label, module_name, seconds) = time_data
00165
00166
00167 if groupBy == "module_label":
00168 key = module_label
00169 else:
00170 if groupBy =="name+label":
00171 key = module_name + "_" + module_label
00172 else:
00173 key = module_name
00174
00175
00176 try:
00177
00178 times_bymod[key]
00179 except KeyError:
00180
00181
00182 times_bymod[key] = {}
00183
00184
00185
00186
00187
00188
00189
00190
00191 try:
00192 times_bymod[key][module_label]
00193 except KeyError:
00194 times_bymod[key].update({module_label:{}})
00195
00196
00197 try:
00198 times_bymod[key][module_label][module_name]
00199 except KeyError:
00200 times_bymod[key][module_label].update({module_name:[]})
00201
00202
00203 times_bymod[key][module_label][module_name].append((seconds,event_number))
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220 for key in times_bymod.keys():
00221 for label in times_bymod[key].keys():
00222 mod_data={'label':label}
00223 for name in times_bymod[key][label].keys():
00224 mod_data.update({'name':name})
00225 mod_data['stats']= calc_MinMaxAvgRMS(f_time= lambda x:x[0],f_evt_num=lambda x:x[1],items=times_bymod[key][label][name])
00226 mod_data['stats']['num_events']=len(times_bymod[key][label][name])
00227 times_bymod[key]=mod_data
00228 return times_bymod
00229
00230 def manual_run():
00231 timelog_f = "TTBAR__RAW2DIGI,RECO_TimingReport.log"
00232 timelog_f = "TTBAR__GEN,SIM,DIGI,L1,DIGI2RAW,HLT_TimingReport.log"
00233
00234 release_files = {
00235
00236 "CMSSW_3_1_0_pre9":
00237 (
00238 "CMSSW_3_1_0_pre9/MINBIAS__RAW2DIGI,RECO_TimingReport.log",
00239 "CMSSW_3_1_0_pre9/TTBAR__RAW2DIGI,RECO_TimingReport.log")
00240
00241 }
00242 for release, files in release_files.items():
00243 print "Processing release: %s" % release
00244 for timelog_f in files:
00245 print "Processing file: %s" % timelog_f
00246
00247
00248 (mod_timelog, evt_timelog, rss_data, vsize_data) =loadTimeLog(timelog_f)
00249
00250 mod_timelog= processModuleTimeLogData(mod_timelog, groupBy = "module_label")
00251 print "Number of modules grouped by (module_label): %s" % len(mod_timelog)
00252
00253 (candle, step, pileup_type, conditions, event_content) = getJobID_fromTimeReportLogName(timelog_f)
00254
00255 """ We could get release from the path but that's quite ugly! """
00256 export_xml(jobID = jobID, release=release, timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data))
00257
00258 """ use to run performance profiling """
00259 def perf_profile():
00260 timelog_f = "test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log"
00261 (modules_timelog, evt_timelog, rss_data, vsize_data) = loadTimeLog(timelog_f)
00262
00263 mod_timelog= processModuleTimeLogData(modules_timelog, groupBy = "module_label")
00264
00265 (candle, step, pileup_type, conditions, event_content) = getJobID_fromTimeReportLogName(timelog_f)
00266
00267 xmldoc = minidom.Document()
00268 export_xml(step = step, candle = candle, release="test", timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data), xml_doc = xmldoc)
00269 write_xml(xmldoc, "test_xml_output.xml")
00270
00271 if (__name__ == "__main__"):
00272 perf_profile()