Functions | |
def | calc_MinMaxAvgRMS |
def | calcRMS |
def | extractRSS_VSIZE |
def | loadTimeLog |
def | manual_run |
def | perf_profile |
def | processModuleTimeLogData |
mod_data["stats"] = calc_MinMaxAvgRMS(f_time = lambda x: x["time"], f_evt_num = lambda x: x["event_number"], items = times_bymod[mod_name]) |
def calc_MinMaxAvgRMS(items, remove_first = True, f_time = lambda x: x[0],
                      f_evt_num = lambda x: x[1]):
    """ Returns a dict of avg, min, max, rms statistics for the timing
    entries in `items`.

    Parameters:
        items        -- non-empty sequence of per-event timing records.
        remove_first -- if True (default) the first event is excluded from
                        the statistics (it typically includes initialisation
                        cost); its cpu time is still reported separately as
                        "cputime_first".
        f_time       -- extracts the time value from one record.
        f_evt_num    -- extracts the event number from one record.

    Returns a dict with keys: "min", "max", "avg", "rms",
    "cputime_first", "event_number_of_max".
    """
    # save the cpu time of the first event before (possibly) dropping it
    cpu_time_first = f_time(items[0])

    # Work on a local slice instead of mutating the caller's list:
    # the original `items.remove(items[0])` permanently deleted the first
    # event from the shared data structure (see the old TODO about it).
    if remove_first and len(items) > 1:
        items = items[1:]

    # list() so the result is indexable under both Python 2 and 3
    # (Python 3's map returns an iterator, which has no .index()).
    items_time = list(map(f_time, items))
    min_value = min(items_time)
    max_value = max(items_time)
    max_index = items_time.index(max_value)
    avg_value = float(sum(items_time)) / float(len(items_time))
    rms_value = calcRMS(items_time, avg_value)

    return {"min": min_value, "max": max_value,
            "cputime_first": cpu_time_first,
            "rms": rms_value, "avg": avg_value,
            "event_number_of_max": f_evt_num(items[max_index])}
def calcRMS(items, avg):
    """ Returns the RootMeanSquare of items in a list, relative to `avg`.

    Not the statistics RMS sqrt(sum(x^2)/N) -- the "physics" RMS,
    i.e. the standard deviation: sqrt(sum((x - avg)**2) / N).
    """
    # Generator expression instead of a throwaway list; float(len(...))
    # guards against Python 2 integer floor division when every deviation
    # happens to be integral.
    return math.sqrt(sum((x - avg) ** 2 for x in items) / float(len(items)))
def extractRSS_VSIZE(line1, line2):
    """ Extract (event, RSS) and (event, VSIZE) from a MemoryCheck line pair.

    >>> extractRSS_VSIZE("%MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1", \
                         "MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0")
    (('1', '760.25'), ('1', '923.07'))

    Returns False when the two lines do not form a valid pair
    (callers rely on a falsy result, so keep False rather than None).
    """
    if ("Run" in line1) and ("Event" in line1):  # the first line of the pair
        event_number = line1.split('Event:')[1].strip()
    else:
        return False

    # it's the first or second MemoryCheck line
    if ("VSIZE" in line2) and ("RSS" in line2):  # the second line
        # split() with no argument tolerates runs of whitespace, unlike the
        # original split(" ") which returned '' for a double space.
        # (split, not partition, for backward compatibility with py2.3)
        RSS = line2.split("RSS")[1].split()[0]
        VSIZE = line2.split("RSS")[0].split("VSIZE")[1].split()[0]
        return ((event_number, RSS), (event_number, VSIZE))
    else:
        return False
def loadTimeLog(log_filename, maxsize_rad = 0):  # TODO: remove maxsize_rad, was used only for debugging
    """ Gets the timing data from the logfile.

    Returns 4 lists:
      * ModuleTime data: (event_number, module_label, module_name, seconds)
      * EventTime data: (event_number, time_seconds) with granularity of
        event (initial, not processed data; time kept as a string as in
        the original)
      * RSS per event
      * VSIZE per event
    """
    # ----- format of logfile ----
    # Report columns headings for modules: eventnum runnum modulelabel modulename timetaken
    # e.g. TimeModule> 1 1 csctfDigis CSCTFUnpacker 0.0624561
    mod_data = []
    evt_data = []
    rss_data = []
    vsize_data = []

    memcheck_line1 = False  # holds the first of the two MemoryCheck lines

    # `with` + direct file iteration replaces the Python-2-only
    # xreadlines() and guarantees the file is closed even on error.
    with open(log_filename, 'r') as logfile:
        for line in logfile:
            line = line.strip()

            if 'TimeModule>' in line:
                line_content_list = line.split(' ')
                event_number = int(line_content_list[1])
                # module label and name were mixed up in the original doc
                module_label = str(line_content_list[4])
                module_name = str(line_content_list[3])
                seconds = float(line_content_list[5])
                mod_data.append((event_number, module_label, module_name, seconds))

            if 'TimeEvent>' in line:
                # e.g. TimeEvent> 1 1 15.3982 13.451 13.451
                # TODO: what are the other [last two] numbers? Real time? smf else?
                line_content_list = line.split(' ')
                event_number = int(line_content_list[1])
                time_seconds = str(line_content_list[3])
                evt_data.append((event_number, time_seconds))

            # MemoryCheck data arrives as a pair of lines:
            #   %MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1
            #   MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0
            if 'MemoryCheck:' in line:
                if not memcheck_line1:
                    # this is the first line out of two
                    memcheck_line1 = line
                else:
                    result = extractRSS_VSIZE(memcheck_line1, line)
                    # guard: the original unpacked unconditionally and raised
                    # TypeError whenever the pair failed to parse (False)
                    if result:
                        (rss, vsize) = result
                        rss_data.append(rss)
                        vsize_data.append(vsize)
                    # reset so a following pair starts fresh (the original
                    # left the stale first line around)
                    memcheck_line1 = False
            else:
                memcheck_line1 = False

    return (mod_data, evt_data, rss_data, vsize_data)
def manual_run():
    """ Manually-driven batch processing of a hard-coded set of timing
    report logfiles (development helper): parse, summarize, export XML.
    """
    # NOTE(review): the original assigned two throwaway timelog_f values
    # here that were immediately shadowed by the loop variable -- removed.
    # TODO: get STEP name from filename
    release_files = {
        "CMSSW_3_1_0_pre9": (
            "CMSSW_3_1_0_pre9/MINBIAS__RAW2DIGI,RECO_TimingReport.log",
            "CMSSW_3_1_0_pre9/TTBAR__RAW2DIGI,RECO_TimingReport.log",
        ),
        ## "CMSSW_3_1_0_pre10": ...
    }
    for release, files in release_files.items():
        print("Processing release: %s" % release)
        for timelog_f in files:
            print("Processing file: %s" % timelog_f)

            # TODO: automatically detect type of report file!!!
            (mod_timelog, evt_timelog, rss_data, vsize_data) = loadTimeLog(timelog_f)

            mod_timelog = processModuleTimeLogData(mod_timelog, groupBy = "module_label")
            print("Number of modules grouped by (module_label): %s" % len(mod_timelog))

            # NOTE(review): the original unpacked the job-id tuple into
            # (candle, step, ...) but then passed an undefined `jobID`
            # (NameError). Presumably the whole id tuple was intended --
            # confirm against export_xml's signature.
            jobID = getJobID_fromTimeReportLogName(timelog_f)

            # We could get the release from the path but that's quite ugly!
            export_xml(jobID = jobID, release = release,
                       timelog_result = (mod_timelog, evt_timelog, rss_data, vsize_data))
def perf_profile():
    """ Use to run performance profiling: parse one test timing report,
    summarize it per module label and dump the result to an XML file.
    """
    timelog_f = "test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log"
    (modules_timelog, evt_timelog, rss_data, vsize_data) = loadTimeLog(timelog_f)

    mod_timelog = processModuleTimeLogData(modules_timelog, groupBy = "module_label")

    (candle, step, pileup_type, conditions, event_content) = getJobID_fromTimeReportLogName(timelog_f)

    xmldoc = minidom.Document()
    export_xml(step = step, candle = candle, release = "test",
               timelog_result = (mod_timelog, evt_timelog, rss_data, vsize_data),
               xml_doc = xmldoc)
    write_xml(xmldoc, "test_xml_output.xml")
def processModuleTimeLogData(modules_timelog, groupBy = "module_name"):
    """ Processes the timelog data grouping events by module and calculates
    min, max, avg, rms.

    Returns a dict (not a list, contrary to the old docstring) keyed by the
    group key:

        { <key>: {"label": ..., "name": ...,
                  "stats": {"num_events", "avg", "min", "max", "rms", ...}} }

    groupBy selects the grouping key: "module_label", "name+label", or
    (default) "module_name".
    """
    # Intermediate structure (keeps memory down -- label and name are nested
    # dict keys instead of being repeated in every per-event record):
    #   times_bymod[key][module_label][module_name] = [(seconds, event_number), ...]
    times_bymod = {}

    for (event_number, module_label, module_name, seconds) in modules_timelog:
        # group times of modules by label or name, TODO: maybe both
        if groupBy == "module_label":
            key = module_label
        elif groupBy == "name+label":
            key = module_name + "_" + module_label
        else:
            key = module_name

        # setdefault replaces the original try/except-KeyError ladders
        by_label = times_bymod.setdefault(key, {})
        by_name = by_label.setdefault(module_label, {})
        by_name.setdefault(module_name, []).append((seconds, event_number))

    # Collapse each group into its summary statistics.
    # Snapshot the group first: the original reassigned times_bymod[key]
    # inside the iteration and would raise KeyError with more than one
    # label per key.
    for key, group in list(times_bymod.items()):
        summary = None
        for label, names in group.items():
            for name, entries in names.items():
                # Fresh dict per (label, name): the original reused one
                # mod_data dict across names of a label.
                summary = {"label": label, "name": name}
                summary["stats"] = calc_MinMaxAvgRMS(
                    f_time = lambda x: x[0],
                    f_evt_num = lambda x: x[1],
                    items = entries)
                summary["stats"]["num_events"] = len(entries)
                # NOTE(review): as in the original, when one key maps to
                # several (label, name) pairs only the last pair's summary
                # survives -- no logical change made here.
        times_bymod[key] = summary
    return times_bymod