Functions

def calc_MinMaxAvgRMS
def calcRMS
def extractRSS_VSIZE
def loadTimeLog
def manual_run
def perf_profile
def processModuleTimeLogData
mod_data["stats"] =calc_MinMaxAvgRMS(f_time = lambda x: x["time"], f_evt_num = lambda x: x["event_number"], items = times_bymod[mod_name]) |
def parserTimingReport::calc_MinMaxAvgRMS(items, remove_first=True, f_time=lambda x: x[0], f_evt_num=lambda x: x[1])
Returns a dict with the min, max, avg and rms of the item times, plus the CPU time of the first event and the event number of the maximum.
Definition at line 141 of file parserTimingReport.py.
def calc_MinMaxAvgRMS(items, remove_first=True, f_time=lambda x: x[0], f_evt_num=lambda x: x[1]):
    """ returns a dict of avg, min, max, rms """
    # save the cpu time of first event before removing the first result!
    cpu_time_first = f_time(items[0])

    if len(items) > 1 and remove_first == True:
        items.remove(items[0])  # TODO: if there is only one event - we have a problem -> do we eliminate the whole module?
        # TODO: it removes it completely from all the data because we do not save / do not copy it

    items_time = map(f_time, items)
    min_value = min(items_time)
    max_value = max(items_time)
    max_index = items_time.index(max_value)
    avg_value = float(sum(items_time)) / float(len(items_time))
    rms_value = calcRMS(items_time, avg_value)

    return {"min": min_value, "max": max_value, "cputime_first": cpu_time_first,
            "rms": rms_value, "avg": avg_value,
            "event_number_of_max": f_evt_num(items[max_index])}
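A minimal usage sketch with made-up (seconds, event_number) tuples; like the module itself, it relies on Python 2 semantics (map() must return an indexable list here):

# Hypothetical per-event timings as (seconds, event_number) tuples
sample = [(0.50, 1), (0.12, 2), (0.30, 3), (0.18, 4)]

stats = calc_MinMaxAvgRMS(items=sample)
# The first event (0.50 s) is kept only as "cputime_first" and removed from the
# statistics (remove_first=True), so: min=0.12, max=0.30, avg=0.20,
# event_number_of_max=3 and rms ~= 0.075
print(stats)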
def parserTimingReport::calcRMS(items, avg)
Returns the root mean square of the items around the given avg (i.e. the standard deviation).
Definition at line 134 of file parserTimingReport.py.
def calcRMS(items, avg):
    """ returns RootMeanSquare of items in a list """
    # sqrt(sum(x^2))
    # Not statistics RMS... "physics" RMS, i.e. standard deviation: sqrt(sum((x-avg)**2)/N)
    # return math.sqrt(reduce(lambda x: (x - avg)**2, items) / len(items))
    return math.sqrt(sum([(x - avg) ** 2 for x in items]) / len(items))
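A quick numerical check of the formula, assuming calcRMS is in scope (e.g. imported from parserTimingReport):

values = [1.0, 2.0, 3.0]
avg = sum(values) / len(values)   # 2.0
# sqrt(((1-2)^2 + (2-2)^2 + (3-2)^2) / 3) = sqrt(2/3) ~= 0.816
print(calcRMS(values, avg))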
def parserTimingReport::extractRSS_VSIZE(line1, line2, record_number)
>>> extractRSS_VSIZE("%MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1", \
        "MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0")
(('1', '760.25'), ('1', '923.07'))
Definition at line 29 of file parserTimingReport.py.
def extractRSS_VSIZE(line1, line2, record_number):
    """
    >>> extractRSS_VSIZE("%MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1", \
            "MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0")
    (('1', '760.25'), ('1', '923.07'))
    """

    if ("Run" in line1) and ("Event" in line1):  # the first line
        event_number = line1.split('Event:')[1].strip()
    else:
        return False

    """ it's first or second MemoryCheck line """
    if ("VSIZE" in line2) and ("RSS" in line2):  # the second line
        RSS = line2.split("RSS")[1].strip().split(" ")[0].strip()  # changed partition into split for backward compatibility with py2.3
        VSIZE = line2.split("RSS")[0].strip().split("VSIZE")[1].strip().split(" ")[0].strip()
        # Hack to return the record number instead of the event number for now... can always switch back or add the event number on top
        # return ((event_number, RSS), (event_number, VSIZE))
        return ((record_number, RSS), (record_number, VSIZE))
    else:
        return False
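A minimal call sketch that also passes the record_number argument (the sample lines mirror the docstring; note the current implementation returns the record number rather than the event number parsed from line1):

line1 = "%MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1"
line2 = "MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0"

# Returns ((record, RSS), (record, VSIZE)), here ((1, '760.25'), (1, '923.07'))
print(extractRSS_VSIZE(line1, line2, 1))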
def parserTimingReport::loadTimeLog(log_filename, maxsize_rad=0)
Gets the timing data from the logfile and returns 4 lists:

* ModuleTime data (event_number, module_label, module_name, seconds)
* EventTime data - with granularity of event (initial - not processed data)
* RSS per event
* VSIZE per event
Definition at line 50 of file parserTimingReport.py.
def loadTimeLog(log_filename, maxsize_rad=0):  # TODO: remove maxsize to read, used for debugging
    """ gets the timing data from the logfile
    returns 4 lists:

        * ModuleTime data (event_number, module_label, module_name, seconds) and
        * EventTime data
            - with granularity of event (initial - not processed data)
        * RSS per event
        * VSIZE per event
    """
    # ----- format of logfile ----
    # Report columns headings for modules: eventnum runnum modulelabel modulename timetaken
    # e.g. TimeModule> 1 1 csctfDigis CSCTFUnpacker 0.0624561

    mod_data = []
    evt_data = []
    rss_data = []
    vsize_data = []
    # open file and read it and fill the structure!
    logfile = open(log_filename, 'r')

    # get only the lines which have time report data
    # TODO: reading and processing line by line might speed up the process!

    memcheck_line1 = False

    record_number = 0
    last_record = 0
    last_event = 0
    for line in logfile.xreadlines():
        if 'TimeModule>' in line.strip():
            line = line.strip()
            line_content_list = line.split(' ')[0:]
            # Hack to avoid issues with the non-consecutive run numbers:
            event_number = int(line_content_list[1])
            if event_number != last_event:
                record_number = record_number + 1
                last_event = event_number
            # module label and name were mixed up in the original doc
            module_label = str(line_content_list[4])
            module_name = str(line_content_list[3])
            seconds = float(line_content_list[5])
            # For now let's try to switch to the record_number... if we need to also have the event_number we can always add it back.
            # mod_data.append((event_number, module_label, module_name, seconds))
            mod_data.append((record_number, module_label, module_name, seconds))
        if 'TimeEvent>' in line.strip():
            line = line.strip()
            line_content_list = line.split(' ')[0:]
            # Hack to avoid issues with the non-consecutive run numbers:
            event_number = int(line_content_list[1])
            if event_number != last_event:
                record_number = record_number + 1
                last_event = event_number
            # module label and name were mixed up in the original doc
            time_seconds = str(line_content_list[3])

            # TODO: what are the other [last two] numbers? Real time? smth else? TimeEvent> 1 1 15.3982 13.451 13.451
            # For now let's try to switch to the record_number... if we need to also have the event_number we can always add it back.
            # evt_data.append((event_number, time_seconds))
            evt_data.append((record_number, time_seconds))
        """
        %MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1
        MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0
        """
        if 'MemoryCheck:' in line.strip():
            # this is the first line out of two
            if (not memcheck_line1):
                memcheck_line1 = line.strip()
            else:
                # FIXME (eventually)
                # Hacking in the record_number extracted from the TimeEvent and TimeModule parsing... NOT ROBUST...
                (rss, vsize) = extractRSS_VSIZE(memcheck_line1, line.strip(), record_number)
                rss_data.append(rss)
                vsize_data.append(vsize)
        else:
            memcheck_line1 = False

    logfile.close()

    return (mod_data, evt_data, rss_data, vsize_data)
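A usage sketch; the commented excerpt only illustrates the line formats the parser looks for, and the filename is one of those used elsewhere in this file (the function itself requires Python 2 because of xreadlines()):

# Expected line formats in the log:
#   TimeModule> 1 1 csctfDigis CSCTFUnpacker 0.0624561
#   TimeEvent> 1 1 15.3982 13.451 13.451
#   %MSG-w MemoryCheck: PostModule 19-Jun-2009 13:06:08 CEST Run: 1 Event: 1
#   MemoryCheck: event : VSIZE 923.07 0 RSS 760.25 0

(mod_data, evt_data, rss_data, vsize_data) = loadTimeLog("TTBAR__RAW2DIGI,RECO_TimingReport.log")
# mod_data   : [(record_number, module_label, module_name, seconds), ...]
# evt_data   : [(record_number, seconds_as_string), ...]
# rss_data   : [(record_number, RSS), ...]
# vsize_data : [(record_number, VSIZE), ...]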
def parserTimingReport::manual_run()
Definition at line 245 of file parserTimingReport.py.
def manual_run():
    timelog_f = "TTBAR__RAW2DIGI,RECO_TimingReport.log"
    timelog_f = "TTBAR__GEN,SIM,DIGI,L1,DIGI2RAW,HLT_TimingReport.log"
    # TODO: get STEP name from filename
    release_files = {

        "CMSSW_3_1_0_pre9":
        (
            "CMSSW_3_1_0_pre9/MINBIAS__RAW2DIGI,RECO_TimingReport.log",
            "CMSSW_3_1_0_pre9/TTBAR__RAW2DIGI,RECO_TimingReport.log")
        ## "CMSSW_3_1_0_pre10":
    }
    for release, files in release_files.items():
        print "Processing release: %s" % release
        for timelog_f in files:
            print "Processing file: %s" % timelog_f

            # TODO: automatically detect type of report file!!!
            (mod_timelog, evt_timelog, rss_data, vsize_data) = loadTimeLog(timelog_f)

            mod_timelog = processModuleTimeLogData(mod_timelog, groupBy="module_label")
            print "Number of modules grouped by (module_label): %s" % len(mod_timelog)

            (candle, step, pileup_type, conditions, event_content) = getJobID_fromTimeReportLogName(timelog_f)

            """ We could get release from the path but that's quite ugly! """
            export_xml(jobID=jobID, release=release, timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data))


""" use to run performance profiling """
def parserTimingReport::perf_profile()
Definition at line 274 of file parserTimingReport.py.
def perf_profile():
    timelog_f = "test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log"
    (modules_timelog, evt_timelog, rss_data, vsize_data) = loadTimeLog(timelog_f)

    mod_timelog = processModuleTimeLogData(modules_timelog, groupBy="module_label")

    (candle, step, pileup_type, conditions, event_content) = getJobID_fromTimeReportLogName(timelog_f)

    xmldoc = minidom.Document()
    export_xml(step=step, candle=candle, release="test", timelog_result=(mod_timelog, evt_timelog, rss_data, vsize_data), xml_doc=xmldoc)
    write_xml(xmldoc, "test_xml_output.xml")
def parserTimingReport::processModuleTimeLogData(modules_timelog, groupBy="module_name")
mod_data["stats"] =calc_MinMaxAvgRMS(f_time = lambda x: x["time"], f_evt_num = lambda x: x["event_number"], items = times_bymod[mod_name])
Processes the timelog data, grouping events by module, and calculates min, max, avg and rms. Returns the data as a dict keyed by module, of the form:

{ <module_name>: { name:, label:, stats: {num_events, avg, min, max, rms} } }
mod_data["stats"]["num_events"] = len(times_bymod[mod_name])
times_bymod[mod_name] = mod_data Let's rewrite this using the dictionary we now have without any logical change (could do with some...):
Definition at line 162 of file parserTimingReport.py.
def processModuleTimeLogData(modules_timelog, groupBy="module_name"):
    """ Processes the timelog data grouping events by module and calculates min, max, avg, rms
    Returns data as a list of dicts like: !

    {
        <module_name>:
            {name:, label:,
             stats: {num_events, avg, min, max, rms}
            }

    """
    # group by module_name, we save a list for each module name
    times_bymod = {}

    # print "Num of useful TimeLog lines: %s" % len(modules_timelog)

    for time_data in modules_timelog:
        (event_number, module_label, module_name, seconds) = time_data

        # group times of modules by label or name, TODO: maybe both
        if groupBy == "module_label":
            key = module_label
        else:
            if groupBy == "name+label":
                key = module_name + "_" + module_label
            else:
                key = module_name

        try:
            # is the list for current module initialized?
            times_bymod[key]
        except KeyError:
            # Changing this from a list to a dict (see comments below):
            # times_bymod[key] = []
            times_bymod[key] = {}
            # Running out of memory!
            # times_bymod[key].append({"label": module_label, "name": module_name, "time": seconds, "event_number": event_number})
            # Let's do it right:
            # Instead of times_bymod[key]=[{"label": module_label, "name": module_name, "time": seconds, "event_number": event_number}]
            # let's do times_bymod[key]={"module_label":{"module_name":[(seconds,event_number)]}} so we do not repeat label and name
            # and especially they are not a pair of key/value.
            # During the first event all the keys will be initialized, then from event 2 on it will be just appending
            # the (seconds,event_number) tuple to the list with the appropriate keys:

        # Check/Set up the module label dict:
        try:
            times_bymod[key][module_label]
        except KeyError:
            times_bymod[key].update({module_label: {}})

        # Check/Set up the module name dict:
        try:
            times_bymod[key][module_label][module_name]
        except KeyError:
            times_bymod[key][module_label].update({module_name: []})

        # We're now ready to add the info as a tuple in the list!
        times_bymod[key][module_label][module_name].append((seconds, event_number))

    # calculate Min, Max, Avg, RMS for each module and in this way get the final data to be imported
    ## for mod_name in times_bymod.keys():
    ##     # copy needed data
    ##     # mod_data = {"label": times_bymod[mod_name][0]["label"], "name": times_bymod[mod_name][0]["name"]}
    ##     # New data structure:
    ##     mod_data = {"label": times_bymod[mod_name].keys()[0], "name": times_bymod[mod_name][times_bymod[mod_name].keys()[0]].keys()[0]}
    ##     # add statistical data
    ##     mod_data["stats"] = calc_MinMaxAvgRMS(f_time=lambda x: x["time"], f_evt_num=lambda x: x["event_number"], items=times_bymod[mod_name])
    ##     mod_data["stats"]["num_events"] = len(times_bymod[mod_name])
    ##     times_bymod[mod_name] = mod_data
    # Let's rewrite this using the dictionary we now have without any logical change (could do with some...):
    for key in times_bymod.keys():
        for label in times_bymod[key].keys():
            mod_data = {'label': label}
            for name in times_bymod[key][label].keys():
                mod_data.update({'name': name})
                mod_data['stats'] = calc_MinMaxAvgRMS(f_time=lambda x: x[0], f_evt_num=lambda x: x[1], items=times_bymod[key][label][name])
                mod_data['stats']['num_events'] = len(times_bymod[key][label][name])
            times_bymod[key] = mod_data
    return times_bymod
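A short sketch of the expected input and output shapes, with made-up timings (again Python 2 only, since the statistics go through calc_MinMaxAvgRMS):

# Hypothetical ModuleTime tuples as produced by loadTimeLog()
mod_timelog = [
    (1, "csctfDigis", "CSCTFUnpacker", 0.0624561),
    (2, "csctfDigis", "CSCTFUnpacker", 0.0401020),
    (3, "csctfDigis", "CSCTFUnpacker", 0.0552310),
]

result = processModuleTimeLogData(mod_timelog, groupBy="module_label")
# result["csctfDigis"] ends up as a dict like:
#   {"label": "csctfDigis", "name": "CSCTFUnpacker",
#    "stats": {"min": ..., "max": ..., "avg": ..., "rms": ...,
#              "cputime_first": 0.0624561, "event_number_of_max": ...,
#              "num_events": ...}}
print(result)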