CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_10_patch2/src/Validation/Performance/scripts/cmsPerfRegress.py

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 
00003 import time, os, sys, math, re, gzip
00004 import tempfile as tmp
00005 import optparse as opt
00006 from cmsPerfCommons import CandFname
00007 #from ROOT import gROOT, TCanvas, TF1
00008 import ROOT
00009 from array import array
00010 #Substitute popen with subprocess.Popen! popen obsolete...
00011 import subprocess
00012 
00013 _cmsver = os.environ['CMSSW_VERSION']
00014 values_set=('vsize','delta_vsize','rss','delta_rss')
00015 
00016 class Error(Exception):
00017     """Base class for exceptions in this module."""
00018     pass
00019 
00020 class TimingParseErr(Error):
00021     """Exception raised when Could not parse TimingReport Log.
00022 
00023     Attributes:
00024         expression -- input expression in which the error occurred
00025         message -- explanation of the error
00026     """
00027 
00028     def __init__(self, message):
00029         self.message = message
00030 
00031 class SimpMemParseErr(Error):
00032     """Exception raised when Could not parse TimingReport Log.
00033 
00034     Attributes:
00035         expression -- input expression in which the error occurred
00036         message -- explanation of the error
00037     """
00038 
00039     def __init__(self, message):
00040         self.message = message
00041 
00042 class EdmSizeErr(Error):
00043     """Exception raised when Could not parse TimingReport Log.
00044 
00045     Attributes:
00046         expression -- input expression in which the error occurred
00047         message -- explanation of the error
00048     """
00049 
00050     def __init__(self, message):
00051         self.message = message            
00052 
00053 class PerfReportErr(Error):
00054     """Exception raised when Could not parse TimingReport Log.
00055 
00056     Attributes:
00057         expression -- input expression in which the error occurred
00058         message -- explanation of the error
00059     """
00060 
00061     def __init__(self, message):
00062         self.message = message 
00063 
00064 #############
00065 # Option parser
00066 # 
00067 def getParameters():
00068     parser = opt.OptionParser()
00069     #
00070     # Options
00071     #
00072     parser.add_option('-n',
00073                       type="int",
00074                       help='Number of secs per bin. Default is 1.' ,
00075                       default=1,
00076                       dest='startevt')
00077     parser.add_option('-t',
00078                       '--report-type',
00079                       type="choice",
00080                       choices= ("timing", "simplememory","edmsize","igprof","callgrind",""),
00081                       help='Type of report to perform regrssion on. Default is TimingReport.' ,
00082                       default="timing",
00083                       dest='reporttype')
00084     parser.add_option('-i',
00085                       '--IgProfMemOption',
00086                       type="choice",
00087                       choices= ("-y MEM_TOTAL", "-y MEM_LIVE",""),
00088                       help='Eventual IgProfMem counter to use for the regression. Default is no argument (IgProfPerf).' ,
00089                       default="",
00090                       dest='igprofmem')
00091     (options,args) = parser.parse_args()
00092     if len(args) < 2:
00093         parser.error("ERROR: Not enough arguments")
00094         sys.exit()
00095 
00096 
00097     path1 = os.path.abspath(args[0])
00098     path2 = os.path.abspath(args[1])    
00099     if os.path.exists(path1) and os.path.exists(path2):
00100         return (path1, path2, options.startevt, options.reporttype, options.igprofmem)
00101     else:
00102         print "Error: one of the paths does not exist"
00103         sys.exit()
00104 
00105 ###########
00106 # Get max value in data set
00107 #
00108 def get_max(data,index=1):
00109     max_time=-1
00110     for el in data:
00111         sec=el[index]
00112         if max_time<sec:
00113             max_time=sec
00114     return max_time
00115 
00116 ###########
00117 # Get min value in data set
00118 #
00119 def get_min(data,index=1):
00120     min_time=1e20
00121     for el in data:
00122         sec=el[index]
00123         if min_time>sec:
00124             min_time=sec
00125     return min_time  
00126 
00127 ###########
00128 # Setup PyRoot as a batch run
00129 #
00130 def setBatch():
00131     __argv=sys.argv # trick for a strange behaviour of the TApp..
00132     sys.argv=sys.argv[:1]
00133     ROOT.gROOT.SetStyle("Plain") # style paranoia
00134     sys.argv=__argv
00135     #Cannot use this option when the logfile includes
00136     #a large number of events... PyRoot seg-faults.
00137     #Set ROOT in batch mode to avoid canvases popping up!
00138     ROOT.gROOT.SetBatch(1)
00139 
00140 ##########
00141 # Create the root file to save the graphs in
00142 #
00143 def createROOT(outdir,filename):
00144 
00145     # Save in file
00146     rootfilename = os.path.join(outdir,filename)
00147     myfile = None
00148     exists = os.path.exists(rootfilename)
00149     if exists:
00150         myfile=ROOT.TFile(rootfilename,'UPDATE')
00151     else:
00152         myfile=ROOT.TFile(rootfilename,'RECREATE')        
00153     return myfile
00154 
00155 ##########
00156 # Parse timing data from log file
00157 #
00158 def getTimingLogData(logfile_name):
00159     data=[]
00160     
00161     # open file and read it and fill the structure!
00162     logfile=open(logfile_name,'r')
00163     logfile_lines=logfile.readlines()
00164     logfile.close()
00165 
00166     # we get the info we need!
00167     i=0
00168     while i < len(logfile_lines):
00169         line=logfile_lines[i]
00170         if 'TimeEvent>' in line:
00171             line=line.strip()
00172             line_content_list = line.split(' ')[0:]
00173             event_number = int(line_content_list[1])
00174             seconds = float(line_content_list[3])
00175             data.append((event_number,seconds))
00176         i+=1
00177 
00178     return data
00179 
00180 ###########
00181 # Parse memory check data from log file
00182 #
00183 def getSimpleMemLogData(logfile_name,startevt, candle):
00184     data=[]
00185     values_set=('vsize','delta_vsize','rss','delta_rss')
00186     
00187     # open file and read it and fill the structure!
00188     logfile=open(logfile_name,'r')
00189     logfile_lines=logfile.readlines()
00190     logfile.close()
00191     
00192     step = ""
00193     steps = []
00194     #print candle
00195     #print CandFname[candle]
00196     #Get the step from log filename:
00197 
00198     #Catching the case of command line use in which the candle is unknown (and the log name does not match the perfsuite naming convention
00199     if candle:
00200         stepreg = re.compile("%s_([^_]*(_PILEUP)?)_%s((.log)|(.gz))?" % (CandFname[candle],"TimingReport"))
00201     else:
00202         stepreg = re.compile("([^_]*(_PILEUP)?)_%s((.log)|(.gz))?"%"TimingReport")
00203         
00204     #print logfile_name
00205     found=stepreg.search(logfile_name)
00206     if found:
00207         step=found.groups()[0]
00208         print "Determined step from log filename to be %s"%step
00209     else:
00210         print "Could not determine step from log filename"
00211         
00212     #steps.append((step,data))
00213 
00214     #Rewriting the following code... what a mess!
00215     # we get the info we need!
00216     #i=0   
00217     #while i < len(logfile_lines):
00218         #line=logfile_lines[i]
00219         #Format has changed... we use the output of individual steps, so no need to read into the logfile to find which step we are
00220         #referring too (by the way it would not work now!)... the step info comes from the logfilename done above...
00221         #if "RelValreport" in line and "cmsDriver" in line and "step" in line:
00222         #    stepreg = re.compile("--step=([^ ]*)")
00223         #    found = stepreg.search(line)
00224         #    if found:
00225         #        if step == "":
00226         #            step = found.groups()[0]
00227         #        else:
00228         #            steps.append((step,data))
00229         #            step = found.groups()[0]
00230         #            data = []        
00231         #if '%MSG-w MemoryCheck:' in line:
00232         #    line=line[:-1] #no \n!
00233         #    line_content_list=line.split(' ')
00234         #    event_number=int(line_content_list[-1])
00235         #    if event_number<startevt:
00236         #        i+=1
00237         #        continue
00238         #    i+=1 # we inspect the following line
00239         #    try:
00240         #        line=logfile_lines[i]
00241         #    except IndexError:
00242         #        continue
00243         #    line=line[:-1] #no \n!
00244         #    line_content_list=line.split(' ')
00245         #    vsize=float(line_content_list[4])
00246         #    delta_vsize=float(line_content_list[5])
00247         #    rss=float(line_content_list[7])
00248         #    delta_rss=float(line_content_list[8])
00249         #    
00250         #    data.append((event_number,{'vsize':vsize,
00251         #                               'delta_vsize':delta_vsize,
00252         #                               'rss':rss,
00253         #                               'delta_rss':delta_rss}))
00254         #i += 1
00255     event_number= startevt
00256     for line in logfile_lines:
00257         #Match SimpleMemoryCheck output to get the event number
00258         if '%MSG-w MemoryCheck:' in line: #Harcoded based on format: %MSG-w MemoryCheck:  PostModule 12-Jan-2009 16:00:29 CET Run: 1 Event: 1
00259             tokens=line.split()
00260             if int(tokens[-1])>= startevt:
00261                 event_number=int(tokens[-1])
00262         if 'VSIZE' in line: #Harcoded based on format: MemoryCheck: event : VSIZE 648.426 0 RSS 426.332 0
00263             tokens=line.split()
00264             vsize=float(tokens[4])
00265             delta_vsize=float(tokens[5])
00266             rss=float(tokens[7])
00267             delta_rss=float(tokens[8])
00268             data.append((event_number,{'vsize':vsize,
00269                                        'delta_vsize':delta_vsize,
00270                                        'rss':rss,
00271                                        'delta_rss':delta_rss
00272                                        }
00273                          )
00274                         )
00275             
00276     if not len(data) == 0:
00277         #print "!!!!!!!!!!!!!Adding step %s and data!"%step
00278         steps.append((step,data))
00279 
00280     #print "PRINTOUT@@@@@@"
00281     #print steps
00282     return steps
00283 
00284 ###############
00285 #
00286 # Create a new timing graph and histogram 
00287 #
00288 def newGraphAndHisto(histoleg,leg,npoints,nbins,min_val,max_val,data,graph_num,prevrev=""):
00289     
00290     colors = [2,4]
00291     prevRevName = "???"
00292     if not prevrev == "":
00293         (head, tail) = os.path.split(prevrev)
00294         prevRevName  = os.path.basename(tail)
00295     releaseNames = ["Previous (%s)" % prevRevName,_cmsver]
00296 
00297     histo=ROOT.TH1F("Seconds per event (histo: %s)" % graph_num,'Seconds per event',nbins,min_val,max_val)
00298 
00299     graph=ROOT.TGraph(npoints)
00300         
00301     evt_counter=0
00302     peak = data[0][1]
00303     for evt_num,secs in data:
00304         if secs > peak:
00305             peak = secs
00306         graph.SetPoint(evt_counter,evt_num,secs)
00307         histo.Fill(secs)        
00308         evt_counter+=1
00309 
00310     allsecs = []
00311     map(lambda x: allsecs.append(x[1]),data)
00312     total = reduce(lambda x,y: x + y,allsecs)
00313     mean  = total / evt_counter
00314     rms   = math.sqrt(reduce(lambda x,y: x + y,map(lambda x: x * x,allsecs)) / evt_counter)
00315                 
00316         
00317     graph.SetMarkerStyle(8)
00318     graph.SetMarkerSize(.7)
00319     graph.SetMarkerColor(1)
00320     graph.SetLineWidth(3)
00321     graph.SetLineColor(colors[graph_num]) # for each iterate through colors
00322     histo.SetLineColor(colors[graph_num])
00323     histo.SetStats(1)
00324     histoleg.AddEntry(histo, "%s release" % releaseNames[graph_num], "l")
00325     leg.AddEntry(graph     , "%s release" % releaseNames[graph_num], "l")
00326     leg.AddEntry(graph     , "Mean: %s" % str(mean)                , "l")            
00327     leg.AddEntry(graph     , "RMS : %s" % str(rms)                 , "l")
00328     leg.AddEntry(graph     , "Peak: %s" % str(peak)                , "l")
00329     leg.AddEntry(graph     , "Total time: %s" % str(total)         , "l")                    
00330     if graph_num == 0 :
00331         histo.SetFillColor(colors[graph_num])
00332 
00333     return (graph,histo,mean)
00334 
00335 ###########
00336 # Get limits to plot the graph
00337 #
00338 def getLimits(data,secsperbin):
00339     min_val=get_min(data,1)
00340     max_val=get_max(data,1)
00341     interval=int(max_val-min_val)
00342     
00343     min_val=min_val-interval*0.2
00344     max_val=max_val+interval*0.2
00345     interval=int(max_val-min_val)
00346     
00347     nbins=int(interval/secsperbin)
00348 
00349     npoints=len(data)
00350 
00351     last_event=data[-1][0]
00352 
00353     return (min_val,max_val,interval,npoints,last_event)
00354 
00355 ##############
00356 # Setup graph information for one graph (no need to it on both if they are superimposed)
00357 #
00358 def setupSuperimpose(graph1,graph2,last_event,max_val,reporttype=0, title = ""):
00359     name   = ""
00360     xtitle = ""
00361     ytitle = ""
00362     if   reporttype == 0:
00363         title  = 'Seconds per event'
00364         name   = 'SecondsPerEvent'
00365         xtitle = "Event"
00366         ytitle = "Processing time for each event (s)"
00367         graph1.GetYaxis().SetRangeUser(0,max_val)
00368         graph2.GetYaxis().SetRangeUser(0,max_val)            
00369     elif reporttype == 1:
00370         name   = "%s_graph" % title
00371         xtitle = "Event"
00372         ytitle = "MB"
00373         
00374     graph1.SetTitle(title)
00375     graph1.SetName(name)
00376     graph1.GetXaxis().SetTitle(xtitle)
00377     graph1.GetYaxis().SetTitleOffset(1.3)
00378     graph1.GetYaxis().SetTitle(ytitle)
00379     graph1.GetXaxis().SetLimits(0,last_event)
00380     # Do I need to set limits on graph 2? I don't know
00381     # I'm doing it anyway, can't hurt.
00382     graph2.GetXaxis().SetLimits(0,last_event)
00383 
00384 #############
00385 # Plot the mean line on a graph
00386 #
00387 def getMeanLines(avg,last_event,graph_num):
00388     colors = [2,4]
00389     avg_line=ROOT.TLine(1,avg,last_event,avg)
00390     avg_line.SetLineColor(colors[graph_num])
00391     avg_line.SetLineWidth(2)
00392 
00393     return avg_line
00394 
00395 ############
00396 # Get the difference in timing data (for comparison of two releases)
00397 #
00398 def getTimingDiff(data1,data2,npoints,last_event,orig_max_val):
00399     data3 = []
00400     for x in range(len(data2)):
00401         try:
00402             if data2[x][0] == data1[x][0]:
00403                 avgEventNum = data2[x][0]
00404                 diffSecs    = data2[x][1] - data1[x][1]                
00405                 data3.append((avgEventNum,diffSecs))                
00406         except IndexError:
00407             pass
00408         except ValueError:
00409             pass
00410 
00411     graph=ROOT.TGraph(npoints)
00412 
00413     evt_counter=0
00414     peak = data3[0][1]
00415     for evt_num,secs in data3:
00416         if secs > peak:
00417             peak = secs
00418         graph.SetPoint(evt_counter,evt_num,secs)
00419         evt_counter+=1
00420 
00421     allsecs = []
00422     map(lambda x: allsecs.append(x[1]),data3)
00423     total = reduce(lambda x,y: x + y,allsecs)
00424     mean  = total / evt_counter
00425     rms   = math.sqrt(reduce(lambda x,y: x + y,map(lambda x: x * x,allsecs)) / evt_counter)
00426         
00427     min_val=get_min(data3,1)
00428     max_val=get_max(data3,1)
00429     interval=int(max_val-min_val)
00430     
00431     min_val=min_val-interval*0.2
00432     max_val=max_val+interval*0.2
00433     interval=int(max_val-min_val)
00434 
00435     # Determine the max value to be something that makes the scale similar to what
00436     # the original graph had. Unless we can't seem the maximum value.
00437 
00438     new_max = min_val + orig_max_val
00439     if new_max < max_val:
00440         pass
00441     else :
00442         max_val = new_max
00443     
00444     graph.SetTitle('Change in processing time for each event between revs')
00445     graph.SetName('SecondsPerEvent')    
00446     graph.GetXaxis().SetTitle("Event Number")
00447     graph.GetYaxis().SetTitle("Change in processing time between revs (s)")    
00448     graph.GetYaxis().SetTitleOffset(1.3)
00449     graph.SetLineColor(2)
00450     graph.SetMarkerStyle(8)
00451     graph.SetMarkerSize(.7)
00452     graph.SetMarkerColor(1)
00453     graph.SetLineWidth(3)        
00454     graph.GetXaxis().SetLimits(0,last_event)
00455     graph.GetYaxis().SetRangeUser(min_val,max_val)
00456     leg = ROOT.TLegend(0.5,0.7,0.89,0.89)
00457     leg.AddEntry(graph, "Mean: %s" % str(mean), "l")            
00458     leg.AddEntry(graph, "RMS : %s" % str(rms) , "l")
00459     leg.AddEntry(graph, "Peak: %s" % str(peak), "l")
00460     leg.AddEntry(graph, "Total time change: %s" % str(total)  , "l")                    
00461 
00462     return (graph,leg)
00463 
00464 ##########
00465 # Draw superimposed graphs on a separate canvas
00466 #
00467 def drawGraphs(graph1,graph2,avg1,avg2,leg):
00468     graph_canvas = ROOT.TCanvas("graph_canvas")
00469     graph_canvas.cd()
00470     graph1.Draw("ALP")
00471     graph2.Draw("LP")
00472     avg1.Draw("Same")
00473     avg2.Draw("Same")
00474     leg.Draw()
00475     return graph_canvas
00476 
00477 ##########
00478 # Draw superimposed histograms on a separate canvas
00479 #
00480 def drawHistos(histo_stack,hstleg):
00481     histo_canvas = ROOT.TCanvas("histo_canvas")
00482     histo_canvas.cd()
00483     histo_stack.Draw("nostack")
00484     hstleg.Draw()
00485     return histo_canvas
00486 
00487 ##########
00488 # Draw data differences (comparison between two data sets or releases) on a separate canvas
00489 # 
00490 def drawChanges(graph,chgleg):
00491     graph_canvas = ROOT.TCanvas("change_canvas")
00492     graph_canvas.cd()
00493     graph.Draw("ALP")
00494     chgleg.Draw()
00495     return graph_canvas    
00496 
00497 ###########
00498 # Get limits for two graphs that will be superimposed upon one another
00499 # 
00500 def getTwoGraphLimits(last_event1,max_val1,last_event2,max_val2,min_val1=-1,min_val2=-1):
00501     biggestLastEvt = last_event1
00502     biggestMaxval  = max_val1
00503     lowest_val     = min_val1
00504     
00505     if min_val2 < lowest_val:
00506         lowest_val = min_val2
00507     if last_event2 > biggestLastEvt:
00508         biggestLastEvt = last_event2
00509     if max_val2 > biggestMaxval:
00510         biggestMaxval  = max_val2
00511     return (biggestLastEvt,biggestMaxval,lowest_val)
00512 
00513 def getNpoints(data):
00514     new_data=[]
00515     try:
00516         #This was necessary due to a bug in the getSimpleMemLogData parsing!!! no more necessary!
00517         if data[0][0]==data[1][0]:
00518             print 'Two modules seem to have some output.\nCollapsing ...'
00519             i=0
00520             while True:
00521                 dataline1=data[i]
00522                 i+=1
00523                 dataline2=data[i]
00524                 new_eventnumber=dataline1[0]
00525                 new_vsize=dataline2[1]['vsize']
00526                 new_delta_vsize=dataline1[1]['delta_vsize']+dataline2[1]['delta_vsize']
00527                 new_rss=dataline2[1]['rss']
00528                 new_delta_rss=dataline1[1]['delta_rss']+dataline2[1]['delta_rss']
00529 
00530                 new_data.append((new_eventnumber,{'vsize':new_vsize,
00531                                                   'delta_vsize':new_delta_vsize,
00532                                                   'rss':new_rss,
00533                                                   'delta_rss':new_delta_rss}))
00534                 i+=1
00535                 if i==len(data): break
00536 
00537             data=new_data
00538             print 'Collapsing: Done!'        
00539     except IndexError:
00540         pass
00541 
00542     return (data,len(data))
00543 
00544 ###########
00545 # Create simple memory check graphs 
00546 #
00547 def createSimplMemGraphs(data,npoints,graph_num,legs,prevrev=""):
00548     colors = [2,4]    
00549     values = ["vsize", "rss"]
00550     
00551     prevRevName = "???"
00552     if not prevrev == "":
00553         (head, tail) = os.path.split(prevrev)
00554         prevRevName  = os.path.basename(tail)
00555         
00556     releaseNames = ["Previous (%s)" % prevRevName,_cmsver]        
00557 
00558     #fill the graphs
00559     graphs = []
00560     minim  = -1
00561     peak   = -1
00562     peaks  = []
00563     minims = []
00564     idx = 0
00565     for value in values:
00566         leg = legs[idx]
00567 
00568         graph = ROOT.TGraph(npoints)
00569         graph.SetTitle(value)
00570         graph.SetLineColor(colors[graph_num])
00571         graph.SetMarkerStyle(8)
00572         graph.SetMarkerSize(.7)
00573         graph.SetMarkerColor(1)
00574         graph.SetLineWidth(3)
00575         graph.GetXaxis().SetTitle("Event")
00576         graph.GetYaxis().SetTitleOffset(1.3)
00577         graph.GetYaxis().SetTitle("MB")
00578 
00579         total = 0
00580         point_counter=0
00581         rms   = 0
00582         first = True
00583         for event_number,vals_dict in data:
00584             if first:
00585                 minim = vals_dict[value]
00586                 peak  = vals_dict[value]
00587                 first = False
00588             if vals_dict[value] > peak:
00589                 peak = vals_dict[value]
00590             if vals_dict[value] < minim:
00591                 minim = vals_dict[value]
00592             graph.SetPoint(point_counter, event_number, vals_dict[value])
00593             total += vals_dict[value]
00594             rms   += vals_dict[value] * vals_dict[value]
00595             point_counter+=1
00596 
00597         rms  = math.sqrt(rms / float(point_counter))
00598         mean = total / float(point_counter)
00599         last_event=data[-1][0]
00600         peaks.append(peak)
00601         minims.append(minim)
00602         graph.GetXaxis().SetRangeUser(0,last_event+1)
00603         leg.AddEntry(graph     , "%s release" % releaseNames[graph_num], "l")
00604         leg.AddEntry(graph     , "Mean: %s" % str(mean)                , "l")            
00605         leg.AddEntry(graph     , "RMS : %s" % str(rms)                 , "l")
00606         leg.AddEntry(graph     , "Peak: %s" % str(peak)                , "l")
00607         graphs.append(graph)
00608         idx += 1
00609 
00610     return (graphs[0] , last_event, peaks[0], minims[0], graphs[1], last_event, peaks[1], minims[1])
00611 
00612 #############
00613 # Produce the difference of two memory data sets  
00614 #
00615 def getMemDiff(data1,data2,npoints,last_event,orig_max_val,stepname,rss=False):
00616     data3 = []
00617     memtype = "vsize"
00618     if rss:
00619         memtype = "rss"
00620 
00621     graph=ROOT.TGraph(npoints)
00622     total = 0
00623     rms = 0
00624     evt_counter=0
00625     peak  = -1
00626     minum = -1
00627     first = True
00628     for x in range(len(data2)):
00629         try:
00630             (evtnum2,valdict2) = data2[x]
00631             (evtnum1,valdict1) = data1[x]
00632             if evtnum2 == evtnum1:
00633                 diffMBytes = valdict2[memtype] - valdict1[memtype]
00634 
00635                 if first:
00636                     peak  = diffMBytes
00637                     minum = diffMBytes
00638                     first = False
00639                 if diffMBytes > peak:
00640                     peak  = diffMBytes
00641                 if diffMBytes < minum:
00642                     minum = diffMBytes   
00643                 graph.SetPoint(evt_counter,evtnum2,diffMBytes)
00644                 evt_counter+=1
00645                 total += diffMBytes
00646                 rms += (diffMBytes * diffMBytes)
00647         except IndexError:
00648             pass
00649         except ValueError:
00650             pass
00651    
00652     mean  = total / evt_counter
00653     rms   = math.sqrt(rms / float(evt_counter))
00654         
00655     min_val  = minum
00656     max_val  = peak
00657     interval = int(max_val-min_val)
00658     
00659     min_val=min_val-interval*0.2
00660     max_val=max_val+interval*0.2
00661     interval=int(max_val-min_val)
00662     # Determine the max value to be something that makes the scale similar to what
00663     # the original graph had. Unless we can't seem the maximum value.
00664 
00665     new_max = min_val + orig_max_val
00666     if new_max < max_val:
00667         pass
00668     else :
00669         max_val = new_max
00670     
00671     graph.SetTitle("Change in %s memory usage for each event between revs for step %s" % (memtype,stepname))
00672     graph.SetName('MemoryUsagePerEvent')    
00673     graph.GetXaxis().SetTitle("Event Number")
00674     graph.GetYaxis().SetTitle("Change in memory usage between revs (MBs)")    
00675     graph.GetYaxis().SetTitleOffset(1.3)
00676     graph.SetLineColor(2)
00677     graph.SetMarkerStyle(8)
00678     graph.SetMarkerSize(.7)
00679     graph.SetMarkerColor(1)
00680     graph.SetLineWidth(3)    
00681     graph.GetXaxis().SetLimits(0,last_event)
00682     graph.GetYaxis().SetRangeUser(min_val,max_val)
00683     leg = ROOT.TLegend(0.5,0.7,0.89,0.89)
00684     leg.AddEntry(graph, "Mean: %s" % str(mean), "l")            
00685     leg.AddEntry(graph, "RMS : %s" % str(rms) , "l")
00686     leg.AddEntry(graph, "Peak: %s" % str(peak), "l")
00687     leg.AddEntry(graph, "Trough: %s" % str(minum)  , "l")                    
00688 
00689     return (graph,leg)
00690 
00691 ############
00692 # Draw two memory graphs superimposed on one another
00693 #
00694 def drawMemGraphs(graph1,graph2,min_val,max_val,last_event,leg,memtype,stepname):
00695     graph_canvas=ROOT.TCanvas("%s_%s_canvas" % (memtype,stepname))
00696     graph_canvas.cd()
00697     graph1.GetYaxis().SetRangeUser(min_val,max_val)
00698     graph1.GetXaxis().SetRangeUser(0,last_event)        
00699     graph1.Draw("ALP")
00700     graph2.Draw("LP" )
00701     leg.Draw()    
00702     graph_canvas.ForceUpdate()
00703     graph_canvas.Flush()
00704     return graph_canvas
00705 
00706 ###########
00707 # Draw the comparison graphs of two memory graphs
00708 #
00709 def drawMemChangeGraphs(graph,leg,memtype,stepname):
00710     graph_canvas=ROOT.TCanvas("%s_%s_change_canvas" % (memtype,stepname))
00711     graph_canvas.cd()
00712     graph.Draw("ALP" )
00713     leg.Draw()    
00714     graph_canvas.ForceUpdate()
00715     graph_canvas.Flush()
00716     return graph_canvas
00717 
00718 def getMemOrigScale(fst_min,snd_min,fst_max,snd_max):
00719     minim = fst_min
00720     if snd_min < minim:
00721         minim = snd_min
00722     maxim = fst_max
00723     if snd_max > maxim:
00724         maxim = snd_max
00725     return (minim,maxim)
00726         
00727 def cmpSimpMemReport(rootfilename,outdir,oldLogfile,newLogfile,startevt,batch=True,candle="",prevrev=""):
00728     if batch:
00729         setBatch()
00730     # the fundamental structure: the key is the evt number the value is a list containing
00731     # VSIZE deltaVSIZE RSS deltaRSS
00732     print "#####LOGFILES in cmsPErfRegress:"
00733     print oldLogfile
00734     print newLogfile
00735     try:
00736         info1 = getSimpleMemLogData(oldLogfile,startevt, candle)
00737         if len(info1) == 0:
00738             raise IndexError
00739     except IndexError:
00740         raise SimpMemParseErr(oldLogfile)
00741     except IOError:
00742         raise SimpMemParseErr(oldLogfile)        
00743     
00744     try:
00745         info2 = getSimpleMemLogData(newLogfile,startevt, candle)
00746         if len(info2) == 0:
00747             raise IndexError
00748     except IndexError:
00749         raise SimpMemParseErr(newLogfile)
00750     except IOError:
00751         raise SimpMemParseErr(newLogfile)        
00752     #print "DDDDD info1 %s"%info1 #Format is of the type:[('GEN,SIM', [(1, {'delta_vsize': 0.0, 'delta_rss': 0.0, 'vsize': 648.42600000000004, 'rss': 426.33199999999999}), (2,{...
00753     #print "DDDDD info2 %s"%info2
00754     canvases = []
00755     # skim the second entry when the event number is the same BUG!!!!!!!
00756     # i take elements in couples!
00757 
00758     candreg = re.compile("(.*)(?:\.log)")
00759     vsize_canvas = None
00760     rss_canvas = None
00761     i = 0
00762     firstRoot = True
00763     newrootfile = None
00764     #The following whileis confusing and not necessary info1 and info2 are supposed to only contain one set of simplememorycheck numbers
00765     #in a tuple as seen above ('GEN,SIM',[(event_number,{'delta_vsize': 0.0, etc
00766     #The data structure is questionable... but it is a remnant of past format of the log files I guess.
00767     #while ( i < len(info1) and i < len(info2)):
00768     #curinfo1 = info1[i]
00769     #curinfo2 = info2[i]
00770     (stepname1, data1) = info1[0]
00771     (stepname2, data2) = info2[0]
00772 
00773     #Again this is not necessary anymore...
00774     #if not stepname1 == stepname2:
00775     #print "WARNING: Could not compare %s step and %s step because they are not the same step" % (stepname1, stepname2)
00776     #        print " Searching for next occurence"
00777     #        x = 1
00778     #        if not (i + 1) > len(info1):
00779     #            found = False
00780     #            for info in info1[i + 1:]:
00781     #               (stepname,trash) = info
00782     #               if stepname == stepname2:
00783     #                    i += x
00784     #print " Next occurence found, skipping in-between steps"
00785     #                    assert i < len(info1)
00786     #                    found = True
00787     #                    break
00788     #                x += 1
00789     #            if found:
00790     #                continue
00791     #        print " No more occurences of this step, continuing" 
00792     #        i += 1
00793     #        continue
00794 
00795     #Not sure what this is for!
00796     #OK it was due to the bug of duplicated info in getSimpleMemLogData parsing!
00797     #No need!
00798     #(data1,npoints1) = getNpoints(data1)
00799     #(data2,npoints2) = getNpoints(data2)
00800     npoints1=len(data1)
00801     npoints2=len(data2)
00802     
00803     legs = []
00804     leg      = ROOT.TLegend(0.6,0.99,0.89,0.8)
00805     legs.append(leg)
00806     leg      = ROOT.TLegend(0.6,0.99,0.89,0.8)
00807     legs.append(leg)
00808     
00809     try:
00810         if len(data1) == 0:
00811             raise IndexError
00812         (vsize_graph1,
00813          vsize_lstevt1,
00814          vsize_peak1,
00815          vsize_minim1,
00816          rss_graph1,
00817          rss_lstevt1,
00818          rss_peak1,
00819          rss_minim1) = createSimplMemGraphs(data1,npoints1,0,legs,prevrev=prevrev)
00820         #candFilename = CandFname[candle]
00821         #outputdir = "%s_%s_SimpleMemReport" % (candFilename,stepname1)
00822         #outputdir = os.path.join(outdir,outputdir)
00823         #print "Graph1"
00824         #vsize_graph1.Print()
00825     except IndexError:
00826         raise SimpMemParseErr(oldLogfile)
00827     
00828     try:
00829         if len(data2) == 0:
00830             raise IndexError
00831         (vsize_graph2,
00832          vsize_lstevt2,
00833          vsize_peak2,
00834          vsize_minim2,
00835          rss_graph2,
00836          rss_lstevt2,
00837          rss_peak2,
00838          rss_minim2) = createSimplMemGraphs(data2,npoints2,1,legs,prevrev=prevrev)
00839         #candFilename = CandFname[candle]
00840         #outputdir = "%s_%s_SimpleMemReport" % (candFilename,stepname1)
00841         #outputdir = os.path.join(outdir,outputdir)
00842         #print "Graph2"
00843         #vsize_graph2.Print()#       os.path.join(outputdir,"vsize_graph2.gif"))
00844     except IndexError:
00845         raise SimpMemParseErr(newLogfile)  
00846     
00847     
00848     (vsize_lstevt, vsize_max_val, vsize_min_val) = getTwoGraphLimits(vsize_lstevt1, vsize_peak1, vsize_lstevt2, vsize_peak2, vsize_minim1, vsize_minim2)
00849     (rss_lstevt  , rss_max_val  , rss_min_val)   = getTwoGraphLimits(rss_lstevt1  , rss_peak1, rss_lstevt2  , rss_peak2, rss_minim1,   rss_minim2)    
00850     
00851     (vsize_min,vsize_max) = getMemOrigScale(vsize_minim1,vsize_minim2,vsize_peak1,vsize_peak2)
00852     (rss_min  ,rss_max  ) = getMemOrigScale(rss_minim1,rss_minim2,rss_peak1,rss_peak2)
00853     
00854     setupSuperimpose(vsize_graph1,
00855                      vsize_graph2,
00856                      vsize_lstevt,
00857                      0,
00858                      reporttype = 1,
00859                      title = "%s_vsize" % stepname1)
00860     setupSuperimpose(rss_graph1  ,
00861                      rss_graph2  ,
00862                      rss_lstevt  ,
00863                      0,
00864                      reporttype = 1,
00865                      title = "%s_rss"  %  stepname2)
00866     
00867     (vsizePerfDiffgraph, vsizeleg) = getMemDiff(data1,data2,npoints2,vsize_lstevt, (vsize_max - vsize_min), stepname1, rss=False)
00868     (rssPerfDiffgraph, rssleg)     = getMemDiff(data1,data2,npoints2,rss_lstevt  , (rss_max - rss_min)    , stepname1, rss=True )        
00869     
00870     vsize_canvas = drawMemGraphs(vsize_graph1, vsize_graph2, vsize_min_val, vsize_max_val, vsize_lstevt, legs[0], "vsize", stepname1)
00871     rss_canvas   = drawMemGraphs(rss_graph1  , rss_graph2  , rss_min_val, rss_max_val, rss_lstevt, legs[1], "rss"  , stepname1)
00872     vsize_change_canvas = drawMemChangeGraphs(vsizePerfDiffgraph, vsizeleg, "vsize", stepname1)         
00873     rss_change_canvas   = drawMemChangeGraphs(rssPerfDiffgraph  , rssleg  , "rss"  , stepname1)         
00874     
00875     if batch:
00876         
00877         
00878         logcandle = ""
00879         candname  = ""            
00880         found = candreg.search(os.path.basename(newLogfile))
00881         
00882         if found:
00883             logcandle = found.groups()[0]
00884             
00885         if   CandFname.has_key(candle):
00886             candFilename = CandFname[candle]
00887         elif CandFname.has_key(logcandle):
00888             candFilename = CandFname[logcandle]
00889         else:
00890             print "%s is an unknown candle!"%candle
00891             candFilename = "Unknown-candle"
00892             
00893         outputdir = "%s_%s_SimpleMemReport" % (candFilename,stepname1)
00894         outputdir = os.path.join(outdir,outputdir)
00895         
00896         if not os.path.exists(outputdir):
00897             os.mkdir(outputdir)
00898 
00899             #print the graphs as files :)
00900 
00901         newrootfile = createROOT(outputdir,rootfilename)                
00902         
00903         vsize_canvas.Print(       os.path.join(outputdir,"vsize_graphs.gif"), "gif")
00904         rss_canvas.Print(         os.path.join(outputdir,"rss_graphs.gif"  ), "gif")
00905         vsize_change_canvas.Print(os.path.join(outputdir,"vsize_change.gif"), "gif")
00906         rss_change_canvas.Print(  os.path.join(outputdir,"rss_change.gif"  ), "gif")
00907         # write it on file
00908         map(lambda x: x.Write(), [vsize_graph1,vsize_graph2, rss_graph1, rss_graph2, vsizePerfDiffgraph, rssPerfDiffgraph])
00909         map(lambda x: x.Write(), [vsize_canvas,rss_canvas,vsize_change_canvas,rss_change_canvas])
00910         newrootfile.Close()
00911     else:
00912         # we have to do this if we are running the application standalone
00913         # For some reason it will not draw some graphs at all if there is more than
00914         # one step.
00915         # If we wait between iterations of this loop the graphs will be drawn correctly.
00916         # Perhaps a graphics buffer problem with ROOT?
00917         # Perhaps a garbage collection problem in python? (Doubt it)
00918         canvases.append(rss_canvas)
00919         canvases.append(vsize_canvas)
00920         canvases.append(vsize_change_canvas)
00921         canvases.append(rss_change_canvas)            
00922         time.sleep(5.0)
00923 
00924     #Eliminated the while loop!    
00925     #i += 1
00926         
00927     #
00928     # Create a one dimensional function and draw it
00929     #
00930 
00931     if batch:
00932         pass
00933     else:
00934         if len(canvases) > 0:
00935             while reduce(lambda x,y: x or y ,canvases):
00936                 time.sleep(2.5)
00937     return 0            
00938         
00939 
00940 def cmpTimingReport(rootfilename,outdir,oldLogfile,newLogfile,secsperbin,batch=True,prevrev=""):
00941     if batch:
00942         setBatch()
00943     
00944     data1 = getTimingLogData(oldLogfile)
00945     data2 = getTimingLogData(newLogfile)        
00946         
00947     try:
00948         (min_val1,max_val1,nbins1,npoints1,last_event1) = getLimits(data1,secsperbin)
00949     except IndexError, detail:
00950         raise TimingParseErr(oldLogfile)
00951     
00952     try:
00953         (min_val2,max_val2,nbins2,npoints2,last_event2) = getLimits(data2,secsperbin)
00954     except IndexError, detail:
00955         raise TimingParseErr(newLogfile)
00956 
00957     hsStack  = ROOT.THStack("hsStack","Histogram Comparison")
00958     leg      = ROOT.TLegend(0.6,0.99,0.89,0.8)
00959     histoleg = ROOT.TLegend(0.5,0.8,0.89,0.89)    
00960 
00961     #
00962     
00963     (graph1,histo1,mean1) = newGraphAndHisto(histoleg,leg,npoints1,nbins1,min_val1,max_val1,data1,0,prevrev)
00964     hsStack.Add(histo1)
00965     (graph2,histo2,mean2) = newGraphAndHisto(histoleg,leg,npoints2,nbins2,min_val2,max_val2,data2,1,prevrev)
00966     hsStack.Add(histo2)
00967 
00968     (biggestLastEvt,biggestMaxval, trashthis) = getTwoGraphLimits(last_event1,max_val1,last_event2,max_val2,min_val1,min_val2)
00969     
00970     (changegraph,chgleg) = getTimingDiff(data1,data2,npoints2,biggestLastEvt,biggestMaxval)
00971     setupSuperimpose(graph1,graph2,biggestLastEvt,biggestMaxval)
00972     avg_line1 = getMeanLines(mean1,last_event1,0)
00973     avg_line2 = getMeanLines(mean2,last_event2,1)
00974 
00975     #
00976     # Create a one dimensional function and draw it
00977     #
00978     histo1.GetXaxis().SetTitle("s")            
00979     graph_canvas   = drawGraphs(graph1,graph2,avg_line1,avg_line2,leg)
00980     changes_canvas = drawChanges(changegraph,chgleg)
00981     histo_canvas   = drawHistos(hsStack,histoleg)
00982     
00983     newrootfile = None
00984     if batch:
00985 
00986         newrootfile = createROOT(outdir,rootfilename)      
00987 
00988         cput = ROOT.TTree()
00989         #  array(typecode, initializer)
00990         #  typecode is i for int, f for float etc.
00991         tot_a1 = array( "f", [ 0 ] )
00992         tot_a2 = array( "f", [ 0 ] )
00993 
00994         tot_a1[0] = mean1
00995         tot_a2[0] = mean2
00996 
00997         cput.Branch("total1",tot_a1,"total1/F")
00998         cput.Branch("total2",tot_a2,"total2/F")
00999         cput.Fill()
01000         cput.Write("cpu_time_tuple",ROOT.TObject.kOverwrite)
01001         
01002         names = ["graphs.gif","changes.gif","histos.gif"]
01003         
01004         graph_canvas.Print(  os.path.join(outdir,names[0]),"gif")
01005         changes_canvas.Print(os.path.join(outdir,names[1]),"gif")
01006         histo_canvas.Print(  os.path.join(outdir,names[2]),"gif")
01007         
01008         map(lambda x:x.Write(),[graph1,graph2,changegraph,hsStack,histo1,histo2])
01009         
01010         graph_canvas.Write()    # to file
01011         changes_canvas.Write()
01012         histo_canvas.Write() 
01013         newrootfile.Close()   
01014 
01015         return names
01016     else:
01017         
01018         while graph_canvas or histo_canvas or changes_canvas:
01019             time.sleep(2.5)
01020         return 0
01021     
01022 def rmtree(path):
01023     try:
01024         #os.remove(path)
01025         #Brute force solution:
01026         RemoveCmd="rm -Rf %s"%path
01027         os.system(RemoveCmd)
01028     except OSError, detail:
01029         if detail.errno == 39:
01030             try:
01031                 gen = os.walk(path)
01032                 nodes    = gen.next()
01033                 nodes[0] = par
01034                 nodes[1] = dirs
01035                 nodes[2] = files
01036                 for f in files:
01037                     os.remove(os.path.join(path,f))
01038                 for d in dirs:
01039                     rmtree(os.path.join(path,d))
01040             except OSError, detail:
01041                 print detail
01042             except IOError, detail:
01043                 print detail
01044             os.remove(path)
01045 
01046 def perfreport(perftype,file1,file2,outdir,IgProfMemopt=""):
01047     src = ""
01048     try:
01049         src = os.environ["CMSSW_SEARCH_PATH"]
01050     except KeyError , detail:
01051         print "ERROR: scramv1 environment could not be located", detail 
01052 
01053     vars = src.split(":")
01054     loc  = vars[0]
01055 
01056     proftype = ""
01057     if   perftype == 0: # EdmSize
01058         proftype = "-fe"
01059     elif perftype == 1: # IgProf
01060         proftype = "-fi"
01061     else:               # Callgrind
01062         proftype = "-ff"
01063 
01064     cmssw_release_base = ""
01065     cmssw_data = ""
01066     try:
01067         cmssw_release_base = os.environ['CMSSW_RELEASE_BASE']
01068         cmssw_data = os.environ['CMSSW_DATA_PATH']
01069     except KeyError, detail:
01070         raise PerfReportErr
01071 
01072     xmlfile = os.path.join(cmssw_release_base,"src","Validation","Performance","doc","regress.xml")
01073 
01074     prRoot = "/afs/cern.ch/user/g/gbenelli/public/PerfReport2/2.0.1"
01075 
01076     # this might be useful at some point
01077     #cd %s ; eval `scramv1 runtime -csh`  ; source $CMSSW_DATA_PATH/perfreport/2.0.0/etc/profile.d/init.csh; cd - ; %s\"" % (loc,perfcmd)
01078 
01079     # Before adding Danilo's 2.1 we did this
01080     #perfcmd = "perfreport -tmp %s -i %s -r %s -o %s" % (proftype,file2,file1,outdir)    
01081     #cmd = "tcsh -c \"source %s/perfreport/2.0.0/etc/profile.d/init.csh; cd - ; %s\"" % (cmssw_data,perfcmd)
01082 
01083     # now we do
01084 
01085     tmpdir  = tmp.mkdtemp(prefix=os.path.join(outdir,"tmp"))
01086 
01087     perfcmd = "%s %s %s -c %s -t%s -i %s -r %s -o %s" % (os.path.join(prRoot,"bin","perfreport"),proftype,IgProfMemopt,xmlfile,tmpdir,file2,file1,outdir)            
01088     cmd     = "tcsh -c \"cd %s ; eval `scramv1 runtime -csh` ; cd - ;source %s/etc/profile.d/init.csh ; %s\"" % (loc,prRoot,perfcmd)
01089     #Obsolete popen4-> subprocess.Popen
01090     #process  = os.popen(cmd)
01091     process = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
01092     exitstat = process.wait()
01093     cmdout   = process.stdout.read()
01094     exitstat = process.returncode
01095 
01096     try:
01097         rmtree(tmpdir)        #Brute force solution rm -RF tmpdir done in rmtree()
01098         #os.rmdir(tmpdir)
01099     except IOError, detail:
01100         print "WARNING: Could not remove dir because IO%s" % detail                
01101     except OSError, detail:
01102         print "WARNING: Could not remove dir because %s" % detail                
01103 
01104     if True:
01105         print cmd
01106         print cmdout
01107 
01108     if not exitstat == None:
01109         sig     = exitstat >> 16    # Get the top 16 bits
01110         xstatus = exitstat & 0xffff # Mask out all bits except the bottom 16
01111         raise PerfReportErr("ERROR: PerfReport returned a non-zero exit status (%s, SIG_INT = %s) run %s. Dump follows: \n%s" % (perfcmd,xstatus,sig,cmdout))
01112     
01113     
01114 def cmpEdmSizeReport(outdir,file1,file2):
01115     perfreport(0,file1,file2,outdir)
01116 
01117 def ungzip(inf,outh):
01118     gzf = gzip.open(inf,"r")
01119     print "ungzipping"
01120     for char in gzf:
01121         os.write(outh,char)
01122     os.close(outh)
01123     print "finish ungzipping"
01124     gzf.close()
01125 
01126 def ungzip2(inf,out):
01127     os.system("gzip -c -d %s > %s" % (inf,out)) 
01128 
01129 def cmpIgProfReport(outdir,file1,file2,IgProfMemOpt=""):
01130     (tfile1, tfile2) = ("", "")
01131     try:
01132         # don't make temp files in /tmp because it's never bloody big enough
01133         (th1, tfile1) = tmp.mkstemp(prefix=os.path.join(outdir,"igprofRegressRep."))
01134         (th2, tfile2) = tmp.mkstemp(prefix=os.path.join(outdir,"igprofRegressRep."))
01135         os.close(th1)
01136         os.close(th2)
01137         os.remove(tfile1)
01138         os.remove(tfile2)
01139         ungzip2(file1,tfile1)
01140         ungzip2(file2,tfile2)        
01141 
01142         perfreport(1,tfile1,tfile2,outdir,IgProfMemOpt)
01143 
01144         os.remove(tfile1)
01145         os.remove(tfile2)
01146     except OSError, detail:
01147         raise PerfReportErr("WARNING: The OS returned the following error when comparing %s and %s\n%s" % (file1,file2,str(detail)))
01148         if os.path.exists(tfile1):
01149             os.remove(tfile1)
01150         if os.path.exists(tfile2):
01151             os.remove(tfile2)
01152     except IOError, detail:
01153         raise PerfReportErr("IOError: When comparing %s and %s using temporary files %s and %s. Error message:\n%s" % (file1,file2,tfile1,tfile2,str(detail)))
01154         if os.path.exists(tfile1):
01155             os.remove(tfile1)
01156         if os.path.exists(tfile2):
01157             os.remove(tfile2)        
01158 
01159 
01160 def cmpCallgrindReport(outdir,file1,file2):
01161     perfreport(2,file1,file2,outdir)
01162 
01163 def _main():
01164     outdir = os.getcwd()
01165     
01166     (file1,file2,secsperbin,reporttype,IgProfMemOptions)  = getParameters()
01167 
01168     try:
01169         if   reporttype == "timing":
01170             rootfilename = "timingrep-regression.root"
01171             cmpTimingReport(rootfilename ,outdir,file1,file2,secsperbin,True)
01172         elif reporttype == "simplememory":
01173             rootfilename = "simpmem-regression.root"
01174             cmpSimpMemReport(rootfilename,outdir,file1,file2,secsperbin,True)
01175         elif reporttype == "edmsize":
01176             cmpEdmSizeReport(outdir,file1,file2)
01177         elif reporttype == "callgrind":
01178             cmpCallgrindReport(outdir,file1,file2)
01179         elif reporttype == "igprof":
01180             cmpIgProfReport(outdir,file1,file2,IgProfMemOptions)            
01181     except TimingParseErr, detail:
01182         print "WARNING: Could not parse data from Timing report file %s; not performing regression" % detail.message
01183     except SimpMemParseErr, detail:
01184         print "WARNING: Could not parse data from Memory report file %s; not performing regression" % detail.message
01185     except PerfReportErr     , detail:
01186         print "WARNING: Could not parse data from Edm file %s; not performing regression" % detail.message
01187     except IOError, detail:
01188         print detail
01189     except OSError, detail:
01190         print detail
01191 
01192 if __name__ == "__main__":
01193     _main()
01194