CMS 3D CMS Logo

cmsPerfStripChart.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 import os, sys
4 try: import simplejson as json
5 except ImportError: import json
6 
7 # Helper functions
def get_yaxis_range(list):
    """
    Given a list of dictionaries, where each dict holds the information
    about an IB, return a tuple (low, high) with the lowest and the
    highest value of the y-axis, respectively.

    Each dict must provide the numeric keys 'average' and 'error'; the
    range spans min(average - error) .. max(average + error) over all
    entries. For an empty list the historical placeholder
    (sys.maxsize, -1) is returned.
    """
    # NOTE: the parameter name shadows the builtin; it is kept unchanged so
    # that existing keyword callers keep working.
    lows = [node['average'] - node['error'] for node in list]
    highs = [node['average'] + node['error'] for node in list]
    if not lows:
        # Preserve the result callers got before for "no data".
        return (sys.maxsize, -1)
    # Use the real extrema instead of seeding with sentinels: a -1 seed
    # produced a wrong upper bound when every value was below -1.
    return (min(lows), max(highs))
19 
20 # Main operation function
def operate(timelog, memlog, json_f, num):
    """
    Main operation of the script (i.e. json db update, histograms' creation)
    with respect to the specifications of all the files & formats concerned.

    Parameters:
      timelog -- path of the timing log. The last line containing
                 'Uncertainty of Average Time' supplies the average (6th
                 space-separated token) and its uncertainty (8th token).
      memlog  -- path of the memory log. The first line containing
                 'Maximum rss' supplies the value (4th token).
      json_f  -- path of the json database, a {"strips": [...]} document.
      num     -- number of most recent entries shown in the strip charts.

    The Integration Build name is the basename of $CMSSW_BASE and must end
    in a 'YYYY-MM-DD-HHMM' timestamp.

    Returns 2 (after writing a message to stderr) when a log could not be
    parsed or the IB name is invalid; returns None otherwise.

    Side effects: may append an entry to json_f, and writes
    './histograms.root', './average_cpu_histo.png' and
    './maximum_rss_histo.png'.
    """
    import re
    from datetime import datetime

    script_name = os.path.basename(__file__)

    # Read both logs; 'with' closes the handles even if reading fails.
    with open(timelog, 'r') as timefile:
        timelog_lines = timefile.readlines()
    with open(memlog, 'r') as memfile:
        memlog_lines = memfile.readlines()

    # Get average, uncertainty of average and maximum rss.
    # A single blank is the "not found" marker checked further below.
    max_rss = average = error = ' '
    for line in timelog_lines:
        if 'Uncertainty of Average Time' in line:
            # Strip only the newline: slicing off the last character would
            # eat a digit when the file does not end with a newline.
            tokens = line.rstrip('\n').split(' ')
            # A line that is too short is reported as a parse error below
            # instead of raising IndexError here.
            average = tokens[5] if len(tokens) > 5 else ' '
            error = tokens[7] if len(tokens) > 7 else ' '
    for line in memlog_lines:
        if 'Maximum rss' in line:
            tokens = line.rstrip('\n').split(' ')
            if len(tokens) > 3:
                max_rss = tokens[3]
            break

    # Get Integration Build's identifier straight from the environment
    # (no shell needed; an unset variable yields an empty name).
    IB = os.path.basename(os.environ.get('CMSSW_BASE', ''))

    # Check if log files were parsed properly...
    # and if the IB is valid using regular expressions.
    try:
        # regex for a float
        float_regex = r"^\d+\.?\d*$"
        if average == ' ' or re.match(float_regex, average) is None:
            raise RuntimeError('Could not parse \"' + timelog + '\" properly. ' +
                               'Check if Average Time is defined correctly.')
        if error == ' ' or re.match(float_regex, error) is None:
            raise RuntimeError('Could not parse \"' + timelog + '\" properly. ' +
                               'Check if Uncertainty of Average Time is defined correctly.')
        if max_rss == ' ' or re.match(float_regex, max_rss) is None:
            raise RuntimeError('Could not parse \"' + memlog + '\" properly. ' +
                               ' Check if Maximum rss is defined correct.')

        # regex for dates 'YYYY-MM-DD-HHMM'; hours stop at 23 because
        # datetime() rejects hour 24.
        date_regex = (r'(19|20|21)\d\d-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])'
                      r'-([01][0-9]|2[0-3])([0-5][0-9])$')
        ib_match = re.search(date_regex, IB)
        ib_list = IB.split('_')
        # The release name is built from the first four '_' separated parts,
        # so at least five parts (release + timestamp) are required.
        if ib_match is None or len(ib_list) < 5:
            raise RuntimeError('Not a valid IB. Valid IB: ' +
                               '[CMSSW_X_X_X_YYYY-MM-DD-HHMM]')
    except RuntimeError as err:
        sys.stderr.write(script_name + ': Error: ' + str(err) + '\n')
        return 2

    # Imported only once the input is known to be valid, but before the
    # json file is modified, so a missing ROOT never leaves a half-done run.
    import ROOT

    # Open for getting the data.
    with open(json_f, "r") as json_db:
        database = json.load(json_db)

    # Get the data to be stored and check if already exists.
    cmsrelease = '_'.join(ib_list[:4])
    # The validated timestamp itself is stored as the entry's IB.
    data = {"IB": ib_match.group(0), "average": float(average),
            "error": float(error), "max_rss": float(max_rss)}

    if data in database["strips"]:
        sys.stderr.write(script_name + ": Warning: Entry already exists " +
                         "in json file and will not be stored! " +
                         "Only the strip charts will be created.\n")
    else:
        database["strips"].append(data)
        print('Storing entry to \"' + json_f +
              '\" file with attribute values:\n' +
              'IB=' + IB + '\naverage=' + average +
              '\nUncertainty of average=' + error + '\nmax_rss=' + max_rss)
        # Store the data in json file.
        with open(json_f, "w") as json_db:
            json.dump(database, json_db, indent=2)
        print('File "' + json_f + '" was updated successfully!')

    # Change to datetime type (helpful for sorting). This happens after the
    # dump above because datetime objects are not json serializable.
    for record in database["strips"]:
        time_list = record['IB'].split('-')
        record['IB'] = datetime(int(time_list[0]), int(time_list[1]),
                                int(time_list[2]), int(time_list[3][0:2]),
                                int(time_list[3][2:]))

    # Sort the list, most recent IB first.
    entries = sorted(database["strips"], key=lambda k: k['IB'], reverse=True)

    # Check if there are NUM entries.
    if num > len(entries):
        new_num = len(entries)
        sys.stderr.write(script_name + ': Warning: There are less than ' +
                         str(num) + ' entries in json file. Changed number to ' +
                         str(new_num) + '.\n')
        num = new_num

    # The histograms.
    ROOT.gROOT.SetStyle("Plain")
    outdir = '.'

    # Save in file
    rootfilename = outdir + '/histograms.root'
    myfile = ROOT.TFile(rootfilename, 'RECREATE')

    # Average time histogram.
    histo1 = ROOT.TH1F("AveCPU per IB", "Ave CPU per IB", num, 0., num)
    histo1.SetTitle(cmsrelease + ": Showing last " + str(num) + " IBs")
    histo1.SetName('avecpu_histo')

    # Maximum rss histogram.
    histo2 = ROOT.TH1F("Max rrs per IB", "Max rss per IB", num, 0., num)
    histo2.SetTitle(cmsrelease + ": Showing last " + str(num) + " IBs")
    histo2.SetName('maxrss_histo')

    # Fill in the histograms: entries are newest first, so entry i goes to
    # bin num-i and the newest IB ends up in the right-most bin.
    for i in range(num):
        entry = entries[i]
        datime = entry['IB'].strftime('%Y-%b-%d %H:%M')
        bin_index = num - i

        histo1.GetXaxis().SetBinLabel(bin_index, datime)
        histo1.SetBinContent(bin_index, entry['average'])
        histo1.SetBinError(bin_index, entry['error'])
        histo2.GetXaxis().SetBinLabel(bin_index, datime)
        histo2.SetBinContent(bin_index, entry['max_rss'])

    histo1.SetStats(0)
    histo1.GetYaxis().SetTitle("Average CPU time")
    histo1.GetYaxis().SetTitleOffset(1.8)
    histo1.GetXaxis().SetTitle("Integration Build")
    histo1.GetXaxis().SetTitleOffset(4.)
    histo1.GetXaxis().CenterTitle()
    histo1.GetXaxis().LabelsOption('v')
    # Histo1 - Set limits on the Y-axis
    y_low, y_high = get_yaxis_range(entries)
    interval = y_high - y_low
    # ...get a bit more space
    y_low = y_low - interval * 0.1
    y_high = y_high + interval * 0.1
    histo1.GetYaxis().SetRangeUser(y_low, y_high)

    histo2.SetStats(0)
    histo2.GetYaxis().SetTitle("Maximum rss")
    histo2.GetYaxis().SetTitleOffset(1.8)
    histo2.GetXaxis().SetTitle("Integration Build")
    histo2.GetXaxis().SetTitleOffset(4.)
    histo2.GetXaxis().CenterTitle()
    histo2.GetXaxis().LabelsOption('v')

    # Draw and save!

    ave_canvas = ROOT.TCanvas(cmsrelease + '_average_canvas')
    ave_canvas.SetGridy()
    ave_canvas.SetBottomMargin(0.28)
    ave_canvas.SetLeftMargin(0.18)
    ave_canvas.cd()
    # Histo1 - draw line
    histo1.SetLineColor(2)
    histo1.SetLineWidth(2)
    histo1.DrawCopy("HISTO L")
    # Histo1 - draw errors and markers
    histo1.SetLineColor(1)
    histo1.SetLineStyle(2)
    histo1.SetLineWidth(1)
    histo1.SetMarkerStyle(8)
    histo1.SetMarkerSize(.6)
    histo1.SetMarkerColor(1)
    histo1.Draw("E1P SAME")
    ROOT.gStyle.SetErrorX(0)
    ave_canvas.Print(outdir + "/average_cpu_histo.png", "png")

    rss_canvas = ROOT.TCanvas(cmsrelease + '_maxrss_canvas')
    rss_canvas.SetGridy()
    rss_canvas.SetBottomMargin(0.28)
    rss_canvas.SetLeftMargin(0.18)
    rss_canvas.cd()
    # Histo2 - draw line
    histo2.SetLineColor(2)
    histo2.SetLineWidth(2)
    histo2.DrawCopy("L")
    # Histo2 - draw markers
    histo2.SetMarkerStyle(8)
    histo2.SetMarkerSize(.6)
    histo2.SetMarkerColor(1)
    histo2.Draw("P SAME")
    rss_canvas.Print(outdir + "/maximum_rss_histo.png", "png")

    # write them on file
    histo1.Write()
    ave_canvas.Write()
    histo2.Write()
    rss_canvas.Write()
234 
235 
236 ###########################################################################################
237 
if __name__ == '__main__':

    import optparse

    ################################
    # Definition of command usage. #
    ################################
    script_name = os.path.basename(__file__)
    usage = script_name + ' <options> -t TIMELOG -m MEMLOG'
    parser = optparse.OptionParser(usage)
    parser.add_option('-t', '--timelog',
                      action='store',
                      dest='timelog',
                      default='',
                      metavar='TIMELOG',
                      help='input file TIMELOG, the output of cmsTiming_parser.py')
    parser.add_option('-m', '--memlog',
                      action='store',
                      dest='memlog',
                      default='',
                      metavar='MEMLOG',
                      help='input file MEMLOG, the output of cmsSimplememchecker_parser.py')
    parser.add_option('-j', '--jsonfile',
                      action='store',
                      dest='json_f',
                      default='strips.json',
                      metavar='FILE.JSON',
                      help='the .json file database')
    parser.add_option('-n', type='int',
                      action='store',
                      dest='num',
                      default=30,
                      metavar='NUM',
                      help='last NUM entries to be printed in the strip charts. Default is 30.')
    (options, args) = parser.parse_args()

    ######################################
    # Some error handling for the usage. #
    ######################################
    if options.timelog == '' or options.memlog == '':
        sys.exit('%s: Missing file operands!\n' % script_name +
                 'Type %s --help for more information!' % script_name)
    if not os.path.exists(options.timelog) or \
       not os.path.exists(options.memlog):
        sys.exit('%s: Error: Not present file(s)!' % script_name)
    # Report a missing database cleanly instead of failing with a traceback
    # when it is opened below.
    if not os.path.exists(options.json_f):
        sys.exit(script_name + ': Error: json file \"' + options.json_f +
                 '\" does not exist!')

    #############################################
    # Validity of .json file-database.          #
    #############################################

    # The format that the json file must have:
    json_format = "\n { \"strips\" :\n" +\
        " [\n {\"IB\" : \"XXX_XXX\", \"average\" : M, \"error\" : E, \"max_rss\" : N},\n" +\
        " .........................................\n" +\
        " ]\n" +\
        " }\n"

    # Keys every entry of the "strips" list must provide.
    required_keys = set(['IB', 'average', 'error', 'max_rss'])

    # json file validity checks start under the try statement
    try:
        with open(options.json_f, "r+") as json_db:
            # -check if the json file is empty; if yes, create a new database upon it
            if os.path.getsize(options.json_f) == 0:
                sys.stderr.write(script_name + ': Warning: File \"' + options.json_f +
                                 '\" is empty. A new database will be created upon it.\n')
                json_db.write("{\n \"strips\" : [\n ]\n}\n")
                json_db.seek(0, 0)

            # -check if file loads as a valid json
            database = json.load(json_db)

        # -check if strips key is there.(Look format above!)
        # A top-level value that is not an object cannot hold the key.
        if not isinstance(database, dict):
            raise KeyError
        strips = database["strips"]

        # -check if value of strips is type of list
        # (KeyError so that the format message below is shown)
        if not isinstance(strips, list):
            raise KeyError

        # -check if the list has valid elements
        for item in strips:
            if not isinstance(item, dict) or not required_keys.issubset(item):
                raise KeyError
    except ValueError:
        sys.exit(script_name + ': Error: Not a valid json file! Please, check the format:\n' + json_format)
    except KeyError:
        sys.exit(script_name + ': Error: Invalid format in the json file! Check it here:\n' + json_format)

    ####################
    # Start operation. #
    ####################

    # sys.exit() used in order to return an exit code to shell, in case of error
    sys.exit(operate(options.timelog, options.memlog, options.json_f, options.num))
T min(T a, T b)
Definition: MathUtil.h:58
def get_yaxis_range(list)
double split
Definition: MVATrainer.cc:139
def operate(timelog, memlog, json_f, num)