CMS 3D CMS Logo

cmsPerfStripChart.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 from __future__ import print_function
4 from builtins import range
5 import os, sys
6 try: import simplejson as json
7 except ImportError: import json
8 
9 # Helper functions
def get_yaxis_range(list):
    """
    Return the tuple (low, high) of y-axis bounds that covers the error
    bar of every IB.

    `list` is an iterable of dictionaries, one per IB, each holding the
    numeric entries 'average' and 'error'. `low` is the smallest value of
    average - error and `high` is the largest value of average + error.

    For an empty input the pair (sys.maxsize, -1) is returned.
    """
    # NOTE: the parameter keeps its historical name, which shadows the
    # builtin `list` inside this function, so existing callers keep working.
    lows, highs = [], []
    for node in list:
        lows.append(node['average'] - node['error'])
        highs.append(node['average'] + node['error'])

    if not lows:
        # Nothing to scan: keep the sentinel pair this function has always
        # returned for empty input.
        return (sys.maxsize, -1)

    # Derive both bounds from the data itself. Seeding `high` with -1 would
    # clamp the upper bound whenever all values lie below -1.
    return (min(lows), max(highs))
21 
22 # Main operation function
def operate(timelog, memlog, json_f, num):
    """
    Main operation of the script (i.e. json db update, histograms' creation)
    with respect to the specifications of all the files & formats concerned.

    Parameters:
      timelog -- path of the timing log; the last line containing
                 'Uncertainty of Average Time' supplies the average
                 (6th space-separated field) and its uncertainty (8th).
      memlog  -- path of the memory log; the first line containing
                 'Maximum rss' supplies the maximum rss (4th field).
      json_f  -- path of the json database; it must already hold a
                 {"strips": [...]} document.
      num     -- number of most recent entries to show in the strip charts.

    The Integration Build identifier is the basename of the CMSSW_BASE
    environment variable and must end with a 'YYYY-MM-DD-HHMM' stamp.

    Side effects: may append one entry to the json file, and writes
    './histograms.root', './average_cpu_histo.png' and
    './maximum_rss_histo.png'.

    Returns 2 (after printing a message on stderr) when the logs or the IB
    identifier cannot be validated, and None on success.
    """
    import re
    import ROOT
    from datetime import datetime

    script_name = os.path.basename(__file__)

    # Read both logs; `with` guarantees the files are closed even on error.
    with open(timelog, 'r') as timefile:
        timelog_lines = timefile.readlines()
    with open(memlog, 'r') as memfile:
        memlog_lines = memfile.readlines()

    # Get average, uncertainty of average and maximum rss (as strings;
    # a single blank means "not found").
    average, error = _parse_time_log(timelog_lines)
    max_rss = _parse_mem_log(memlog_lines)

    # Get Integration Build's identifier straight from the environment.
    # The Python 2-only `commands` module is not needed for this, and an
    # unset variable yields '' exactly as `echo $CMSSW_BASE` did.
    IB = os.path.basename(os.environ.get('CMSSW_BASE', ''))

    # Check if log files were parsed properly...
    # and if the IB is valid using regular expressions.
    try:
        # regex for a (non-negative) float
        float_regex = r"^\d+\.?\d*$"
        if average == ' ' or re.match(float_regex, average) is None:
            raise RuntimeError('Could not parse \"' + timelog + '\" properly. ' +
                               'Check if Average Time is defined correctly.')
        if error == ' ' or re.match(float_regex, error) is None:
            raise RuntimeError('Could not parse \"' + timelog + '\" properly. ' +
                               'Check if Uncertainty of Average Time is defined correctly.')
        if max_rss == ' ' or re.match(float_regex, max_rss) is None:
            raise RuntimeError('Could not parse \"' + memlog + '\" properly. ' +
                               ' Check if Maximum rss is defined correct.')

        # regex for dates 'YYYY-MM-DD-HHMM' at the end of the identifier.
        # Hours stop at 23: datetime() below rejects hour 24.
        stamp_regex = (r'(19|20|21)\d\d-(0[1-9]|1[012])-(0[1-9]|[12]'
                       r'[0-9]|3[01])-([01][0-9]|2[0-3])([0-5][0-9])$')
        stamp_match = re.search(stamp_regex, IB)
        if stamp_match is None:
            raise RuntimeError('Not a valid IB. Valid IB: ' +
                               '[CMSSW_X_X_X_YYYY-MM-DD-HHMM]')
    except Exception as err:
        sys.stderr.write(script_name + ': Error: ' + str(err) + '\n')
        return 2

    # Open for getting the data.
    with open(json_f, "r") as json_db:
        database = json.load(json_db)

    # Get the data to be stored and check if already exists.
    # The stamp is taken from the regex match instead of a fixed '_' field
    # index, so release names with extra underscores are handled as well.
    ib_stamp = stamp_match.group(0)
    cmsrelease = IB[:stamp_match.start()].rstrip('_')
    data = {"IB": ib_stamp, "average": float(average),
            "error": float(error), "max_rss": float(max_rss)}

    if data in database["strips"]:
        sys.stderr.write(script_name + ": Warning: Entry already exists " +
                         "in json file and will not be stored! " +
                         "Only the strip charts will be created.\n")
    else:
        database["strips"].append(data)
        print('Storing entry to \"' + json_f +
              '\" file with attribute values:\n' +
              'IB=' + IB + '\naverage=' + average +
              '\nUncertainty of average=' + error + '\nmax_rss=' + max_rss)
        # Store the data in json file.
        with open(json_f, "w") as json_db:
            json.dump(database, json_db, indent=2)
        print('File "' + json_f + '" was updated successfully!')

    # Change to datetime type (helpful for sorting). This happens after
    # the json dump on purpose: datetime objects are not json-serializable.
    for record in database["strips"]:
        time_list = record['IB'].split('-')
        record['IB'] = datetime(int(time_list[0]), int(time_list[1]),
                                int(time_list[2]), int(time_list[3][0:2]),
                                int(time_list[3][2:]))

    # Sort the entries, most recent first.
    entries = sorted(database["strips"], key=lambda k: k['IB'], reverse=True)

    # Check if there are NUM entries.
    if num > len(entries):
        new_num = len(entries)
        sys.stderr.write(script_name + ': Warning: There are less than ' +
                         str(num) + ' entries in json file. Changed number to ' +
                         str(new_num) + '.\n')
        num = new_num

    # The histograms.
    ROOT.gROOT.SetStyle("Plain")
    outdir = '.'

    # Save in file
    rootfilename = outdir + '/histograms.root'
    myfile = ROOT.TFile(rootfilename, 'RECREATE')

    # Average time histogram.
    histo1 = ROOT.TH1F("AveCPU per IB", "Ave CPU per IB", num, 0., num)
    histo1.SetTitle(cmsrelease + ": Showing last " + str(num) + " IBs")
    histo1.SetName('avecpu_histo')

    # Maximum rss histogram.
    histo2 = ROOT.TH1F("Max rrs per IB", "Max rss per IB", num, 0., num)
    histo2.SetTitle(cmsrelease + ": Showing last " + str(num) + " IBs")
    histo2.SetName('maxrss_histo')

    # Fill in the histograms: the newest entry goes to the right-most bin
    # (ROOT bins are numbered from 1), older ones follow to its left.
    for i, entry in enumerate(entries[:num]):
        bin_index = num - i
        label = entry['IB'].strftime('%Y-%b-%d %H:%M')

        histo1.GetXaxis().SetBinLabel(bin_index, label)
        histo1.SetBinContent(bin_index, entry['average'])
        histo1.SetBinError(bin_index, entry['error'])
        histo2.GetXaxis().SetBinLabel(bin_index, label)
        histo2.SetBinContent(bin_index, entry['max_rss'])

    _style_strip_chart(histo1, "Average CPU time")
    # Histo1 - Set limits on the Y-axis
    # NOTE(review): the range is computed over ALL stored entries, not only
    # the `num` displayed ones -- confirm this is intended.
    y_low, y_high = get_yaxis_range(entries)
    interval = y_high - y_low
    # ...get a bit more space
    histo1.GetYaxis().SetRangeUser(y_low - interval * 0.1,
                                   y_high + interval * 0.1)

    _style_strip_chart(histo2, "Maximum rss")

    # Draw and save!

    ave_canvas = ROOT.TCanvas(cmsrelease + '_average_canvas')
    ave_canvas.SetGridy()
    ave_canvas.SetBottomMargin(0.28)
    ave_canvas.SetLeftMargin(0.18)
    ave_canvas.cd()
    # Histo1 - draw line
    histo1.SetLineColor(2)
    histo1.SetLineWidth(2)
    histo1.DrawCopy("HISTO L")
    # Histo1 - draw errors and markers
    histo1.SetLineColor(1)
    histo1.SetLineStyle(2)
    histo1.SetLineWidth(1)
    histo1.SetMarkerStyle(8)
    histo1.SetMarkerSize(.6)
    histo1.SetMarkerColor(1)
    histo1.Draw("E1P SAME")
    ROOT.gStyle.SetErrorX(0)
    ave_canvas.Print(outdir + "/average_cpu_histo.png", "png")

    rss_canvas = ROOT.TCanvas(cmsrelease + '_maxrss_canvas')
    rss_canvas.SetGridy()
    rss_canvas.SetBottomMargin(0.28)
    rss_canvas.SetLeftMargin(0.18)
    rss_canvas.cd()
    # Histo2 - draw line
    histo2.SetLineColor(2)
    histo2.SetLineWidth(2)
    histo2.DrawCopy("L")
    # Histo2 - draw markers
    histo2.SetMarkerStyle(8)
    histo2.SetMarkerSize(.6)
    histo2.SetMarkerColor(1)
    histo2.Draw("P SAME")
    rss_canvas.Print(outdir + "/maximum_rss_histo.png", "png")

    # write them on file
    histo1.Write()
    ave_canvas.Write()
    histo2.Write()
    rss_canvas.Write()


def _parse_time_log(lines):
    """
    Return the pair (average, error) as strings, taken from the LAST line
    that contains 'Uncertainty of Average Time'; (' ', ' ') if none is found.
    """
    average = error = ' '
    for line in lines:
        if 'Uncertainty of Average Time' in line:
            # Strip only the line terminator, so that a final line without
            # a trailing newline does not lose its last character.
            fields = line.rstrip('\r\n').split(' ')
            # Too-short lines are skipped; the caller then reports the
            # usual "Could not parse" error instead of an IndexError.
            if len(fields) > 7:
                average, error = fields[5], fields[7]
    return average, error


def _parse_mem_log(lines):
    """
    Return the maximum rss as a string, taken from the FIRST line that
    contains 'Maximum rss'; ' ' if it is missing or the line is too short.
    """
    for line in lines:
        if 'Maximum rss' in line:
            fields = line.rstrip('\r\n').split(' ')
            return fields[3] if len(fields) > 3 else ' '
    return ' '


def _style_strip_chart(histo, y_title):
    """Apply the axis titles, offsets and label layout shared by both strip charts."""
    histo.SetStats(0)
    histo.GetYaxis().SetTitle(y_title)
    histo.GetYaxis().SetTitleOffset(1.8)
    histo.GetXaxis().SetTitle("Integration Build")
    histo.GetXaxis().SetTitleOffset(4.)
    histo.GetXaxis().CenterTitle()
    histo.GetXaxis().LabelsOption('v')
236 
237 
238 ###########################################################################################
239 
if __name__ == '__main__':
    # Command-line entry point: parse the options, validate the input
    # files and the json database, then hand over to operate().

    import optparse

    ################################
    # Definition of command usage. #
    ################################
    script_name = os.path.basename(__file__)
    usage = script_name + ' <options> -t TIMELOG -m MEMLOG'
    parser = optparse.OptionParser(usage)
    parser.add_option('-t', '--timelog',
                      action='store',
                      dest='timelog',
                      default='',
                      metavar='TIMELOG',
                      help='input file TIMELOG, the output of cmsTiming_parser.py')
    parser.add_option('-m', '--memlog',
                      action='store',
                      dest='memlog',
                      default='',
                      metavar='MEMLOG',
                      help='input file MEMLOG, the output of cmsSimplememchecker_parser.py')
    parser.add_option('-j', '--jsonfile',
                      action='store',
                      dest='json_f',
                      default='strips.json',
                      metavar='FILE.JSON',
                      help='the .json file database')
    parser.add_option('-n', type='int',
                      action='store',
                      dest='num',
                      default=30,
                      metavar='NUM',
                      help='last NUM entries to be printed in the strip charts. Default is 30.')
    (options, args) = parser.parse_args()

    ######################################
    # Some error handling for the usage. #
    ######################################
    if options.timelog == '' or options.memlog == '':
        sys.exit('%s: Missing file operands!\n' % script_name +
                 'Type %s --help for more information!' % script_name)
    if not os.path.exists(options.timelog) or \
       not os.path.exists(options.memlog):
        sys.exit('%s: Error: Not present file(s)!' % script_name)
    # NUM becomes the number of histogram bins, so it must be positive.
    if options.num < 1:
        sys.exit('%s: Error: NUM must be a positive integer!' % script_name)

    #############################################
    # Validity of .json file-database.          #
    #############################################

    # The format that the json file must have (IB holds the build's
    # time stamp, as stored by operate()):
    json_format_help = ('\n { "strips" :\n'
                        ' [\n {"IB" : "YYYY-MM-DD-HHMM", "average" : M, "error" : E, "max_rss" : N},\n'
                        ' .........................................\n'
                        ' ]\n'
                        ' }\n')
    required_keys = set(['IB', 'average', 'error', 'max_rss'])

    # "r+" needs an existing file: report a missing or unreadable database
    # as a plain error message instead of a traceback.
    try:
        json_db = open(options.json_f, "r+")
    except (IOError, OSError) as err:
        sys.exit(script_name + ': Error: Cannot open json file \"' +
                 options.json_f + '\": ' + str(err))

    # json file validity checks start under the try statement
    try:
        # -check if the json file is empty; if yes, create a new database upon it
        if os.path.getsize(options.json_f) == 0:
            sys.stderr.write(script_name + ': Warning: File \"' + options.json_f +
                             '\" is empty. A new database will be created upon it.\n')
            json_db.write("{\n \"strips\" : [\n ]\n}\n")
            json_db.seek(0, 0)

        # -check if file loads as a valid json
        json_data = json.load(json_db)

        # -check if strips key is there.(Look format above!)
        strips = json_data["strips"]

        # -check if value of strips is type of list
        if not isinstance(strips, list):
            raise KeyError('strips')

        # -check if the list has valid elements
        for item in strips:
            if not isinstance(item, dict) or not required_keys.issubset(item):
                raise KeyError
    except ValueError:
        sys.exit(script_name + ': Error: Not a valid json file! Please, check the format:\n' +
                 json_format_help)
    except (KeyError, TypeError):
        # TypeError covers a top-level json value that is not an object.
        sys.exit(script_name + ': Error: Invalid format in the json file! Check it here:\n' +
                 json_format_help)
    finally:
        json_db.close()

    ####################
    # Start operation. #
    ####################

    # sys.exit() used in order to return an exit code to shell, in case of error
    sys.exit(operate(options.timelog, options.memlog, options.json_f, options.num))
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
T min(T a, T b)
Definition: MathUtil.h:58
#define str(s)
double split
Definition: MVATrainer.cc:139
def operate(timelog, memlog, json_f, num)