CMS 3D CMS Logo

cmsPerfStripChart.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 from __future__ import print_function
4 import os, sys
5 try: import simplejson as json
6 except ImportError: import json
7 
8 # Helper functions
def get_yaxis_range(list):
    """
    Given a list of dictionaries, where each dict holds the information
    about an IB, this function returns a tuple (low, high) with the lowest
    and the highest value of y-axis, respectively.

    Each dict must provide the numeric keys 'average' and 'error'; the
    bounds are the smallest 'average' - 'error' and the largest
    'average' + 'error' found in the sequence.

    For an empty sequence the historical sentinel pair (sys.maxsize, -1)
    is returned, so existing callers see no change for that case.

    Raises KeyError if an entry lacks 'average' or 'error'.
    """
    # NOTE: the parameter name shadows the builtin `list`; it is kept so that
    # callers passing it by keyword keep working.
    # Materialise the bounds once so any iterable (even a one-shot iterator)
    # can be passed in.
    bounds = [(node['average'] - node['error'],
               node['average'] + node['error']) for node in list]
    if not bounds:
        return (sys.maxsize, -1)

    # Derive the limits from the data alone: seeding the search with fixed
    # sentinels gave a wrong upper limit whenever every value was below -1.
    low = min(lower for lower, _ in bounds)
    high = max(upper for _, upper in bounds)
    return (low, high)
20 
21 # Main operation function
def _read_lines(path):
    """Return the content of the text file `path` as a list of lines."""
    with open(path, 'r') as handle:
        return handle.readlines()


def _nth_field(line, index):
    """
    Return the `index`-th single-space-separated field of `line` (trailing
    newline removed), or ' ' -- the "value not found" marker used by
    operate() -- when the line has too few fields.
    """
    # rstrip('\n') instead of line[:-1]: the last line of a file may lack a
    # trailing newline, in which case slicing would eat a real character.
    fields = line.rstrip('\n').split(' ')
    if index < len(fields):
        return fields[index]
    return ' '


def operate(timelog, memlog, json_f, num):
    """
    Main operation of the script (i.e. json db update, histograms' creation)
    with respect to the specifications of all the files & formats concerned.

    Arguments:
      timelog -- path of the timing log. The average and its uncertainty are
                 fields 5 and 7 (split on single spaces) of the last line
                 containing 'Uncertainty of Average Time'.
      memlog  -- path of the memory log. The maximum rss is field 3 of the
                 first line containing 'Maximum rss'.
      json_f  -- path of the json database: a dict whose "strips" key holds a
                 list of {"IB", "average", "error", "max_rss"} records.
      num     -- number of most recent IBs to show in the strip charts; it is
                 lowered (with a warning) if the database holds fewer.

    The Integration Build name is the basename of the CMSSW_BASE environment
    variable and must look like CMSSW_X_X_X_YYYY-MM-DD-HHMM.

    Side effects: appends the new record to `json_f` (unless an identical one
    is already stored) and writes ./histograms.root,
    ./average_cpu_histo.png and ./maximum_rss_histo.png.

    Returns 2 (after writing a message to stderr) when the logs or the IB
    name cannot be validated; returns None otherwise.
    """
    import re
    from datetime import datetime

    script_name = os.path.basename(__file__)

    # Read both logs up front so a missing file is reported before parsing.
    timelog_lines = _read_lines(timelog)
    memlog_lines = _read_lines(memlog)

    # Get average, uncertainty of average and maximum rss.
    # ' ' marks a value that was not found in the logs.
    max_rss = average = error = ' '
    for line in timelog_lines:
        # No break on purpose: the last matching line wins.
        if 'Uncertainty of Average Time' in line:
            average = _nth_field(line, 5)
            error = _nth_field(line, 7)
    for line in memlog_lines:
        if 'Maximum rss' in line:
            max_rss = _nth_field(line, 3)
            break

    # Get Integration Build's identifier straight from the environment
    # (the Python-2-only `commands` module is not needed for this).
    IB = os.path.basename(os.environ.get('CMSSW_BASE', ''))
    ib_list = IB.split('_')

    # Check if log files were parsed properly...
    # and if the IB is valid using regular expressions.
    try:
        # regex for a float
        float_regex = r"^\d+\.?\d*$"
        if average == ' ' or re.match(float_regex, average) is None:
            raise RuntimeError('Could not parse \"' + timelog + '\" properly. ' +
                               'Check if Average Time is defined correctly.')
        if error == ' ' or re.match(float_regex, error) is None:
            raise RuntimeError('Could not parse \"' + timelog + '\" properly. ' +
                               'Check if Uncertainty of Average Time is defined correctly.')
        if max_rss == ' ' or re.match(float_regex, max_rss) is None:
            raise RuntimeError('Could not parse \"' + memlog + '\" properly. ' +
                               ' Check if Maximum rss is defined correct.')

        # regex for dates 'YYYY-MM-DD-HHMM' (hours 00-23: datetime rejects 24)
        date_regex = (r'(19|20|21)\d\d-(0[1-9]|1[012])-(0[1-9]|[12]'
                      r'[0-9]|3[01])-([01][0-9]|2[0-3])([0-5][0-9])$')
        invalid_ib = 'Not a valid IB. Valid IB: ' +\
                     '[CMSSW_X_X_X_YYYY-MM-DD-HHMM]'
        if re.search(date_regex, IB) is None or len(ib_list) < 5:
            raise RuntimeError(invalid_ib)
        # The fifth '_'-separated field is what gets stored and later turned
        # into a datetime, so make sure it really is a valid timestamp before
        # anything is written to the database.
        try:
            datetime.strptime(ib_list[4], '%Y-%m-%d-%H%M')
        except ValueError:
            raise RuntimeError(invalid_ib)
    except RuntimeError as err:
        sys.stderr.write(script_name + ': Error: ' + str(err) + '\n')
        return 2

    # Imported only once the inputs are known to be good, but still before
    # the database is touched, so a missing ROOT never leaves a half-done run.
    import ROOT

    # Open for getting the data.
    with open(json_f, "r") as json_db:
        db = json.load(json_db)

    # Get the data to be stored and check if already exists.
    cmsrelease = '_'.join(ib_list[:4])
    data = {"IB": ib_list[4], "average": float(average),
            "error": float(error), "max_rss": float(max_rss)}

    if data in db["strips"]:
        sys.stderr.write(script_name + ": Warning: Entry already exists " +
                         "in json file and will not be stored! " +
                         "Only the strip charts will be created.\n")
    else:
        db["strips"].append(data)
        print('Storing entry to \"' + json_f +
              '\" file with attribute values:\n' +
              'IB=' + IB + '\naverage=' + average +
              '\nUncertainty of average=' + error + '\nmax_rss=' + max_rss)
        # Store the data in json file.
        with open(json_f, "w") as json_db:
            json.dump(db, json_db, indent=2)
        print('File "' + json_f + '" was updated successfully!')

    # Change to datetime type (helpful for sorting).
    for record in db["strips"]:
        time_list = record['IB'].split('-')
        record['IB'] = datetime(int(time_list[0]), int(time_list[1]),
                                int(time_list[2]), int(time_list[3][0:2]),
                                int(time_list[3][2:]))

    # Sort the list, most recent IB first.
    strips = sorted(db["strips"], key=lambda k: k['IB'], reverse=True)

    # Check if there are NUM entries.
    if num > len(strips):
        new_num = len(strips)
        sys.stderr.write(script_name + ': Warning: There are less than ' +
                         str(num) + ' entries in json file. Changed number to ' +
                         str(new_num) + '.\n')
        num = new_num

    # The histograms.
    ROOT.gROOT.SetStyle("Plain")
    outdir = '.'

    # Save in file
    rootfilename = outdir + '/histograms.root'
    myfile = ROOT.TFile(rootfilename, 'RECREATE')

    # Average time histogram.
    histo1 = ROOT.TH1F("AveCPU per IB", "Ave CPU per IB", num, 0., num)
    histo1.SetTitle(cmsrelease + ": Showing last " + str(num) + " IBs")
    histo1.SetName('avecpu_histo')

    # Maximum rss histogram.
    histo2 = ROOT.TH1F("Max rrs per IB", "Max rss per IB", num, 0., num)
    histo2.SetTitle(cmsrelease + ": Showing last " + str(num) + " IBs")
    histo2.SetName('maxrss_histo')

    # Fill in the histograms: entry 0 (the newest IB) goes to the last bin,
    # so time runs from left to right.
    for i in range(num):
        entry = strips[i]
        bin_index = num - i
        datime = entry['IB'].strftime('%Y-%b-%d %H:%M')

        histo1.GetXaxis().SetBinLabel(bin_index, datime)
        histo1.SetBinContent(bin_index, entry['average'])
        histo1.SetBinError(bin_index, entry['error'])
        histo2.GetXaxis().SetBinLabel(bin_index, datime)
        histo2.SetBinContent(bin_index, entry['max_rss'])

    histo1.SetStats(0)
    histo1.GetYaxis().SetTitle("Average CPU time")
    histo1.GetYaxis().SetTitleOffset(1.8)
    histo1.GetXaxis().SetTitle("Integration Build")
    histo1.GetXaxis().SetTitleOffset(4.)
    histo1.GetXaxis().CenterTitle()
    histo1.GetXaxis().LabelsOption('v')
    # Histo1 - Set limits on the Y-axis
    # NOTE(review): the range is computed over every stored entry, not only
    # the `num` entries that are displayed -- confirm this is intended.
    y_low, y_high = get_yaxis_range(strips)
    interval = y_high - y_low
    # ...get a bit more space
    y_low = y_low - interval * 0.1
    y_high = y_high + interval * 0.1
    histo1.GetYaxis().SetRangeUser(y_low, y_high)

    histo2.SetStats(0)
    histo2.GetYaxis().SetTitle("Maximum rss")
    histo2.GetYaxis().SetTitleOffset(1.8)
    histo2.GetXaxis().SetTitle("Integration Build")
    histo2.GetXaxis().SetTitleOffset(4.)
    histo2.GetXaxis().CenterTitle()
    histo2.GetXaxis().LabelsOption('v')

    # Draw and save!

    ave_canvas = ROOT.TCanvas(cmsrelease + '_average_canvas')
    ave_canvas.SetGridy()
    ave_canvas.SetBottomMargin(0.28)
    ave_canvas.SetLeftMargin(0.18)
    ave_canvas.cd()
    # Histo1 - draw line
    histo1.SetLineColor(2)
    histo1.SetLineWidth(2)
    histo1.DrawCopy("HISTO L")
    # Histo1 - draw errors and markers
    histo1.SetLineColor(1)
    histo1.SetLineStyle(2)
    histo1.SetLineWidth(1)
    histo1.SetMarkerStyle(8)
    histo1.SetMarkerSize(.6)
    histo1.SetMarkerColor(1)
    histo1.Draw("E1P SAME")
    ROOT.gStyle.SetErrorX(0)
    ave_canvas.Print(outdir + "/average_cpu_histo.png", "png")

    rss_canvas = ROOT.TCanvas(cmsrelease + '_maxrss_canvas')
    rss_canvas.SetGridy()
    rss_canvas.SetBottomMargin(0.28)
    rss_canvas.SetLeftMargin(0.18)
    rss_canvas.cd()
    # Histo2 - draw line
    histo2.SetLineColor(2)
    histo2.SetLineWidth(2)
    histo2.DrawCopy("L")
    # Histo2 - draw markers
    histo2.SetMarkerStyle(8)
    histo2.SetMarkerSize(.6)
    histo2.SetMarkerColor(1)
    histo2.Draw("P SAME")
    rss_canvas.Print(outdir + "/maximum_rss_histo.png", "png")

    # write them on file
    histo1.Write()
    ave_canvas.Write()
    histo2.Write()
    rss_canvas.Write()
235 
236 
237 ###########################################################################################
238 
if __name__ == '__main__':
    # Command-line entry point: parse the options, make sure the input logs
    # exist and the json database is well-formed, then hand over to operate().

    import optparse

    ################################
    # Definition of command usage. #
    ################################
    script_name = os.path.basename(__file__)
    usage = script_name + ' <options> -t TIMELOG -m MEMLOG'
    parser = optparse.OptionParser(usage)
    parser.add_option('-t', '--timelog',
                      action='store',
                      dest='timelog',
                      default='',
                      metavar='TIMELOG',
                      help='input file TIMELOG, the output of cmsTiming_parser.py')
    parser.add_option('-m', '--memlog',
                      action='store',
                      dest='memlog',
                      default='',
                      metavar='MEMLOG',
                      help='input file MEMLOG, the output of cmsSimplememchecker_parser.py')
    parser.add_option('-j', '--jsonfile',
                      action='store',
                      dest='json_f',
                      default='strips.json',
                      metavar='FILE.JSON',
                      help='the .json file database')
    parser.add_option('-n', type='int',
                      action='store',
                      dest='num',
                      default=30,
                      metavar='NUM',
                      help='last NUM entries to be printed in the strip charts. Default is 30.')
    (options, args) = parser.parse_args()

    ######################################
    # Some error handling for the usage. #
    ######################################
    if options.timelog == '' or options.memlog == '':
        sys.exit('%s: Missing file operands!\n' % script_name +
                 'Type %s --help for more information!' % script_name)
    if not os.path.exists(options.timelog) or\
       not os.path.exists(options.memlog):
        sys.exit('%s: Error: Not present file(s)!' % script_name)

    #############################################
    # Validity of .json file-database.          #
    #############################################

    # The format that the json file must have:
    json_format = '\n { "strips" :\n' +\
                  ' [\n {"IB" : "XXX_XXX", "average" : M, "error" : E, "max_rss" : N},\n' +\
                  ' .........................................\n' +\
                  ' ]\n' +\
                  ' }\n'

    # A missing or unreadable database is reported as a plain error message
    # instead of a traceback.
    try:
        json_db = open(options.json_f, "r+")
    except IOError as err:
        sys.exit(script_name + ': Error: ' + str(err))

    # json file validity checks start under the try statement
    with json_db:
        try:
            # -check if the json file is empty; if yes, create a new database upon it
            if os.path.getsize(options.json_f) == 0:
                sys.stderr.write(script_name + ': Warning: File \"' + options.json_f +
                                 '\" is empty. A new database will be created upon it.\n')
                json_db.write("{\n \"strips\" : [\n ]\n}\n")
                json_db.seek(0, 0)

            # -check if file loads as a valid json
            db = json.load(json_db)

            # -check if strips key is there.(Look format above!)
            strips = db["strips"]

            # -check if value of strips is type of list
            if not isinstance(strips, list):
                raise TypeError('"strips" is not a list')

            # -check if the list has valid elements
            required_keys = set(['IB', 'average', 'error', 'max_rss'])
            for item in strips:
                if not required_keys.issubset(item):
                    raise KeyError
        except ValueError:
            sys.exit(script_name + ': Error: Not a valid json file! Please, check the format:\n' + json_format)
        except (KeyError, TypeError):
            # KeyError: missing "strips" or missing record keys.
            # TypeError: wrong container types (top level, "strips" or a record).
            sys.exit(script_name + ': Error: Invalid format in the json file! Check it here:\n' + json_format)

    ####################
    # Start operation. #
    ####################

    # sys.exit() used in order to return an exit code to shell, in case of error
    sys.exit(operate(options.timelog, options.memlog, options.json_f, options.num))
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:65
T min(T a, T b)
Definition: MathUtil.h:58
def get_yaxis_range(list)
#define str(s)
double split
Definition: MVATrainer.cc:139
def operate(timelog, memlog, json_f, num)