CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
edmStreamStallGrapher.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #----------------------------------------------
4 def printHelp():
5  s = """Purpose: Convert a cmsRun log with Tracer info into a stream stall graph.
6 
7 edmStreamStallGrapher [-g] <log file name>
8 
9 Option: -g instead of ascii art, create a pdf file showing the work being done on each stream
10 
11 To Use: Add the Tracer Service to the cmsRun job you want to check for stream stalls.
12  Make sure to use the 'printTimstamps' option
13  cms.Service("Tracer", printTimestamps = cms.untracked.bool(True))
14  After running the job, execute this script and pass the name of the log file to the
15  script as the only command line argument.
16 
17 To Read: The script will then print an 'ASCII art' stall graph which consists of the name of
18  the module which either started or stopped running on a stream, and the state of each
19  stream at that moment in time and if the module just started, you will also see the
20  amount of time on that stream between the previous module finishing and this module starting.
21  The state of a stream is represented by a symbol:
22  blank (" ") the stream is currently running a module
23  line ("|") the stream is waiting to run a module
24  minus ("-") the stream has just finished waiting and is starting a module
25  plus ("+") the stream just finished running a module
26  If a module had to wait more than 0.1 seconds, the end of the line will have "STALLED".
27  Once the first 4 events have finished processing, the program prints "FINISH INIT".
28  This is useful if one wants to ignore stalled caused by startup actions, e.g. reading
29  conditions.
30 
31  Once the graph is completed, the program outputs the list of modules which had
32  the greatest total stall times. The list is sorted by total stall time and
33  written in descending order. In addition, the list of all stall times for the
34  module is given.
35 """
36  print s
37 
38 
39 #----------------------------------------------
40 def getTime(line):
41  time = line.split(" ")[1]
42  time = time.split(":")
43  time = int(time[0])*60*60+int(time[1])*60+float(time[2])
44  return time
45 
46 #Stream states
47 kStarted=0
48 kFinished=1
49 
50 #----------------------------------------------
51 def readLogFile(fileName):
52  f = open(fileName,"r")
53 
54  processingSteps = list()
55  numStreams = 0
56  maxNameSize = 0
57  startTime = 0
58  foundEventToStartFrom = False
59  for l in f:
60  if not foundEventToStartFrom:
61  if l.find("event = 5") != -1:
62  foundEventToStartFrom = True
63  stream = int( l[l.find("stream = ")+9])
64  processingSteps.append(("FINISH INIT",kFinished,stream,getTime(l)-startTime,False))
65  if l.find("processing event for module") != -1:
66  time = getTime(l)
67  if startTime == 0:
68  startTime = time
69  time = time - startTime
70  trans = kStarted
71  stream = 0
72  delayed = False
73  if l.find("finished:") != -1:
74  trans = kFinished
75  stream = int( l[l.find("stream = ")+9])
76  name = l.split("'")[1]
77  if l.find("delayed") != -1:
78  delayed = True
79  if len(name) > maxNameSize:
80  maxNameSize = len(name)
81  processingSteps.append((name,trans,stream,time,delayed))
82  if stream > numStreams:
83  numStreams = stream
84  f.close()
85  return (processingSteps,numStreams,maxNameSize)
86 
87 
88 #----------------------------------------------
89 def findStalledModules(processingSteps, numStreams):
90  streamTime = [0]*(numStreams+1)
91  stalledModules = {}
92  for n,trans,s,time,delayed in processingSteps:
93  waitTime = None
94  if delayed:
95  n = "source"
96  if trans == kStarted:
97  waitTime = time - streamTime[s]
98  else:
99  streamTime[s] = time
100  if waitTime is not None:
101  if waitTime > 0.1:
102  t = stalledModules.setdefault(n,[])
103  t.append(waitTime)
104  return stalledModules
105 
106 
107 #----------------------------------------------
108 def createAsciiImage(processingSteps, numStreams, maxNameSize):
109  #print processingSteps
110  #exit(1)
111  streamState = [1]*(numStreams+1)
112  streamTime = [0]*(numStreams+1)
113  #lastTime = 0
114  seenInit = False
115  for n,trans,s,time,delayed in processingSteps:
116  if n == "FINISH INIT":
117  seenInit = True
118  continue
119  oldState = streamState[s]
120  streamState[s]=trans
121  waitTime = None
122  if delayed:
123  n = "source"
124  if trans == kStarted:
125  waitTime = time - streamTime[s]
126  streamState[s]=2
127  else:
128  if oldState != trans:
129  streamState[s]=3
130  streamTime[s] = time
131  states = "%-*s: " % (maxNameSize,n)
132  for state in streamState:
133  if state == 0:
134  states +=" "
135  elif state == 1:
136  states +="|"
137  elif state == 2:
138  states +="-"
139  elif state == 3:
140  states +="+"
141  if waitTime is not None:
142  states += " %.2f"% waitTime
143  if waitTime > 0.1 and seenInit:
144  states += " STALLED"
145 
146  print states
147  streamState[s]=trans
148  return stalledModules
149 
150 #----------------------------------------------
151 def printStalledModulesInOrder(stalledModules):
152  priorities = list()
153  maxNameSize = 0
154  for n,t in stalledModules.iteritems():
155  nameLength = len(n)
156  if nameLength > maxNameSize:
157  maxNameSize = nameLength
158  t.sort(reverse=True)
159  priorities.append((n,sum(t),t))
160 
161  def sumSort(i,j):
162  return cmp(i[1],j[1])
163  priorities.sort(cmp=sumSort, reverse=True)
164 
165  nameColumn = "Stalled Module"
166  if len(nameColumn) > maxNameSize:
167  maxNameSize = len(nameColumn)
168 
169  stallColumn = "Tot Stall Time"
170  stallColumnLength = len(stallColumn)
171 
172  print "%-*s" % (maxNameSize, nameColumn), "%-*s"%(stallColumnLength,stallColumn), " Stall Times"
173  for n,s,t in priorities:
174  paddedName = "%-*s:" % (maxNameSize,n)
175  print paddedName, "%-*.2f"%(stallColumnLength,s), ", ".join([ "%.2f"%x for x in t])
176 
177 
178 def createPDFImage(processingSteps, numStreams, stalledModuleInfo):
179  import matplotlib.pyplot as plt
180 
181  streamStartDepth = [0]*(numStreams+1)
182  streamTime = [0]*(numStreams+1)
183  streamStartTimes = [ [] for x in xrange(numStreams+1)]
184  streamColors = [[] for x in xrange(numStreams+1)]
185 
186  stalledModuleNames = [ x for x in stalledModuleInfo.iterkeys()]
187 
188  streamStartTimes = [ [] for x in xrange(numStreams+1)]
189  streamColors = [[] for x in xrange(numStreams+1)]
190 
191  for n,trans,s,time,delayed in processingSteps:
192  if n == "FINISH INIT":
193  continue
194  if trans == kStarted:
195  streamStartDepth[s] +=1
196  streamTime[s] = time
197  else:
198  streamStartDepth[s] -=1
199  if 0 == streamStartDepth[s]:
200  streamStartTimes[s].append((streamTime[s],time-streamTime[s]))
201  c="green"
202  if delayed:
203  c="orange"
204  elif n in stalledModuleNames:
205  c="red"
206  #elif len(streamColors[s]) %2:
207  # c="blue"
208  streamColors[s].append(c)
209 
210  #consolodate contiguous blocks with the same color
211  # this drastically reduces the size of the pdf file
212  oldStreamTimes = streamStartTimes
213  oldStreamColors = streamColors
214 
215  streamStartTimes = [ [] for x in xrange(numStreams+1)]
216  streamColors = [[] for x in xrange(numStreams+1)]
217 
218  for s in xrange(numStreams+1):
219  streamStartTimes[s].append(oldStreamTimes[s][0])
220  streamColors[s].append(oldStreamColors[s][0])
221  lastStartTime,lastTimeLength = oldStreamTimes[s][0]
222  lastColor = oldStreamColors[s][0]
223  for i in xrange(1, len(oldStreamTimes[s])):
224  start,length = oldStreamTimes[s][i]
225  color = oldStreamColors[s][i]
226  #use a millisecond tolerance to avoid rounding
227  if color == lastColor and abs(lastStartTime+lastTimeLength-start)<0.001:
228  lastTimeLength += length
229  else:
230  streamStartTimes[s].append((lastStartTime,lastTimeLength))
231  streamColors[s].append(lastColor)
232  lastStartTime = start
233  lastTimeLength = length
234  lastColor = color
235  streamStartTimes[s].append((lastStartTime,lastTimeLength))
236  streamColors[s].append(lastColor)
237 
238  fig, ax = plt.subplots()
239  ax.set_xlabel("Time (sec)")
240  ax.set_ylabel("Stream ID")
241 
242  i=1
243  for s in xrange(numStreams+1):
244  t = streamStartTimes[s]
245  ax.broken_barh(t,(i-0.4,0.8),facecolors=streamColors[s],edgecolors=streamColors[s],linewidth=0)
246  i=i+1
247 
248  #add key .1, .3, .7
249  fig.text(0.1, 0.95, "modules running", color = "green", horizontalalignment = 'left')
250  fig.text(0.5, 0.95, "stalled module running", color = "red", horizontalalignment = 'center')
251  fig.text(0.9, 0.95, "read from input", color = "orange", horizontalalignment = 'right')
252  plt.savefig("stall.pdf")
253 
254 
255 
256 #=======================================
257 if __name__=="__main__":
258  import sys
259 
260  if len(sys.argv) == 1:
261  printHelp()
262  exit(0)
263 
264  doGraphic = False
265  if len(sys.argv) == 3:
266  if sys.argv[1] == '-g':
267  doGraphic = True
268  else:
269  print "unknown argument ",sys.argv[1]
270  exit(-1)
271  fileName =sys.argv[-1]
272 
273  processingSteps,numStreams,maxNameSize = readLogFile(sys.argv[-1])
274  stalledModules = findStalledModules(processingSteps, numStreams)
275  if not doGraphic:
276  createAsciiImage(processingSteps, numStreams, maxNameSize)
277  else:
278  createPDFImage(processingSteps, numStreams, stalledModules)
279  printStalledModulesInOrder(stalledModules)
280 
boost::dynamic_bitset append(const boost::dynamic_bitset<> &bs1, const boost::dynamic_bitset<> &bs2)
this method takes two bitsets bs1 and bs2 and returns result of bs2 appended to the end of bs1 ...
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run