5 s =
"""Purpose: Convert a cmsRun log with Tracer info into a stream stall graph.
7 edmStreamStallGrapher [-g] <log file name>
9 Option: -g instead of ascii art, create a pdf file showing the work being done on each stream
11 To Use: Add the Tracer Service to the cmsRun job you want to check for stream stalls.
12 Make sure to use the 'printTimstamps' option
13 cms.Service("Tracer", printTimestamps = cms.untracked.bool(True))
14 After running the job, execute this script and pass the name of the log file to the
15 script as the only command line argument.
17 To Read: The script will then print an 'ASCII art' stall graph which consists of the name of
18 the module which either started or stopped running on a stream, and the state of each
19 stream at that moment in time and if the module just started, you will also see the
20 amount of time on that stream between the previous module finishing and this module starting.
21 The state of a stream is represented by a symbol:
22 blank (" ") the stream is currently running a module
23 line ("|") the stream is waiting to run a module
24 minus ("-") the stream has just finished waiting and is starting a module
25 plus ("+") the stream just finished running a module
26 If a module had to wait more than 0.1 seconds, the end of the line will have "STALLED".
27 Once the first 4 events have finished processing, the program prints "FINISH INIT".
28 This is useful if one wants to ignore stalled caused by startup actions, e.g. reading
31 Once the graph is completed, the program outputs the list of modules which had
32 the greatest total stall times. The list is sorted by total stall time and
33 written in descending order. In addition, the list of all stall times for the
41 time = line.split(
" ")[1]
42 time = time.split(
":")
43 time = int(time[0])*60*60+int(time[1])*60+float(time[2])
52 f = open(fileName,
"r")
54 processingSteps = list()
58 foundEventToStartFrom = False
60 if not foundEventToStartFrom:
61 if l.find(
"event = 5") != -1:
62 foundEventToStartFrom =
True
63 stream = int( l[l.find(
"stream = ")+9])
64 processingSteps.append((
"FINISH INIT",kFinished,stream,
getTime(l)-startTime,
False))
65 if l.find(
"processing event for module") != -1:
69 time = time - startTime
73 if l.find(
"finished:") != -1:
75 stream = int( l[l.find(
"stream = ")+9])
76 name = l.split(
"'")[1]
77 if l.find(
"delayed") != -1:
79 if len(name) > maxNameSize:
80 maxNameSize = len(name)
81 processingSteps.append((name,trans,stream,time,delayed))
82 if stream > numStreams:
85 return (processingSteps,numStreams,maxNameSize)
90 streamTime = [0]*(numStreams+1)
92 for n,trans,s,time,delayed
in processingSteps:
97 waitTime = time - streamTime[s]
100 if waitTime
is not None:
102 t = stalledModules.setdefault(n,[])
104 return stalledModules
111 streamState = [1]*(numStreams+1)
112 streamTime = [0]*(numStreams+1)
115 for n,trans,s,time,delayed
in processingSteps:
116 if n ==
"FINISH INIT":
119 oldState = streamState[s]
124 if trans == kStarted:
125 waitTime = time - streamTime[s]
128 if oldState != trans:
131 states =
"%-*s: " % (maxNameSize,n)
132 for state
in streamState:
141 if waitTime
is not None:
142 states +=
" %.2f"% waitTime
143 if waitTime > 0.1
and seenInit:
148 return stalledModules
154 for n,t
in stalledModules.iteritems():
156 if nameLength > maxNameSize:
157 maxNameSize = nameLength
159 priorities.append((n,sum(t),t))
162 return cmp(i[1],j[1])
163 priorities.sort(cmp=sumSort, reverse=
True)
165 nameColumn =
"Stalled Module"
166 if len(nameColumn) > maxNameSize:
167 maxNameSize = len(nameColumn)
169 stallColumn =
"Tot Stall Time"
170 stallColumnLength = len(stallColumn)
172 print "%-*s" % (maxNameSize, nameColumn),
"%-*s"%(stallColumnLength,stallColumn),
" Stall Times"
173 for n,s,t
in priorities:
174 paddedName =
"%-*s:" % (maxNameSize,n)
175 print paddedName,
"%-*.2f"%(stallColumnLength,s),
", ".
join([
"%.2f"%x
for x
in t])
179 import matplotlib.pyplot
as plt
181 streamStartDepth = [0]*(numStreams+1)
182 streamTime = [0]*(numStreams+1)
183 streamStartTimes = [ []
for x
in xrange(numStreams+1)]
184 streamColors = [[]
for x
in xrange(numStreams+1)]
186 stalledModuleNames = [ x
for x
in stalledModuleInfo.iterkeys()]
188 streamStartTimes = [ []
for x
in xrange(numStreams+1)]
189 streamColors = [[]
for x
in xrange(numStreams+1)]
191 for n,trans,s,time,delayed
in processingSteps:
192 if trans == kStarted:
193 streamStartDepth[s] +=1
196 streamStartDepth[s] -=1
197 if 0 == streamStartDepth[s]:
198 streamStartTimes[s].
append((streamTime[s],time-streamTime[s]))
202 elif n
in stalledModuleNames:
210 oldStreamTimes = streamStartTimes
211 oldStreamColors = streamColors
213 streamStartTimes = [ []
for x
in xrange(numStreams+1)]
214 streamColors = [[]
for x
in xrange(numStreams+1)]
216 for s
in xrange(numStreams+1):
217 streamStartTimes[s].
append(oldStreamTimes[s][0])
218 streamColors[s].
append(oldStreamColors[s][0])
219 lastStartTime,lastTimeLength = oldStreamTimes[s][0]
220 lastColor = oldStreamColors[s][0]
221 for i
in xrange(1, len(oldStreamTimes[s])):
222 start,length = oldStreamTimes[s][i]
223 color = oldStreamColors[s][i]
225 if color == lastColor
and abs(lastStartTime+lastTimeLength-start)<0.001:
226 lastTimeLength += length
228 streamStartTimes[s].
append((lastStartTime,lastTimeLength))
229 streamColors[s].
append(lastColor)
230 lastStartTime = start
231 lastTimeLength = length
233 streamStartTimes[s].
append((lastStartTime,lastTimeLength))
234 streamColors[s].
append(lastColor)
236 fig, ax = plt.subplots()
237 ax.set_xlabel(
"Time (sec)")
238 ax.set_ylabel(
"Stream ID")
241 for s
in xrange(numStreams+1):
242 t = streamStartTimes[s]
243 ax.broken_barh(t,(i-0.4,0.8),facecolors=streamColors[s],edgecolors=streamColors[s],linewidth=0)
247 fig.text(0.1, 0.95,
"modules running", color =
"green", horizontalalignment =
'left')
248 fig.text(0.5, 0.95,
"stalled module running", color =
"red", horizontalalignment =
'center')
249 fig.text(0.9, 0.95,
"read from input", color =
"orange", horizontalalignment =
'right')
250 plt.savefig(
"stall.pdf")
255 if __name__==
"__main__":
258 if len(sys.argv) == 1:
263 if len(sys.argv) == 3:
264 if sys.argv[1] ==
'-g':
267 print "unknown argument ",sys.argv[1]
269 fileName =sys.argv[-1]
271 processingSteps,numStreams,maxNameSize =
readLogFile(sys.argv[-1])
Abs< T >::type abs(const T &t)
static std::string join(char **cmd)
def printStalledModulesInOrder
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run