5 s =
"""Purpose: Convert a cmsRun log with Tracer info into a stream stall graph.
7 edmStreamStallGrapher [-g] <log file name>
9 Option: -g instead of ascii art, create a pdf file showing the work being done on each stream
11 To Use: Add the Tracer Service to the cmsRun job you want to check for stream stalls.
12 Make sure to use the 'printTimstamps' option
13 cms.Service("Tracer", printTimestamps = cms.untracked.bool(True))
14 After running the job, execute this script and pass the name of the log file to the
15 script as the only command line argument.
17 To Read: The script will then print an 'ASCII art' stall graph which consists of the name of
18 the module which either started or stopped running on a stream, and the state of each
19 stream at that moment in time and if the module just started, you will also see the
20 amount of time on that stream between the previous module finishing and this module starting.
21 The state of a stream is represented by a symbol:
22 blank (" ") the stream is currently running a module
23 line ("|") the stream is waiting to run a module
24 minus ("-") the stream has just finished waiting and is starting a module
25 plus ("+") the stream just finished running a module
26 If a module had to wait more than 0.1 seconds, the end of the line will have "STALLED".
27 Once the first 4 events have finished processing, the program prints "FINISH INIT".
28 This is useful if one wants to ignore stalled caused by startup actions, e.g. reading
31 Once the graph is completed, the program outputs the list of modules which had
32 the greatest total stall times. The list is sorted by total stall time and
33 written in descending order. In addition, the list of all stall times for the
41 time = line.split(
" ")[1]
42 time = time.split(
":")
43 time = int(time[0])*60*60+int(time[1])*60+float(time[2])
51 kSourceFindEvent =
"sourceFindEvent"
55 f = open(fileName,
"r")
57 processingSteps = list()
61 foundEventToStartFrom = False
63 if not foundEventToStartFrom:
64 if l.find(
"event = 5") != -1:
65 foundEventToStartFrom =
True
66 stream = int( l[l.find(
"stream = ")+9])
67 processingSteps.append((
"FINISH INIT",kFinished,stream,
getTime(l)-startTime,
False))
68 if l.find(
"processing event :") != -1:
72 time = time - startTime
74 streamIndex = l.find(
"stream = ")
75 stream = int( l[streamIndex+9:l.find(
" ",streamIndex+10)])
76 name = kSourceFindEvent
79 if l.find(
"starting:") != -1:
81 processingSteps.append((name,trans,stream,time,delayed))
82 if stream > numStreams:
84 if l.find(
"processing event for module") != -1:
88 time = time - startTime
92 if l.find(
"finished:") != -1:
94 streamIndex = l.find(
"stream = ")
95 stream = int( l[streamIndex+9:l.find(
" ",streamIndex+10)])
96 name = l.split(
"'")[1]
97 if l.find(
"delayed") != -1:
99 if len(name) > maxNameSize:
100 maxNameSize = len(name)
101 processingSteps.append((name,trans,stream,time,delayed))
102 if stream > numStreams:
105 return (processingSteps,numStreams,maxNameSize)
118 streamTime = [0]*(numStreams+1)
120 previousStartWasADelayed = [
False]*(numStreams+1)
121 for n,trans,s,time,delayed
in processingSteps:
123 if trans == kStarted:
124 if delayed
or (n == kSourceFindEvent):
125 previousStartWasADelayed[s] =
True
128 previousStartWasADelayed[s] =
False
129 waitTime = time - streamTime[s]
131 if (delayed
or (n == kSourceFindEvent))
and previousStartWasADelayed[s]:
133 if n != kSourceFindEvent:
135 waitTime = time - streamTime[s]
136 previousStartWasADelayed[s] =
False
138 if waitTime
is not None:
140 t = stalledModules.setdefault(n,[])
142 return stalledModules
149 streamState = [1]*(numStreams+1)
150 streamTime = [0]*(numStreams+1)
153 previousStartWasADelayed = [
False]*(numStreams+1)
154 for n,trans,s,time,delayed
in processingSteps:
155 if n ==
"FINISH INIT":
158 oldState = streamState[s]
161 if trans == kStarted:
162 if delayed
or (n == kSourceFindEvent):
163 previousStartWasADelayed[s] =
True
167 previousStartWasADelayed[s] =
False
168 waitTime = time - streamTime[s]
171 if delayed
or (n == kSourceFindEvent):
172 if previousStartWasADelayed[s]:
173 if n != kSourceFindEvent:
175 waitTime = time - streamTime[s]
177 previousStartWasADelayed[s] =
False
181 if oldState != trans:
184 states =
"%-*s: " % (maxNameSize,n)
185 for state
in streamState:
194 if waitTime
is not None:
195 states +=
" %.2f"% waitTime
196 if waitTime > 0.1
and seenInit:
197 states +=
" STALLED "+str(time)+
" "+str(s)
201 return stalledModules
207 for n,t
in stalledModules.iteritems():
209 if nameLength > maxNameSize:
210 maxNameSize = nameLength
212 priorities.append((n,sum(t),t))
215 return cmp(i[1],j[1])
216 priorities.sort(cmp=sumSort, reverse=
True)
218 nameColumn =
"Stalled Module"
219 if len(nameColumn) > maxNameSize:
220 maxNameSize = len(nameColumn)
222 stallColumn =
"Tot Stall Time"
223 stallColumnLength = len(stallColumn)
225 print "%-*s" % (maxNameSize, nameColumn),
"%-*s"%(stallColumnLength,stallColumn),
" Stall Times"
226 for n,s,t
in priorities:
227 paddedName =
"%-*s:" % (maxNameSize,n)
228 print paddedName,
"%-*.2f"%(stallColumnLength,s),
", ".
join([
"%.2f"%x
for x
in t])
232 import matplotlib.pyplot
as plt
234 previousStartWasADelayed=[
False]*(numStreams+1)
235 streamTime = [0]*(numStreams+1)
236 streamStartTimes = [ []
for x
in xrange(numStreams+1)]
237 streamColors = [[]
for x
in xrange(numStreams+1)]
239 stalledModuleNames = [ x
for x
in stalledModuleInfo.iterkeys()]
241 streamStartTimes = [ []
for x
in xrange(numStreams+1)]
242 streamColors = [[]
for x
in xrange(numStreams+1)]
244 for n,trans,s,time,delayed
in processingSteps:
245 if n ==
"FINISH INIT":
247 if trans == kStarted:
248 previousStartWasADelayed[s] = delayed
253 if previousStartWasADelayed[s]:
256 previousStartWasADelayed[s] =
False
259 if n == kSourceFindEvent:
261 streamStartTimes[s].
append((streamTime[s],time-streamTime[s]))
262 if c ==
"green" and n
in stalledModuleNames:
270 oldStreamTimes = streamStartTimes
271 oldStreamColors = streamColors
273 streamStartTimes = [ []
for x
in xrange(numStreams+1)]
274 streamColors = [[]
for x
in xrange(numStreams+1)]
276 for s
in xrange(numStreams+1):
277 streamStartTimes[s].
append(oldStreamTimes[s][0])
278 streamColors[s].
append(oldStreamColors[s][0])
279 lastStartTime,lastTimeLength = oldStreamTimes[s][0]
280 lastColor = oldStreamColors[s][0]
281 for i
in xrange(1, len(oldStreamTimes[s])):
282 start,length = oldStreamTimes[s][i]
283 color = oldStreamColors[s][i]
285 if color == lastColor
and abs(lastStartTime+lastTimeLength-start)<0.001:
286 lastTimeLength += length
288 streamStartTimes[s].
append((lastStartTime,lastTimeLength))
289 streamColors[s].
append(lastColor)
290 lastStartTime = start
291 lastTimeLength = length
293 streamStartTimes[s].
append((lastStartTime,lastTimeLength))
294 streamColors[s].
append(lastColor)
296 fig, ax = plt.subplots()
297 ax.set_xlabel(
"Time (sec)")
298 ax.set_ylabel(
"Stream ID")
301 for s
in xrange(numStreams+1):
302 t = streamStartTimes[s]
303 ax.broken_barh(t,(i-0.4,0.8),facecolors=streamColors[s],edgecolors=streamColors[s],linewidth=0)
307 fig.text(0.1, 0.95,
"modules running", color =
"green", horizontalalignment =
'left')
308 fig.text(0.5, 0.95,
"stalled module running", color =
"red", horizontalalignment =
'center')
309 fig.text(0.9, 0.95,
"read from input", color =
"orange", horizontalalignment =
'right')
310 plt.savefig(
"stall.pdf")
315 if __name__==
"__main__":
318 if len(sys.argv) == 1:
323 if len(sys.argv) == 3:
324 if sys.argv[1] ==
'-g':
327 print "unknown argument ",sys.argv[1]
329 fileName =sys.argv[-1]
331 processingSteps,numStreams,maxNameSize =
readLogFile(sys.argv[-1])
boost::dynamic_bitset append(const boost::dynamic_bitset<> &bs1, const boost::dynamic_bitset<> &bs2)
this method takes two bitsets bs1 and bs2 and returns result of bs2 appended to the end of bs1 ...
Abs< T >::type abs(const T &t)
static std::string join(char **cmd)
def printStalledModulesInOrder
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run