5 s =
"""Purpose: Convert a cmsRun log with Tracer info into a stream stall graph.
7 edmStreamStallGrapher [-g] <log file name>
9 Option: -g instead of ascii art, create a pdf file showing the work being done on each stream
11 To Use: Add the Tracer Service to the cmsRun job you want to check for stream stalls.
12 Make sure to use the 'printTimstamps' option
13 cms.Service("Tracer", printTimestamps = cms.untracked.bool(True))
14 After running the job, execute this script and pass the name of the log file to the
15 script as the only command line argument.
17 To Read: The script will then print an 'ASCII art' stall graph which consists of the name of
18 the module which either started or stopped running on a stream, and the state of each
19 stream at that moment in time and if the module just started, you will also see the
20 amount of time on that stream between the previous module finishing and this module starting.
21 The state of a stream is represented by a symbol:
22 blank (" ") the stream is currently running a module
23 line ("|") the stream is waiting to run a module
24 minus ("-") the stream has just finished waiting and is starting a module
25 plus ("+") the stream just finished running a module
26 If a module had to wait more than 0.1 seconds, the end of the line will have "STALLED".
27 Once the first 4 events have finished processing, the program prints "FINISH INIT".
28 This is useful if one wants to ignore stalled caused by startup actions, e.g. reading
31 Once the graph is completed, the program outputs the list of modules which had
32 the greatest total stall times. The list is sorted by total stall time and
33 written in descending order. In addition, the list of all stall times for the
41 time = line.split(
" ")[1]
42 time = time.split(
":")
43 time = int(time[0])*60*60+int(time[1])*60+float(time[2])
52 f = open(fileName,
"r")
54 processingSteps = list()
58 foundEventToStartFrom = False
60 if not foundEventToStartFrom:
61 if l.find(
"event = 5") != -1:
62 foundEventToStartFrom =
True
63 stream = int( l[l.find(
"stream = ")+9])
64 processingSteps.append((
"FINISH INIT",kFinished,stream,
getTime(l)-startTime,
False))
65 if l.find(
"processing event for module") != -1:
69 time = time - startTime
73 if l.find(
"finished:") != -1:
75 stream = int( l[l.find(
"stream = ")+9])
76 name = l.split(
"'")[1]
77 if l.find(
"delayed") != -1:
79 if len(name) > maxNameSize:
80 maxNameSize = len(name)
81 processingSteps.append((name,trans,stream,time,delayed))
82 if stream > numStreams:
85 return (processingSteps,numStreams,maxNameSize)
90 streamTime = [0]*(numStreams+1)
92 for n,trans,s,time,delayed
in processingSteps:
97 waitTime = time - streamTime[s]
100 if waitTime
is not None:
102 t = stalledModules.setdefault(n,[])
104 return stalledModules
111 streamState = [1]*(numStreams+1)
112 streamTime = [0]*(numStreams+1)
115 for n,trans,s,time,delayed
in processingSteps:
116 if n ==
"FINISH INIT":
119 oldState = streamState[s]
124 if trans == kStarted:
125 waitTime = time - streamTime[s]
128 if oldState != trans:
131 states =
"%-*s: " % (maxNameSize,n)
132 for state
in streamState:
141 if waitTime
is not None:
142 states +=
" %.2f"% waitTime
143 if waitTime > 0.1
and seenInit:
148 return stalledModules
154 for n,t
in stalledModules.iteritems():
156 if nameLength > maxNameSize:
157 maxNameSize = nameLength
159 priorities.append((n,sum(t),t))
162 return cmp(i[1],j[1])
163 priorities.sort(cmp=sumSort, reverse=
True)
165 nameColumn =
"Stalled Module"
166 if len(nameColumn) > maxNameSize:
167 maxNameSize = len(nameColumn)
169 stallColumn =
"Tot Stall Time"
170 stallColumnLength = len(stallColumn)
172 print "%-*s" % (maxNameSize, nameColumn),
"%-*s"%(stallColumnLength,stallColumn),
" Stall Times"
173 for n,s,t
in priorities:
174 paddedName =
"%-*s:" % (maxNameSize,n)
175 print paddedName,
"%-*.2f"%(stallColumnLength,s),
", ".
join([
"%.2f"%x
for x
in t])
179 import matplotlib.pyplot
as plt
181 streamStartDepth = [0]*(numStreams+1)
182 streamTime = [0]*(numStreams+1)
183 streamStartTimes = [ []
for x
in xrange(numStreams+1)]
184 streamColors = [[]
for x
in xrange(numStreams+1)]
186 stalledModuleNames = [ x
for x
in stalledModuleInfo.iterkeys()]
188 streamStartTimes = [ []
for x
in xrange(numStreams+1)]
189 streamColors = [[]
for x
in xrange(numStreams+1)]
191 for n,trans,s,time,delayed
in processingSteps:
192 if n ==
"FINISH INIT":
194 if trans == kStarted:
195 streamStartDepth[s] +=1
198 streamStartDepth[s] -=1
199 if 0 == streamStartDepth[s]:
200 streamStartTimes[s].
append((streamTime[s],time-streamTime[s]))
204 elif n
in stalledModuleNames:
212 oldStreamTimes = streamStartTimes
213 oldStreamColors = streamColors
215 streamStartTimes = [ []
for x
in xrange(numStreams+1)]
216 streamColors = [[]
for x
in xrange(numStreams+1)]
218 for s
in xrange(numStreams+1):
219 streamStartTimes[s].
append(oldStreamTimes[s][0])
220 streamColors[s].
append(oldStreamColors[s][0])
221 lastStartTime,lastTimeLength = oldStreamTimes[s][0]
222 lastColor = oldStreamColors[s][0]
223 for i
in xrange(1, len(oldStreamTimes[s])):
224 start,length = oldStreamTimes[s][i]
225 color = oldStreamColors[s][i]
227 if color == lastColor
and abs(lastStartTime+lastTimeLength-start)<0.001:
228 lastTimeLength += length
230 streamStartTimes[s].
append((lastStartTime,lastTimeLength))
231 streamColors[s].
append(lastColor)
232 lastStartTime = start
233 lastTimeLength = length
235 streamStartTimes[s].
append((lastStartTime,lastTimeLength))
236 streamColors[s].
append(lastColor)
238 fig, ax = plt.subplots()
239 ax.set_xlabel(
"Time (sec)")
240 ax.set_ylabel(
"Stream ID")
243 for s
in xrange(numStreams+1):
244 t = streamStartTimes[s]
245 ax.broken_barh(t,(i-0.4,0.8),facecolors=streamColors[s],edgecolors=streamColors[s],linewidth=0)
249 fig.text(0.1, 0.95,
"modules running", color =
"green", horizontalalignment =
'left')
250 fig.text(0.5, 0.95,
"stalled module running", color =
"red", horizontalalignment =
'center')
251 fig.text(0.9, 0.95,
"read from input", color =
"orange", horizontalalignment =
'right')
252 plt.savefig(
"stall.pdf")
257 if __name__==
"__main__":
260 if len(sys.argv) == 1:
265 if len(sys.argv) == 3:
266 if sys.argv[1] ==
'-g':
269 print "unknown argument ",sys.argv[1]
271 fileName =sys.argv[-1]
273 processingSteps,numStreams,maxNameSize =
readLogFile(sys.argv[-1])
boost::dynamic_bitset append(const boost::dynamic_bitset<> &bs1, const boost::dynamic_bitset<> &bs2)
this method takes two bitsets bs1 and bs2 and returns result of bs2 appended to the end of bs1 ...
Abs< T >::type abs(const T &t)
static std::string join(char **cmd)
def printStalledModulesInOrder
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run