CMS 3D CMS Logo

cmsswFiletrace.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 from __future__ import print_function
3 import os
4 import re
5 import sys
6 import atexit
7 import tempfile
8 import subprocess
9 from shutil import copy, rmtree
10 from collections import defaultdict
11 
12 # only needed to locate CMSSW
13 import six
14 import FWCore
15 import FWCore.ParameterSet.Types
16 
# Base names of the two result files. setupenv() creates them inside the
# temporary directory and cleanupenv() copies them into the current directory.
OUTFILE_TREE = "calltree"
OUTFILE_FILES = "callfiles"
# Selects the call-tree format written by writeoutput(): True writes one
# ";"-joined call path per function (followed by " 1"), False writes one
# "callee -> caller" edge per line.
FLAT_OUTPUT = False
# cmsRun always gets special handling, but also trace these scripts
WRAP_SCRIPTS = ["cmsDriver.py" ]
# Path prefixes whose code is not traced: the tracer in trace_python() compares
# each called function's filename against these with str.startswith().
# NOTE(review): the third entry is a module file path, not a directory, so it
# only matches filenames that start with exactly that path. If __file__ points
# at a compiled file (e.g. ".pyc") it may never match the source filename the
# tracer sees -- confirm.
IGNORE_DIRS = [
    os.path.dirname(os.__file__),
    os.path.dirname(six.__file__),
    FWCore.ParameterSet.Types.__file__,
]
# Prefixes that formatfile() removes from absolute file names before output.
STRIPPATHS = [ # we will add the base dir from CMSSWCALLBASE env var here
    os.environ["CMSSW_BASE"] + "/python/", os.environ["CMSSW_RELEASE_BASE"] + "/python/",
    os.environ["CMSSW_BASE"] + "/cfipython/", os.environ["CMSSW_RELEASE_BASE"] + "/cfipython/"]
# Labels filled in by addprefixinfo(); writeoutput() joins them with ", " and
# uses the result as the prefix of every output line.
PREFIXINFO = []
# Path this script was started as; setupenv() symlinks the wrappers to it.
ARGV0 = "" # set in main
32 
def addprefixinfo(argv):
    """Append labels describing the traced job to the global PREFIXINFO.

    The labels are derived from the current working directory and from the
    name of the script being run:

    * ``"wf"`` and the number, if the working directory path contains a
      component starting with ``<digits>.<digits>_``;
    * ``"online"`` and the client name, if the script is named
      ``<name>_dqm_sourceclient-live_cfg.py``;
    * the ``step<N>`` prefix of a ``step<N>_*.py`` script;
    * ``RECO``, ``ALCA`` or ``HARVEST`` if that word occurs in a
      ``step<N>_*.py`` script name.

    If PREFIXINFO is still empty afterwards, the script name itself is used.

    Args:
        argv: command line of the traced program; only ``argv[0]`` (the
            script name) is inspected.
    """
    # Raw strings: "\d" and "\." are invalid escape sequences in ordinary
    # string literals and trigger warnings on current Python versions. The
    # pattern text handed to `re` is unchanged.
    cwd = os.path.abspath(os.getcwd())
    wf = re.match(r".*/(\d+\.\d+)_", cwd)
    if wf:
        PREFIXINFO.append("wf")
        PREFIXINFO.append(wf.group(1))
    online = re.match(r"(.*/)?(.*)_dqm_sourceclient-live_cfg\.py", argv[0])
    if online:
        PREFIXINFO.append("online")
        PREFIXINFO.append(online.group(2))
    step = re.match(r"(step\d+)_.*\.py", argv[0])
    if step:
        PREFIXINFO.append(step.group(1))
    processing = re.match(r"step\d+_.*(RECO|ALCA|HARVEST).*\.py", argv[0])
    if processing:
        PREFIXINFO.append(processing.group(1))
    if not PREFIXINFO:
        PREFIXINFO.append(argv[0])
51 
def setupenv():
    """Prepare a temporary directory and environment for a traced run.

    Creates a temporary directory containing symlinks named after every entry
    of WRAP_SCRIPTS and ``cmsRun``, all pointing at this script, and puts that
    directory first in ``$PATH`` so the wrappers are found instead of the
    real programs. Also exports ``CMSSWCALLTREE``, ``CMSSWCALLFILES`` (output
    files inside the temporary directory, created empty here) and
    ``CMSSWCALLBASE`` (the current working directory with a trailing slash).

    Returns:
        The path of the temporary directory.
    """
    bindir = tempfile.mkdtemp()
    print("+Setting up in ", bindir)
    # The links live in a different directory than the one this script was
    # started from, so a relative ARGV0 would give dangling links.
    target = os.path.abspath(ARGV0)
    for s in WRAP_SCRIPTS:
        os.symlink(target, bindir + "/" + s)
    os.symlink(target, bindir + "/cmsRun")
    os.environ["PATH"] = bindir + ":" + os.environ["PATH"]
    os.environ["CMSSWCALLTREE"] = bindir + "/" + OUTFILE_TREE
    os.environ["CMSSWCALLFILES"] = bindir + "/" + OUTFILE_FILES
    os.environ["CMSSWCALLBASE"] = os.path.abspath(os.getcwd()) + "/"
    # Start from empty output files; the wrapped processes append to them.
    for variable in ("CMSSWCALLTREE", "CMSSWCALLFILES"):
        with open(os.environ[variable], "w"):
            pass
    return bindir
67 
def cleanupenv(tmpdir):
    """Save the collected results and delete the temporary directory.

    The files named by the ``CMSSWCALLTREE`` and ``CMSSWCALLFILES``
    environment variables are copied into the current directory, then
    ``tmpdir`` is removed recursively.

    Args:
        tmpdir: directory to remove (as returned by setupenv()).
    """
    print("+Cleaning up ", tmpdir)
    for variable in ("CMSSWCALLTREE", "CMSSWCALLFILES"):
        copy(os.environ[variable], ".")
    rmtree(tmpdir)
75 
76 
def trace_command(argv):
    """Run an arbitrary command with the tracing wrappers in place.

    If ``CMSSWCALLTREE`` is not yet set in the environment, this is the
    outermost invocation: the tracing environment is created before the
    command runs and torn down afterwards. Otherwise the command simply runs
    inside the environment that already exists.

    Args:
        argv: the command and its arguments, passed to ``subprocess.call``.
    """
    tmpdir = None
    if "CMSSWCALLTREE" not in os.environ:
        tmpdir = setupenv()

    try:
        subprocess.call(argv)
    finally:
        # Also clean up when the command could not be started at all, so the
        # temporary directory is not left behind.
        if tmpdir:
            cleanupenv(tmpdir)
86 
def formatfile(filename):
    """Return the absolute form of ``filename`` with known prefixes removed.

    Every entry of STRIPPATHS is tried in order, and each one that the
    (possibly already shortened) name starts with is cut off the front.
    """
    shortened = os.path.abspath(filename)
    for prefix in STRIPPATHS:
        if not shortened.startswith(prefix):
            continue
        shortened = shortened[len(prefix):]
    return shortened
93 
def searchinpath(progname, path):
    """Locate ``progname`` in a list of directories.

    Args:
        progname: file name to look for.
        path: iterable of directory names, searched in order.

    Returns:
        The joined path of the first directory entry in which ``progname``
        is a regular file.

    Raises:
        SystemExit: with status 1 if the program is found in none of the
            directories (including when ``path`` is empty).
    """
    # Search $PATH. There seems to be no pre-made function for this.
    # A one-shot iterator is materialized so that it can still be shown in
    # the messages after it has been searched.
    entries = path if isinstance(path, (list, tuple)) else list(path)
    for entry in entries:
        file_path = os.path.join(entry, progname)
        if os.path.isfile(file_path):
            print("+Found %s as %s in %s." % (progname, file_path, entries))
            return file_path
    print("+Cannot find program (%s) in modified $PATH (%s)." % (progname, entries))
    sys.exit(1)
105 
def writeoutput(callgraph, files):
    """Append the collected trace data to the two output files.

    Every line is prefixed with the ", "-joined contents of PREFIXINFO.

    * The file named by ``$CMSSWCALLFILES`` gets one line per traced file.
    * The file named by ``$CMSSWCALLTREE`` gets, depending on FLAT_OUTPUT,
      either one ";"-joined call path per function followed by " 1", or one
      ``callee -> caller`` line per edge of the call graph.

    Args:
        callgraph: mapping from ``(filename, funcname)`` of a called function
            to the set of ``(filename, funcname)`` of its callers.
        files: iterable of file names in which traced functions live.
    """
    progname = ", ".join(PREFIXINFO)
    print("+Done running %s, writing output..." % progname)

    def fmt(func):
        # Render a (filename, funcname) node as "<short filename>::<funcname>".
        filename, funcname = func
        return "%s::%s" % (formatfile(filename), funcname)

    def callpath(func):
        # climb up in the call graph until we find a node without callers (this is
        # the entry point, the traced call itself). There may be cycles, but any
        # node is reachable from the entry point, so no backtracking required.
        path = []
        seen = set()
        parents = {func}
        timeout = 100 # go no more than this deep
        while parents:
            if len(parents) == 1:
                func = next(iter(parents))
                seen.add(func)
                path.append(fmt(func))
            if len(parents) > 1:
                # several callers: follow the first one not visited yet and
                # mark the step with "+"
                for func in parents:
                    if func not in seen:
                        break
                if func in seen:
                    # somehow we got stuck in a loop and can't get out. So maybe
                    # backtracking is needed in some situations?
                    # Abort with a partial path for now.
                    return path
                seen.add(func)
                path.append(fmt(func) + "+")
            parents = callgraph[func]
            timeout -= 1
            if timeout == 0:
                print(seen, path, parents, func)
                raise Exception('Call path too deep, aborting')
        # the last node reached is the one without callers; leave it out
        return path[:-1]

    with open(os.environ["CMSSWCALLFILES"], "a") as outfile:
        for f in files:
            print("%s: %s" % (progname, formatfile(f)), file=outfile)

    # Snapshot the keys first: callpath() looks up nodes that have no entry
    # yet, and a defaultdict inserts them, which must not happen while the
    # mapping itself is being iterated.
    funcs = list(callgraph)
    with open(os.environ["CMSSWCALLTREE"], "a") as outfile:
        if FLAT_OUTPUT:
            for func in funcs:
                print("%s: %s 1" % (progname, ";".join(reversed(callpath(func)))), file=outfile)
        else:
            for func in funcs:
                for pfunc in callgraph[func]:
                    print("%s: %s -> %s" % (progname, fmt(func), fmt(pfunc)), file=outfile)
156 
def trace_python(prog_argv, path):
    """Run a Python script in this process while recording its call graph.

    The script named by ``prog_argv[0]`` is located with searchinpath(),
    compiled and executed as ``__main__`` with ``sys.argv`` set to
    ``prog_argv``. While it runs, every function call outside IGNORE_DIRS is
    recorded; the result is handed to writeoutput() at interpreter exit, or
    right before the script replaces the process through ``os.execvpe``.

    Args:
        prog_argv: script name followed by its arguments.
        path: directories in which to look for the script.

    This function does not return: it always ends in ``sys.exit``
    (status 1 if an OSError occurred, 0 otherwise).
    """
    files = set()
    callgraph = defaultdict(set)

    def nop_trace(frame, why, arg):
        pass

    def tracefunc(frame, why, arg):
        if why == 'call':
            code = frame.f_code
            # compared to the `trace` module, we don't attempt to find class names here
            filename = code.co_filename

            for d in IGNORE_DIRS:
                if filename.startswith(d):
                    # Ignored code: stop tracing new calls until this frame
                    # returns (wait_for_return switches tracing back on).
                    sys.settrace(nop_trace)
                    return wait_for_return

            funcname = code.co_name
            code = frame.f_back.f_code
            p_filename = code.co_filename
            p_funcname = code.co_name

            files.add(filename)
            callgraph[(filename, funcname)].add((p_filename, p_funcname))
        return None

    def wait_for_return(frame, why, arg):
        if why == 'return':
            sys.settrace(tracefunc)
        return wait_for_return

    sys.argv = prog_argv
    progname = prog_argv[0]

    file_path = searchinpath(progname, path)
    try:
        with open(file_path) as fp:
            code = compile(fp.read(), progname, 'exec')
        # try to emulate __main__ namespace as much as possible
        namespace = {
            '__file__': progname,
            '__name__': '__main__',
            '__package__': None,
            '__cached__': None,
        }

        # would be too easy if this covered all the cases...
        atexit.register(lambda: writeoutput(callgraph, files))
        # cmsDriver calls cmsRun via exec (execvpe specifically), so we also need
        # to hook that...
        old_execvpe = os.execvpe
        def exec_hook(*args):
            writeoutput(callgraph, files)
            old_execvpe(*args)
        os.execvpe = exec_hook

        # now turn on the tracing
        sys.settrace(tracefunc)
        try:
            # Tuple form of exec: valid syntax on Python 3 and equivalent to
            # `exec code in namespace, namespace` on Python 2.7.9+.
            exec(code, namespace, namespace)
        finally:
            sys.settrace(None)

    except OSError as err:
        print("+Cannot run file %r because: %s" % (sys.argv[0], err))
        sys.exit(1)
    except SystemExit:
        # NOTE(review): the exit status requested by the traced script is
        # dropped here and replaced by 0 below -- confirm this is intended.
        pass
    # this is not necessarily reached at all.
    sys.exit(0)
229 
def help():
    """Print a usage summary and two example command lines to stdout."""
    print("Usage: %s <some cmssw commandline>" % (sys.argv[0]))
    print(" The given programs will be executed, instrumenting calls to %s and cmsRun." % (", ".join(WRAP_SCRIPTS)))
    print(" cmsRun will not actually run cmssw, but all the Python code will be executed and instrumented. The results are written to the files `%s` and `%s` in the same directory." % (OUTFILE_FILES, OUTFILE_TREE))
    if FLAT_OUTPUT:
        print(" The callgraph output file can be processed with Brendan Gregg's FlameGraph tool.")
    else:
        print(" The callgraph output lists edges pointing from each function to the one calling it.")

    print("Examples:")
    print(" %s runTheMatrix.py -l 1000 --ibeos" % sys.argv[0])
    # The indent belongs inside the string so both examples line up.
    print(" %s cmsRun rpc_dqm_sourceclient-live_cfg.py" % sys.argv[0])
242 
def main():
    """Entry point; behaves differently depending on the invoked name.

    * Invoked under the name of one of WRAP_SCRIPTS (through the symlinks
      made by setupenv()): find the real script further down ``$PATH`` and
      run it with call tracing.
    * Invoked as ``cmsRun``: run the given configuration file with call
      tracing instead of starting cmsRun.
    * Invoked without arguments: print the help text.
    * Otherwise: run the given command line with the wrappers installed.
    """
    global ARGV0
    print("+Running cmsswfiletrace...")
    ARGV0 = sys.argv[0]
    for s in WRAP_SCRIPTS:
        if sys.argv[0].endswith(s):
            print("+Wrapping %s..." % s)
            addprefixinfo(sys.argv)
            # Skip the directory holding the wrapper symlinks so that the
            # real script is found. A list (not a lazy filter object) so it
            # can be both searched and printed.
            tmppath = os.path.dirname(sys.argv[0])
            path = [
                entry for entry in os.environ["PATH"].split(":")
                if not entry.startswith(tmppath)
            ]
            STRIPPATHS.append(os.environ["CMSSWCALLBASE"])
            trace_python([s] + sys.argv[1:], path)
            return
    if sys.argv[0].endswith('cmsRun'):
        print("+Wrapping cmsRun...")
        if len(sys.argv) <= 1:
            # Nothing to trace; fail clearly instead of with an IndexError.
            print("+cmsRun wrapper called without a configuration file.")
            sys.exit(1)
        addprefixinfo(sys.argv[1:])
        STRIPPATHS.append(os.environ["CMSSWCALLBASE"])
        trace_python(sys.argv[1:], ["."])
        return
    if len(sys.argv) <= 1:
        help()
        return
    # else
    print("+Running command with tracing %s..." % sys.argv[1:])
    trace_command(sys.argv[1:])


if __name__ == '__main__':
    main()
275 
def trace_command(argv)
std::vector< std::string_view > split(std::string_view, const char *)
def searchinpath(progname, path)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def addprefixinfo(argv)
void add(std::map< std::string, TH1 * > &h, TH1 *hist)
static std::string join(char **cmd)
Definition: RemoteFile.cc:17
def writeoutput(callgraph, files)
def formatfile(filename)
Definition: main.py:1
def cleanupenv(tmpdir)
def trace_python(prog_argv, path)