CMS 3D CMS Logo

cmsswFiletrace.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 from __future__ import print_function
3 import os
4 import re
5 import sys
6 import atexit
7 import tempfile
8 import subprocess
9 from shutil import copy, rmtree
10 from collections import defaultdict
11 
12 # only needed to locate CMSSW
13 import FWCore
14 import FWCore.ParameterSet.Types
15 
# Names of the two result files (call graph edges / list of touched files).
OUTFILE_TREE = "calltree"
OUTFILE_FILES = "callfiles"
# Selects the output format of the call graph file (see writeoutput).
FLAT_OUTPUT = False
# cmsRun always gets special handling, but also trace these scripts
WRAP_SCRIPTS = ["cmsDriver.py"]
# Calls into code whose file name starts with one of these are not traced.
IGNORE_DIRS = [os.path.dirname(os.__file__), FWCore.ParameterSet.Types.__file__]
# Prefixes removed from file names in the output; the base dir from the
# CMSSWCALLBASE env var is appended to this list at run time.
STRIPPATHS = [
    os.environ[base_var] + subdir
    for subdir in ("/python/", "/cfipython/")
    for base_var in ("CMSSW_BASE", "CMSSW_RELEASE_BASE")
]
# Labels identifying the traced job; filled by addprefixinfo.
PREFIXINFO = []
ARGV0 = ""  # set in main
30 
def addprefixinfo(argv):
    """Fill the global PREFIXINFO list with labels describing the traced job.

    The labels are derived from the current working directory and from the
    name of the program/config in ``argv[0]``:

    * ``"wf", <number>`` if the working directory path contains a component
      starting with ``<digits>.<digits>_``,
    * ``"online", <name>`` for ``<name>_dqm_sourceclient-live_cfg.py``,
    * ``stepN`` for ``stepN_*.py``,
    * ``RECO``/``ALCA``/``HARVEST`` if the step file name contains one,
    * ``argv[0]`` itself if nothing has been recorded in PREFIXINFO.

    Args:
        argv: command line of the traced program; only ``argv[0]`` is read.
    """
    # All patterns are raw strings: "\d" and "\." in a normal string literal
    # are invalid escape sequences, which newer Python versions warn about.
    target = argv[0]
    cwd = os.path.abspath(os.getcwd())

    wf = re.match(r".*/(\d+\.\d+)_", cwd)
    if wf:
        PREFIXINFO.append("wf")
        PREFIXINFO.append(wf.groups()[0])

    online = re.match(r"(.*/)?(.*)_dqm_sourceclient-live_cfg\.py", target)
    if online:
        PREFIXINFO.append("online")
        # group 0 is the optional directory part, group 1 the client name
        PREFIXINFO.append(online.groups()[1])

    step = re.match(r"(step\d+)_.*\.py", target)
    if step:
        PREFIXINFO.append(step.groups()[0])

    processing = re.match(r"step\d+_.*(RECO|ALCA|HARVEST).*\.py", target)
    if processing:
        PREFIXINFO.append(processing.groups()[0])

    if not PREFIXINFO:
        PREFIXINFO.append(target)
49 
def setupenv():
    """Prepare a temporary directory holding the wrappers and output files.

    Creates a fresh temp directory, symlinks ARGV0 into it under the name of
    every script in WRAP_SCRIPTS and as ``cmsRun``, puts the directory at the
    front of ``$PATH``, publishes the output file locations and the current
    working directory through the ``CMSSWCALL*`` environment variables, and
    creates both output files empty.

    Returns:
        The path of the temporary directory.
    """
    wrapper_dir = tempfile.mkdtemp()
    print("+Setting up in ", wrapper_dir)

    # one symlink per wrapped script, plus the always-wrapped cmsRun
    for wrapped in list(WRAP_SCRIPTS) + ["cmsRun"]:
        os.symlink(ARGV0, wrapper_dir + "/" + wrapped)

    env = os.environ
    env["PATH"] = wrapper_dir + ":" + env["PATH"]
    env["CMSSWCALLTREE"] = wrapper_dir + "/" + OUTFILE_TREE
    env["CMSSWCALLFILES"] = wrapper_dir + "/" + OUTFILE_FILES
    env["CMSSWCALLBASE"] = os.path.abspath(os.getcwd()) + "/"

    # create (or truncate) both output files
    for variable in ("CMSSWCALLTREE", "CMSSWCALLFILES"):
        open(env[variable], "w").close()

    return wrapper_dir
65 
def cleanupenv(tmpdir):
    """Copy the result files to the current directory and delete ``tmpdir``.

    Args:
        tmpdir: directory to remove recursively after the files named by the
            CMSSWCALLTREE and CMSSWCALLFILES environment variables have been
            copied to ``.``.
    """
    print("+Cleaning up ", tmpdir)
    for variable in ("CMSSWCALLTREE", "CMSSWCALLFILES"):
        copy(os.environ[variable], ".")
    rmtree(tmpdir)
73 
74 
def trace_command(argv):
    """Run ``argv`` as a subprocess with the tracing environment in place.

    If CMSSWCALLTREE is not yet in the environment, the environment is set up
    via setupenv() before the command runs and torn down via cleanupenv()
    afterwards. If the variable is already present, the command is simply
    executed and nothing is cleaned up here.

    Args:
        argv: command line (list of strings) passed to ``subprocess.call``.
    """
    tmpdir = None
    if "CMSSWCALLTREE" not in os.environ:
        tmpdir = setupenv()

    try:
        subprocess.call(argv)
    finally:
        # Also clean up when launching the command fails (e.g. the program
        # does not exist), so the temporary directory is not left behind.
        if tmpdir:
            cleanupenv(tmpdir)
84 
def formatfile(filename):
    """Return the absolute form of ``filename`` with known prefixes removed.

    Every entry of STRIPPATHS is tried in order; whenever the (possibly
    already shortened) name starts with an entry, that prefix is cut off.
    """
    shortened = os.path.abspath(filename)
    for prefix in STRIPPATHS:
        if not shortened.startswith(prefix):
            continue
        shortened = shortened[len(prefix):]
    return shortened
91 
def searchinpath(progname, path):
    """Return the first ``entry/progname`` in ``path`` that is a regular file.

    Args:
        progname: file name to look for.
        path: iterable of directory names, searched in order.

    Returns:
        The joined path of the first match.

    Exits the process with status 1 (after printing a message) if no entry
    of ``path`` contains the program.
    """
    # Materialize the search path: callers may pass a lazy iterator, which
    # could only be walked once and would print as an opaque object below.
    path = list(path)
    for entry in path:
        file_path = os.path.join(entry, progname)
        if os.path.isfile(file_path):
            print("+Found %s as %s in %s." % (progname, file_path, path))
            return file_path
    # Also reached for an empty search path.
    print("+Cannot find program (%s) in modified $PATH (%s)." % (progname, path))
    sys.exit(1)
103 
def writeoutput(callgraph, files):
    """Append the collected trace data to the two output files.

    Every line is prefixed with the comma-joined PREFIXINFO labels. The file
    list goes to the file named by $CMSSWCALLFILES, the call graph to the one
    named by $CMSSWCALLTREE. With FLAT_OUTPUT the call graph is written as one
    ``;``-separated call path per function followed by `` 1``; otherwise as
    one ``callee -> caller`` edge per line.

    Args:
        callgraph: mapping ``(filename, funcname)`` -> set of caller
            ``(filename, funcname)`` tuples.
        files: iterable of file names that were touched.
    """
    progname = ", ".join(PREFIXINFO)
    print("+Done running %s, writing output..." % progname)

    def describe(func):
        # "<stripped file name>::<function name>"
        filename, funcname = func
        return "%s::%s" % (formatfile(filename), funcname)

    def callpath(func):
        # climb up in the call graph until we find a node without callers (this is
        # the entry point, the traced call itself). There may be cycles, but any
        # node is reachable from the entry point, so no backtracking required.
        path = []
        seen = set()
        parents = {func}
        timeout = 100  # go no more than this deep
        while parents:
            if len(parents) == 1:
                func = next(iter(parents))
                seen.add(func)
                path.append(describe(func))
            else:
                # several callers: follow the first one not visited yet and
                # mark the ambiguity with a trailing "+"
                for func in parents:
                    if func not in seen:
                        break
                if func in seen:
                    # somehow we got stuck in a loop and can't get out. So maybe
                    # backtracking is needed in some situations?
                    # Abort with a partial path for now.
                    return path
                seen.add(func)
                path.append(describe(func) + "+")
            # Non-mutating lookup: indexing a defaultdict would insert the
            # caller-less entry point as a new key while the caller of this
            # helper is iterating over the graph.
            parents = callgraph.get(func, ())
            timeout -= 1
            if timeout == 0:
                print(seen, path, parents, func)
                raise Exception('Call path too deep, aborting')
        # the last element is the entry point itself; leave it out
        return path[:-1]

    with open(os.environ["CMSSWCALLFILES"], "a") as outfile:
        for f in files:
            print("%s: %s" % (progname, formatfile(f)), file=outfile)
    with open(os.environ["CMSSWCALLTREE"], "a") as outfile:
        # iterate over a snapshot of the keys so the graph may not change
        # size underneath the loop
        if FLAT_OUTPUT:
            for func in list(callgraph):
                print("%s: %s 1" % (progname, ";".join(reversed(callpath(func)))), file=outfile)
        else:
            for func in list(callgraph):
                for pfunc in callgraph[func]:
                    print("%s: %s -> %s" % (progname, describe(func), describe(pfunc)), file=outfile)
154 
def trace_python(prog_argv, path):
    """Execute a Python program in-process while recording its call graph.

    The program ``prog_argv[0]`` is located via searchinpath(), compiled and
    executed as ``__main__`` with ``sys.argv`` replaced by ``prog_argv``. For
    every traced Python call, the callee's file is recorded and an edge from
    the callee to its caller is added to the call graph. The results are
    written by writeoutput(), which is registered with ``atexit`` and also
    invoked right before the program replaces the process via ``os.execvpe``.

    This function does not return: it always ends in ``sys.exit``.

    Args:
        prog_argv: command line of the program to run; element 0 is the
            program name searched for in ``path``.
        path: directories to search for the program.
    """
    files = set()
    callgraph = defaultdict(set)

    def nop_trace(frame, why, arg):
        # global trace function used while inside ignored code: trace nothing
        pass

    def tracefunc(frame, why, arg):
        if why == 'call':
            code = frame.f_code
            # compared to the `trace` module, we don't attempt to find class names here
            filename = code.co_filename

            for d in IGNORE_DIRS:
                if filename.startswith(d):
                    # Stop tracing new calls, and install a local trace
                    # function on this frame that re-enables tracing once
                    # the frame returns.
                    sys.settrace(nop_trace)
                    return wait_for_return

            funcname = code.co_name
            code = frame.f_back.f_code
            p_filename = code.co_filename
            p_funcname = code.co_name

            files.add(filename)
            callgraph[(filename, funcname)].add((p_filename, p_funcname))
        # no local trace function: only 'call' events are of interest
        return None

    def wait_for_return(frame, why, arg):
        if why == 'return':
            sys.settrace(tracefunc)
        return wait_for_return

    sys.argv = prog_argv
    progname = prog_argv[0]

    file_path = searchinpath(progname, path)
    try:
        with open(file_path) as fp:
            code = compile(fp.read(), progname, 'exec')
        # try to emulate __main__ namespace as much as possible
        main_namespace = {
            '__file__': progname,
            '__name__': '__main__',
            '__package__': None,
            '__cached__': None,
        }

        # would be too easy if this covered all the cases...
        atexit.register(lambda: writeoutput(callgraph, files))
        # cmsDriver calls cmsRun via exec (execvpe specifically), so we also need
        # to hook that...
        old_execvpe = os.execvpe

        def exec_hook(*args):
            writeoutput(callgraph, files)
            old_execvpe(*args)
        os.execvpe = exec_hook

        # now turn on the tracing
        sys.settrace(tracefunc)
        try:
            # Function-call form of exec: the statement form
            # (`exec code in ns, ns`) is a syntax error on Python 3.
            exec(code, main_namespace, main_namespace)
        finally:
            sys.settrace(None)

    except OSError as err:
        print("+Cannot run file %r because: %s" % (sys.argv[0], err))
        sys.exit(1)
    except SystemExit:
        # the traced program asked to exit; fall through to our own exit
        pass
    # this is not necessarily reached at all.
    sys.exit(0)
227 
def help():
    """Print the usage message for this tool to standard output.

    The text names the wrapped scripts (WRAP_SCRIPTS), the two output files
    (OUTFILE_FILES, OUTFILE_TREE) and describes the call graph format that
    the current FLAT_OUTPUT setting selects.
    """
    prog = sys.argv[0]
    print("Usage: %s <some cmssw commandline>" % (prog))
    print(" The given programs will be executed, instrumenting calls to %s and cmsRun." % (", ".join(WRAP_SCRIPTS)))
    print(" cmsRun will not actually run cmssw, but all the Python code will be executed and instrumented. The results are written to the files `%s` and `%s` in the same directory." % (OUTFILE_FILES, OUTFILE_TREE))
    if FLAT_OUTPUT:
        print(" The callgraph output file can be processed with Brendan Gregg's FlameGraph tool.")
    else:
        print(" The callgraph output lists edges pointing from each function to the one calling it.")

    print("Examples:")
    # both example lines are indented the same way
    print(" %s runTheMatrix.py -l 1000 --ibeos" % prog)
    print(" %s cmsRun rpc_dqm_sourceclient-live_cfg.py" % prog)
240 
def main():
    """Entry point: dispatch on the name this script was invoked under.

    * Invoked under the name of one of WRAP_SCRIPTS: trace the real script
      of that name, found in ``$PATH`` with the directory of this wrapper
      filtered out.
    * Invoked as ``cmsRun``: trace the given configuration file, looked up
      relative to the current directory.
    * Invoked without arguments: print the usage message.
    * Otherwise: run the given command line with tracing set up.
    """
    global ARGV0
    print("+Running cmsswfiletrace...")
    # Absolute path: setupenv() symlinks to ARGV0 from inside a temporary
    # directory, where a relative target would not resolve.
    ARGV0 = os.path.abspath(sys.argv[0])
    for s in WRAP_SCRIPTS:
        if sys.argv[0].endswith(s):
            print("+Wrapping %s..." % s)
            addprefixinfo(sys.argv)
            # skip the directory holding the wrapper symlinks so the real
            # script is found instead of this wrapper
            tmppath = os.path.dirname(sys.argv[0])
            # a real list (not a lazy filter object), so it can be printed
            # and walked by searchinpath
            path = [
                entry
                for entry in os.environ["PATH"].split(":")
                if not entry.startswith(tmppath)
            ]
            STRIPPATHS.append(os.environ["CMSSWCALLBASE"])
            trace_python([s] + sys.argv[1:], path)
            return
    if sys.argv[0].endswith('cmsRun'):
        print("+Wrapping cmsRun...")
        addprefixinfo(sys.argv[1:])
        STRIPPATHS.append(os.environ["CMSSWCALLBASE"])
        trace_python(sys.argv[1:], ["."])
        return
    if len(sys.argv) <= 1:
        help()
        return
    # else
    print("+Running command with tracing %s..." % sys.argv[1:])
    trace_command(sys.argv[1:])


if __name__ == '__main__':
    main()
273 
def trace_command(argv)
def searchinpath(progname, path)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def addprefixinfo(argv)
static std::string join(char **cmd)
Definition: RemoteFile.cc:19
def writeoutput(callgraph, files)
def formatfile(filename)
void add(std::map< std::string, TH1 *> &h, TH1 *hist)
Definition: main.py:1
def cleanupenv(tmpdir)
def trace_python(prog_argv, path)