CMS 3D CMS Logo

TreeCrawler.py
Go to the documentation of this file.
1 from __future__ import print_function
2 # This CMS code is based on previous work done by Toby Dickenson, as indicated below
3 #
4 # for questions: Benedikt.Hegner@cern.ch
5 
6 # Copyright 2004 Toby Dickenson
7 #
8 # Permission is hereby granted, free of charge, to any person obtaining
9 # a copy of this software and associated documentation files (the
10 # "Software"), to deal in the Software without restriction, including
11 # without limitation the rights to use, copy, modify, merge, publish,
12 # distribute, sublicense, and/or sell copies of the Software, and to
13 # permit persons to whom the Software is furnished to do so, subject
14 # to the following conditions:
15 #
16 # The above copyright notice and this permission notice shall be included
17 # in all copies or substantial portions of the Software.
18 #
19 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 
27 from builtins import range
28 import sys, os, inspect, copy, struct, dis, imp
29 import modulefinder
30 
32  return ".".join(name.replace("python/","").replace(".py","").split("/")[-3:])
33 
34 
class Color:
    """ANSI escape sequences used to decorate terminal output.

    Each attribute is a complete "ESC [ ... m" (SGR) sequence that can be
    printed as-is; print ``Color.none`` afterwards to reset the terminal.
    """

    # SGR 0: reset all attributes.
    none = "\033[0m"

    # Text attributes.
    underline = "\033[4m"       # SGR 4
    backlight = "\033[43m"      # SGR 43 (a background colour code)

    # Foreground colours (SGR 30-34, optionally combined with bold "1;").
    info = "\033[1;34m"
    hilight = "\033[31m"
    alternate = "\033[32m"
    extra = "\033[33m"
    lessemphasis = "\033[30m"
    deemphasis = "\033[1;30m"
46 
# Module-wide list of package names describing the path currently being
# walked by Package.search: each package appends its own name before
# descending into its dependencies and pops it again afterwards.
_stack = []
48 
class SearchHit:
    """Plain attribute container describing one match found by Package.search.

    Instances are created empty; Package.search then assigns the attributes
    ``number``, ``filename``, ``line`` and ``stacks`` on them.
    """
51 
class Package(object):
    """One node of the import dependency tree.

    Attributes:
        name:         dotted module name of this node.
        dependencies: list of Package objects this node depends on.
        searched:     True once search() has completed on this node at least once.
        stack:        initialised to an empty list (not used inside this class).
        module:       the imported module object, or None for the top level node.
    """

    def __init__(self,name,top=False):
        """Create the node; every node except the top level one imports its module."""
        self.name = name
        self.dependencies = []
        self.searched = False
        self.stack = []
        # The top level config is never imported, all other packages are.
        self.module = None if top else __import__(name,[],[],"*")

    def dump(self,level):
        """Print this node and, recursively, its dependencies indented by level."""
        prefix = " " * level
        print(prefix, "+", Color.info, self.name, Color.none)
        # Dependencies are sorted in place by name before being printed.
        self.dependencies.sort(key = lambda dependency: dependency.name)
        for dependency in self.dependencies:
            dependency.dump(level+1)

    def search(self,pattern,result):
        """Recursively look for pattern in the source of this node and its dependencies.

        The first time a node is visited every matching line produces a
        SearchHit (zero-based line index, package name, line text) that is
        appended to result. On every visit the current content of the
        module-level _stack is appended to the stacks of self.hit.

        NOTE(review): self.hit always refers to the most recently created hit
        of this node, so on later visits all stacks go to that single hit.
        """
        if self.module:
            sourceLines = inspect.getsource(self.module).splitlines()
            for number, line in enumerate(sourceLines):
                if pattern not in line:
                    continue
                filename = packageNameFromFilename(inspect.getsourcefile(self.module))
                if not self.searched:
                    # Remember the hit so that stacks of later visits can be added to it.
                    newHit = SearchHit()
                    self.hit = newHit
                    newHit.number = number
                    newHit.filename = filename
                    newHit.line = line
                    newHit.stacks = []
                    result.append(newHit)
                self.hit.stacks.append(copy.copy(_stack))

        # Descend into the dependencies with this node on the search path.
        _stack.append(self.name)
        for dependency in self.dependencies:
            dependency.search(pattern,result)
        _stack.pop()
        self.searched = True
91 
92 
class mymf(modulefinder.ModuleFinder):
    """ModuleFinder that records a dependency graph and follows "load" calls.

    Attributes:
        _depgraph:    {importer name: {imported name: 1}}, filled by import_module.
        _types:       {module name: type field of the module's file description},
                      filled by load_module.
        _last_caller: module passed as caller to the import_hook call that is
                      currently in progress (None outside of import_hook).
        _localarea / _globalarea: expansions of $CMSSW_BASE and
                      $CMSSW_RELEASE_BASE. os.path.expandvars leaves a reference
                      to an unset variable unchanged, so these are only empty
                      when the variable is set to an empty string.
    """

    def __init__(self,*args,**kwargs):
        """Set up the bookkeeping, then forward all arguments to ModuleFinder."""
        self._depgraph = {}
        self._types = {}
        self._last_caller = None
        #TODO - replace by environment variables CMSSW_BASE and CMSSW_RELEASE_BASE (*and* do it only if the global one is not empty like for IB areas)
        self._localarea = os.path.expandvars('$CMSSW_BASE')
        self._globalarea = os.path.expandvars('$CMSSW_RELEASE_BASE')
        modulefinder.ModuleFinder.__init__(self,*args,**kwargs)

    def import_hook(self, name, caller=None, fromlist=None, level=-1):
        """Run the normal import hook while remembering caller for import_module."""
        old_last_caller = self._last_caller
        try:
            self._last_caller = caller
            return modulefinder.ModuleFinder.import_hook(self,name,caller,fromlist, level=level)
        finally:
            # Restore the previous caller even if the import raised.
            self._last_caller = old_last_caller

    def import_module(self,partnam,fqname,parent):
        """Import one module and record it as a dependency of the current caller.

        "os" and "unittest" are never followed. Returns the module object, or
        None if the module was skipped or could not be found.
        """
        if partnam in ("os","unittest"):
            r = None
        else:
            r = modulefinder.ModuleFinder.import_module(self,partnam,fqname,parent)
            # Since the modulefinder is not able to look into the global area when
            # coming from the local area, we force a second try with the parent
            # relocated to the global area. This is only possible when the parent
            # is a package: a plain module has __path__ set to None, and indexing
            # it would raise a TypeError.
            canRelocate = (parent and not r
                           and self._localarea != '' and self._globalarea != ''
                           and parent.__path__ and parent.__file__)
            if canRelocate:
                parent.__file__ = parent.__file__.replace(self._localarea,self._globalarea)
                parent.__path__[0] = parent.__path__[0].replace(self._localarea,self._globalarea)
                r = modulefinder.ModuleFinder.import_module(self,partnam,fqname,parent)

        # Without a recorded caller there is nobody to attribute the dependency to.
        if r is not None and self._last_caller is not None:
            self._depgraph.setdefault(self._last_caller.__name__,{})[r.__name__] = 1
        return r

    def load_module(self, fqname, fp, pathname, aux_info):
        """Load the module as usual and remember its type in self._types."""
        (suffix, mode, moduleType) = aux_info
        r = modulefinder.ModuleFinder.load_module(self, fqname, fp, pathname, (suffix, mode, moduleType))
        if r is not None:
            self._types[r.__name__] = moduleType
        return r

    def _checkModuleBelowArea(self, dottedName, area):
        """Raise ImportError unless dottedName can be found below "<area>/python".

        imp.find_module can only be used on submodules (i.e. each bit between
        a "."), so each submodule is looked up in turn using the previously
        found path name.
        """
        searchPath=[area+"/python"]
        for subModule in dottedName.split(".") :
            fileObject, foundPath, _description = imp.find_module( subModule, searchPath )
            # The caller of imp.find_module owns the returned file object.
            if fileObject is not None :
                fileObject.close()
            searchPath=[foundPath]

    def scan_opcodes_25(self, co, unpack = struct.unpack):
        """
        This is basically just the default opcode scanner from ModuleFinder, but extended to also
        look for "process.load(<module>)" commands. Since the Process object might not necessarily
        be called "process", it scans for a call to a "load" method with a single parameter on
        *any* object. If one is found it checks if the parameter is a string that refers to a valid
        python module in the local or global area. If it does, the scanner assumes this was a call
        to a Process object and yields the module name.
        It's not possible to scan first for Process object declarations to get the name of the
        objects since often (e.g. for customisation functions) the object is passed to a function
        in a different file.

        The ModuleFinder.scan_opcodes_25 implementation this is based on was taken from
        https://hg.python.org/cpython/file/2.7/Lib/modulefinder.py#l364

        NOTE(review): the scanner treats co.co_code as a Python 2 str made of 1-byte and
        3-byte instructions (opcodes are built with chr()) - confirm before using it with
        another bytecode layout.
        """
        # Scan the code, and yield 'interesting' opcode combinations
        # Python 2.5 version (has absolute and relative imports)
        code = co.co_code
        names = co.co_names
        consts = co.co_consts
        LOAD_CONST = modulefinder.LOAD_CONST
        IMPORT_NAME = modulefinder.IMPORT_NAME
        STORE_OPS = modulefinder.STORE_OPS
        HAVE_ARGUMENT = modulefinder.HAVE_ARGUMENT
        LOAD_ATTR = chr(dis.opname.index('LOAD_ATTR'))
        LOAD_NAME = chr(dis.opname.index('LOAD_NAME'))
        CALL_FUNCTION = chr(dis.opname.index('CALL_FUNCTION'))
        LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME

        try :
            indexOfLoadConst = names.index("load") # This might throw a ValueError
            # These are the opcodes required to access the "load" attribute. This might
            # not even be a function, but I check for that later.
            loadMethodOpcodes = LOAD_ATTR+struct.pack('<H',indexOfLoadConst)
        except ValueError :
            # doesn't look like "load" is used anywhere in this file
            loadMethodOpcodes=None

        while code:
            c = code[0]

            # Check to see if this is a call to a "load" method
            if loadMethodOpcodes is not None and len(code)>=9 : # Need at least 9 codes for the full call
                if code[:3]==loadMethodOpcodes :
                    # The attribute "load" is being accessed, need to make sure this is a function call.
                    # I'll look ahead and see if the CALL_FUNCTION code is used - this could be in a different
                    # place depending on the number of arguments, but I'm only interested in methods with a
                    # single argument so I know exactly where CALL_FUNCTION should be.
                    if code[6]==CALL_FUNCTION :
                        # I know this is calling a method called "load" with one argument. I need
                        # to find out what the argument is. Note that I still don't know if this is
                        # on a cms.Process object.
                        indexInTable=unpack('<H',code[4:6])[0]
                        if code[3]==LOAD_CONST :
                            # The argument is a constant, so retrieve that from the table
                            loadMethodArgument=consts[indexInTable]
                            # I know a load method with one argument has been called on *something*, but I don't
                            # know if it was a cms.Process object. All I can do is check to see if the argument is
                            # a string, and if so if it refers to a python file in the user or global areas.
                            try :
                                loadMethodArgument = loadMethodArgument.replace("/",".")
                                # I have to try this twice, because if the first pass traverses into a package
                                # in the local area but the subpackage has not been checked out it will report
                                # that the subpackage doesn't exist, even though it is available in the global area.
                                try :
                                    self._checkModuleBelowArea( loadMethodArgument, self._localarea )
                                except ImportError :
                                    # Didn't work in the local area, try in the global area.
                                    self._checkModuleBelowArea( loadMethodArgument, self._globalarea )
                                isValidModule = True
                            except Exception :
                                # Either there was an import error (not a python module) or there was a string
                                # manipulation error (argument not a string). Assume this wasn't a call on a
                                # cms.Process object and move on silently.
                                isValidModule = False
                            if isValidModule :
                                # Both lookups finished without raising, so it must be a valid python module
                                yield "import", (None, loadMethodArgument)

                        elif code[3]==LOAD_NAME :
                            # The argument is a variable. I can get the name of the variable quite easily but
                            # not the value, unless I execute all of the opcodes. Not sure what to do here,
                            # guess I'll just print a warning so that the user knows?
                            print("Unable to determine the value of variable '"+names[indexInTable]+"' to see if it is a proces.load(...) statement in file "+co.co_filename)

                        # Skip the three 3-byte instructions that made up the call
                        code=code[9:]
                        continue

            if c in STORE_OPS:
                oparg, = unpack('<H', code[1:3])
                yield "store", (names[oparg],)
                code = code[3:]
                continue
            if code[:9:3] == LOAD_LOAD_AND_IMPORT:
                oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9])
                level = consts[oparg_1]
                if level == -1: # normal import
                    yield "import", (consts[oparg_2], names[oparg_3])
                elif level == 0: # absolute import
                    yield "absolute_import", (consts[oparg_2], names[oparg_3])
                else: # relative import
                    yield "relative_import", (level, consts[oparg_2], names[oparg_3])
                code = code[9:]
                continue
            if c >= HAVE_ARGUMENT:
                code = code[3:]
            else:
                code = code[1:]
248 
def removeRecursiveLoops( node, verbose=False, currentStack=None ) :
    """Cut every dependency edge that leads back to a node already on the path.

    The tree is walked depth first. currentStack holds the nodes on the path
    from the starting node down to the parent of node; when node is already
    on that path, the edge parent --> node is removed from the parent's
    dependencies list.

    Parameters:
        node:         object with a "name" and a "dependencies" list.
        verbose:      if True, print every loop that is removed.
        currentStack: path walked so far; callers normally leave it as None.
                      A list passed in explicitly gets node appended to it.
    """
    if currentStack is None :
        currentStack = []

    if node in currentStack :
        if verbose :
            duplicateIndex = currentStack.index( node )
            print("Removing recursive loop in:")
            for ancestor in currentStack[duplicateIndex:] :
                print(" ",ancestor.name,"-->")
            print(" ",node.name)
        # The last entry of the stack is the parent that holds the looping edge
        currentStack[-1].dependencies.remove(node)
        return

    # No recursive loop found, so continue traversing the tree
    currentStack.append( node )
    # Iterate over a snapshot: the recursive call may remove entries from
    # node.dependencies, and mutating the list being iterated would silently
    # skip the sibling that follows a removed entry.
    for subnode in list(node.dependencies) :
        removeRecursiveLoops( subnode, verbose, currentStack[:] )
264 
def transformIntoGraph(depgraph,toplevel):
    """Turn the flat dependency dictionary into a tree of Package objects.

    Parameters:
        depgraph: {module name: {name of imported module: anything}}.
        toplevel: name of the entry in depgraph that becomes the root.

    Only names containing exactly two dots are turned into Package nodes,
    apart from toplevel itself, which always is one. Recursive loops are
    removed before the root Package is returned.
    """
    def isPackageName(name):
        return name.count(".") == 2

    # create the top level config
    packageDict = {toplevel: Package(toplevel, top = True)}

    # Create exactly one package object per name. Names that already have a
    # node are left alone, so that a module which is imported many times is
    # not re-created over and over, and so that the top level node is not
    # replaced if its name shows up as somebody's dependency.
    for key, value in depgraph.items():
        if isPackageName(key) and key not in packageDict:
            packageDict[key] = Package(key)
        for name in value:
            if isPackageName(name) and name not in packageDict:
                packageDict[name] = Package(name)

    # now create dependencies
    for key, value in depgraph.items():
        if isPackageName(key) or key == toplevel:
            packageDict[key].dependencies = [packageDict[name] for name in value if isPackageName(name)]

    removeRecursiveLoops( packageDict[toplevel] )
    # find and return the top level config
    return packageDict[toplevel]
285 
286 
def getDependenciesFromPythonFile(filename,toplevelname,path):
    """Run the module finder on filename and return its dependency dictionary.

    Parameters:
        filename:     python script to analyse.
        toplevelname: additional key under which the dependencies of the
                      script itself are made available.
        path:         passed on to the mymf constructor.

    Returns the {importer: {imported: 1}} dictionary collected by mymf. The
    dependencies of the script are stored under "__main__" and, as the very
    same dictionary object, under toplevelname.
    """
    finder = mymf(path)
    finder.run_script(filename)
    globalDependencyDict = finder._depgraph
    # The finder only creates an entry for an importer once it records an
    # import, so a script without any recorded import has no "__main__"
    # entry; fall back to an empty set of dependencies instead of a KeyError.
    globalDependencyDict[toplevelname] = globalDependencyDict.setdefault("__main__", {})
    return globalDependencyDict
293 
294 
def getImportTree(filename,path):
    """Return the root of the dependency tree of the python file filename.

    The flat dependency dictionary of the file is collected first (path is
    handed on to getDependenciesFromPythonFile) and then transformed into a
    tree whose root is named after the file.
    """
    toplevelname = packageNameFromFilename(filename)
    flatDependencies = getDependenciesFromPythonFile(filename,toplevelname,path)
    return transformIntoGraph(flatDependencies,toplevelname)
def import_hook(self, name, caller=None, fromlist=None, level=-1)
Definition: TreeCrawler.py:102
def replace(string, replacements)
def __init__(self, name, top=False)
Definition: TreeCrawler.py:53
def getDependenciesFromPythonFile(filename, toplevelname, path)
Definition: TreeCrawler.py:287
def __init__(self, args, kwargs)
Definition: TreeCrawler.py:94
std::pair< unsigned int, unsigned int > unpack(cond::Time_t since)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def import_module(self, partnam, fqname, parent)
Definition: TreeCrawler.py:110
def getImportTree(filename, path)
Definition: TreeCrawler.py:295
def packageNameFromFilename(name)
Definition: TreeCrawler.py:31
static std::string join(char **cmd)
Definition: RemoteFile.cc:19
def removeRecursiveLoops(node, verbose=False, currentStack=None)
Definition: TreeCrawler.py:249
def search(self, pattern, result)
Definition: TreeCrawler.py:69
def transformIntoGraph(depgraph, toplevel)
Definition: TreeCrawler.py:265
def dump(self, level)
Definition: TreeCrawler.py:62
def scan_opcodes_25(self, co, unpack=struct.unpack)
Definition: TreeCrawler.py:132
def load_module(self, fqname, fp, pathname, aux_info)
Definition: TreeCrawler.py:125