CMS 3D CMS Logo

TreeCrawler.py
Go to the documentation of this file.
1 from __future__ import print_function
2 # This CMS code is based on previous work done by Toby Dickenson, as indicated below
3 #
4 # for questions: Benedikt.Hegner@cern.ch
5 
6 # Copyright 2004 Toby Dickenson
7 #
8 # Permission is hereby granted, free of charge, to any person obtaining
9 # a copy of this software and associated documentation files (the
10 # "Software"), to deal in the Software without restriction, including
11 # without limitation the rights to use, copy, modify, merge, publish,
12 # distribute, sublicense, and/or sell copies of the Software, and to
13 # permit persons to whom the Software is furnished to do so, subject
14 # to the following conditions:
15 #
16 # The above copyright notice and this permission notice shall be included
17 # in all copies or substantial portions of the Software.
18 #
19 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 
27 from builtins import range
28 import sys, os, inspect, copy, struct, dis, imp
29 import modulefinder
30 import six
31 
def packageNameFromFilename(name):
    """Turn a python file path into a dotted package name.

    Every "python/" and ".py" substring is removed from *name*, the result is
    split on "/", and at most the last three components are joined with ".".
    """
    return ".".join(name.replace("python/","").replace(".py","").split("/")[-3:])
34 
35 
class Color:
    """Named ANSI escape sequences for styling text written to a terminal.

    The attributes are plain strings meant to be printed around the text they
    style; ``none`` is the sequence printed after styled text (as done when
    dumping a package tree).
    """

    # emphasis styles
    info = "\033[1;34m"
    hilight = "\033[31m"
    alternate = "\033[32m"
    extra = "\033[33m"

    # background / decoration
    backlight = "\033[43m"
    underline = "\033[4m"

    # reduced emphasis
    lessemphasis = "\033[30m"
    deemphasis = "\033[1;30m"

    # sequence emitted after styled text
    none = "\033[0m"
47 
# Names of the packages Package.search() is currently descending through.
# search() pushes its own name before visiting dependencies, pops it
# afterwards, and stores a copy of this list with each search hit.
_stack = []
49 
class SearchHit:
    """Plain attribute holder describing one matching source line.

    Instances start empty; Package.search() assigns ``number``, ``filename``,
    ``line`` and ``stacks`` on them.
    """
52 
class Package(object):
    """One node of the import dependency tree.

    Attributes:
        name:         dotted name of the package.
        dependencies: list of Package objects this package imports.
        searched:     True once search() has visited this node.
        stack:        unused here; kept for backward compatibility.
        hits:         every SearchHit recorded for this package by search().
        hit:          the most recently created SearchHit (only set once a
                      match was found); kept for backward compatibility.
        module:       the imported module, or None for the top level node.
    """

    def __init__(self,name,top=False):
        """Create the node; unless *top* is true the module *name* is imported."""
        self.name = name
        self.dependencies = []
        self.searched = False
        self.stack = []
        self.hits = []
        if top:
            self.module = None
        else:
            self.module = __import__(name,[],[],"*")

    def dump(self,level):
        """Print this package and, recursively, its dependencies.

        *level* is the nesting depth used to build the indentation.  The
        dependency list is sorted in place by name as a side effect.
        """
        indent = " " * level
        print(indent, "+", Color.info, self.name, Color.none)
        # sort dependencies alphabetically
        self.dependencies.sort(key = lambda x: x.name)
        for package in self.dependencies:
            package.dump(level+1)

    def search(self,pattern,result):
        """ recursive search for pattern in source files

        On the first visit every source line containing *pattern* produces a
        SearchHit that is appended to *result*.  On every visit (first or
        later) a copy of the current import stack is appended to each hit of
        this package, so a hit ends up with one stack per path leading to it.
        """
        # first start searching in the package itself / do this only once
        if self.module:
            if not self.searched:
                sourceLines = inspect.getsource(self.module).splitlines()
                matches = [(number, line) for number, line in enumerate(sourceLines)
                           if pattern in line]
                if matches:
                    # the file name is the same for every hit, look it up once
                    filename = packageNameFromFilename(inspect.getsourcefile(self.module))
                for number, line in matches:
                    # save the hit, so we can add later stacks to it
                    hit = SearchHit()
                    hit.number = number
                    hit.filename = filename
                    hit.line = line
                    hit.stacks = list()
                    self.hits.append(hit)
                    self.hit = hit
                    result.append(hit)
            # every hit of this package is reachable through the current stack,
            # not only the last one that was created
            for hit in self.hits:
                hit.stacks.append(copy.copy(_stack))
        # then go on with dependencies
        _stack.append(self.name)
        try:
            for package in self.dependencies:
                package.search(pattern,result)
        finally:
            # keep the shared stack balanced even if a dependency fails
            _stack.pop()
        self.searched = True
92 
93 
class mymf(modulefinder.ModuleFinder):
    """ModuleFinder that records which module imports which.

    Attributes:
        _depgraph:    {caller module name: {imported module name: 1}}.
        _types:       {module name: type element of the loader's aux_info}.
        _last_caller: module whose import is currently being resolved, or None.
        _localarea:   expansion of $CMSSW_BASE.
        _globalarea:  expansion of $CMSSW_RELEASE_BASE.
    """

    def __init__(self,*args,**kwargs):
        """Set up the bookkeeping and forward all arguments to ModuleFinder."""
        self._depgraph = {}
        self._types = {}
        self._last_caller = None
        #TODO - replace by environment variables CMSSW_BASE and CMSSW_RELEASE_BASE (*and* do it only if the global one is not empty like for IB areas)
        self._localarea = os.path.expandvars('$CMSSW_BASE')
        self._globalarea = os.path.expandvars('$CMSSW_RELEASE_BASE')
        modulefinder.ModuleFinder.__init__(self,*args,**kwargs)

    def import_hook(self, name, caller=None, fromlist=None, level=-1):
        """Remember *caller* while the base class resolves the import.

        The previous caller is restored afterwards, also when the import fails.
        """
        old_last_caller = self._last_caller
        try:
            self._last_caller = caller
            return modulefinder.ModuleFinder.import_hook(self,name,caller,fromlist, level=level)
        finally:
            self._last_caller = old_last_caller

    def import_module(self,partnam,fqname,parent):
        """Import one module and record it as a dependency of the current caller.

        "os" and "unittest" are not followed.  Returns the module object, or
        None if it could not be imported.
        """
        if partnam in ("os","unittest"):
            r = None
        else:
            r = modulefinder.ModuleFinder.import_module(self,partnam,fqname,parent)
            # since the modulefinder is not able to look into the global area when coming from the local area, we force a second try
        if parent and not r and self._localarea != '' and self._globalarea != '':
            # A parent without __path__ is a plain module: there is nothing to
            # redirect and indexing __path__ would raise a TypeError.
            if parent.__path__:
                if parent.__file__:
                    parent.__file__ = parent.__file__.replace(self._localarea,self._globalarea)
                parent.__path__[0] = parent.__path__[0].replace(self._localarea,self._globalarea)
                r = modulefinder.ModuleFinder.import_module(self,partnam,fqname,parent)

        # Without a current caller there is no edge to record.
        if r is not None and self._last_caller is not None:
            self._depgraph.setdefault(self._last_caller.__name__,{})[r.__name__] = 1
        return r

    def load_module(self, fqname, fp, pathname, aux_info):
        """Load via the base class and remember the module's type from *aux_info*."""
        (suffix, mode, type) = aux_info
        r = modulefinder.ModuleFinder.load_module(self, fqname, fp, pathname, (suffix, mode, type))
        if r is not None:
            self._types[r.__name__] = type
        return r

    def scan_opcodes_25(self, co, unpack = struct.unpack):
        """
        This is basically just the default opcode scanner from ModuleFinder, but extended to also
        look for "process.load(<module>)" commands. Since the Process object might not necessarily
        be called "process", it scans for a call to a "load" method with a single parameter on
        *any* object. If one is found it checks if the parameter is a string that refers to a valid
        python module in the local or global area. If it does, the scanner assumes this was a call
        to a Process object and yields the module name.
        It's not possible to scan first for Process object declarations to get the name of the
        objects since often (e.g. for customisation functions) the object is passed to a function
        in a different file.

        The ModuleFinder.scan_opcodes_25 implementation this is based on was taken from
        https://hg.python.org/cpython/file/2.7/Lib/modulefinder.py#l364

        The slicing below assumes instructions of one opcode byte followed by a two byte
        argument, and opcodes represented as one-character strings.
        """
        # Scan the code, and yield 'interesting' opcode combinations
        # Python 2.5 version (has absolute and relative imports)
        code = co.co_code
        names = co.co_names
        consts = co.co_consts
        LOAD_CONST = modulefinder.LOAD_CONST
        IMPORT_NAME = modulefinder.IMPORT_NAME
        STORE_OPS = modulefinder.STORE_OPS
        HAVE_ARGUMENT = modulefinder.HAVE_ARGUMENT
        LOAD_ATTR = chr(dis.opname.index('LOAD_ATTR'))
        LOAD_NAME = chr(dis.opname.index('LOAD_NAME'))
        CALL_FUNCTION = chr(dis.opname.index('CALL_FUNCTION'))
        LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME

        try :
            indexOfLoadConst = names.index("load") # This might throw a ValueError
            # These are the opcodes required to access the "load" attribute. This might
            # not even be a function, but I check for that later.
            loadMethodOpcodes = LOAD_ATTR+struct.pack('<H',indexOfLoadConst)
        except ValueError :
            # doesn't look like "load" is used anywhere in this file
            loadMethodOpcodes=None

        while code:
            c = code[0]

            # Check to see if this is a call to a "load" method
            if loadMethodOpcodes is not None and len(code)>=9 : # Need at least 9 codes for the full call
                if code[:3]==loadMethodOpcodes :
                    # The attribute "load" is being accessed, need to make sure this is a function call.
                    # I'll look ahead and see if the CALL_FUNCTION code is used - this could be in a different
                    # place depending on the number of arguments, but I'm only interested in methods with a
                    # single argument so I know exactly where CALL_FUNCTION should be.
                    if code[6]==CALL_FUNCTION :
                        # I know this is calling a method called "load" with one argument. I need
                        # to find out what the argument is. Note that I still don't know if this is
                        # on a cms.Process object.
                        indexInTable=unpack('<H',code[4:6])[0]
                        if code[3]==LOAD_CONST :
                            # The argument is a constant, so retrieve that from the table
                            loadMethodArgument=consts[indexInTable]
                            # I know a load method with one argument has been called on *something*, but I don't
                            # know if it was a cms.Process object. All I can do is check to see if the argument is
                            # a string, and if so if it refers to a python file in the user or global areas.
                            try :
                                loadMethodArgument = loadMethodArgument.replace("/",".")
                                # I can only use imp.find_module on submodules (i.e. each bit between a "."), so try
                                # that on each submodule in turn using the previously found filename. Note that I have
                                # to try this twice, because if the first pass traverses into a package in the local
                                # area but the subpackage has not been checked out it will report that the subpackage
                                # doesn't exist, even though it is available in the global area.
                                try :
                                    parentFilename=[self._localarea+"/python"]
                                    for subModule in loadMethodArgument.split(".") :
                                        moduleInfo=imp.find_module( subModule, parentFilename )
                                        parentFilename=[moduleInfo[1]]
                                    # If control got this far without raising an exception, then it must be a valid python module
                                    yield "import", (None, loadMethodArgument)
                                except ImportError :
                                    # Didn't work in the local area, try in the global area.
                                    parentFilename=[self._globalarea+"/python"]
                                    for subModule in loadMethodArgument.split(".") :
                                        moduleInfo=imp.find_module( subModule, parentFilename )
                                        parentFilename=[moduleInfo[1]]
                                    # If control got this far without raising an exception, then it must be a valid python module
                                    yield "import", (None, loadMethodArgument)
                            except Exception:
                                # Either there was an import error (not a python module) or there was a string
                                # manipulation error (argument not a string). Assume this wasn't a call on a
                                # cms.Process object and move on silently.
                                pass

                        elif code[3]==LOAD_NAME :
                            # The argument is a variable. I can get the name of the variable quite easily but
                            # not the value, unless I execute all of the opcodes. Not sure what to do here,
                            # guess I'll just print a warning so that the user knows?
                            print("Unable to determine the value of variable '"+names[indexInTable]+"' to see if it is a proces.load(...) statement in file "+co.co_filename)

                        # the whole "load attribute, load argument, call" sequence has been handled
                        code=code[9:]
                        continue

            if c in STORE_OPS:
                oparg, = unpack('<H', code[1:3])
                yield "store", (names[oparg],)
                code = code[3:]
                continue
            if code[:9:3] == LOAD_LOAD_AND_IMPORT:
                oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9])
                level = consts[oparg_1]
                if level == -1: # normal import
                    yield "import", (consts[oparg_2], names[oparg_3])
                elif level == 0: # absolute import
                    yield "absolute_import", (consts[oparg_2], names[oparg_3])
                else: # relative import
                    yield "relative_import", (level, consts[oparg_2], names[oparg_3])
                code = code[9:]
                continue
            if c >= HAVE_ARGUMENT:
                code = code[3:]
            else:
                code = code[1:]
249 
def removeRecursiveLoops( node, verbose=False, currentStack=None ) :
    """Cut every dependency edge that leads back to a node already on the path.

    node:         object with ``name`` and ``dependencies`` (list of nodes).
    verbose:      if true, print each loop that is removed.
    currentStack: nodes on the path from the root to *node* (excluding it);
                  defaults to an empty path.  *node* is appended to it when it
                  does not close a loop.

    When *node* is already on the path, it is removed from the dependencies of
    the last node on the path and the traversal stops there.
    """
    if currentStack is None : currentStack=[]
    try :
        duplicateIndex=currentStack.index( node ) # If there isn't a recursive loop this will raise a ValueError
    except ValueError:
        duplicateIndex=None

    if duplicateIndex is not None :
        if verbose :
            print("Removing recursive loop in:")
            for stackEntry in currentStack[duplicateIndex:] :
                print(" ",stackEntry.name,"-->")
            print(" ",node.name)
        currentStack[-1].dependencies.remove(node)
        return

    # No recursive loop found, so continue traversing the tree
    currentStack.append( node )
    # Iterate over a snapshot: a child call may remove itself from
    # node.dependencies, which would otherwise make the loop skip the
    # following dependency.
    for subnode in list(node.dependencies) :
        removeRecursiveLoops( subnode, verbose, currentStack[:] )
265 
def transformIntoGraph(depgraph,toplevel):
    """Build a tree of Package objects from a flat dependency dictionary.

    depgraph: {module name: {imported module name: anything}}.
    toplevel: name of the entry in *depgraph* that is the root of the tree.

    Only names containing exactly two dots are turned into packages (the top
    level entry is always kept).  Recursive loops are removed before the top
    level Package is returned.
    """
    def isPackageName(name):
        return name.count(".") == 2

    # create the top level config
    packageDict = {toplevel: Package(toplevel, top = True)}

    # create package objects - exactly one per name, and never replace the
    # top level entry by a regular (imported) package
    for key, value in depgraph.items():
        if isPackageName(key) and key not in packageDict:
            packageDict[key] = Package(key)
        for name in value:
            if isPackageName(name) and name not in packageDict:
                packageDict[name] = Package(name)

    # now create dependencies
    for key, value in depgraph.items():
        if isPackageName(key) or key == toplevel:
            packageDict[key].dependencies = [packageDict[name] for name in value if isPackageName(name)]

    removeRecursiveLoops( packageDict[toplevel] )
    # find and return the top level config
    return packageDict[toplevel]
286 
287 
def getDependenciesFromPythonFile(filename,toplevelname,path):
    """Run the module finder on *filename* and return its dependency dictionary.

    filename:     script to analyse.
    toplevelname: key under which the script's own imports are also stored.
    path:         search path handed to the module finder.

    The returned dictionary is the finder's {importer: {imported: 1}} map; the
    entry recorded for "__main__" is additionally stored under *toplevelname*.
    """
    finder = mymf(path)
    finder.run_script(filename)
    globalDependencyDict = finder._depgraph
    # A script for which no import was recorded has no "__main__" entry.
    globalDependencyDict[toplevelname] = globalDependencyDict.get("__main__", {})
    return globalDependencyDict
294 
295 
def getImportTree(filename,path):
    """Return the dependency tree of the python file *filename*.

    The name of the top level node is derived from *filename*; *path* is
    passed on to the dependency scan.
    """
    topName = packageNameFromFilename(filename)
    # flat {importer: {imported: ...}} dictionary for the given file
    flatDependencies = getDependenciesFromPythonFile(filename, topName, path)
    # turn the flat structure into a tree rooted at the top level name
    return transformIntoGraph(flatDependencies, topName)
def import_hook(self, name, caller=None, fromlist=None, level=-1)
Definition: TreeCrawler.py:103
def replace(string, replacements)
def __init__(self, name, top=False)
Definition: TreeCrawler.py:54
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def getDependenciesFromPythonFile(filename, toplevelname, path)
Definition: TreeCrawler.py:288
def __init__(self, args, kwargs)
Definition: TreeCrawler.py:95
def import_module(self, partnam, fqname, parent)
Definition: TreeCrawler.py:111
def getImportTree(filename, path)
Definition: TreeCrawler.py:296
def packageNameFromFilename(name)
Definition: TreeCrawler.py:32
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def removeRecursiveLoops(node, verbose=False, currentStack=None)
Definition: TreeCrawler.py:250
def search(self, pattern, result)
Definition: TreeCrawler.py:70
def transformIntoGraph(depgraph, toplevel)
Definition: TreeCrawler.py:266
def dump(self, level)
Definition: TreeCrawler.py:63
def scan_opcodes_25(self, co, unpack=struct.unpack)
Definition: TreeCrawler.py:133
def load_module(self, fqname, fp, pathname, aux_info)
Definition: TreeCrawler.py:126
double split
Definition: MVATrainer.cc:139
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list ("!*", "!HLTx*" if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL. It will reject the event if any of the triggers are PASS or EXCEPTION (this matches the behavior of "!*" before the partial wildcard feature was incorporated). Triggers which are in the READY state are completely ignored. (READY should never be returned since the trigger paths have been run