CMS 3D CMS Logo

/data/doxygen/doxygen-1.7.3/gen/CMSSW_4_2_8/src/Utilities/ReleaseScripts/scripts/XML2Python.py

Go to the documentation of this file.
00001 ## Original version of code heavily based on recipe written by Wai Yip
00002 ## Tung, released under PSF license.
00003 ## https://code.activestate.com/recipes/534109/
00004 
00005 import re
00006 import os
00007 import xml.sax.handler
00008 
class DataNode (object):
    '''Container holding the attributes, child elements and text of one
    XML element.

    XML attributes and child elements are stored together in one
    dictionary and can be read either as Python attributes
    (node.name) or by key (node['name']); a name that is not present
    yields None instead of raising.  A name that was added more than
    once maps to a list of all the values in insertion order.

    A single node also behaves like a list of length one (len(node)
    is 1 and node[0] is the node itself), so calling code can treat
    "one child" and "several children" the same way.

    Keyword arguments:
    nameChangeDict - optional dictionary mapping a name as passed to
                     _add_xml_attr to the name under which it is stored
    '''

    # String base type used by __getitem__ to tell name lookups from
    # positional lookups.  'basestring' only exists on Python 2.
    try:
        _stringTypes = basestring
    except NameError:
        _stringTypes = str

    def __init__ (self, **kwargs):
        self._attrs = {}     # XML attributes and child elements
        self._data  = None   # child text data
        self._ncDict = kwargs.get ('nameChangeDict', {})

    def __len__ (self):
        # treat single element as a list of 1
        return 1

    def __getitem__ (self, key):
        '''Returns the stored value for a string key (None if absent);
        any other key indexes the one-element list [self].'''
        if isinstance (key, self._stringTypes):
            return self._attrs.get (key, None)
        return [self][key]

    def __contains__ (self, name):
        # 'in' works on Python 2 and 3; dict.has_key() is Python 2 only
        return name in self._attrs

    def __nonzero__ (self):
        '''A node is true if it has any attribute/child or any text.'''
        return bool (self._attrs or self._data)

    # Python 3 spells the truth-value hook __bool__.  Without this
    # alias truthiness would fall back to __len__, which is always 1.
    __bool__ = __nonzero__

    def __getattr__ (self, name):
        '''Returns the stored value called name, or None if there is
        none.  Only invoked when normal attribute lookup fails.'''
        if name.startswith ('__'):
            # special methods must report "missing" the normal way
            raise AttributeError (name)
        if name == '_attrs':
            # Only reachable when __init__ has not run; looking up
            # self._attrs below would otherwise recurse without end.
            raise AttributeError (name)
        return self._attrs.get (name, None)

    def _add_xml_attr (self, name, value):
        '''Stores value under name, after applying the name change
        dictionary.  A repeated name turns the entry into a list.'''
        name = self._ncDict.get (name) or name
        if name not in self._attrs:
            self._attrs[name] = value
            return
        # multiple attribute of the same name are represented by a list
        children = self._attrs[name]
        if not isinstance (children, list):
            children = [children]
            self._attrs[name] = children
        children.append (value)

    def __str__ (self):
        '''Returns the text data of the node, or an empty string.'''
        return self._data or ''

    def __repr__ (self):
        items = sorted (self._attrs.items())
        if self._data:
            items.append (('data', self._data))
        return u'{%s}' % ', '.join ([u'%s:%s' % (k, repr (v))
                                      for k, v in items])

    def attributes (self):
        '''Returns the dictionary of attributes and child elements
        (the live dictionary, not a copy).'''
        return self._attrs
00063 
00064 
class TreeBuilder (xml.sax.handler.ContentHandler):
    '''SAX content handler that builds a tree of DataNode objects.

    Every element becomes an entry in its parent node, stored under
    the element name with each character outside [_0-9a-zA-Z] replaced
    by an underscore.  An element that ends up with no attributes and
    no child elements is stored as a plain string (its stripped text,
    possibly empty) rather than as a DataNode.

    Keyword arguments:
    nameChangeDict - optional dictionary of name changes handed to
                     every DataNode that is created
    '''

    # matches every character that is not allowed in a stored name
    non_id_char = re.compile('[^_0-9a-zA-Z]')

    def __init__ (self, **kwargs):
        # explicit base call: ContentHandler is an old-style class on
        # Python 2, so super() cannot be used here
        xml.sax.handler.ContentHandler.__init__ (self)
        self._stack = []        # (node, text parts) of the open ancestors
        self._text_parts = []   # text chunks seen for the current element
        self._ncDict = kwargs.get ('nameChangeDict', {})
        self._root = DataNode (nameChangeDict = self._ncDict)
        self.current = self._root

    def startElement (self, name, attrs):
        '''Opens a new node and copies the XML attributes into it.'''
        # remember the parent state until the matching endElement
        self._stack.append ((self.current, self._text_parts))
        self.current = DataNode (nameChangeDict = self._ncDict)
        self._text_parts = []
        # xml attributes --> python attributes
        for key, value in attrs.items():
            self.current._add_xml_attr (TreeBuilder._name_mangle (key), value)

    def endElement (self, name):
        '''Closes the current node and attaches it to its parent.'''
        text = ''.join (self._text_parts).strip()
        if text:
            self.current._data = text
        if self.current.attributes():
            obj = self.current
        else:
            # a text only node is simply represented by the string
            obj = text or ''
        self.current, self._text_parts = self._stack.pop()
        self.current._add_xml_attr (TreeBuilder._name_mangle (name), obj)

    def characters (self, content):
        # text may arrive in several chunks; they are joined in endElement
        self._text_parts.append (content)

    def root (self):
        '''Returns the artificial root node that holds the document
        element as its only entry.'''
        return self._root

    def topLevel (self):
        '''Returns top level object.

        Raises IndexError if no element has been completed yet.'''
        # list() is needed on Python 3, where dict.values() returns a
        # view that cannot be indexed
        return list (self._root.attributes().values())[0]

    @staticmethod
    def _name_mangle (name):
        '''Replaces every character outside [_0-9a-zA-Z] by "_".'''
        return TreeBuilder.non_id_char.sub ('_', name)
00110 
00111 
# Substitutions applied, in this order, to the text found between the
# double quotes of an attribute value.  The ampersand rule has to stay
# first: run later it would escape the '&' of the entities that the
# other rules insert.
regexList = [
    # A bare '&' only.  The lookahead leaves existing entity references
    # (&name;) and character references (&#38; / &#x26;) untouched so
    # that already escaped input is not escaped twice.
    (re.compile (r'&(?!(?:[A-Za-z_:][\w.:-]*|#[0-9]+|#x[0-9a-fA-F]+);)'),
     '&amp;'),
    (re.compile (r'<'), '&lt;'    ),
    (re.compile (r'>'), '&gt;'    ),
    # quoteRE never lets a '"' into the value, so this rule is only a
    # safeguard for other users of the list
    (re.compile (r'"'), '&quot;'  ),
    (re.compile (r"'"), '&#39;'   )
    ]

# group 1: last character of the name, '=' and the opening quote
# group 2: the (non-empty) value between the double quotes
# Only double-quoted values are recognized.
quoteRE = re.compile (r'(\w\s*=\s*")([^"]+)"')

def fixQuoteValue (match):
    '''Escapes the characters of a quoted value that are not legal
    inside an XML attribute value.

    match - match object produced by quoteRE

    Returns the replacement text: the unchanged prefix (group 1), the
    escaped value and the closing double quote.'''
    quote = match.group (2)
    for regex, replacement in regexList:
        quote = regex.sub (replacement, quote)
    return match.group (1) + quote + '"'
00127 
00128 
def _openInputFile (filename):
    '''Opens filename for reading and returns the file object.

    Raises RuntimeError if the file cannot be opened.'''
    try:
        return open (filename, 'r')
    except (IOError, OSError):
        raise RuntimeError ("Failed to open '%s'" % filename)


def xml2obj (**kwargs):
    ''' Converts XML data into native Python object.  Takes a file
    handle, a string or a file name as input.  Illegal characters are
    only fixed when 'filtering' is requested.

    input source:  Exactly one of the three following is needed
    filehandle     - input from file handle
    contents       - input from string
    filename       - input from filename

    options:
    filtering      - boolean value telling code whether or not to filter
                     input selection to remove illegal XML characters
    nameChangeDict - dictionaries of names to change in python object

    Returns the object built for the top level XML element.

    Raises RuntimeError if not exactly one input source is given or if
    'filename' cannot be opened.  Errors raised by the XML parser are
    passed on unchanged.

    A file opened here from 'filename' is always closed again.  A
    'filehandle' supplied by the caller is closed only when filtering,
    after its contents have been read.'''

    # make sure we have exactly 1 input source
    filehandle = kwargs.get ('filehandle')
    contents   = kwargs.get ('contents')
    filename   = kwargs.get ('filename')
    numSources = len ([src for src in (filehandle, contents, filename)
                       if src])
    if not numSources:
        raise RuntimeError ("You must provide 'filehandle', 'contents', or 'filename'")
    if numSources > 1:
        raise RuntimeError ("You must provide only ONE of 'filehandle', 'contents', or 'filename'")

    # Remember explicitly that the input is a string: text read from an
    # empty file is '' and must still go to the string parser.
    parseContents = bool (contents)

    # are we filtering?
    if kwargs.get ('filtering'):
        # if we are filtering, we need to read in the contents to modify them
        if not parseContents:
            if not filehandle:
                filehandle = _openInputFile (filename)
            try:
                contents = filehandle.read()
            finally:
                filehandle.close()
            parseContents = True
        contents = quoteRE.sub (fixQuoteValue, contents)

    ncDict = kwargs.get ('nameChangeDict', {})
    builder = TreeBuilder (nameChangeDict = ncDict)
    if parseContents:
        xml.sax.parseString (contents, builder)
    elif filehandle:
        xml.sax.parse (filehandle, builder)
    else:
        ownedFile = _openInputFile (filename)
        try:
            xml.sax.parse (ownedFile, builder)
        finally:
            # we opened it, so we close it, even if parsing fails
            ownedFile.close()
    return builder.topLevel()