CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
XML2Python.py
Go to the documentation of this file.
1 ## Original version of code heavily based on recipe written by Wai Yip
2 ## Tung, released under PSF license.
3 ## http://code.activestate.com/recipes/534109/
4 
5 import re
6 import os
7 import xml.sax.handler
8 
class DataNode(object):
    '''Container for one parsed XML element.

    XML attributes and child elements are kept by name in a single
    dictionary and can be read as Python attributes (``node.name``) or
    by string key (``node['name']``).  Unknown names yield ``None``
    rather than raising.  A name added more than once holds a list of
    all its values.

    A node also behaves like a sequence of length one
    (``len(node) == 1``, ``node[0] is node``), so a single element can
    be treated the same way as a list of repeated elements.

    Keyword arguments:
      nameChangeDict - optional mapping of names to the replacement
                       names under which they are stored
    '''

    # String base type used to tell name lookups from index lookups:
    # ``basestring`` on Python 2, ``str`` on Python 3.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str

    def __init__(self, **kwargs):
        self._attrs = {}     # XML attributes and child elements
        self._data = None    # child text data
        self._ncDict = kwargs.get('nameChangeDict', {})

    def __len__(self):
        # treat single element as a list of 1
        return 1

    def __getitem__(self, key):
        '''Look up a stored name (string key) or index the one-element
        list ``[self]`` (any other key).'''
        if isinstance(key, self._string_types):
            return self._attrs.get(key, None)
        return [self][key]

    def __contains__(self, name):
        '''Return True if NAME is a stored attribute or child element.'''
        return name in self._attrs

    def __bool__(self):
        '''A node is true when it has attributes, children or text.'''
        return bool(self._attrs or self._data)

    # Python 2 spells the truth-value hook __nonzero__.
    __nonzero__ = __bool__

    def __getattr__(self, name):
        '''Return the stored value for NAME, or None if there is none.'''
        if name.startswith('__'):
            # Special-method probes must fail normally instead of
            # silently returning None.
            raise AttributeError(name)
        try:
            # Go through __dict__ so that an instance whose __init__ has
            # not run fails cleanly instead of recursing back into
            # __getattr__ while looking for '_attrs'.
            attrs = self.__dict__['_attrs']
        except KeyError:
            raise AttributeError(name)
        return attrs.get(name, None)

    def _add_xml_attr(self, name, value):
        '''Store VALUE under NAME, applying the name change dictionary.

        The first value for a name is stored as is; further values for
        the same name turn the entry into a list of all values.
        '''
        name = self._ncDict.get(name) or name
        if name not in self._attrs:
            self._attrs[name] = value
            return
        # multiple attributes of the same name are represented by a list
        children = self._attrs[name]
        if not isinstance(children, list):
            children = [children]
            self._attrs[name] = children
        children.append(value)

    def __str__(self):
        '''Return the text data of the node, or an empty string.'''
        return self._data or ''

    def __repr__(self):
        '''Return ``{name:value, ...}`` sorted by name, with the text
        data (if any) appended last under the label ``data``.'''
        items = sorted(self._attrs.items())
        if self._data:
            items.append(('data', self._data))
        return u'{%s}' % ', '.join([u'%s:%s' % (k, repr(v)) for k, v in items])

    def attributes(self):
        '''Return the dictionary of attributes and child elements.'''
        return self._attrs
63 
64 
class TreeBuilder(xml.sax.handler.ContentHandler):
    '''SAX content handler that builds a tree of DataNode objects.

    Each element becomes a DataNode holding its XML attributes and its
    child elements.  An element that ends up with neither is stored in
    its parent as a plain string (its stripped text, possibly empty).
    Attribute and element names are mangled into identifier-safe names
    before they are stored.

    Keyword arguments:
      nameChangeDict - optional mapping of names to replacement names,
                       handed on to every DataNode that is created
    '''

    # Any character outside [_0-9a-zA-Z]; each one is replaced by '_'.
    non_id_char = re.compile('[^_0-9a-zA-Z]')

    def __init__(self, **kwargs):
        # Explicit base call (rather than super()) so the same code
        # works on both Python 2 and Python 3.
        xml.sax.handler.ContentHandler.__init__(self)
        self._stack = []        # (node, text parts) of the open ancestors
        self._text_parts = []   # text chunks seen for the current element
        self._ncDict = kwargs.get('nameChangeDict', {})
        self._root = DataNode(nameChangeDict=self._ncDict)
        self.current = self._root

    def startElement(self, name, attrs):
        '''Open a new node and copy the XML attributes onto it.'''
        # Remember the parent and its pending text until endElement.
        self._stack.append((self.current, self._text_parts))
        self.current = DataNode(nameChangeDict=self._ncDict)
        self._text_parts = []
        # xml attributes --> python attributes
        for k, v in attrs.items():
            self.current._add_xml_attr(TreeBuilder._name_mangle(k), v)

    def endElement(self, name):
        '''Close the current node and attach it to its parent.'''
        text = ''.join(self._text_parts).strip()
        if text:
            self.current._data = text
        if self.current.attributes():
            obj = self.current
        else:
            # a text only node is simply represented by the string
            obj = text or ''
        self.current, self._text_parts = self._stack.pop()
        self.current._add_xml_attr(TreeBuilder._name_mangle(name), obj)

    def characters(self, content):
        '''Collect a chunk of character data for the current element.'''
        self._text_parts.append(content)

    def root(self):
        '''Returns the synthetic root node that holds the top level
        element'''
        return self._root

    def topLevel(self):
        '''Returns top level object'''
        # dict.values() is a view on Python 3 and cannot be indexed, so
        # copy it into a list first.  An empty root still raises
        # IndexError, as it did before.
        return list(self._root.attributes().values())[0]

    @staticmethod
    def _name_mangle(name):
        '''Replace every character outside [_0-9a-zA-Z] with "_".'''
        return TreeBuilder.non_id_char.sub('_', name)
110 
111 
# Replacements applied, in order, to the text of a quoted attribute
# value in order to escape characters that are illegal there.  '&' has
# to come first so that the ampersands introduced by the later
# replacements are not escaped a second time.
# NOTE: an entity reference already present in the input (e.g. "&lt;")
# is escaped again, because its '&' is treated like any other.
regexList = [(re.compile(r'&'), '&amp;'),
             (re.compile(r'<'), '&lt;'),
             (re.compile(r'>'), '&gt;'),
             (re.compile(r'"'), '&quot;'),
             (re.compile(r"'"), '&#39;'),
             ]

# Matches a double-quoted value preceded by '=' (name = "value").
# Group 1 is the last character of the name through the opening quote,
# group 2 is the (non-empty) text between the quotes.
quoteRE = re.compile(r'(\w\s*=\s*")([^"]+)"')
120 
def fixQuoteValue(match):
    '''Returns the text of a quoteRE match with the quoted value escaped.

    Meant to be used as the replacement function of quoteRE.sub:
    group 1 of the match (everything up to and including the opening
    quote) is kept unchanged, every replacement listed in regexList is
    applied in order to group 2 (the value), and the closing quote is
    put back.
    '''
    prefix = match.group(1)
    value = match.group(2)
    for pattern, replacement in regexList:
        value = pattern.sub(replacement, value)
    return ''.join((prefix, value, '"'))
127 
128 
def xml2obj(**kwargs):
    '''Converts XML data into native Python object.

    input source: Exactly one of the three following is needed
      filehandle - input from file handle
      contents   - input from string
      filename   - input from filename

    options:
      filtering      - boolean value telling code whether or not to
                       filter the input to escape illegal XML characters
                       inside double-quoted attribute values
      nameChangeDict - dictionary of names to change in python object

    Returns the object built for the top level XML element.

    Raises RuntimeError if no input source or more than one input
    source is given, or if the file named by filename cannot be opened.
    Errors reported by the XML parser are passed on unchanged.
    '''
    filehandle = kwargs.get('filehandle')
    contents = kwargs.get('contents')
    filename = kwargs.get('filename')

    # make sure we have exactly 1 input source (empty values count as
    # "not given")
    numSources = sum(1 for source in (filehandle, contents, filename)
                     if source)
    if numSources == 0:
        raise RuntimeError("You must provide 'filehandle', 'contents', or 'filename'")
    if numSources > 1:
        raise RuntimeError("You must provide only ONE of 'filehandle', 'contents', or 'filename'")

    filtering = kwargs.get('filtering')
    builder = TreeBuilder(nameChangeDict=kwargs.get('nameChangeDict', {}))

    openedHere = False
    if filename:
        try:
            filehandle = open(filename, 'r')
        except (IOError, OSError):
            raise RuntimeError("Failed to open '%s'" % filename)
        openedHere = True

    if filehandle and filtering:
        # Filtering needs the complete text, so read the stream in.  The
        # handle is closed afterwards even when it was supplied by the
        # caller, which is what this function has always done.
        try:
            contents = ''.join(filehandle)
        finally:
            filehandle.close()
        filehandle = None

    if filehandle:
        try:
            xml.sax.parse(filehandle, builder)
        finally:
            # Only close what was opened here; a caller supplied handle
            # stays open on this path.
            if openedHere:
                filehandle.close()
    else:
        if filtering:
            contents = quoteRE.sub(fixQuoteValue, contents)
        # Empty input (e.g. an empty file read for filtering) is handed
        # to the parser too, so that it reports the real problem.
        xml.sax.parseString(contents, builder)
    return builder.topLevel()
void strip(std::string &input, const std::string &blanks=" \n\t")
Definition: stringTools.cc:16
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
list object
Definition: dbtoconf.py:77