CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
XML2Python.py
Go to the documentation of this file.
1 ## Original version of code heavily based on recipe written by Wai Yip
2 ## Tung, released under PSF license.
3 ## http://code.activestate.com/recipes/534109/
4 
5 import re
6 import os
7 import xml.sax.handler
8 import pprint
9 
class DataNode (object):
    '''Python-side representation of a single XML element.

    XML attributes and child elements share one dictionary and can be
    read either as items (node['name']) or as Python attributes
    (node.name); a name that is not present yields None.  The text
    content of the element is kept separately in _data.  A node also
    acts as a list of one element (itself), so a lone child can be
    indexed the same way as a list of repeated children.
    '''

    # extra indentation applied after a 'name: ' label when printing
    spaces = 4

    def __init__ (self, **kwargs):
        '''Creates an empty node.

        Keyword arguments:
        nameChangeDict - optional dictionary mapping a name to the
                         name it should be stored under instead
        '''
        self._attrs = {}     # XML attributes and child elements
        self._data = None    # child text data
        self._ncDict = kwargs.get ('nameChangeDict', {})


    def __len__ (self):
        '''Always 1: a single element is treated as a list of 1.'''
        return 1


    def __getitem__ (self, key):
        '''Looks up an attribute/child for string keys (None if
        absent); any other key indexes the one-element list [self].'''
        # (str, type (u'')) is (str, unicode) on Python 2 and plain
        # str on Python 3, replacing the Python 2-only 'basestring'.
        if isinstance (key, (str, type (u''))):
            return self._attrs.get (key, None)
        return [self][key]


    def __contains__ (self, name):
        '''True if an attribute or child called name is stored.'''
        return name in self._attrs


    def __nonzero__ (self):
        '''True if the node holds any attribute, child or text.'''
        return bool (self._attrs or self._data)

    # Python 3 looks for __bool__; without it truthiness would fall
    # back to __len__ and every node would be true.
    __bool__ = __nonzero__


    def __getattr__ (self, name):
        '''Returns the attribute/child stored under name, or None.

        Only called when normal attribute lookup fails.  Names that
        start with '__' raise AttributeError so that probes for
        special methods are not answered with None.
        '''
        if name.startswith ('__'):
            raise AttributeError (name)
        # Go through __dict__ so that a node whose __init__ has not
        # run does not recurse endlessly looking for '_attrs'.
        attrs = self.__dict__.get ('_attrs')
        if attrs is None:
            raise AttributeError (name)
        return attrs.get (name, None)


    def _add_xml_attr (self, name, value):
        '''Stores value under name (after applying nameChangeDict).

        The first value for a name is stored as is; further values
        for the same name turn the entry into a list.
        '''
        name = self._ncDict.get (name) or name
        if name not in self._attrs:
            self._attrs[name] = value
            return
        # multiple attributes of the same name are represented by a list
        children = self._attrs[name]
        if not isinstance (children, list):
            children = [children]
            self._attrs[name] = children
        children.append (value)


    def __str__ (self):
        return self.stringify()


    def __repr__ (self):
        '''Compact one-line form: {name:repr(value), ...}, sorted by
        name, with the text content (if any) appended as 'data'.'''
        items = sorted (self._attrs.items())
        if self._data:
            items.append (('data', self._data))
        return u'{%s}' % ', '.join ([u'%s:%s' % (k, repr (v))
                                     for k, v in items])


    def attributes (self):
        '''Returns the dictionary of attributes and child elements
        (the live dictionary, not a copy).'''
        return self._attrs


    @staticmethod
    def isiterable (obj):
        '''Returns a true value if obj has an __iter__ method.

        Strings are never reported as iterable: on Python 2 they have
        no __iter__, and they are excluded explicitly so that Python 3
        behaves the same way.
        '''
        if isinstance (obj, (str, type (u''))):
            return False
        return getattr (obj, '__iter__', False)


    @staticmethod
    def _outputValues (obj, name, offset):
        '''Formats obj, optionally labelled with name, indented by
        offset spaces.

        Lists are written one element per line between brackets;
        anything else is handed to pprint.pformat.
        '''
        retval = ' ' * offset
        if name:
            retval += '%s: ' % name
            offset += len (name) + DataNode.spaces
        if not isinstance (obj, list):
            return retval + pprint.pformat (obj,
                                            indent=offset,
                                            width=1)
        # the first element is introduced by the opening bracket, all
        # following ones by a comma
        separator = '[\n '
        for value in obj:
            retval += separator + ' ' * offset
            separator = ',\n '
            if isinstance (value, DataNode):
                retval += value.stringify (offset=offset)
            elif DataNode.isiterable (value):
                retval += DataNode._outputValues (value, '', offset)
            else:
                retval += "%s" % value
        retval += '\n' + ' ' * (offset - 2) + ']\n'
        return retval


    def stringify (self, name = '', offset = 0):
        '''Returns a multi-line, indented rendering of this node.

        name   - label printed in front of the node (none if empty)
        offset - number of spaces of indentation
        '''
        # is this just data and nothing below
        if self._data and not self._attrs:
            return DataNode._outputValues (self._data, name, offset)
        # this has attributes
        retval = ''
        if name:
            retval += '\n' + ' ' * offset
            retval += '%s: ' % name
        tempspace = offset + 3
        entries = []
        for key, value in sorted (self._attrs.items()):
            if isinstance (value, DataNode):
                entries.append (value.stringify (key, tempspace))
            else:
                entries.append (DataNode._outputValues (value, key,
                                                        tempspace))
        # this has data too
        if self._data:
            entries.append (DataNode._outputValues (self._data, name,
                                                    tempspace))
        # as before, a node with nothing to show gets no opening brace
        if entries:
            retval += '{ \n' + ',\n'.join (entries)
        retval += '\n ' + ' ' * offset + '}'
        return retval
156 
157 
158 
class TreeBuilder (xml.sax.handler.ContentHandler):
    '''SAX content handler that builds a tree of DataNode objects.

    Every element becomes an entry of its parent node, stored under
    the element name with all characters other than letters, digits
    and underscore replaced by '_'.  Elements without any attribute
    or child are stored as their (stripped) text instead of as a
    DataNode.
    '''

    # matches every character that may not appear in an identifier
    non_id_char = re.compile ('[^_0-9a-zA-Z]')

    def __init__ (self, **kwargs):
        '''Keyword arguments:
        nameChangeDict - dictionary of names to change, handed on to
                         every DataNode that is created
        '''
        # Explicit base call (not super()) so that this also works
        # with the old-style ContentHandler class of Python 2.
        xml.sax.handler.ContentHandler.__init__ (self)
        self._stack = []        # (node, text parts) of open parents
        self._text_parts = []   # text chunks of the current element
        self._ncDict = kwargs.get ('nameChangeDict', {})
        self._root = DataNode (nameChangeDict = self._ncDict)
        self.current = self._root

    def startElement (self, name, attrs):
        '''Opens a new node and copies the XML attributes into it.'''
        self._stack.append ((self.current, self._text_parts))
        self.current = DataNode (nameChangeDict = self._ncDict)
        self._text_parts = []
        # xml attributes --> python attributes
        for k, v in attrs.items():
            self.current._add_xml_attr (TreeBuilder._name_mangle (k), v)

    def endElement (self, name):
        '''Closes the current node and attaches it to its parent.'''
        text = ''.join (self._text_parts).strip()
        if text:
            self.current._data = text
        if self.current.attributes():
            obj = self.current
        else:
            # a text only node is simply represented by the string
            obj = text or ''
        self.current, self._text_parts = self._stack.pop()
        self.current._add_xml_attr (TreeBuilder._name_mangle (name), obj)

    def characters (self, content):
        '''Collects one chunk of text; chunks are joined in endElement.'''
        self._text_parts.append (content)

    def root (self):
        '''Returns the artificial root node that holds the document.'''
        return self._root

    def topLevel (self):
        '''Returns top level object

        Raises IndexError if nothing has been parsed yet.
        '''
        # list() is needed because dictionary views cannot be indexed
        # on Python 3
        return list (self._root.attributes().values())[0]


    @staticmethod
    def _name_mangle (name):
        '''Replaces every non-identifier character of name by "_".'''
        return TreeBuilder.non_id_char.sub ('_', name)
204 
205 
# (pattern, replacement) pairs that fixQuoteValue applies, in this
# order, to the text of a double-quoted attribute value.  '&' has to
# stay first: otherwise the ampersands introduced by the other
# replacements would be escaped a second time.
regexList = [ (re.compile (r'&'), '&amp;'  ),
              (re.compile (r'<'), '&lt;'   ),
              (re.compile (r'>'), '&gt;'   ),
              (re.compile (r'"'), '&quot;' ),
              (re.compile (r"'"), '&#39;'  )
              ]

# group 1: last character of the attribute name, '=' and opening quote
# group 2: the attribute value between the double quotes
quoteRE = re.compile (r'(\w\s*=\s*")([^"]+)"')
214 
def fixQuoteValue (match):
    '''Replacement callback for quoteRE.sub().

    Runs every (pattern, replacement) pair of regexList over the
    quoted value (group 2) and returns it re-assembled with the
    untouched prefix (group 1) and the closing double quote.
    '''
    prefix, value = match.group (1), match.group (2)
    for pattern, replacement in regexList:
        value = pattern.sub (replacement, value)
    return prefix + value + '"'
221 
222 
def xml2obj (**kwargs):
    '''Converts XML data into native Python object.

    input source: Exactly one of the three following is needed
      filehandle - input from file handle
      contents   - input from string
      filename   - input from filename

    options:
      filtering      - boolean value telling code whether or not to
                       filter the input: if true, the whole input is
                       read into memory and every double-quoted
                       attribute value is passed through
                       fixQuoteValue before parsing.  A file handle
                       given by the caller is closed after reading.
      nameChangeDict - dictionary of names to change in python object

    Returns the top level object produced by TreeBuilder.

    Raises RuntimeError if no input source or more than one is given,
    or if the file named by filename cannot be opened.  Errors of the
    XML parser itself are passed on unchanged.
    '''

    # make sure we have exactly 1 input source
    filehandle = kwargs.get ('filehandle')
    contents   = kwargs.get ('contents')
    filename   = kwargs.get ('filename')
    numSources = len ([src for src in (filehandle, contents, filename)
                       if src])
    if not numSources:
        raise RuntimeError ("You must provide 'filehandle', 'contents', or 'filename'")
    if numSources > 1:
        raise RuntimeError ("You must provide only ONE of 'filehandle', 'contents', or 'filename'")

    # a file opened here is also closed here, whatever happens later
    openedHere = False
    if filename:
        try:
            filehandle = open (filename, 'r')
        except (IOError, OSError):
            raise RuntimeError ("Failed to open '%s'" % filename)
        openedHere = True

    try:
        fromString = bool (contents)
        if kwargs.get ('filtering'):
            # if we are filtering, we need to read in the contents to
            # modify them
            if not fromString:
                contents = ''.join (filehandle)
                filehandle.close()
            contents = quoteRE.sub (fixQuoteValue, contents)
            # Parse the filtered text even if it is empty, so that
            # empty input is reported by the parser and not as a
            # failure to open a file.
            fromString = True

        ncDict = kwargs.get ('nameChangeDict', {})
        builder = TreeBuilder (nameChangeDict = ncDict)
        if fromString:
            xml.sax.parseString (contents, builder)
        else:
            xml.sax.parse (filehandle, builder)
    finally:
        if openedHere:
            filehandle.close()
    return builder.topLevel()
void strip(std::string &input, const std::string &blanks=" \n\t")
Definition: stringTools.cc:16
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
list object
Definition: dbtoconf.py:77