CMS 3D CMS Logo

XML2Python.py
Go to the documentation of this file.
1 from __future__ import print_function
2 ## Original version of code heavily based on recipe written by Wai Yip
3 ## Tung, released under PSF license.
4 ## http://code.activestate.com/recipes/534109/
5 
6 import re
7 import os
8 import xml.sax.handler
9 import pprint
10 import six
11 
class DataNode(object):
    '''Container for one XML element converted to a Python object.

    XML attributes and child elements are stored by name in a single
    dictionary and can be read as ``node.name`` or ``node['name']``;
    a name that is not present yields ``None``.  When the same name is
    added more than once the stored value becomes a list.  Text found
    directly inside the element is kept separately in ``_data``.

    A single node also behaves like a list of length one
    (``len(node) == 1``, ``node[0] is node``), so callers can treat
    "one child" and "several children" uniformly.

    Keyword arguments:
       nameChangeDict - dictionary mapping an incoming name to the name
                        under which it should be stored instead
    '''

    # extra indentation applied after a "name: " label when printing
    spaces = 4

    def __init__(self, **kwargs):
        self._attrs = {}     # XML attributes and child elements
        self._data = None    # child text data
        self._ncDict = kwargs.get('nameChangeDict', {})

    def __len__(self):
        # treat single element as a list of 1
        return 1

    def __getitem__(self, key):
        '''String keys look up a stored name (None if missing); any
        other key indexes the one-element list [self].'''
        if isinstance(key, str):
            return self._attrs.get(key, None)
        return [self][key]

    def __contains__(self, name):
        return name in self._attrs

    def __nonzero__(self):
        '''A node is true when it holds attributes/children or text.'''
        return bool(self._attrs or self._data)

    # Python 3 looks for __bool__; without it truthiness would fall
    # back to __len__ and every node would be true.
    __bool__ = __nonzero__

    def __getattr__(self, name):
        '''Returns the stored value for 'name', or None if missing.'''
        if name.startswith('__'):
            # special-method probes must fail normally
            raise AttributeError(name)
        # Read through __dict__ so that a lookup made before __init__
        # has run cannot recurse back into __getattr__.
        attrs = self.__dict__.get('_attrs')
        if attrs is None:
            raise AttributeError(name)
        return attrs.get(name, None)

    def _add_xml_attr(self, name, value):
        '''Stores 'value' under 'name' (after applying the name change
        dictionary), turning the entry into a list on repeats.'''
        change = self._ncDict.get(name)
        if change:
            name = change
        if name in self._attrs:
            # multiple attribute of the same name are represented by a list
            children = self._attrs[name]
            if not isinstance(children, list):
                children = [children]
                self._attrs[name] = children
            children.append(value)
        else:
            self._attrs[name] = value

    def __str__(self):
        return self.stringify()

    def __repr__(self):
        items = sorted(self._attrs.items())
        if self._data:
            items.append(('data', self._data))
        return u'{%s}' % ', '.join([u'%s:%s' % (k, repr(v)) for k, v in items])

    def attributes(self):
        '''Returns the dictionary of attributes and child elements.'''
        return self._attrs

    @staticmethod
    def isiterable(obj):
        '''Returns the object's __iter__ attribute if it has one
        (a true value), otherwise False.'''
        return getattr(obj, '__iter__', False)

    @staticmethod
    def _outputValues(obj, name, offset):
        '''Formats 'obj' as text indented by 'offset' spaces and
        preceded by "name: " when a name is given.  Lists are written
        one entry per line between square brackets; anything else is
        formatted with pprint.'''
        retval = ' ' * offset
        if name:
            retval += '%s: ' % name
            offset += len(name) + DataNode.spaces
        # if this is a list
        if isinstance(obj, list):
            for index, value in enumerate(obj):
                opener = ',\n ' if index else '[\n '
                retval += opener + ' ' * offset
                if isinstance(value, DataNode):
                    retval += value.stringify(offset=offset)
                elif DataNode.isiterable(value):
                    retval += DataNode._outputValues(value, '', offset)
                else:
                    retval += "%s" % value
            retval += '\n' + ' ' * (offset - 2) + ']\n'
            return retval
        retval += pprint.pformat(obj,
                                 indent=offset,
                                 width=1)
        return retval

    def stringify(self, name='', offset=0):
        '''Returns a multi-line, human-readable rendering of this node.

        name   - label printed in front of the node, if any
        offset - number of spaces of indentation to start from
        '''
        # is this just data and nothing below
        if self._data and not self._attrs:
            return DataNode._outputValues(self._data, name, offset)
        # this has attributes
        retval = ''
        if name:
            retval += '\n' + ' ' * offset
            retval += '%s: ' % name
        tempspace = offset + 3
        for index, (key, value) in enumerate(sorted(self._attrs.items())):
            # the opening brace is only written once there is an entry
            retval += ',\n' if index else '{ \n'
            if isinstance(value, DataNode):
                retval += value.stringify(key, tempspace)
            else:
                retval += DataNode._outputValues(value, key, tempspace)
        # this has data too
        if self._data:
            retval += ',\n'
            retval += DataNode._outputValues(self._data, name, tempspace)
        retval += '\n ' + ' ' * offset + '}'
        return retval
158 
159 
160 
class TreeBuilder(xml.sax.handler.ContentHandler):
    '''SAX content handler that builds a tree of DataNode objects.

    Each element becomes a DataNode holding its XML attributes and its
    child elements; an element that ends up with neither is stored on
    its parent as a plain string (its stripped text, possibly empty).
    Names are mangled so that they are valid Python identifiers.

    Keyword arguments:
       nameChangeDict - dictionary of names to change, handed on to
                        every DataNode that is created
    '''

    # any character that may not appear in a Python identifier
    non_id_char = re.compile('[^_0-9a-zA-Z]')

    def __init__(self, **kwargs):
        # stack of (node, text parts) for the elements still open
        self._stack = []
        self._text_parts = []
        self._ncDict = kwargs.get('nameChangeDict', {})
        self._root = DataNode(nameChangeDict=self._ncDict)
        self.current = self._root

    def startElement(self, name, attrs):
        '''Opens a new node and copies the XML attributes onto it.'''
        self._stack.append((self.current, self._text_parts))
        self.current = DataNode(nameChangeDict=self._ncDict)
        self._text_parts = []
        # xml attributes --> python attributes
        for k, v in attrs.items():
            self.current._add_xml_attr(TreeBuilder._name_mangle(k), v)

    def endElement(self, name):
        '''Closes the current node and attaches it to its parent.'''
        text = ''.join(self._text_parts).strip()
        if text:
            self.current._data = text
        if self.current.attributes():
            obj = self.current
        else:
            # a text only node is simply represented by the string
            obj = text or ''
        self.current, self._text_parts = self._stack.pop()
        self.current._add_xml_attr(TreeBuilder._name_mangle(name), obj)

    def characters(self, content):
        '''Collects a piece of text belonging to the open element.'''
        self._text_parts.append(content)

    def root(self):
        '''Returns the artificial root node that holds the document.'''
        return self._root

    def topLevel(self):
        '''Returns top level object'''
        # dict.values() is a view on Python 3 and cannot be indexed
        # directly, so materialise it first.
        return list(self._root.attributes().values())[0]

    @staticmethod
    def _name_mangle(name):
        '''Replaces every non-identifier character with an underscore.'''
        return TreeBuilder.non_id_char.sub('_', name)
206 
207 
# (pattern, replacement) pairs applied, in this order, to the text of a
# quoted attribute value.  The ampersand rule has to run first so that
# the ampersands introduced by the later replacements are not escaped
# again; it skips ampersands that already start one of the predefined
# XML entities or a numeric character reference.
regexList = [
    (re.compile(r'&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#[xX][0-9a-fA-F]+);)'),
     '&amp;'),
    (re.compile(r'<'), '&lt;'),
    (re.compile(r'>'), '&gt;'),
    (re.compile(r'"'), '&quot;'),
    (re.compile(r"'"), '&#39;'),
]

# matches  name = "value" ; group 1 is everything up to and including
# the opening quote, group 2 is the value between the quotes
quoteRE = re.compile(r'(\w\s*=\s*")([^"]+)"')
216 
def fixQuoteValue(match):
    '''Substitution callback for quoteRE: runs every replacement in
    regexList, in order, over the quoted value (group 2) and returns
    it re-assembled with its original prefix (group 1) and a closing
    double quote.'''
    prefix, value = match.group(1), match.group(2)
    for pattern, replacement in regexList:
        value = pattern.sub(replacement, value)
    return ''.join((prefix, value, '"'))
223 
224 
def xml2obj(**kwargs):
    ''' Converts XML data into native Python object.  Takes a file
    handle, a string, or a file name as input.  Illegal characters are
    only fixed when 'filtering' is requested.

    input source:  Exactly one of the three following is needed
       filehandle     - input from file handle
       contents       - input from string
       filename       - input from filename

    options:
       filtering      - boolean value telling code whether or not to filter
                        input selection to remove illegal XML characters
       nameChangeDict - dictionaries of names to change in python object

    Returns the object built for the top level XML element.

    Raises RuntimeError if not exactly one input source is given or if
    'filename' can not be opened.  Errors from the XML parser are not
    caught here.'''

    # make sure we have exactly 1 input source
    filehandle = kwargs.get('filehandle')
    contents = kwargs.get('contents')
    filename = kwargs.get('filename')
    numSources = len([src for src in (filehandle, contents, filename) if src])
    if numSources == 0:
        raise RuntimeError("You must provide 'filehandle', 'contents', or 'filename'")
    if numSources > 1:
        raise RuntimeError("You must provide only ONE of 'filehandle', 'contents', or 'filename'")

    ncDict = kwargs.get('nameChangeDict', {})
    builder = TreeBuilder(nameChangeDict=ncDict)

    # a file opened here is ours to close, whatever happens below
    ownHandle = None
    if filename:
        try:
            ownHandle = filehandle = open(filename, 'r')
        except Exception:
            raise RuntimeError("Failed to open '%s'" % filename)

    try:
        # are we filtering?
        if kwargs.get('filtering'):
            # if we are filtering, we need to read in the contents to
            # modify them
            if filehandle:
                contents = ''.join(filehandle)
                # as before, a handle that has been read for filtering
                # is closed, including one supplied by the caller
                filehandle.close()
                filehandle = None
            contents = quoteRE.sub(fixQuoteValue, contents)

        if filehandle:
            xml.sax.parse(filehandle, builder)
        else:
            xml.sax.parseString(contents, builder)
    finally:
        if ownHandle is not None:
            ownHandle.close()
    return builder.topLevel()
def __init__(self, kwargs)
Definition: XML2Python.py:16
def __nonzero__(self)
Definition: XML2Python.py:38
def isiterable(obj)
Definition: XML2Python.py:80
def startElement(self, name, attrs)
Definition: XML2Python.py:172
def _add_xml_attr(self, name, value)
Definition: XML2Python.py:49
def _outputValues(obj, name, offset)
Definition: XML2Python.py:85
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def __str__(self)
Definition: XML2Python.py:64
def __repr__(self)
Definition: XML2Python.py:68
def __getattr__(self, name)
Definition: XML2Python.py:42
def __len__(self)
Definition: XML2Python.py:22
def fixQuoteValue(match)
Definition: XML2Python.py:217
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def attributes(self)
Definition: XML2Python.py:75
def __init__(self, kwargs)
Definition: XML2Python.py:165
def xml2obj(kwargs)
Definition: XML2Python.py:225
def characters(self, content)
Definition: XML2Python.py:192
def __contains__(self, name)
Definition: XML2Python.py:34
def stringify(self, name='', offset=0)
Definition: XML2Python.py:117
def __getitem__(self, key)
Definition: XML2Python.py:27
def endElement(self, name)
Definition: XML2Python.py:180