CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
SimpleSAXParser.cc
Go to the documentation of this file.
2 
8 {
 // Translates the name of an XML entity (the text the callers in this file
 // read between '&' and ';') into the literal text it stands for.
 // Only "quot", "amp", "lt" and "gt" are recognised here; any other name --
 // including "apos" and numeric references such as "#39" -- reaches the
 // throw at the bottom and is reported as a ParserError.
 // NOTE(review): the function header is not part of this listing; the
 // signature is taken to be std::string parseEntity(const std::string &entity).
9  if (entity == "quot")
10  return "\"";
11  else if (entity == "amp")
12  return "&";
13  else if (entity == "lt")
14  return "<";
15  else if (entity == "gt")
16  return ">";
 // No match above: the entity name is unknown to this parser.
17  throw ParserError("Unknown entity " + entity);
18 }
19 
20 void
22 {
 // Debug aid: when SIMPLE_SAX_PARSER_DEBUG is defined, writes the name of
 // `state` to std::cerr, one line per call. Without the macro the body is
 // empty and the call does nothing.
23 #ifdef SIMPLE_SAX_PARSER_DEBUG
 // Lookup table indexed directly by the numeric value of `state`.
 // NOTE(review): the entries therefore have to be listed in the same order
 // as the PARSER_STATES enumerators, which are declared outside this
 // listing -- confirm. There is no bounds check on the index.
 // NOTE(review): the array elements are `char *` initialised from string
 // literals; `const char *` would avoid the deprecated conversion.
24  static char *debug_states[] = {
25  "IN_DOCUMENT",
26  "IN_BEGIN_TAG",
27  "IN_DONE",
28  "IN_BEGIN_ELEMENT",
29  "IN_ELEMENT_WHITESPACE",
30  "IN_END_ELEMENT",
31  "IN_ATTRIBUTE_KEY",
32  "IN_END_TAG",
33  "IN_DATA",
34  "IN_BEGIN_ATTRIBUTE_VALUE",
35  "IN_STRING",
36  "IN_END_ATTRIBUTE_VALUE",
37  "IN_STRING_ENTITY",
38  "IN_DATA_ENTITY"
39  };
40 
41  std::cerr << debug_states[state] << std::endl;
42 #endif
43 }
44 
52 void
54 {
 // Main parsing loop: a hand-written state machine that walks the input and
 // invokes the endElement() / data() callbacks as it goes. It returns when
 // end of input is seen in IN_END_TAG or IN_DATA, and throws ParserError on
 // malformed input.
 //
 // NOTE(review): several lines of the original file are absent from this
 // listing (the function header, some `case` labels, the declaration of
 // `tmp`, and statements inside two `{ }` blocks). Remarks about those
 // places below are marked as assumptions.
 // NOTE(review): the helpers are defined elsewhere. The comments assume
 // nextChar() peeks at the next character, skipChar(c) consumes it only when
 // it equals c (returning whether it did), and getToken(d) returns the text
 // up to a delimiter in d -- confirm against their definitions.
55  enum PARSER_STATES state = IN_DOCUMENT;
56  // Current delimiters for strings in attributes.
 // stringDelims[0] is the quote character that closes the current attribute
 // value (set in the quote-handling branch below); stringDelims[1] is '&',
 // which starts an entity inside the value.
57  char stringDelims[] = "\"&";
58  std::string attributeName;
59  std::string attributeValue;
 // NOTE(review): `tmp`, used in IN_END_ELEMENT, has no declaration in the
 // visible lines; presumably it is declared on the line missing here.
 // Character data accumulated across IN_DATA / IN_DATA_ENTITY until it is
 // handed to data().
61  std::string currentData;
62 
63  while (state != IN_DONE)
64  {
65  debug_state_machine(state);
66 
67  switch(state)
68  {
69  // FIXME: IN_DOCUMENT should check the dtd...
 // Start of input: a leading '<' opens a tag, anything else is treated as
 // character data.
70  case IN_DOCUMENT:
71  state = IN_DATA;
72  if (skipChar('<'))
73  state = IN_BEGIN_TAG;
74  break;
75 
 // Just after '<': decide between an opening element and a closing one.
 // The 'A'..'z' range test also accepts the ASCII characters that sit
 // between 'Z' and 'a' ('[', '\', ']', '^', '_', '`').
76  case IN_BEGIN_TAG:
77  if (nextChar() >= 'A' && nextChar() <= 'z')
78  state = IN_BEGIN_ELEMENT;
79  else if (skipChar('/'))
80  state = IN_END_ELEMENT;
81  else
82  throw ParserError("Bad tag");
83  break;
84 
 // Read the element name, push it on the open-element stack and reset the
 // attribute list collected for the previous element.
85  case IN_BEGIN_ELEMENT:
86  m_attributes.clear();
87  m_elementTags.push_back(getToken(" />"));
88  if (nextChar() == ' ')
89  state = IN_ELEMENT_WHITESPACE;
90  else if (skipChar('/'))
91  state = IN_END_ELEMENT;
92  else if (skipChar('>'))
93  {
 // NOTE(review): one line of the original is missing from this block in
 // the listing -- presumably the startElement() notification; confirm.
95  state = IN_END_TAG;
96  }
97  else
98  throw ParserError("Bad element.");
99  break;
100 
 // NOTE(review): the `case` label for this branch is missing from the
 // listing; from the transitions above it is presumably
 // IN_ELEMENT_WHITESPACE. Skips blanks, newlines and tabs between the
 // element name / attributes.
102  while(skipChar(' ') || skipChar('\n') || skipChar('\t'))
103  {}
104 
 // After the whitespace only an attribute name or '/' is accepted; a '>'
 // here ends up in the ParserError branch. The '/' is only peeked at, not
 // consumed, unlike the skipChar('/') used in the other branches.
105  if (nextChar() >= 'A' && nextChar() <= 'z')
106  state=IN_ATTRIBUTE_KEY;
107  else if (nextChar() == '/')
108  state = IN_END_ELEMENT;
109  else
 // The message has no separator between "element" and the tag name.
110  throw ParserError("Syntax error in element" + m_elementTags.back());
111  break;
112 
 // Attribute name: everything up to '='.
113  case IN_ATTRIBUTE_KEY:
114  attributeName = getToken('=');
115  state = IN_BEGIN_ATTRIBUTE_VALUE;
116  break;
117 
 // NOTE(review): the `case` label is missing from the listing; presumably
 // IN_BEGIN_ATTRIBUTE_VALUE. An attribute value must start with a double
 // or a single quote; the same quote is remembered in stringDelims[0] as
 // the terminator of the value.
119  if (skipChar('"'))
120  {
121  state = IN_STRING;
122  attributeValue.clear();
123  stringDelims[0] = '\"';
124  }
125  else if (skipChar('\''))
126  {
127  state = IN_STRING;
128  attributeValue.clear();
129  stringDelims[0] = '\'';
130  }
131  else
132  throw ParserError("Expecting quotes.");
133  break;
134 
 // Inside a quoted attribute value: append text up to the closing quote
 // or to '&'. This state is re-entered after every entity, so the value
 // is built up piecewise.
135  case IN_STRING:
136  attributeValue += getToken(stringDelims);
137  if (skipChar(stringDelims[0]))
138  {
139  // Insert the attribute at its std::lower_bound position; a key that is
140  // specified more than once is rejected with a ParserError (not replaced).
 // The ordering used by lower_bound comes from Attribute's comparison,
 // which is defined outside this listing.
141  Attribute attr(attributeName, attributeValue);
142  Attributes::iterator i = std::lower_bound(m_attributes.begin(),
143  m_attributes.end(),
144  attr);
145  if (i != m_attributes.end() && i->key == attr.key)
146  throw ParserError("Attribute " + i->key + " defined more than once");
147  m_attributes.insert(i, attr);
148  state = IN_END_ATTRIBUTE_VALUE;
149  }
150  else if (skipChar(stringDelims[1]))
151  state = IN_STRING_ENTITY;
152  else
153  throw ParserError("Unexpected end of input at " + attributeValue);
154  break;
155 
 // NOTE(review): the `case` label is missing from the listing; presumably
 // IN_END_ATTRIBUTE_VALUE. Whatever getToken() returns here is discarded,
 // so text between the closing quote and the next ' ', '/' or '>' is
 // silently dropped.
 // NOTE(review): there is no final `else`; if the next character is none
 // of ' ', '/' or '>' the state is left unchanged and this branch runs
 // again -- check that this cannot loop forever at end of input.
157  getToken(" />");
158  if (nextChar() == ' ')
159  state = IN_ELEMENT_WHITESPACE;
160  else if (skipChar('/'))
161  state = IN_END_ELEMENT;
162  else if (skipChar('>'))
163  {
 // NOTE(review): one line of the original is missing from this block in
 // the listing -- presumably the startElement() notification; confirm.
165  state = IN_END_TAG;
166  }
167  break;
168 
 // Closing tag (or the tail of a self-closing one): read the name up to
 // '>', check it against the innermost open element, notify endElement()
 // and pop the stack. An empty name is accepted without comparison.
 // NOTE(review): the error message reports attributeValue as the expected
 // name, although the comparison is against m_elementTags.back().
 // NOTE(review): m_elementTags.back() / pop_back() are not guarded against
 // an empty stack (a closing tag with no element open) -- confirm.
169  case IN_END_ELEMENT:
170  tmp = getToken('>');
171  if (!tmp.empty() && tmp != m_elementTags.back())
172  throw ParserError("Non-matching closing element "
173  + tmp + " for " + attributeValue);
174  endElement(tmp);
175  m_elementTags.pop_back();
176  state = IN_END_TAG;
177  break;
178 
 // After a complete tag: stop at end of input, otherwise continue with
 // the next tag or with character data.
179  case IN_END_TAG:
180  if (nextChar() == EOF)
181  return;
182  else if (skipChar('<'))
183  state = IN_BEGIN_TAG;
184  else
185  state = IN_DATA;
186  break;
187 
 // Character data: accumulate text up to '<' or '&'. The text is handed
 // to data() when a tag starts or the input ends; on '&' it is kept and
 // extended by the entity's replacement text.
188  case IN_DATA:
189  currentData += getToken("<&");
190  if (skipChar('&'))
191  state = IN_DATA_ENTITY;
192  else if (skipChar('<'))
193  {
194  data(currentData);
195  currentData.clear();
196  state = IN_BEGIN_TAG;
197  }
198  else if (nextChar() == EOF)
199  {
200  data(currentData);
201  return;
202  }
203  else
204  throw ParserError("Unexpected end of input in element " + m_elementTags.back() + currentData);
205  break;
206 
 // Entity inside character data: the name runs up to ';' and its
 // replacement text is appended to the pending data.
207  case IN_DATA_ENTITY:
208  currentData += parseEntity(getToken(';'));
209  state = IN_DATA;
210  break;
211 
 // Entity inside an attribute value: same handling, appended to the
 // attribute value being built.
212  case IN_STRING_ENTITY:
213  attributeValue += parseEntity(getToken(';'));
214  state = IN_STRING;
215  break;
216 
 // Not reachable from this loop as written: the loop condition already
 // excludes IN_DONE before the switch is entered.
217  case IN_DONE:
218  return;
219  }
220  }
221 }
222 
224 
/// Grows a malloc-family buffer by 1024 bytes.
///
/// On success `*buffer` and `*maxSize` describe the enlarged block. On
/// failure both are left untouched, so the caller still owns the old block
/// and can free it.
///
/// @return true if the buffer was enlarged, false if the allocation failed.
static bool
growTokenBuffer(char **buffer, size_t *maxSize)
{
  const size_t grownSize = *maxSize + 1024;
  char *grown = static_cast<char *>(realloc(*buffer, grownSize));
  if (!grown)
    return false;

  *buffer = grown;
  *maxSize = grownSize;
  return true;
}

/// Reads one token from @a in into a growable, NUL-terminated C buffer.
///
/// The token starts with `*firstChar` (a character the caller has already
/// read from the stream) and continues with characters taken from @a in
/// until a separator or the end of the stream is met. The terminating
/// character is not stored in the buffer; it is handed back via
/// `*firstChar` so that it can seed the next call.
///
/// @param in          stream to read from.
/// @param buffer      address of a buffer obtained from malloc/realloc (it
///                    may hold a null pointer); it is reallocated as needed.
/// @param maxSize     address of the current capacity of `*buffer`; updated
///                    whenever the buffer grows.
/// @param separators  NUL-terminated list of separator characters. The first
///                    entry is compared directly and the rest is searched
///                    starting at `separators + 1`, so at least two bytes
///                    must be readable. Because of how strchr treats the
///                    terminator, a '\0' read from the stream always ends
///                    the token.
/// @param firstChar   in: first character of the token, or EOF;
///                    out: the character that terminated the token.
///
/// @return true if the token was ended by a separator, or if `*firstChar`
///         was already EOF or a separator (empty token, nothing is read);
///         false if the stream ended first (the buffer still holds what was
///         read) or if memory could not be allocated. In the latter case the
///         buffer, if any, is kept and NUL-terminated within its capacity.
bool
fgettoken(std::istream &in, char **buffer, size_t *maxSize, const char *separators,
          int *firstChar)
{
  // Make sure there is room for at least one byte before anything is
  // written; a null buffer is treated as having no capacity at all.
  if (!*buffer)
    *maxSize = 0;
  if (*maxSize == 0 && !growTokenBuffer(buffer, maxSize))
    return false;

  // If the passed first character is EOF or a separator, return an empty
  // token; otherwise use it as the first character of the buffer.
  if (*firstChar == EOF
      || static_cast<int>(separators[0]) == *firstChar
      || strchr(separators + 1, *firstChar))
  {
    (*buffer)[0] = 0;
    return true;
  }

  (*buffer)[0] = static_cast<char>(*firstChar);
  size_t i = 1;

  while (true)
  {
    // Keep one free slot for the next character or the terminator.
    if (i >= *maxSize && !growTokenBuffer(buffer, maxSize))
    {
      // Out of memory: the old buffer is still valid, terminate it inside
      // its capacity instead of dropping the pointer.
      (*buffer)[*maxSize - 1] = 0;
      return false;
    }

    const int c = in.get();

    if (c == EOF || separators[0] == c || strchr(separators + 1, c))
    {
      (*buffer)[i] = 0;
      *firstChar = c;
      // A separator ends the token normally; EOF is reported as false.
      return c != EOF;
    }

    (*buffer)[i++] = static_cast<char>(c);
  }
}
int i
Definition: DBlmapReader.cc:9
virtual void endElement(const std::string &)
std::string parseEntity(const std::string &entity)
virtual void data(const std::string &)
tuple maxSize
'/store/data/Commissioning08/BeamHalo/RECO/StuffAlmostToP5_v1/000/061/642/10A0FE34-A67D-DD11-AD05-000...
bool skipChar(int c)
std::string getToken(const char *delim)
tuple attr
Definition: asciidump.py:432
std::vector< std::string > m_elementTags
bool fgettoken(std::istream &in, char **buffer, size_t *maxSize, const char *separators, int *firstChar)
virtual ~SimpleSAXParser()
std::vector< std::vector< double > > tmp
Definition: MVATrainer.cc:100
void debug_state_machine(enum SimpleSAXParser::PARSER_STATES state)
virtual void startElement(const std::string &, Attributes &)
Attributes m_attributes