CMS 3D CMS Logo

SimpleSAXParser.cc
Go to the documentation of this file.
2 
7  if (entity == "quot")
8  return "\"";
9  else if (entity == "amp")
10  return "&";
11  else if (entity == "lt")
12  return "<";
13  else if (entity == "gt")
14  return ">";
15  throw ParserError("Unknown entity " + entity);
16 }
17 
19 #ifdef SIMPLE_SAX_PARSER_DEBUG
20  static char *debug_states[] = {"IN_DOCUMENT",
21  "IN_BEGIN_TAG",
22  "IN_DONE",
23  "IN_BEGIN_ELEMENT",
24  "IN_ELEMENT_WHITESPACE",
25  "IN_END_ELEMENT",
26  "IN_ATTRIBUTE_KEY",
27  "IN_END_TAG",
28  "IN_DATA",
29  "IN_BEGIN_ATTRIBUTE_VALUE",
30  "IN_STRING",
31  "IN_END_ATTRIBUTE_VALUE",
32  "IN_STRING_ENTITY",
33  "IN_DATA_ENTITY"};
34 
35  std::cerr << debug_states[state] << std::endl;
36 #endif
37 }
38 
48  // Current delimiters for strings in attributes.
49  char stringDelims[] = "\"&";
50  std::string attributeName;
51  std::string attributeValue;
53  std::string currentData;
54 
55  while (state != IN_DONE) {
57 
58  switch (state) {
59  // FIXME: IN_DOCUMENT should check the dtd...
60  case IN_DOCUMENT:
61  state = IN_DATA;
62  if (skipChar('<'))
64  break;
65 
66  case IN_BEGIN_TAG:
67  if (nextChar() >= 'A' && nextChar() <= 'z')
69  else if (skipChar('/'))
71  else
72  throw ParserError("Bad tag");
73  break;
74 
75  case IN_BEGIN_ELEMENT:
76  m_attributes.clear();
77  m_elementTags.push_back(getToken(" />"));
78  if (nextChar() == ' ')
80  else if (skipChar('/'))
82  else if (skipChar('>')) {
84  state = IN_END_TAG;
85  } else
86  throw ParserError("Bad element.");
87  break;
88 
90  while (skipChar(' ') || skipChar('\n') || skipChar('\t')) {
91  }
92 
93  if (nextChar() >= 'A' && nextChar() <= 'z')
95  else if (nextChar() == '/')
97  else
98  throw ParserError("Syntax error in element" + m_elementTags.back());
99  break;
100 
101  case IN_ATTRIBUTE_KEY:
102  attributeName = getToken('=');
104  break;
105 
107  if (skipChar('"')) {
108  state = IN_STRING;
109  attributeValue.clear();
110  stringDelims[0] = '\"';
111  } else if (skipChar('\'')) {
112  state = IN_STRING;
113  attributeValue.clear();
114  stringDelims[0] = '\'';
115  } else
116  throw ParserError("Expecting quotes.");
117  break;
118 
119  case IN_STRING:
120  attributeValue += getToken(stringDelims);
121  if (skipChar(stringDelims[0])) {
122  // Save the attributes in order, replacing those that are
123  // specified more than once.
124  Attribute attr(attributeName, attributeValue);
125  Attributes::iterator i = std::lower_bound(m_attributes.begin(), m_attributes.end(), attr);
126  if (i != m_attributes.end() && i->key == attr.key)
127  throw ParserError("Attribute " + i->key + " defined more than once");
128  m_attributes.insert(i, attr);
130  } else if (skipChar(stringDelims[1]))
132  else
133  throw ParserError("Unexpected end of input at " + attributeValue);
134  break;
135 
137  getToken(" />");
138  if (nextChar() == ' ')
140  else if (skipChar('/'))
142  else if (skipChar('>')) {
144  state = IN_END_TAG;
145  }
146  break;
147 
148  case IN_END_ELEMENT:
149  tmp = getToken('>');
150  if (!tmp.empty() && tmp != m_elementTags.back())
151  throw ParserError("Non-matching closing element " + tmp + " for " + attributeValue);
152  endElement(tmp);
153  m_elementTags.pop_back();
154  state = IN_END_TAG;
155  break;
156 
157  case IN_END_TAG:
158  if (nextChar() == EOF)
159  return;
160  else if (skipChar('<'))
162  else
163  state = IN_DATA;
164  break;
165 
166  case IN_DATA:
167  currentData += getToken("<&");
168  if (skipChar('&'))
170  else if (skipChar('<')) {
171  data(currentData);
172  currentData.clear();
174  } else if (nextChar() == EOF) {
175  data(currentData);
176  return;
177  } else
178  throw ParserError("Unexpected end of input in element " + m_elementTags.back() + currentData);
179  break;
180 
181  case IN_DATA_ENTITY:
182  currentData += parseEntity(getToken(';'));
183  state = IN_DATA;
184  break;
185 
186  case IN_STRING_ENTITY:
187  attributeValue += parseEntity(getToken(';'));
188  state = IN_STRING;
189  break;
190 
191  case IN_DONE:
192  return;
193  }
194  }
195 }
196 
198 
/** Read one token from @a in into a growable, NUL-terminated C buffer.
 *
 * The token starts with @a *firstChar (a character the caller has already
 * taken from the stream) and continues with characters read from @a in
 * until a separator or the end of the stream is met.
 *
 * @param in         stream the remaining characters are read from.
 * @param buffer     address of a buffer managed with malloc/realloc; it is
 *                   grown with realloc when the token does not fit, so the
 *                   pointer may change. A null buffer (or a zero capacity)
 *                   is replaced by a freshly allocated one.
 * @param maxSize    address of the capacity of @a *buffer in bytes; updated
 *                   whenever the buffer is (re)allocated.
 * @param separators NUL-terminated list of characters ending a token. A NUL
 *                   byte in the input always ends the token as well.
 * @param firstChar  in: first character of the token (or EOF / a separator);
 *                   out: the separator (or EOF) that ended the token. It is
 *                   left untouched when the token is empty.
 *
 * @return true if the token is empty (because @a *firstChar is EOF or a
 *         separator) or was ended by a separator; false if the end of the
 *         stream was reached while reading, or if memory could not be
 *         allocated. On allocation failure @a *buffer stays valid and owned
 *         by the caller; if it already held part of a token, that part is
 *         kept truncated and NUL-terminated.
 */
bool fgettoken(std::istream &in, char **buffer, size_t *maxSize, const char *separators, int *firstChar) {
  // Number of bytes added every time the buffer runs out of room.
  const size_t growStep = 1024;

  // Compare as unsigned char so that separators with the high bit set match
  // the 0..255 values handed out by std::istream::get(), whatever the
  // signedness of plain char is.
  const auto isSeparator = [separators](int c) {
    const unsigned char ch = static_cast<unsigned char>(c);
    if (ch == static_cast<unsigned char>(separators[0]))
      return true;
    // An empty separator list has no tail to look into.
    return separators[0] != '\0' && strchr(separators + 1, ch) != nullptr;
  };

  // The code below always writes (*buffer)[0]: make sure there is room.
  if (!*buffer || *maxSize == 0) {
    char *fresh = static_cast<char *>(realloc(*buffer, growStep));
    if (!fresh)
      return false;
    *buffer = fresh;
    *maxSize = growStep;
  }

  // If the passed first character is EOF or a separator, return an empty
  // token; otherwise use it as the first character of the buffer.
  if (*firstChar == EOF || isSeparator(*firstChar)) {
    (*buffer)[0] = 0;
    return true;
  }
  (*buffer)[0] = static_cast<char>(*firstChar);

  size_t i = 1;

  while (true) {
    if (i >= *maxSize) {
      // Go through a temporary: on failure realloc leaves the old block
      // allocated, so overwriting *buffer directly would leak it.
      const size_t grownSize = *maxSize + growStep;
      char *grown = static_cast<char *>(realloc(*buffer, grownSize));
      if (!grown) {
        (*buffer)[*maxSize - 1] = 0;
        return false;
      }
      *buffer = grown;
      *maxSize = grownSize;
    }

    const int c = in.get();

    if (c == EOF) {
      (*buffer)[i] = 0;
      *firstChar = c;
      return false;
    }

    if (isSeparator(c)) {
      (*buffer)[i] = 0;
      *firstChar = c;
      return true;
    }

    (*buffer)[i++] = static_cast<char>(c);
  }
}
virtual void endElement(const std::string &)
std::string parseEntity(const std::string &entity)
virtual void data(const std::string &)
bool skipChar(int c)
std::string getToken(const char *delim)
std::vector< std::string > m_elementTags
void * realloc(void *ptr, size_t size) noexcept
bool fgettoken(std::istream &in, char **buffer, size_t *maxSize, const char *separators, int *firstChar)
virtual ~SimpleSAXParser()
void debug_state_machine(enum SimpleSAXParser::PARSER_STATES state)
tmp
align.sh
Definition: createJobs.py:716
virtual void startElement(const std::string &, Attributes &)
Attributes m_attributes