CMS 3D CMS Logo

SimpleSAXParser.cc
Go to the documentation of this file.
2 
7  if (entity == "quot")
8  return "\"";
9  else if (entity == "amp")
10  return "&";
11  else if (entity == "lt")
12  return "<";
13  else if (entity == "gt")
14  return ">";
15  throw ParserError("Unknown entity " + entity);
16 }
17 
19 #ifdef SIMPLE_SAX_PARSER_DEBUG
20  static char *debug_states[] = {"IN_DOCUMENT",
21  "IN_BEGIN_TAG",
22  "IN_DONE",
23  "IN_BEGIN_ELEMENT",
24  "IN_ELEMENT_WHITESPACE",
25  "IN_END_ELEMENT",
26  "IN_ATTRIBUTE_KEY",
27  "IN_END_TAG",
28  "IN_DATA",
29  "IN_BEGIN_ATTRIBUTE_VALUE",
30  "IN_STRING",
31  "IN_END_ATTRIBUTE_VALUE",
32  "IN_STRING_ENTITY",
33  "IN_DATA_ENTITY"};
34 
35  std::cerr << debug_states[state] << std::endl;
36 #endif
37 }
38 
47  enum PARSER_STATES state = IN_DOCUMENT;
48  // Current delimiters for strings in attributes.
49  char stringDelims[] = "\"&";
50  std::string attributeName;
51  std::string attributeValue;
53  std::string currentData;
54 
55  while (state != IN_DONE) {
56  debug_state_machine(state);
57 
58  switch (state) {
59  // FIXME: IN_DOCUMENT should check the dtd...
60  case IN_DOCUMENT:
61  state = IN_DATA;
62  if (skipChar('<'))
63  state = IN_BEGIN_TAG;
64  break;
65 
66  case IN_BEGIN_TAG:
67  if (nextChar() >= 'A' && nextChar() <= 'z')
68  state = IN_BEGIN_ELEMENT;
69  else if (skipChar('/'))
70  state = IN_END_ELEMENT;
71  else
72  throw ParserError("Bad tag");
73  break;
74 
75  case IN_BEGIN_ELEMENT:
76  m_attributes.clear();
77  m_elementTags.push_back(getToken(" />"));
78  if (nextChar() == ' ')
79  state = IN_ELEMENT_WHITESPACE;
80  else if (skipChar('/'))
81  state = IN_END_ELEMENT;
82  else if (skipChar('>')) {
84  state = IN_END_TAG;
85  } else
86  throw ParserError("Bad element.");
87  break;
88 
90  while (skipChar(' ') || skipChar('\n') || skipChar('\t')) {
91  }
92 
93  if (nextChar() >= 'A' && nextChar() <= 'z')
94  state = IN_ATTRIBUTE_KEY;
95  else if (nextChar() == '/')
96  state = IN_END_ELEMENT;
97  else
98  throw ParserError("Syntax error in element" + m_elementTags.back());
99  break;
100 
101  case IN_ATTRIBUTE_KEY:
102  attributeName = getToken('=');
103  state = IN_BEGIN_ATTRIBUTE_VALUE;
104  break;
105 
107  if (skipChar('"')) {
108  state = IN_STRING;
109  attributeValue.clear();
110  stringDelims[0] = '\"';
111  } else if (skipChar('\'')) {
112  state = IN_STRING;
113  attributeValue.clear();
114  stringDelims[0] = '\'';
115  } else
116  throw ParserError("Expecting quotes.");
117  break;
118 
119  case IN_STRING:
120  attributeValue += getToken(stringDelims);
121  if (skipChar(stringDelims[0])) {
122  // Save the attributes in order, replacing those that are
123  // specified more than once.
124  Attribute attr(attributeName, attributeValue);
125  Attributes::iterator i = std::lower_bound(m_attributes.begin(), m_attributes.end(), attr);
126  if (i != m_attributes.end() && i->key == attr.key)
127  throw ParserError("Attribute " + i->key + " defined more than once");
128  m_attributes.insert(i, attr);
129  state = IN_END_ATTRIBUTE_VALUE;
130  } else if (skipChar(stringDelims[1]))
131  state = IN_STRING_ENTITY;
132  else
133  throw ParserError("Unexpected end of input at " + attributeValue);
134  break;
135 
137  getToken(" />");
138  if (nextChar() == ' ')
139  state = IN_ELEMENT_WHITESPACE;
140  else if (skipChar('/'))
141  state = IN_END_ELEMENT;
142  else if (skipChar('>')) {
144  state = IN_END_TAG;
145  }
146  break;
147 
148  case IN_END_ELEMENT:
149  tmp = getToken('>');
150  if (!tmp.empty() && tmp != m_elementTags.back())
151  throw ParserError("Non-matching closing element " + tmp + " for " + attributeValue);
152  endElement(tmp);
153  m_elementTags.pop_back();
154  state = IN_END_TAG;
155  break;
156 
157  case IN_END_TAG:
158  if (nextChar() == EOF)
159  return;
160  else if (skipChar('<'))
161  state = IN_BEGIN_TAG;
162  else
163  state = IN_DATA;
164  break;
165 
166  case IN_DATA:
167  currentData += getToken("<&");
168  if (skipChar('&'))
169  state = IN_DATA_ENTITY;
170  else if (skipChar('<')) {
171  data(currentData);
172  currentData.clear();
173  state = IN_BEGIN_TAG;
174  } else if (nextChar() == EOF) {
175  data(currentData);
176  return;
177  } else
178  throw ParserError("Unexpected end of input in element " + m_elementTags.back() + currentData);
179  break;
180 
181  case IN_DATA_ENTITY:
182  currentData += parseEntity(getToken(';'));
183  state = IN_DATA;
184  break;
185 
186  case IN_STRING_ENTITY:
187  attributeValue += parseEntity(getToken(';'));
188  state = IN_STRING;
189  break;
190 
191  case IN_DONE:
192  return;
193  }
194  }
195 }
196 
198 
/** Enlarge the token buffer by a fixed 1024-byte step.

    On success @a *buffer and @a *maxSize are updated together. On failure
    neither is modified: realloc() leaves the old block untouched, so the
    caller keeps a valid (if too small) buffer instead of leaking it.

    @return true if the buffer was enlarged, false if the allocation failed.
  */
static bool growTokenBuffer(char **buffer, size_t *maxSize) {
  const size_t grownSize = *maxSize + 1024;
  char *grown = static_cast<char *>(realloc(*buffer, grownSize));
  if (!grown)
    return false;
  *buffer = grown;
  *maxSize = grownSize;
  return true;
}

/** Read one token from @a in into the NUL-terminated string @a *buffer.

    The token starts with the character passed in @a *firstChar and is
    extended with characters read from @a in until a separator or the end of
    the input is met. The terminating separator is consumed from the stream
    but is not stored in the buffer.

    @param in          stream the characters are read from.
    @param buffer      address of a realloc()-compatible buffer; it is
                       enlarged in 1024-byte steps when the token does not
                       fit, so the pointer may change. A null pointer or a
                       zero @a *maxSize is accepted and triggers an initial
                       allocation.
    @param maxSize     address of the current capacity of @a *buffer, updated
                       whenever the buffer is enlarged.
    @param separators  NUL-terminated list of characters that end a token.
    @param firstChar   on entry, the first character of the token; on return,
                       the separator that ended the token or EOF. It is left
                       untouched when the function returns before reading
                       anything or when an allocation fails.

    @return true if the token was ended by a separator, or if @a *firstChar
            was already EOF or a separator (the buffer then holds an empty
            string and nothing is read from @a in); false if the end of the
            input was reached or if the buffer could not be enlarged. In the
            latter case the previous buffer is kept and NUL-terminated, which
            drops the last character stored so far.
  */
bool fgettoken(std::istream &in, char **buffer, size_t *maxSize, const char *separators, int *firstChar) {
  // Make sure there is room for at least one byte before the first store.
  if ((!*buffer || *maxSize == 0) && !growTokenBuffer(buffer, maxSize))
    return false;

  // if the passed first character is EOF or a separator,
  // return an empty otherwise use it as first character
  // of the buffer.
  if (*firstChar == EOF || separators[0] == *firstChar || strchr(separators + 1, *firstChar)) {
    (*buffer)[0] = 0;
    return true;
  }
  (*buffer)[0] = static_cast<char>(*firstChar);

  size_t i = 1;

  while (true) {
    // Slot i receives either the next character or the terminator, so it
    // has to exist before anything is read.
    if (i >= *maxSize && !growTokenBuffer(buffer, maxSize)) {
      // The old block is still valid and completely filled: terminate it in
      // place so the caller never sees a null or unterminated string.
      (*buffer)[*maxSize - 1] = 0;
      return false;
    }

    const int c = in.get();

    if (c == EOF) {
      (*buffer)[i] = 0;
      *firstChar = c;
      return false;
    }

    if (separators[0] == c || strchr(separators + 1, c)) {
      (*buffer)[i] = 0;
      *firstChar = c;
      return true;
    }

    (*buffer)[i++] = static_cast<char>(c);
  }
}
SimpleSAXParser::getToken
std::string getToken(const char *delim)
Definition: SimpleSAXParser.h:126
SimpleSAXParser::startElement
virtual void startElement(const std::string &, Attributes &)
Definition: SimpleSAXParser.h:117
SimpleSAXParser::IN_DATA_ENTITY
Definition: SimpleSAXParser.h:107
mps_fire.i
i
Definition: mps_fire.py:355
SimpleSAXParser::m_attributes
Attributes m_attributes
Definition: SimpleSAXParser.h:152
SimpleSAXParser::PARSER_STATES
PARSER_STATES
Definition: SimpleSAXParser.h:93
SimpleSAXParser::IN_END_ELEMENT
Definition: SimpleSAXParser.h:99
SimpleSAXParser::IN_BEGIN_ATTRIBUTE_VALUE
Definition: SimpleSAXParser.h:103
SimpleSAXParser::data
virtual void data(const std::string &)
Definition: SimpleSAXParser.h:119
SimpleSAXParser::endElement
virtual void endElement(const std::string &)
Definition: SimpleSAXParser.h:118
SimpleSAXParser::skipChar
bool skipChar(int c)
Definition: SimpleSAXParser.h:138
SimpleSAXParser::IN_END_TAG
Definition: SimpleSAXParser.h:101
SimpleSAXParser::IN_ATTRIBUTE_KEY
Definition: SimpleSAXParser.h:100
SimpleSAXParser::parse
void parse(void)
Definition: SimpleSAXParser.cc:46
SimpleSAXParser::Attribute
Definition: SimpleSAXParser.h:71
createJobs.tmp
tmp
align.sh
Definition: createJobs.py:716
SimpleSAXParser::nextChar
int nextChar(void)
Definition: SimpleSAXParser.h:145
edmScanValgrind.buffer
buffer
Definition: edmScanValgrind.py:171
SimpleSAXParser::m_elementTags
std::vector< std::string > m_elementTags
Definition: SimpleSAXParser.h:151
SimpleSAXParser::parseEntity
std::string parseEntity(const std::string &entity)
Definition: SimpleSAXParser.cc:6
cuda_std::lower_bound
__host__ constexpr __device__ RandomIt lower_bound(RandomIt first, RandomIt last, const T &value, Compare comp={})
Definition: cudastdAlgorithm.h:27
AlCaHLTBitMon_QueryRunRegistry.string
string
Definition: AlCaHLTBitMon_QueryRunRegistry.py:256
SimpleSAXParser::IN_END_ATTRIBUTE_VALUE
Definition: SimpleSAXParser.h:105
SimpleSAXParser::IN_DATA
Definition: SimpleSAXParser.h:102
SimpleSAXParser::m_buffer
char * m_buffer
Definition: SimpleSAXParser.h:149
SimpleSAXParser::Attribute::key
std::string key
Definition: SimpleSAXParser.h:72
beam_dqm_sourceclient-live_cfg.cerr
cerr
Definition: beam_dqm_sourceclient-live_cfg.py:17
recoMuon::in
Definition: RecoMuonEnumerators.h:6
debug_state_machine
void debug_state_machine(enum SimpleSAXParser::PARSER_STATES state)
Definition: SimpleSAXParser.cc:18
SimpleSAXParser::IN_DOCUMENT
Definition: SimpleSAXParser.h:94
SimpleSAXParser::IN_BEGIN_TAG
Definition: SimpleSAXParser.h:95
SimpleSAXParser::IN_BEGIN_ELEMENT
Definition: SimpleSAXParser.h:97
HltBtagPostValidation_cff.c
c
Definition: HltBtagPostValidation_cff.py:31
SimpleSAXParser::IN_DONE
Definition: SimpleSAXParser.h:96
reco_skim_cfg_mod.maxSize
maxSize
Definition: reco_skim_cfg_mod.py:154
SimpleSAXParser::IN_STRING
Definition: SimpleSAXParser.h:104
SimpleSAXParser::~SimpleSAXParser
virtual ~SimpleSAXParser()
Definition: SimpleSAXParser.cc:197
SimpleSAXParser::IN_STRING_ENTITY
Definition: SimpleSAXParser.h:106
SimpleSAXParser::IN_ELEMENT_WHITESPACE
Definition: SimpleSAXParser.h:98
fgettoken
bool fgettoken(std::istream &in, char **buffer, size_t *maxSize, const char *separators, int *firstChar)
Definition: SimpleSAXParser.cc:224
SimpleSAXParser.h
SimpleSAXParser::ParserError
Definition: SimpleSAXParser.h:83