#include "Fireworks/Core/src/SimpleSAXParser.h"
00002
00006 std::string
00007 SimpleSAXParser::parseEntity(const std::string &entity)
00008 {
00009 if (entity == "quot")
00010 return "\"";
00011 else if (entity == "amp")
00012 return "&";
00013 else if (entity == "lt")
00014 return "<";
00015 else if (entity == "gt")
00016 return ">";
00017 throw ParserError("Unknown entity " + entity);
00018 }
00019
00020 void
00021 debug_state_machine(enum SimpleSAXParser::PARSER_STATES state)
00022 {
00023 #ifdef SIMPLE_SAX_PARSER_DEBUG
00024 static char *debug_states[] = {
00025 "IN_DOCUMENT",
00026 "IN_BEGIN_TAG",
00027 "IN_DONE",
00028 "IN_BEGIN_ELEMENT",
00029 "IN_ELEMENT_WHITESPACE",
00030 "IN_END_ELEMENT",
00031 "IN_ATTRIBUTE_KEY",
00032 "IN_END_TAG",
00033 "IN_DATA",
00034 "IN_BEGIN_ATTRIBUTE_VALUE",
00035 "IN_STRING",
00036 "IN_END_ATTRIBUTE_VALUE",
00037 "IN_STRING_ENTITY",
00038 "IN_DATA_ENTITY"
00039 };
00040
00041 std::cerr << debug_states[state] << std::endl;
00042 #endif
00043 }
00044
00052 void
00053 SimpleSAXParser::parse(void)
00054 {
00055 enum PARSER_STATES state = IN_DOCUMENT;
00056
00057 char stringDelims[] = "\"&";
00058 std::string attributeName;
00059 std::string attributeValue;
00060 std::string tmp;
00061 std::string currentData;
00062
00063 while (state != IN_DONE)
00064 {
00065 debug_state_machine(state);
00066
00067 switch(state)
00068 {
00069
00070 case IN_DOCUMENT:
00071 state = IN_DATA;
00072 if (skipChar('<'))
00073 state = IN_BEGIN_TAG;
00074 break;
00075
00076 case IN_BEGIN_TAG:
00077 if (nextChar() >= 'A' && nextChar() <= 'z')
00078 state = IN_BEGIN_ELEMENT;
00079 else if (skipChar('/'))
00080 state = IN_END_ELEMENT;
00081 else
00082 throw ParserError("Bad tag");
00083 break;
00084
00085 case IN_BEGIN_ELEMENT:
00086 m_attributes.clear();
00087 m_elementTags.push_back(getToken(" />"));
00088 if (nextChar() == ' ')
00089 state = IN_ELEMENT_WHITESPACE;
00090 else if (skipChar('/'))
00091 state = IN_END_ELEMENT;
00092 else if (skipChar('>'))
00093 {
00094 startElement(m_elementTags.back(), m_attributes);
00095 state = IN_END_TAG;
00096 }
00097 else
00098 throw ParserError("Bad element.");
00099 break;
00100
00101 case IN_ELEMENT_WHITESPACE:
00102 while(skipChar(' ') || skipChar('\n') || skipChar('\t'))
00103 {}
00104
00105 if (nextChar() >= 'A' && nextChar() <= 'z')
00106 state=IN_ATTRIBUTE_KEY;
00107 else if (nextChar() == '/')
00108 state = IN_END_ELEMENT;
00109 else
00110 throw ParserError("Syntax error in element" + m_elementTags.back());
00111 break;
00112
00113 case IN_ATTRIBUTE_KEY:
00114 attributeName = getToken('=');
00115 state = IN_BEGIN_ATTRIBUTE_VALUE;
00116 break;
00117
00118 case IN_BEGIN_ATTRIBUTE_VALUE:
00119 if (skipChar('"'))
00120 {
00121 state = IN_STRING;
00122 attributeValue.clear();
00123 stringDelims[0] = '\"';
00124 }
00125 else if (skipChar('\''))
00126 {
00127 state = IN_STRING;
00128 attributeValue.clear();
00129 stringDelims[0] = '\'';
00130 }
00131 else
00132 throw ParserError("Expecting quotes.");
00133 break;
00134
00135 case IN_STRING:
00136 attributeValue += getToken(stringDelims);
00137 if (skipChar(stringDelims[0]))
00138 {
00139
00140
00141 Attribute attr(attributeName, attributeValue);
00142 Attributes::iterator i = std::lower_bound(m_attributes.begin(),
00143 m_attributes.end(),
00144 attr);
00145 if (i != m_attributes.end() && i->key == attr.key)
00146 throw ParserError("Attribute " + i->key + " defined more than once");
00147 m_attributes.insert(i, attr);
00148 state = IN_END_ATTRIBUTE_VALUE;
00149 }
00150 else if (skipChar(stringDelims[1]))
00151 state = IN_STRING_ENTITY;
00152 else
00153 throw ParserError("Unexpected end of input at " + attributeValue);
00154 break;
00155
00156 case IN_END_ATTRIBUTE_VALUE:
00157 getToken(" />");
00158 if (nextChar() == ' ')
00159 state = IN_ELEMENT_WHITESPACE;
00160 else if (skipChar('/'))
00161 state = IN_END_ELEMENT;
00162 else if (skipChar('>'))
00163 {
00164 startElement(m_elementTags.back(), m_attributes);
00165 state = IN_END_TAG;
00166 }
00167 break;
00168
00169 case IN_END_ELEMENT:
00170 tmp = getToken('>');
00171 if (!tmp.empty() && tmp != m_elementTags.back())
00172 throw ParserError("Non-matching closing element "
00173 + tmp + " for " + attributeValue);
00174 endElement(tmp);
00175 m_elementTags.pop_back();
00176 state = IN_END_TAG;
00177 break;
00178
00179 case IN_END_TAG:
00180 if (nextChar() == EOF)
00181 return;
00182 else if (skipChar('<'))
00183 state = IN_BEGIN_TAG;
00184 else
00185 state = IN_DATA;
00186 break;
00187
00188 case IN_DATA:
00189 currentData += getToken("<&");
00190 if (skipChar('&'))
00191 state = IN_DATA_ENTITY;
00192 else if (skipChar('<'))
00193 {
00194 data(currentData);
00195 currentData.clear();
00196 state = IN_BEGIN_TAG;
00197 }
00198 else if (nextChar() == EOF)
00199 {
00200 data(currentData);
00201 return;
00202 }
00203 else
00204 throw ParserError("Unexpected end of input in element " + m_elementTags.back() + currentData);
00205 break;
00206
00207 case IN_DATA_ENTITY:
00208 currentData += parseEntity(getToken(';'));
00209 state = IN_DATA;
00210 break;
00211
00212 case IN_STRING_ENTITY:
00213 attributeValue += parseEntity(getToken(';'));
00214 state = IN_STRING;
00215 break;
00216
00217 case IN_DONE:
00218 return;
00219 }
00220 }
00221 }
00222
00223 SimpleSAXParser::~SimpleSAXParser() { delete [] m_buffer;}
00224
/**
 * Grows the token buffer by 1024 bytes.
 *
 * On failure the buffer and its recorded size are left exactly as they were,
 * so the caller still owns a valid block.
 *
 * @return true if the buffer was enlarged, false if realloc() failed.
 */
static bool
growTokenBuffer(char **buffer, size_t *maxSize)
{
   const size_t newSize = *maxSize + 1024;
   // Go through a temporary: assigning the result of realloc() straight to
   // *buffer would lose (leak) the original block when realloc() fails.
   char *grown = static_cast<char *>(realloc(*buffer, newSize));
   if (!grown)
      return false;

   *buffer = grown;
   *maxSize = newSize;
   return true;
}

/**
 * Tells whether @a c is one of the separators.
 *
 * The first separator is compared directly and the remaining ones are
 * searched with strchr() starting at separators + 1. Both comparisons are
 * done on the value converted to char, so bytes >= 0x80 match regardless of
 * the signedness of char. As with strchr(), a NUL character always matches.
 */
static bool
isTokenSeparator(const char *separators, int c)
{
   return static_cast<char>(c) == separators[0]
          || strchr(separators + 1, c) != 0;
}

/**
 * Reads one token from @a in into a growable, NUL-terminated buffer.
 *
 * The token starts with the look-ahead character *firstChar and extends up
 * to, but not including, the next separator or the end of the input.
 *
 * @param in         stream the characters following *firstChar are read from.
 * @param buffer     address of the buffer receiving the token. It is enlarged
 *                   with realloc() when needed, so the block must come from
 *                   malloc()/realloc() (or be null, in which case it is
 *                   allocated here).
 * @param maxSize    address of the current size of *buffer; updated whenever
 *                   the buffer is enlarged.
 * @param separators characters ending a token. separators[0] is compared
 *                   directly and the string starting at separators + 1 is
 *                   scanned for the others, so separators + 1 must point to
 *                   a valid, NUL-terminated string.
 * @param firstChar  on entry, the look-ahead character (already read from
 *                   the stream); on exit, the separator that ended the token
 *                   or EOF. Left untouched when the token is empty.
 *
 * @return true if the token is empty (the look-ahead is a separator or EOF)
 *         or was ended by a separator; false if the end of the input was hit
 *         while reading the token, or if memory could not be allocated. In
 *         the out-of-memory case *buffer stays valid and holds the
 *         NUL-terminated part of the token that fitted.
 */
bool
fgettoken(std::istream &in, char **buffer, size_t *maxSize, const char *separators,
          int *firstChar)
{
   // Guarantee room for at least the terminator before the first write.
   if ((!*buffer || *maxSize == 0) && !growTokenBuffer(buffer, maxSize))
      return false;

   // Empty token: the look-ahead already is a separator or the end of input.
   if (*firstChar == EOF || isTokenSeparator(separators, *firstChar))
   {
      (*buffer)[0] = 0;
      return true;
   }

   (*buffer)[0] = static_cast<char>(*firstChar);
   size_t i = 1;

   while (true)
   {
      // Make sure slot i exists before anything is stored in it.
      if (i >= *maxSize && !growTokenBuffer(buffer, maxSize))
      {
         // Out of memory: terminate what fits and report the failure.
         (*buffer)[*maxSize - 1] = 0;
         return false;
      }

      const int c = in.get();

      if (c == EOF || isTokenSeparator(separators, c))
      {
         (*buffer)[i] = 0;
         *firstChar = c;
         // A separator ends the token normally; EOF is reported as false.
         return c != EOF;
      }

      (*buffer)[i++] = static_cast<char>(c);
   }
}