#include <SimpleSAXParser.h>
Classes | |
struct | Attribute |
class | ParserError |
Public Types | |
typedef std::vector< Attribute > | Attributes |
enum | PARSER_STATES { IN_DOCUMENT, IN_BEGIN_TAG, IN_DONE, IN_BEGIN_ELEMENT, IN_ELEMENT_WHITESPACE, IN_END_ELEMENT, IN_ATTRIBUTE_KEY, IN_END_TAG, IN_DATA, IN_BEGIN_ATTRIBUTE_VALUE, IN_STRING, IN_END_ATTRIBUTE_VALUE, IN_STRING_ENTITY, IN_DATA_ENTITY } |
Public Member Functions | |
virtual void | data (const std::string &) |
virtual void | endElement (const std::string &) |
void | parse (void) |
SimpleSAXParser (std::istream &f) | |
virtual void | startElement (const std::string &, Attributes &) |
virtual | ~SimpleSAXParser () |
Private Member Functions | |
std::string | getToken (const char *delim) |
std::string | getToken (const char delim) |
int | nextChar (void) |
const SimpleSAXParser & | operator= (const SimpleSAXParser &) |
std::string | parseEntity (const std::string &entity) |
SimpleSAXParser (const SimpleSAXParser &) | |
bool | skipChar (int c) |
Private Attributes | |
Attributes | m_attributes |
char * | m_buffer |
size_t | m_bufferSize |
std::vector< std::string > | m_elementTags |
std::istream & | m_in |
int | m_nextChar |
A simple SAX parser which is able to parse the configuration.
The parser's state machine can be drawn by copying and pasting the following into Graphviz:
digraph { IN_DOCUMENT->IN_BEGIN_TAG [label="nextChar == '<'"]; IN_DOCUMENT->IN_DATA [label="nextChar != '<'"];
IN_BEGIN_TAG->IN_BEGIN_ELEMENT [label="nextChar >= 'A' && nextChar <= 'z'"]; IN_BEGIN_TAG->IN_END_ELEMENT [label= "nextChar == '/'"];
IN_BEGIN_ELEMENT->IN_END_ELEMENT [label="nextChar == '/'"]; IN_BEGIN_ELEMENT->IN_ELEMENT_WHITESPACE [label="nextChar == ' '"]; IN_BEGIN_ELEMENT->IN_END_TAG [label="nextChar == '>'"];
IN_ELEMENT_WHITESPACE->IN_ELEMENT_WHITESPACE [ label = "nextChar == ' ' || nextChar == '\\t' || nextChar == '\\n'"] IN_ELEMENT_WHITESPACE->IN_ATTRIBUTE_KEY [ label = "nextChar >= 'A' && nextChar <= 'z'"] IN_ELEMENT_WHITESPACE->IN_END_ELEMENT [label="nextChar == '/'"]
IN_END_ELEMENT->IN_END_TAG [label = "nextChar == '>'"];
IN_END_TAG->IN_BEGIN_TAG [label="nextChar == '<'"]; IN_END_TAG->IN_DATA [label="nextChar != '<'"]
IN_DATA->IN_BEGIN_TAG [label="nextChar == '<'"]; IN_DATA->IN_DATA_ENTITY [label="nextChar == '&'"]; IN_DATA->IN_DONE [label = "nextChar == EOF"];
IN_DATA_ENTITY->IN_DATA [label="nextChar == ';'"];
IN_ATTRIBUTE_KEY->IN_BEGIN_ATTRIBUTE_VALUE [label = "nextChar == '='"]
IN_BEGIN_ATTRIBUTE_VALUE->IN_STRING [label = "nextChar == '\"' || nextChar == '\'' "]
IN_STRING->IN_END_ATTRIBUTE_VALUE [label = "nextChar == quote"] IN_STRING->IN_STRING_ENTITY [label = "nextChar == '&'"]
IN_END_ATTRIBUTE_VALUE->IN_ELEMENT_WHITESPACE [label = "nextChar == ' '"] IN_END_ATTRIBUTE_VALUE->IN_END_ELEMENT [label = "nextChar == '/'"] IN_END_ATTRIBUTE_VALUE->IN_END_TAG [label = "nextChar == '>'"]
IN_STRING_ENTITY->IN_STRING [label = "nextChar == ';'"] }
Definition at line 71 of file SimpleSAXParser.h.
typedef std::vector<Attribute> SimpleSAXParser::Attributes |
Definition at line 93 of file SimpleSAXParser.h.
Definition at line 106 of file SimpleSAXParser.h.
SimpleSAXParser::SimpleSAXParser | ( | std::istream & | f | ) | [inline] |
Definition at line 123 of file SimpleSAXParser.h.
// Binds the parser to the input stream f, records an initial token-buffer size
// of 1024 bytes, allocates that buffer with new[], and primes the one-character
// lookahead (m_nextChar) by reading the first character from the stream.
// m_buffer's initializer reads m_bufferSize; this is only safe if m_bufferSize
// is declared before m_buffer. This page lists it at header line 169 and
// m_buffer at line 170.
: m_in(f), m_bufferSize(1024), m_buffer(new char[m_bufferSize]), m_nextChar(m_in.get()) {}
SimpleSAXParser::~SimpleSAXParser | ( | ) | [virtual] |
SimpleSAXParser::SimpleSAXParser | ( | const SimpleSAXParser & | ) | [private] |
virtual void SimpleSAXParser::data | ( | const std::string & | ) | [inline, virtual] |
Reimplemented in FWXMLConfigParser.
Definition at line 137 of file SimpleSAXParser.h.
Referenced by parse().
// Default implementation is intentionally empty: parse() calls this hook with
// the accumulated character data, and derived parsers override it to act on it.
{}
virtual void SimpleSAXParser::endElement | ( | const std::string & | ) | [inline, virtual] |
Reimplemented in FWXMLConfigParser.
Definition at line 136 of file SimpleSAXParser.h.
Referenced by parse().
// Default implementation is intentionally empty: parse() calls this hook when an
// element is closed, and derived parsers override it to act on it.
{}
std::string SimpleSAXParser::getToken | ( | const char | delim | ) | [inline, private] |
Definition at line 150 of file SimpleSAXParser.h.
References fgettoken(), m_buffer, m_bufferSize, m_in, and m_nextChar.
{
  // fgettoken() takes a NUL-terminated list of separators, so wrap the single
  // delimiter character into a one-element C string.
  const char separators[] = {delim, '\0'};
  fgettoken(m_in, &m_buffer, &m_bufferSize, separators, &m_nextChar);

  // Unlike the const char* overload, this one reads one more character into the
  // lookahead after the token. NOTE(review): presumably this steps past the
  // delimiter that fgettoken() left in m_nextChar -- confirm against fgettoken().
  m_nextChar = m_in.get();

  return std::string(m_buffer);
}
std::string SimpleSAXParser::getToken | ( | const char * | delim | ) | [inline, private] |
Definition at line 144 of file SimpleSAXParser.h.
References fgettoken(), m_buffer, m_bufferSize, m_in, and m_nextChar.
Referenced by parse().
// Reads the next token from m_in into m_buffer via fgettoken(), using any
// character of the NUL-terminated string delim as a separator, and returns the
// buffer contents as a std::string.
// Unlike the single-character overload, this one does not read another
// character afterwards, so m_nextChar holds whatever fgettoken() stored there.
// NOTE(review): presumably that is the separator that ended the token (or EOF),
// which is how parse() inspects it -- confirm against fgettoken().
{ fgettoken(m_in, &m_buffer, &m_bufferSize, delim, &m_nextChar); return m_buffer; }
int SimpleSAXParser::nextChar | ( | void | ) | [inline, private] |
Definition at line 166 of file SimpleSAXParser.h.
References m_nextChar.
Referenced by parse().
// Returns the current one-character lookahead without consuming it.
// The value is an int so that it can also hold EOF, as returned by m_in.get().
{ return m_nextChar; }
const SimpleSAXParser& SimpleSAXParser::operator= | ( | const SimpleSAXParser & | ) | [private] |
void SimpleSAXParser::parse | ( | void | ) |
Runs the state machine of the parser, invoking startElement(), setAttribute(), endElement(), data() virtual methods as appropriate. In order to have the parser do something useful you need to derive from it and specialize the above mentioned virtual methods.
Default implementation is in any case useful to check syntax.
Definition at line 53 of file SimpleSAXParser.cc.
References asciidump::attr, data(), debug_state_machine(), endElement(), getToken(), i, IN_ATTRIBUTE_KEY, IN_BEGIN_ATTRIBUTE_VALUE, IN_BEGIN_ELEMENT, IN_BEGIN_TAG, IN_DATA, IN_DATA_ENTITY, IN_DOCUMENT, IN_DONE, IN_ELEMENT_WHITESPACE, IN_END_ATTRIBUTE_VALUE, IN_END_ELEMENT, IN_END_TAG, IN_STRING, IN_STRING_ENTITY, SimpleSAXParser::Attribute::key, m_attributes, m_elementTags, nextChar(), parseEntity(), skipChar(), startElement(), and tmp.
Referenced by FWConfigurationManager::readFromFile().
{
  // Drives the parser state machine over the input stream until the end of
  // input is reached, invoking the startElement(), endElement() and data()
  // hooks along the way.
  //
  // Throws ParserError on malformed input: bad tag start, unquoted attribute
  // value, duplicate attribute, unknown entity, a closing tag that does not
  // match (or has no) opening element, or input ending inside a tag.
  //
  // NOTE(review): self-closing elements ("<a/>", "<a b='c'/>") go straight to
  // IN_END_ELEMENT, so startElement() is never called for them and
  // endElement() receives an empty name. This behaviour is kept unchanged
  // because derived parsers may rely on it -- confirm whether it is intended.
  enum PARSER_STATES state = IN_DOCUMENT;
  // Current delimiters for strings in attributes: [0] is the quote character
  // that opened the value, [1] is the entity introducer.
  char stringDelims[] = "\"&";
  std::string attributeName;
  std::string attributeValue;
  std::string tmp;
  std::string currentData;

  while (state != IN_DONE)
  {
    debug_state_machine(state);

    switch (state)
    {
      // FIXME: IN_DOCUMENT should check the dtd...
      case IN_DOCUMENT:
        state = IN_DATA;
        if (skipChar('<'))
          state = IN_BEGIN_TAG;
        break;

      case IN_BEGIN_TAG:
        // The first character of the element name is only peeked at, not
        // consumed: getToken() in IN_BEGIN_ELEMENT picks it up.
        if (nextChar() >= 'A' && nextChar() <= 'z')
          state = IN_BEGIN_ELEMENT;
        else if (skipChar('/'))
          state = IN_END_ELEMENT;
        else
          throw ParserError("Bad tag");
        break;

      case IN_BEGIN_ELEMENT:
        m_attributes.clear();
        m_elementTags.push_back(getToken(" />"));
        if (nextChar() == ' ')
          state = IN_ELEMENT_WHITESPACE;
        else if (skipChar('/'))
          state = IN_END_ELEMENT;
        else if (skipChar('>'))
        {
          startElement(m_elementTags.back(), m_attributes);
          state = IN_END_TAG;
        }
        else
          throw ParserError("Bad element.");
        break;

      case IN_ELEMENT_WHITESPACE:
        while (skipChar(' ') || skipChar('\n') || skipChar('\t'))
        {}

        if (nextChar() >= 'A' && nextChar() <= 'z')
          state = IN_ATTRIBUTE_KEY;
        // Consume the '/' here, exactly as every other transition into
        // IN_END_ELEMENT does; otherwise the tag name read in IN_END_ELEMENT
        // would start at the '/' itself.
        else if (skipChar('/'))
          state = IN_END_ELEMENT;
        else
          throw ParserError("Syntax error in element " + m_elementTags.back());
        break;

      case IN_ATTRIBUTE_KEY:
        attributeName = getToken('=');
        state = IN_BEGIN_ATTRIBUTE_VALUE;
        break;

      case IN_BEGIN_ATTRIBUTE_VALUE:
        // Remember which quote opened the value so that only the same quote
        // character can close it.
        if (skipChar('"'))
        {
          state = IN_STRING;
          attributeValue.clear();
          stringDelims[0] = '\"';
        }
        else if (skipChar('\''))
        {
          state = IN_STRING;
          attributeValue.clear();
          stringDelims[0] = '\'';
        }
        else
          throw ParserError("Expecting quotes.");
        break;

      case IN_STRING:
        attributeValue += getToken(stringDelims);
        if (skipChar(stringDelims[0]))
        {
          // Keep the attributes ordered (lower_bound finds the insertion
          // point) and reject any key that is specified more than once.
          Attribute attr(attributeName, attributeValue);
          Attributes::iterator i = std::lower_bound(m_attributes.begin(),
                                                    m_attributes.end(),
                                                    attr);
          if (i != m_attributes.end() && i->key == attr.key)
            throw ParserError("Attribute " + i->key + " defined more than once");
          m_attributes.insert(i, attr);
          state = IN_END_ATTRIBUTE_VALUE;
        }
        else if (skipChar(stringDelims[1]))
          state = IN_STRING_ENTITY;
        else
          throw ParserError("Unexpected end of input at " + attributeValue);
        break;

      case IN_END_ATTRIBUTE_VALUE:
        getToken(" />");
        if (nextChar() == ' ')
          state = IN_ELEMENT_WHITESPACE;
        else if (skipChar('/'))
          state = IN_END_ELEMENT;
        else if (skipChar('>'))
        {
          startElement(m_elementTags.back(), m_attributes);
          state = IN_END_TAG;
        }
        else
          // Without this branch the state never changes (e.g. when the input
          // ends right after an attribute value) and the loop spins forever.
          throw ParserError("Unexpected end of input in element "
                            + m_elementTags.back());
        break;

      case IN_END_ELEMENT:
        tmp = getToken('>');
        // A closing tag with no open element would otherwise call back() and
        // pop_back() on an empty vector, which is undefined behaviour.
        if (m_elementTags.empty())
          throw ParserError("Closing element " + tmp
                            + " without matching opening element");
        // An empty name comes from a self-closing tag and matches implicitly.
        if (!tmp.empty() && tmp != m_elementTags.back())
          throw ParserError("Non-matching closing element "
                            + tmp + " for " + m_elementTags.back());
        endElement(tmp);
        m_elementTags.pop_back();
        state = IN_END_TAG;
        break;

      case IN_END_TAG:
        if (nextChar() == EOF)
          return;
        else if (skipChar('<'))
          state = IN_BEGIN_TAG;
        else
          state = IN_DATA;
        break;

      case IN_DATA:
        currentData += getToken("<&");
        if (skipChar('&'))
          state = IN_DATA_ENTITY;
        else if (skipChar('<'))
        {
          data(currentData);
          currentData.clear();
          state = IN_BEGIN_TAG;
        }
        else if (nextChar() == EOF)
        {
          data(currentData);
          return;
        }
        else
          // Data may appear outside of any element, so do not assume that
          // the element stack is non-empty when building the message.
          throw ParserError("Unexpected end of input in element "
                            + (m_elementTags.empty() ? std::string()
                                                     : m_elementTags.back())
                            + currentData);
        break;

      case IN_DATA_ENTITY:
        currentData += parseEntity(getToken(';'));
        state = IN_DATA;
        break;

      case IN_STRING_ENTITY:
        attributeValue += parseEntity(getToken(';'));
        state = IN_STRING;
        break;

      case IN_DONE:
        return;
    }
  }
}
std::string SimpleSAXParser::parseEntity | ( | const std::string & | entity | ) | [private] |
Helper function to handle entities, i.e. characters specified with the "&label;" syntax.
Definition at line 7 of file SimpleSAXParser.cc.
Referenced by parse().
{
  // Maps the name of an entity (the text between '&' and ';') to the
  // character it stands for. All five entities predefined by XML are
  // supported; "apos" matters here because parse() accepts attribute values
  // delimited by single quotes.
  //
  // Throws ParserError for any other entity name.
  if (entity == "quot")
    return "\"";
  else if (entity == "apos")
    return "'";
  else if (entity == "amp")
    return "&";
  else if (entity == "lt")
    return "<";
  else if (entity == "gt")
    return ">";
  throw ParserError("Unknown entity " + entity);
}
bool SimpleSAXParser::skipChar | ( | int | c | ) | [inline, private] |
Definition at line 158 of file SimpleSAXParser.h.
References m_in, and m_nextChar.
Referenced by parse().
{
  // Consumes the lookahead character only when it equals c, then refills the
  // lookahead from the stream. Returns whether a character was consumed.
  const bool matched = (m_nextChar == c);
  if (matched)
    m_nextChar = m_in.get();
  return matched;
}
virtual void SimpleSAXParser::startElement | ( | const std::string & | , |
Attributes & | |||
) | [inline, virtual] |
Reimplemented in FWXMLConfigParser.
Definition at line 134 of file SimpleSAXParser.h.
Referenced by parse().
// Default implementation is intentionally empty: parse() calls this hook with
// the element name and its attributes once the opening tag has been read, and
// derived parsers override it to act on it.
{}
Attributes SimpleSAXParser::m_attributes [private] |
Definition at line 173 of file SimpleSAXParser.h.
Referenced by parse().
char* SimpleSAXParser::m_buffer [private] |
Definition at line 170 of file SimpleSAXParser.h.
Referenced by getToken(), and ~SimpleSAXParser().
size_t SimpleSAXParser::m_bufferSize [private] |
Definition at line 169 of file SimpleSAXParser.h.
Referenced by getToken().
std::vector<std::string> SimpleSAXParser::m_elementTags [private] |
Definition at line 172 of file SimpleSAXParser.h.
Referenced by parse().
std::istream& SimpleSAXParser::m_in [private] |
Definition at line 168 of file SimpleSAXParser.h.
Referenced by getToken(), and skipChar().
int SimpleSAXParser::m_nextChar [private] |
Definition at line 171 of file SimpleSAXParser.h.
Referenced by getToken(), nextChar(), and skipChar().