CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
List of all members | Classes | Public Types | Public Member Functions | Private Member Functions | Private Attributes
SimpleSAXParser Class Reference

#include <SimpleSAXParser.h>

Inheritance diagram for SimpleSAXParser:
FWXMLConfigParser

Classes

struct  Attribute
 
class  ParserError
 

Public Types

typedef std::vector< Attribute > Attributes
 
enum  PARSER_STATES {
  IN_DOCUMENT, IN_BEGIN_TAG, IN_DONE, IN_BEGIN_ELEMENT,
  IN_ELEMENT_WHITESPACE, IN_END_ELEMENT, IN_ATTRIBUTE_KEY, IN_END_TAG,
  IN_DATA, IN_BEGIN_ATTRIBUTE_VALUE, IN_STRING, IN_END_ATTRIBUTE_VALUE,
  IN_STRING_ENTITY, IN_DATA_ENTITY
}
 

Public Member Functions

virtual void data (const std::string &)
 
virtual void endElement (const std::string &)
 
void parse (void)
 
 SimpleSAXParser (std::istream &f)
 
virtual void startElement (const std::string &, Attributes &)
 
virtual ~SimpleSAXParser ()
 

Private Member Functions

std::string getToken (const char *delim)
 
std::string getToken (const char delim)
 
int nextChar (void)
 
const SimpleSAXParser & operator= (const SimpleSAXParser &)
 
std::string parseEntity (const std::string &entity)
 
 SimpleSAXParser (const SimpleSAXParser &)
 
bool skipChar (int c)
 

Private Attributes

Attributes m_attributes
 
char * m_buffer
 
size_t m_bufferSize
 
std::vector< std::string > m_elementTags
 
std::istream & m_in
 
int m_nextChar
 

Detailed Description

A simple SAX parser which is able to parse the configuration.

The state machine for the parser can be drawn by copying and pasting the following into graphviz:

digraph { IN_DOCUMENT->IN_BEGIN_TAG [label="nextChar == '<'"]; IN_DOCUMENT->IN_DATA [label="nextChar != '<'"];

IN_BEGIN_TAG->IN_BEGIN_ELEMENT [label="nextChar >= 'A' && nextChar <= 'z'"]; IN_BEGIN_TAG->IN_END_ELEMENT [label= "nextChar == '/'"];

IN_BEGIN_ELEMENT->IN_END_ELEMENT [label="nextChar == '/'"]; IN_BEGIN_ELEMENT->IN_ELEMENT_WHITESPACE [label="nextChar == ' '"]; IN_BEGIN_ELEMENT->IN_END_TAG [label="nextChar == '>'"];

IN_ELEMENT_WHITESPACE->IN_ELEMENT_WHITESPACE [ label = "nextChar == ' ' || nextChar == '\\n' || nextChar == '\\t'"] IN_ELEMENT_WHITESPACE->IN_ATTRIBUTE_KEY [ label = "nextChar >= 'A' && nextChar <= 'z'"] IN_ELEMENT_WHITESPACE->IN_END_ELEMENT [label="nextChar == '/'"]

IN_END_ELEMENT->IN_END_TAG [label = "nextChar == '>'"];

IN_END_TAG->IN_BEGIN_TAG [label="nextChar == '<'"]; IN_END_TAG->IN_DATA [label="nextChar != '<'"]

IN_DATA->IN_BEGIN_TAG [label="nextChar == '<'"]; IN_DATA->IN_DATA_ENTITY [label="nextChar == '&'"]; IN_DATA->IN_DONE [label = "nextChar == EOF"];

IN_DATA_ENTITY->IN_DATA [label="nextChar == ';'"];

IN_ATTRIBUTE_KEY->IN_BEGIN_ATTRIBUTE_VALUE [label = "nextChar == '='"]

IN_BEGIN_ATTRIBUTE_VALUE->IN_STRING [label = "nextChar == '\"' || nextChar == '\'' "]

IN_STRING->IN_END_ATTRIBUTE_VALUE [label = "nextChar == quote"] IN_STRING->IN_STRING_ENTITY [label = "nextChar == '&'"]

IN_END_ATTRIBUTE_VALUE->IN_ELEMENT_WHITESPACE [label = "nextChar == ' '"] IN_END_ATTRIBUTE_VALUE->IN_END_ELEMENT [label = "nextChar == '/'"] IN_END_ATTRIBUTE_VALUE->IN_END_TAG [label = "nextChar == '>'"]

IN_STRING_ENTITY->IN_STRING [label = "nextChar == ';'"] }

Definition at line 71 of file SimpleSAXParser.h.

Member Typedef Documentation

typedef std::vector<Attribute> SimpleSAXParser::Attributes

Definition at line 93 of file SimpleSAXParser.h.

Member Enumeration Documentation

Enumerator
IN_DOCUMENT 
IN_BEGIN_TAG 
IN_DONE 
IN_BEGIN_ELEMENT 
IN_ELEMENT_WHITESPACE 
IN_END_ELEMENT 
IN_ATTRIBUTE_KEY 
IN_END_TAG 
IN_DATA 
IN_BEGIN_ATTRIBUTE_VALUE 
IN_STRING 
IN_END_ATTRIBUTE_VALUE 
IN_STRING_ENTITY 
IN_DATA_ENTITY 

Definition at line 106 of file SimpleSAXParser.h.

Constructor & Destructor Documentation

SimpleSAXParser::SimpleSAXParser ( std::istream &  f)
inline

Definition at line 123 of file SimpleSAXParser.h.

124  : m_in(f),
125  m_bufferSize(1024),
126  m_buffer(new char[m_bufferSize]),
127  m_nextChar(m_in.get())
128  {}
std::istream & m_in
double f[11][100]
SimpleSAXParser::~SimpleSAXParser ( )
virtual

Definition at line 223 of file SimpleSAXParser.cc.

References m_buffer.

223 { delete [] m_buffer;}
SimpleSAXParser::SimpleSAXParser ( const SimpleSAXParser & )
private

Member Function Documentation

virtual void SimpleSAXParser::data ( const std::string &  )
inline virtual

Reimplemented in FWXMLConfigParser.

Definition at line 137 of file SimpleSAXParser.h.

Referenced by parse().

137 {}
virtual void SimpleSAXParser::endElement ( const std::string &  )
inline virtual

Reimplemented in FWXMLConfigParser.

Definition at line 136 of file SimpleSAXParser.h.

Referenced by parse().

136 {}
std::string SimpleSAXParser::getToken ( const char *  delim)
inline private

Definition at line 144 of file SimpleSAXParser.h.

References fgettoken(), m_buffer, m_bufferSize, m_in, and m_nextChar.

Referenced by parse().

145  {
147  return m_buffer;
148  }
std::istream & m_in
bool fgettoken(std::istream &in, char **buffer, size_t *maxSize, const char *separators, int *firstChar)
std::string SimpleSAXParser::getToken ( const char  delim)
inline private

Definition at line 150 of file SimpleSAXParser.h.

References fgettoken(), m_buffer, m_bufferSize, m_in, and m_nextChar.

151  {
152  char buf[2] = {delim, 0};
154  m_nextChar = m_in.get();
155  return m_buffer;
156  }
std::istream & m_in
bool fgettoken(std::istream &in, char **buffer, size_t *maxSize, const char *separators, int *firstChar)
int SimpleSAXParser::nextChar ( void  )
inline private

Definition at line 166 of file SimpleSAXParser.h.

References m_nextChar.

Referenced by parse().

166 { return m_nextChar; }
const SimpleSAXParser& SimpleSAXParser::operator= ( const SimpleSAXParser & )
private
void SimpleSAXParser::parse ( void  )

Runs the state machine of the parser, invoking the startElement(), setAttribute(), endElement(), and data() virtual methods as appropriate. In order to have the parser do something useful, you need to derive from it and specialize the above-mentioned virtual methods.

The default implementation is in any case useful for checking syntax.

Definition at line 53 of file SimpleSAXParser.cc.

References asciidump::attr, data(), debug_state_machine(), endElement(), getToken(), i, IN_ATTRIBUTE_KEY, IN_BEGIN_ATTRIBUTE_VALUE, IN_BEGIN_ELEMENT, IN_BEGIN_TAG, IN_DATA, IN_DATA_ENTITY, IN_DOCUMENT, IN_DONE, IN_ELEMENT_WHITESPACE, IN_END_ATTRIBUTE_VALUE, IN_END_ELEMENT, IN_END_TAG, IN_STRING, IN_STRING_ENTITY, SimpleSAXParser::Attribute::key, m_attributes, m_elementTags, nextChar(), parseEntity(), skipChar(), startElement(), evf::utils::state, and tmp.

Referenced by FWConfigurationManager::readFromFile().

54 {
56  // Current delimiters for strings in attributes.
57  char stringDelims[] = "\"&";
58  std::string attributeName;
59  std::string attributeValue;
60  std::string tmp;
61  std::string currentData;
62 
63  while (state != IN_DONE)
64  {
65  debug_state_machine(state);
66 
67  switch(state)
68  {
69  // FIXME: IN_DOCUMENT should check the dtd...
70  case IN_DOCUMENT:
71  state = IN_DATA;
72  if (skipChar('<'))
73  state = IN_BEGIN_TAG;
74  break;
75 
76  case IN_BEGIN_TAG:
77  if (nextChar() >= 'A' && nextChar() <= 'z')
78  state = IN_BEGIN_ELEMENT;
79  else if (skipChar('/'))
80  state = IN_END_ELEMENT;
81  else
82  throw ParserError("Bad tag");
83  break;
84 
85  case IN_BEGIN_ELEMENT:
86  m_attributes.clear();
87  m_elementTags.push_back(getToken(" />"));
88  if (nextChar() == ' ')
89  state = IN_ELEMENT_WHITESPACE;
90  else if (skipChar('/'))
91  state = IN_END_ELEMENT;
92  else if (skipChar('>'))
93  {
95  state = IN_END_TAG;
96  }
97  else
98  throw ParserError("Bad element.");
99  break;
100 
102  while(skipChar(' ') || skipChar('\n') || skipChar('\t'))
103  {}
104 
105  if (nextChar() >= 'A' && nextChar() <= 'z')
106  state=IN_ATTRIBUTE_KEY;
107  else if (nextChar() == '/')
108  state = IN_END_ELEMENT;
109  else
110  throw ParserError("Syntax error in element" + m_elementTags.back());
111  break;
112 
113  case IN_ATTRIBUTE_KEY:
114  attributeName = getToken('=');
115  state = IN_BEGIN_ATTRIBUTE_VALUE;
116  break;
117 
119  if (skipChar('"'))
120  {
121  state = IN_STRING;
122  attributeValue.clear();
123  stringDelims[0] = '\"';
124  }
125  else if (skipChar('\''))
126  {
127  state = IN_STRING;
128  attributeValue.clear();
129  stringDelims[0] = '\'';
130  }
131  else
132  throw ParserError("Expecting quotes.");
133  break;
134 
135  case IN_STRING:
136  attributeValue += getToken(stringDelims);
137  if (skipChar(stringDelims[0]))
138  {
139  // Save the attributes in order, replacing those that are
140  // specified more than once.
141  Attribute attr(attributeName, attributeValue);
142  Attributes::iterator i = std::lower_bound(m_attributes.begin(),
143  m_attributes.end(),
144  attr);
145  if (i != m_attributes.end() && i->key == attr.key)
146  throw ParserError("Attribute " + i->key + " defined more than once");
147  m_attributes.insert(i, attr);
148  state = IN_END_ATTRIBUTE_VALUE;
149  }
150  else if (skipChar(stringDelims[1]))
151  state = IN_STRING_ENTITY;
152  else
153  throw ParserError("Unexpected end of input at " + attributeValue);
154  break;
155 
157  getToken(" />");
158  if (nextChar() == ' ')
159  state = IN_ELEMENT_WHITESPACE;
160  else if (skipChar('/'))
161  state = IN_END_ELEMENT;
162  else if (skipChar('>'))
163  {
165  state = IN_END_TAG;
166  }
167  break;
168 
169  case IN_END_ELEMENT:
170  tmp = getToken('>');
171  if (!tmp.empty() && tmp != m_elementTags.back())
172  throw ParserError("Non-matching closing element "
173  + tmp + " for " + attributeValue);
174  endElement(tmp);
175  m_elementTags.pop_back();
176  state = IN_END_TAG;
177  break;
178 
179  case IN_END_TAG:
180  if (nextChar() == EOF)
181  return;
182  else if (skipChar('<'))
183  state = IN_BEGIN_TAG;
184  else
185  state = IN_DATA;
186  break;
187 
188  case IN_DATA:
189  currentData += getToken("<&");
190  if (skipChar('&'))
191  state = IN_DATA_ENTITY;
192  else if (skipChar('<'))
193  {
194  data(currentData);
195  currentData.clear();
196  state = IN_BEGIN_TAG;
197  }
198  else if (nextChar() == EOF)
199  {
200  data(currentData);
201  return;
202  }
203  else
204  throw ParserError("Unexpected end of input in element " + m_elementTags.back() + currentData);
205  break;
206 
207  case IN_DATA_ENTITY:
208  currentData += parseEntity(getToken(';'));
209  state = IN_DATA;
210  break;
211 
212  case IN_STRING_ENTITY:
213  attributeValue += parseEntity(getToken(';'));
214  state = IN_STRING;
215  break;
216 
217  case IN_DONE:
218  return;
219  }
220  }
221 }
int i
Definition: DBlmapReader.cc:9
virtual void endElement(const std::string &)
std::string parseEntity(const std::string &entity)
virtual void data(const std::string &)
bool skipChar(int c)
std::string getToken(const char *delim)
tuple attr
Definition: asciidump.py:432
std::vector< std::string > m_elementTags
char state
Definition: procUtils.cc:75
std::vector< std::vector< double > > tmp
Definition: MVATrainer.cc:100
void debug_state_machine(enum SimpleSAXParser::PARSER_STATES state)
virtual void startElement(const std::string &, Attributes &)
Attributes m_attributes
std::string SimpleSAXParser::parseEntity ( const std::string &  entity)
private

Helper function to handle entities, i.e. characters specified with the "&label;" syntax.

Definition at line 7 of file SimpleSAXParser.cc.

Referenced by parse().

8 {
9  if (entity == "quot")
10  return "\"";
11  else if (entity == "amp")
12  return "&";
13  else if (entity == "lt")
14  return "<";
15  else if (entity == "gt")
16  return ">";
17  throw ParserError("Unknown entity " + entity);
18 }
bool SimpleSAXParser::skipChar ( int  c)
inline private

Definition at line 158 of file SimpleSAXParser.h.

References m_in, and m_nextChar.

Referenced by parse().

159  {
160  if (m_nextChar != c)
161  return false;
162  m_nextChar = m_in.get();
163  return true;
164  }
std::istream & m_in
virtual void SimpleSAXParser::startElement ( const std::string & ,
Attributes & 
)
inline virtual

Reimplemented in FWXMLConfigParser.

Definition at line 134 of file SimpleSAXParser.h.

Referenced by parse().

135  {}

Member Data Documentation

Attributes SimpleSAXParser::m_attributes
private

Definition at line 173 of file SimpleSAXParser.h.

Referenced by parse().

char* SimpleSAXParser::m_buffer
private

Definition at line 170 of file SimpleSAXParser.h.

Referenced by getToken(), and ~SimpleSAXParser().

size_t SimpleSAXParser::m_bufferSize
private

Definition at line 169 of file SimpleSAXParser.h.

Referenced by getToken().

std::vector<std::string> SimpleSAXParser::m_elementTags
private

Definition at line 172 of file SimpleSAXParser.h.

Referenced by parse().

std::istream& SimpleSAXParser::m_in
private

Definition at line 168 of file SimpleSAXParser.h.

Referenced by getToken(), and skipChar().

int SimpleSAXParser::m_nextChar
private

Definition at line 171 of file SimpleSAXParser.h.

Referenced by getToken(), nextChar(), and skipChar().