CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_3/src/Fireworks/Core/src/SimpleSAXParser.h

Go to the documentation of this file.
00001 #ifndef __SIMPLE_SAX_PARSER_H_
00002 #define __SIMPLE_SAX_PARSER_H_
00003 /*  A simple SAX-like parser. 
00004 
00005     And yes, I know the S in SAX stands for Simple.
00006         
00007     Licensed under GPLv3 license.
00008     
00009     TODO: incomplete support for entities.
00010     TODO: no support for DTD nor <?xml> preamble.
00011  */
00012 
00013 #include <string>
00014 #include <cstdio>
00015 #include <cstdlib>
00016 #include <cassert>
00017 #include <cstring>
00018 #include <iostream>
00019 #include <algorithm>
00020 #include <vector>
00021 
/*  Tokenizer helper used by SimpleSAXParser::getToken().

    Only the declaration is visible in this header; the definition lives
    elsewhere.
    NOTE(review): judging by the parameter names, *buffer / *maxSize describe
    a caller-owned character buffer and its capacity, separators lists the
    delimiter characters and *firstChar carries the look-ahead character.
    Confirm this, and the meaning of the bool result, against the
    implementation.
 */
bool
fgettoken(std::istream &in, char **buffer, size_t *maxSize, const char *separators,
          int *firstChar);

/*  Base class of a small callback-driven (SAX-like) XML parser.

    A derived class overrides startElement(), endElement() and data(); the
    implementations provided here do nothing. parse() and the destructor are
    only declared in this header and are defined out of line.
 */
class SimpleSAXParser
{
public:
   /*  One attribute, stored as a key / value pair of strings. */
   struct Attribute
   {
      std::string    key;
      std::string    value;

      Attribute(const std::string &iKey, const std::string &iValue)
      :key(iKey), value(iValue)
      {}

      // Copying (and, where the compiler supports it, moving) is left to the
      // implicitly generated members: they copy key and value member-wise,
      // which is exactly what a hand-written copy constructor would do.

      /*  Orders attributes by key only; value takes no part in the
          comparison. */
      bool operator<(const Attribute &attribute) const
      {
         return key < attribute.key;
      }
   };

   typedef std::vector<Attribute> Attributes;

   /*  Error object wrapping a message string.

       NOTE(review): nothing in this header throws it; presumably it is thrown
       from the out-of-line parsing code. Confirm in the implementation file.
    */
   class ParserError
   {
   public:
      ParserError(const std::string &error)
      :m_error(error)
      {}

      /*  Returns the stored message as a C string.

          The pointer refers to the string owned by this object, so it is only
          usable while the object is alive. The accessor is const so that it
          can be called on an error held or caught by const reference.
       */
      const char *error() const { return m_error.c_str(); }
   private:
      std::string m_error;
   };

   /*  Parser state identifiers.

       NOTE(review): this header never reads them; presumably they drive the
       state machine inside parse(). Confirm in the implementation file.
    */
   enum PARSER_STATES {
      IN_DOCUMENT,
      IN_BEGIN_TAG,
      IN_DONE,
      IN_BEGIN_ELEMENT,
      IN_ELEMENT_WHITESPACE,
      IN_END_ELEMENT,
      IN_ATTRIBUTE_KEY,
      IN_END_TAG,
      IN_DATA,
      IN_BEGIN_ATTRIBUTE_VALUE,
      IN_STRING,
      IN_END_ATTRIBUTE_VALUE,
      IN_STRING_ENTITY,
      IN_DATA_ENTITY
   };

   /*  Binds the parser to the input stream f.

       The stream is kept by reference, so it has to outlive the parser.
       Construction allocates a 1024 byte token buffer and immediately reads
       one character from the stream to serve as look-ahead.

       The initializers rely on the member declaration order below:
       m_bufferSize must be initialized before m_buffer, and m_in before
       m_nextChar.
    */
   SimpleSAXParser(std::istream &f)
   : m_in(f),
     m_bufferSize(1024),
     m_buffer(new char[m_bufferSize]),
     m_nextChar(m_in.get())
   {}

   virtual ~SimpleSAXParser();

   void parse(void);

   // Callbacks meant to be overridden; the defaults ignore their arguments.
   virtual void startElement(const std::string &/*name*/,
                             Attributes &/*attributes*/) {}
   virtual void endElement(const std::string &/*name*/) {}
   virtual void data(const std::string &/*data*/) {}

private:
   // Private and not defined in this header: instances cannot be copied.
   SimpleSAXParser(const SimpleSAXParser&);    // stop default
   const SimpleSAXParser& operator=(const SimpleSAXParser&);    // stop default

   std::string parseEntity(const std::string &entity);

   /*  Runs fgettoken() with the separator set delim and returns the buffer
       contents, up to the first NUL, as a string. The bool result of
       fgettoken() is not inspected. */
   std::string getToken(const char *delim)
      {
         fgettoken(m_in, &m_buffer, &m_bufferSize, delim, &m_nextChar);
         return m_buffer;
      }

   /*  Single-separator variant of getToken().

       After fgettoken() returns, the look-ahead character is replaced by the
       next character read from the stream.
       NOTE(review): presumably this steps over the separator itself; confirm
       against fgettoken().
    */
   std::string getToken(const char delim)
      {
         char buf[2] = {delim, 0};
         fgettoken(m_in, &m_buffer, &m_bufferSize, buf, &m_nextChar);
         m_nextChar = m_in.get();
         return m_buffer;
      }

   /*  Consumes the look-ahead character if it equals c.

       Returns true and reads a new look-ahead character from the stream on a
       match; otherwise returns false and leaves the look-ahead untouched.
    */
   bool skipChar(int c)
      {
         if (m_nextChar != c)
            return false;
         m_nextChar = m_in.get();
         return true;
      }

   /*  Returns the current look-ahead character without consuming it. */
   int nextChar(void) const { return m_nextChar; }

   std::istream                        &m_in;          // input stream, not owned
   size_t                              m_bufferSize;   // size passed to fgettoken() for m_buffer
   char                                *m_buffer;      // token buffer allocated with new[]
   int                                 m_nextChar;     // look-ahead, as returned by istream::get()
   // NOTE(review): neither of the two members below is used in this header.
   std::vector<std::string>            m_elementTags;
   Attributes                          m_attributes;
};
00175 
00176 // NOTE: put in a .cc if this file is used in more than one place.
00177 #endif // __SIMPLE_SAX_PARSER_H_