CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_0/src/Fireworks/Core/src/SimpleSAXParser.cc

Go to the documentation of this file.
00001 #include "Fireworks/Core/src/SimpleSAXParser.h"
00002 
00006 std::string 
00007 SimpleSAXParser::parseEntity(const std::string &entity)
00008 {
00009    if (entity == "quot")
00010       return "\"";
00011    else if (entity == "amp")
00012       return "&";
00013    else if (entity == "lt")
00014       return "<";
00015    else if (entity == "gt")
00016       return ">";
00017    throw ParserError("Unknown entity " + entity);      
00018 }
00019 
00020 void
00021 debug_state_machine(enum SimpleSAXParser::PARSER_STATES state)
00022 {
00023 #ifdef SIMPLE_SAX_PARSER_DEBUG
00024   static char *debug_states[] = {
00025       "IN_DOCUMENT",
00026       "IN_BEGIN_TAG",
00027       "IN_DONE",
00028       "IN_BEGIN_ELEMENT",
00029       "IN_ELEMENT_WHITESPACE",
00030       "IN_END_ELEMENT",
00031       "IN_ATTRIBUTE_KEY",
00032       "IN_END_TAG",
00033       "IN_DATA",
00034       "IN_BEGIN_ATTRIBUTE_VALUE",
00035       "IN_STRING",
00036       "IN_END_ATTRIBUTE_VALUE",
00037       "IN_STRING_ENTITY",
00038       "IN_DATA_ENTITY"
00039    };
00040 
00041    std::cerr << debug_states[state] << std::endl;
00042 #endif
00043 }
00044 
00052 void 
00053 SimpleSAXParser::parse(void)
00054 {
00055    enum PARSER_STATES state = IN_DOCUMENT;
00056    // Current delimiters for strings in attributes.
00057    char stringDelims[] = "\"&";
00058    std::string attributeName;
00059    std::string attributeValue;
00060    std::string tmp;
00061    std::string currentData;
00062    
00063    while (state != IN_DONE)
00064    {
00065       debug_state_machine(state);
00066 
00067       switch(state)
00068       {
00069          // FIXME: IN_DOCUMENT should check the dtd...
00070          case IN_DOCUMENT:
00071             state = IN_DATA;
00072             if (skipChar('<'))
00073                state = IN_BEGIN_TAG;
00074             break;
00075          
00076          case IN_BEGIN_TAG:
00077             if (nextChar() >= 'A' && nextChar() <= 'z')
00078                state = IN_BEGIN_ELEMENT;
00079             else if (skipChar('/'))
00080                state = IN_END_ELEMENT;
00081             else
00082                throw ParserError("Bad tag");
00083             break;
00084             
00085          case IN_BEGIN_ELEMENT:
00086             m_attributes.clear();
00087             m_elementTags.push_back(getToken(" />"));
00088             if (nextChar() == ' ')
00089                state = IN_ELEMENT_WHITESPACE;
00090             else if (skipChar('/'))
00091                state = IN_END_ELEMENT;
00092             else if (skipChar('>'))
00093             {  
00094                startElement(m_elementTags.back(), m_attributes);
00095                state = IN_END_TAG;
00096             }
00097             else
00098                throw ParserError("Bad element.");
00099             break;
00100          
00101          case IN_ELEMENT_WHITESPACE:
00102             while(skipChar(' ') || skipChar('\n') || skipChar('\t'))
00103             {}
00104             
00105             if (nextChar() >= 'A' && nextChar() <= 'z')
00106                state=IN_ATTRIBUTE_KEY;
00107             else if (nextChar() == '/')
00108                state = IN_END_ELEMENT;
00109             else
00110                throw ParserError("Syntax error in element" + m_elementTags.back());
00111             break;
00112          
00113          case IN_ATTRIBUTE_KEY:
00114             attributeName = getToken('=');
00115             state = IN_BEGIN_ATTRIBUTE_VALUE;
00116             break;
00117             
00118          case IN_BEGIN_ATTRIBUTE_VALUE:
00119             if (skipChar('"'))
00120             {
00121                state = IN_STRING;
00122                attributeValue.clear();
00123                stringDelims[0] = '\"';
00124             }
00125             else if (skipChar('\''))
00126             {
00127                state = IN_STRING;
00128                attributeValue.clear();
00129                stringDelims[0] = '\'';               
00130             }
00131             else
00132                throw ParserError("Expecting quotes.");
00133             break;
00134             
00135          case IN_STRING:
00136             attributeValue += getToken(stringDelims);
00137             if (skipChar(stringDelims[0]))
00138             {
00139                // Save the attributes in order, replacing those that are
00140                // specified more than once.
00141                Attribute attr(attributeName, attributeValue);
00142                Attributes::iterator i = std::lower_bound(m_attributes.begin(),
00143                                                          m_attributes.end(),
00144                                                          attr);
00145                if (i != m_attributes.end() && i->key == attr.key)
00146                   throw ParserError("Attribute " + i->key + " defined more than once");
00147                m_attributes.insert(i, attr);
00148                state = IN_END_ATTRIBUTE_VALUE;
00149             }
00150             else if (skipChar(stringDelims[1]))
00151                state = IN_STRING_ENTITY;
00152             else
00153                throw ParserError("Unexpected end of input at " + attributeValue);
00154             break;
00155          
00156          case IN_END_ATTRIBUTE_VALUE:
00157             getToken(" />");
00158             if (nextChar() == ' ')
00159                state = IN_ELEMENT_WHITESPACE;
00160             else if (skipChar('/'))
00161                state = IN_END_ELEMENT;
00162             else if (skipChar('>'))
00163             {
00164                startElement(m_elementTags.back(), m_attributes);
00165                state = IN_END_TAG;
00166             }
00167             break;
00168          
00169          case IN_END_ELEMENT:
00170             tmp = getToken('>');
00171             if (!tmp.empty() && tmp != m_elementTags.back())
00172                throw ParserError("Non-matching closing element " 
00173                                  + tmp + " for " + attributeValue);
00174             endElement(tmp);
00175             m_elementTags.pop_back();
00176             state = IN_END_TAG;
00177             break;
00178          
00179          case IN_END_TAG:
00180             if (nextChar() == EOF)
00181                return;
00182             else if (skipChar('<'))
00183                state = IN_BEGIN_TAG;
00184             else
00185                state = IN_DATA;
00186             break;
00187        
00188          case IN_DATA:
00189             currentData += getToken("<&");
00190             if (skipChar('&'))
00191                state = IN_DATA_ENTITY;
00192             else if (skipChar('<'))
00193             {
00194                data(currentData);
00195                currentData.clear();               
00196                state = IN_BEGIN_TAG;
00197             }
00198             else if (nextChar() == EOF)
00199             {
00200                data(currentData);
00201                return;
00202             } 
00203             else
00204                throw ParserError("Unexpected end of input in element " + m_elementTags.back() + currentData);
00205             break;
00206          
00207          case IN_DATA_ENTITY:
00208             currentData += parseEntity(getToken(';'));
00209             state = IN_DATA;
00210             break;
00211             
00212          case IN_STRING_ENTITY:
00213             attributeValue += parseEntity(getToken(';'));
00214             state = IN_STRING;
00215             break;
00216          
00217          case IN_DONE:
00218             return;
00219       }
00220    }
00221 }
00222 
00223 SimpleSAXParser::~SimpleSAXParser() { delete [] m_buffer;}
00224 
00250 bool
00251 fgettoken(std::istream &in, char **buffer, size_t *maxSize, const char *separators,
00252           int *firstChar)
00253 {
00254   // if the passed first character is EOF or a separator,
00255   // return an empty otherwise use it as first character
00256   // of the buffer. 
00257   if (*firstChar == EOF || (int) separators[0] == *firstChar || strchr(separators + 1, *firstChar))
00258   {
00259     (*buffer)[0] = 0;
00260     return true;
00261   }
00262   else
00263     (*buffer)[0] = (char) *firstChar;
00264 
00265   size_t i = 1;
00266 
00267   while (true)
00268   {
00269     if (i >= *maxSize)
00270     {
00271       *maxSize += 1024;
00272       *buffer = (char*) realloc(*buffer, *maxSize);
00273       if (!*buffer)
00274          return false;
00275     }
00276 
00277     int c = in.get();
00278 
00279     if (c == EOF)
00280     {
00281        (*buffer)[i] = 0;
00282        *firstChar = c;
00283        return false;
00284     }
00285     
00286     if (separators[0] == c || strchr(separators + 1, c))
00287     {
00288       (*buffer)[i] = 0;
00289       *firstChar = c;
00290       return true;
00291     }
00292 
00293     (*buffer)[i++] = (char) c;
00294   }
00295 }