43 #if defined( DEBUG_PARSER )
44 # if defined( DEBUG ) && defined( _MSC_VER )
46 # define TIXML_LOG OutputDebugString
48 # define TIXML_LOG printf
60 {
""", 6,
'\"' },
81 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
82 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
83 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
84 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
85 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
86 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
87 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
88 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
89 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
90 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
91 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
93 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
96 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
102 const unsigned long BYTE_MASK = 0xBF;
103 const unsigned long BYTE_MARK = 0x80;
104 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
108 else if ( input < 0x800 )
110 else if ( input < 0x10000 )
112 else if ( input < 0x200000 )
115 { *length = 0;
return; }
124 *output = (
char)((input | BYTE_MARK) & BYTE_MASK);
128 *output = (
char)((input | BYTE_MARK) & BYTE_MASK);
132 *output = (
char)((input | BYTE_MARK) & BYTE_MASK);
136 *output = (
char)(input | FIRST_BYTE_MARK[*length]);
151 return isalpha( anyByte );
172 return isalnum( anyByte );
227 const unsigned char* pU = (
const unsigned char*)p;
276 if ( *(p+1) && *(p+2) )
282 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
284 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
336 const unsigned char* pU = (
const unsigned char*)p;
370 while ( ( *p &&
IsWhiteSpace( *p ) ) || *p ==
'\n' || *p ==
'\r' )
382 if ( !in->good() )
return false;
389 *tag += (
char) in->get();
399 if ( c == character )
430 && (
IsAlpha( (
unsigned char) *p, encoding ) || *p ==
'_' ) )
434 && (
IsAlphaNum( (
unsigned char ) *p, encoding )
444 name->assign( start, p-start );
457 if ( *(p+1) && *(p+1) ==
'#' && *(p+2) )
459 unsigned long ucs = 0;
466 if ( !*(p+3) )
return 0;
469 q = strchr( q,
';' );
471 if ( !q || !*q )
return 0;
478 if ( *q >=
'0' && *q <=
'9' )
479 ucs += mult * (*q -
'0');
480 else if ( *q >=
'a' && *q <=
'f' )
481 ucs += mult * (*q -
'a' + 10);
482 else if ( *q >=
'A' && *q <=
'F' )
483 ucs += mult * (*q -
'A' + 10 );
493 if ( !*(p+2) )
return 0;
496 q = strchr( q,
';' );
498 if ( !q || !*q )
return 0;
505 if ( *q >=
'0' && *q <=
'9' )
506 ucs += mult * (*q -
'0');
523 return p + delta + 1;
534 return ( p +
entity[
i].strLength );
563 while ( *q && *tag &&
ToLower( *q, encoding ) ==
ToLower( *tag, encoding ) )
574 while ( *q && *tag && *q == *tag )
590 bool caseInsensitive,
599 && !
StringEqual( p, endTag, caseInsensitive, encoding )
603 char cArr[4] = { 0, 0, 0, 0 };
604 p =
GetChar( p, cArr, &len, encoding );
605 text->append( cArr, len );
610 bool whitespace =
false;
615 && !
StringEqual( p, endTag, caseInsensitive, encoding ) )
617 if ( *p ==
'\r' || *p ==
'\n' )
637 char cArr[4] = { 0, 0, 0, 0 };
638 p =
GetChar( p, cArr, &len, encoding );
642 text->append( cArr, len );
647 p += strlen( endTag );
670 int tagIndex = (int) tag->length();
671 while ( in->good() && in->peek() !=
'>' )
749 const unsigned char* pU = (
const unsigned char*)p;
771 p = node->
Parse( p, &data, encoding );
824 if ( pError && data )
826 data->
Stamp( pError, encoding );
837 if( !p || !*p || *p !=
'<' )
857 const char* xmlHeader = {
"<?xml" };
858 const char* commentHeader = {
"<!--" };
859 const char* dtdHeader = {
"<!" };
860 const char* cdataHeader = {
"<![CDATA[" };
865 TIXML_LOG(
"XML parsing Declaration\n" );
869 else if (
StringEqual( p, commentHeader,
false, encoding ) )
872 TIXML_LOG(
"XML parsing Comment\n" );
876 else if (
StringEqual( p, cdataHeader,
false, encoding ) )
879 TIXML_LOG(
"XML parsing CDATA\n" );
885 else if (
StringEqual( p, dtdHeader,
false, encoding ) )
888 TIXML_LOG(
"XML parsing Unknown(1)\n" );
892 else if (
IsAlpha( *(p+1), encoding )
896 TIXML_LOG(
"XML parsing Element\n" );
903 TIXML_LOG(
"XML parsing Unknown(2)\n" );
911 returnNode->
parent =
this;
943 if ( tag->length() < 3 )
return;
948 if ( tag->at( tag->length() - 1 ) ==
'>'
949 && tag->at( tag->length() - 2 ) ==
'/' )
954 else if ( tag->at( tag->length() - 1 ) ==
'>' )
966 if ( in->good() && in->peek() !=
'<' )
979 if ( !in->good() )
return;
980 assert( in->peek() ==
'<' );
981 int tagIndex = (int) tag->length();
983 bool closingTag =
false;
984 bool firstCharFound =
false;
1007 if ( c ==
'[' && tag->size() >= 9 )
1009 size_t len = tag->size();
1010 const char*
start = tag->c_str() + len - 9;
1011 if ( strcmp( start,
"<![CDATA[" ) == 0 ) {
1012 assert( !closingTag );
1017 if ( !firstCharFound && c !=
'<' && !
IsWhiteSpace( c ) )
1019 firstCharFound =
true;
1048 const char* tagloc = tag->c_str() + tagIndex;
1076 data->
Stamp( p, encoding );
1089 const char* pErr =
p;
1124 else if ( *p ==
'>' )
1139 if (
StringEqual( p, endTag.c_str(),
false, encoding ) )
1141 p += endTag.length();
1162 p = attrib->
Parse( p, data, encoding );
1172 #ifdef TIXML_USE_STL
1196 const char* pWithWhiteSpace =
p;
1214 p = textNode->
Parse( p, data, encoding );
1220 p = textNode->
Parse( pWithWhiteSpace, data, encoding );
1223 if ( !textNode->
Blank() )
1242 p = node->
Parse( p, data, encoding );
1251 pWithWhiteSpace =
p;
1263 #ifdef TIXML_USE_STL
1266 while ( in->good() )
1295 data->
Stamp( p, encoding );
1298 if ( !p || !*p || *p !=
'<' )
1306 while ( p && *p && *p !=
'>' )
1321 #ifdef TIXML_USE_STL
1324 while ( in->good() )
1338 && tag->at( tag->length() - 2 ) ==
'-'
1339 && tag->at( tag->length() - 3 ) ==
'-' )
1358 data->
Stamp( p, encoding );
1361 const char* startTag =
"<!--";
1362 const char* endTag =
"-->";
1364 if ( !
StringEqual( p, startTag,
false, encoding ) )
1369 p += strlen( startTag );
1391 while ( p && *p && !
StringEqual( p, endTag,
false, encoding ) )
1393 value.append( p, 1 );
1397 p += strlen( endTag );
1406 if ( !p || !*p )
return 0;
1414 data->
Stamp( p, encoding );
1418 const char* pErr =
p;
1426 if ( !p || !*p || *p !=
'=' )
1441 const char SINGLE_QUOTE =
'\'';
1442 const char DOUBLE_QUOTE =
'\"';
1444 if ( *p == SINGLE_QUOTE )
1450 else if ( *p == DOUBLE_QUOTE )
1464 && *p !=
'/' && *p !=
'>' )
1466 if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
1480 #ifdef TIXML_USE_STL
1483 while ( in->good() )
1486 if ( !
cdata && (c ==
'<' ) )
1501 if (
cdata && c ==
'>' && tag->size() >= 3 ) {
1502 size_t len = tag->size();
1503 if ( (*tag)[len-2] ==
']' && (*tag)[len-3] ==
']' ) {
1519 data->
Stamp( p, encoding );
1523 const char*
const startTag =
"<![CDATA[";
1524 const char*
const endTag =
"]]>";
1530 if ( !
StringEqual( p, startTag,
false, encoding ) )
1535 p += strlen( startTag );
1547 p =
ReadText( p, &dummy,
false, endTag,
false, encoding );
1552 bool ignoreWhite =
true;
1554 const char*
end =
"<";
1555 p =
ReadText( p, &
value, ignoreWhite, end,
false, encoding );
1562 #ifdef TIXML_USE_STL
1565 while ( in->good() )
1592 if ( !p || !*p || !
StringEqual( p,
"<?xml",
true, _encoding ) )
1599 data->
Stamp( p, _encoding );
1617 if (
StringEqual( p,
"version",
true, _encoding ) )
1620 p = attrib.
Parse( p, data, _encoding );
1623 else if (
StringEqual( p,
"encoding",
true, _encoding ) )
1626 p = attrib.
Parse( p, data, _encoding );
1629 else if (
StringEqual( p,
"standalone",
true, _encoding ) )
1632 p = attrib.
Parse( p, data, _encoding );
1647 for (
unsigned i=0;
i<
value.length();
i++ )
static const int utf8ByteTable[256]
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
const unsigned char TIXML_UTF_LEAD_2
tuple start
Check for commandline option errors.
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)=0
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
static int ToLower(int v, TiXmlEncoding encoding)
static int IsAlphaNum(unsigned char anyByte, TiXmlEncoding encoding)
const TIXML_STRING & NameTStr() const
static const char * ReadName(const char *p, TIXML_STRING *name, TiXmlEncoding encoding)
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
const char * Value() const
Return the value of this attribute.
const TiXmlCursor & Cursor()
static const char * errorString[TIXML_ERROR_STRING_COUNT]
const unsigned char TIXML_UTF_LEAD_0
static void ConvertUTF32ToUTF8(unsigned long input, char *output, int *length)
static bool condenseWhiteSpace
static std::string const input
virtual const TiXmlDeclaration * ToDeclaration() const
Cast to a more defined type. Will return null if not of the requested type.
const char * Encoding() const
Encoding. Will return an empty string if none was found.
TiXmlNode * LinkEndChild(TiXmlNode *addThis)
const char * Name() const
Return the name of this attribute.
const unsigned char TIXML_UTF_LEAD_1
static bool IsWhiteSpace(char c)
TiXmlAttributeSet attributeSet
const TiXmlEncoding TIXML_DEFAULT_ENCODING
static bool IsWhiteSpaceCondensed()
Return the current white space setting.
static const char * ReadText(const char *in, TIXML_STRING *text, bool ignoreWhiteSpace, const char *endTag, bool ignoreCase, TiXmlEncoding encoding)
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
void SetCDATA(bool _cdata)
Turns on or off a CDATA representation of text.
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)=0
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
const TiXmlAttribute * Find(const char *_name) const
static Entity entity[NUM_ENTITY]
static const char * SkipWhiteSpace(const char *, TiXmlEncoding encoding)
friend class TiXmlElement
TiXmlNode * Identify(const char *start, TiXmlEncoding encoding)
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
static bool StreamTo(std::istream *in, int character, TIXML_STRING *tag)
virtual const TiXmlElement * ToElement() const
Cast to a more defined type. Will return null if not of the requested type.
const char * ReadValue(const char *in, TiXmlParsingData *prevData, TiXmlEncoding encoding)
void Add(TiXmlAttribute *attribute)
const TiXmlDocument * GetDocument() const
static const char * GetEntity(const char *in, char *value, int *length, TiXmlEncoding encoding)
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
static bool StringEqual(const char *p, const char *endTag, bool ignoreCase, TiXmlEncoding encoding)
virtual const char * Parse(const char *p, TiXmlParsingData *data=0, TiXmlEncoding encoding=TIXML_DEFAULT_ENCODING)
void SetError(int err, const char *errorLocation, TiXmlParsingData *prevData, TiXmlEncoding encoding)
void Stamp(const char *now, TiXmlEncoding encoding)
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
char data[epos_bytes_allocation]
static const char * GetChar(const char *p, char *_value, int *length, TiXmlEncoding encoding)
TiXmlParsingData(const char *start, int _tabsize, int row, int col)
void SetDocument(TiXmlDocument *doc)
void SetValue(const char *_value)
Set the value.
static int IsAlpha(unsigned char anyByte, TiXmlEncoding encoding)
static bool StreamWhiteSpace(std::istream *in, TIXML_STRING *tag)
TiXmlCursor errorLocation