CMS 3D CMS Logo

/data/doxygen/doxygen-1.7.3/gen/CMSSW_4_2_8/src/FWCore/Utilities/src/tinyxmlparser.cc

Go to the documentation of this file.
00001 /*
00002 www.sourceforge.net/projects/tinyxml
00003 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
00004 
00005 This software is provided 'as-is', without any express or implied
00006 warranty. In no event will the authors be held liable for any
00007 damages arising from the use of this software.
00008 
00009 Permission is granted to anyone to use this software for any
00010 purpose, including commercial applications, and to alter it and
00011 redistribute it freely, subject to the following restrictions:
00012 
00013 1. The origin of this software must not be misrepresented; you must
00014 not claim that you wrote the original software. If you use this
00015 software in a product, an acknowledgment in the product documentation
00016 would be appreciated but is not required.
00017 
00018 2. Altered source versions must be plainly marked as such, and
00019 must not be misrepresented as being the original software.
00020 
00021 3. This notice may not be removed or altered from any source
00022 distribution.
00023 */
00024 
00025 /*
00026  * THIS FILE WAS ALTERED BY Eric Vaandering, 25 August 2009.
00027  */
00028 #define TIXML_USE_STL
00029 #include <ctype.h>
00030 #include <stddef.h>
00031 
00032 #include "FWCore/Utilities/interface/tinyxml.h"
00033 
00034 //#define DEBUG_PARSER
00035 #if defined( DEBUG_PARSER )
00036 #       if defined( DEBUG ) && defined( _MSC_VER )
00037 #               include <windows.h>
00038 #               define TIXML_LOG OutputDebugString
00039 #       else
00040 #               define TIXML_LOG printf
00041 #       endif
00042 #endif
00043 
00044 // Note tha "PutString" hardcodes the same list. This
00045 // is less flexible than it appears. Changing the entries
00046 // or order will break putstring.
00047 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
00048 {
00049         { "&amp;",  5, '&' },
00050         { "&lt;",   4, '<' },
00051         { "&gt;",   4, '>' },
00052         { "&quot;", 6, '\"' },
00053         { "&apos;", 6, '\'' }
00054 };
00055 
00056 // Bunch of unicode info at:
00057 //              https://www.unicode.org/faq/utf_bom.html
00058 // Including the basic of this table, which determines the #bytes in the
00059 // sequence from the lead byte. 1 placed for invalid sequences --
00060 // although the result will be junk, pass it through as much as possible.
00061 // Beware of the non-characters in UTF-8:
00062 //                              ef bb bf (Microsoft "lead bytes")
00063 //                              ef bf be
00064 //                              ef bf bf
00065 
// The three bytes (EF BB BF) of the Microsoft UTF-8 "lead bytes" marker,
// in the order they appear in the stream.
const unsigned char TIXML_UTF_LEAD_0 = 0xEF;
const unsigned char TIXML_UTF_LEAD_1 = 0xBB;
const unsigned char TIXML_UTF_LEAD_2 = 0xBF;
00069 
00070 const int TiXmlBase::utf8ByteTable[256] =
00071 {
00072         //      0       1       2       3       4       5       6       7       8       9       a       b       c       d       e       f
00073                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x00
00074                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x10
00075                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x20
00076                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x30
00077                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x40
00078                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x50
00079                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x60
00080                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x70 End of ASCII range
00081                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x80 0x80 to 0xc1 invalid
00082                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x90
00083                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xa0
00084                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xb0
00085                 1,      1,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xc0 0xc2 to 0xdf 2 byte
00086                 2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xd0
00087                 3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      // 0xe0 0xe0 to 0xef 3 byte
00088                 4,      4,      4,      4,      4,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1       // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
00089 };
00090 
00091 
00092 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
00093 {
00094         const unsigned long BYTE_MASK = 0xBF;
00095         const unsigned long BYTE_MARK = 0x80;
00096         const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
00097 
00098         if (input < 0x80)
00099                 *length = 1;
00100         else if ( input < 0x800 )
00101                 *length = 2;
00102         else if ( input < 0x10000 )
00103                 *length = 3;
00104         else if ( input < 0x200000 )
00105                 *length = 4;
00106         else
00107                 { *length = 0; return; }        // This code won't covert this correctly anyway.
00108 
00109         output += *length;
00110 
00111         // Scary scary fall throughs.
00112         switch (*length)
00113         {
00114                 case 4:
00115                         --output;
00116                         *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00117                         input >>= 6;
00118                 case 3:
00119                         --output;
00120                         *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00121                         input >>= 6;
00122                 case 2:
00123                         --output;
00124                         *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00125                         input >>= 6;
00126                 case 1:
00127                         --output;
00128                         *output = (char)(input | FIRST_BYTE_MARK[*length]);
00129         }
00130 }
00131 
00132 
00133 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00134 {
00135         // This will only work for low-ascii, everything else is assumed to be a valid
00136         // letter. I'm not sure this is the best approach, but it is quite tricky trying
00137         // to figure out alhabetical vs. not across encoding. So take a very
00138         // conservative approach.
00139 
00140 //      if ( encoding == TIXML_ENCODING_UTF8 )
00141 //      {
00142                 if ( anyByte < 127 )
00143                         return isalpha( anyByte );
00144                 else
00145                         return 1;       // What else to do? The unicode set is huge...get the english ones right.
00146 //      }
00147 //      else
00148 //      {
00149 //              return isalpha( anyByte );
00150 //      }
00151 }
00152 
00153 
00154 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00155 {
00156         // This will only work for low-ascii, everything else is assumed to be a valid
00157         // letter. I'm not sure this is the best approach, but it is quite tricky trying
00158         // to figure out alhabetical vs. not across encoding. So take a very
00159         // conservative approach.
00160 
00161 //      if ( encoding == TIXML_ENCODING_UTF8 )
00162 //      {
00163                 if ( anyByte < 127 )
00164                         return isalnum( anyByte );
00165                 else
00166                         return 1;       // What else to do? The unicode set is huge...get the english ones right.
00167 //      }
00168 //      else
00169 //      {
00170 //              return isalnum( anyByte );
00171 //      }
00172 }
00173 
00174 
00175 class TiXmlParsingData
00176 {
00177         friend class TiXmlDocument;
00178   public:
00179         void Stamp( const char* now, TiXmlEncoding encoding );
00180 
00181         const TiXmlCursor& Cursor()     { return cursor; }
00182 
00183   private:
00184         // Only used by the document!
00185         TiXmlParsingData( const char* start, int _tabsize, int row, int col )
00186         {
00187                 assert( start );
00188                 stamp = start;
00189                 tabsize = _tabsize;
00190                 cursor.row = row;
00191                 cursor.col = col;
00192         }
00193 
00194         TiXmlCursor             cursor;
00195         const char*             stamp;
00196         int                             tabsize;
00197 };
00198 
00199 
00200 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
00201 {
00202         assert( now );
00203 
00204         // Do nothing if the tabsize is 0.
00205         if ( tabsize < 1 )
00206         {
00207                 return;
00208         }
00209 
00210         // Get the current row, column.
00211         int row = cursor.row;
00212         int col = cursor.col;
00213         const char* p = stamp;
00214         assert( p );
00215 
00216         while ( p < now )
00217         {
00218                 // Treat p as unsigned, so we have a happy compiler.
00219                 const unsigned char* pU = (const unsigned char*)p;
00220 
00221                 // Code contributed by Fletcher Dunn: (modified by lee)
00222                 switch (*pU) {
00223                         case 0:
00224                                 // We *should* never get here, but in case we do, don't
00225                                 // advance past the terminating null character, ever
00226                                 return;
00227 
00228                         case '\r':
00229                                 // bump down to the next line
00230                                 ++row;
00231                                 col = 0;
00232                                 // Eat the character
00233                                 ++p;
00234 
00235                                 // Check for \r\n sequence, and treat this as a single character
00236                                 if (*p == '\n') {
00237                                         ++p;
00238                                 }
00239                                 break;
00240 
00241                         case '\n':
00242                                 // bump down to the next line
00243                                 ++row;
00244                                 col = 0;
00245 
00246                                 // Eat the character
00247                                 ++p;
00248 
00249                                 // Check for \n\r sequence, and treat this as a single
00250                                 // character.  (Yes, this bizarre thing does occur still
00251                                 // on some arcane platforms...)
00252                                 if (*p == '\r') {
00253                                         ++p;
00254                                 }
00255                                 break;
00256 
00257                         case '\t':
00258                                 // Eat the character
00259                                 ++p;
00260 
00261                                 // Skip to next tab stop
00262                                 col = (col / tabsize + 1) * tabsize;
00263                                 break;
00264 
00265                         case TIXML_UTF_LEAD_0:
00266                                 if ( encoding == TIXML_ENCODING_UTF8 )
00267                                 {
00268                                         if ( *(p+1) && *(p+2) )
00269                                         {
00270                                                 // In these cases, don't advance the column. These are
00271                                                 // 0-width spaces.
00272                                                 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00273                                                         p += 3;
00274                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00275                                                         p += 3;
00276                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00277                                                         p += 3;
00278                                                 else
00279                                                         { p +=3; ++col; }       // A normal character.
00280                                         }
00281                                 }
00282                                 else
00283                                 {
00284                                         ++p;
00285                                         ++col;
00286                                 }
00287                                 break;
00288 
00289                         default:
00290                                 if ( encoding == TIXML_ENCODING_UTF8 )
00291                                 {
00292                                         // Eat the 1 to 4 byte utf8 character.
00293                                         int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
00294                                         if ( step == 0 )
00295                                                 step = 1;               // Error case from bad encoding, but handle gracefully.
00296                                         p += step;
00297 
00298                                         // Just advance one column, of course.
00299                                         ++col;
00300                                 }
00301                                 else
00302                                 {
00303                                         ++p;
00304                                         ++col;
00305                                 }
00306                                 break;
00307                 }
00308         }
00309         cursor.row = row;
00310         cursor.col = col;
00311         assert( cursor.row >= -1 );
00312         assert( cursor.col >= -1 );
00313         stamp = p;
00314         assert( stamp );
00315 }
00316 
00317 
00318 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
00319 {
00320         if ( !p || !*p )
00321         {
00322                 return 0;
00323         }
00324         if ( encoding == TIXML_ENCODING_UTF8 )
00325         {
00326                 while ( *p )
00327                 {
00328                         const unsigned char* pU = (const unsigned char*)p;
00329 
00330                         // Skip the stupid Microsoft UTF-8 Byte order marks
00331                         if (    *(pU+0)==TIXML_UTF_LEAD_0
00332                                  && *(pU+1)==TIXML_UTF_LEAD_1
00333                                  && *(pU+2)==TIXML_UTF_LEAD_2 )
00334                         {
00335                                 p += 3;
00336                                 continue;
00337                         }
00338                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00339                                  && *(pU+1)==0xbfU
00340                                  && *(pU+2)==0xbeU )
00341                         {
00342                                 p += 3;
00343                                 continue;
00344                         }
00345                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00346                                  && *(pU+1)==0xbfU
00347                                  && *(pU+2)==0xbfU )
00348                         {
00349                                 p += 3;
00350                                 continue;
00351                         }
00352 
00353                         if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )            // Still using old rules for white space.
00354                                 ++p;
00355                         else
00356                                 break;
00357                 }
00358         }
00359         else
00360         {
00361                 // Code altered by user.  An extra pair of () was added to eliminate a compiler warning.
00362                 while ( ( *p && IsWhiteSpace( *p ) ) || *p == '\n' || *p =='\r' )
00363                         ++p;
00364         }
00365 
00366         return p;
00367 }
00368 
00369 #ifdef TIXML_USE_STL
00370 /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
00371 {
00372         for( ;; )
00373         {
00374                 if ( !in->good() ) return false;
00375 
00376                 int c = in->peek();
00377                 // At this scope, we can't get to a document. So fail silently.
00378                 if ( !IsWhiteSpace( c ) || c <= 0 )
00379                         return true;
00380 
00381                 *tag += (char) in->get();
00382         }
00383 }
00384 
00385 /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
00386 {
00387         //assert( character > 0 && character < 128 );   // else it won't work in utf-8
00388         while ( in->good() )
00389         {
00390                 int c = in->peek();
00391                 if ( c == character )
00392                         return true;
00393                 if ( c <= 0 )           // Silent failure: can't get document at this scope
00394                         return false;
00395 
00396                 in->get();
00397                 *tag += (char) c;
00398         }
00399         return false;
00400 }
00401 #endif
00402 
00403 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
00404 // "assign" optimization removes over 10% of the execution time.
00405 //
00406 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
00407 {
00408         // Oddly, not supported on some comilers,
00409         //name->clear();
00410         // So use this:
00411         *name = "";
00412         assert( p );
00413 
00414         // Names start with letters or underscores.
00415         // Of course, in unicode, tinyxml has no idea what a letter *is*. The
00416         // algorithm is generous.
00417         //
00418         // After that, they can be letters, underscores, numbers,
00419         // hyphens, or colons. (Colons are valid ony for namespaces,
00420         // but tinyxml can't tell namespaces from names.)
00421         if (    p && *p
00422                  && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00423         {
00424                 const char* start = p;
00425                 while(          p && *p
00426                                 &&      (               IsAlphaNum( (unsigned char ) *p, encoding )
00427                                                  || *p == '_'
00428                                                  || *p == '-'
00429                                                  || *p == '.'
00430                                                  || *p == ':' ) )
00431                 {
00432                         //(*name) += *p; // expensive
00433                         ++p;
00434                 }
00435                 if ( p-start > 0 ) {
00436                         name->assign( start, p-start );
00437                 }
00438                 return p;
00439         }
00440         return 0;
00441 }
00442 
00443 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
00444 {
00445         // Presume an entity, and pull it out.
00446     TIXML_STRING ent;
00447         int i;
00448         *length = 0;
00449 
00450         if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00451         {
00452                 unsigned long ucs = 0;
00453                 ptrdiff_t delta = 0;
00454                 unsigned mult = 1;
00455 
00456                 if ( *(p+2) == 'x' )
00457                 {
00458                         // Hexadecimal.
00459                         if ( !*(p+3) ) return 0;
00460 
00461                         const char* q = p+3;
00462                         q = strchr( q, ';' );
00463 
00464                         if ( !q || !*q ) return 0;
00465 
00466                         delta = q-p;
00467                         --q;
00468 
00469                         while ( *q != 'x' )
00470                         {
00471                                 if ( *q >= '0' && *q <= '9' )
00472                                         ucs += mult * (*q - '0');
00473                                 else if ( *q >= 'a' && *q <= 'f' )
00474                                         ucs += mult * (*q - 'a' + 10);
00475                                 else if ( *q >= 'A' && *q <= 'F' )
00476                                         ucs += mult * (*q - 'A' + 10 );
00477                                 else
00478                                         return 0;
00479                                 mult *= 16;
00480                                 --q;
00481                         }
00482                 }
00483                 else
00484                 {
00485                         // Decimal.
00486                         if ( !*(p+2) ) return 0;
00487 
00488                         const char* q = p+2;
00489                         q = strchr( q, ';' );
00490 
00491                         if ( !q || !*q ) return 0;
00492 
00493                         delta = q-p;
00494                         --q;
00495 
00496                         while ( *q != '#' )
00497                         {
00498                                 if ( *q >= '0' && *q <= '9' )
00499                                         ucs += mult * (*q - '0');
00500                                 else
00501                                         return 0;
00502                                 mult *= 10;
00503                                 --q;
00504                         }
00505                 }
00506                 if ( encoding == TIXML_ENCODING_UTF8 )
00507                 {
00508                         // convert the UCS to UTF-8
00509                         ConvertUTF32ToUTF8( ucs, value, length );
00510                 }
00511                 else
00512                 {
00513                         *value = (char)ucs;
00514                         *length = 1;
00515                 }
00516                 return p + delta + 1;
00517         }
00518 
00519         // Now try to match it.
00520         for( i=0; i<NUM_ENTITY; ++i )
00521         {
00522                 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00523                 {
00524                         assert( strlen( entity[i].str ) == entity[i].strLength );
00525                         *value = entity[i].chr;
00526                         *length = 1;
00527                         return ( p + entity[i].strLength );
00528                 }
00529         }
00530 
00531         // So it wasn't an entity, its unrecognized, or something like that.
00532         *value = *p;    // Don't put back the last one, since we return it!
00533         //*length = 1;  // Leave unrecognized entities - this doesn't really work.
00534                                         // Just writes strange XML.
00535         return p+1;
00536 }
00537 
00538 
00539 bool TiXmlBase::StringEqual( const char* p,
00540                                                          const char* tag,
00541                                                          bool ignoreCase,
00542                                                          TiXmlEncoding encoding )
00543 {
00544         assert( p );
00545         assert( tag );
00546         if ( !p || !*p )
00547         {
00548                 assert( 0 );
00549                 return false;
00550         }
00551 
00552         const char* q = p;
00553 
00554         if ( ignoreCase )
00555         {
00556                 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00557                 {
00558                         ++q;
00559                         ++tag;
00560                 }
00561 
00562                 if ( *tag == 0 )
00563                         return true;
00564         }
00565         else
00566         {
00567                 while ( *q && *tag && *q == *tag )
00568                 {
00569                         ++q;
00570                         ++tag;
00571                 }
00572 
00573                 if ( *tag == 0 )                // Have we found the end of the tag, and everything equal?
00574                         return true;
00575         }
00576         return false;
00577 }
00578 
00579 const char* TiXmlBase::ReadText(        const char* p,
00580                                                                         TIXML_STRING * text,
00581                                                                         bool trimWhiteSpace,
00582                                                                         const char* endTag,
00583                                                                         bool caseInsensitive,
00584                                                                         TiXmlEncoding encoding )
00585 {
00586     *text = "";
00587         if (    !trimWhiteSpace                 // certain tags always keep whitespace
00588                  || !condenseWhiteSpace )       // if true, whitespace is always kept
00589         {
00590                 // Keep all the white space.
00591                 while (    p && *p
00592                                 && !StringEqual( p, endTag, caseInsensitive, encoding )
00593                           )
00594                 {
00595                         int len;
00596                         char cArr[4] = { 0, 0, 0, 0 };
00597                         p = GetChar( p, cArr, &len, encoding );
00598                         text->append( cArr, len );
00599                 }
00600         }
00601         else
00602         {
00603                 bool whitespace = false;
00604 
00605                 // Remove leading white space:
00606                 p = SkipWhiteSpace( p, encoding );
00607                 while (    p && *p
00608                                 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00609                 {
00610                         if ( *p == '\r' || *p == '\n' )
00611                         {
00612                                 whitespace = true;
00613                                 ++p;
00614                         }
00615                         else if ( IsWhiteSpace( *p ) )
00616                         {
00617                                 whitespace = true;
00618                                 ++p;
00619                         }
00620                         else
00621                         {
00622                                 // If we've found whitespace, add it before the
00623                                 // new character. Any whitespace just becomes a space.
00624                                 if ( whitespace )
00625                                 {
00626                                         (*text) += ' ';
00627                                         whitespace = false;
00628                                 }
00629                                 int len;
00630                                 char cArr[4] = { 0, 0, 0, 0 };
00631                                 p = GetChar( p, cArr, &len, encoding );
00632                                 if ( len == 1 )
00633                                         (*text) += cArr[0];     // more efficient
00634                                 else
00635                                         text->append( cArr, len );
00636                         }
00637                 }
00638         }
00639         if ( p )
00640                 p += strlen( endTag );
00641         return p;
00642 }
00643 
00644 #ifdef TIXML_USE_STL
00645 
00646 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
00647 {
00648         // The basic issue with a document is that we don't know what we're
00649         // streaming. Read something presumed to be a tag (and hope), then
00650         // identify it, and call the appropriate stream method on the tag.
00651         //
00652         // This "pre-streaming" will never read the closing ">" so the
00653         // sub-tag can orient itself.
00654 
00655         if ( !StreamTo( in, '<', tag ) )
00656         {
00657                 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00658                 return;
00659         }
00660 
00661         while ( in->good() )
00662         {
00663                 int tagIndex = (int) tag->length();
00664                 while ( in->good() && in->peek() != '>' )
00665                 {
00666                         int c = in->get();
00667                         if ( c <= 0 )
00668                         {
00669                                 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00670                                 break;
00671                         }
00672                         (*tag) += (char) c;
00673                 }
00674 
00675                 if ( in->good() )
00676                 {
00677                         // We now have something we presume to be a node of
00678                         // some sort. Identify it, and call the node to
00679                         // continue streaming.
00680                         TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00681 
00682                         if ( node )
00683                         {
00684                                 node->StreamIn( in, tag );
00685                                 bool isElement = node->ToElement() != 0;
00686                                 delete node;
00687                                 node = 0;
00688 
00689                                 // If this is the root element, we're done. Parsing will be
00690                                 // done by the >> operator.
00691                                 if ( isElement )
00692                                 {
00693                                         return;
00694                                 }
00695                         }
00696                         else
00697                         {
00698                                 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00699                                 return;
00700                         }
00701                 }
00702         }
00703         // We should have returned sooner.
00704         SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00705 }
00706 
00707 #endif
00708 
00709 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
00710 {
00711         ClearError();
00712 
00713         // Parse away, at the document level. Since a document
00714         // contains nothing but other tags, most of what happens
00715         // here is skipping white space.
00716         if ( !p || !*p )
00717         {
00718                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00719                 return 0;
00720         }
00721 
00722         // Note that, for a document, this needs to come
00723         // before the while space skip, so that parsing
00724         // starts from the pointer we are given.
00725         location.Clear();
00726         if ( prevData )
00727         {
00728                 location.row = prevData->cursor.row;
00729                 location.col = prevData->cursor.col;
00730         }
00731         else
00732         {
00733                 location.row = 0;
00734                 location.col = 0;
00735         }
00736         TiXmlParsingData data( p, TabSize(), location.row, location.col );
00737         location = data.Cursor();
00738 
00739         if ( encoding == TIXML_ENCODING_UNKNOWN )
00740         {
00741                 // Check for the Microsoft UTF-8 lead bytes.
00742                 const unsigned char* pU = (const unsigned char*)p;
00743                 if (    *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00744                          && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00745                          && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00746                 {
00747                         encoding = TIXML_ENCODING_UTF8;
00748                         useMicrosoftBOM = true;
00749                 }
00750         }
00751 
00752     p = SkipWhiteSpace( p, encoding );
00753         if ( !p )
00754         {
00755                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00756                 return 0;
00757         }
00758 
00759         while ( p && *p )
00760         {
00761                 TiXmlNode* node = Identify( p, encoding );
00762                 if ( node )
00763                 {
00764                         p = node->Parse( p, &data, encoding );
00765                         LinkEndChild( node );
00766                 }
00767                 else
00768                 {
00769                         break;
00770                 }
00771 
00772                 // Did we get encoding info?
00773                 if (    encoding == TIXML_ENCODING_UNKNOWN
00774                          && node->ToDeclaration() )
00775                 {
00776                         TiXmlDeclaration* dec = node->ToDeclaration();
00777                         const char* enc = dec->Encoding();
00778                         assert( enc );
00779 
00780                         if ( *enc == 0 )
00781                                 encoding = TIXML_ENCODING_UTF8;
00782                         else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00783                                 encoding = TIXML_ENCODING_UTF8;
00784                         else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00785                                 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
00786                         else
00787                                 encoding = TIXML_ENCODING_LEGACY;
00788                 }
00789 
00790                 p = SkipWhiteSpace( p, encoding );
00791         }
00792 
00793         // Was this empty?
00794         if ( !firstChild ) {
00795                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
00796                 return 0;
00797         }
00798 
00799         // All is well.
00800         return p;
00801 }
00802 
00803 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
00804 {
00805         // The first error in a chain is more accurate - don't set again!
00806         if ( error )
00807                 return;
00808 
00809         assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
00810         error   = true;
00811         errorId = err;
00812         errorDesc = errorString[ errorId ];
00813 
00814         errorLocation.Clear();
00815         if ( pError && data )
00816         {
00817                 data->Stamp( pError, encoding );
00818                 errorLocation = data->Cursor();
00819         }
00820 }
00821 
00822 
00823 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
00824 {
00825         TiXmlNode* returnNode = 0;
00826 
00827         p = SkipWhiteSpace( p, encoding );
00828         if( !p || !*p || *p != '<' )
00829         {
00830                 return 0;
00831         }
00832 
00833         TiXmlDocument* doc = GetDocument();
00834         p = SkipWhiteSpace( p, encoding );
00835 
00836         if ( !p || !*p )
00837         {
00838                 return 0;
00839         }
00840 
00841         // What is this thing?
00842         // - Elements start with a letter or underscore, but xml is reserved.
00843         // - Comments: <!--
00844         // - Decleration: <?xml
00845         // - Everthing else is unknown to tinyxml.
00846         //
00847 
00848         const char* xmlHeader = { "<?xml" };
00849         const char* commentHeader = { "<!--" };
00850         const char* dtdHeader = { "<!" };
00851         const char* cdataHeader = { "<![CDATA[" };
00852 
00853         if ( StringEqual( p, xmlHeader, true, encoding ) )
00854         {
00855                 #ifdef DEBUG_PARSER
00856                         TIXML_LOG( "XML parsing Declaration\n" );
00857                 #endif
00858                 returnNode = new TiXmlDeclaration();
00859         }
00860         else if ( StringEqual( p, commentHeader, false, encoding ) )
00861         {
00862                 #ifdef DEBUG_PARSER
00863                         TIXML_LOG( "XML parsing Comment\n" );
00864                 #endif
00865                 returnNode = new TiXmlComment();
00866         }
00867         else if ( StringEqual( p, cdataHeader, false, encoding ) )
00868         {
00869                 #ifdef DEBUG_PARSER
00870                         TIXML_LOG( "XML parsing CDATA\n" );
00871                 #endif
00872                 TiXmlText* text = new TiXmlText( "" );
00873                 text->SetCDATA( true );
00874                 returnNode = text;
00875         }
00876         else if ( StringEqual( p, dtdHeader, false, encoding ) )
00877         {
00878                 #ifdef DEBUG_PARSER
00879                         TIXML_LOG( "XML parsing Unknown(1)\n" );
00880                 #endif
00881                 returnNode = new TiXmlUnknown();
00882         }
00883         else if (    IsAlpha( *(p+1), encoding )
00884                           || *(p+1) == '_' )
00885         {
00886                 #ifdef DEBUG_PARSER
00887                         TIXML_LOG( "XML parsing Element\n" );
00888                 #endif
00889                 returnNode = new TiXmlElement( "" );
00890         }
00891         else
00892         {
00893                 #ifdef DEBUG_PARSER
00894                         TIXML_LOG( "XML parsing Unknown(2)\n" );
00895                 #endif
00896                 returnNode = new TiXmlUnknown();
00897         }
00898 
00899         if ( returnNode )
00900         {
00901                 // Set the parent, so it can report errors
00902                 returnNode->parent = this;
00903         }
00904         else
00905         {
00906                 if ( doc )
00907                         doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
00908         }
00909         return returnNode;
00910 }
00911 
00912 #ifdef TIXML_USE_STL
00913 
00914 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
00915 {
00916         // We're called with some amount of pre-parsing. That is, some of "this"
00917         // element is in "tag". Go ahead and stream to the closing ">"
00918         while( in->good() )
00919         {
00920                 int c = in->get();
00921                 if ( c <= 0 )
00922                 {
00923                         TiXmlDocument* document = GetDocument();
00924                         if ( document )
00925                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00926                         return;
00927                 }
00928                 (*tag) += (char) c ;
00929 
00930                 if ( c == '>' )
00931                         break;
00932         }
00933 
00934         if ( tag->length() < 3 ) return;
00935 
00936         // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
00937         // If not, identify and stream.
00938 
00939         if (    tag->at( tag->length() - 1 ) == '>'
00940                  && tag->at( tag->length() - 2 ) == '/' )
00941         {
00942                 // All good!
00943                 return;
00944         }
00945         else if ( tag->at( tag->length() - 1 ) == '>' )
00946         {
00947                 // There is more. Could be:
00948                 //              text
00949                 //              cdata text (which looks like another node)
00950                 //              closing tag
00951                 //              another node.
00952                 for ( ;; )
00953                 {
00954                         StreamWhiteSpace( in, tag );
00955 
00956                         // Do we have text?
00957                         if ( in->good() && in->peek() != '<' )
00958                         {
00959                                 // Yep, text.
00960                                 TiXmlText text( "" );
00961                                 text.StreamIn( in, tag );
00962 
00963                                 // What follows text is a closing tag or another node.
00964                                 // Go around again and figure it out.
00965                                 continue;
00966                         }
00967 
00968                         // We now have either a closing tag...or another node.
00969                         // We should be at a "<", regardless.
00970                         if ( !in->good() ) return;
00971                         assert( in->peek() == '<' );
00972                         int tagIndex = (int) tag->length();
00973 
00974                         bool closingTag = false;
00975                         bool firstCharFound = false;
00976 
00977                         for( ;; )
00978                         {
00979                                 if ( !in->good() )
00980                                         return;
00981 
00982                                 int c = in->peek();
00983                                 if ( c <= 0 )
00984                                 {
00985                                         TiXmlDocument* document = GetDocument();
00986                                         if ( document )
00987                                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00988                                         return;
00989                                 }
00990 
00991                                 if ( c == '>' )
00992                                         break;
00993 
00994                                 *tag += (char) c;
00995                                 in->get();
00996 
00997                                 // Early out if we find the CDATA id.
00998                                 if ( c == '[' && tag->size() >= 9 )
00999                                 {
01000                                         size_t len = tag->size();
01001                                         const char* start = tag->c_str() + len - 9;
01002                                         if ( strcmp( start, "<![CDATA[" ) == 0 ) {
01003                                                 assert( !closingTag );
01004                                                 break;
01005                                         }
01006                                 }
01007 
01008                                 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
01009                                 {
01010                                         firstCharFound = true;
01011                                         if ( c == '/' )
01012                                                 closingTag = true;
01013                                 }
01014                         }
01015                         // If it was a closing tag, then read in the closing '>' to clean up the input stream.
01016                         // If it was not, the streaming will be done by the tag.
01017                         if ( closingTag )
01018                         {
01019                                 if ( !in->good() )
01020                                         return;
01021 
01022                                 int c = in->get();
01023                                 if ( c <= 0 )
01024                                 {
01025                                         TiXmlDocument* document = GetDocument();
01026                                         if ( document )
01027                                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01028                                         return;
01029                                 }
01030                                 assert( c == '>' );
01031                                 *tag += (char) c;
01032 
01033                                 // We are done, once we've found our closing tag.
01034                                 return;
01035                         }
01036                         else
01037                         {
01038                                 // If not a closing tag, id it, and stream.
01039                                 const char* tagloc = tag->c_str() + tagIndex;
01040                                 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
01041                                 if ( !node )
01042                                         return;
01043                                 node->StreamIn( in, tag );
01044                                 delete node;
01045                                 node = 0;
01046 
01047                                 // No return: go around from the beginning: text, closing tag, or node.
01048                         }
01049                 }
01050         }
01051 }
01052 #endif
01053 
01054 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01055 {
01056         p = SkipWhiteSpace( p, encoding );
01057         TiXmlDocument* document = GetDocument();
01058 
01059         if ( !p || !*p )
01060         {
01061                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
01062                 return 0;
01063         }
01064 
01065         if ( data )
01066         {
01067                 data->Stamp( p, encoding );
01068                 location = data->Cursor();
01069         }
01070 
01071         if ( *p != '<' )
01072         {
01073                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
01074                 return 0;
01075         }
01076 
01077         p = SkipWhiteSpace( p+1, encoding );
01078 
01079         // Read the name.
01080         const char* pErr = p;
01081 
01082     p = ReadName( p, &value, encoding );
01083         if ( !p || !*p )
01084         {
01085                 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
01086                 return 0;
01087         }
01088 
01089     TIXML_STRING endTag ("</");
01090         endTag += value;
01091         endTag += ">";
01092 
01093         // Check for and read attributes. Also look for an empty
01094         // tag or an end tag.
01095         while ( p && *p )
01096         {
01097                 pErr = p;
01098                 p = SkipWhiteSpace( p, encoding );
01099                 if ( !p || !*p )
01100                 {
01101                         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01102                         return 0;
01103                 }
01104                 if ( *p == '/' )
01105                 {
01106                         ++p;
01107                         // Empty tag.
01108                         if ( *p  != '>' )
01109                         {
01110                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
01111                                 return 0;
01112                         }
01113                         return (p+1);
01114                 }
01115                 else if ( *p == '>' )
01116                 {
01117                         // Done with attributes (if there were any.)
01118                         // Read the value -- which can include other
01119                         // elements -- read the end tag, and return.
01120                         ++p;
01121                         p = ReadValue( p, data, encoding );             // Note this is an Element method, and will set the error if one happens.
01122                         if ( !p || !*p ) {
01123                                 // We were looking for the end tag, but found nothing.
01124                                 // Fix for [ 1663758 ] Failure to report error on bad XML
01125                                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01126                                 return 0;
01127                         }
01128 
01129                         // We should find the end tag now
01130                         if ( StringEqual( p, endTag.c_str(), false, encoding ) )
01131                         {
01132                                 p += endTag.length();
01133                                 return p;
01134                         }
01135                         else
01136                         {
01137                                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01138                                 return 0;
01139                         }
01140                 }
01141                 else
01142                 {
01143                         // Try to read an attribute:
01144                         TiXmlAttribute* attrib = new TiXmlAttribute();
01145                         if ( !attrib )
01146                         {
01147                                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
01148                                 return 0;
01149                         }
01150 
01151                         attrib->SetDocument( document );
01152                         pErr = p;
01153                         p = attrib->Parse( p, data, encoding );
01154 
01155                         if ( !p || !*p )
01156                         {
01157                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01158                                 delete attrib;
01159                                 return 0;
01160                         }
01161 
01162                         // Handle the strange case of double attributes:
01163                         #ifdef TIXML_USE_STL
01164                         TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
01165                         #else
01166                         TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
01167                         #endif
01168                         if ( node )
01169                         {
01170                                 node->SetValue( attrib->Value() );
01171                                 delete attrib;
01172                                 return 0;
01173                         }
01174 
01175                         attributeSet.Add( attrib );
01176                 }
01177         }
01178         return p;
01179 }
01180 
01181 
01182 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01183 {
01184         TiXmlDocument* document = GetDocument();
01185 
01186         // Read in text and elements in any order.
01187         const char* pWithWhiteSpace = p;
01188         p = SkipWhiteSpace( p, encoding );
01189 
01190         while ( p && *p )
01191         {
01192                 if ( *p != '<' )
01193                 {
01194                         // Take what we have, make a text element.
01195                         TiXmlText* textNode = new TiXmlText( "" );
01196 
01197                         if ( !textNode )
01198                         {
01199                                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
01200                                     return 0;
01201                         }
01202 
01203                         if ( TiXmlBase::IsWhiteSpaceCondensed() )
01204                         {
01205                                 p = textNode->Parse( p, data, encoding );
01206                         }
01207                         else
01208                         {
01209                                 // Special case: we want to keep the white space
01210                                 // so that leading spaces aren't removed.
01211                                 p = textNode->Parse( pWithWhiteSpace, data, encoding );
01212                         }
01213 
01214                         if ( !textNode->Blank() )
01215                                 LinkEndChild( textNode );
01216                         else
01217                                 delete textNode;
01218                 }
01219                 else
01220                 {
01221                         // We hit a '<'
01222                         // Have we hit a new element or an end tag? This could also be
01223                         // a TiXmlText in the "CDATA" style.
01224                         if ( StringEqual( p, "</", false, encoding ) )
01225                         {
01226                                 return p;
01227                         }
01228                         else
01229                         {
01230                                 TiXmlNode* node = Identify( p, encoding );
01231                                 if ( node )
01232                                 {
01233                                         p = node->Parse( p, data, encoding );
01234                                         LinkEndChild( node );
01235                                 }
01236                                 else
01237                                 {
01238                                         return 0;
01239                                 }
01240                         }
01241                 }
01242                 pWithWhiteSpace = p;
01243                 p = SkipWhiteSpace( p, encoding );
01244         }
01245 
01246         if ( !p )
01247         {
01248                 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
01249         }
01250         return p;
01251 }
01252 
01253 
01254 #ifdef TIXML_USE_STL
01255 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
01256 {
01257         while ( in->good() )
01258         {
01259                 int c = in->get();
01260                 if ( c <= 0 )
01261                 {
01262                         TiXmlDocument* document = GetDocument();
01263                         if ( document )
01264                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01265                         return;
01266                 }
01267                 (*tag) += (char) c;
01268 
01269                 if ( c == '>' )
01270                 {
01271                         // All is well.
01272                         return;
01273                 }
01274         }
01275 }
01276 #endif
01277 
01278 
01279 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01280 {
01281         TiXmlDocument* document = GetDocument();
01282         p = SkipWhiteSpace( p, encoding );
01283 
01284         if ( data )
01285         {
01286                 data->Stamp( p, encoding );
01287                 location = data->Cursor();
01288         }
01289         if ( !p || !*p || *p != '<' )
01290         {
01291                 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
01292                 return 0;
01293         }
01294         ++p;
01295     value = "";
01296 
01297         while ( p && *p && *p != '>' )
01298         {
01299                 value += *p;
01300                 ++p;
01301         }
01302 
01303         if ( !p )
01304         {
01305                 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
01306         }
01307         if ( *p == '>' )
01308                 return p+1;
01309         return p;
01310 }
01311 
01312 #ifdef TIXML_USE_STL
01313 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
01314 {
01315         while ( in->good() )
01316         {
01317                 int c = in->get();
01318                 if ( c <= 0 )
01319                 {
01320                         TiXmlDocument* document = GetDocument();
01321                         if ( document )
01322                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01323                         return;
01324                 }
01325 
01326                 (*tag) += (char) c;
01327 
01328                 if ( c == '>'
01329                          && tag->at( tag->length() - 2 ) == '-'
01330                          && tag->at( tag->length() - 3 ) == '-' )
01331                 {
01332                         // All is well.
01333                         return;
01334                 }
01335         }
01336 }
01337 #endif
01338 
01339 
01340 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01341 {
01342         TiXmlDocument* document = GetDocument();
01343         value = "";
01344 
01345         p = SkipWhiteSpace( p, encoding );
01346 
01347         if ( data )
01348         {
01349                 data->Stamp( p, encoding );
01350                 location = data->Cursor();
01351         }
01352         const char* startTag = "<!--";
01353         const char* endTag   = "-->";
01354 
01355         if ( !StringEqual( p, startTag, false, encoding ) )
01356         {
01357                 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
01358                 return 0;
01359         }
01360         p += strlen( startTag );
01361 
01362         // [ 1475201 ] TinyXML parses entities in comments
01363         // Oops - ReadText doesn't work, because we don't want to parse the entities.
01364         // p = ReadText( p, &value, false, endTag, false, encoding );
01365         //
01366         // from the XML spec:
01367         /*
01368          [Definition: Comments may appear anywhere in a document outside other markup; in addition,
01369                       they may appear within the document type declaration at places allowed by the grammar.
01370                                   They are not part of the document's character data; an XML processor MAY, but need not,
01371                                   make it possible for an application to retrieve the text of comments. For compatibility,
01372                                   the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
01373                                   references MUST NOT be recognized within comments.
01374 
01375                                   An example of a comment:
01376 
01377                                   <!-- declarations for <head> & <body> -->
01378         */
01379 
01380     value = "";
01381         // Keep all the white space.
01382         while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
01383         {
01384                 value.append( p, 1 );
01385                 ++p;
01386         }
01387         if ( p )
01388                 p += strlen( endTag );
01389 
01390         return p;
01391 }
01392 
01393 
01394 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01395 {
01396         p = SkipWhiteSpace( p, encoding );
01397         if ( !p || !*p ) return 0;
01398 
01399 //      int tabsize = 4;
01400 //      if ( document )
01401 //              tabsize = document->TabSize();
01402 
01403         if ( data )
01404         {
01405                 data->Stamp( p, encoding );
01406                 location = data->Cursor();
01407         }
01408         // Read the name, the '=' and the value.
01409         const char* pErr = p;
01410         p = ReadName( p, &name, encoding );
01411         if ( !p || !*p )
01412         {
01413                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01414                 return 0;
01415         }
01416         p = SkipWhiteSpace( p, encoding );
01417         if ( !p || !*p || *p != '=' )
01418         {
01419                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01420                 return 0;
01421         }
01422 
01423         ++p;    // skip '='
01424         p = SkipWhiteSpace( p, encoding );
01425         if ( !p || !*p )
01426         {
01427                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01428                 return 0;
01429         }
01430 
01431         const char* end;
01432         const char SINGLE_QUOTE = '\'';
01433         const char DOUBLE_QUOTE = '\"';
01434 
01435         if ( *p == SINGLE_QUOTE )
01436         {
01437                 ++p;
01438                 end = "\'";             // single quote in string
01439                 p = ReadText( p, &value, false, end, false, encoding );
01440         }
01441         else if ( *p == DOUBLE_QUOTE )
01442         {
01443                 ++p;
01444                 end = "\"";             // double quote in string
01445                 p = ReadText( p, &value, false, end, false, encoding );
01446         }
01447         else
01448         {
01449                 // All attribute values should be in single or double quotes.
01450                 // But this is such a common error that the parser will try
01451                 // its best, even without them.
01452                 value = "";
01453                 while (    p && *p                                                                                      // existence
01454                                 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'      // whitespace
01455                                 && *p != '/' && *p != '>' )                                                     // tag end
01456                 {
01457                         if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
01458                                 // [ 1451649 ] Attribute values with trailing quotes not handled correctly
01459                                 // We did not have an opening quote but seem to have a
01460                                 // closing one. Give up and throw an error.
01461                                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01462                                 return 0;
01463                         }
01464                         value += *p;
01465                         ++p;
01466                 }
01467         }
01468         return p;
01469 }
01470 
01471 #ifdef TIXML_USE_STL
01472 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
01473 {
01474         while ( in->good() )
01475         {
01476                 int c = in->peek();
01477                 if ( !cdata && (c == '<' ) )
01478                 {
01479                         return;
01480                 }
01481                 if ( c <= 0 )
01482                 {
01483                         TiXmlDocument* document = GetDocument();
01484                         if ( document )
01485                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01486                         return;
01487                 }
01488 
01489                 (*tag) += (char) c;
01490                 in->get();      // "commits" the peek made above
01491 
01492                 if ( cdata && c == '>' && tag->size() >= 3 ) {
01493                         size_t len = tag->size();
01494                         if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
01495                                 // terminator of cdata.
01496                                 return;
01497                         }
01498                 }
01499         }
01500 }
01501 #endif
01502 
01503 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01504 {
01505         value = "";
01506         TiXmlDocument* document = GetDocument();
01507 
01508         if ( data )
01509         {
01510                 data->Stamp( p, encoding );
01511                 location = data->Cursor();
01512         }
01513 
01514         const char* const startTag = "<![CDATA[";
01515         const char* const endTag   = "]]>";
01516 
01517         if ( cdata || StringEqual( p, startTag, false, encoding ) )
01518         {
01519                 cdata = true;
01520 
01521                 if ( !StringEqual( p, startTag, false, encoding ) )
01522                 {
01523                         document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
01524                         return 0;
01525                 }
01526                 p += strlen( startTag );
01527 
01528                 // Keep all the white space, ignore the encoding, etc.
01529                 while (    p && *p
01530                                 && !StringEqual( p, endTag, false, encoding )
01531                           )
01532                 {
01533                         value += *p;
01534                         ++p;
01535                 }
01536 
01537                 TIXML_STRING dummy;
01538                 p = ReadText( p, &dummy, false, endTag, false, encoding );
01539                 return p;
01540         }
01541         else
01542         {
01543                 bool ignoreWhite = true;
01544 
01545                 const char* end = "<";
01546                 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
01547                 if ( p )
01548                         return p-1;     // don't truncate the '<'
01549                 return 0;
01550         }
01551 }
01552 
01553 #ifdef TIXML_USE_STL
01554 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
01555 {
01556         while ( in->good() )
01557         {
01558                 int c = in->get();
01559                 if ( c <= 0 )
01560                 {
01561                         TiXmlDocument* document = GetDocument();
01562                         if ( document )
01563                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01564                         return;
01565                 }
01566                 (*tag) += (char) c;
01567 
01568                 if ( c == '>' )
01569                 {
01570                         // All is well.
01571                         return;
01572                 }
01573         }
01574 }
01575 #endif
01576 
01577 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
01578 {
01579         p = SkipWhiteSpace( p, _encoding );
01580         // Find the beginning, find the end, and look for
01581         // the stuff in-between.
01582         TiXmlDocument* document = GetDocument();
01583         if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01584         {
01585                 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
01586                 return 0;
01587         }
01588         if ( data )
01589         {
01590                 data->Stamp( p, _encoding );
01591                 location = data->Cursor();
01592         }
01593         p += 5;
01594 
01595         version = "";
01596         encoding = "";
01597         standalone = "";
01598 
01599         while ( p && *p )
01600         {
01601                 if ( *p == '>' )
01602                 {
01603                         ++p;
01604                         return p;
01605                 }
01606 
01607                 p = SkipWhiteSpace( p, _encoding );
01608                 if ( StringEqual( p, "version", true, _encoding ) )
01609                 {
01610                         TiXmlAttribute attrib;
01611                         p = attrib.Parse( p, data, _encoding );
01612                         version = attrib.Value();
01613                 }
01614                 else if ( StringEqual( p, "encoding", true, _encoding ) )
01615                 {
01616                         TiXmlAttribute attrib;
01617                         p = attrib.Parse( p, data, _encoding );
01618                         encoding = attrib.Value();
01619                 }
01620                 else if ( StringEqual( p, "standalone", true, _encoding ) )
01621                 {
01622                         TiXmlAttribute attrib;
01623                         p = attrib.Parse( p, data, _encoding );
01624                         standalone = attrib.Value();
01625                 }
01626                 else
01627                 {
01628                         // Read over whatever it is.
01629                         while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01630                                 ++p;
01631                 }
01632         }
01633         return 0;
01634 }
01635 
01636 bool TiXmlText::Blank() const
01637 {
01638         for ( unsigned i=0; i<value.length(); i++ )
01639                 if ( !IsWhiteSpace( value[i] ) )
01640                         return false;
01641         return true;
01642 }
01643