CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_3/src/FWCore/Utilities/src/tinyxmlparser.cc

Go to the documentation of this file.
00001 /*
00002 www.sourceforge.net/projects/tinyxml
00003 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
00004 
00005 This software is provided 'as-is', without any express or implied
00006 warranty. In no event will the authors be held liable for any
00007 damages arising from the use of this software.
00008 
00009 Permission is granted to anyone to use this software for any
00010 purpose, including commercial applications, and to alter it and
00011 redistribute it freely, subject to the following restrictions:
00012 
00013 1. The origin of this software must not be misrepresented; you must
00014 not claim that you wrote the original software. If you use this
00015 software in a product, an acknowledgment in the product documentation
00016 would be appreciated but is not required.
00017 
00018 2. Altered source versions must be plainly marked as such, and
00019 must not be misrepresented as being the original software.
00020 
00021 3. This notice may not be removed or altered from any source
00022 distribution.
00023 */
00024 
00025 /*
00026  * THIS FILE WAS ALTERED BY Eric Vaandering, 25 August 2009.
00027  *
00028  * THIS FILE WAS ALTERED BY Bill Tanenbaum, 25 September 2011
00029  * to fix Coverity error (use after free).
00030  */
00031 #define TIXML_USE_STL
00032 #include <ctype.h>
00033 #include <stddef.h>
00034 
00035 #include "FWCore/Utilities/interface/tinyxml.h"
00036 
00037 //#define DEBUG_PARSER
00038 #if defined( DEBUG_PARSER )
00039 #       if defined( DEBUG ) && defined( _MSC_VER )
00040 #               include <windows.h>
00041 #               define TIXML_LOG OutputDebugString
00042 #       else
00043 #               define TIXML_LOG printf
00044 #       endif
00045 #endif
00046 
00047 // Note tha "PutString" hardcodes the same list. This
00048 // is less flexible than it appears. Changing the entries
00049 // or order will break putstring.
00050 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
00051 {
00052         { "&amp;",  5, '&' },
00053         { "&lt;",   4, '<' },
00054         { "&gt;",   4, '>' },
00055         { "&quot;", 6, '\"' },
00056         { "&apos;", 6, '\'' }
00057 };
00058 
// A lot of Unicode background is available at:
//              https://www.unicode.org/faq/utf_bom.html
// That page includes the basis of the lead-byte table defined below, which
// gives the number of bytes in a UTF-8 sequence from its lead byte. Invalid
// lead bytes are given a length of 1: the result will be junk, but the data
// is passed through as far as possible.
//
// Beware of these special three-byte sequences in UTF-8:
//                              ef bb bf (Microsoft "lead bytes")
//                              ef bf be
//                              ef bf bf

// The three bytes of the Microsoft UTF-8 lead sequence, in order.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFU;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBU;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFU;
00072 
00073 const int TiXmlBase::utf8ByteTable[256] =
00074 {
00075         //      0       1       2       3       4       5       6       7       8       9       a       b       c       d       e       f
00076                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x00
00077                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x10
00078                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x20
00079                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x30
00080                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x40
00081                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x50
00082                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x60
00083                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x70 End of ASCII range
00084                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x80 0x80 to 0xc1 invalid
00085                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x90
00086                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xa0
00087                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xb0
00088                 1,      1,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xc0 0xc2 to 0xdf 2 byte
00089                 2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xd0
00090                 3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      // 0xe0 0xe0 to 0xef 3 byte
00091                 4,      4,      4,      4,      4,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1       // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
00092 };
00093 
00094 
00095 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
00096 {
00097         const unsigned long BYTE_MASK = 0xBF;
00098         const unsigned long BYTE_MARK = 0x80;
00099         const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
00100 
00101         if (input < 0x80)
00102                 *length = 1;
00103         else if ( input < 0x800 )
00104                 *length = 2;
00105         else if ( input < 0x10000 )
00106                 *length = 3;
00107         else if ( input < 0x200000 )
00108                 *length = 4;
00109         else
00110                 { *length = 0; return; }        // This code won't covert this correctly anyway.
00111 
00112         output += *length;
00113 
00114         // Scary scary fall throughs.
00115         switch (*length)
00116         {
00117                 case 4:
00118                         --output;
00119                         *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00120                         input >>= 6;
00121                 case 3:
00122                         --output;
00123                         *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00124                         input >>= 6;
00125                 case 2:
00126                         --output;
00127                         *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00128                         input >>= 6;
00129                 case 1:
00130                         --output;
00131                         *output = (char)(input | FIRST_BYTE_MARK[*length]);
00132         }
00133 }
00134 
00135 
00136 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00137 {
00138         // This will only work for low-ascii, everything else is assumed to be a valid
00139         // letter. I'm not sure this is the best approach, but it is quite tricky trying
00140         // to figure out alhabetical vs. not across encoding. So take a very
00141         // conservative approach.
00142 
00143 //      if ( encoding == TIXML_ENCODING_UTF8 )
00144 //      {
00145                 if ( anyByte < 127 )
00146                         return isalpha( anyByte );
00147                 else
00148                         return 1;       // What else to do? The unicode set is huge...get the english ones right.
00149 //      }
00150 //      else
00151 //      {
00152 //              return isalpha( anyByte );
00153 //      }
00154 }
00155 
00156 
00157 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00158 {
00159         // This will only work for low-ascii, everything else is assumed to be a valid
00160         // letter. I'm not sure this is the best approach, but it is quite tricky trying
00161         // to figure out alhabetical vs. not across encoding. So take a very
00162         // conservative approach.
00163 
00164 //      if ( encoding == TIXML_ENCODING_UTF8 )
00165 //      {
00166                 if ( anyByte < 127 )
00167                         return isalnum( anyByte );
00168                 else
00169                         return 1;       // What else to do? The unicode set is huge...get the english ones right.
00170 //      }
00171 //      else
00172 //      {
00173 //              return isalnum( anyByte );
00174 //      }
00175 }
00176 
00177 
00178 class TiXmlParsingData
00179 {
00180         friend class TiXmlDocument;
00181   public:
00182         void Stamp( const char* now, TiXmlEncoding encoding );
00183 
00184         const TiXmlCursor& Cursor()     { return cursor; }
00185 
00186   private:
00187         // Only used by the document!
00188         TiXmlParsingData( const char* start, int _tabsize, int row, int col )
00189         {
00190                 assert( start );
00191                 stamp = start;
00192                 tabsize = _tabsize;
00193                 cursor.row = row;
00194                 cursor.col = col;
00195         }
00196 
00197         TiXmlCursor             cursor;
00198         const char*             stamp;
00199         int                             tabsize;
00200 };
00201 
00202 
00203 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
00204 {
00205         assert( now );
00206 
00207         // Do nothing if the tabsize is 0.
00208         if ( tabsize < 1 )
00209         {
00210                 return;
00211         }
00212 
00213         // Get the current row, column.
00214         int row = cursor.row;
00215         int col = cursor.col;
00216         const char* p = stamp;
00217         assert( p );
00218 
00219         while ( p < now )
00220         {
00221                 // Treat p as unsigned, so we have a happy compiler.
00222                 const unsigned char* pU = (const unsigned char*)p;
00223 
00224                 // Code contributed by Fletcher Dunn: (modified by lee)
00225                 switch (*pU) {
00226                         case 0:
00227                                 // We *should* never get here, but in case we do, don't
00228                                 // advance past the terminating null character, ever
00229                                 return;
00230 
00231                         case '\r':
00232                                 // bump down to the next line
00233                                 ++row;
00234                                 col = 0;
00235                                 // Eat the character
00236                                 ++p;
00237 
00238                                 // Check for \r\n sequence, and treat this as a single character
00239                                 if (*p == '\n') {
00240                                         ++p;
00241                                 }
00242                                 break;
00243 
00244                         case '\n':
00245                                 // bump down to the next line
00246                                 ++row;
00247                                 col = 0;
00248 
00249                                 // Eat the character
00250                                 ++p;
00251 
00252                                 // Check for \n\r sequence, and treat this as a single
00253                                 // character.  (Yes, this bizarre thing does occur still
00254                                 // on some arcane platforms...)
00255                                 if (*p == '\r') {
00256                                         ++p;
00257                                 }
00258                                 break;
00259 
00260                         case '\t':
00261                                 // Eat the character
00262                                 ++p;
00263 
00264                                 // Skip to next tab stop
00265                                 col = (col / tabsize + 1) * tabsize;
00266                                 break;
00267 
00268                         case TIXML_UTF_LEAD_0:
00269                                 if ( encoding == TIXML_ENCODING_UTF8 )
00270                                 {
00271                                         if ( *(p+1) && *(p+2) )
00272                                         {
00273                                                 // In these cases, don't advance the column. These are
00274                                                 // 0-width spaces.
00275                                                 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00276                                                         p += 3;
00277                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00278                                                         p += 3;
00279                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00280                                                         p += 3;
00281                                                 else
00282                                                         { p +=3; ++col; }       // A normal character.
00283                                         }
00284                                 }
00285                                 else
00286                                 {
00287                                         ++p;
00288                                         ++col;
00289                                 }
00290                                 break;
00291 
00292                         default:
00293                                 if ( encoding == TIXML_ENCODING_UTF8 )
00294                                 {
00295                                         // Eat the 1 to 4 byte utf8 character.
00296                                         int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
00297                                         if ( step == 0 )
00298                                                 step = 1;               // Error case from bad encoding, but handle gracefully.
00299                                         p += step;
00300 
00301                                         // Just advance one column, of course.
00302                                         ++col;
00303                                 }
00304                                 else
00305                                 {
00306                                         ++p;
00307                                         ++col;
00308                                 }
00309                                 break;
00310                 }
00311         }
00312         cursor.row = row;
00313         cursor.col = col;
00314         assert( cursor.row >= -1 );
00315         assert( cursor.col >= -1 );
00316         stamp = p;
00317         assert( stamp );
00318 }
00319 
00320 
00321 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
00322 {
00323         if ( !p || !*p )
00324         {
00325                 return 0;
00326         }
00327         if ( encoding == TIXML_ENCODING_UTF8 )
00328         {
00329                 while ( *p )
00330                 {
00331                         const unsigned char* pU = (const unsigned char*)p;
00332 
00333                         // Skip the stupid Microsoft UTF-8 Byte order marks
00334                         if (    *(pU+0)==TIXML_UTF_LEAD_0
00335                                  && *(pU+1)==TIXML_UTF_LEAD_1
00336                                  && *(pU+2)==TIXML_UTF_LEAD_2 )
00337                         {
00338                                 p += 3;
00339                                 continue;
00340                         }
00341                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00342                                  && *(pU+1)==0xbfU
00343                                  && *(pU+2)==0xbeU )
00344                         {
00345                                 p += 3;
00346                                 continue;
00347                         }
00348                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00349                                  && *(pU+1)==0xbfU
00350                                  && *(pU+2)==0xbfU )
00351                         {
00352                                 p += 3;
00353                                 continue;
00354                         }
00355 
00356                         if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )            // Still using old rules for white space.
00357                                 ++p;
00358                         else
00359                                 break;
00360                 }
00361         }
00362         else
00363         {
00364                 // Code altered by user.  An extra pair of () was added to eliminate a compiler warning.
00365                 while ( ( *p && IsWhiteSpace( *p ) ) || *p == '\n' || *p =='\r' )
00366                         ++p;
00367         }
00368 
00369         return p;
00370 }
00371 
00372 #ifdef TIXML_USE_STL
00373 /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
00374 {
00375         for( ;; )
00376         {
00377                 if ( !in->good() ) return false;
00378 
00379                 int c = in->peek();
00380                 // At this scope, we can't get to a document. So fail silently.
00381                 if ( !IsWhiteSpace( c ) || c <= 0 )
00382                         return true;
00383 
00384                 *tag += (char) in->get();
00385         }
00386 }
00387 
00388 /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
00389 {
00390         //assert( character > 0 && character < 128 );   // else it won't work in utf-8
00391         while ( in->good() )
00392         {
00393                 int c = in->peek();
00394                 if ( c == character )
00395                         return true;
00396                 if ( c <= 0 )           // Silent failure: can't get document at this scope
00397                         return false;
00398 
00399                 in->get();
00400                 *tag += (char) c;
00401         }
00402         return false;
00403 }
00404 #endif
00405 
00406 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
00407 // "assign" optimization removes over 10% of the execution time.
00408 //
00409 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
00410 {
00411         // Oddly, not supported on some comilers,
00412         //name->clear();
00413         // So use this:
00414         *name = "";
00415         assert( p );
00416 
00417         // Names start with letters or underscores.
00418         // Of course, in unicode, tinyxml has no idea what a letter *is*. The
00419         // algorithm is generous.
00420         //
00421         // After that, they can be letters, underscores, numbers,
00422         // hyphens, or colons. (Colons are valid ony for namespaces,
00423         // but tinyxml can't tell namespaces from names.)
00424         if (    p && *p
00425                  && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00426         {
00427                 const char* start = p;
00428                 while(          p && *p
00429                                 &&      (               IsAlphaNum( (unsigned char ) *p, encoding )
00430                                                  || *p == '_'
00431                                                  || *p == '-'
00432                                                  || *p == '.'
00433                                                  || *p == ':' ) )
00434                 {
00435                         //(*name) += *p; // expensive
00436                         ++p;
00437                 }
00438                 if ( p-start > 0 ) {
00439                         name->assign( start, p-start );
00440                 }
00441                 return p;
00442         }
00443         return 0;
00444 }
00445 
00446 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
00447 {
00448         // Presume an entity, and pull it out.
00449     TIXML_STRING ent;
00450         int i;
00451         *length = 0;
00452 
00453         if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00454         {
00455                 unsigned long ucs = 0;
00456                 ptrdiff_t delta = 0;
00457                 unsigned mult = 1;
00458 
00459                 if ( *(p+2) == 'x' )
00460                 {
00461                         // Hexadecimal.
00462                         if ( !*(p+3) ) return 0;
00463 
00464                         const char* q = p+3;
00465                         q = strchr( q, ';' );
00466 
00467                         if ( !q || !*q ) return 0;
00468 
00469                         delta = q-p;
00470                         --q;
00471 
00472                         while ( *q != 'x' )
00473                         {
00474                                 if ( *q >= '0' && *q <= '9' )
00475                                         ucs += mult * (*q - '0');
00476                                 else if ( *q >= 'a' && *q <= 'f' )
00477                                         ucs += mult * (*q - 'a' + 10);
00478                                 else if ( *q >= 'A' && *q <= 'F' )
00479                                         ucs += mult * (*q - 'A' + 10 );
00480                                 else
00481                                         return 0;
00482                                 mult *= 16;
00483                                 --q;
00484                         }
00485                 }
00486                 else
00487                 {
00488                         // Decimal.
00489                         if ( !*(p+2) ) return 0;
00490 
00491                         const char* q = p+2;
00492                         q = strchr( q, ';' );
00493 
00494                         if ( !q || !*q ) return 0;
00495 
00496                         delta = q-p;
00497                         --q;
00498 
00499                         while ( *q != '#' )
00500                         {
00501                                 if ( *q >= '0' && *q <= '9' )
00502                                         ucs += mult * (*q - '0');
00503                                 else
00504                                         return 0;
00505                                 mult *= 10;
00506                                 --q;
00507                         }
00508                 }
00509                 if ( encoding == TIXML_ENCODING_UTF8 )
00510                 {
00511                         // convert the UCS to UTF-8
00512                         ConvertUTF32ToUTF8( ucs, value, length );
00513                 }
00514                 else
00515                 {
00516                         *value = (char)ucs;
00517                         *length = 1;
00518                 }
00519                 return p + delta + 1;
00520         }
00521 
00522         // Now try to match it.
00523         for( i=0; i<NUM_ENTITY; ++i )
00524         {
00525                 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00526                 {
00527                         assert( strlen( entity[i].str ) == entity[i].strLength );
00528                         *value = entity[i].chr;
00529                         *length = 1;
00530                         return ( p + entity[i].strLength );
00531                 }
00532         }
00533 
00534         // So it wasn't an entity, its unrecognized, or something like that.
00535         *value = *p;    // Don't put back the last one, since we return it!
00536         //*length = 1;  // Leave unrecognized entities - this doesn't really work.
00537                                         // Just writes strange XML.
00538         return p+1;
00539 }
00540 
00541 
00542 bool TiXmlBase::StringEqual( const char* p,
00543                                                          const char* tag,
00544                                                          bool ignoreCase,
00545                                                          TiXmlEncoding encoding )
00546 {
00547         assert( p );
00548         assert( tag );
00549         if ( !p || !*p )
00550         {
00551                 assert( 0 );
00552                 return false;
00553         }
00554 
00555         const char* q = p;
00556 
00557         if ( ignoreCase )
00558         {
00559                 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00560                 {
00561                         ++q;
00562                         ++tag;
00563                 }
00564 
00565                 if ( *tag == 0 )
00566                         return true;
00567         }
00568         else
00569         {
00570                 while ( *q && *tag && *q == *tag )
00571                 {
00572                         ++q;
00573                         ++tag;
00574                 }
00575 
00576                 if ( *tag == 0 )                // Have we found the end of the tag, and everything equal?
00577                         return true;
00578         }
00579         return false;
00580 }
00581 
00582 const char* TiXmlBase::ReadText(        const char* p,
00583                                                                         TIXML_STRING * text,
00584                                                                         bool trimWhiteSpace,
00585                                                                         const char* endTag,
00586                                                                         bool caseInsensitive,
00587                                                                         TiXmlEncoding encoding )
00588 {
00589     *text = "";
00590         if (    !trimWhiteSpace                 // certain tags always keep whitespace
00591                  || !condenseWhiteSpace )       // if true, whitespace is always kept
00592         {
00593                 // Keep all the white space.
00594                 while (    p && *p
00595                                 && !StringEqual( p, endTag, caseInsensitive, encoding )
00596                           )
00597                 {
00598                         int len;
00599                         char cArr[4] = { 0, 0, 0, 0 };
00600                         p = GetChar( p, cArr, &len, encoding );
00601                         text->append( cArr, len );
00602                 }
00603         }
00604         else
00605         {
00606                 bool whitespace = false;
00607 
00608                 // Remove leading white space:
00609                 p = SkipWhiteSpace( p, encoding );
00610                 while (    p && *p
00611                                 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00612                 {
00613                         if ( *p == '\r' || *p == '\n' )
00614                         {
00615                                 whitespace = true;
00616                                 ++p;
00617                         }
00618                         else if ( IsWhiteSpace( *p ) )
00619                         {
00620                                 whitespace = true;
00621                                 ++p;
00622                         }
00623                         else
00624                         {
00625                                 // If we've found whitespace, add it before the
00626                                 // new character. Any whitespace just becomes a space.
00627                                 if ( whitespace )
00628                                 {
00629                                         (*text) += ' ';
00630                                         whitespace = false;
00631                                 }
00632                                 int len;
00633                                 char cArr[4] = { 0, 0, 0, 0 };
00634                                 p = GetChar( p, cArr, &len, encoding );
00635                                 if ( len == 1 )
00636                                         (*text) += cArr[0];     // more efficient
00637                                 else
00638                                         text->append( cArr, len );
00639                         }
00640                 }
00641         }
00642         if ( p )
00643                 p += strlen( endTag );
00644         return p;
00645 }
00646 
00647 #ifdef TIXML_USE_STL
00648 
00649 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
00650 {
00651         // The basic issue with a document is that we don't know what we're
00652         // streaming. Read something presumed to be a tag (and hope), then
00653         // identify it, and call the appropriate stream method on the tag.
00654         //
00655         // This "pre-streaming" will never read the closing ">" so the
00656         // sub-tag can orient itself.
00657 
00658         if ( !StreamTo( in, '<', tag ) )
00659         {
00660                 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00661                 return;
00662         }
00663 
00664         while ( in->good() )
00665         {
00666                 int tagIndex = (int) tag->length();
00667                 while ( in->good() && in->peek() != '>' )
00668                 {
00669                         int c = in->get();
00670                         if ( c <= 0 )
00671                         {
00672                                 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00673                                 break;
00674                         }
00675                         (*tag) += (char) c;
00676                 }
00677 
00678                 if ( in->good() )
00679                 {
00680                         // We now have something we presume to be a node of
00681                         // some sort. Identify it, and call the node to
00682                         // continue streaming.
00683                         TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00684 
00685                         if ( node )
00686                         {
00687                                 node->StreamIn( in, tag );
00688                                 bool isElement = node->ToElement() != 0;
00689                                 delete node;
00690                                 node = 0;
00691 
00692                                 // If this is the root element, we're done. Parsing will be
00693                                 // done by the >> operator.
00694                                 if ( isElement )
00695                                 {
00696                                         return;
00697                                 }
00698                         }
00699                         else
00700                         {
00701                                 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00702                                 return;
00703                         }
00704                 }
00705         }
00706         // We should have returned sooner.
00707         SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00708 }
00709 
00710 #endif
00711 
00712 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
00713 {
00714         ClearError();
00715 
00716         // Parse away, at the document level. Since a document
00717         // contains nothing but other tags, most of what happens
00718         // here is skipping white space.
00719         if ( !p || !*p )
00720         {
00721                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00722                 return 0;
00723         }
00724 
00725         // Note that, for a document, this needs to come
00726         // before the while space skip, so that parsing
00727         // starts from the pointer we are given.
00728         location.Clear();
00729         if ( prevData )
00730         {
00731                 location.row = prevData->cursor.row;
00732                 location.col = prevData->cursor.col;
00733         }
00734         else
00735         {
00736                 location.row = 0;
00737                 location.col = 0;
00738         }
00739         TiXmlParsingData data( p, TabSize(), location.row, location.col );
00740         location = data.Cursor();
00741 
00742         if ( encoding == TIXML_ENCODING_UNKNOWN )
00743         {
00744                 // Check for the Microsoft UTF-8 lead bytes.
00745                 const unsigned char* pU = (const unsigned char*)p;
00746                 if (    *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00747                          && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00748                          && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00749                 {
00750                         encoding = TIXML_ENCODING_UTF8;
00751                         useMicrosoftBOM = true;
00752                 }
00753         }
00754 
00755     p = SkipWhiteSpace( p, encoding );
00756         if ( !p )
00757         {
00758                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00759                 return 0;
00760         }
00761 
00762         while ( p && *p )
00763         {
00764                 TiXmlNode* node = Identify( p, encoding );
00765                 if ( node )
00766                 {
00767                         p = node->Parse( p, &data, encoding );
00768                         // LinkEndChild just returns the input pointer unmodified,
00769                         // except if node is deleted, in which case it returns 0.
00770                         node = LinkEndChild( node );
00771                 }
00772                 else
00773                 {
00774                         break;
00775                 }
00776 
00777                 // Did we get encoding info?
00778                 if (    encoding == TIXML_ENCODING_UNKNOWN
00779                          && node && node->ToDeclaration() )
00780                 {
00781                         TiXmlDeclaration* dec = node->ToDeclaration();
00782                         const char* enc = dec->Encoding();
00783                         assert( enc );
00784 
00785                         if ( *enc == 0 )
00786                                 encoding = TIXML_ENCODING_UTF8;
00787                         else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00788                                 encoding = TIXML_ENCODING_UTF8;
00789                         else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00790                                 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
00791                         else
00792                                 encoding = TIXML_ENCODING_LEGACY;
00793                 }
00794 
00795                 p = SkipWhiteSpace( p, encoding );
00796         }
00797 
00798         // Was this empty?
00799         if ( !firstChild ) {
00800                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
00801                 return 0;
00802         }
00803 
00804         // All is well.
00805         return p;
00806 }
00807 
00808 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
00809 {
00810         // The first error in a chain is more accurate - don't set again!
00811         if ( error )
00812                 return;
00813 
00814         assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
00815         error   = true;
00816         errorId = err;
00817         errorDesc = errorString[ errorId ];
00818 
00819         errorLocation.Clear();
00820         if ( pError && data )
00821         {
00822                 data->Stamp( pError, encoding );
00823                 errorLocation = data->Cursor();
00824         }
00825 }
00826 
00827 
00828 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
00829 {
00830         TiXmlNode* returnNode = 0;
00831 
00832         p = SkipWhiteSpace( p, encoding );
00833         if( !p || !*p || *p != '<' )
00834         {
00835                 return 0;
00836         }
00837 
00838         TiXmlDocument* doc = GetDocument();
00839         p = SkipWhiteSpace( p, encoding );
00840 
00841         if ( !p || !*p )
00842         {
00843                 return 0;
00844         }
00845 
00846         // What is this thing?
00847         // - Elements start with a letter or underscore, but xml is reserved.
00848         // - Comments: <!--
00849         // - Decleration: <?xml
00850         // - Everthing else is unknown to tinyxml.
00851         //
00852 
00853         const char* xmlHeader = { "<?xml" };
00854         const char* commentHeader = { "<!--" };
00855         const char* dtdHeader = { "<!" };
00856         const char* cdataHeader = { "<![CDATA[" };
00857 
00858         if ( StringEqual( p, xmlHeader, true, encoding ) )
00859         {
00860                 #ifdef DEBUG_PARSER
00861                         TIXML_LOG( "XML parsing Declaration\n" );
00862                 #endif
00863                 returnNode = new TiXmlDeclaration();
00864         }
00865         else if ( StringEqual( p, commentHeader, false, encoding ) )
00866         {
00867                 #ifdef DEBUG_PARSER
00868                         TIXML_LOG( "XML parsing Comment\n" );
00869                 #endif
00870                 returnNode = new TiXmlComment();
00871         }
00872         else if ( StringEqual( p, cdataHeader, false, encoding ) )
00873         {
00874                 #ifdef DEBUG_PARSER
00875                         TIXML_LOG( "XML parsing CDATA\n" );
00876                 #endif
00877                 TiXmlText* text = new TiXmlText( "" );
00878                 text->SetCDATA( true );
00879                 returnNode = text;
00880         }
00881         else if ( StringEqual( p, dtdHeader, false, encoding ) )
00882         {
00883                 #ifdef DEBUG_PARSER
00884                         TIXML_LOG( "XML parsing Unknown(1)\n" );
00885                 #endif
00886                 returnNode = new TiXmlUnknown();
00887         }
00888         else if (    IsAlpha( *(p+1), encoding )
00889                           || *(p+1) == '_' )
00890         {
00891                 #ifdef DEBUG_PARSER
00892                         TIXML_LOG( "XML parsing Element\n" );
00893                 #endif
00894                 returnNode = new TiXmlElement( "" );
00895         }
00896         else
00897         {
00898                 #ifdef DEBUG_PARSER
00899                         TIXML_LOG( "XML parsing Unknown(2)\n" );
00900                 #endif
00901                 returnNode = new TiXmlUnknown();
00902         }
00903 
00904         if ( returnNode )
00905         {
00906                 // Set the parent, so it can report errors
00907                 returnNode->parent = this;
00908         }
00909         else
00910         {
00911                 if ( doc )
00912                         doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
00913         }
00914         return returnNode;
00915 }
00916 
00917 #ifdef TIXML_USE_STL
00918 
00919 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
00920 {
00921         // We're called with some amount of pre-parsing. That is, some of "this"
00922         // element is in "tag". Go ahead and stream to the closing ">"
00923         while( in->good() )
00924         {
00925                 int c = in->get();
00926                 if ( c <= 0 )
00927                 {
00928                         TiXmlDocument* document = GetDocument();
00929                         if ( document )
00930                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00931                         return;
00932                 }
00933                 (*tag) += (char) c ;
00934 
00935                 if ( c == '>' )
00936                         break;
00937         }
00938 
00939         if ( tag->length() < 3 ) return;
00940 
00941         // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
00942         // If not, identify and stream.
00943 
00944         if (    tag->at( tag->length() - 1 ) == '>'
00945                  && tag->at( tag->length() - 2 ) == '/' )
00946         {
00947                 // All good!
00948                 return;
00949         }
00950         else if ( tag->at( tag->length() - 1 ) == '>' )
00951         {
00952                 // There is more. Could be:
00953                 //              text
00954                 //              cdata text (which looks like another node)
00955                 //              closing tag
00956                 //              another node.
00957                 for ( ;; )
00958                 {
00959                         StreamWhiteSpace( in, tag );
00960 
00961                         // Do we have text?
00962                         if ( in->good() && in->peek() != '<' )
00963                         {
00964                                 // Yep, text.
00965                                 TiXmlText text( "" );
00966                                 text.StreamIn( in, tag );
00967 
00968                                 // What follows text is a closing tag or another node.
00969                                 // Go around again and figure it out.
00970                                 continue;
00971                         }
00972 
00973                         // We now have either a closing tag...or another node.
00974                         // We should be at a "<", regardless.
00975                         if ( !in->good() ) return;
00976                         assert( in->peek() == '<' );
00977                         int tagIndex = (int) tag->length();
00978 
00979                         bool closingTag = false;
00980                         bool firstCharFound = false;
00981 
00982                         for( ;; )
00983                         {
00984                                 if ( !in->good() )
00985                                         return;
00986 
00987                                 int c = in->peek();
00988                                 if ( c <= 0 )
00989                                 {
00990                                         TiXmlDocument* document = GetDocument();
00991                                         if ( document )
00992                                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00993                                         return;
00994                                 }
00995 
00996                                 if ( c == '>' )
00997                                         break;
00998 
00999                                 *tag += (char) c;
01000                                 in->get();
01001 
01002                                 // Early out if we find the CDATA id.
01003                                 if ( c == '[' && tag->size() >= 9 )
01004                                 {
01005                                         size_t len = tag->size();
01006                                         const char* start = tag->c_str() + len - 9;
01007                                         if ( strcmp( start, "<![CDATA[" ) == 0 ) {
01008                                                 assert( !closingTag );
01009                                                 break;
01010                                         }
01011                                 }
01012 
01013                                 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
01014                                 {
01015                                         firstCharFound = true;
01016                                         if ( c == '/' )
01017                                                 closingTag = true;
01018                                 }
01019                         }
01020                         // If it was a closing tag, then read in the closing '>' to clean up the input stream.
01021                         // If it was not, the streaming will be done by the tag.
01022                         if ( closingTag )
01023                         {
01024                                 if ( !in->good() )
01025                                         return;
01026 
01027                                 int c = in->get();
01028                                 if ( c <= 0 )
01029                                 {
01030                                         TiXmlDocument* document = GetDocument();
01031                                         if ( document )
01032                                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01033                                         return;
01034                                 }
01035                                 assert( c == '>' );
01036                                 *tag += (char) c;
01037 
01038                                 // We are done, once we've found our closing tag.
01039                                 return;
01040                         }
01041                         else
01042                         {
01043                                 // If not a closing tag, id it, and stream.
01044                                 const char* tagloc = tag->c_str() + tagIndex;
01045                                 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
01046                                 if ( !node )
01047                                         return;
01048                                 node->StreamIn( in, tag );
01049                                 delete node;
01050                                 node = 0;
01051 
01052                                 // No return: go around from the beginning: text, closing tag, or node.
01053                         }
01054                 }
01055         }
01056 }
01057 #endif
01058 
01059 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01060 {
01061         p = SkipWhiteSpace( p, encoding );
01062         TiXmlDocument* document = GetDocument();
01063 
01064         if ( !p || !*p )
01065         {
01066                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
01067                 return 0;
01068         }
01069 
01070         if ( data )
01071         {
01072                 data->Stamp( p, encoding );
01073                 location = data->Cursor();
01074         }
01075 
01076         if ( *p != '<' )
01077         {
01078                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
01079                 return 0;
01080         }
01081 
01082         p = SkipWhiteSpace( p+1, encoding );
01083 
01084         // Read the name.
01085         const char* pErr = p;
01086 
01087     p = ReadName( p, &value, encoding );
01088         if ( !p || !*p )
01089         {
01090                 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
01091                 return 0;
01092         }
01093 
01094     TIXML_STRING endTag ("</");
01095         endTag += value;
01096         endTag += ">";
01097 
01098         // Check for and read attributes. Also look for an empty
01099         // tag or an end tag.
01100         while ( p && *p )
01101         {
01102                 pErr = p;
01103                 p = SkipWhiteSpace( p, encoding );
01104                 if ( !p || !*p )
01105                 {
01106                         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01107                         return 0;
01108                 }
01109                 if ( *p == '/' )
01110                 {
01111                         ++p;
01112                         // Empty tag.
01113                         if ( *p  != '>' )
01114                         {
01115                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
01116                                 return 0;
01117                         }
01118                         return (p+1);
01119                 }
01120                 else if ( *p == '>' )
01121                 {
01122                         // Done with attributes (if there were any.)
01123                         // Read the value -- which can include other
01124                         // elements -- read the end tag, and return.
01125                         ++p;
01126                         p = ReadValue( p, data, encoding );             // Note this is an Element method, and will set the error if one happens.
01127                         if ( !p || !*p ) {
01128                                 // We were looking for the end tag, but found nothing.
01129                                 // Fix for [ 1663758 ] Failure to report error on bad XML
01130                                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01131                                 return 0;
01132                         }
01133 
01134                         // We should find the end tag now
01135                         if ( StringEqual( p, endTag.c_str(), false, encoding ) )
01136                         {
01137                                 p += endTag.length();
01138                                 return p;
01139                         }
01140                         else
01141                         {
01142                                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01143                                 return 0;
01144                         }
01145                 }
01146                 else
01147                 {
01148                         // Try to read an attribute:
01149                         TiXmlAttribute* attrib = new TiXmlAttribute();
01150                         if ( !attrib )
01151                         {
01152                                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
01153                                 return 0;
01154                         }
01155 
01156                         attrib->SetDocument( document );
01157                         pErr = p;
01158                         p = attrib->Parse( p, data, encoding );
01159 
01160                         if ( !p || !*p )
01161                         {
01162                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01163                                 delete attrib;
01164                                 return 0;
01165                         }
01166 
01167                         // Handle the strange case of double attributes:
01168                         #ifdef TIXML_USE_STL
01169                         TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
01170                         #else
01171                         TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
01172                         #endif
01173                         if ( node )
01174                         {
01175                                 node->SetValue( attrib->Value() );
01176                                 delete attrib;
01177                                 return 0;
01178                         }
01179 
01180                         attributeSet.Add( attrib );
01181                 }
01182         }
01183         return p;
01184 }
01185 
01186 
01187 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01188 {
01189         TiXmlDocument* document = GetDocument();
01190 
01191         // Read in text and elements in any order.
01192         const char* pWithWhiteSpace = p;
01193         p = SkipWhiteSpace( p, encoding );
01194 
01195         while ( p && *p )
01196         {
01197                 if ( *p != '<' )
01198                 {
01199                         // Take what we have, make a text element.
01200                         TiXmlText* textNode = new TiXmlText( "" );
01201 
01202                         if ( !textNode )
01203                         {
01204                                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
01205                                     return 0;
01206                         }
01207 
01208                         if ( TiXmlBase::IsWhiteSpaceCondensed() )
01209                         {
01210                                 p = textNode->Parse( p, data, encoding );
01211                         }
01212                         else
01213                         {
01214                                 // Special case: we want to keep the white space
01215                                 // so that leading spaces aren't removed.
01216                                 p = textNode->Parse( pWithWhiteSpace, data, encoding );
01217                         }
01218 
01219                         if ( !textNode->Blank() )
01220                                 LinkEndChild( textNode );
01221                         else
01222                                 delete textNode;
01223                 }
01224                 else
01225                 {
01226                         // We hit a '<'
01227                         // Have we hit a new element or an end tag? This could also be
01228                         // a TiXmlText in the "CDATA" style.
01229                         if ( StringEqual( p, "</", false, encoding ) )
01230                         {
01231                                 return p;
01232                         }
01233                         else
01234                         {
01235                                 TiXmlNode* node = Identify( p, encoding );
01236                                 if ( node )
01237                                 {
01238                                         p = node->Parse( p, data, encoding );
01239                                         LinkEndChild( node );
01240                                 }
01241                                 else
01242                                 {
01243                                         return 0;
01244                                 }
01245                         }
01246                 }
01247                 pWithWhiteSpace = p;
01248                 p = SkipWhiteSpace( p, encoding );
01249         }
01250 
01251         if ( !p )
01252         {
01253                 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
01254         }
01255         return p;
01256 }
01257 
01258 
01259 #ifdef TIXML_USE_STL
01260 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
01261 {
01262         while ( in->good() )
01263         {
01264                 int c = in->get();
01265                 if ( c <= 0 )
01266                 {
01267                         TiXmlDocument* document = GetDocument();
01268                         if ( document )
01269                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01270                         return;
01271                 }
01272                 (*tag) += (char) c;
01273 
01274                 if ( c == '>' )
01275                 {
01276                         // All is well.
01277                         return;
01278                 }
01279         }
01280 }
01281 #endif
01282 
01283 
01284 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01285 {
01286         TiXmlDocument* document = GetDocument();
01287         p = SkipWhiteSpace( p, encoding );
01288 
01289         if ( data )
01290         {
01291                 data->Stamp( p, encoding );
01292                 location = data->Cursor();
01293         }
01294         if ( !p || !*p || *p != '<' )
01295         {
01296                 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
01297                 return 0;
01298         }
01299         ++p;
01300     value = "";
01301 
01302         while ( p && *p && *p != '>' )
01303         {
01304                 value += *p;
01305                 ++p;
01306         }
01307 
01308         if ( !p )
01309         {
01310                 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
01311         }
01312         if ( *p == '>' )
01313                 return p+1;
01314         return p;
01315 }
01316 
01317 #ifdef TIXML_USE_STL
01318 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
01319 {
01320         while ( in->good() )
01321         {
01322                 int c = in->get();
01323                 if ( c <= 0 )
01324                 {
01325                         TiXmlDocument* document = GetDocument();
01326                         if ( document )
01327                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01328                         return;
01329                 }
01330 
01331                 (*tag) += (char) c;
01332 
01333                 if ( c == '>'
01334                          && tag->at( tag->length() - 2 ) == '-'
01335                          && tag->at( tag->length() - 3 ) == '-' )
01336                 {
01337                         // All is well.
01338                         return;
01339                 }
01340         }
01341 }
01342 #endif
01343 
01344 
01345 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01346 {
01347         TiXmlDocument* document = GetDocument();
01348         value = "";
01349 
01350         p = SkipWhiteSpace( p, encoding );
01351 
01352         if ( data )
01353         {
01354                 data->Stamp( p, encoding );
01355                 location = data->Cursor();
01356         }
01357         const char* startTag = "<!--";
01358         const char* endTag   = "-->";
01359 
01360         if ( !StringEqual( p, startTag, false, encoding ) )
01361         {
01362                 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
01363                 return 0;
01364         }
01365         p += strlen( startTag );
01366 
01367         // [ 1475201 ] TinyXML parses entities in comments
01368         // Oops - ReadText doesn't work, because we don't want to parse the entities.
01369         // p = ReadText( p, &value, false, endTag, false, encoding );
01370         //
01371         // from the XML spec:
01372         /*
01373          [Definition: Comments may appear anywhere in a document outside other markup; in addition,
01374                       they may appear within the document type declaration at places allowed by the grammar.
01375                                   They are not part of the document's character data; an XML processor MAY, but need not,
01376                                   make it possible for an application to retrieve the text of comments. For compatibility,
01377                                   the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
01378                                   references MUST NOT be recognized within comments.
01379 
01380                                   An example of a comment:
01381 
01382                                   <!-- declarations for <head> & <body> -->
01383         */
01384 
01385     value = "";
01386         // Keep all the white space.
01387         while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
01388         {
01389                 value.append( p, 1 );
01390                 ++p;
01391         }
01392         if ( p )
01393                 p += strlen( endTag );
01394 
01395         return p;
01396 }
01397 
01398 
01399 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01400 {
01401         p = SkipWhiteSpace( p, encoding );
01402         if ( !p || !*p ) return 0;
01403 
01404 //      int tabsize = 4;
01405 //      if ( document )
01406 //              tabsize = document->TabSize();
01407 
01408         if ( data )
01409         {
01410                 data->Stamp( p, encoding );
01411                 location = data->Cursor();
01412         }
01413         // Read the name, the '=' and the value.
01414         const char* pErr = p;
01415         p = ReadName( p, &name, encoding );
01416         if ( !p || !*p )
01417         {
01418                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01419                 return 0;
01420         }
01421         p = SkipWhiteSpace( p, encoding );
01422         if ( !p || !*p || *p != '=' )
01423         {
01424                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01425                 return 0;
01426         }
01427 
01428         ++p;    // skip '='
01429         p = SkipWhiteSpace( p, encoding );
01430         if ( !p || !*p )
01431         {
01432                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01433                 return 0;
01434         }
01435 
01436         const char* end;
01437         const char SINGLE_QUOTE = '\'';
01438         const char DOUBLE_QUOTE = '\"';
01439 
01440         if ( *p == SINGLE_QUOTE )
01441         {
01442                 ++p;
01443                 end = "\'";             // single quote in string
01444                 p = ReadText( p, &value, false, end, false, encoding );
01445         }
01446         else if ( *p == DOUBLE_QUOTE )
01447         {
01448                 ++p;
01449                 end = "\"";             // double quote in string
01450                 p = ReadText( p, &value, false, end, false, encoding );
01451         }
01452         else
01453         {
01454                 // All attribute values should be in single or double quotes.
01455                 // But this is such a common error that the parser will try
01456                 // its best, even without them.
01457                 value = "";
01458                 while (    p && *p                                                                                      // existence
01459                                 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'      // whitespace
01460                                 && *p != '/' && *p != '>' )                                                     // tag end
01461                 {
01462                         if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
01463                                 // [ 1451649 ] Attribute values with trailing quotes not handled correctly
01464                                 // We did not have an opening quote but seem to have a
01465                                 // closing one. Give up and throw an error.
01466                                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01467                                 return 0;
01468                         }
01469                         value += *p;
01470                         ++p;
01471                 }
01472         }
01473         return p;
01474 }
01475 
01476 #ifdef TIXML_USE_STL
01477 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
01478 {
01479         while ( in->good() )
01480         {
01481                 int c = in->peek();
01482                 if ( !cdata && (c == '<' ) )
01483                 {
01484                         return;
01485                 }
01486                 if ( c <= 0 )
01487                 {
01488                         TiXmlDocument* document = GetDocument();
01489                         if ( document )
01490                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01491                         return;
01492                 }
01493 
01494                 (*tag) += (char) c;
01495                 in->get();      // "commits" the peek made above
01496 
01497                 if ( cdata && c == '>' && tag->size() >= 3 ) {
01498                         size_t len = tag->size();
01499                         if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
01500                                 // terminator of cdata.
01501                                 return;
01502                         }
01503                 }
01504         }
01505 }
01506 #endif
01507 
01508 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01509 {
01510         value = "";
01511         TiXmlDocument* document = GetDocument();
01512 
01513         if ( data )
01514         {
01515                 data->Stamp( p, encoding );
01516                 location = data->Cursor();
01517         }
01518 
01519         const char* const startTag = "<![CDATA[";
01520         const char* const endTag   = "]]>";
01521 
01522         if ( cdata || StringEqual( p, startTag, false, encoding ) )
01523         {
01524                 cdata = true;
01525 
01526                 if ( !StringEqual( p, startTag, false, encoding ) )
01527                 {
01528                         document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
01529                         return 0;
01530                 }
01531                 p += strlen( startTag );
01532 
01533                 // Keep all the white space, ignore the encoding, etc.
01534                 while (    p && *p
01535                                 && !StringEqual( p, endTag, false, encoding )
01536                           )
01537                 {
01538                         value += *p;
01539                         ++p;
01540                 }
01541 
01542                 TIXML_STRING dummy;
01543                 p = ReadText( p, &dummy, false, endTag, false, encoding );
01544                 return p;
01545         }
01546         else
01547         {
01548                 bool ignoreWhite = true;
01549 
01550                 const char* end = "<";
01551                 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
01552                 if ( p )
01553                         return p-1;     // don't truncate the '<'
01554                 return 0;
01555         }
01556 }
01557 
01558 #ifdef TIXML_USE_STL
01559 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
01560 {
01561         while ( in->good() )
01562         {
01563                 int c = in->get();
01564                 if ( c <= 0 )
01565                 {
01566                         TiXmlDocument* document = GetDocument();
01567                         if ( document )
01568                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01569                         return;
01570                 }
01571                 (*tag) += (char) c;
01572 
01573                 if ( c == '>' )
01574                 {
01575                         // All is well.
01576                         return;
01577                 }
01578         }
01579 }
01580 #endif
01581 
01582 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
01583 {
01584         p = SkipWhiteSpace( p, _encoding );
01585         // Find the beginning, find the end, and look for
01586         // the stuff in-between.
01587         TiXmlDocument* document = GetDocument();
01588         if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01589         {
01590                 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
01591                 return 0;
01592         }
01593         if ( data )
01594         {
01595                 data->Stamp( p, _encoding );
01596                 location = data->Cursor();
01597         }
01598         p += 5;
01599 
01600         version = "";
01601         encoding = "";
01602         standalone = "";
01603 
01604         while ( p && *p )
01605         {
01606                 if ( *p == '>' )
01607                 {
01608                         ++p;
01609                         return p;
01610                 }
01611 
01612                 p = SkipWhiteSpace( p, _encoding );
01613                 if ( StringEqual( p, "version", true, _encoding ) )
01614                 {
01615                         TiXmlAttribute attrib;
01616                         p = attrib.Parse( p, data, _encoding );
01617                         version = attrib.Value();
01618                 }
01619                 else if ( StringEqual( p, "encoding", true, _encoding ) )
01620                 {
01621                         TiXmlAttribute attrib;
01622                         p = attrib.Parse( p, data, _encoding );
01623                         encoding = attrib.Value();
01624                 }
01625                 else if ( StringEqual( p, "standalone", true, _encoding ) )
01626                 {
01627                         TiXmlAttribute attrib;
01628                         p = attrib.Parse( p, data, _encoding );
01629                         standalone = attrib.Value();
01630                 }
01631                 else
01632                 {
01633                         // Read over whatever it is.
01634                         while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01635                                 ++p;
01636                 }
01637         }
01638         return 0;
01639 }
01640 
01641 bool TiXmlText::Blank() const
01642 {
01643         for ( unsigned i=0; i<value.length(); i++ )
01644                 if ( !IsWhiteSpace( value[i] ) )
01645                         return false;
01646         return true;
01647 }
01648