CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_6_2_7/src/FWCore/Utilities/src/tinyxmlparser.cc

Go to the documentation of this file.
00001 /*
00002 www.sourceforge.net/projects/tinyxml
00003 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
00004 
00005 This software is provided 'as-is', without any express or implied
00006 warranty. In no event will the authors be held liable for any
00007 damages arising from the use of this software.
00008 
00009 Permission is granted to anyone to use this software for any
00010 purpose, including commercial applications, and to alter it and
00011 redistribute it freely, subject to the following restrictions:
00012 
00013 1. The origin of this software must not be misrepresented; you must
00014 not claim that you wrote the original software. If you use this
00015 software in a product, an acknowledgment in the product documentation
00016 would be appreciated but is not required.
00017 
00018 2. Altered source versions must be plainly marked as such, and
00019 must not be misrepresented as being the original software.
00020 
00021 3. This notice may not be removed or altered from any source
00022 distribution.
00023 */
00024 
00025 /*
00026  * THIS FILE WAS ALTERED BY Eric Vaandering, 25 August 2009.
00027  *
00028  * THIS FILE WAS ALTERED BY Bill Tanenbaum, 25 September 2011
00029  * to fix Coverity error (use after free).
00030 
00031  * THIS FILE WAS ALTERED BY Bill Tanenbaum, 17 May 2012
00032  * to fix bogus uninitialized variable error reported by clang static analyzer.
00033  * The error was bogus, but trivial to eliminate.  All three places
00034  * where a variable was declared without being initialized were fixed.
00035  */
00036 #define TIXML_USE_STL
00037 #include <ctype.h>
00038 #include <stddef.h>
00039 
00040 #include "FWCore/Utilities/interface/tinyxml.h"
00041 
00042 //#define DEBUG_PARSER
00043 #if defined( DEBUG_PARSER )
00044 #       if defined( DEBUG ) && defined( _MSC_VER )
00045 #               include <windows.h>
00046 #               define TIXML_LOG OutputDebugString
00047 #       else
00048 #               define TIXML_LOG printf
00049 #       endif
00050 #endif
00051 
00052 // Note tha "PutString" hardcodes the same list. This
00053 // is less flexible than it appears. Changing the entries
00054 // or order will break putstring.
00055 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
00056 {
00057         { "&amp;",  5, '&' },
00058         { "&lt;",   4, '<' },
00059         { "&gt;",   4, '>' },
00060         { "&quot;", 6, '\"' },
00061         { "&apos;", 6, '\'' }
00062 };
00063 
00064 // Bunch of unicode info at:
00065 //              http://www.unicode.org/faq/utf_bom.html
00066 // Including the basic of this table, which determines the #bytes in the
00067 // sequence from the lead byte. 1 placed for invalid sequences --
00068 // although the result will be junk, pass it through as much as possible.
00069 // Beware of the non-characters in UTF-8:
00070 //                              ef bb bf (Microsoft "lead bytes")
00071 //                              ef bf be
00072 //                              ef bf bf
00073 
// The byte sequence EF BB BF (the Microsoft UTF-8 "lead bytes"), one
// constant per byte. TIXML_UTF_LEAD_0 is also the first byte of the
// EF BF BE and EF BF BF sequences checked in this file.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFU;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBU;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFU;
00077 
00078 const int TiXmlBase::utf8ByteTable[256] =
00079 {
00080         //      0       1       2       3       4       5       6       7       8       9       a       b       c       d       e       f
00081                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x00
00082                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x10
00083                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x20
00084                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x30
00085                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x40
00086                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x50
00087                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x60
00088                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x70 End of ASCII range
00089                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x80 0x80 to 0xc1 invalid
00090                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x90
00091                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xa0
00092                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xb0
00093                 1,      1,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xc0 0xc2 to 0xdf 2 byte
00094                 2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xd0
00095                 3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      // 0xe0 0xe0 to 0xef 3 byte
00096                 4,      4,      4,      4,      4,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1       // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
00097 };
00098 
00099 
00100 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
00101 {
00102         const unsigned long BYTE_MASK = 0xBF;
00103         const unsigned long BYTE_MARK = 0x80;
00104         const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
00105 
00106         if (input < 0x80)
00107                 *length = 1;
00108         else if ( input < 0x800 )
00109                 *length = 2;
00110         else if ( input < 0x10000 )
00111                 *length = 3;
00112         else if ( input < 0x200000 )
00113                 *length = 4;
00114         else
00115                 { *length = 0; return; }        // This code won't covert this correctly anyway.
00116 
00117         output += *length;
00118 
00119         // Scary scary fall throughs.
00120         switch (*length)
00121         {
00122                 case 4:
00123                         --output;
00124                         *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00125                         input >>= 6;
00126                 case 3:
00127                         --output;
00128                         *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00129                         input >>= 6;
00130                 case 2:
00131                         --output;
00132                         *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00133                         input >>= 6;
00134                 case 1:
00135                         --output;
00136                         *output = (char)(input | FIRST_BYTE_MARK[*length]);
00137         }
00138 }
00139 
00140 
00141 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00142 {
00143         // This will only work for low-ascii, everything else is assumed to be a valid
00144         // letter. I'm not sure this is the best approach, but it is quite tricky trying
00145         // to figure out alhabetical vs. not across encoding. So take a very
00146         // conservative approach.
00147 
00148 //      if ( encoding == TIXML_ENCODING_UTF8 )
00149 //      {
00150                 if ( anyByte < 127 )
00151                         return isalpha( anyByte );
00152                 else
00153                         return 1;       // What else to do? The unicode set is huge...get the english ones right.
00154 //      }
00155 //      else
00156 //      {
00157 //              return isalpha( anyByte );
00158 //      }
00159 }
00160 
00161 
00162 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00163 {
00164         // This will only work for low-ascii, everything else is assumed to be a valid
00165         // letter. I'm not sure this is the best approach, but it is quite tricky trying
00166         // to figure out alhabetical vs. not across encoding. So take a very
00167         // conservative approach.
00168 
00169 //      if ( encoding == TIXML_ENCODING_UTF8 )
00170 //      {
00171                 if ( anyByte < 127 )
00172                         return isalnum( anyByte );
00173                 else
00174                         return 1;       // What else to do? The unicode set is huge...get the english ones right.
00175 //      }
00176 //      else
00177 //      {
00178 //              return isalnum( anyByte );
00179 //      }
00180 }
00181 
00182 
00183 class TiXmlParsingData
00184 {
00185         friend class TiXmlDocument;
00186   public:
00187         void Stamp( const char* now, TiXmlEncoding encoding );
00188 
00189         const TiXmlCursor& Cursor()     { return cursor; }
00190 
00191   private:
00192         // Only used by the document!
00193         TiXmlParsingData( const char* start, int _tabsize, int row, int col )
00194         {
00195                 assert( start );
00196                 stamp = start;
00197                 tabsize = _tabsize;
00198                 cursor.row = row;
00199                 cursor.col = col;
00200         }
00201 
00202         TiXmlCursor             cursor;
00203         const char*             stamp;
00204         int                             tabsize;
00205 };
00206 
00207 
00208 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
00209 {
00210         assert( now );
00211 
00212         // Do nothing if the tabsize is 0.
00213         if ( tabsize < 1 )
00214         {
00215                 return;
00216         }
00217 
00218         // Get the current row, column.
00219         int row = cursor.row;
00220         int col = cursor.col;
00221         const char* p = stamp;
00222         assert( p );
00223 
00224         while ( p < now )
00225         {
00226                 // Treat p as unsigned, so we have a happy compiler.
00227                 const unsigned char* pU = (const unsigned char*)p;
00228 
00229                 // Code contributed by Fletcher Dunn: (modified by lee)
00230                 switch (*pU) {
00231                         case 0:
00232                                 // We *should* never get here, but in case we do, don't
00233                                 // advance past the terminating null character, ever
00234                                 return;
00235 
00236                         case '\r':
00237                                 // bump down to the next line
00238                                 ++row;
00239                                 col = 0;
00240                                 // Eat the character
00241                                 ++p;
00242 
00243                                 // Check for \r\n sequence, and treat this as a single character
00244                                 if (*p == '\n') {
00245                                         ++p;
00246                                 }
00247                                 break;
00248 
00249                         case '\n':
00250                                 // bump down to the next line
00251                                 ++row;
00252                                 col = 0;
00253 
00254                                 // Eat the character
00255                                 ++p;
00256 
00257                                 // Check for \n\r sequence, and treat this as a single
00258                                 // character.  (Yes, this bizarre thing does occur still
00259                                 // on some arcane platforms...)
00260                                 if (*p == '\r') {
00261                                         ++p;
00262                                 }
00263                                 break;
00264 
00265                         case '\t':
00266                                 // Eat the character
00267                                 ++p;
00268 
00269                                 // Skip to next tab stop
00270                                 col = (col / tabsize + 1) * tabsize;
00271                                 break;
00272 
00273                         case TIXML_UTF_LEAD_0:
00274                                 if ( encoding == TIXML_ENCODING_UTF8 )
00275                                 {
00276                                         if ( *(p+1) && *(p+2) )
00277                                         {
00278                                                 // In these cases, don't advance the column. These are
00279                                                 // 0-width spaces.
00280                                                 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00281                                                         p += 3;
00282                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00283                                                         p += 3;
00284                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00285                                                         p += 3;
00286                                                 else
00287                                                         { p +=3; ++col; }       // A normal character.
00288                                         }
00289                                 }
00290                                 else
00291                                 {
00292                                         ++p;
00293                                         ++col;
00294                                 }
00295                                 break;
00296 
00297                         default:
00298                                 if ( encoding == TIXML_ENCODING_UTF8 )
00299                                 {
00300                                         // Eat the 1 to 4 byte utf8 character.
00301                                         int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
00302                                         if ( step == 0 )
00303                                                 step = 1;               // Error case from bad encoding, but handle gracefully.
00304                                         p += step;
00305 
00306                                         // Just advance one column, of course.
00307                                         ++col;
00308                                 }
00309                                 else
00310                                 {
00311                                         ++p;
00312                                         ++col;
00313                                 }
00314                                 break;
00315                 }
00316         }
00317         cursor.row = row;
00318         cursor.col = col;
00319         assert( cursor.row >= -1 );
00320         assert( cursor.col >= -1 );
00321         stamp = p;
00322         assert( stamp );
00323 }
00324 
00325 
00326 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
00327 {
00328         if ( !p || !*p )
00329         {
00330                 return 0;
00331         }
00332         if ( encoding == TIXML_ENCODING_UTF8 )
00333         {
00334                 while ( *p )
00335                 {
00336                         const unsigned char* pU = (const unsigned char*)p;
00337 
00338                         // Skip the stupid Microsoft UTF-8 Byte order marks
00339                         if (    *(pU+0)==TIXML_UTF_LEAD_0
00340                                  && *(pU+1)==TIXML_UTF_LEAD_1
00341                                  && *(pU+2)==TIXML_UTF_LEAD_2 )
00342                         {
00343                                 p += 3;
00344                                 continue;
00345                         }
00346                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00347                                  && *(pU+1)==0xbfU
00348                                  && *(pU+2)==0xbeU )
00349                         {
00350                                 p += 3;
00351                                 continue;
00352                         }
00353                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00354                                  && *(pU+1)==0xbfU
00355                                  && *(pU+2)==0xbfU )
00356                         {
00357                                 p += 3;
00358                                 continue;
00359                         }
00360 
00361                         if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )            // Still using old rules for white space.
00362                                 ++p;
00363                         else
00364                                 break;
00365                 }
00366         }
00367         else
00368         {
00369                 // Code altered by user.  An extra pair of () was added to eliminate a compiler warning.
00370                 while ( ( *p && IsWhiteSpace( *p ) ) || *p == '\n' || *p =='\r' )
00371                         ++p;
00372         }
00373 
00374         return p;
00375 }
00376 
00377 #ifdef TIXML_USE_STL
00378 /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
00379 {
00380         for( ;; )
00381         {
00382                 if ( !in->good() ) return false;
00383 
00384                 int c = in->peek();
00385                 // At this scope, we can't get to a document. So fail silently.
00386                 if ( !IsWhiteSpace( c ) || c <= 0 )
00387                         return true;
00388 
00389                 *tag += (char) in->get();
00390         }
00391 }
00392 
00393 /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
00394 {
00395         //assert( character > 0 && character < 128 );   // else it won't work in utf-8
00396         while ( in->good() )
00397         {
00398                 int c = in->peek();
00399                 if ( c == character )
00400                         return true;
00401                 if ( c <= 0 )           // Silent failure: can't get document at this scope
00402                         return false;
00403 
00404                 in->get();
00405                 *tag += (char) c;
00406         }
00407         return false;
00408 }
00409 #endif
00410 
00411 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
00412 // "assign" optimization removes over 10% of the execution time.
00413 //
00414 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
00415 {
00416         // Oddly, not supported on some comilers,
00417         //name->clear();
00418         // So use this:
00419         *name = "";
00420         assert( p );
00421 
00422         // Names start with letters or underscores.
00423         // Of course, in unicode, tinyxml has no idea what a letter *is*. The
00424         // algorithm is generous.
00425         //
00426         // After that, they can be letters, underscores, numbers,
00427         // hyphens, or colons. (Colons are valid ony for namespaces,
00428         // but tinyxml can't tell namespaces from names.)
00429         if (    p && *p
00430                  && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00431         {
00432                 const char* start = p;
00433                 while(          p && *p
00434                                 &&      (               IsAlphaNum( (unsigned char ) *p, encoding )
00435                                                  || *p == '_'
00436                                                  || *p == '-'
00437                                                  || *p == '.'
00438                                                  || *p == ':' ) )
00439                 {
00440                         //(*name) += *p; // expensive
00441                         ++p;
00442                 }
00443                 if ( p-start > 0 ) {
00444                         name->assign( start, p-start );
00445                 }
00446                 return p;
00447         }
00448         return 0;
00449 }
00450 
00451 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
00452 {
00453         // Presume an entity, and pull it out.
00454     TIXML_STRING ent;
00455         *length = 0;
00456 
00457         if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00458         {
00459                 unsigned long ucs = 0;
00460                 ptrdiff_t delta = 0;
00461                 unsigned mult = 1;
00462 
00463                 if ( *(p+2) == 'x' )
00464                 {
00465                         // Hexadecimal.
00466                         if ( !*(p+3) ) return 0;
00467 
00468                         const char* q = p+3;
00469                         q = strchr( q, ';' );
00470 
00471                         if ( !q || !*q ) return 0;
00472 
00473                         delta = q-p;
00474                         --q;
00475 
00476                         while ( *q != 'x' )
00477                         {
00478                                 if ( *q >= '0' && *q <= '9' )
00479                                         ucs += mult * (*q - '0');
00480                                 else if ( *q >= 'a' && *q <= 'f' )
00481                                         ucs += mult * (*q - 'a' + 10);
00482                                 else if ( *q >= 'A' && *q <= 'F' )
00483                                         ucs += mult * (*q - 'A' + 10 );
00484                                 else
00485                                         return 0;
00486                                 mult *= 16;
00487                                 --q;
00488                         }
00489                 }
00490                 else
00491                 {
00492                         // Decimal.
00493                         if ( !*(p+2) ) return 0;
00494 
00495                         const char* q = p+2;
00496                         q = strchr( q, ';' );
00497 
00498                         if ( !q || !*q ) return 0;
00499 
00500                         delta = q-p;
00501                         --q;
00502 
00503                         while ( *q != '#' )
00504                         {
00505                                 if ( *q >= '0' && *q <= '9' )
00506                                         ucs += mult * (*q - '0');
00507                                 else
00508                                         return 0;
00509                                 mult *= 10;
00510                                 --q;
00511                         }
00512                 }
00513                 if ( encoding == TIXML_ENCODING_UTF8 )
00514                 {
00515                         // convert the UCS to UTF-8
00516                         ConvertUTF32ToUTF8( ucs, value, length );
00517                 }
00518                 else
00519                 {
00520                         *value = (char)ucs;
00521                         *length = 1;
00522                 }
00523                 return p + delta + 1;
00524         }
00525 
00526         // Now try to match it.
00527         for( int i=0; i<NUM_ENTITY; ++i )
00528         {
00529                 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00530                 {
00531                         assert( strlen( entity[i].str ) == entity[i].strLength );
00532                         *value = entity[i].chr;
00533                         *length = 1;
00534                         return ( p + entity[i].strLength );
00535                 }
00536         }
00537 
00538         // So it wasn't an entity, its unrecognized, or something like that.
00539         *value = *p;    // Don't put back the last one, since we return it!
00540         //*length = 1;  // Leave unrecognized entities - this doesn't really work.
00541                                         // Just writes strange XML.
00542         return p+1;
00543 }
00544 
00545 
00546 bool TiXmlBase::StringEqual( const char* p,
00547                                                          const char* tag,
00548                                                          bool ignoreCase,
00549                                                          TiXmlEncoding encoding )
00550 {
00551         assert( p );
00552         assert( tag );
00553         if ( !p || !*p )
00554         {
00555                 assert( 0 );
00556                 return false;
00557         }
00558 
00559         const char* q = p;
00560 
00561         if ( ignoreCase )
00562         {
00563                 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00564                 {
00565                         ++q;
00566                         ++tag;
00567                 }
00568 
00569                 if ( *tag == 0 )
00570                         return true;
00571         }
00572         else
00573         {
00574                 while ( *q && *tag && *q == *tag )
00575                 {
00576                         ++q;
00577                         ++tag;
00578                 }
00579 
00580                 if ( *tag == 0 )                // Have we found the end of the tag, and everything equal?
00581                         return true;
00582         }
00583         return false;
00584 }
00585 
00586 const char* TiXmlBase::ReadText(        const char* p,
00587                                                                         TIXML_STRING * text,
00588                                                                         bool trimWhiteSpace,
00589                                                                         const char* endTag,
00590                                                                         bool caseInsensitive,
00591                                                                         TiXmlEncoding encoding )
00592 {
00593     *text = "";
00594         if (    !trimWhiteSpace                 // certain tags always keep whitespace
00595                  || !condenseWhiteSpace )       // if true, whitespace is always kept
00596         {
00597                 // Keep all the white space.
00598                 while (    p && *p
00599                                 && !StringEqual( p, endTag, caseInsensitive, encoding )
00600                           )
00601                 {
00602                         int len = 0;
00603                         char cArr[4] = { 0, 0, 0, 0 };
00604                         p = GetChar( p, cArr, &len, encoding );
00605                         text->append( cArr, len );
00606                 }
00607         }
00608         else
00609         {
00610                 bool whitespace = false;
00611 
00612                 // Remove leading white space:
00613                 p = SkipWhiteSpace( p, encoding );
00614                 while (    p && *p
00615                                 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00616                 {
00617                         if ( *p == '\r' || *p == '\n' )
00618                         {
00619                                 whitespace = true;
00620                                 ++p;
00621                         }
00622                         else if ( IsWhiteSpace( *p ) )
00623                         {
00624                                 whitespace = true;
00625                                 ++p;
00626                         }
00627                         else
00628                         {
00629                                 // If we've found whitespace, add it before the
00630                                 // new character. Any whitespace just becomes a space.
00631                                 if ( whitespace )
00632                                 {
00633                                         (*text) += ' ';
00634                                         whitespace = false;
00635                                 }
00636                                 int len = 0;
00637                                 char cArr[4] = { 0, 0, 0, 0 };
00638                                 p = GetChar( p, cArr, &len, encoding );
00639                                 if ( len == 1 )
00640                                         (*text) += cArr[0];     // more efficient
00641                                 else
00642                                         text->append( cArr, len );
00643                         }
00644                 }
00645         }
00646         if ( p )
00647                 p += strlen( endTag );
00648         return p;
00649 }
00650 
00651 #ifdef TIXML_USE_STL
00652 
00653 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
00654 {
00655         // The basic issue with a document is that we don't know what we're
00656         // streaming. Read something presumed to be a tag (and hope), then
00657         // identify it, and call the appropriate stream method on the tag.
00658         //
00659         // This "pre-streaming" will never read the closing ">" so the
00660         // sub-tag can orient itself.
00661 
00662         if ( !StreamTo( in, '<', tag ) )
00663         {
00664                 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00665                 return;
00666         }
00667 
00668         while ( in->good() )
00669         {
00670                 int tagIndex = (int) tag->length();
00671                 while ( in->good() && in->peek() != '>' )
00672                 {
00673                         int c = in->get();
00674                         if ( c <= 0 )
00675                         {
00676                                 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00677                                 break;
00678                         }
00679                         (*tag) += (char) c;
00680                 }
00681 
00682                 if ( in->good() )
00683                 {
00684                         // We now have something we presume to be a node of
00685                         // some sort. Identify it, and call the node to
00686                         // continue streaming.
00687                         TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00688 
00689                         if ( node )
00690                         {
00691                                 node->StreamIn( in, tag );
00692                                 bool isElement = node->ToElement() != 0;
00693                                 delete node;
00694                                 node = 0;
00695 
00696                                 // If this is the root element, we're done. Parsing will be
00697                                 // done by the >> operator.
00698                                 if ( isElement )
00699                                 {
00700                                         return;
00701                                 }
00702                         }
00703                         else
00704                         {
00705                                 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00706                                 return;
00707                         }
00708                 }
00709         }
00710         // We should have returned sooner.
00711         SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00712 }
00713 
00714 #endif
00715 
00716 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
00717 {
00718         ClearError();
00719 
00720         // Parse away, at the document level. Since a document
00721         // contains nothing but other tags, most of what happens
00722         // here is skipping white space.
00723         if ( !p || !*p )
00724         {
00725                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00726                 return 0;
00727         }
00728 
00729         // Note that, for a document, this needs to come
00730         // before the while space skip, so that parsing
00731         // starts from the pointer we are given.
00732         location.Clear();
00733         if ( prevData )
00734         {
00735                 location.row = prevData->cursor.row;
00736                 location.col = prevData->cursor.col;
00737         }
00738         else
00739         {
00740                 location.row = 0;
00741                 location.col = 0;
00742         }
00743         TiXmlParsingData data( p, TabSize(), location.row, location.col );
00744         location = data.Cursor();
00745 
00746         if ( encoding == TIXML_ENCODING_UNKNOWN )
00747         {
00748                 // Check for the Microsoft UTF-8 lead bytes.
00749                 const unsigned char* pU = (const unsigned char*)p;
00750                 if (    *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00751                          && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00752                          && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00753                 {
00754                         encoding = TIXML_ENCODING_UTF8;
00755                         useMicrosoftBOM = true;
00756                 }
00757         }
00758 
00759     p = SkipWhiteSpace( p, encoding );
00760         if ( !p )
00761         {
00762                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00763                 return 0;
00764         }
00765 
00766         while ( p && *p )
00767         {
00768                 TiXmlNode* node = Identify( p, encoding );
00769                 if ( node )
00770                 {
00771                         p = node->Parse( p, &data, encoding );
00772                         // LinkEndChild just returns the input pointer unmodified,
00773                         // except if node is deleted, in which case it returns 0.
00774                         node = LinkEndChild( node );
00775                 }
00776                 else
00777                 {
00778                         break;
00779                 }
00780 
00781                 // Did we get encoding info?
00782                 if (    encoding == TIXML_ENCODING_UNKNOWN
00783                          && node && node->ToDeclaration() )
00784                 {
00785                         TiXmlDeclaration* dec = node->ToDeclaration();
00786                         const char* enc = dec->Encoding();
00787                         assert( enc );
00788 
00789                         if ( *enc == 0 )
00790                                 encoding = TIXML_ENCODING_UTF8;
00791                         else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00792                                 encoding = TIXML_ENCODING_UTF8;
00793                         else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00794                                 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
00795                         else
00796                                 encoding = TIXML_ENCODING_LEGACY;
00797                 }
00798 
00799                 p = SkipWhiteSpace( p, encoding );
00800         }
00801 
00802         // Was this empty?
00803         if ( !firstChild ) {
00804                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
00805                 return 0;
00806         }
00807 
00808         // All is well.
00809         return p;
00810 }
00811 
00812 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
00813 {
00814         // The first error in a chain is more accurate - don't set again!
00815         if ( error )
00816                 return;
00817 
00818         assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
00819         error   = true;
00820         errorId = err;
00821         errorDesc = errorString[ errorId ];
00822 
00823         errorLocation.Clear();
00824         if ( pError && data )
00825         {
00826                 data->Stamp( pError, encoding );
00827                 errorLocation = data->Cursor();
00828         }
00829 }
00830 
00831 
00832 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
00833 {
00834         TiXmlNode* returnNode = 0;
00835 
00836         p = SkipWhiteSpace( p, encoding );
00837         if( !p || !*p || *p != '<' )
00838         {
00839                 return 0;
00840         }
00841 
00842         TiXmlDocument* doc = GetDocument();
00843         p = SkipWhiteSpace( p, encoding );
00844 
00845         if ( !p || !*p )
00846         {
00847                 return 0;
00848         }
00849 
00850         // What is this thing?
00851         // - Elements start with a letter or underscore, but xml is reserved.
00852         // - Comments: <!--
00853         // - Decleration: <?xml
00854         // - Everthing else is unknown to tinyxml.
00855         //
00856 
00857         const char* xmlHeader = { "<?xml" };
00858         const char* commentHeader = { "<!--" };
00859         const char* dtdHeader = { "<!" };
00860         const char* cdataHeader = { "<![CDATA[" };
00861 
00862         if ( StringEqual( p, xmlHeader, true, encoding ) )
00863         {
00864                 #ifdef DEBUG_PARSER
00865                         TIXML_LOG( "XML parsing Declaration\n" );
00866                 #endif
00867                 returnNode = new TiXmlDeclaration();
00868         }
00869         else if ( StringEqual( p, commentHeader, false, encoding ) )
00870         {
00871                 #ifdef DEBUG_PARSER
00872                         TIXML_LOG( "XML parsing Comment\n" );
00873                 #endif
00874                 returnNode = new TiXmlComment();
00875         }
00876         else if ( StringEqual( p, cdataHeader, false, encoding ) )
00877         {
00878                 #ifdef DEBUG_PARSER
00879                         TIXML_LOG( "XML parsing CDATA\n" );
00880                 #endif
00881                 TiXmlText* text = new TiXmlText( "" );
00882                 text->SetCDATA( true );
00883                 returnNode = text;
00884         }
00885         else if ( StringEqual( p, dtdHeader, false, encoding ) )
00886         {
00887                 #ifdef DEBUG_PARSER
00888                         TIXML_LOG( "XML parsing Unknown(1)\n" );
00889                 #endif
00890                 returnNode = new TiXmlUnknown();
00891         }
00892         else if (    IsAlpha( *(p+1), encoding )
00893                           || *(p+1) == '_' )
00894         {
00895                 #ifdef DEBUG_PARSER
00896                         TIXML_LOG( "XML parsing Element\n" );
00897                 #endif
00898                 returnNode = new TiXmlElement( "" );
00899         }
00900         else
00901         {
00902                 #ifdef DEBUG_PARSER
00903                         TIXML_LOG( "XML parsing Unknown(2)\n" );
00904                 #endif
00905                 returnNode = new TiXmlUnknown();
00906         }
00907 
00908         if ( returnNode )
00909         {
00910                 // Set the parent, so it can report errors
00911                 returnNode->parent = this;
00912         }
00913         else
00914         {
00915                 if ( doc )
00916                         doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
00917         }
00918         return returnNode;
00919 }
00920 
00921 #ifdef TIXML_USE_STL
00922 
00923 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
00924 {
00925         // We're called with some amount of pre-parsing. That is, some of "this"
00926         // element is in "tag". Go ahead and stream to the closing ">"
00927         while( in->good() )
00928         {
00929                 int c = in->get();
00930                 if ( c <= 0 )
00931                 {
00932                         TiXmlDocument* document = GetDocument();
00933                         if ( document )
00934                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00935                         return;
00936                 }
00937                 (*tag) += (char) c ;
00938 
00939                 if ( c == '>' )
00940                         break;
00941         }
00942 
00943         if ( tag->length() < 3 ) return;
00944 
00945         // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
00946         // If not, identify and stream.
00947 
00948         if (    tag->at( tag->length() - 1 ) == '>'
00949                  && tag->at( tag->length() - 2 ) == '/' )
00950         {
00951                 // All good!
00952                 return;
00953         }
00954         else if ( tag->at( tag->length() - 1 ) == '>' )
00955         {
00956                 // There is more. Could be:
00957                 //              text
00958                 //              cdata text (which looks like another node)
00959                 //              closing tag
00960                 //              another node.
00961                 for ( ;; )
00962                 {
00963                         StreamWhiteSpace( in, tag );
00964 
00965                         // Do we have text?
00966                         if ( in->good() && in->peek() != '<' )
00967                         {
00968                                 // Yep, text.
00969                                 TiXmlText text( "" );
00970                                 text.StreamIn( in, tag );
00971 
00972                                 // What follows text is a closing tag or another node.
00973                                 // Go around again and figure it out.
00974                                 continue;
00975                         }
00976 
00977                         // We now have either a closing tag...or another node.
00978                         // We should be at a "<", regardless.
00979                         if ( !in->good() ) return;
00980                         assert( in->peek() == '<' );
00981                         int tagIndex = (int) tag->length();
00982 
00983                         bool closingTag = false;
00984                         bool firstCharFound = false;
00985 
00986                         for( ;; )
00987                         {
00988                                 if ( !in->good() )
00989                                         return;
00990 
00991                                 int c = in->peek();
00992                                 if ( c <= 0 )
00993                                 {
00994                                         TiXmlDocument* document = GetDocument();
00995                                         if ( document )
00996                                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00997                                         return;
00998                                 }
00999 
01000                                 if ( c == '>' )
01001                                         break;
01002 
01003                                 *tag += (char) c;
01004                                 in->get();
01005 
01006                                 // Early out if we find the CDATA id.
01007                                 if ( c == '[' && tag->size() >= 9 )
01008                                 {
01009                                         size_t len = tag->size();
01010                                         const char* start = tag->c_str() + len - 9;
01011                                         if ( strcmp( start, "<![CDATA[" ) == 0 ) {
01012                                                 assert( !closingTag );
01013                                                 break;
01014                                         }
01015                                 }
01016 
01017                                 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
01018                                 {
01019                                         firstCharFound = true;
01020                                         if ( c == '/' )
01021                                                 closingTag = true;
01022                                 }
01023                         }
01024                         // If it was a closing tag, then read in the closing '>' to clean up the input stream.
01025                         // If it was not, the streaming will be done by the tag.
01026                         if ( closingTag )
01027                         {
01028                                 if ( !in->good() )
01029                                         return;
01030 
01031                                 int c = in->get();
01032                                 if ( c <= 0 )
01033                                 {
01034                                         TiXmlDocument* document = GetDocument();
01035                                         if ( document )
01036                                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01037                                         return;
01038                                 }
01039                                 assert( c == '>' );
01040                                 *tag += (char) c;
01041 
01042                                 // We are done, once we've found our closing tag.
01043                                 return;
01044                         }
01045                         else
01046                         {
01047                                 // If not a closing tag, id it, and stream.
01048                                 const char* tagloc = tag->c_str() + tagIndex;
01049                                 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
01050                                 if ( !node )
01051                                         return;
01052                                 node->StreamIn( in, tag );
01053                                 delete node;
01054                                 node = 0;
01055 
01056                                 // No return: go around from the beginning: text, closing tag, or node.
01057                         }
01058                 }
01059         }
01060 }
01061 #endif
01062 
01063 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01064 {
01065         p = SkipWhiteSpace( p, encoding );
01066         TiXmlDocument* document = GetDocument();
01067 
01068         if ( !p || !*p )
01069         {
01070                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
01071                 return 0;
01072         }
01073 
01074         if ( data )
01075         {
01076                 data->Stamp( p, encoding );
01077                 location = data->Cursor();
01078         }
01079 
01080         if ( *p != '<' )
01081         {
01082                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
01083                 return 0;
01084         }
01085 
01086         p = SkipWhiteSpace( p+1, encoding );
01087 
01088         // Read the name.
01089         const char* pErr = p;
01090 
01091     p = ReadName( p, &value, encoding );
01092         if ( !p || !*p )
01093         {
01094                 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
01095                 return 0;
01096         }
01097 
01098     TIXML_STRING endTag ("</");
01099         endTag += value;
01100         endTag += ">";
01101 
01102         // Check for and read attributes. Also look for an empty
01103         // tag or an end tag.
01104         while ( p && *p )
01105         {
01106                 pErr = p;
01107                 p = SkipWhiteSpace( p, encoding );
01108                 if ( !p || !*p )
01109                 {
01110                         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01111                         return 0;
01112                 }
01113                 if ( *p == '/' )
01114                 {
01115                         ++p;
01116                         // Empty tag.
01117                         if ( *p  != '>' )
01118                         {
01119                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
01120                                 return 0;
01121                         }
01122                         return (p+1);
01123                 }
01124                 else if ( *p == '>' )
01125                 {
01126                         // Done with attributes (if there were any.)
01127                         // Read the value -- which can include other
01128                         // elements -- read the end tag, and return.
01129                         ++p;
01130                         p = ReadValue( p, data, encoding );             // Note this is an Element method, and will set the error if one happens.
01131                         if ( !p || !*p ) {
01132                                 // We were looking for the end tag, but found nothing.
01133                                 // Fix for [ 1663758 ] Failure to report error on bad XML
01134                                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01135                                 return 0;
01136                         }
01137 
01138                         // We should find the end tag now
01139                         if ( StringEqual( p, endTag.c_str(), false, encoding ) )
01140                         {
01141                                 p += endTag.length();
01142                                 return p;
01143                         }
01144                         else
01145                         {
01146                                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01147                                 return 0;
01148                         }
01149                 }
01150                 else
01151                 {
01152                         // Try to read an attribute:
01153                         TiXmlAttribute* attrib = new TiXmlAttribute();
01154                         if ( !attrib )
01155                         {
01156                                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
01157                                 return 0;
01158                         }
01159 
01160                         attrib->SetDocument( document );
01161                         pErr = p;
01162                         p = attrib->Parse( p, data, encoding );
01163 
01164                         if ( !p || !*p )
01165                         {
01166                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01167                                 delete attrib;
01168                                 return 0;
01169                         }
01170 
01171                         // Handle the strange case of double attributes:
01172                         #ifdef TIXML_USE_STL
01173                         TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
01174                         #else
01175                         TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
01176                         #endif
01177                         if ( node )
01178                         {
01179                                 node->SetValue( attrib->Value() );
01180                                 delete attrib;
01181                                 return 0;
01182                         }
01183 
01184                         attributeSet.Add( attrib );
01185                 }
01186         }
01187         return p;
01188 }
01189 
01190 
01191 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01192 {
01193         TiXmlDocument* document = GetDocument();
01194 
01195         // Read in text and elements in any order.
01196         const char* pWithWhiteSpace = p;
01197         p = SkipWhiteSpace( p, encoding );
01198 
01199         while ( p && *p )
01200         {
01201                 if ( *p != '<' )
01202                 {
01203                         // Take what we have, make a text element.
01204                         TiXmlText* textNode = new TiXmlText( "" );
01205 
01206                         if ( !textNode )
01207                         {
01208                                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
01209                                     return 0;
01210                         }
01211 
01212                         if ( TiXmlBase::IsWhiteSpaceCondensed() )
01213                         {
01214                                 p = textNode->Parse( p, data, encoding );
01215                         }
01216                         else
01217                         {
01218                                 // Special case: we want to keep the white space
01219                                 // so that leading spaces aren't removed.
01220                                 p = textNode->Parse( pWithWhiteSpace, data, encoding );
01221                         }
01222 
01223                         if ( !textNode->Blank() )
01224                                 LinkEndChild( textNode );
01225                         else
01226                                 delete textNode;
01227                 }
01228                 else
01229                 {
01230                         // We hit a '<'
01231                         // Have we hit a new element or an end tag? This could also be
01232                         // a TiXmlText in the "CDATA" style.
01233                         if ( StringEqual( p, "</", false, encoding ) )
01234                         {
01235                                 return p;
01236                         }
01237                         else
01238                         {
01239                                 TiXmlNode* node = Identify( p, encoding );
01240                                 if ( node )
01241                                 {
01242                                         p = node->Parse( p, data, encoding );
01243                                         LinkEndChild( node );
01244                                 }
01245                                 else
01246                                 {
01247                                         return 0;
01248                                 }
01249                         }
01250                 }
01251                 pWithWhiteSpace = p;
01252                 p = SkipWhiteSpace( p, encoding );
01253         }
01254 
01255         if ( !p )
01256         {
01257                 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
01258         }
01259         return p;
01260 }
01261 
01262 
01263 #ifdef TIXML_USE_STL
01264 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
01265 {
01266         while ( in->good() )
01267         {
01268                 int c = in->get();
01269                 if ( c <= 0 )
01270                 {
01271                         TiXmlDocument* document = GetDocument();
01272                         if ( document )
01273                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01274                         return;
01275                 }
01276                 (*tag) += (char) c;
01277 
01278                 if ( c == '>' )
01279                 {
01280                         // All is well.
01281                         return;
01282                 }
01283         }
01284 }
01285 #endif
01286 
01287 
01288 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01289 {
01290         TiXmlDocument* document = GetDocument();
01291         p = SkipWhiteSpace( p, encoding );
01292 
01293         if ( data )
01294         {
01295                 data->Stamp( p, encoding );
01296                 location = data->Cursor();
01297         }
01298         if ( !p || !*p || *p != '<' )
01299         {
01300                 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
01301                 return 0;
01302         }
01303         ++p;
01304     value = "";
01305 
01306         while ( p && *p && *p != '>' )
01307         {
01308                 value += *p;
01309                 ++p;
01310         }
01311 
01312         if ( !p )
01313         {
01314                 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
01315         }
01316         if ( *p == '>' )
01317                 return p+1;
01318         return p;
01319 }
01320 
01321 #ifdef TIXML_USE_STL
01322 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
01323 {
01324         while ( in->good() )
01325         {
01326                 int c = in->get();
01327                 if ( c <= 0 )
01328                 {
01329                         TiXmlDocument* document = GetDocument();
01330                         if ( document )
01331                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01332                         return;
01333                 }
01334 
01335                 (*tag) += (char) c;
01336 
01337                 if ( c == '>'
01338                          && tag->at( tag->length() - 2 ) == '-'
01339                          && tag->at( tag->length() - 3 ) == '-' )
01340                 {
01341                         // All is well.
01342                         return;
01343                 }
01344         }
01345 }
01346 #endif
01347 
01348 
01349 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01350 {
01351         TiXmlDocument* document = GetDocument();
01352         value = "";
01353 
01354         p = SkipWhiteSpace( p, encoding );
01355 
01356         if ( data )
01357         {
01358                 data->Stamp( p, encoding );
01359                 location = data->Cursor();
01360         }
01361         const char* startTag = "<!--";
01362         const char* endTag   = "-->";
01363 
01364         if ( !StringEqual( p, startTag, false, encoding ) )
01365         {
01366                 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
01367                 return 0;
01368         }
01369         p += strlen( startTag );
01370 
01371         // [ 1475201 ] TinyXML parses entities in comments
01372         // Oops - ReadText doesn't work, because we don't want to parse the entities.
01373         // p = ReadText( p, &value, false, endTag, false, encoding );
01374         //
01375         // from the XML spec:
01376         /*
01377          [Definition: Comments may appear anywhere in a document outside other markup; in addition,
01378                       they may appear within the document type declaration at places allowed by the grammar.
01379                                   They are not part of the document's character data; an XML processor MAY, but need not,
01380                                   make it possible for an application to retrieve the text of comments. For compatibility,
01381                                   the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
01382                                   references MUST NOT be recognized within comments.
01383 
01384                                   An example of a comment:
01385 
01386                                   <!-- declarations for <head> & <body> -->
01387         */
01388 
01389     value = "";
01390         // Keep all the white space.
01391         while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
01392         {
01393                 value.append( p, 1 );
01394                 ++p;
01395         }
01396         if ( p )
01397                 p += strlen( endTag );
01398 
01399         return p;
01400 }
01401 
01402 
01403 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01404 {
01405         p = SkipWhiteSpace( p, encoding );
01406         if ( !p || !*p ) return 0;
01407 
01408 //      int tabsize = 4;
01409 //      if ( document )
01410 //              tabsize = document->TabSize();
01411 
01412         if ( data )
01413         {
01414                 data->Stamp( p, encoding );
01415                 location = data->Cursor();
01416         }
01417         // Read the name, the '=' and the value.
01418         const char* pErr = p;
01419         p = ReadName( p, &name, encoding );
01420         if ( !p || !*p )
01421         {
01422                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01423                 return 0;
01424         }
01425         p = SkipWhiteSpace( p, encoding );
01426         if ( !p || !*p || *p != '=' )
01427         {
01428                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01429                 return 0;
01430         }
01431 
01432         ++p;    // skip '='
01433         p = SkipWhiteSpace( p, encoding );
01434         if ( !p || !*p )
01435         {
01436                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01437                 return 0;
01438         }
01439 
01440         const char* end;
01441         const char SINGLE_QUOTE = '\'';
01442         const char DOUBLE_QUOTE = '\"';
01443 
01444         if ( *p == SINGLE_QUOTE )
01445         {
01446                 ++p;
01447                 end = "\'";             // single quote in string
01448                 p = ReadText( p, &value, false, end, false, encoding );
01449         }
01450         else if ( *p == DOUBLE_QUOTE )
01451         {
01452                 ++p;
01453                 end = "\"";             // double quote in string
01454                 p = ReadText( p, &value, false, end, false, encoding );
01455         }
01456         else
01457         {
01458                 // All attribute values should be in single or double quotes.
01459                 // But this is such a common error that the parser will try
01460                 // its best, even without them.
01461                 value = "";
01462                 while (    p && *p                                                                                      // existence
01463                                 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'      // whitespace
01464                                 && *p != '/' && *p != '>' )                                                     // tag end
01465                 {
01466                         if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
01467                                 // [ 1451649 ] Attribute values with trailing quotes not handled correctly
01468                                 // We did not have an opening quote but seem to have a
01469                                 // closing one. Give up and throw an error.
01470                                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01471                                 return 0;
01472                         }
01473                         value += *p;
01474                         ++p;
01475                 }
01476         }
01477         return p;
01478 }
01479 
01480 #ifdef TIXML_USE_STL
01481 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
01482 {
01483         while ( in->good() )
01484         {
01485                 int c = in->peek();
01486                 if ( !cdata && (c == '<' ) )
01487                 {
01488                         return;
01489                 }
01490                 if ( c <= 0 )
01491                 {
01492                         TiXmlDocument* document = GetDocument();
01493                         if ( document )
01494                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01495                         return;
01496                 }
01497 
01498                 (*tag) += (char) c;
01499                 in->get();      // "commits" the peek made above
01500 
01501                 if ( cdata && c == '>' && tag->size() >= 3 ) {
01502                         size_t len = tag->size();
01503                         if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
01504                                 // terminator of cdata.
01505                                 return;
01506                         }
01507                 }
01508         }
01509 }
01510 #endif
01511 
01512 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01513 {
01514         value = "";
01515         TiXmlDocument* document = GetDocument();
01516 
01517         if ( data )
01518         {
01519                 data->Stamp( p, encoding );
01520                 location = data->Cursor();
01521         }
01522 
01523         const char* const startTag = "<![CDATA[";
01524         const char* const endTag   = "]]>";
01525 
01526         if ( cdata || StringEqual( p, startTag, false, encoding ) )
01527         {
01528                 cdata = true;
01529 
01530                 if ( !StringEqual( p, startTag, false, encoding ) )
01531                 {
01532                         document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
01533                         return 0;
01534                 }
01535                 p += strlen( startTag );
01536 
01537                 // Keep all the white space, ignore the encoding, etc.
01538                 while (    p && *p
01539                                 && !StringEqual( p, endTag, false, encoding )
01540                           )
01541                 {
01542                         value += *p;
01543                         ++p;
01544                 }
01545 
01546                 TIXML_STRING dummy;
01547                 p = ReadText( p, &dummy, false, endTag, false, encoding );
01548                 return p;
01549         }
01550         else
01551         {
01552                 bool ignoreWhite = true;
01553 
01554                 const char* end = "<";
01555                 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
01556                 if ( p )
01557                         return p-1;     // don't truncate the '<'
01558                 return 0;
01559         }
01560 }
01561 
01562 #ifdef TIXML_USE_STL
01563 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
01564 {
01565         while ( in->good() )
01566         {
01567                 int c = in->get();
01568                 if ( c <= 0 )
01569                 {
01570                         TiXmlDocument* document = GetDocument();
01571                         if ( document )
01572                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01573                         return;
01574                 }
01575                 (*tag) += (char) c;
01576 
01577                 if ( c == '>' )
01578                 {
01579                         // All is well.
01580                         return;
01581                 }
01582         }
01583 }
01584 #endif
01585 
01586 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
01587 {
01588         p = SkipWhiteSpace( p, _encoding );
01589         // Find the beginning, find the end, and look for
01590         // the stuff in-between.
01591         TiXmlDocument* document = GetDocument();
01592         if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01593         {
01594                 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
01595                 return 0;
01596         }
01597         if ( data )
01598         {
01599                 data->Stamp( p, _encoding );
01600                 location = data->Cursor();
01601         }
01602         p += 5;
01603 
01604         version = "";
01605         encoding = "";
01606         standalone = "";
01607 
01608         while ( p && *p )
01609         {
01610                 if ( *p == '>' )
01611                 {
01612                         ++p;
01613                         return p;
01614                 }
01615 
01616                 p = SkipWhiteSpace( p, _encoding );
01617                 if ( StringEqual( p, "version", true, _encoding ) )
01618                 {
01619                         TiXmlAttribute attrib;
01620                         p = attrib.Parse( p, data, _encoding );
01621                         version = attrib.Value();
01622                 }
01623                 else if ( StringEqual( p, "encoding", true, _encoding ) )
01624                 {
01625                         TiXmlAttribute attrib;
01626                         p = attrib.Parse( p, data, _encoding );
01627                         encoding = attrib.Value();
01628                 }
01629                 else if ( StringEqual( p, "standalone", true, _encoding ) )
01630                 {
01631                         TiXmlAttribute attrib;
01632                         p = attrib.Parse( p, data, _encoding );
01633                         standalone = attrib.Value();
01634                 }
01635                 else
01636                 {
01637                         // Read over whatever it is.
01638                         while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01639                                 ++p;
01640                 }
01641         }
01642         return 0;
01643 }
01644 
01645 bool TiXmlText::Blank() const
01646 {
01647         for ( unsigned i=0; i<value.length(); i++ )
01648                 if ( !IsWhiteSpace( value[i] ) )
01649                         return false;
01650         return true;
01651 }
01652