src/XML/tinyxmlparser_inl.h

0001 #ifndef DDCORE_SRC_XML_TINYXMLPARSER_INL_H
0002 #define DDCORE_SRC_XML_TINYXMLPARSER_INL_H
0003
0004 /*
0005   www.sourceforge.net/projects/tinyxml
0006   Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
0007
0008   This software is provided 'as-is', without any express or implied
0009   warranty. In no event will the authors be held liable for any
0010   damages arising from the use of this software.
0011
0012   Permission is granted to anyone to use this software for any
0013   purpose, including commercial applications, and to alter it and
0014   redistribute it freely, subject to the following restrictions:
0015
0016   1. The origin of this software must not be misrepresented; you must
0017   not claim that you wrote the original software. If you use this
0018   software in a product, an acknowledgment in the product documentation
0019   would be appreciated but is not required.
0020
0021   2. Altered source versions must be plainly marked as such, and
0022   must not be misrepresented as being the original software.
0023
0024   3. This notice may not be removed or altered from any source
0025   distribution.
0026
0027   F.Gaede, DESY : changed extension to .cc  for use with marlin
0028   and include from "marlin/tinyxml.h"
0029
0030 */
0031
0032 #include <ctype.h>
0033 #include <stddef.h>
0034
0035 #include <XML/tinyxml.h>
0036
0037 //#define DEBUG_PARSER
0038 #if defined( DEBUG_PARSER )
0039 #       if defined( DEBUG ) && defined( _MSC_VER )
0040 #               include <windows.h>
0041 #               define TIXML_LOG OutputDebugString
0042 #       else
0043 #               define TIXML_LOG printf
0044 #       endif
0045 #endif
0046
0047
0048
0049 // Note tha "PutString" hardcodes the same list. This
0050 // is less flexible than it appears. Changing the entries
0051 // or order will break putstring.
0052 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
0053   {
0054     //FIXME: workaround for processor conditions of type &&
0055     //needs to be tested if there are no adverse effects due to this change!!
0056     { "&",  1, '&' },
0057     //{ "&amp;",  5, '&' },
0058
0059     { "&lt;",   4, '<' },
0060     { "&gt;",   4, '>' },
0061     { "&quot;", 6, '\"' },
0062     { "&apos;", 6, '\'' }
0063   };
0064
0065 // Bunch of unicode info at:
0066 //              http://www.unicode.org/faq/utf_bom.html
0067 // Including the basic of this table, which determines the #bytes in the
0068 // sequence from the lead byte. 1 placed for invalid sequences --
0069 // although the result will be junk, pass it through as much as possible.
0070 // Beware of the non-characters in UTF-8:
0071 //                              ef bb bf (Microsoft "lead bytes")
0072 //                              ef bf be
0073 //                              ef bf bf
0074
// The three bytes ef bb bf (the Microsoft "lead bytes" mentioned above),
// in the order in which they appear in the input.
const unsigned char TIXML_UTF_LEAD_0 = 0xEF;
const unsigned char TIXML_UTF_LEAD_1 = 0xBB;
const unsigned char TIXML_UTF_LEAD_2 = 0xBF;
0078
0079 const int TiXmlBase::utf8ByteTable[256] =
0080   {
0081     //  0       1       2       3       4       5       6       7       8       9       a       b       c       d       e       f
0082     1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x00
0083     1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x10
0084     1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x20
0085     1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x30
0086     1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x40
0087     1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x50
0088     1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x60
0089     1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x70 End of ASCII range
0090     1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x80 0x80 to 0xc1 invalid
0091     1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x90
0092     1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xa0
0093     1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xb0
0094     1,  1,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xc0 0xc2 to 0xdf 2 byte
0095     2,  2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xd0
0096     3,  3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      // 0xe0 0xe0 to 0xef 3 byte
0097     4,  4,      4,      4,      4,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1       // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
0098   };
0099
0100
0101 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
0102 {
0103   const unsigned long BYTE_MASK = 0xBF;
0104   const unsigned long BYTE_MARK = 0x80;
0105   const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
0106
0107   if (input < 0x80)
0108     *length = 1;
0109   else if ( input < 0x800 )
0110     *length = 2;
0111   else if ( input < 0x10000 )
0112     *length = 3;
0113   else if ( input < 0x200000 )
0114     *length = 4;
0115   else
0116   { *length = 0; return; }    // This code won't covert this correctly anyway.
0117
0118   output += *length;
0119
0120   // Scary scary fall throughs.
0121   switch (*length)
0122   {
0123   case 4:
0124     --output;
0125     *output = (char)((input | BYTE_MARK) & BYTE_MASK);
0126     input >>= 6;
0127     [[fallthrough]];
0128   case 3:
0129     --output;
0130     *output = (char)((input | BYTE_MARK) & BYTE_MASK);
0131     input >>= 6;
0132     [[fallthrough]];
0133   case 2:
0134     --output;
0135     *output = (char)((input | BYTE_MARK) & BYTE_MASK);
0136     input >>= 6;
0137     [[fallthrough]];
0138   case 1:
0139     --output;
0140     *output = (char)(input | FIRST_BYTE_MARK[*length]);
0141     [[fallthrough]];
0142   default:
0143     break;
0144   }
0145 }
0146
0147
0148 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
0149 {
0150   // This will only work for low-ascii, everything else is assumed to be a valid
0151   // letter. I'm not sure this is the best approach, but it is quite tricky trying
0152   // to figure out alhabetical vs. not across encoding. So take a very
0153   // conservative approach.
0154
0155   //    if ( encoding == TIXML_ENCODING_UTF8 )
0156   //    {
0157   if ( anyByte < 127 )
0158     return isalpha( anyByte );
0159   else
0160     return 1;   // What else to do? The unicode set is huge...get the english ones right.
0161   //    }
0162   //    else
0163   //    {
0164   //            return isalpha( anyByte );
0165   //    }
0166 }
0167
0168
0169 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
0170 {
0171   // This will only work for low-ascii, everything else is assumed to be a valid
0172   // letter. I'm not sure this is the best approach, but it is quite tricky trying
0173   // to figure out alhabetical vs. not across encoding. So take a very
0174   // conservative approach.
0175
0176   //    if ( encoding == TIXML_ENCODING_UTF8 )
0177   //    {
0178   if ( anyByte < 127 )
0179     return isalnum( anyByte );
0180   else
0181     return 1;   // What else to do? The unicode set is huge...get the english ones right.
0182   //    }
0183   //    else
0184   //    {
0185   //            return isalnum( anyByte );
0186   //    }
0187 }
0188
0189
0190 class TiXmlParsingData
0191 {
0192   friend class TiXmlDocument;
0193 public:
0194   void Stamp( const char* now, TiXmlEncoding encoding );
0195
0196   const TiXmlCursor& Cursor()   { return cursor; }
0197
0198 private:
0199   // Only used by the document!
0200   TiXmlParsingData( const char* start, int _tabsize, int row, int col )
0201   {
0202     assert( start );
0203     stamp = start;
0204     tabsize = _tabsize;
0205     cursor.row = row;
0206     cursor.col = col;
0207   }
0208
0209   TiXmlCursor           cursor;
0210   const char*           stamp;
0211   int                           tabsize;
0212 };
0213
0214
0215 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
0216 {
0217   assert( now );
0218
0219   // Do nothing if the tabsize is 0.
0220   if ( tabsize < 1 )
0221   {
0222     return;
0223   }
0224
0225   // Get the current row, column.
0226   int row = cursor.row;
0227   int col = cursor.col;
0228   const char* p = stamp;
0229   assert( p );
0230
0231   while ( p < now )
0232   {
0233     // Treat p as unsigned, so we have a happy compiler.
0234     const unsigned char* pU = (const unsigned char*)p;
0235
0236     // Code contributed by Fletcher Dunn: (modified by lee)
0237     switch (*pU) {
0238     case 0:
0239       // We *should* never get here, but in case we do, don't
0240       // advance past the terminating null character, ever
0241       return;
0242
0243     case '\r':
0244       // bump down to the next line
0245       ++row;
0246       col = 0;
0247       // Eat the character
0248       ++p;
0249
0250       // Check for \r\n sequence, and treat this as a single character
0251       if (*p == '\n') {
0252         ++p;
0253       }
0254       break;
0255
0256     case '\n':
0257       // bump down to the next line
0258       ++row;
0259       col = 0;
0260
0261       // Eat the character
0262       ++p;
0263
0264       // Check for \n\r sequence, and treat this as a single
0265       // character.  (Yes, this bizarre thing does occur still
0266       // on some arcane platforms...)
0267       if (*p == '\r') {
0268         ++p;
0269       }
0270       break;
0271
0272     case '\t':
0273       // Eat the character
0274       ++p;
0275
0276       // Skip to next tab stop
0277       col = (col / tabsize + 1) * tabsize;
0278       break;
0279
0280     case TIXML_UTF_LEAD_0:
0281       if ( encoding == TIXML_ENCODING_UTF8 )
0282       {
0283         if ( *(p+1) && *(p+2) )
0284         {
0285           // In these cases, don't advance the column. These are
0286           // 0-width spaces.
0287           if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
0288             p += 3;
0289           else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
0290             p += 3;
0291           else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
0292             p += 3;
0293           else
0294           { p +=3; ++col; }     // A normal character.
0295         }
0296       }
0297       else
0298       {
0299         ++p;
0300         ++col;
0301       }
0302       break;
0303
0304     default:
0305       if ( encoding == TIXML_ENCODING_UTF8 )
0306       {
0307         // Eat the 1 to 4 byte utf8 character.
0308         int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
0309         if ( step == 0 )
0310           step = 1;         // Error case from bad encoding, but handle gracefully.
0311         p += step;
0312
0313         // Just advance one column, of course.
0314         ++col;
0315       }
0316       else
0317       {
0318         ++p;
0319         ++col;
0320       }
0321       break;
0322     }
0323   }
0324   cursor.row = row;
0325   cursor.col = col;
0326   assert( cursor.row >= -1 );
0327   assert( cursor.col >= -1 );
0328   stamp = p;
0329   assert( stamp );
0330 }
0331
0332
0333 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
0334 {
0335   if ( !p || !*p )
0336   {
0337     return 0;
0338   }
0339   if ( encoding == TIXML_ENCODING_UTF8 )
0340   {
0341     while ( *p )
0342     {
0343       const unsigned char* pU = (const unsigned char*)p;
0344
0345       // Skip the stupid Microsoft UTF-8 Byte order marks
0346       if (  *(pU+0)==TIXML_UTF_LEAD_0
0347             && *(pU+1)==TIXML_UTF_LEAD_1
0348             && *(pU+2)==TIXML_UTF_LEAD_2 )
0349       {
0350         p += 3;
0351         continue;
0352       }
0353       else if(*(pU+0)==TIXML_UTF_LEAD_0
0354               && *(pU+1)==0xbfU
0355               && *(pU+2)==0xbeU )
0356       {
0357         p += 3;
0358         continue;
0359       }
0360       else if(*(pU+0)==TIXML_UTF_LEAD_0
0361               && *(pU+1)==0xbfU
0362               && *(pU+2)==0xbfU )
0363       {
0364         p += 3;
0365         continue;
0366       }
0367
0368       if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )          // Still using old rules for white space.
0369         ++p;
0370       else
0371         break;
0372     }
0373   }
0374   else
0375   {
0376     while ( ( *p && IsWhiteSpace( *p ) ) || *p == '\n' || *p =='\r' )
0377       ++p;
0378   }
0379
0380   return p;
0381 }
0382
0383 #ifdef TIXML_USE_STL
0384 /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
0385 {
0386   for( ;; )
0387   {
0388     if ( !in->good() ) return false;
0389
0390     int c = in->peek();
0391     // At this scope, we can't get to a document. So fail silently.
0392     if ( !IsWhiteSpace( c ) || c <= 0 )
0393       return true;
0394
0395     *tag += (char) in->get();
0396   }
0397 }
0398
0399 /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
0400 {
0401   //assert( character > 0 && character < 128 ); // else it won't work in utf-8
0402   while ( in->good() )
0403   {
0404     int c = in->peek();
0405     if ( c == character )
0406       return true;
0407     if ( c <= 0 )             // Silent failure: can't get document at this scope
0408       return false;
0409
0410     in->get();
0411     *tag += (char) c;
0412   }
0413   return false;
0414 }
0415 #endif
0416
0417 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
0418 // "assign" optimization removes over 10% of the execution time.
0419 //
0420 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
0421 {
0422   // Oddly, not supported on some comilers,
0423   //name->clear();
0424   // So use this:
0425   *name = "";
0426   assert( p );
0427
0428   // Names start with letters or underscores.
0429   // Of course, in unicode, tinyxml has no idea what a letter *is*. The
0430   // algorithm is generous.
0431   //
0432   // After that, they can be letters, underscores, numbers,
0433   // hyphens, or colons. (Colons are valid ony for namespaces,
0434   // but tinyxml can't tell namespaces from names.)
0435   if (    p && *p
0436           && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
0437   {
0438     const char* start = p;
0439     while(            p && *p
0440                       &&      (               IsAlphaNum( (unsigned char ) *p, encoding )
0441                                               || *p == '_'
0442                                               || *p == '-'
0443                                               || *p == '.'
0444                                               || *p == ':' ) )
0445     {
0446       //(*name) += *p; // expensive
0447       ++p;
0448     }
0449     if ( p-start > 0 ) {
0450       name->assign( start, p-start );
0451     }
0452     return p;
0453   }
0454   return 0;
0455 }
0456
0457 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
0458 {
0459   // Presume an entity, and pull it out.
0460   TIXML_STRING ent;
0461   int i;
0462   *length = 0;
0463
0464   if ( *(p+1) && *(p+1) == '#' && *(p+2) )
0465   {
0466     unsigned long ucs = 0;
0467     ptrdiff_t delta = 0;
0468     unsigned mult = 1;
0469
0470     if ( *(p+2) == 'x' )
0471     {
0472       // Hexadecimal.
0473       if ( !*(p+3) ) return 0;
0474
0475       const char* q = p+3;
0476       q = strchr( q, ';' );
0477
0478       if ( !q || !*q ) return 0;
0479
0480       delta = q-p;
0481       --q;
0482
0483       while ( *q != 'x' )
0484       {
0485         if ( *q >= '0' && *q <= '9' )
0486           ucs += mult * (*q - '0');
0487         else if ( *q >= 'a' && *q <= 'f' )
0488           ucs += mult * (*q - 'a' + 10);
0489         else if ( *q >= 'A' && *q <= 'F' )
0490           ucs += mult * (*q - 'A' + 10 );
0491         else
0492           return 0;
0493         mult *= 16;
0494         --q;
0495       }
0496     }
0497     else
0498     {
0499       // Decimal.
0500       if ( !*(p+2) ) return 0;
0501
0502       const char* q = p+2;
0503       q = strchr( q, ';' );
0504
0505       if ( !q || !*q ) return 0;
0506
0507       delta = q-p;
0508       --q;
0509
0510       while ( *q != '#' )
0511       {
0512         if ( *q >= '0' && *q <= '9' )
0513           ucs += mult * (*q - '0');
0514         else
0515           return 0;
0516         mult *= 10;
0517         --q;
0518       }
0519     }
0520     if ( encoding == TIXML_ENCODING_UTF8 )
0521     {
0522       // convert the UCS to UTF-8
0523       ConvertUTF32ToUTF8( ucs, value, length );
0524     }
0525     else
0526     {
0527       *value = (char)ucs;
0528       *length = 1;
0529     }
0530     return p + delta + 1;
0531   }
0532
0533   // Now try to match it.
0534   for( i=0; i<NUM_ENTITY; ++i )
0535   {
0536     if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
0537     {
0538       assert( strlen( entity[i].str ) == entity[i].strLength );
0539       *value = entity[i].chr;
0540       *length = 1;
0541       return ( p + entity[i].strLength );
0542     }
0543   }
0544
0545   // So it wasn't an entity, its unrecognized, or something like that.
0546   *value = *p;  // Don't put back the last one, since we return it!
0547   //*length = 1;        // Leave unrecognized entities - this doesn't really work.
0548   // Just writes strange XML.
0549   return p+1;
0550 }
0551
0552
0553 bool TiXmlBase::StringEqual( const char* p,
0554                              const char* tag,
0555                              bool ignoreCase,
0556                              TiXmlEncoding encoding )
0557 {
0558   assert( p );
0559   assert( tag );
0560   if ( !p || !*p )
0561   {
0562     assert( 0 );
0563     return false;
0564   }
0565
0566   const char* q = p;
0567
0568   if ( ignoreCase )
0569   {
0570     while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
0571     {
0572       ++q;
0573       ++tag;
0574     }
0575
0576     if ( *tag == 0 )
0577       return true;
0578   }
0579   else
0580   {
0581     while ( *q && *tag && *q == *tag )
0582     {
0583       ++q;
0584       ++tag;
0585     }
0586
0587     if ( *tag == 0 )          // Have we found the end of the tag, and everything equal?
0588       return true;
0589   }
0590   return false;
0591 }
0592
0593 const char* TiXmlBase::ReadText(        const char* p,
0594                                         TIXML_STRING * text,
0595                                         bool trimWhiteSpace,
0596                                         const char* endTag,
0597                                         bool caseInsensitive,
0598                                         TiXmlEncoding encoding )
0599 {
0600   *text = "";
0601   if (    !trimWhiteSpace                       // certain tags always keep whitespace
0602           || !condenseWhiteSpace )      // if true, whitespace is always kept
0603   {
0604     // Keep all the white space.
0605     while (      p && *p
0606                  && !StringEqual( p, endTag, caseInsensitive, encoding )
0607                  )
0608     {
0609       int len;
0610       char cArr[4] = { 0, 0, 0, 0 };
0611       p = GetChar( p, cArr, &len, encoding );
0612       text->append( cArr, len );
0613     }
0614   }
0615   else
0616   {
0617     bool whitespace = false;
0618
0619     // Remove leading white space:
0620     p = SkipWhiteSpace( p, encoding );
0621     while (      p && *p
0622                  && !StringEqual( p, endTag, caseInsensitive, encoding ) )
0623     {
0624       if ( *p == '\r' || *p == '\n' )
0625       {
0626         whitespace = true;
0627         ++p;
0628       }
0629       else if ( IsWhiteSpace( *p ) )
0630       {
0631         whitespace = true;
0632         ++p;
0633       }
0634       else
0635       {
0636         // If we've found whitespace, add it before the
0637         // new character. Any whitespace just becomes a space.
0638         if ( whitespace )
0639         {
0640           (*text) += ' ';
0641           whitespace = false;
0642         }
0643         int len;
0644         char cArr[4] = { 0, 0, 0, 0 };
0645         p = GetChar( p, cArr, &len, encoding );
0646         if ( len == 1 )
0647           (*text) += cArr[0];     // more efficient
0648         else
0649           text->append( cArr, len );
0650       }
0651     }
0652   }
0653   if ( p )
0654     p += strlen( endTag );
0655   return p;
0656 }
0657
0658 #ifdef TIXML_USE_STL
0659
0660 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
0661 {
0662   // The basic issue with a document is that we don't know what we're
0663   // streaming. Read something presumed to be a tag (and hope), then
0664   // identify it, and call the appropriate stream method on the tag.
0665   //
0666   // This "pre-streaming" will never read the closing ">" so the
0667   // sub-tag can orient itself.
0668
0669   if ( !StreamTo( in, '<', tag ) )
0670   {
0671     SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
0672     return;
0673   }
0674
0675   while ( in->good() )
0676   {
0677     int tagIndex = (int) tag->length();
0678     while ( in->good() && in->peek() != '>' )
0679     {
0680       int c = in->get();
0681       if ( c <= 0 )
0682       {
0683         SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
0684         break;
0685       }
0686       (*tag) += (char) c;
0687     }
0688
0689     if ( in->good() )
0690     {
0691       // We now have something we presume to be a node of
0692       // some sort. Identify it, and call the node to
0693       // continue streaming.
0694       TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
0695
0696       if ( node )
0697       {
0698         node->StreamIn( in, tag );
0699         bool isElement = node->ToElement() != 0;
0700         delete node;
0701         node = 0;
0702
0703         // If this is the root element, we're done. Parsing will be
0704         // done by the >> operator.
0705         if ( isElement )
0706         {
0707           return;
0708         }
0709       }
0710       else
0711       {
0712         SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
0713         return;
0714       }
0715     }
0716   }
0717   // We should have returned sooner.
0718   SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
0719 }
0720
0721 #endif
0722
0723 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
0724 {
0725   ClearError();
0726
0727   // Parse away, at the document level. Since a document
0728   // contains nothing but other tags, most of what happens
0729   // here is skipping white space.
0730   if ( !p || !*p )
0731   {
0732     SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
0733     return 0;
0734   }
0735
0736   // Note that, for a document, this needs to come
0737   // before the while space skip, so that parsing
0738   // starts from the pointer we are given.
0739   location.Clear();
0740   if ( prevData )
0741   {
0742     location.row = prevData->cursor.row;
0743     location.col = prevData->cursor.col;
0744   }
0745   else
0746   {
0747     location.row = 0;
0748     location.col = 0;
0749   }
0750   TiXmlParsingData data( p, TabSize(), location.row, location.col );
0751   location = data.Cursor();
0752
0753   if ( encoding == TIXML_ENCODING_UNKNOWN )
0754   {
0755     // Check for the Microsoft UTF-8 lead bytes.
0756     const unsigned char* pU = (const unsigned char*)p;
0757     if (      *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
0758               && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
0759               && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
0760     {
0761       encoding = TIXML_ENCODING_UTF8;
0762       useMicrosoftBOM = true;
0763     }
0764   }
0765
0766   p = SkipWhiteSpace( p, encoding );
0767   if ( !p )
0768   {
0769     SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
0770     return 0;
0771   }
0772
0773   while ( p && *p )
0774   {
0775     TiXmlNode* node = Identify( p, encoding );
0776     if ( node )
0777     {
0778       p = node->Parse( p, &data, encoding );
0779       LinkEndChild( node );
0780     }
0781     else
0782     {
0783       break;
0784     }
0785
0786     // Did we get encoding info?
0787     if (    encoding == TIXML_ENCODING_UNKNOWN
0788             && node->ToDeclaration() )
0789     {
0790       TiXmlDeclaration* dec = node->ToDeclaration();
0791       const char* enc = dec->Encoding();
0792       assert( enc );
0793
0794       if ( *enc == 0 )
0795         encoding = TIXML_ENCODING_UTF8;
0796       else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
0797         encoding = TIXML_ENCODING_UTF8;
0798       else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
0799         encoding = TIXML_ENCODING_UTF8;     // incorrect, but be nice
0800       else
0801         encoding = TIXML_ENCODING_LEGACY;
0802     }
0803
0804     p = SkipWhiteSpace( p, encoding );
0805   }
0806
0807   // Was this empty?
0808   if ( !firstChild ) {
0809     SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
0810     return 0;
0811   }
0812
0813   // All is well.
0814   return p;
0815 }
0816
0817 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
0818 {
0819   // The first error in a chain is more accurate - don't set again!
0820   if ( error )
0821     return;
0822
0823   assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
0824   error   = true;
0825   errorId = err;
0826   errorDesc = errorString[ errorId ];
0827
0828   errorLocation.Clear();
0829   if ( pError && data )
0830   {
0831     data->Stamp( pError, encoding );
0832     errorLocation = data->Cursor();
0833   }
0834 }
0835
0836
0837 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
0838 {
0839   TiXmlNode* returnNode = 0;
0840
0841   p = SkipWhiteSpace( p, encoding );
0842   if( !p || !*p || *p != '<' )
0843   {
0844     return 0;
0845   }
0846
0847   TiXmlDocument* doc = GetDocument();
0848   p = SkipWhiteSpace( p, encoding );
0849
0850   if ( !p || !*p )
0851   {
0852     return 0;
0853   }
0854
0855   // What is this thing?
0856   // - Elements start with a letter or underscore, but xml is reserved.
0857   // - Comments: <!--
0858   // - Decleration: <?xml
0859   // - Everthing else is unknown to tinyxml.
0860   //
0861
0862   const char* xmlHeader = { "<?xml" };
0863   const char* commentHeader = { "<!--" };
0864   const char* dtdHeader = { "<!" };
0865   const char* cdataHeader = { "<![CDATA[" };
0866
0867   if ( StringEqual( p, xmlHeader, true, encoding ) )
0868   {
0869 #ifdef DEBUG_PARSER
0870     TIXML_LOG( "XML parsing Declaration\n" );
0871 #endif
0872     returnNode = new TiXmlDeclaration();
0873   }
0874   else if ( StringEqual( p, commentHeader, false, encoding ) )
0875   {
0876 #ifdef DEBUG_PARSER
0877     TIXML_LOG( "XML parsing Comment\n" );
0878 #endif
0879     returnNode = new TiXmlComment();
0880   }
0881   else if ( StringEqual( p, cdataHeader, false, encoding ) )
0882   {
0883 #ifdef DEBUG_PARSER
0884     TIXML_LOG( "XML parsing CDATA\n" );
0885 #endif
0886     TiXmlText* text = new TiXmlText( "" );
0887     text->SetCDATA( true );
0888     returnNode = text;
0889   }
0890   else if ( StringEqual( p, dtdHeader, false, encoding ) )
0891   {
0892 #ifdef DEBUG_PARSER
0893     TIXML_LOG( "XML parsing Unknown(1)\n" );
0894 #endif
0895     returnNode = new TiXmlUnknown();
0896   }
0897   else if (    IsAlpha( *(p+1), encoding )
0898                || *(p+1) == '_' )
0899   {
0900 #ifdef DEBUG_PARSER
0901     TIXML_LOG( "XML parsing Element\n" );
0902 #endif
0903     returnNode = new TiXmlElement( "" );
0904   }
0905   else
0906   {
0907 #ifdef DEBUG_PARSER
0908     TIXML_LOG( "XML parsing Unknown(2)\n" );
0909 #endif
0910     returnNode = new TiXmlUnknown();
0911   }
0912
0913   if ( returnNode )
0914   {
0915     // Set the parent, so it can report errors
0916     returnNode->parent = this;
0917   }
0918   else
0919   {
0920     if ( doc )
0921       doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
0922   }
0923   return returnNode;
0924 }
0925
0926 #ifdef TIXML_USE_STL
0927
0928 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
0929 {
0930   // We're called with some amount of pre-parsing. That is, some of "this"
0931   // element is in "tag". Go ahead and stream to the closing ">"
0932   while( in->good() )
0933   {
0934     int c = in->get();
0935     if ( c <= 0 )
0936     {
0937       TiXmlDocument* document = GetDocument();
0938       if ( document )
0939         document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
0940       return;
0941     }
0942     (*tag) += (char) c ;
0943
0944     if ( c == '>' )
0945       break;
0946   }
0947
0948   if ( tag->length() < 3 ) return;
0949
0950   // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
0951   // If not, identify and stream.
0952
0953   if (    tag->at( tag->length() - 1 ) == '>'
0954           && tag->at( tag->length() - 2 ) == '/' )
0955   {
0956     // All good!
0957     return;
0958   }
0959   else if ( tag->at( tag->length() - 1 ) == '>' )
0960   {
0961     // There is more. Could be:
0962     //                text
0963     //                cdata text (which looks like another node)
0964     //                closing tag
0965     //                another node.
0966     for ( ;; )
0967     {
0968       StreamWhiteSpace( in, tag );
0969
0970       // Do we have text?
0971       if ( in->good() && in->peek() != '<' )
0972       {
0973         // Yep, text.
0974         TiXmlText text( "" );
0975         text.StreamIn( in, tag );
0976
0977         // What follows text is a closing tag or another node.
0978         // Go around again and figure it out.
0979         continue;
0980       }
0981
0982       // We now have either a closing tag...or another node.
0983       // We should be at a "<", regardless.
0984       if ( !in->good() ) return;
0985       assert( in->peek() == '<' );
0986       int tagIndex = (int) tag->length();
0987
0988       bool closingTag = false;
0989       bool firstCharFound = false;
0990
0991       for( ;; )
0992       {
0993         if ( !in->good() )
0994           return;
0995
0996         int c = in->peek();
0997         if ( c <= 0 )
0998         {
0999           TiXmlDocument* document = GetDocument();
1000           if ( document )
1001             document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1002           return;
1003         }
1004
1005         if ( c == '>' )
1006           break;
1007
1008         *tag += (char) c;
1009         in->get();
1010
1011         // Early out if we find the CDATA id.
1012         if ( c == '[' && tag->size() >= 9 )
1013         {
1014           size_t len = tag->size();
1015           const char* start = tag->c_str() + len - 9;
1016           if ( strcmp( start, "<![CDATA[" ) == 0 ) {
1017             assert( !closingTag );
1018             break;
1019           }
1020         }
1021
1022         if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
1023         {
1024           firstCharFound = true;
1025           if ( c == '/' )
1026             closingTag = true;
1027         }
1028       }
1029       // If it was a closing tag, then read in the closing '>' to clean up the input stream.
1030       // If it was not, the streaming will be done by the tag.
1031       if ( closingTag )
1032       {
1033         if ( !in->good() )
1034           return;
1035
1036         int c = in->get();
1037         if ( c <= 0 )
1038         {
1039           TiXmlDocument* document = GetDocument();
1040           if ( document )
1041             document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1042           return;
1043         }
1044         assert( c == '>' );
1045         *tag += (char) c;
1046
1047         // We are done, once we've found our closing tag.
1048         return;
1049       }
1050       else
1051       {
1052         // If not a closing tag, id it, and stream.
1053         const char* tagloc = tag->c_str() + tagIndex;
1054         TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
1055         if ( !node )
1056           return;
1057         node->StreamIn( in, tag );
1058         delete node;
1059         node = 0;
1060
1061         // No return: go around from the beginning: text, closing tag, or node.
1062       }
1063     }
1064   }
1065 }
1066 #endif
1067
1068 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1069 {
1070   p = SkipWhiteSpace( p, encoding );
1071   TiXmlDocument* document = GetDocument();
1072
1073   if ( !p || !*p )
1074   {
1075     if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
1076     return 0;
1077   }
1078
1079   if ( data )
1080   {
1081     data->Stamp( p, encoding );
1082     location = data->Cursor();
1083   }
1084
1085   if ( *p != '<' )
1086   {
1087     if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1088     return 0;
1089   }
1090
1091   p = SkipWhiteSpace( p+1, encoding );
1092
1093   // Read the name.
1094   const char* pErr = p;
1095
1096   p = ReadName( p, &value, encoding );
1097   if ( !p || !*p )
1098   {
1099     if ( document )   document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1100     return 0;
1101   }
1102
1103   TIXML_STRING endTag ("</");
1104   endTag += value;
1105   endTag += ">";
1106
1107   // Check for and read attributes. Also look for an empty
1108   // tag or an end tag.
1109   while ( p && *p )
1110   {
1111     pErr = p;
1112     p = SkipWhiteSpace( p, encoding );
1113     if ( !p || !*p )
1114     {
1115       if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1116       return 0;
1117     }
1118     if ( *p == '/' )
1119     {
1120       ++p;
1121       // Empty tag.
1122       if ( *p  != '>' )
1123       {
1124         if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
1125         return 0;
1126       }
1127       return (p+1);
1128     }
1129     else if ( *p == '>' )
1130     {
1131       // Done with attributes (if there were any.)
1132       // Read the value -- which can include other
1133       // elements -- read the end tag, and return.
1134       ++p;
1135       p = ReadValue( p, data, encoding );           // Note this is an Element method, and will set the error if one happens.
1136       if ( !p || !*p )
1137         return 0;
1138
1139       // We should find the end tag now
1140       if ( StringEqual( p, endTag.c_str(), false, encoding ) )
1141       {
1142         p += endTag.length();
1143         return p;
1144       }
1145       else
1146       {
1147         if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1148         return 0;
1149       }
1150     }
1151     else
1152     {
1153       // Try to read an attribute:
1154       TiXmlAttribute* attrib = new TiXmlAttribute();
1155       if ( !attrib )
1156       {
1157         if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
1158         return 0;
1159       }
1160
1161       attrib->SetDocument( document );
1162       pErr = p;
1163       p = attrib->Parse( p, data, encoding );
1164
1165       if ( !p || !*p )
1166       {
1167         if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1168         delete attrib;
1169         return 0;
1170       }
1171
1172       // Handle_t the strange case of double attributes:
1173 #ifdef TIXML_USE_STL
1174       TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
1175 #else
1176       TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
1177 #endif
1178       if ( node )
1179       {
1180         node->SetValue( attrib->Value() );
1181         delete attrib;
1182         return 0;
1183       }
1184
1185       attributeSet.Add( attrib );
1186     }
1187   }
1188   return p;
1189 }
1190
1191
1192 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1193 {
1194   TiXmlDocument* document = GetDocument();
1195
1196   // Read in text and elements in any order.
1197   const char* pWithWhiteSpace = p;
1198   p = SkipWhiteSpace( p, encoding );
1199
1200   while ( p && *p )
1201   {
1202     if ( *p != '<' )
1203     {
1204       // Take what we have, make a text element.
1205       TiXmlText* textNode = new TiXmlText( "" );
1206
1207       if ( !textNode )
1208       {
1209         if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
1210         return 0;
1211       }
1212
1213       if ( TiXmlBase::IsWhiteSpaceCondensed() )
1214       {
1215         p = textNode->Parse( p, data, encoding );
1216       }
1217       else
1218       {
1219         // Special case: we want to keep the white space
1220         // so that leading spaces aren't removed.
1221         p = textNode->Parse( pWithWhiteSpace, data, encoding );
1222       }
1223
1224       if ( !textNode->Blank() )
1225         LinkEndChild( textNode );
1226       else
1227         delete textNode;
1228     }
1229     else
1230     {
1231       // We hit a '<'
1232       // Have we hit a new element or an end tag? This could also be
1233       // a TiXmlText in the "CDATA" style.
1234       if ( StringEqual( p, "</", false, encoding ) )
1235       {
1236         return p;
1237       }
1238       else
1239       {
1240         TiXmlNode* node = Identify( p, encoding );
1241         if ( node )
1242         {
1243           p = node->Parse( p, data, encoding );
1244           LinkEndChild( node );
1245         }
1246         else
1247         {
1248           return 0;
1249         }
1250       }
1251     }
1252     pWithWhiteSpace = p;
1253     p = SkipWhiteSpace( p, encoding );
1254   }
1255
1256   if ( !p )
1257   {
1258     if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
1259   }
1260   return p;
1261 }
1262
1263
1264 #ifdef TIXML_USE_STL
1265 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
1266 {
1267   while ( in->good() )
1268   {
1269     int c = in->get();
1270     if ( c <= 0 )
1271     {
1272       TiXmlDocument* document = GetDocument();
1273       if ( document )
1274         document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1275       return;
1276     }
1277     (*tag) += (char) c;
1278
1279     if ( c == '>' )
1280     {
1281       // All is well.
1282       return;
1283     }
1284   }
1285 }
1286 #endif
1287
1288
1289 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1290 {
1291   TiXmlDocument* document = GetDocument();
1292   p = SkipWhiteSpace( p, encoding );
1293
1294   if ( data )
1295   {
1296     data->Stamp( p, encoding );
1297     location = data->Cursor();
1298   }
1299   if ( !p || !*p || *p != '<' )
1300   {
1301     if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1302     return 0;
1303   }
1304   ++p;
1305   value = "";
1306
1307   while ( p && *p && *p != '>' )
1308   {
1309     value += *p;
1310     ++p;
1311   }
1312
1313   if ( !p )
1314   {
1315     if ( document )   document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
1316   }
1317   if ( *p == '>' )
1318     return p+1;
1319   return p;
1320 }
1321
1322 #ifdef TIXML_USE_STL
1323 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
1324 {
1325   while ( in->good() )
1326   {
1327     int c = in->get();
1328     if ( c <= 0 )
1329     {
1330       TiXmlDocument* document = GetDocument();
1331       if ( document )
1332         document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1333       return;
1334     }
1335
1336     (*tag) += (char) c;
1337
1338     if ( c == '>'
1339          && tag->at( tag->length() - 2 ) == '-'
1340          && tag->at( tag->length() - 3 ) == '-' )
1341     {
1342       // All is well.
1343       return;
1344     }
1345   }
1346 }
1347 #endif
1348
1349
1350 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1351 {
1352   TiXmlDocument* document = GetDocument();
1353   value = "";
1354
1355   p = SkipWhiteSpace( p, encoding );
1356
1357   if ( data )
1358   {
1359     data->Stamp( p, encoding );
1360     location = data->Cursor();
1361   }
1362   const char* startTag = "<!--";
1363   const char* endTag   = "-->";
1364
1365   if ( !StringEqual( p, startTag, false, encoding ) )
1366   {
1367     document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
1368     return 0;
1369   }
1370   p += strlen( startTag );
1371   p = ReadText( p, &value, false, endTag, false, encoding );
1372   return p;
1373 }
1374
1375
1376 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1377 {
1378   p = SkipWhiteSpace( p, encoding );
1379   if ( !p || !*p ) return 0;
1380
1381   //    int tabsize = 4;
1382   //    if ( document )
1383   //            tabsize = document->TabSize();
1384
1385   if ( data )
1386   {
1387     data->Stamp( p, encoding );
1388     location = data->Cursor();
1389   }
1390   // Read the name, the '=' and the value.
1391   const char* pErr = p;
1392   p = ReadName( p, &name, encoding );
1393   if ( !p || !*p )
1394   {
1395     if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1396     return 0;
1397   }
1398   p = SkipWhiteSpace( p, encoding );
1399   if ( !p || !*p || *p != '=' )
1400   {
1401     if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1402     return 0;
1403   }
1404
1405   ++p;  // skip '='
1406   p = SkipWhiteSpace( p, encoding );
1407   if ( !p || !*p )
1408   {
1409     if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1410     return 0;
1411   }
1412
1413   const char* end;
1414   const char SINGLE_QUOTE = '\'';
1415   const char DOUBLE_QUOTE = '\"';
1416
1417   if ( *p == SINGLE_QUOTE )
1418   {
1419     ++p;
1420     end = "\'";               // single quote in string
1421     p = ReadText( p, &value, false, end, false, encoding );
1422   }
1423   else if ( *p == DOUBLE_QUOTE )
1424   {
1425     ++p;
1426     end = "\"";               // double quote in string
1427     p = ReadText( p, &value, false, end, false, encoding );
1428   }
1429   else
1430   {
1431     // All attribute values should be in single or double quotes.
1432     // But this is such a common error that the parser will try
1433     // its best, even without them.
1434     value = "";
1435     while (    p && *p                                                                                        // existence
1436                && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'     // whitespace
1437                && *p != '/' && *p != '>' )                                                    // tag end
1438     {
1439       if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
1440         // [ 1451649 ] Attribute values with trailing quotes not handled correctly
1441         // We did not have an opening quote but seem to have a
1442         // closing one. Give up and throw an error.
1443         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1444         return 0;
1445       }
1446       value += *p;
1447       ++p;
1448     }
1449   }
1450   return p;
1451 }
1452
1453 #ifdef TIXML_USE_STL
1454 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
1455 {
1456   while ( in->good() )
1457   {
1458     int c = in->peek();
1459     if ( !cdata && (c == '<' ) )
1460     {
1461       return;
1462     }
1463     if ( c <= 0 )
1464     {
1465       TiXmlDocument* document = GetDocument();
1466       if ( document )
1467         document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1468       return;
1469     }
1470
1471     (*tag) += (char) c;
1472     in->get();        // "commits" the peek made above
1473
1474     if ( cdata && c == '>' && tag->size() >= 3 ) {
1475       size_t len = tag->size();
1476       if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
1477         // terminator of cdata.
1478         return;
1479       }
1480     }
1481   }
1482 }
1483 #endif
1484
1485 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1486 {
1487   value = "";
1488   TiXmlDocument* document = GetDocument();
1489
1490   if ( data )
1491   {
1492     data->Stamp( p, encoding );
1493     location = data->Cursor();
1494   }
1495
1496   const char* const startTag = "<![CDATA[";
1497   const char* const endTag   = "]]>";
1498
1499   if ( cdata || StringEqual( p, startTag, false, encoding ) )
1500   {
1501     cdata = true;
1502
1503     if ( !StringEqual( p, startTag, false, encoding ) )
1504     {
1505       document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
1506       return 0;
1507     }
1508     p += strlen( startTag );
1509
1510     // Keep all the white space, ignore the encoding, etc.
1511     while (      p && *p
1512                  && !StringEqual( p, endTag, false, encoding )
1513                  )
1514     {
1515       value += *p;
1516       ++p;
1517     }
1518
1519     TIXML_STRING dummy;
1520     p = ReadText( p, &dummy, false, endTag, false, encoding );
1521     return p;
1522   }
1523   else
1524   {
1525     bool ignoreWhite = true;
1526
1527     const char* end = "<";
1528     p = ReadText( p, &value, ignoreWhite, end, false, encoding );
1529     if ( p )
1530       return p-1;     // don't truncate the '<'
1531     return 0;
1532   }
1533 }
1534
1535 #ifdef TIXML_USE_STL
1536 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
1537 {
1538   while ( in->good() )
1539   {
1540     int c = in->get();
1541     if ( c <= 0 )
1542     {
1543       TiXmlDocument* document = GetDocument();
1544       if ( document )
1545         document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1546       return;
1547     }
1548     (*tag) += (char) c;
1549
1550     if ( c == '>' )
1551     {
1552       // All is well.
1553       return;
1554     }
1555   }
1556 }
1557 #endif
1558
1559 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
1560 {
1561   p = SkipWhiteSpace( p, _encoding );
1562   // Find the beginning, find the end, and look for
1563   // the stuff in-between.
1564   TiXmlDocument* document = GetDocument();
1565   if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
1566   {
1567     if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
1568     return 0;
1569   }
1570   if ( data )
1571   {
1572     data->Stamp( p, _encoding );
1573     location = data->Cursor();
1574   }
1575   p += 5;
1576
1577   version = "";
1578   encoding = "";
1579   standalone = "";
1580
1581   while ( p && *p )
1582   {
1583     if ( *p == '>' )
1584     {
1585       ++p;
1586       return p;
1587     }
1588
1589     p = SkipWhiteSpace( p, _encoding );
1590     if ( StringEqual( p, "version", true, _encoding ) )
1591     {
1592       TiXmlAttribute attrib;
1593       p = attrib.Parse( p, data, _encoding );
1594       version = attrib.Value();
1595     }
1596     else if ( StringEqual( p, "encoding", true, _encoding ) )
1597     {
1598       TiXmlAttribute attrib;
1599       p = attrib.Parse( p, data, _encoding );
1600       encoding = attrib.Value();
1601     }
1602     else if ( StringEqual( p, "standalone", true, _encoding ) )
1603     {
1604       TiXmlAttribute attrib;
1605       p = attrib.Parse( p, data, _encoding );
1606       standalone = attrib.Value();
1607     }
1608     else
1609     {
1610       // Read over whatever it is.
1611       while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
1612         ++p;
1613     }
1614   }
1615   return 0;
1616 }
1617
1618 bool TiXmlText::Blank() const
1619 {
1620   for ( unsigned i=0; i<value.length(); i++ )
1621     if ( !IsWhiteSpace( value[i] ) )
1622       return false;
1623   return true;
1624 }
1625
1626
1627 #endif