Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-03-14 08:15:02

0001 #ifndef DDCORE_SRC_XML_TINYXMLPARSER_INL_H
0002 #define DDCORE_SRC_XML_TINYXMLPARSER_INL_H
0003 
0004 /*
0005   www.sourceforge.net/projects/tinyxml
0006   Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
0007 
0008   This software is provided 'as-is', without any express or implied
0009   warranty. In no event will the authors be held liable for any
0010   damages arising from the use of this software.
0011 
0012   Permission is granted to anyone to use this software for any
0013   purpose, including commercial applications, and to alter it and
0014   redistribute it freely, subject to the following restrictions:
0015 
0016   1. The origin of this software must not be misrepresented; you must
0017   not claim that you wrote the original software. If you use this
0018   software in a product, an acknowledgment in the product documentation
0019   would be appreciated but is not required.
0020 
0021   2. Altered source versions must be plainly marked as such, and
0022   must not be misrepresented as being the original software.
0023 
0024   3. This notice may not be removed or altered from any source
0025   distribution.
0026 
0027   F.Gaede, DESY : changed extension to .cc  for use with marlin
0028   and include from "marlin/tinyxml.h"
0029 
0030 */
0031 
0032 #include <ctype.h>
0033 #include <stddef.h>
0034 
0035 #include <XML/tinyxml.h>
0036 
0037 //#define DEBUG_PARSER
0038 #if defined( DEBUG_PARSER )
0039 #       if defined( DEBUG ) && defined( _MSC_VER )
0040 #               include <windows.h>
0041 #               define TIXML_LOG OutputDebugString
0042 #       else
0043 #               define TIXML_LOG printf
0044 #       endif
0045 #endif
0046 
// ATTR_FALLTHROUGH marks an intentional fall-through between switch cases.
// The [[fallthrough]] attribute only exists from C++17, so the macro expands
// to it only when the compiler reports support via __has_cpp_attribute and
// expands to nothing otherwise.
#if defined __has_cpp_attribute
    #if __has_cpp_attribute(fallthrough)
        #define ATTR_FALLTHROUGH [[fallthrough]]
    #else
        #define ATTR_FALLTHROUGH
    #endif
#else
    #define ATTR_FALLTHROUGH
#endif
0057 
0058 
// Table of the named entities understood by the parser: the entity text,
// its length in characters, and the single character it stands for.
//
// Note that "PutString" hardcodes the same list. This
// is less flexible than it appears. Changing the entries
// or order will break PutString.
TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
  {
    // FIXME: workaround for processor conditions of type "&&": a bare '&'
    // is registered here in place of "&amp;".
    // Needs to be tested if there are no adverse effects due to this change!!
    // NOTE: "&" is a prefix of every other entry in this table, so any lookup
    // that scans the table in order has to make sure this one-character entry
    // does not shadow the longer ones.
    { "&",  1, '&' },
    //{ "&amp;",  5, '&' },

    { "&lt;",   4, '<' },
    { "&gt;",   4, '>' },
    { "&quot;", 6, '\"' },
    { "&apos;", 6, '\'' }
  };
0074 
0075 // Bunch of unicode info at:
0076 //              http://www.unicode.org/faq/utf_bom.html
0077 // Including the basic of this table, which determines the #bytes in the
0078 // sequence from the lead byte. 1 placed for invalid sequences --
0079 // although the result will be junk, pass it through as much as possible.
0080 // Beware of the non-characters in UTF-8:
0081 //                              ef bb bf (Microsoft "lead bytes")
0082 //                              ef bf be
0083 //                              ef bf bf
0084 
// The three bytes ef bb bf (the Microsoft "lead bytes"), in the order in
// which they appear at the start of a UTF-8 stream.
const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
0088 
// Lookup table indexed by the lead byte of a UTF-8 sequence; the value is the
// number of bytes in that sequence. Bytes that cannot start a sequence map to
// 1 so that malformed input is stepped over one byte at a time.
const int TiXmlBase::utf8ByteTable[256] =
  {
    //  0       1       2       3       4       5       6       7       8       9       a       b       c       d       e       f
    1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x00
    1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x10
    1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x20
    1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x30
    1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x40
    1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x50
    1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x60
    1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x70 End of ASCII range
    1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x80 0x80 to 0xc1 invalid
    1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x90
    1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xa0
    1,  1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xb0
    1,  1,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xc0 0xc2 to 0xdf 2 byte
    2,  2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xd0
    3,  3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      // 0xe0 0xe0 to 0xef 3 byte
    4,  4,      4,      4,      4,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1       // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
  };
0109 
0110 
0111 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
0112 {
0113   const unsigned long BYTE_MASK = 0xBF;
0114   const unsigned long BYTE_MARK = 0x80;
0115   const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
0116 
0117   if (input < 0x80)
0118     *length = 1;
0119   else if ( input < 0x800 )
0120     *length = 2;
0121   else if ( input < 0x10000 )
0122     *length = 3;
0123   else if ( input < 0x200000 )
0124     *length = 4;
0125   else
0126   { *length = 0; return; }    // This code won't covert this correctly anyway.
0127 
0128   output += *length;
0129 
0130   // Scary scary fall throughs.
0131   switch (*length)
0132   {
0133   case 4:
0134     --output;
0135     *output = (char)((input | BYTE_MARK) & BYTE_MASK);
0136     input >>= 6;
0137     ATTR_FALLTHROUGH;
0138   case 3:
0139     --output;
0140     *output = (char)((input | BYTE_MARK) & BYTE_MASK);
0141     input >>= 6;
0142     ATTR_FALLTHROUGH;
0143   case 2:
0144     --output;
0145     *output = (char)((input | BYTE_MARK) & BYTE_MASK);
0146     input >>= 6;
0147     ATTR_FALLTHROUGH;
0148   case 1:
0149     --output;
0150     *output = (char)(input | FIRST_BYTE_MARK[*length]);
0151     ATTR_FALLTHROUGH;
0152   default:
0153     break;
0154   }
0155 }
0156 
0157 
0158 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
0159 {
0160   // This will only work for low-ascii, everything else is assumed to be a valid
0161   // letter. I'm not sure this is the best approach, but it is quite tricky trying
0162   // to figure out alhabetical vs. not across encoding. So take a very
0163   // conservative approach.
0164 
0165   //    if ( encoding == TIXML_ENCODING_UTF8 )
0166   //    {
0167   if ( anyByte < 127 )
0168     return isalpha( anyByte );
0169   else
0170     return 1;   // What else to do? The unicode set is huge...get the english ones right.
0171   //    }
0172   //    else
0173   //    {
0174   //            return isalpha( anyByte );
0175   //    }
0176 }
0177 
0178 
0179 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
0180 {
0181   // This will only work for low-ascii, everything else is assumed to be a valid
0182   // letter. I'm not sure this is the best approach, but it is quite tricky trying
0183   // to figure out alhabetical vs. not across encoding. So take a very
0184   // conservative approach.
0185 
0186   //    if ( encoding == TIXML_ENCODING_UTF8 )
0187   //    {
0188   if ( anyByte < 127 )
0189     return isalnum( anyByte );
0190   else
0191     return 1;   // What else to do? The unicode set is huge...get the english ones right.
0192   //    }
0193   //    else
0194   //    {
0195   //            return isalnum( anyByte );
0196   //    }
0197 }
0198 
0199 
0200 class TiXmlParsingData
0201 {
0202   friend class TiXmlDocument;
0203 public:
0204   void Stamp( const char* now, TiXmlEncoding encoding );
0205 
0206   const TiXmlCursor& Cursor()   { return cursor; }
0207 
0208 private:
0209   // Only used by the document!
0210   TiXmlParsingData( const char* start, int _tabsize, int row, int col )
0211   {
0212     assert( start );
0213     stamp = start;
0214     tabsize = _tabsize;
0215     cursor.row = row;
0216     cursor.col = col;
0217   }
0218 
0219   TiXmlCursor           cursor;
0220   const char*           stamp;
0221   int                           tabsize;
0222 };
0223 
0224 
0225 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
0226 {
0227   assert( now );
0228 
0229   // Do nothing if the tabsize is 0.
0230   if ( tabsize < 1 )
0231   {
0232     return;
0233   }
0234 
0235   // Get the current row, column.
0236   int row = cursor.row;
0237   int col = cursor.col;
0238   const char* p = stamp;
0239   assert( p );
0240 
0241   while ( p < now )
0242   {
0243     // Treat p as unsigned, so we have a happy compiler.
0244     const unsigned char* pU = (const unsigned char*)p;
0245 
0246     // Code contributed by Fletcher Dunn: (modified by lee)
0247     switch (*pU) {
0248     case 0:
0249       // We *should* never get here, but in case we do, don't
0250       // advance past the terminating null character, ever
0251       return;
0252 
0253     case '\r':
0254       // bump down to the next line
0255       ++row;
0256       col = 0;
0257       // Eat the character
0258       ++p;
0259 
0260       // Check for \r\n sequence, and treat this as a single character
0261       if (*p == '\n') {
0262         ++p;
0263       }
0264       break;
0265 
0266     case '\n':
0267       // bump down to the next line
0268       ++row;
0269       col = 0;
0270 
0271       // Eat the character
0272       ++p;
0273 
0274       // Check for \n\r sequence, and treat this as a single
0275       // character.  (Yes, this bizarre thing does occur still
0276       // on some arcane platforms...)
0277       if (*p == '\r') {
0278         ++p;
0279       }
0280       break;
0281 
0282     case '\t':
0283       // Eat the character
0284       ++p;
0285 
0286       // Skip to next tab stop
0287       col = (col / tabsize + 1) * tabsize;
0288       break;
0289 
0290     case TIXML_UTF_LEAD_0:
0291       if ( encoding == TIXML_ENCODING_UTF8 )
0292       {
0293         if ( *(p+1) && *(p+2) )
0294         {
0295           // In these cases, don't advance the column. These are
0296           // 0-width spaces.
0297           if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
0298             p += 3;
0299           else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
0300             p += 3;
0301           else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
0302             p += 3;
0303           else
0304           { p +=3; ++col; }     // A normal character.
0305         }
0306       }
0307       else
0308       {
0309         ++p;
0310         ++col;
0311       }
0312       break;
0313 
0314     default:
0315       if ( encoding == TIXML_ENCODING_UTF8 )
0316       {
0317         // Eat the 1 to 4 byte utf8 character.
0318         int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
0319         if ( step == 0 )
0320           step = 1;         // Error case from bad encoding, but handle gracefully.
0321         p += step;
0322 
0323         // Just advance one column, of course.
0324         ++col;
0325       }
0326       else
0327       {
0328         ++p;
0329         ++col;
0330       }
0331       break;
0332     }
0333   }
0334   cursor.row = row;
0335   cursor.col = col;
0336   assert( cursor.row >= -1 );
0337   assert( cursor.col >= -1 );
0338   stamp = p;
0339   assert( stamp );
0340 }
0341 
0342 
0343 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
0344 {
0345   if ( !p || !*p )
0346   {
0347     return 0;
0348   }
0349   if ( encoding == TIXML_ENCODING_UTF8 )
0350   {
0351     while ( *p )
0352     {
0353       const unsigned char* pU = (const unsigned char*)p;
0354 
0355       // Skip the stupid Microsoft UTF-8 Byte order marks
0356       if (  *(pU+0)==TIXML_UTF_LEAD_0
0357             && *(pU+1)==TIXML_UTF_LEAD_1
0358             && *(pU+2)==TIXML_UTF_LEAD_2 )
0359       {
0360         p += 3;
0361         continue;
0362       }
0363       else if(*(pU+0)==TIXML_UTF_LEAD_0
0364               && *(pU+1)==0xbfU
0365               && *(pU+2)==0xbeU )
0366       {
0367         p += 3;
0368         continue;
0369       }
0370       else if(*(pU+0)==TIXML_UTF_LEAD_0
0371               && *(pU+1)==0xbfU
0372               && *(pU+2)==0xbfU )
0373       {
0374         p += 3;
0375         continue;
0376       }
0377 
0378       if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )          // Still using old rules for white space.
0379         ++p;
0380       else
0381         break;
0382     }
0383   }
0384   else
0385   {
0386     while ( ( *p && IsWhiteSpace( *p ) ) || *p == '\n' || *p =='\r' )
0387       ++p;
0388   }
0389 
0390   return p;
0391 }
0392 
0393 #ifdef TIXML_USE_STL
0394 /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
0395 {
0396   for( ;; )
0397   {
0398     if ( !in->good() ) return false;
0399 
0400     int c = in->peek();
0401     // At this scope, we can't get to a document. So fail silently.
0402     if ( !IsWhiteSpace( c ) || c <= 0 )
0403       return true;
0404 
0405     *tag += (char) in->get();
0406   }
0407 }
0408 
0409 /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
0410 {
0411   //assert( character > 0 && character < 128 ); // else it won't work in utf-8
0412   while ( in->good() )
0413   {
0414     int c = in->peek();
0415     if ( c == character )
0416       return true;
0417     if ( c <= 0 )             // Silent failure: can't get document at this scope
0418       return false;
0419 
0420     in->get();
0421     *tag += (char) c;
0422   }
0423   return false;
0424 }
0425 #endif
0426 
0427 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
0428 // "assign" optimization removes over 10% of the execution time.
0429 //
0430 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
0431 {
0432   // Oddly, not supported on some comilers,
0433   //name->clear();
0434   // So use this:
0435   *name = "";
0436   assert( p );
0437 
0438   // Names start with letters or underscores.
0439   // Of course, in unicode, tinyxml has no idea what a letter *is*. The
0440   // algorithm is generous.
0441   //
0442   // After that, they can be letters, underscores, numbers,
0443   // hyphens, or colons. (Colons are valid ony for namespaces,
0444   // but tinyxml can't tell namespaces from names.)
0445   if (    p && *p
0446           && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
0447   {
0448     const char* start = p;
0449     while(            p && *p
0450                       &&      (               IsAlphaNum( (unsigned char ) *p, encoding )
0451                                               || *p == '_'
0452                                               || *p == '-'
0453                                               || *p == '.'
0454                                               || *p == ':' ) )
0455     {
0456       //(*name) += *p; // expensive
0457       ++p;
0458     }
0459     if ( p-start > 0 ) {
0460       name->assign( start, p-start );
0461     }
0462     return p;
0463   }
0464   return 0;
0465 }
0466 
0467 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
0468 {
0469   // Presume an entity, and pull it out.
0470   TIXML_STRING ent;
0471   int i;
0472   *length = 0;
0473 
0474   if ( *(p+1) && *(p+1) == '#' && *(p+2) )
0475   {
0476     unsigned long ucs = 0;
0477     ptrdiff_t delta = 0;
0478     unsigned mult = 1;
0479 
0480     if ( *(p+2) == 'x' )
0481     {
0482       // Hexadecimal.
0483       if ( !*(p+3) ) return 0;
0484 
0485       const char* q = p+3;
0486       q = strchr( q, ';' );
0487 
0488       if ( !q || !*q ) return 0;
0489 
0490       delta = q-p;
0491       --q;
0492 
0493       while ( *q != 'x' )
0494       {
0495         if ( *q >= '0' && *q <= '9' )
0496           ucs += mult * (*q - '0');
0497         else if ( *q >= 'a' && *q <= 'f' )
0498           ucs += mult * (*q - 'a' + 10);
0499         else if ( *q >= 'A' && *q <= 'F' )
0500           ucs += mult * (*q - 'A' + 10 );
0501         else
0502           return 0;
0503         mult *= 16;
0504         --q;
0505       }
0506     }
0507     else
0508     {
0509       // Decimal.
0510       if ( !*(p+2) ) return 0;
0511 
0512       const char* q = p+2;
0513       q = strchr( q, ';' );
0514 
0515       if ( !q || !*q ) return 0;
0516 
0517       delta = q-p;
0518       --q;
0519 
0520       while ( *q != '#' )
0521       {
0522         if ( *q >= '0' && *q <= '9' )
0523           ucs += mult * (*q - '0');
0524         else
0525           return 0;
0526         mult *= 10;
0527         --q;
0528       }
0529     }
0530     if ( encoding == TIXML_ENCODING_UTF8 )
0531     {
0532       // convert the UCS to UTF-8
0533       ConvertUTF32ToUTF8( ucs, value, length );
0534     }
0535     else
0536     {
0537       *value = (char)ucs;
0538       *length = 1;
0539     }
0540     return p + delta + 1;
0541   }
0542 
0543   // Now try to match it.
0544   for( i=0; i<NUM_ENTITY; ++i )
0545   {
0546     if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
0547     {
0548       assert( strlen( entity[i].str ) == entity[i].strLength );
0549       *value = entity[i].chr;
0550       *length = 1;
0551       return ( p + entity[i].strLength );
0552     }
0553   }
0554 
0555   // So it wasn't an entity, its unrecognized, or something like that.
0556   *value = *p;  // Don't put back the last one, since we return it!
0557   //*length = 1;        // Leave unrecognized entities - this doesn't really work.
0558   // Just writes strange XML.
0559   return p+1;
0560 }
0561 
0562 
0563 bool TiXmlBase::StringEqual( const char* p,
0564                              const char* tag,
0565                              bool ignoreCase,
0566                              TiXmlEncoding encoding )
0567 {
0568   assert( p );
0569   assert( tag );
0570   if ( !p || !*p )
0571   {
0572     assert( 0 );
0573     return false;
0574   }
0575 
0576   const char* q = p;
0577 
0578   if ( ignoreCase )
0579   {
0580     while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
0581     {
0582       ++q;
0583       ++tag;
0584     }
0585 
0586     if ( *tag == 0 )
0587       return true;
0588   }
0589   else
0590   {
0591     while ( *q && *tag && *q == *tag )
0592     {
0593       ++q;
0594       ++tag;
0595     }
0596 
0597     if ( *tag == 0 )          // Have we found the end of the tag, and everything equal?
0598       return true;
0599   }
0600   return false;
0601 }
0602 
0603 const char* TiXmlBase::ReadText(        const char* p,
0604                                         TIXML_STRING * text,
0605                                         bool trimWhiteSpace,
0606                                         const char* endTag,
0607                                         bool caseInsensitive,
0608                                         TiXmlEncoding encoding )
0609 {
0610   *text = "";
0611   if (    !trimWhiteSpace                       // certain tags always keep whitespace
0612           || !condenseWhiteSpace )      // if true, whitespace is always kept
0613   {
0614     // Keep all the white space.
0615     while (      p && *p
0616                  && !StringEqual( p, endTag, caseInsensitive, encoding )
0617                  )
0618     {
0619       int len;
0620       char cArr[4] = { 0, 0, 0, 0 };
0621       p = GetChar( p, cArr, &len, encoding );
0622       text->append( cArr, len );
0623     }
0624   }
0625   else
0626   {
0627     bool whitespace = false;
0628 
0629     // Remove leading white space:
0630     p = SkipWhiteSpace( p, encoding );
0631     while (      p && *p
0632                  && !StringEqual( p, endTag, caseInsensitive, encoding ) )
0633     {
0634       if ( *p == '\r' || *p == '\n' )
0635       {
0636         whitespace = true;
0637         ++p;
0638       }
0639       else if ( IsWhiteSpace( *p ) )
0640       {
0641         whitespace = true;
0642         ++p;
0643       }
0644       else
0645       {
0646         // If we've found whitespace, add it before the
0647         // new character. Any whitespace just becomes a space.
0648         if ( whitespace )
0649         {
0650           (*text) += ' ';
0651           whitespace = false;
0652         }
0653         int len;
0654         char cArr[4] = { 0, 0, 0, 0 };
0655         p = GetChar( p, cArr, &len, encoding );
0656         if ( len == 1 )
0657           (*text) += cArr[0];     // more efficient
0658         else
0659           text->append( cArr, len );
0660       }
0661     }
0662   }
0663   if ( p )
0664     p += strlen( endTag );
0665   return p;
0666 }
0667 
0668 #ifdef TIXML_USE_STL
0669 
0670 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
0671 {
0672   // The basic issue with a document is that we don't know what we're
0673   // streaming. Read something presumed to be a tag (and hope), then
0674   // identify it, and call the appropriate stream method on the tag.
0675   //
0676   // This "pre-streaming" will never read the closing ">" so the
0677   // sub-tag can orient itself.
0678 
0679   if ( !StreamTo( in, '<', tag ) )
0680   {
0681     SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
0682     return;
0683   }
0684 
0685   while ( in->good() )
0686   {
0687     int tagIndex = (int) tag->length();
0688     while ( in->good() && in->peek() != '>' )
0689     {
0690       int c = in->get();
0691       if ( c <= 0 )
0692       {
0693         SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
0694         break;
0695       }
0696       (*tag) += (char) c;
0697     }
0698 
0699     if ( in->good() )
0700     {
0701       // We now have something we presume to be a node of
0702       // some sort. Identify it, and call the node to
0703       // continue streaming.
0704       TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
0705 
0706       if ( node )
0707       {
0708         node->StreamIn( in, tag );
0709         bool isElement = node->ToElement() != 0;
0710         delete node;
0711         node = 0;
0712 
0713         // If this is the root element, we're done. Parsing will be
0714         // done by the >> operator.
0715         if ( isElement )
0716         {
0717           return;
0718         }
0719       }
0720       else
0721       {
0722         SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
0723         return;
0724       }
0725     }
0726   }
0727   // We should have returned sooner.
0728   SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
0729 }
0730 
0731 #endif
0732 
0733 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
0734 {
0735   ClearError();
0736 
0737   // Parse away, at the document level. Since a document
0738   // contains nothing but other tags, most of what happens
0739   // here is skipping white space.
0740   if ( !p || !*p )
0741   {
0742     SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
0743     return 0;
0744   }
0745 
0746   // Note that, for a document, this needs to come
0747   // before the while space skip, so that parsing
0748   // starts from the pointer we are given.
0749   location.Clear();
0750   if ( prevData )
0751   {
0752     location.row = prevData->cursor.row;
0753     location.col = prevData->cursor.col;
0754   }
0755   else
0756   {
0757     location.row = 0;
0758     location.col = 0;
0759   }
0760   TiXmlParsingData data( p, TabSize(), location.row, location.col );
0761   location = data.Cursor();
0762 
0763   if ( encoding == TIXML_ENCODING_UNKNOWN )
0764   {
0765     // Check for the Microsoft UTF-8 lead bytes.
0766     const unsigned char* pU = (const unsigned char*)p;
0767     if (      *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
0768               && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
0769               && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
0770     {
0771       encoding = TIXML_ENCODING_UTF8;
0772       useMicrosoftBOM = true;
0773     }
0774   }
0775 
0776   p = SkipWhiteSpace( p, encoding );
0777   if ( !p )
0778   {
0779     SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
0780     return 0;
0781   }
0782 
0783   while ( p && *p )
0784   {
0785     TiXmlNode* node = Identify( p, encoding );
0786     if ( node )
0787     {
0788       p = node->Parse( p, &data, encoding );
0789       LinkEndChild( node );
0790     }
0791     else
0792     {
0793       break;
0794     }
0795 
0796     // Did we get encoding info?
0797     if (    encoding == TIXML_ENCODING_UNKNOWN
0798             && node->ToDeclaration() )
0799     {
0800       TiXmlDeclaration* dec = node->ToDeclaration();
0801       const char* enc = dec->Encoding();
0802       assert( enc );
0803 
0804       if ( *enc == 0 )
0805         encoding = TIXML_ENCODING_UTF8;
0806       else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
0807         encoding = TIXML_ENCODING_UTF8;
0808       else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
0809         encoding = TIXML_ENCODING_UTF8;     // incorrect, but be nice
0810       else
0811         encoding = TIXML_ENCODING_LEGACY;
0812     }
0813 
0814     p = SkipWhiteSpace( p, encoding );
0815   }
0816 
0817   // Was this empty?
0818   if ( !firstChild ) {
0819     SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
0820     return 0;
0821   }
0822 
0823   // All is well.
0824   return p;
0825 }
0826 
0827 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
0828 {
0829   // The first error in a chain is more accurate - don't set again!
0830   if ( error )
0831     return;
0832 
0833   assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
0834   error   = true;
0835   errorId = err;
0836   errorDesc = errorString[ errorId ];
0837 
0838   errorLocation.Clear();
0839   if ( pError && data )
0840   {
0841     data->Stamp( pError, encoding );
0842     errorLocation = data->Cursor();
0843   }
0844 }
0845 
0846 
0847 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
0848 {
0849   TiXmlNode* returnNode = 0;
0850 
0851   p = SkipWhiteSpace( p, encoding );
0852   if( !p || !*p || *p != '<' )
0853   {
0854     return 0;
0855   }
0856 
0857   TiXmlDocument* doc = GetDocument();
0858   p = SkipWhiteSpace( p, encoding );
0859 
0860   if ( !p || !*p )
0861   {
0862     return 0;
0863   }
0864 
0865   // What is this thing?
0866   // - Elements start with a letter or underscore, but xml is reserved.
0867   // - Comments: <!--
0868   // - Decleration: <?xml
0869   // - Everthing else is unknown to tinyxml.
0870   //
0871 
0872   const char* xmlHeader = { "<?xml" };
0873   const char* commentHeader = { "<!--" };
0874   const char* dtdHeader = { "<!" };
0875   const char* cdataHeader = { "<![CDATA[" };
0876 
0877   if ( StringEqual( p, xmlHeader, true, encoding ) )
0878   {
0879 #ifdef DEBUG_PARSER
0880     TIXML_LOG( "XML parsing Declaration\n" );
0881 #endif
0882     returnNode = new TiXmlDeclaration();
0883   }
0884   else if ( StringEqual( p, commentHeader, false, encoding ) )
0885   {
0886 #ifdef DEBUG_PARSER
0887     TIXML_LOG( "XML parsing Comment\n" );
0888 #endif
0889     returnNode = new TiXmlComment();
0890   }
0891   else if ( StringEqual( p, cdataHeader, false, encoding ) )
0892   {
0893 #ifdef DEBUG_PARSER
0894     TIXML_LOG( "XML parsing CDATA\n" );
0895 #endif
0896     TiXmlText* text = new TiXmlText( "" );
0897     text->SetCDATA( true );
0898     returnNode = text;
0899   }
0900   else if ( StringEqual( p, dtdHeader, false, encoding ) )
0901   {
0902 #ifdef DEBUG_PARSER
0903     TIXML_LOG( "XML parsing Unknown(1)\n" );
0904 #endif
0905     returnNode = new TiXmlUnknown();
0906   }
0907   else if (    IsAlpha( *(p+1), encoding )
0908                || *(p+1) == '_' )
0909   {
0910 #ifdef DEBUG_PARSER
0911     TIXML_LOG( "XML parsing Element\n" );
0912 #endif
0913     returnNode = new TiXmlElement( "" );
0914   }
0915   else
0916   {
0917 #ifdef DEBUG_PARSER
0918     TIXML_LOG( "XML parsing Unknown(2)\n" );
0919 #endif
0920     returnNode = new TiXmlUnknown();
0921   }
0922 
0923   if ( returnNode )
0924   {
0925     // Set the parent, so it can report errors
0926     returnNode->parent = this;
0927   }
0928   else
0929   {
0930     if ( doc )
0931       doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
0932   }
0933   return returnNode;
0934 }
0935 
0936 #ifdef TIXML_USE_STL
0937 
0938 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
0939 {
0940   // We're called with some amount of pre-parsing. That is, some of "this"
0941   // element is in "tag". Go ahead and stream to the closing ">"
0942   while( in->good() )
0943   {
0944     int c = in->get();
0945     if ( c <= 0 )
0946     {
0947       TiXmlDocument* document = GetDocument();
0948       if ( document )
0949         document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
0950       return;
0951     }
0952     (*tag) += (char) c ;
0953 
0954     if ( c == '>' )
0955       break;
0956   }
0957 
0958   if ( tag->length() < 3 ) return;
0959 
0960   // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
0961   // If not, identify and stream.
0962 
0963   if (    tag->at( tag->length() - 1 ) == '>'
0964           && tag->at( tag->length() - 2 ) == '/' )
0965   {
0966     // All good!
0967     return;
0968   }
0969   else if ( tag->at( tag->length() - 1 ) == '>' )
0970   {
0971     // There is more. Could be:
0972     //                text
0973     //                cdata text (which looks like another node)
0974     //                closing tag
0975     //                another node.
0976     for ( ;; )
0977     {
0978       StreamWhiteSpace( in, tag );
0979 
0980       // Do we have text?
0981       if ( in->good() && in->peek() != '<' )
0982       {
0983         // Yep, text.
0984         TiXmlText text( "" );
0985         text.StreamIn( in, tag );
0986 
0987         // What follows text is a closing tag or another node.
0988         // Go around again and figure it out.
0989         continue;
0990       }
0991 
0992       // We now have either a closing tag...or another node.
0993       // We should be at a "<", regardless.
0994       if ( !in->good() ) return;
0995       assert( in->peek() == '<' );
0996       int tagIndex = (int) tag->length();
0997 
0998       bool closingTag = false;
0999       bool firstCharFound = false;
1000 
1001       for( ;; )
1002       {
1003         if ( !in->good() )
1004           return;
1005 
1006         int c = in->peek();
1007         if ( c <= 0 )
1008         {
1009           TiXmlDocument* document = GetDocument();
1010           if ( document )
1011             document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1012           return;
1013         }
1014 
1015         if ( c == '>' )
1016           break;
1017 
1018         *tag += (char) c;
1019         in->get();
1020 
1021         // Early out if we find the CDATA id.
1022         if ( c == '[' && tag->size() >= 9 )
1023         {
1024           size_t len = tag->size();
1025           const char* start = tag->c_str() + len - 9;
1026           if ( strcmp( start, "<![CDATA[" ) == 0 ) {
1027             assert( !closingTag );
1028             break;
1029           }
1030         }
1031 
1032         if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
1033         {
1034           firstCharFound = true;
1035           if ( c == '/' )
1036             closingTag = true;
1037         }
1038       }
1039       // If it was a closing tag, then read in the closing '>' to clean up the input stream.
1040       // If it was not, the streaming will be done by the tag.
1041       if ( closingTag )
1042       {
1043         if ( !in->good() )
1044           return;
1045 
1046         int c = in->get();
1047         if ( c <= 0 )
1048         {
1049           TiXmlDocument* document = GetDocument();
1050           if ( document )
1051             document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1052           return;
1053         }
1054         assert( c == '>' );
1055         *tag += (char) c;
1056 
1057         // We are done, once we've found our closing tag.
1058         return;
1059       }
1060       else
1061       {
1062         // If not a closing tag, id it, and stream.
1063         const char* tagloc = tag->c_str() + tagIndex;
1064         TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
1065         if ( !node )
1066           return;
1067         node->StreamIn( in, tag );
1068         delete node;
1069         node = 0;
1070 
1071         // No return: go around from the beginning: text, closing tag, or node.
1072       }
1073     }
1074   }
1075 }
1076 #endif
1077 
1078 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1079 {
1080   p = SkipWhiteSpace( p, encoding );
1081   TiXmlDocument* document = GetDocument();
1082 
1083   if ( !p || !*p )
1084   {
1085     if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
1086     return 0;
1087   }
1088 
1089   if ( data )
1090   {
1091     data->Stamp( p, encoding );
1092     location = data->Cursor();
1093   }
1094 
1095   if ( *p != '<' )
1096   {
1097     if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1098     return 0;
1099   }
1100 
1101   p = SkipWhiteSpace( p+1, encoding );
1102 
1103   // Read the name.
1104   const char* pErr = p;
1105 
1106   p = ReadName( p, &value, encoding );
1107   if ( !p || !*p )
1108   {
1109     if ( document )   document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1110     return 0;
1111   }
1112 
1113   TIXML_STRING endTag ("</");
1114   endTag += value;
1115   endTag += ">";
1116 
1117   // Check for and read attributes. Also look for an empty
1118   // tag or an end tag.
1119   while ( p && *p )
1120   {
1121     pErr = p;
1122     p = SkipWhiteSpace( p, encoding );
1123     if ( !p || !*p )
1124     {
1125       if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1126       return 0;
1127     }
1128     if ( *p == '/' )
1129     {
1130       ++p;
1131       // Empty tag.
1132       if ( *p  != '>' )
1133       {
1134         if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
1135         return 0;
1136       }
1137       return (p+1);
1138     }
1139     else if ( *p == '>' )
1140     {
1141       // Done with attributes (if there were any.)
1142       // Read the value -- which can include other
1143       // elements -- read the end tag, and return.
1144       ++p;
1145       p = ReadValue( p, data, encoding );           // Note this is an Element method, and will set the error if one happens.
1146       if ( !p || !*p )
1147         return 0;
1148 
1149       // We should find the end tag now
1150       if ( StringEqual( p, endTag.c_str(), false, encoding ) )
1151       {
1152         p += endTag.length();
1153         return p;
1154       }
1155       else
1156       {
1157         if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1158         return 0;
1159       }
1160     }
1161     else
1162     {
1163       // Try to read an attribute:
1164       TiXmlAttribute* attrib = new TiXmlAttribute();
1165       if ( !attrib )
1166       {
1167         if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
1168         return 0;
1169       }
1170 
1171       attrib->SetDocument( document );
1172       pErr = p;
1173       p = attrib->Parse( p, data, encoding );
1174 
1175       if ( !p || !*p )
1176       {
1177         if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1178         delete attrib;
1179         return 0;
1180       }
1181 
1182       // Handle_t the strange case of double attributes:
1183 #ifdef TIXML_USE_STL
1184       TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
1185 #else
1186       TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
1187 #endif
1188       if ( node )
1189       {
1190         node->SetValue( attrib->Value() );
1191         delete attrib;
1192         return 0;
1193       }
1194 
1195       attributeSet.Add( attrib );
1196     }
1197   }
1198   return p;
1199 }
1200 
1201 
1202 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1203 {
1204   TiXmlDocument* document = GetDocument();
1205 
1206   // Read in text and elements in any order.
1207   const char* pWithWhiteSpace = p;
1208   p = SkipWhiteSpace( p, encoding );
1209 
1210   while ( p && *p )
1211   {
1212     if ( *p != '<' )
1213     {
1214       // Take what we have, make a text element.
1215       TiXmlText* textNode = new TiXmlText( "" );
1216 
1217       if ( !textNode )
1218       {
1219         if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
1220         return 0;
1221       }
1222 
1223       if ( TiXmlBase::IsWhiteSpaceCondensed() )
1224       {
1225         p = textNode->Parse( p, data, encoding );
1226       }
1227       else
1228       {
1229         // Special case: we want to keep the white space
1230         // so that leading spaces aren't removed.
1231         p = textNode->Parse( pWithWhiteSpace, data, encoding );
1232       }
1233 
1234       if ( !textNode->Blank() )
1235         LinkEndChild( textNode );
1236       else
1237         delete textNode;
1238     }
1239     else
1240     {
1241       // We hit a '<'
1242       // Have we hit a new element or an end tag? This could also be
1243       // a TiXmlText in the "CDATA" style.
1244       if ( StringEqual( p, "</", false, encoding ) )
1245       {
1246         return p;
1247       }
1248       else
1249       {
1250         TiXmlNode* node = Identify( p, encoding );
1251         if ( node )
1252         {
1253           p = node->Parse( p, data, encoding );
1254           LinkEndChild( node );
1255         }
1256         else
1257         {
1258           return 0;
1259         }
1260       }
1261     }
1262     pWithWhiteSpace = p;
1263     p = SkipWhiteSpace( p, encoding );
1264   }
1265 
1266   if ( !p )
1267   {
1268     if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
1269   }
1270   return p;
1271 }
1272 
1273 
1274 #ifdef TIXML_USE_STL
1275 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
1276 {
1277   while ( in->good() )
1278   {
1279     int c = in->get();
1280     if ( c <= 0 )
1281     {
1282       TiXmlDocument* document = GetDocument();
1283       if ( document )
1284         document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1285       return;
1286     }
1287     (*tag) += (char) c;
1288 
1289     if ( c == '>' )
1290     {
1291       // All is well.
1292       return;
1293     }
1294   }
1295 }
1296 #endif
1297 
1298 
1299 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1300 {
1301   TiXmlDocument* document = GetDocument();
1302   p = SkipWhiteSpace( p, encoding );
1303 
1304   if ( data )
1305   {
1306     data->Stamp( p, encoding );
1307     location = data->Cursor();
1308   }
1309   if ( !p || !*p || *p != '<' )
1310   {
1311     if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1312     return 0;
1313   }
1314   ++p;
1315   value = "";
1316 
1317   while ( p && *p && *p != '>' )
1318   {
1319     value += *p;
1320     ++p;
1321   }
1322 
1323   if ( !p )
1324   {
1325     if ( document )   document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
1326   }
1327   if ( *p == '>' )
1328     return p+1;
1329   return p;
1330 }
1331 
1332 #ifdef TIXML_USE_STL
1333 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
1334 {
1335   while ( in->good() )
1336   {
1337     int c = in->get();
1338     if ( c <= 0 )
1339     {
1340       TiXmlDocument* document = GetDocument();
1341       if ( document )
1342         document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1343       return;
1344     }
1345 
1346     (*tag) += (char) c;
1347 
1348     if ( c == '>'
1349          && tag->at( tag->length() - 2 ) == '-'
1350          && tag->at( tag->length() - 3 ) == '-' )
1351     {
1352       // All is well.
1353       return;
1354     }
1355   }
1356 }
1357 #endif
1358 
1359 
1360 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1361 {
1362   TiXmlDocument* document = GetDocument();
1363   value = "";
1364 
1365   p = SkipWhiteSpace( p, encoding );
1366 
1367   if ( data )
1368   {
1369     data->Stamp( p, encoding );
1370     location = data->Cursor();
1371   }
1372   const char* startTag = "<!--";
1373   const char* endTag   = "-->";
1374 
1375   if ( !StringEqual( p, startTag, false, encoding ) )
1376   {
1377     document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
1378     return 0;
1379   }
1380   p += strlen( startTag );
1381   p = ReadText( p, &value, false, endTag, false, encoding );
1382   return p;
1383 }
1384 
1385 
1386 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1387 {
1388   p = SkipWhiteSpace( p, encoding );
1389   if ( !p || !*p ) return 0;
1390 
1391   //    int tabsize = 4;
1392   //    if ( document )
1393   //            tabsize = document->TabSize();
1394 
1395   if ( data )
1396   {
1397     data->Stamp( p, encoding );
1398     location = data->Cursor();
1399   }
1400   // Read the name, the '=' and the value.
1401   const char* pErr = p;
1402   p = ReadName( p, &name, encoding );
1403   if ( !p || !*p )
1404   {
1405     if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1406     return 0;
1407   }
1408   p = SkipWhiteSpace( p, encoding );
1409   if ( !p || !*p || *p != '=' )
1410   {
1411     if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1412     return 0;
1413   }
1414 
1415   ++p;  // skip '='
1416   p = SkipWhiteSpace( p, encoding );
1417   if ( !p || !*p )
1418   {
1419     if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1420     return 0;
1421   }
1422 
1423   const char* end;
1424   const char SINGLE_QUOTE = '\'';
1425   const char DOUBLE_QUOTE = '\"';
1426 
1427   if ( *p == SINGLE_QUOTE )
1428   {
1429     ++p;
1430     end = "\'";               // single quote in string
1431     p = ReadText( p, &value, false, end, false, encoding );
1432   }
1433   else if ( *p == DOUBLE_QUOTE )
1434   {
1435     ++p;
1436     end = "\"";               // double quote in string
1437     p = ReadText( p, &value, false, end, false, encoding );
1438   }
1439   else
1440   {
1441     // All attribute values should be in single or double quotes.
1442     // But this is such a common error that the parser will try
1443     // its best, even without them.
1444     value = "";
1445     while (    p && *p                                                                                        // existence
1446                && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'     // whitespace
1447                && *p != '/' && *p != '>' )                                                    // tag end
1448     {
1449       if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
1450         // [ 1451649 ] Attribute values with trailing quotes not handled correctly
1451         // We did not have an opening quote but seem to have a
1452         // closing one. Give up and throw an error.
1453         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1454         return 0;
1455       }
1456       value += *p;
1457       ++p;
1458     }
1459   }
1460   return p;
1461 }
1462 
1463 #ifdef TIXML_USE_STL
1464 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
1465 {
1466   while ( in->good() )
1467   {
1468     int c = in->peek();
1469     if ( !cdata && (c == '<' ) )
1470     {
1471       return;
1472     }
1473     if ( c <= 0 )
1474     {
1475       TiXmlDocument* document = GetDocument();
1476       if ( document )
1477         document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1478       return;
1479     }
1480 
1481     (*tag) += (char) c;
1482     in->get();        // "commits" the peek made above
1483 
1484     if ( cdata && c == '>' && tag->size() >= 3 ) {
1485       size_t len = tag->size();
1486       if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
1487         // terminator of cdata.
1488         return;
1489       }
1490     }
1491   }
1492 }
1493 #endif
1494 
1495 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1496 {
1497   value = "";
1498   TiXmlDocument* document = GetDocument();
1499 
1500   if ( data )
1501   {
1502     data->Stamp( p, encoding );
1503     location = data->Cursor();
1504   }
1505 
1506   const char* const startTag = "<![CDATA[";
1507   const char* const endTag   = "]]>";
1508 
1509   if ( cdata || StringEqual( p, startTag, false, encoding ) )
1510   {
1511     cdata = true;
1512 
1513     if ( !StringEqual( p, startTag, false, encoding ) )
1514     {
1515       document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
1516       return 0;
1517     }
1518     p += strlen( startTag );
1519 
1520     // Keep all the white space, ignore the encoding, etc.
1521     while (      p && *p
1522                  && !StringEqual( p, endTag, false, encoding )
1523                  )
1524     {
1525       value += *p;
1526       ++p;
1527     }
1528 
1529     TIXML_STRING dummy;
1530     p = ReadText( p, &dummy, false, endTag, false, encoding );
1531     return p;
1532   }
1533   else
1534   {
1535     bool ignoreWhite = true;
1536 
1537     const char* end = "<";
1538     p = ReadText( p, &value, ignoreWhite, end, false, encoding );
1539     if ( p )
1540       return p-1;     // don't truncate the '<'
1541     return 0;
1542   }
1543 }
1544 
1545 #ifdef TIXML_USE_STL
1546 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
1547 {
1548   while ( in->good() )
1549   {
1550     int c = in->get();
1551     if ( c <= 0 )
1552     {
1553       TiXmlDocument* document = GetDocument();
1554       if ( document )
1555         document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1556       return;
1557     }
1558     (*tag) += (char) c;
1559 
1560     if ( c == '>' )
1561     {
1562       // All is well.
1563       return;
1564     }
1565   }
1566 }
1567 #endif
1568 
1569 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
1570 {
1571   p = SkipWhiteSpace( p, _encoding );
1572   // Find the beginning, find the end, and look for
1573   // the stuff in-between.
1574   TiXmlDocument* document = GetDocument();
1575   if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
1576   {
1577     if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
1578     return 0;
1579   }
1580   if ( data )
1581   {
1582     data->Stamp( p, _encoding );
1583     location = data->Cursor();
1584   }
1585   p += 5;
1586 
1587   version = "";
1588   encoding = "";
1589   standalone = "";
1590 
1591   while ( p && *p )
1592   {
1593     if ( *p == '>' )
1594     {
1595       ++p;
1596       return p;
1597     }
1598 
1599     p = SkipWhiteSpace( p, _encoding );
1600     if ( StringEqual( p, "version", true, _encoding ) )
1601     {
1602       TiXmlAttribute attrib;
1603       p = attrib.Parse( p, data, _encoding );
1604       version = attrib.Value();
1605     }
1606     else if ( StringEqual( p, "encoding", true, _encoding ) )
1607     {
1608       TiXmlAttribute attrib;
1609       p = attrib.Parse( p, data, _encoding );
1610       encoding = attrib.Value();
1611     }
1612     else if ( StringEqual( p, "standalone", true, _encoding ) )
1613     {
1614       TiXmlAttribute attrib;
1615       p = attrib.Parse( p, data, _encoding );
1616       standalone = attrib.Value();
1617     }
1618     else
1619     {
1620       // Read over whatever it is.
1621       while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
1622         ++p;
1623     }
1624   }
1625   return 0;
1626 }
1627 
1628 bool TiXmlText::Blank() const
1629 {
1630   for ( unsigned i=0; i<value.length(); i++ )
1631     if ( !IsWhiteSpace( value[i] ) )
1632       return false;
1633   return true;
1634 }
1635 
1636 
1637 #endif