File indexing completed on 2025-12-16 10:34:13
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022 #if !defined(XERCESC_INCLUDE_GUARD_XMLREADER_HPP)
0023 #define XERCESC_INCLUDE_GUARD_XMLREADER_HPP
0024
0025 #include <xercesc/util/XMLChar.hpp>
0026 #include <xercesc/framework/XMLRecognizer.hpp>
0027 #include <xercesc/framework/XMLBuffer.hpp>
0028 #include <xercesc/util/TranscodingException.hpp>
0029
0030 XERCES_CPP_NAMESPACE_BEGIN
0031
0032 class InputSource;
0033 class BinInputStream;
0034 class ReaderMgr;
0035 class XMLScanner;
0036 class XMLTranscoder;
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053 class XMLPARSER_EXPORT XMLReader : public XMemory
0054 {
0055 public:
0056
0057
0058
0059 enum Types
0060 {
0061 Type_PE
0062 , Type_General
0063 };
0064
0065 enum Sources
0066 {
0067 Source_Internal
0068 , Source_External
0069 };
0070
0071 enum RefFrom
0072 {
0073 RefFrom_Literal
0074 , RefFrom_NonLiteral
0075 };
0076
0077 enum XMLVersion
0078 {
0079 XMLV1_0
0080 , XMLV1_1
0081 , XMLV_Unknown
0082 };
0083
0084
0085
0086
0087
0088 bool isAllSpaces
0089 (
0090 const XMLCh* const toCheck
0091 , const XMLSize_t count
0092 ) const;
0093
0094 bool containsWhiteSpace
0095 (
0096 const XMLCh* const toCheck
0097 , const XMLSize_t count
0098 ) const;
0099
0100
0101 bool isXMLLetter(const XMLCh toCheck) const;
0102 bool isFirstNameChar(const XMLCh toCheck) const;
0103 bool isNameChar(const XMLCh toCheck) const;
0104 bool isPlainContentChar(const XMLCh toCheck) const;
0105 bool isSpecialStartTagChar(const XMLCh toCheck) const;
0106 bool isXMLChar(const XMLCh toCheck) const;
0107 bool isWhitespace(const XMLCh toCheck) const;
0108 bool isControlChar(const XMLCh toCheck) const;
0109 bool isPublicIdChar(const XMLCh toCheck) const;
0110 bool isFirstNCNameChar(const XMLCh toCheck) const;
0111 bool isNCNameChar(const XMLCh toCheck) const;
0112
0113
0114
0115
0116 XMLReader
0117 (
0118 const XMLCh* const pubId
0119 , const XMLCh* const sysId
0120 , BinInputStream* const streamToAdopt
0121 , const RefFrom from
0122 , const Types type
0123 , const Sources source
0124 , const bool throwAtEnd = false
0125 , const bool calculateSrcOfs = true
0126 , XMLSize_t lowWaterMark = 100
0127 , const XMLVersion xmlVersion = XMLV1_0
0128 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
0129 );
0130
0131 XMLReader
0132 (
0133 const XMLCh* const pubId
0134 , const XMLCh* const sysId
0135 , BinInputStream* const streamToAdopt
0136 , const XMLCh* const encodingStr
0137 , const RefFrom from
0138 , const Types type
0139 , const Sources source
0140 , const bool throwAtEnd = false
0141 , const bool calculateSrcOfs = true
0142 , XMLSize_t lowWaterMark = 100
0143 , const XMLVersion xmlVersion = XMLV1_0
0144 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
0145 );
0146
0147 XMLReader
0148 (
0149 const XMLCh* const pubId
0150 , const XMLCh* const sysId
0151 , BinInputStream* const streamToAdopt
0152 , XMLRecognizer::Encodings encodingEnum
0153 , const RefFrom from
0154 , const Types type
0155 , const Sources source
0156 , const bool throwAtEnd = false
0157 , const bool calculateSrcOfs = true
0158 , XMLSize_t lowWaterMark = 100
0159 , const XMLVersion xmlVersion = XMLV1_0
0160 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
0161 );
0162
0163 ~XMLReader();
0164
0165
0166
0167
0168
0169 XMLSize_t charsLeftInBuffer() const;
0170 bool refreshCharBuffer();
0171
0172
0173
0174
0175
0176 bool getName(XMLBuffer& toFill, const bool token);
0177 bool getQName(XMLBuffer& toFill, int* colonPosition);
0178 bool getNCName(XMLBuffer& toFill);
0179 bool getNextChar(XMLCh& chGotten);
0180 bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
0181 void movePlainContentChars(XMLBuffer &dest);
0182 bool getSpaces(XMLBuffer& toFill);
0183 bool getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
0184 bool peekNextChar(XMLCh& chGotten);
0185 bool skipIfQuote(XMLCh& chGotten);
0186 bool skipSpaces(bool& skippedSomething, bool inDecl = false);
0187 bool skippedChar(const XMLCh toSkip);
0188 bool skippedSpace();
0189 bool skippedString(const XMLCh* const toSkip);
0190 bool skippedStringLong(const XMLCh* toSkip);
0191 bool peekString(const XMLCh* const toPeek);
0192
0193
0194
0195
0196
0197 XMLFileLoc getColumnNumber() const;
0198 const XMLCh* getEncodingStr() const;
0199 XMLFileLoc getLineNumber() const;
0200 bool getNoMoreFlag() const;
0201 const XMLCh* getPublicId() const;
0202 XMLSize_t getReaderNum() const;
0203 RefFrom getRefFrom() const;
0204 Sources getSource() const;
0205 XMLFilePos getSrcOffset() const;
0206 const XMLCh* getSystemId() const;
0207 bool getThrowAtEnd() const;
0208 Types getType() const;
0209
0210
0211
0212
0213
0214 bool setEncoding
0215 (
0216 const XMLCh* const newEncoding
0217 );
0218 void setReaderNum(const XMLSize_t newNum);
0219 void setThrowAtEnd(const bool newValue);
0220 void setXMLVersion(const XMLVersion version);
0221
0222
0223 private:
0224
0225
0226
0227 XMLReader(const XMLReader&);
0228 XMLReader& operator=(const XMLReader&);
0229
0230
0231
0232
0233
0234
0235
0236
0237
0238
0239
0240
0241
0242
0243
0244
0245
0246 enum Constants
0247 {
0248 kCharBufSize = 16 * 1024
0249 , kRawBufSize = 48 * 1024
0250 };
0251
0252
0253
0254
0255
0256 void checkForSwapped();
0257
0258 void doInitCharSizeChecks();
0259
0260 void doInitDecode();
0261
0262 XMLByte getNextRawByte
0263 (
0264 const bool eoiOk
0265 );
0266
0267 void refreshRawBuffer();
0268
0269 void setTranscoder
0270 (
0271 const XMLCh* const newEncoding
0272 );
0273
0274 XMLSize_t xcodeMoreChars
0275 (
0276 XMLCh* const bufToFill
0277 , unsigned char* const charSizes
0278 , const XMLSize_t maxChars
0279 );
0280
0281 void handleEOL
0282 (
0283 XMLCh& curCh
0284 , bool inDecl = false
0285 );
0286
0287
0288
0289
0290
0291
0292
0293
0294
0295
0296
0297
0298
0299
0300
0301
0302
0303
0304
0305
0306
0307
0308
0309
0310
0311
0312
0313
0314
0315
0316
0317
0318
0319
0320
0321
0322
0323
0324
0325
0326
0327
0328
0329
0330
0331
0332
0333
0334
0335
0336
0337
0338
0339
0340
0341
0342
0343
0344
0345
0346
0347
0348
0349
0350
0351
0352
0353
0354
0355
0356
0357
0358
0359
0360
0361
0362
0363
0364
0365
0366
0367
0368
0369
0370
0371
0372
0373
0374
0375
0376
0377
0378
0379
0380
0381
0382
0383
0384
0385
0386
0387
0388
0389
0390
0391
0392
0393
0394
0395
0396
0397
0398
0399
0400
0401
0402
0403
0404
0405
0406
0407
0408
0409
0410
0411
0412
0413
0414
0415
0416
0417
0418
0419
0420
0421
0422
0423
0424
0425
0426
0427
0428
0429
0430
0431
0432
0433
0434
0435
0436
0437 XMLSize_t fCharIndex;
0438 XMLCh fCharBuf[kCharBufSize];
0439 XMLSize_t fCharsAvail;
0440 unsigned char fCharSizeBuf[kCharBufSize];
0441 unsigned int fCharOfsBuf[kCharBufSize];
0442 XMLFileLoc fCurCol;
0443 XMLFileLoc fCurLine;
0444 XMLRecognizer::Encodings fEncoding;
0445 XMLCh* fEncodingStr;
0446 bool fForcedEncoding;
0447 bool fNoMore;
0448 XMLCh* fPublicId;
0449 XMLSize_t fRawBufIndex;
0450 XMLByte fRawByteBuf[kRawBufSize];
0451 XMLSize_t fRawBytesAvail;
0452 XMLSize_t fLowWaterMark;
0453 XMLSize_t fReaderNum;
0454 RefFrom fRefFrom;
0455 bool fSentTrailingSpace;
0456 Sources fSource;
0457 XMLFilePos fSrcOfsBase;
0458 bool fSrcOfsSupported;
0459 bool fCalculateSrcOfs;
0460 XMLCh* fSystemId;
0461 BinInputStream* fStream;
0462 bool fSwapped;
0463 bool fThrowAtEnd;
0464 XMLTranscoder* fTranscoder;
0465 Types fType;
0466 XMLByte* fgCharCharsTable;
0467 bool fNEL;
0468 XMLVersion fXMLVersion;
0469 MemoryManager* fMemoryManager;
0470 };
0471
0472
0473
0474
0475
0476 inline bool XMLReader::isNameChar(const XMLCh toCheck) const
0477 {
0478 return ((fgCharCharsTable[toCheck] & gNameCharMask) != 0);
0479 }
0480
0481 inline bool XMLReader::isNCNameChar(const XMLCh toCheck) const
0482 {
0483 return ((fgCharCharsTable[toCheck] & gNCNameCharMask) != 0);
0484 }
0485
0486 inline bool XMLReader::isPlainContentChar(const XMLCh toCheck) const
0487 {
0488 return ((fgCharCharsTable[toCheck] & gPlainContentCharMask) != 0);
0489 }
0490
0491
0492 inline bool XMLReader::isFirstNameChar(const XMLCh toCheck) const
0493 {
0494 return ((fgCharCharsTable[toCheck] & gFirstNameCharMask) != 0);
0495 }
0496
0497 inline bool XMLReader::isFirstNCNameChar(const XMLCh toCheck) const
0498 {
0499 return (((fgCharCharsTable[toCheck] & gFirstNameCharMask) != 0)
0500 && (toCheck != chColon));
0501 }
0502
0503 inline bool XMLReader::isSpecialStartTagChar(const XMLCh toCheck) const
0504 {
0505 return ((fgCharCharsTable[toCheck] & gSpecialStartTagCharMask) != 0);
0506 }
0507
0508 inline bool XMLReader::isXMLChar(const XMLCh toCheck) const
0509 {
0510 return ((fgCharCharsTable[toCheck] & gXMLCharMask) != 0);
0511 }
0512
0513 inline bool XMLReader::isXMLLetter(const XMLCh toCheck) const
0514 {
0515 return (((fgCharCharsTable[toCheck] & gFirstNameCharMask) != 0)
0516 && (toCheck != chColon) && (toCheck != chUnderscore));
0517 }
0518
0519 inline bool XMLReader::isWhitespace(const XMLCh toCheck) const
0520 {
0521 return ((fgCharCharsTable[toCheck] & gWhitespaceCharMask) != 0);
0522 }
0523
0524 inline bool XMLReader::isControlChar(const XMLCh toCheck) const
0525 {
0526 return ((fgCharCharsTable[toCheck] & gControlCharMask) != 0);
0527 }
0528
0529
0530
0531
0532 inline XMLSize_t XMLReader::charsLeftInBuffer() const
0533 {
0534 return fCharsAvail - fCharIndex;
0535 }
0536
0537
0538
0539
0540
0541 inline XMLFileLoc XMLReader::getColumnNumber() const
0542 {
0543 return fCurCol;
0544 }
0545
0546 inline const XMLCh* XMLReader::getEncodingStr() const
0547 {
0548 return fEncodingStr;
0549 }
0550
0551 inline XMLFileLoc XMLReader::getLineNumber() const
0552 {
0553 return fCurLine;
0554 }
0555
0556 inline bool XMLReader::getNoMoreFlag() const
0557 {
0558 return fNoMore;
0559 }
0560
0561 inline const XMLCh* XMLReader::getPublicId() const
0562 {
0563 return fPublicId;
0564 }
0565
0566 inline XMLSize_t XMLReader::getReaderNum() const
0567 {
0568 return fReaderNum;
0569 }
0570
0571 inline XMLReader::RefFrom XMLReader::getRefFrom() const
0572 {
0573 return fRefFrom;
0574 }
0575
0576 inline XMLReader::Sources XMLReader::getSource() const
0577 {
0578 return fSource;
0579 }
0580
0581 inline const XMLCh* XMLReader::getSystemId() const
0582 {
0583 return fSystemId;
0584 }
0585
0586 inline bool XMLReader::getThrowAtEnd() const
0587 {
0588 return fThrowAtEnd;
0589 }
0590
0591 inline XMLReader::Types XMLReader::getType() const
0592 {
0593 return fType;
0594 }
0595
0596
0597
0598
0599 inline void XMLReader::setReaderNum(const XMLSize_t newNum)
0600 {
0601 fReaderNum = newNum;
0602 }
0603
0604 inline void XMLReader::setThrowAtEnd(const bool newValue)
0605 {
0606 fThrowAtEnd = newValue;
0607 }
0608
0609 inline void XMLReader::setXMLVersion(const XMLVersion version)
0610 {
0611 fXMLVersion = version;
0612 if (version == XMLV1_1) {
0613 fNEL = true;
0614 fgCharCharsTable = XMLChar1_1::fgCharCharsTable1_1;
0615 }
0616 else {
0617 fNEL = XMLChar1_0::enableNEL;
0618 fgCharCharsTable = XMLChar1_0::fgCharCharsTable1_0;
0619 }
0620
0621 }
0622
0623
0624
0625
0626
0627
0628
0629
0630
0631
0632
0633
0634
0635 inline void XMLReader::movePlainContentChars(XMLBuffer &dest)
0636 {
0637 const XMLSize_t chunkSize = fCharsAvail - fCharIndex;
0638 const XMLCh* cursor = &fCharBuf[fCharIndex];
0639 XMLSize_t count=0;
0640 for(;count<chunkSize && (fgCharCharsTable[*cursor++] & gPlainContentCharMask) != 0;++count) ;
0641
0642 if (count!=0)
0643 {
0644 dest.append(&fCharBuf[fCharIndex], count);
0645 fCharIndex += count;
0646 fCurCol += (XMLFileLoc)count;
0647 }
0648 }
0649
0650
0651
0652
0653
0654 inline bool XMLReader::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
0655 {
0656
0657
0658
0659
0660 if (fCharIndex >= fCharsAvail)
0661 {
0662
0663 if (fNoMore)
0664 return false;
0665
0666
0667 if (!refreshCharBuffer())
0668 return false;
0669 }
0670
0671
0672 if (fCharBuf[fCharIndex] == chNotToGet)
0673 return false;
0674
0675
0676 chGotten = fCharBuf[fCharIndex++];
0677
0678
0679
0680
0681
0682
0683
0684
0685
0686
0687
0688
0689
0690
0691
0692
0693
0694 if ( chGotten & (XMLCh) ~(chCR|chLF|chNEL|chLineSeparator) )
0695 {
0696 fCurCol++;
0697 } else
0698 {
0699 handleEOL(chGotten, false);
0700 }
0701
0702 return true;
0703 }
0704
0705
0706
0707
0708 inline bool XMLReader::getNextChar(XMLCh& chGotten)
0709 {
0710
0711
0712
0713
0714 if (fCharIndex >= fCharsAvail)
0715 {
0716
0717 if (fNoMore)
0718 return false;
0719
0720
0721 if (!refreshCharBuffer())
0722 return false;
0723 }
0724
0725 chGotten = fCharBuf[fCharIndex++];
0726
0727
0728
0729
0730
0731
0732
0733
0734
0735
0736
0737
0738
0739
0740
0741
0742
0743 if ( chGotten & (XMLCh) ~(chCR|chLF|chNEL|chLineSeparator) )
0744 {
0745 fCurCol++;
0746 } else
0747 {
0748 handleEOL(chGotten, false);
0749 }
0750
0751 return true;
0752 }
0753
0754
0755
0756
0757
0758 inline bool XMLReader::peekNextChar(XMLCh& chGotten)
0759 {
0760
0761
0762
0763
0764 if (fCharIndex >= fCharsAvail)
0765 {
0766
0767 if (!refreshCharBuffer())
0768 {
0769 chGotten = chNull;
0770 return false;
0771 }
0772 }
0773
0774 chGotten = fCharBuf[fCharIndex];
0775
0776
0777
0778
0779
0780
0781 if ((chGotten == chCR || (fNEL && (chGotten == chNEL || chGotten == chLineSeparator)))
0782 && (fSource == Source_External))
0783 chGotten = chLF;
0784
0785 return true;
0786 }
0787
0788 XERCES_CPP_NAMESPACE_END
0789
0790 #endif