Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-16 10:34:13

0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *      http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 /*
0019  * $Id$
0020  */
0021 
0022 #if !defined(XERCESC_INCLUDE_GUARD_XMLREADER_HPP)
0023 #define XERCESC_INCLUDE_GUARD_XMLREADER_HPP
0024 
0025 #include <xercesc/util/XMLChar.hpp>
0026 #include <xercesc/framework/XMLRecognizer.hpp>
0027 #include <xercesc/framework/XMLBuffer.hpp>
0028 #include <xercesc/util/TranscodingException.hpp>
0029 
0030 XERCES_CPP_NAMESPACE_BEGIN
0031 
0032 class InputSource;
0033 class BinInputStream;
0034 class ReaderMgr;
0035 class XMLScanner;
0036 class XMLTranscoder;
0037 
0038 
0039 // ---------------------------------------------------------------------------
0040 //  Instances of this class are used to manage the content of entities. The
0041 //  scanner maintains a stack of these, one for each entity (this means entity
0042 //  in the sense of any parsed file or internal entity) currently being
0043 //  scanned. This class, given a binary input stream will handle reading in
0044 //  the data and decoding it from its external decoding into the internal
0045 //  Unicode format. Once internalized, this class provides the access
0046 //  methods to read in the data in various ways, maintains line and column
0047 //  information, and provides high performance character attribute checking
0048 //  methods.
0049 //
0050 //  This is NOT to be derived from.
0051 //
0052 // ---------------------------------------------------------------------------
0053 class XMLPARSER_EXPORT XMLReader : public XMemory
0054 {
0055 public:
0056     // -----------------------------------------------------------------------
0057     //  Public types
         //
         //  Types
         //      Whether a reader represents a PE (Type_PE) or not (Type_General);
         //      stored in fType.
         //
         //  Sources
         //      Stored in fSource. peekNextChar() only maps line-end characters
         //      to chLF for Source_External readers.
         //
         //  RefFrom
         //      Whether the entity is being expanded inside a literal or not;
         //      stored in fRefFrom.
         //
         //  XMLVersion
         //      The XML version a reader follows. setXMLVersion() applies the
         //      1.0 rules to every value other than XMLV1_1.
0058     // -----------------------------------------------------------------------
0059     enum Types
0060     {
0061         Type_PE
0062         , Type_General
0063     };
0064
0065     enum Sources
0066     {
0067         Source_Internal
0068         , Source_External
0069     };
0070
0071     enum RefFrom
0072     {
0073         RefFrom_Literal
0074         , RefFrom_NonLiteral
0075     };
0076
0077     enum XMLVersion
0078     {
0079         XMLV1_0
0080         , XMLV1_1
0081         , XMLV_Unknown
0082     };
0083
0084
0085     // -----------------------------------------------------------------------
0086     //  Public, query methods
         //
         //  The single character predicates that are defined inline further
         //  down in this header are lookups in the table that fgCharCharsTable
         //  points to, so their answers follow the XML version last selected
         //  through setXMLVersion(). isAllSpaces(), containsWhiteSpace() and
         //  isPublicIdChar() have no inline definition in this header.
0087     // -----------------------------------------------------------------------
0088     bool isAllSpaces
0089     (
0090         const   XMLCh* const    toCheck
0091         , const XMLSize_t       count
0092     ) const;
0093
0094     bool containsWhiteSpace
0095     (
0096         const   XMLCh* const    toCheck
0097         , const XMLSize_t       count
0098     ) const;
0099
0100
0101     bool isXMLLetter(const XMLCh toCheck) const;
0102     bool isFirstNameChar(const XMLCh toCheck) const;
0103     bool isNameChar(const XMLCh toCheck) const;
0104     bool isPlainContentChar(const XMLCh toCheck) const;
0105     bool isSpecialStartTagChar(const XMLCh toCheck) const;
0106     bool isXMLChar(const XMLCh toCheck) const;
0107     bool isWhitespace(const XMLCh toCheck) const;
0108     bool isControlChar(const XMLCh toCheck) const;
0109     bool isPublicIdChar(const XMLCh toCheck) const;
0110     bool isFirstNCNameChar(const XMLCh toCheck) const;
0111     bool isNCNameChar(const XMLCh toCheck) const;
0112
0113     // -----------------------------------------------------------------------
0114     //  Constructors and Destructor
         //
         //  The three constructors take the same arguments except for the
         //  encoding: the first takes none, the second takes the encoding
         //  name (encodingStr) and the third takes an XMLRecognizer::Encodings
         //  value (encodingEnum).
         //
         //  NOTE(review): the name streamToAdopt suggests that the reader takes
         //  ownership of the stream; the constructor and destructor bodies are
         //  not part of this header, so confirm in the implementation file.
0115     // -----------------------------------------------------------------------
0116     XMLReader
0117     (
0118         const   XMLCh* const          pubId
0119         , const XMLCh* const          sysId
0120         ,       BinInputStream* const streamToAdopt
0121         , const RefFrom               from
0122         , const Types                 type
0123         , const Sources               source
0124         , const bool                  throwAtEnd = false
0125         , const bool                  calculateSrcOfs = true
0126         ,       XMLSize_t             lowWaterMark = 100
0127         , const XMLVersion            xmlVersion = XMLV1_0
0128         ,       MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
0129     );
0130
0131     XMLReader
0132     (
0133         const   XMLCh* const          pubId
0134         , const XMLCh* const          sysId
0135         ,       BinInputStream* const streamToAdopt
0136         , const XMLCh* const          encodingStr
0137         , const RefFrom               from
0138         , const Types                 type
0139         , const Sources               source
0140         , const bool                  throwAtEnd = false
0141         , const bool                  calculateSrcOfs = true
0142         ,       XMLSize_t             lowWaterMark = 100
0143         , const XMLVersion            xmlVersion = XMLV1_0
0144         ,       MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
0145     );
0146
0147     XMLReader
0148     (
0149         const   XMLCh* const          pubId
0150         , const XMLCh* const          sysId
0151         ,       BinInputStream* const streamToAdopt
0152         , XMLRecognizer::Encodings    encodingEnum
0153         , const RefFrom               from
0154         , const Types                 type
0155         , const Sources               source
0156         , const bool                  throwAtEnd = false
0157         , const bool                  calculateSrcOfs = true
0158         ,       XMLSize_t             lowWaterMark = 100
0159         , const XMLVersion            xmlVersion = XMLV1_0
0160         ,       MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
0161     );
0162
0163     ~XMLReader();
0164
0165
0166     // -----------------------------------------------------------------------
0167     //  Character buffer management methods
         //
         //  charsLeftInBuffer() is defined inline in this header as
         //  fCharsAvail - fCharIndex. refreshCharBuffer() is defined elsewhere;
         //  the inline scanning methods call it when the buffer has run out and
         //  treat a false return as "no character available".
0168     // -----------------------------------------------------------------------
0169     XMLSize_t charsLeftInBuffer() const;
0170     bool refreshCharBuffer();
0171
0172
0173     // -----------------------------------------------------------------------
0174     //  Scanning methods
         //
         //  getNextChar(), getNextCharIfNot(), movePlainContentChars() and
         //  peekNextChar() are defined inline in this header; the remaining
         //  methods are only declared here.
0175     // -----------------------------------------------------------------------
0176     bool getName(XMLBuffer& toFill, const bool token);
0177     bool getQName(XMLBuffer& toFill, int* colonPosition);
0178     bool getNCName(XMLBuffer& toFill);
0179     bool getNextChar(XMLCh& chGotten);
0180     bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
0181     void movePlainContentChars(XMLBuffer &dest);
0182     bool getSpaces(XMLBuffer& toFill);
0183     bool getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
0184     bool peekNextChar(XMLCh& chGotten);
0185     bool skipIfQuote(XMLCh& chGotten);
0186     bool skipSpaces(bool& skippedSomething, bool inDecl = false);
0187     bool skippedChar(const XMLCh toSkip);
0188     bool skippedSpace();
0189     bool skippedString(const XMLCh* const toSkip);
0190     bool skippedStringLong(const XMLCh* toSkip);
0191     bool peekString(const XMLCh* const toPeek);
0192
0193
0194     // -----------------------------------------------------------------------
0195     //  Getter methods
         //
         //  Apart from getSrcOffset(), which has no inline definition in this
         //  header, each getter returns the data member of the matching name.
0196     // -----------------------------------------------------------------------
0197     XMLFileLoc getColumnNumber() const;
0198     const XMLCh* getEncodingStr() const;
0199     XMLFileLoc getLineNumber() const;
0200     bool getNoMoreFlag() const;
0201     const XMLCh* getPublicId() const;
0202     XMLSize_t getReaderNum() const;
0203     RefFrom getRefFrom() const;
0204     Sources getSource() const;
0205     XMLFilePos getSrcOffset() const;
0206     const XMLCh* getSystemId() const;
0207     bool getThrowAtEnd() const;
0208     Types getType() const;
0209
0210
0211     // -----------------------------------------------------------------------
0212     //  Setter methods
         //
         //  setXMLVersion() also updates fNEL and fgCharCharsTable so that they
         //  agree with the version passed in.
0213     // -----------------------------------------------------------------------
0214     bool setEncoding
0215     (
0216         const   XMLCh* const    newEncoding
0217     );
0218     void setReaderNum(const XMLSize_t newNum);
0219     void setThrowAtEnd(const bool newValue);
0220     void setXMLVersion(const XMLVersion version);
0221
0222
0223 private:
0224     // -----------------------------------------------------------------------
0225     //  Unimplemented constructors and operators
         //
         //  Declared private so that readers cannot be copied or assigned from
         //  outside the class.
0226     // -----------------------------------------------------------------------
0227     XMLReader(const XMLReader&);
0228     XMLReader& operator=(const XMLReader&);
0229
0230     // ---------------------------------------------------------------------------
0231     //  Class Constants
0232     //
0233     //  kCharBufSize
0234     //      The size of the character spool buffer that we use. It's not terribly
0235     //      large because it's just getting filled with data from a raw byte
0236     //      buffer as we go along. We don't want to decode all the text at
0237     //      once before we find out that there is an error.
0238     //
0239     //      NOTE: This is a size in characters, not bytes.
0240     //
0241     //  kRawBufSize
0242     //      The size of the raw buffer from which raw bytes are spooled out
0243     //      as we transcode chunks of data. As it is emptied, it is filled back
0244     //      in again from the source stream.
0245     // ---------------------------------------------------------------------------
0246     enum Constants
0247     {
0248         kCharBufSize        = 16 * 1024
0249         , kRawBufSize       = 48 * 1024
0250     };
0251
0252
0253     // -----------------------------------------------------------------------
0254     //  Private helper methods
         //
         //  None of these is defined in this header. handleEOL() is what the
         //  inline getNextChar() and getNextCharIfNot() call when the character
         //  just read may start an end-of-line sequence.
0255     // -----------------------------------------------------------------------
0256     void checkForSwapped();
0257
0258     void doInitCharSizeChecks();
0259
0260     void doInitDecode();
0261
0262     XMLByte getNextRawByte
0263     (
0264         const   bool            eoiOk
0265     );
0266
0267     void refreshRawBuffer();
0268
0269     void setTranscoder
0270     (
0271         const   XMLCh* const    newEncoding
0272     );
0273
0274     XMLSize_t xcodeMoreChars
0275     (
0276                 XMLCh* const            bufToFill
0277         ,       unsigned char* const    charSizes
0278         , const XMLSize_t               maxChars
0279     );
0280
0281     void handleEOL
0282     (
0283               XMLCh&   curCh
0284             , bool     inDecl = false
0285     );
0286
0287     // -----------------------------------------------------------------------
0288     //  Data members
0289     //
0290     //  fCharIndex
0291     //      The index into the character buffer. When this hits fCharsAvail
0292     //      then it's time to refill.
0293     //
0294     //  fCharBuf
0295     //      A buffer that the reader manager fills up with transcoded
0296     //      characters a small amount at a time.
0297     //
0298     //  fCharsAvail
0299     //      The characters currently available in the character buffer.
0300     //
0301     //  fCharSizeBuf
0302     //      This buffer is an array that contains the number of source chars
0303     //      eaten to create each char in the fCharBuf buffer. So the entry
0304     //      fCharSizeBuf[x] is the number of source chars that were eaten
0305     //      to make the internalized char fCharBuf[x]. This only contains
0306     //      useful data if fSrcOfsSupported is true.
0307     //
0308     //  fCharOfsBuf
0309     //      This buffer is an array that contains the offset in the
0310     //      fRawByteBuf buffer of each char in the fCharBuf buffer. It
0311     //      only contains useful data if fSrcOfsSupported is true.
0312     //
0313     //  fCurCol
0314     //  fCurLine
0315     //      The current line and column that we are in within this reader's
0316     //      text.
0317     //
0318     //  fEncoding
0319     //      This is the rough encoding setting. This enum is set during
0320     //      construction and just tells us the rough family of encoding that
0321     //      we are doing.
0322     //
0323     //  fEncodingStr
0324     //      This is the name of the encoding we are using. It will be
0325     //      provisionally set during construction, from the auto-sensed
0326     //      encoding. But it might be overridden when the XMLDecl is finally
0327     //      seen by the scanner. It can also be forced to a particular
0328     //      encoding, in which case fForcedEncoding is set.
0329     //
0330     //  fForcedEncoding
0331     //      If the encoding is forced then this is set and all other
0332     //      information will be ignored. This encoding will be taken as
0333     //      gospel. This is done by calling an alternate constructor.
0334     //
0335     //  fNoMore
0336     //      This is set when the source text is exhausted. It lets us know
0337     //      quickly that no more text is available.
0338     //
0339     //  fRawBufIndex
0340     //      The current index into the raw byte buffer. When it's equal to
0341     //      fRawBytesAvail then we need to read another buffer.
0342     //
0343     //  fRawByteBuf
0344     //      This is the raw byte buffer that is used to spool out bytes
0345     //      from into the fCharBuf buffer, as we transcode in blocks.
0346     //
0347     //  fRawBytesAvail
0348     //      The number of bytes currently available in the raw buffer. This
0349     //      helps deal with the last buffer's worth, which will usually not
0350     //      be a full one.
0351     //
0352     //  fLowWaterMark
0353     //      The low water mark for the raw byte buffer.
0354     //
0355     //
0356     //  fReaderNum
0357     //      Each reader from a particular reader manager (which means from a
0358     //      particular document) is given a unique number. The reader manager
0359     //      sets these numbers. They are used to catch things like partial
0360     //      markup errors.
0361     //
0362     //  fRefFrom
0363     //      This flag is provided in the ctor, and tells us if we represent
0364     //      some entity being expanded inside a literal. Sometimes things
0365     //      happen differently inside and outside literals.
0366     //
0367     //  fPublicId
0368     //  fSystemId
0369     //      These are the system and public ids of the source that this
0370     //      reader is reading.
0371     //
0372     //  fSentTrailingSpace
0373     //      If we are a PE entity being read and we are not referenced from a
0374     //      literal, then a leading and trailing space must be faked into the
0375     //      data. This lets us know we've done the trailing space already (so
0376     //      we don't just keep doing it again and again.)
0377     //
0378     //  fSource
0379     //      Indicates whether the content this reader is spooling has already
0380     //      been internalized. This will prevent multiple processing of
0381     //      whitespace when an already internalized entity is being spooled
0382     //      out.
0383     //
0384     //  fSpareChar
0385     //      Some encodings can create two chars in an atomic way, e.g.
0386     //      surrogate pairs. We might not be able to store both, so we store
0387     //      it here until the next buffer transcoding operation.
         //      NOTE(review): no member called fSpareChar is declared below, so
         //      this entry looks like a leftover - confirm before relying on it.
0388     //
0389     //  fSrcOfsBase
0390     //      This is the base offset within the source of this entity. Values
0391     //      in the current fCharSizeBuf array are relative to this value.
0392     //
0393     //  fSrcOfsSupported
0394     //      This flag is set to indicate whether source byte offset info
0395     //      is supported. For intrinsic encodings, it's always set since we
0396     //      can always support it. For transcoder based encodings, we ask
0397     //      the transcoder if it supports it or not.
0398     //
0399     //  fStream
0400     //      This is the input stream that provides the data for the reader.
0401     //      It's always treated as a raw byte stream. The derived class will
0402     //      ask for buffers of text from it and will handle making some
0403     //      sense of it.
         //      NOTE(review): this class is documented as not to be derived
         //      from, so "the derived class" presumably means the reader
         //      itself - confirm against the implementation.
0404     //
0405     //  fSwapped
0406     //      If the encoding is one of the ones we do intrinsically, and it's
0407     //      in a different byte order from our native order, then this is
0408     //      set to remind us to byte swap it during transcoding.
0409     //
0410     //  fThrowAtEnd
0411     //      Indicates whether the reader manager should throw an end of entity
0412     //      exception at the end of this reader instance. This is usually
0413     //      set for top level external entity references. It overrides the
0414     //      reader manager's global flag that controls throwing at the end
0415     //      of entities. Defaults to false.
0416     //
0417     //  fTranscoder
0418     //      If the encoding is not one that we handle intrinsically, then
0419     //      we use an external transcoder to do it. This class is an
0420     //      abstraction that allows us to use pluggable external transcoding
0421     //      services (via XMLTransService in util.)
0422     //
0423     //  fType
0424     //      Indicates whether this reader represents a PE (Type_PE) or not
0425     //      (Type_General). Going by the fSentTrailingSpace entry, a PE that
0426     //      is not referenced from a literal gets an extra space at the end.
         //      NOTE(review): older wording here spoke of this "flag" being true
         //      and of an fInLiteral flag; fType is an enum and no fInLiteral
         //      member exists, fRefFrom presumably took over that role.
0427     //
0428     //  fgCharCharsTable
0429     //      Pointer to XMLChar table, depends on XML version
         //      (setXMLVersion() points it at the XML 1.1 table for XMLV1_1 and
         //      at the XML 1.0 table otherwise).
0430     //
0431     //  fNEL
0432     //      Boolean indicates if NEL and LSEP should be recognized as NEL
         //      (setXMLVersion() sets it to true for XMLV1_1 and copies
         //      XMLChar1_0::enableNEL otherwise).
0433     //
0434     //  fXMLVersion
0435     //      Enum to indicate if this Reader is conforming to XML 1.0 or XML 1.1
         //
         //  fCalculateSrcOfs
         //      NOTE(review): presumably holds the calculateSrcOfs constructor
         //      argument and switches source offset bookkeeping on or off; the
         //      code using it is not in this header - confirm.
         //
         //  fMemoryManager
         //      NOTE(review): presumably the MemoryManager handed to the
         //      constructor, used for this reader's allocations - confirm.
0436     // -----------------------------------------------------------------------
0437     XMLSize_t                   fCharIndex;
0438     XMLCh                       fCharBuf[kCharBufSize];
0439     XMLSize_t                   fCharsAvail;
0440     unsigned char               fCharSizeBuf[kCharBufSize];
0441     unsigned int                fCharOfsBuf[kCharBufSize];
0442     XMLFileLoc                  fCurCol;
0443     XMLFileLoc                  fCurLine;
0444     XMLRecognizer::Encodings    fEncoding;
0445     XMLCh*                      fEncodingStr;
0446     bool                        fForcedEncoding;
0447     bool                        fNoMore;
0448     XMLCh*                      fPublicId;
0449     XMLSize_t                   fRawBufIndex;
0450     XMLByte                     fRawByteBuf[kRawBufSize];
0451     XMLSize_t                   fRawBytesAvail;
0452     XMLSize_t                   fLowWaterMark;
0453     XMLSize_t                   fReaderNum;
0454     RefFrom                     fRefFrom;
0455     bool                        fSentTrailingSpace;
0456     Sources                     fSource;
0457     XMLFilePos                  fSrcOfsBase;
0458     bool                        fSrcOfsSupported;
0459     bool                        fCalculateSrcOfs;
0460     XMLCh*                      fSystemId;
0461     BinInputStream*             fStream;
0462     bool                        fSwapped;
0463     bool                        fThrowAtEnd;
0464     XMLTranscoder*              fTranscoder;
0465     Types                       fType;
0466     XMLByte*                    fgCharCharsTable;
0467     bool                        fNEL;
0468     XMLVersion                  fXMLVersion;
0469     MemoryManager*              fMemoryManager;
0470 };
0471 
0472 
0473 // ---------------------------------------------------------------------------
0474 //  XMLReader: Public, query methods
0475 // ---------------------------------------------------------------------------
0476 inline bool XMLReader::isNameChar(const XMLCh toCheck) const
0477 {
         // A name character is one whose entry in the active character table
         // has a gNameCharMask bit set.
0478     return (0 != (fgCharCharsTable[toCheck] & gNameCharMask));
0479 }
0480 
0481 inline bool XMLReader::isNCNameChar(const XMLCh toCheck) const
0482 {
         // An NCName character is one whose entry in the active character
         // table has a gNCNameCharMask bit set.
0483     return (0 != (fgCharCharsTable[toCheck] & gNCNameCharMask));
0484 }
0485 
0486 inline bool XMLReader::isPlainContentChar(const XMLCh toCheck) const
0487 {
         // Plain content is whatever the active character table marks with a
         // gPlainContentCharMask bit.
0488     return (0 != (fgCharCharsTable[toCheck] & gPlainContentCharMask));
0489 }
0490 
0491 
0492 inline bool XMLReader::isFirstNameChar(const XMLCh toCheck) const
0493 {
         // A name may start with any character whose table entry has a
         // gFirstNameCharMask bit set.
0494     return (0 != (fgCharCharsTable[toCheck] & gFirstNameCharMask));
0495 }
0496 
0497 inline bool XMLReader::isFirstNCNameChar(const XMLCh toCheck) const
0498 {
         // Same table test as isFirstNameChar(), but a colon is never accepted.
0499     const bool startsName = (0 != (fgCharCharsTable[toCheck] & gFirstNameCharMask));
0500     return startsName && (chColon != toCheck);
0501 }
0502 
0503 inline bool XMLReader::isSpecialStartTagChar(const XMLCh toCheck) const
0504 {
         // True when the table entry for toCheck has a
         // gSpecialStartTagCharMask bit set.
0505     return (0 != (fgCharCharsTable[toCheck] & gSpecialStartTagCharMask));
0506 }
0507 
0508 inline bool XMLReader::isXMLChar(const XMLCh toCheck) const
0509 {
         // True when the table entry for toCheck has a gXMLCharMask bit set.
0510     return (0 != (fgCharCharsTable[toCheck] & gXMLCharMask));
0511 }
0512 
0513 inline bool XMLReader::isXMLLetter(const XMLCh toCheck) const
0514 {
         // A letter is a first-name character other than colon and underscore.
0515     const bool startsName = (0 != (fgCharCharsTable[toCheck] & gFirstNameCharMask));
0516     return startsName && (chColon != toCheck) && (chUnderscore != toCheck);
0517 }
0518 
0519 inline bool XMLReader::isWhitespace(const XMLCh toCheck) const
0520 {
         // True when the table entry for toCheck has a gWhitespaceCharMask
         // bit set.
0521     return (0 != (fgCharCharsTable[toCheck] & gWhitespaceCharMask));
0522 }
0523 
0524 inline bool XMLReader::isControlChar(const XMLCh toCheck) const
0525 {
         // True when the table entry for toCheck has a gControlCharMask bit set.
0526     return (0 != (fgCharCharsTable[toCheck] & gControlCharMask));
0527 }
0528 
0529 // ---------------------------------------------------------------------------
0530 //  XMLReader: Buffer management methods
0531 // ---------------------------------------------------------------------------
0532 inline XMLSize_t XMLReader::charsLeftInBuffer() const
0533 {
         // Characters between the current read index and the fill level of
         // the character buffer.
0534     return (this->fCharsAvail - this->fCharIndex);
0535 }
0536 
0537 
0538 // ---------------------------------------------------------------------------
0539 //  XMLReader: Getter methods
0540 // ---------------------------------------------------------------------------
0541 inline XMLFileLoc XMLReader::getColumnNumber() const
0542 {
         // Column the reader is currently at.
0543     return this->fCurCol;
0544 }
0545 
0546 inline const XMLCh* XMLReader::getEncodingStr() const
0547 {
         // Name of the encoding in use; the pointer refers to the reader's
         // own string member.
0548     return this->fEncodingStr;
0549 }
0550 
0551 inline XMLFileLoc XMLReader::getLineNumber() const
0552 {
         // Line the reader is currently at.
0553     return this->fCurLine;
0554 }
0555 
0556 inline bool XMLReader::getNoMoreFlag() const
0557 {
         // Current value of the fNoMore flag.
0558     return this->fNoMore;
0559 }
0560 
0561 inline const XMLCh* XMLReader::getPublicId() const
0562 {
         // Public id of the source being read; the pointer refers to the
         // reader's own string member.
0563     return this->fPublicId;
0564 }
0565 
0566 inline XMLSize_t XMLReader::getReaderNum() const
0567 {
         // Number most recently assigned through setReaderNum().
0568     return this->fReaderNum;
0569 }
0570 
0571 inline XMLReader::RefFrom XMLReader::getRefFrom() const
0572 {
         // Current value of the fRefFrom member.
0573     return this->fRefFrom;
0574 }
0575 
0576 inline XMLReader::Sources XMLReader::getSource() const
0577 {
         // Current value of the fSource member.
0578     return this->fSource;
0579 }
0580 
0581 inline const XMLCh* XMLReader::getSystemId() const
0582 {
         // System id of the source being read; the pointer refers to the
         // reader's own string member.
0583     return this->fSystemId;
0584 }
0585 
0586 inline bool XMLReader::getThrowAtEnd() const
0587 {
         // Current value of the flag that setThrowAtEnd() changes.
0588     return this->fThrowAtEnd;
0589 }
0590 
0591 inline XMLReader::Types XMLReader::getType() const
0592 {
         // Current value of the fType member.
0593     return this->fType;
0594 }
0595 
0596 // ---------------------------------------------------------------------------
0597 //  XMLReader: Setter methods
0598 // ---------------------------------------------------------------------------
0599 inline void XMLReader::setReaderNum(const XMLSize_t newNum)
0600 {
         // Store the number that getReaderNum() reports from now on.
0601     this->fReaderNum = newNum;
0602 }
0603 
0604 inline void XMLReader::setThrowAtEnd(const bool newValue)
0605 {
         // Store the flag that getThrowAtEnd() reports from now on.
0606     this->fThrowAtEnd = newValue;
0607 }
0608 
0609 inline void XMLReader::setXMLVersion(const XMLVersion version)
0610 {
0611     // Every version other than XML 1.1 (XMLV_Unknown included) gets the 1.0 rules.
0612     if (version != XMLV1_1) {
0613         fgCharCharsTable = XMLChar1_0::fgCharCharsTable1_0;
0614         fNEL = XMLChar1_0::enableNEL;
0615     }
0616     else {
0617         fgCharCharsTable = XMLChar1_1::fgCharCharsTable1_1;
0618         fNEL = true;
0619     }
0620     fXMLVersion = version;
0621 }
0622 
0623 
0624 
0625 // ---------------------------------------------------------------------------
0626 //
0627 //  XMLReader: movePlainContentChars()
0628 //
0629 //       Move as many plain (no special handling of any sort required) content
0630 //       characters as possible from this reader to the supplied destination buffer.
0631 //
0632 //       This is THE hottest performance spot in the parser.
0633 //
0634 // ---------------------------------------------------------------------------
0635 inline void XMLReader::movePlainContentChars(XMLBuffer &dest)
0636 {
         // Start of the unread part of the character buffer and its length.
0637     const XMLCh* const runStart = fCharBuf + fCharIndex;
0638     const XMLSize_t available = fCharsAvail - fCharIndex;
         // Measure the leading run of characters flagged as plain content; the
         // bounds test comes first so nothing past the unread part is examined.
0639     XMLSize_t runLen = 0;
0640     while (runLen < available
0641            && (fgCharCharsTable[runStart[runLen]] & gPlainContentCharMask) != 0)
0642         ++runLen;
         // Nothing plain at the front: destination and reader stay untouched.
0643     if (runLen == 0)
0644         return;
         // Copy the run in one go, then consume it. The whole run counts
         // towards the column; the line number is not touched here.
0645     dest.append(runStart, runLen);
0646     fCharIndex += runLen;
0647     fCurCol    += static_cast<XMLFileLoc>(runLen);
0648 }
0649 
0650 
0651 // ---------------------------------------------------------------------------
0652 //  XMLReader: getNextCharIfNot() method inlined for speed
0653 // ---------------------------------------------------------------------------
0654 inline bool XMLReader::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
0655 {
0656     //
0657     //  With nothing left in the character buffer a reload is needed. No
0658     //  reload is attempted once fNoMore is set, and a reload can fail; in
0659     //  both cases there is no character to hand out.
0660     //
0661     if (fCharIndex >= fCharsAvail)
0662     {
0663         if (fNoMore || !refreshCharBuffer())
0664             return false;
0665     }
0666
0667     // Look at the upcoming char first; the unwanted one is left unconsumed.
0668     const XMLCh upcoming = fCharBuf[fCharIndex];
0669     if (upcoming == chNotToGet)
0670         return false;
0671
0672     // Anything else is handed to the caller and consumed.
0673     chGotten = upcoming;
0674     ++fCharIndex;
0675
0676     // End of line normalization and line/col member maintenance.
0677     //
0678     // An end-of-line combination starts with one of
0679     // chCR(xD), chLF(xA), chNEL(x85), or chLineSeparator(x2028):
0680     //
0681     // 0000000000001101 chCR
0682     // 0000000000001010 chLF
0683     // 0000000010000101 chNEL
0684     // 0010000000101000 chLineSeparator
0685     // -----------------------
0686     // 1101111101010000 == ~(chCR|chLF|chNEL|chLineSeparator)
0687     //
0688     // Bitwise-AND of chGotten with that inverted mask:
0689     // zero     : chGotten can be chCR, chLF, chNEL or chLineSeparator,
0690     //            so handleEOL() is called
0691     // non-zero : chGotten can not be any of them; only the column advances
0692     //
0693     if ( (chGotten & (XMLCh) ~(chCR|chLF|chNEL|chLineSeparator)) == 0 )
0694     {
0695         handleEOL(chGotten, false);
0696     }
0697     else
0698     {
0699         fCurCol++;
0700     }
0701
0702     return true;
0703 }
0704 
0705 // ---------------------------------------------------------------------------
0706 //  XMLReader: getNextChar() method inlined for speed
0707 // ---------------------------------------------------------------------------
0708 inline bool XMLReader::getNextChar(XMLCh& chGotten)
0709 {
0710     //
0711     //  With nothing left in the character buffer a reload is needed. No
0712     //  reload is attempted once fNoMore is set, and a reload can fail; in
0713     //  both cases there is no character to hand out.
0714     //
0715     if (fCharIndex >= fCharsAvail)
0716     {
0717         if (fNoMore || !refreshCharBuffer())
0718             return false;
0719     }
0720
0721     // Hand out the current character, then step past it.
0722     chGotten = fCharBuf[fCharIndex];
0723     ++fCharIndex;
0724
0725     // End of line normalization and line/col member maintenance.
0726     //
0727     // An end-of-line combination starts with one of
0728     // chCR(xD), chLF(xA), chNEL(x85), or chLineSeparator(x2028):
0729     //
0730     // 0000000000001101 chCR
0731     // 0000000000001010 chLF
0732     // 0000000010000101 chNEL
0733     // 0010000000101000 chLineSeparator
0734     // -----------------------
0735     // 1101111101010000 == ~(chCR|chLF|chNEL|chLineSeparator)
0736     //
0737     // Bitwise-AND of chGotten with that inverted mask:
0738     // zero     : chGotten can be chCR, chLF, chNEL or chLineSeparator,
0739     //            so handleEOL() is called
0740     // non-zero : chGotten can not be any of them; only the column advances
0741     //
0742     if ( (chGotten & (XMLCh) ~(chCR|chLF|chNEL|chLineSeparator)) == 0 )
0743     {
0744         handleEOL(chGotten, false);
0745     }
0746     else
0747     {
0748         fCurCol++;
0749     }
0750
0751     return true;
0752 }
0753 
0754 
0755 // ---------------------------------------------------------------------------
0756 //  XMLReader: peekNextChar() method inlined for speed
0757 // ---------------------------------------------------------------------------
0758 inline bool XMLReader::peekNextChar(XMLCh& chGotten)
0759 {
0760     //
0761     //  An empty buffer has to be reloaded before anything can be peeked at.
0762     //  When the reload fails, the caller gets chNull and a false return.
0763     //
0764     if ((fCharIndex >= fCharsAvail) && !refreshCharBuffer())
0765     {
0766         chGotten = chNull;
0767         return false;
0768     }
0769
0770     // The index is left alone: peeking consumes nothing.
0771     const XMLCh peeked = fCharBuf[fCharIndex];
0772
0773     //
0774     //  Even a peek has to follow the newline normalization of the normal
0775     //  char get method: for externally sourced text a chCR, and with fNEL
0776     //  set also a chNEL or chLineSeparator, is reported as chLF.
0777     //
0778     const bool lineEnd = (peeked == chCR)
0779         || (fNEL && ((peeked == chNEL) || (peeked == chLineSeparator)));
0780
0781     if (lineEnd && (fSource == Source_External))
0782         chGotten = chLF;
0783     else
0784         chGotten = peeked;
0785     return true;
0786 }
0787 
0788 XERCES_CPP_NAMESPACE_END
0789 
0790 #endif