Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-30 10:27:27

0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  * 
0009  *      http://www.apache.org/licenses/LICENSE-2.0
0010  * 
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 /*
0019  * $Id$
0020  */
0021 
0022 #if !defined(XERCESC_INCLUDE_GUARD_XMLCHAR_HPP)
0023 #define XERCESC_INCLUDE_GUARD_XMLCHAR_HPP
0024 
0025 #include <xercesc/util/XMLUniDefs.hpp>
0026 
0027 XERCES_CPP_NAMESPACE_BEGIN
0028 
0029 // ---------------------------------------------------------------------------
0030 //  This file defines character classification utilities that conform to XML 1.0 and XML 1.1
0031 // ---------------------------------------------------------------------------
0032 // Masks for the fgCharCharsTable1_0 array
0033 const XMLByte   gNCNameCharMask             = 0x1;
0034 const XMLByte   gFirstNameCharMask          = 0x2;
0035 const XMLByte   gNameCharMask               = 0x4;
0036 const XMLByte   gPlainContentCharMask       = 0x8;
0037 const XMLByte   gSpecialStartTagCharMask    = 0x10;
0038 const XMLByte   gControlCharMask            = 0x20;
0039 const XMLByte   gXMLCharMask                = 0x40;
0040 const XMLByte   gWhitespaceCharMask         = 0x80;
0041 
0042 // ---------------------------------------------------------------------------
0043 //  This class is for XML 1.0
0044 // ---------------------------------------------------------------------------
0045 class XMLUTIL_EXPORT XMLChar1_0
0046 {
0047 public:
0048     // -----------------------------------------------------------------------
0049     //  Public, static methods, check the string
0050     // -----------------------------------------------------------------------
0051     static bool isAllSpaces
0052     (
0053         const   XMLCh* const    toCheck
0054         , const XMLSize_t       count
0055     );
0056 
0057     static bool containsWhiteSpace
0058     (
0059         const   XMLCh* const    toCheck
0060         , const XMLSize_t       count
0061     );
0062 
0063     static bool isValidNmtoken
0064     (
0065         const   XMLCh*        const    toCheck
0066       , const   XMLSize_t              count
0067     );
0068 
0069     static bool isValidName
0070     (
0071         const   XMLCh* const    toCheck
0072         , const XMLSize_t       count
0073     );
0074 
0075     static bool isValidName
0076     (
0077         const   XMLCh* const    toCheck
0078     );
0079 
0080     static bool isValidNCName
0081     (
0082         const   XMLCh* const    toCheck
0083         , const XMLSize_t       count
0084     );
0085 
0086     static bool isValidQName
0087     (
0088         const   XMLCh* const    toCheck
0089         , const XMLSize_t       count
0090     );
0091 
0092     // -----------------------------------------------------------------------
0093     //  Public, static methods, check the XMLCh
0094     //  surrogate pair is assumed if second parameter is not null
0095     // -----------------------------------------------------------------------
0096     static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0097     static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0098     static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0099     static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0100     static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0101     static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0102     static bool isWhitespace(const XMLCh toCheck);
0103     static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2);
0104     static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0105 
0106     static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0107     static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0108     static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0109 
0110     // -----------------------------------------------------------------------
0111     //  Special Non-conformant Public, static methods
0112     // -----------------------------------------------------------------------
0113     /**
0114       * Return true if NEL (0x85) and LSEP (0x2028) to be treated as white space char.
0115       */
0116     static bool isNELRecognized();
0117 
0118     /**
0119       * Method to enable NEL (0x85) and LSEP (0x2028) to be treated as white space char.
0120       */
0121     static void enableNELWS();
0122 
0123 private:
0124     // -----------------------------------------------------------------------
0125     //  Unimplemented constructors and operators
0126     // -----------------------------------------------------------------------
0127     XMLChar1_0();
0128 
0129     // -----------------------------------------------------------------------
0130     //  Static data members
0131     //
0132     //  fgCharCharsTable1_0
0133     //      The character characteristics table. Bits in each byte, represent
0134     //      the characteristics of each character. It is generated via some
0135     //      code and then hard coded into the cpp file for speed.
0136     //
0137     //  fNEL
0138     //      Flag to represents whether NEL and LSEP newline recognition is enabled
0139     //      or disabled
0140     // -----------------------------------------------------------------------
0141     static XMLByte  fgCharCharsTable1_0[0x10000];
0142     static bool     enableNEL;
0143 
0144     friend class XMLReader;
0145 };
0146 
0147 
0148 // ---------------------------------------------------------------------------
0149 //  XMLChar1_0: Public, static inline methods
0150 // ---------------------------------------------------------------------------
0151 inline bool XMLChar1_0::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
0152 {
0153     // An XML letter is a FirstNameChar minus ':' and '_'.
0154     if (!toCheck2) {
0155         return (((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0)
0156                 && (toCheck != chColon) && (toCheck != chUnderscore));
0157     }
0158     return false;
0159 }
0160 
0161 inline bool XMLChar1_0::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
0162 {
0163     if (!toCheck2)
0164         return ((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0);
0165     else {
0166         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
0167            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
0168                return true;
0169     }
0170     return false;
0171 }
0172 
0173 inline bool XMLChar1_0::isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
0174 {
0175     if (!toCheck2) {
0176         return (((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0) && (toCheck != chColon));
0177     }
0178     else {
0179         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
0180            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
0181                return true;
0182     }
0183     return false;
0184 }
0185 
0186 inline bool XMLChar1_0::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
0187 {
0188     if (!toCheck2)
0189         return ((fgCharCharsTable1_0[toCheck] & gNameCharMask) != 0);
0190     else {
0191         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
0192            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
0193                return true;
0194     }
0195     return false;
0196 }
0197 
0198 inline bool XMLChar1_0::isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
0199 {
0200     if (!toCheck2)
0201         return ((fgCharCharsTable1_0[toCheck] & gNCNameCharMask) != 0);
0202     else {
0203         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
0204            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
0205                return true;
0206     }
0207     return false;
0208 }
0209 
0210 inline bool XMLChar1_0::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
0211 {
0212     if (!toCheck2)
0213         return ((fgCharCharsTable1_0[toCheck] & gPlainContentCharMask) != 0);
0214     else {
0215         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
0216            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
0217                return true;
0218     }
0219     return false;
0220 }
0221 
0222 
0223 inline bool XMLChar1_0::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
0224 {
0225     if (!toCheck2)
0226         return ((fgCharCharsTable1_0[toCheck] & gSpecialStartTagCharMask) != 0);
0227     return false;
0228 }
0229 
0230 inline bool XMLChar1_0::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
0231 {
0232     if (!toCheck2)
0233         return ((fgCharCharsTable1_0[toCheck] & gXMLCharMask) != 0);
0234     else {
0235         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
0236            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
0237                return true;
0238     }
0239     return false;
0240 }
0241 
0242 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck)
0243 {
0244     return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
0245 }
0246 
0247 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
0248 {
0249     if (!toCheck2)
0250         return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
0251     return false;
0252 }
0253 
0254 inline bool XMLChar1_0::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
0255 {
0256     if (!toCheck2)
0257         return ((fgCharCharsTable1_0[toCheck] & gControlCharMask) != 0);
0258     return false;
0259 }
0260 
0261 inline bool XMLChar1_0::isNELRecognized() {
0262 
0263     return enableNEL;
0264 }
0265 
0266 
0267 // ---------------------------------------------------------------------------
0268 //  This class is for XML 1.1
0269 // ---------------------------------------------------------------------------
0270 class XMLUTIL_EXPORT XMLChar1_1
0271 {
0272 public:
0273     // -----------------------------------------------------------------------
0274     //  Public, static methods, check the string
0275     // -----------------------------------------------------------------------
0276     static bool isAllSpaces
0277     (
0278         const   XMLCh* const    toCheck
0279         , const XMLSize_t       count
0280     );
0281 
0282     static bool containsWhiteSpace
0283     (
0284         const   XMLCh* const    toCheck
0285         , const XMLSize_t       count
0286     );
0287 
0288     static bool isValidNmtoken
0289     (
0290         const   XMLCh*        const    toCheck
0291       , const   XMLSize_t              count
0292     );
0293 
0294     static bool isValidName
0295     (
0296         const   XMLCh* const    toCheck
0297         , const XMLSize_t       count
0298     );
0299 
0300     static bool isValidName
0301     (
0302         const   XMLCh* const    toCheck
0303     );
0304 
0305     static bool isValidNCName
0306     (
0307         const   XMLCh* const    toCheck
0308         , const XMLSize_t       count
0309     );
0310 
0311     static bool isValidQName
0312     (
0313         const   XMLCh* const    toCheck
0314         , const XMLSize_t       count
0315     );
0316 
0317     // -----------------------------------------------------------------------
0318     //  Public, static methods, check the XMLCh
0319     // -----------------------------------------------------------------------
0320     static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0321     static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0322     static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0323     static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0324     static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0325     static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0326     static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0327     static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0328 
0329     static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0330     static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0331     static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
0332 
0333 private:
0334     // -----------------------------------------------------------------------
0335     //  Unimplemented constructors and operators
0336     // -----------------------------------------------------------------------
0337     XMLChar1_1();
0338 
0339     // -----------------------------------------------------------------------
0340     //  Static data members
0341     //
0342     //  fgCharCharsTable1_1
0343     //      The character characteristics table. Bits in each byte, represent
0344     //      the characteristics of each character. It is generated via some
0345     //      code and then hard coded into the cpp file for speed.
0346     //
0347     // -----------------------------------------------------------------------
0348     static XMLByte  fgCharCharsTable1_1[0x10000];
0349 
0350     friend class XMLReader;
0351 };
0352 
0353 
0354 // ---------------------------------------------------------------------------
0355 //  XMLChar1_1: Public, static inline methods
0356 // ---------------------------------------------------------------------------
0357 inline bool XMLChar1_1::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
0358 {
0359     /** XML 1.1 does not define a letter, so we use the 1.0 definition */
0360     return XMLChar1_0::isXMLLetter(toCheck, toCheck2);
0361 }
0362 
0363 inline bool XMLChar1_1::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
0364 {
0365     if (!toCheck2)
0366         return ((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0);
0367     else {
0368         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
0369            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
0370                return true;
0371     }
0372     return false;
0373 }
0374 
0375 inline bool XMLChar1_1::isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
0376 {
0377     if (!toCheck2) {
0378         return (((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0) && (toCheck != chColon));
0379     }
0380     else {
0381         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
0382            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
0383                return true;
0384     }
0385     return false;
0386 }
0387 
0388 inline bool XMLChar1_1::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
0389 {
0390     if (!toCheck2)
0391         return ((fgCharCharsTable1_1[toCheck] & gNameCharMask) != 0);
0392     else {
0393         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
0394            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
0395                return true;
0396     }
0397     return false;
0398 }
0399 
0400 inline bool XMLChar1_1::isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
0401 {
0402     if (!toCheck2)
0403         return ((fgCharCharsTable1_1[toCheck] & gNCNameCharMask) != 0);
0404     else {
0405         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
0406            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
0407                return true;
0408     }
0409     return false;
0410 }
0411 
0412 inline bool XMLChar1_1::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
0413 {
0414     if (!toCheck2)
0415         return ((fgCharCharsTable1_1[toCheck] & gPlainContentCharMask) != 0);
0416     else {
0417         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
0418            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
0419                return true;
0420     }
0421     return false;
0422 }
0423 
0424 
0425 inline bool XMLChar1_1::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
0426 {
0427     if (!toCheck2)
0428         return ((fgCharCharsTable1_1[toCheck] & gSpecialStartTagCharMask) != 0);
0429     return false;
0430 }
0431 
0432 inline bool XMLChar1_1::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
0433 {
0434     if (!toCheck2)
0435         return ((fgCharCharsTable1_1[toCheck] & gXMLCharMask) != 0);
0436     else {
0437         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
0438            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
0439                return true;
0440     }
0441     return false;
0442 }
0443 
0444 inline bool XMLChar1_1::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
0445 {
0446     if (!toCheck2)
0447         return ((fgCharCharsTable1_1[toCheck] & gWhitespaceCharMask) != 0);
0448     return false;
0449 }
0450 
0451 inline bool XMLChar1_1::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
0452 {
0453     if (!toCheck2)
0454         return ((fgCharCharsTable1_1[toCheck] & gControlCharMask) != 0);
0455     return false;
0456 }
0457 
0458 
0459 XERCES_CPP_NAMESPACE_END
0460 
0461 #endif