Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:15:01

0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  * 
0009  *      http://www.apache.org/licenses/LICENSE-2.0
0010  * 
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 /*
0019  * $Id$
0020  */
0021 
0022 #if !defined(XERCESC_INCLUDE_GUARD_REGXPARSER_HPP)
0023 #define XERCESC_INCLUDE_GUARD_REGXPARSER_HPP
0024 
0025 /*
0026  *    A regular expression parser
0027  */
0028 // ---------------------------------------------------------------------------
0029 //  Includes
0030 // ---------------------------------------------------------------------------
0031 #include <xercesc/util/RefVectorOf.hpp>
0032 #include <xercesc/util/XMLUniDefs.hpp>
0033 #include <xercesc/util/regx/Token.hpp>
0034 
0035 XERCES_CPP_NAMESPACE_BEGIN
0036 
0037 // ---------------------------------------------------------------------------
0038 //  Forward Declaration
0039 // ---------------------------------------------------------------------------
0040 class Token;
0041 class RangeToken;
0042 class TokenFactory;
0043 
0044 class XMLUTIL_EXPORT RegxParser : public XMemory   // Regular expression parser; parse() is its only public parsing method.
0045 {
0046 public:
0047 
0048     // -----------------------------------------------------------------------
0049     //  Public constant data
0050     // -----------------------------------------------------------------------
0051     // Parse tokens (REGX_T_*). The typedef is named parserState; it is the type of fState and of getState().
0052     typedef enum {
0053         REGX_T_CHAR                     = 0,
0054         REGX_T_EOF                      = 1,
0055         REGX_T_OR                       = 2,
0056         REGX_T_STAR                     = 3,
0057         REGX_T_PLUS                     = 4,
0058         REGX_T_QUESTION                 = 5,
0059         REGX_T_LPAREN                   = 6,
0060         REGX_T_RPAREN                   = 7,
0061         REGX_T_DOT                      = 8,
0062         REGX_T_LBRACKET                 = 9,
0063         REGX_T_BACKSOLIDUS              = 10,
0064         REGX_T_CARET                    = 11,
0065         REGX_T_DOLLAR                   = 12,
0066         REGX_T_XMLSCHEMA_CC_SUBTRACTION    = 13
0067     } parserState;
0068 
0069     typedef enum {   // Parse context: the type of fParseContext and of getParseContext()/setParseContext().
0070         regexParserStateNormal = 0,
0071         regexParserStateInBrackets = 1   // presumably "inside a [...] character class" -- confirm in RegxParser.cpp
0072     } parserStateContext;
0073 
0074     // -----------------------------------------------------------------------
0075     //  Public Constructors and Destructor (manager is XMLPlatformUtils::fgMemoryManager when omitted)
0076     // -----------------------------------------------------------------------
0077     RegxParser(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
0078     virtual ~RegxParser();   // virtual destructor; the class also declares virtual protected methods
0079 
0080     // -----------------------------------------------------------------------
0081     //  Getter methods (defined inline after the class; each returns the private field noted)
0082     // -----------------------------------------------------------------------
0083     parserStateContext  getParseContext() const;     // returns fParseContext
0084     parserState         getState() const;            // returns fState
0085     XMLInt32            getCharData() const;         // returns fCharData
0086     int                 getNoParen() const;          // returns fNoGroups
0087     XMLSize_t           getOffset() const;           // returns fOffset
0088     bool                hasBackReferences() const;   // returns fHasBackReferences
0089     TokenFactory*       getTokenFactory() const;     // returns fTokenFactory
0090     int                 getOptions() const;          // returns fOptions
0091 
0092     // -----------------------------------------------------------------------
0093     //  Setter methods (defined inline after the class; each overwrites the private field noted)
0094     // -----------------------------------------------------------------------
0095     void setParseContext(const parserStateContext value);   // assigns fParseContext
0096     void setTokenFactory(TokenFactory* const tokFactory);   // assigns fTokenFactory (pointer is stored as given)
0097     void setOptions(const int options);                     // assigns fOptions
0098 
0099     // -----------------------------------------------------------------------
0100     //  Public Parsing methods
0101     // -----------------------------------------------------------------------
0102     Token* parse(const XMLCh* const regxStr, const int options);   // presumably returns the token tree for regxStr; result ownership is not visible in this header -- confirm in RegxParser.cpp
0103 
0104 protected:
0105     // -----------------------------------------------------------------------
0106     //  Protected Helper methods (the virtual ones can be overridden by subclasses)
0107     // -----------------------------------------------------------------------
0108     virtual bool        checkQuestion(const XMLSize_t off);
0109     virtual XMLInt32    decodeEscaped();
0110     MemoryManager*      getMemoryManager() const;   // returns fMemoryManager
0111     // -----------------------------------------------------------------------
0112     //  Protected Parsing/Processing methods
0113     // -----------------------------------------------------------------------
0114     void                processNext();   // presumably advances the scan and updates fState/fCharData -- confirm in RegxParser.cpp
0115 
0116     Token*              parseRegx(const bool matchingRParen = false);
0117     virtual Token*      processCaret();
0118     virtual Token*      processDollar();
0119     virtual Token*      processBackReference();
0120     virtual Token*      processStar(Token* const tok);
0121     virtual Token*      processPlus(Token* const tok);
0122     virtual Token*      processQuestion(Token* const tok);
0123     virtual Token*      processParen();
0124 
0125     RangeToken*         parseCharacterClass(const bool useNRange);
0126     RangeToken*         processBacksolidus_pP(const XMLInt32 ch);
0127 
0128     // -----------------------------------------------------------------------
0129     //  Protected PreCreated RangeToken access methods
0130     // -----------------------------------------------------------------------
0131     RangeToken*         getTokenForShorthand(const XMLInt32 ch);
0132 
0133     bool isSet(const int flag);   // true when every bit of 'flag' is set in fOptions (non-const although it only reads)
0134 private:
0135     // -----------------------------------------------------------------------
0136     //  Private parsing/processing methods
0137     // -----------------------------------------------------------------------
0138     Token* parseTerm(const bool matchingRParen = false);
0139     Token* parseFactor();
0140     Token* parseAtom();
0141 
0142     // -----------------------------------------------------------------------
0143     //  Unimplemented constructors and operators (private, so instances cannot be copied or assigned from outside)
0144     // -----------------------------------------------------------------------
0145     RegxParser(const RegxParser&);
0146     RegxParser& operator=(const RegxParser&);
0147 
0148     // -----------------------------------------------------------------------
0149     //  Private data types
0150     // -----------------------------------------------------------------------
0151     class ReferencePosition : public XMemory   // (reference number, position) pair; element type of fReferences
0152     {
0153         public :
0154             ReferencePosition(const int refNo, const XMLSize_t position);
0155 
0156             int            fReferenceNo;
0157             XMLSize_t   fPosition;
0158     };
0159 
0160     // -----------------------------------------------------------------------
0161     //  Private Helper methods
0162     // -----------------------------------------------------------------------
0163     int hexChar(const XMLInt32 ch);   // value 0-15 of a hex digit character, or -1 when ch is not a hex digit
0164 
0165     // -----------------------------------------------------------------------
0166     //  Private data members
0167     // -----------------------------------------------------------------------
0168     MemoryManager*                  fMemoryManager;       // exposed via getMemoryManager()
0169     bool                            fHasBackReferences;   // exposed via hasBackReferences()
0170     int                             fOptions;             // option bits; see setOptions()/getOptions()/isSet()
0171     XMLSize_t                       fOffset;              // exposed via getOffset()
0172     int                             fNoGroups;            // exposed via getNoParen()
0173     parserStateContext              fParseContext;        // see getParseContext()/setParseContext()
0174     XMLSize_t                       fStringLen;           // presumably the length of fString -- confirm in RegxParser.cpp
0175     parserState                     fState;               // exposed via getState()
0176     XMLInt32                        fCharData;            // exposed via getCharData()
0177     XMLCh*                          fString;              // presumably the expression being parsed; ownership not visible here
0178     RefVectorOf<ReferencePosition>* fReferences;          // collection of ReferencePosition entries
0179     TokenFactory*                   fTokenFactory;        // see getTokenFactory()/setTokenFactory(); ownership not visible here
0180 };
0181 
0182 
0183 // ---------------------------------------------------------------------------
0184 //  RegxParser: Getter Methods
0185 // ---------------------------------------------------------------------------
0186 inline RegxParser::parserStateContext RegxParser::getParseContext() const {
0187     // Hand back the stored parse context value.
0188     return this->fParseContext;
0189 }
0190 
0191 inline RegxParser::parserState RegxParser::getState() const {
0192     // Hand back the stored parse token/state value.
0193     return this->fState;
0194 }
0195 
0196 inline XMLInt32 RegxParser::getCharData() const {
0197     // Hand back the stored character data value.
0198     return this->fCharData;
0199 }
0200 
0201 inline int RegxParser::getNoParen() const {
0202     // The value reported here is kept in the fNoGroups member.
0203     return this->fNoGroups;
0204 }
0205 
0206 inline XMLSize_t RegxParser::getOffset() const {
0207     // Hand back the stored offset value.
0208     return this->fOffset;
0209 }
0210 
0211 inline bool RegxParser::hasBackReferences() const {
0212     // Hand back the stored back-reference flag.
0213     return this->fHasBackReferences;
0214 }
0215 
0216 inline TokenFactory* RegxParser::getTokenFactory() const {
0217     // Hand back the stored token factory pointer (as last set via setTokenFactory, if any).
0218     return this->fTokenFactory;
0219 }
0220 
0221 inline MemoryManager* RegxParser::getMemoryManager() const {
0222     return this->fMemoryManager;   // the stored memory manager pointer
0223 }
0224 
0225 inline int RegxParser::getOptions() const {
0226     // Hand back the stored option bits.
0227     return this->fOptions;
0228 }
0229 
0230 // ---------------------------------------------------------------------------
0231 //  RegxParser: Setter Methods
0232 // ---------------------------------------------------------------------------
0233 inline void RegxParser::setParseContext(const RegxParser::parserStateContext value) {
0234     // Replace the stored parse context with the supplied one.
0235     this->fParseContext = value;
0236 }
0237 
0238 inline void RegxParser::setTokenFactory(TokenFactory* const tokFactory) {
0239     // Only the pointer is recorded here; nothing else is done with the factory.
0240     this->fTokenFactory = tokFactory;
0241 }
0242 
0243 inline void RegxParser::setOptions(const int options) {
0244     // Replace the stored option bits with the supplied ones.
0245     this->fOptions = options;
0246 }
0247 
0248 // ---------------------------------------------------------------------------
0249 //  RegxParser: Helper Methods
0250 // ---------------------------------------------------------------------------
0251 inline bool RegxParser::isSet(const int flag) {
0252     const int matched = fOptions & flag;   // bits of 'flag' that are also present in fOptions
0253     return matched == flag;                // true only when every requested bit is present
0254 }
0255 
0256 
0257 inline int RegxParser::hexChar(const XMLInt32 ch) {
0258     // Convert a hexadecimal digit character to its numeric value (0-15);
0259     // any other character yields -1.
0260 
0261     // chDigit_0 .. chDigit_9 map to 0 .. 9
0262     if (ch >= chDigit_0 && ch <= chDigit_9)
0263         return ch - chDigit_0;
0264 
0265     // chLatin_A .. chLatin_F map to 10 .. 15
0266     if (ch >= chLatin_A && ch <= chLatin_F)
0267         return ch - chLatin_A + 10;
0268 
0269     // chLatin_a .. chLatin_f map to 10 .. 15
0270     if (ch >= chLatin_a && ch <= chLatin_f)
0271         return ch - chLatin_a + 10;
0272 
0273     // Anything outside the three ranges is not a hexadecimal digit.
0274     return -1;
0275 }
0276 
0277 XERCES_CPP_NAMESPACE_END
0278 
0279 #endif
0280 
0281 /**
0282   *    End file RegxParser.hpp
0283   */
0284