File indexing completed on 2025-01-18 10:15:01
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022 #if !defined(XERCESC_INCLUDE_GUARD_REGXPARSER_HPP)
0023 #define XERCESC_INCLUDE_GUARD_REGXPARSER_HPP
0024
0025
0026
0027
0028
0029
0030
0031 #include <xercesc/util/RefVectorOf.hpp>
0032 #include <xercesc/util/XMLUniDefs.hpp>
0033 #include <xercesc/util/regx/Token.hpp>
0034
0035 XERCES_CPP_NAMESPACE_BEGIN
0036
0037
0038
0039
0040 class Token;
0041 class RangeToken;
0042 class TokenFactory;
0043
0044 class XMLUTIL_EXPORT RegxParser : public XMemory
0045 {
0046 public:
0047
0048
0049
0050
0051
0052 typedef enum {
0053 REGX_T_CHAR = 0,
0054 REGX_T_EOF = 1,
0055 REGX_T_OR = 2,
0056 REGX_T_STAR = 3,
0057 REGX_T_PLUS = 4,
0058 REGX_T_QUESTION = 5,
0059 REGX_T_LPAREN = 6,
0060 REGX_T_RPAREN = 7,
0061 REGX_T_DOT = 8,
0062 REGX_T_LBRACKET = 9,
0063 REGX_T_BACKSOLIDUS = 10,
0064 REGX_T_CARET = 11,
0065 REGX_T_DOLLAR = 12,
0066 REGX_T_XMLSCHEMA_CC_SUBTRACTION = 13
0067 } parserState;
0068
0069 typedef enum {
0070 regexParserStateNormal = 0,
0071 regexParserStateInBrackets = 1
0072 } parserStateContext;
0073
0074
0075
0076
0077 RegxParser(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
0078 virtual ~RegxParser();
0079
0080
0081
0082
0083 parserStateContext getParseContext() const;
0084 parserState getState() const;
0085 XMLInt32 getCharData() const;
0086 int getNoParen() const;
0087 XMLSize_t getOffset() const;
0088 bool hasBackReferences() const;
0089 TokenFactory* getTokenFactory() const;
0090 int getOptions() const;
0091
0092
0093
0094
0095 void setParseContext(const parserStateContext value);
0096 void setTokenFactory(TokenFactory* const tokFactory);
0097 void setOptions(const int options);
0098
0099
0100
0101
0102 Token* parse(const XMLCh* const regxStr, const int options);
0103
0104 protected:
0105
0106
0107
0108 virtual bool checkQuestion(const XMLSize_t off);
0109 virtual XMLInt32 decodeEscaped();
0110 MemoryManager* getMemoryManager() const;
0111
0112
0113
0114 void processNext();
0115
0116 Token* parseRegx(const bool matchingRParen = false);
0117 virtual Token* processCaret();
0118 virtual Token* processDollar();
0119 virtual Token* processBackReference();
0120 virtual Token* processStar(Token* const tok);
0121 virtual Token* processPlus(Token* const tok);
0122 virtual Token* processQuestion(Token* const tok);
0123 virtual Token* processParen();
0124
0125 RangeToken* parseCharacterClass(const bool useNRange);
0126 RangeToken* processBacksolidus_pP(const XMLInt32 ch);
0127
0128
0129
0130
0131 RangeToken* getTokenForShorthand(const XMLInt32 ch);
0132
0133 bool isSet(const int flag);
0134 private:
0135
0136
0137
0138 Token* parseTerm(const bool matchingRParen = false);
0139 Token* parseFactor();
0140 Token* parseAtom();
0141
0142
0143
0144
0145 RegxParser(const RegxParser&);
0146 RegxParser& operator=(const RegxParser&);
0147
0148
0149
0150
0151 class ReferencePosition : public XMemory
0152 {
0153 public :
0154 ReferencePosition(const int refNo, const XMLSize_t position);
0155
0156 int fReferenceNo;
0157 XMLSize_t fPosition;
0158 };
0159
0160
0161
0162
0163 int hexChar(const XMLInt32 ch);
0164
0165
0166
0167
0168 MemoryManager* fMemoryManager;
0169 bool fHasBackReferences;
0170 int fOptions;
0171 XMLSize_t fOffset;
0172 int fNoGroups;
0173 parserStateContext fParseContext;
0174 XMLSize_t fStringLen;
0175 parserState fState;
0176 XMLInt32 fCharData;
0177 XMLCh* fString;
0178 RefVectorOf<ReferencePosition>* fReferences;
0179 TokenFactory* fTokenFactory;
0180 };
0181
0182
0183
0184
0185
0186 inline RegxParser::parserStateContext RegxParser::getParseContext() const {
0187
0188 return fParseContext;
0189 }
0190
0191 inline RegxParser::parserState RegxParser::getState() const {
0192
0193 return fState;
0194 }
0195
0196 inline XMLInt32 RegxParser::getCharData() const {
0197
0198 return fCharData;
0199 }
0200
0201 inline int RegxParser::getNoParen() const {
0202
0203 return fNoGroups;
0204 }
0205
0206 inline XMLSize_t RegxParser::getOffset() const {
0207
0208 return fOffset;
0209 }
0210
0211 inline bool RegxParser::hasBackReferences() const {
0212
0213 return fHasBackReferences;
0214 }
0215
0216 inline TokenFactory* RegxParser::getTokenFactory() const {
0217
0218 return fTokenFactory;
0219 }
0220
0221 inline MemoryManager* RegxParser::getMemoryManager() const {
0222 return fMemoryManager;
0223 }
0224
0225 inline int RegxParser::getOptions() const {
0226
0227 return fOptions;
0228 }
0229
0230
0231
0232
0233 inline void RegxParser::setParseContext(const RegxParser::parserStateContext value) {
0234
0235 fParseContext = value;
0236 }
0237
0238 inline void RegxParser::setTokenFactory(TokenFactory* const tokFactory) {
0239
0240 fTokenFactory = tokFactory;
0241 }
0242
0243 inline void RegxParser::setOptions(const int options) {
0244
0245 fOptions = options;
0246 }
0247
0248
0249
0250
0251 inline bool RegxParser::isSet(const int flag) {
0252
0253 return (fOptions & flag) == flag;
0254 }
0255
0256
0257 inline int RegxParser::hexChar(const XMLInt32 ch) {
0258
0259 if (ch < chDigit_0 || ch > chLatin_f)
0260 return -1;
0261
0262 if (ch <= chDigit_9)
0263 return ch - chDigit_0;
0264
0265 if (ch < chLatin_A)
0266 return -1;
0267
0268 if (ch <= chLatin_F)
0269 return ch - chLatin_A + 10;
0270
0271 if (ch < chLatin_a)
0272 return -1;
0273
0274 return ch - chLatin_a + 10;
0275 }
0276
0277 XERCES_CPP_NAMESPACE_END
0278
0279 #endif
0280
0281
0282
0283
0284