|
||||
File indexing completed on 2025-01-18 10:15:01
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id$
 */

#if !defined(XERCESC_INCLUDE_GUARD_REGULAREXPRESSION_HPP)
#define XERCESC_INCLUDE_GUARD_REGULAREXPRESSION_HPP

// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/RefArrayVectorOf.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/Janitor.hpp>
#include <xercesc/util/regx/Op.hpp>
#include <xercesc/util/regx/TokenFactory.hpp>
#include <xercesc/util/regx/BMPattern.hpp>
#include <xercesc/util/regx/OpFactory.hpp>
#include <xercesc/util/regx/RegxUtil.hpp>

XERCES_CPP_NAMESPACE_BEGIN

// ---------------------------------------------------------------------------
//  Forward Declaration
// ---------------------------------------------------------------------------
class RangeToken;
class Match;
class RegxParser;

/**
 * The RegularExpression class represents a parsed executable regular expression.
 * This class is thread safe. Two similar regular expression syntaxes are
 * supported:
 *
 * <ol>
 * <li><a href="http://www.w3.org/TR/xpath-functions/#regex-syntax">The XPath 2.0 / XQuery regular expression syntax.</a></li>
 * <li><a href="http://www.w3.org/TR/xmlschema-2/#regexs">The XML Schema regular expression syntax.</a></li>
 * </ol>
 *
 * XPath 2.0 regular expression syntax is used unless the "X" option is specified during construction.
 *
 * Options can be specified during construction to change the way that the regular expression is handled.
 * Options are specified by a string consisting of any number of the following characters:
 *
 * <table border='1'>
 * <tr>
 * <th>Character</th>
 * <th>Meaning</th>
 * </tr>
 * <tr>
 * <td valign='top' rowspan='1' colspan='1'>i</td>
 * <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags">
 * Ignore case</a> when matching the regular expression.</td>
 * </tr>
 * <tr>
 * <td valign='top' rowspan='1' colspan='1'>m</td>
 * <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags">
 * Multi-line mode</a>. The meta characters "^" and "$" will match the beginning and end of lines.</td>
 * </tr>
 * <tr>
 * <td valign='top' rowspan='1' colspan='1'>s</td>
 * <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags">
 * Single-line mode</a>. The meta character "." will match a newline character.</td>
 * </tr>
 * <tr>
 * <td valign='top' rowspan='1' colspan='1'>x</td>
 * <td valign='top' rowspan='1' colspan='1'>Allow extended comments.</td>
 * </tr>
 * <tr>
 * <td valign='top' rowspan='1' colspan='1'>F</td>
 * <td valign='top' rowspan='1' colspan='1'>Prohibit the fixed string optimization.</td>
 * </tr>
 * <tr>
 * <td valign='top' rowspan='1' colspan='1'>H</td>
 * <td valign='top' rowspan='1' colspan='1'>Prohibit the head character optimization.</td>
 * </tr>
 * <tr>
 * <td valign='top' rowspan='1' colspan='1'>X</td>
 * <td valign='top' rowspan='1' colspan='1'>Parse the regular expression according to the
 * <a href="http://www.w3.org/TR/xmlschema-2/#regexs">XML Schema regular expression syntax</a>.</td>
 * </tr>
 * </table>
 */
class XMLUTIL_EXPORT RegularExpression : public XMemory
{
public:
    // -----------------------------------------------------------------------
    //  Public Constructors and Destructor
    // -----------------------------------------------------------------------

    /** @name Constructors and destructor */
    //@{

    /** Parses the given regular expression.
     *
     * @param pattern the regular expression in the local code page
     * @param manager the memory manager to use
     */
    RegularExpression
    (
        const char* const pattern
        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
    );

    /** Parses the given regular expression using the options specified.
     *
     * @param pattern the regular expression in the local code page
     * @param options the options string in the local code page
     * @param manager the memory manager to use
     */
    RegularExpression
    (
        const char* const pattern
        , const char* const options
        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
    );

    /** Parses the given regular expression.
     *
     * @param pattern the regular expression
     * @param manager the memory manager to use
     */
    RegularExpression
    (
        const XMLCh* const pattern
        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
    );

    /** Parses the given regular expression using the options specified.
     *
     * @param pattern the regular expression
     * @param options the options string
     * @param manager the memory manager to use
     */
    RegularExpression
    (
        const XMLCh* const pattern
        , const XMLCh* const options
        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
    );

    virtual ~RegularExpression();

    //@}

    // -----------------------------------------------------------------------
    //  Public Constants
    // -----------------------------------------------------------------------
    static const unsigned int IGNORE_CASE;
    static const unsigned int SINGLE_LINE;
    static const unsigned int MULTIPLE_LINE;
    static const unsigned int EXTENDED_COMMENT;
    static const unsigned int PROHIBIT_HEAD_CHARACTER_OPTIMIZATION;
    static const unsigned int PROHIBIT_FIXED_STRING_OPTIMIZATION;
    static const unsigned int XMLSCHEMA_MODE;
    typedef enum
    {
        wordTypeIgnore = 0,
        wordTypeLetter = 1,
        wordTypeOther = 2
    } wordType;

    // -----------------------------------------------------------------------
    //  Public Helper methods
    // -----------------------------------------------------------------------

    /** @name Public helper methods */
    //@{

    static int getOptionValue(const XMLCh ch);

    /** Returns true when every bit of flag is also set in options. */
    static bool isSet(const int options, const int flag);

    //@}

    // -----------------------------------------------------------------------
    //  Matching methods
    // -----------------------------------------------------------------------

    /** @name Matching methods */
    //@{

    /** Tries to match the given null terminated string against the regular expression, returning
     * true if successful.
     *
     * @param matchString the string to match in the local code page
     * @param manager the memory manager to use
     *
     * @return Whether the string matched the regular expression or not.
     */
    bool matches(const char* const matchString,
                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /** Tries to match the given string between the specified start and end offsets
     * against the regular expression, returning true if successful.
     *
     * @param matchString the string to match in the local code page
     * @param start the offset of the start of the string
     * @param end the offset of the end of the string
     * @param manager the memory manager to use
     *
     * @return Whether the string matched the regular expression or not.
     */
    bool matches(const char* const matchString, const XMLSize_t start, const XMLSize_t end,
                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /** Tries to match the given null terminated string against the regular expression, returning
     * true if successful.
     *
     * @param matchString the string to match in the local code page
     * @param pMatch a Match object, which will be populated with the offsets for the
     * regular expression match and sub-matches.
     * @param manager the memory manager to use
     *
     * @return Whether the string matched the regular expression or not.
     */
    bool matches(const char* const matchString, Match* const pMatch,
                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /** Tries to match the given string between the specified start and end offsets
     * against the regular expression, returning true if successful.
     *
     * @param matchString the string to match in the local code page
     * @param start the offset of the start of the string
     * @param end the offset of the end of the string
     * @param pMatch a Match object, which will be populated with the offsets for the
     * regular expression match and sub-matches.
     * @param manager the memory manager to use
     *
     * @return Whether the string matched the regular expression or not.
     */
    bool matches(const char* const matchString, const XMLSize_t start, const XMLSize_t end,
                 Match* const pMatch, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /** Tries to match the given null terminated string against the regular expression, returning
     * true if successful.
     *
     * @param matchString the string to match
     * @param manager the memory manager to use
     *
     * @return Whether the string matched the regular expression or not.
     */
    bool matches(const XMLCh* const matchString,
                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /** Tries to match the given string between the specified start and end offsets
     * against the regular expression, returning true if successful.
     *
     * @param matchString the string to match
     * @param start the offset of the start of the string
     * @param end the offset of the end of the string
     * @param manager the memory manager to use
     *
     * @return Whether the string matched the regular expression or not.
     */
    bool matches(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end,
                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /** Tries to match the given null terminated string against the regular expression, returning
     * true if successful.
     *
     * @param matchString the string to match
     * @param pMatch a Match object, which will be populated with the offsets for the
     * regular expression match and sub-matches.
     * @param manager the memory manager to use
     *
     * @return Whether the string matched the regular expression or not.
     */
    bool matches(const XMLCh* const matchString, Match* const pMatch,
                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /** Tries to match the given string between the specified start and end offsets
     * against the regular expression, returning true if successful.
     *
     * @param matchString the string to match
     * @param start the offset of the start of the string
     * @param end the offset of the end of the string
     * @param pMatch a Match object, which will be populated with the offsets for the
     * regular expression match and sub-matches.
     * @param manager the memory manager to use
     *
     * @return Whether the string matched the regular expression or not.
     */
    bool matches(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end,
                 Match* const pMatch, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /** Tries to match the given string between the specified start and end offsets
     * against the regular expression. The subEx vector is populated with the details
     * for every non-overlapping occurrence of a match in the string.
     *
     * @param matchString the string to match
     * @param start the offset of the start of the string
     * @param end the offset of the end of the string
     * @param subEx a RefVectorOf Match objects, populated with the offsets for the
     * regular expression match and sub-matches.
     * @param manager the memory manager to use
     */
    void allMatches(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end,
                    RefVectorOf<Match> *subEx, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    //@}

    // -----------------------------------------------------------------------
    //  Tokenize methods
    // -----------------------------------------------------------------------
    // Note: The caller owns the string vector that is returned, and is responsible
    //       for deleting it.

    /** @name Tokenize methods */
    //@{

    /** Tokenizes the null terminated string according to the regular expression, returning
     * the parts of the string that do not match the regular expression.
     *
     * @param matchString the string to match in the local code page
     * @param manager the memory manager to use
     *
     * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the
     * given MemoryManager. The caller owns the string vector that is returned, and is responsible for
     * deleting it.
     */
    RefArrayVectorOf<XMLCh> *tokenize(const char* const matchString,
                                      MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /** Tokenizes the string between the specified start and end offsets according to the regular
     * expression, returning the parts of the string that do not match the regular expression.
     *
     * @param matchString the string to match in the local code page
     * @param start the offset of the start of the string
     * @param end the offset of the end of the string
     * @param manager the memory manager to use
     *
     * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the
     * given MemoryManager. The caller owns the string vector that is returned, and is responsible for
     * deleting it.
     */
    RefArrayVectorOf<XMLCh> *tokenize(const char* const matchString, const XMLSize_t start, const XMLSize_t end,
                                      MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /** Tokenizes the null terminated string according to the regular expression, returning
     * the parts of the string that do not match the regular expression.
     *
     * @param matchString the string to match
     * @param manager the memory manager to use
     *
     * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the
     * given MemoryManager. The caller owns the string vector that is returned, and is responsible for
     * deleting it.
     */
    RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString,
                                      MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /** Tokenizes the string between the specified start and end offsets according to the regular
     * expression, returning the parts of the string that do not match the regular expression.
     *
     * @param matchString the string to match
     * @param start the offset of the start of the string
     * @param end the offset of the end of the string
     * @param manager the memory manager to use
     *
     * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the
     * given MemoryManager. The caller owns the string vector that is returned, and is responsible for
     * deleting it.
     */
    RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end,
                                      MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    //@}

    // -----------------------------------------------------------------------
    //  Replace methods
    // -----------------------------------------------------------------------
    // Note: The caller owns the XMLCh* that is returned, and is responsible for
    //       deleting it.

    /** @name Replace methods */
    //@{

    /** Performs a search and replace on the given null terminated string, replacing
     * any substring that matches the regular expression with a string derived from
     * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
     *
     * @param matchString the string to match in the local code page
     * @param replaceString the string to replace in the local code page
     * @param manager the memory manager to use
     *
     * @return The resulting string allocated using the given MemoryManager. The caller owns the string
     * that is returned, and is responsible for deleting it.
     */
    XMLCh *replace(const char* const matchString, const char* const replaceString,
                   MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /** Performs a search and replace on the given string between the specified start and end offsets, replacing
     * any substring that matches the regular expression with a string derived from
     * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
     *
     * @param matchString the string to match in the local code page
     * @param replaceString the string to replace in the local code page
     * @param start the offset of the start of the string
     * @param end the offset of the end of the string
     * @param manager the memory manager to use
     *
     * @return The resulting string allocated using the given MemoryManager. The caller owns the string
     * that is returned, and is responsible for deleting it.
     */
    XMLCh *replace(const char* const matchString, const char* const replaceString,
                   const XMLSize_t start, const XMLSize_t end,
                   MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /** Performs a search and replace on the given null terminated string, replacing
     * any substring that matches the regular expression with a string derived from
     * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
     *
     * @param matchString the string to match
     * @param replaceString the string to replace
     * @param manager the memory manager to use
     *
     * @return The resulting string allocated using the given MemoryManager. The caller owns the string
     * that is returned, and is responsible for deleting it.
     */
    XMLCh *replace(const XMLCh* const matchString, const XMLCh* const replaceString,
                   MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /** Performs a search and replace on the given string between the specified start and end offsets, replacing
     * any substring that matches the regular expression with a string derived from
     * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
     *
     * @param matchString the string to match
     * @param replaceString the string to replace
     * @param start the offset of the start of the string
     * @param end the offset of the end of the string
     * @param manager the memory manager to use
     *
     * @return The resulting string allocated using the given MemoryManager. The caller owns the string
     * that is returned, and is responsible for deleting it.
     */
    XMLCh *replace(const XMLCh* const matchString, const XMLCh* const replaceString,
                   const XMLSize_t start, const XMLSize_t end,
                   MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    //@}

    // -----------------------------------------------------------------------
    //  Static initialize and cleanup methods
    // -----------------------------------------------------------------------

    /** @name Static initialize and cleanup methods */
    //@{

    static void
    staticInitialize(MemoryManager* memoryManager);

    static void
    staticCleanup();

    //@}

protected:
    virtual RegxParser* getRegexParser(const int options, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);

    // -----------------------------------------------------------------------
    //  Cleanup methods
    // -----------------------------------------------------------------------
    void cleanUp();

    // -----------------------------------------------------------------------
    //  Setter methods
    // -----------------------------------------------------------------------
    void setPattern(const XMLCh* const pattern, const XMLCh* const options=0);

    // -----------------------------------------------------------------------
    //  Protected data types
    // -----------------------------------------------------------------------
    class XMLUTIL_EXPORT Context : public XMemory
    {
        public :
            Context(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
            Context(Context* src);
            ~Context();

            Context& operator= (const Context& other);
            inline const XMLCh* getString() const { return fString; }
            void reset(const XMLCh* const string, const XMLSize_t stringLen,
                       const XMLSize_t start, const XMLSize_t limit, const int noClosures,
                       const unsigned int options);
            bool nextCh(XMLInt32& ch, XMLSize_t& offset);

            bool           fAdoptMatch;
            XMLSize_t      fStart;
            XMLSize_t      fLimit;
            XMLSize_t      fLength;    // fLimit - fStart
            int            fSize;
            XMLSize_t      fStringMaxLen;
            int*           fOffsets;
            Match*         fMatch;
            const XMLCh*   fString;
            unsigned int   fOptions;
            MemoryManager* fMemoryManager;
    };

    // -----------------------------------------------------------------------
    //  Unimplemented constructors and operators
    // -----------------------------------------------------------------------
    RegularExpression(const RegularExpression&);
    RegularExpression& operator=(const RegularExpression&);

    // -----------------------------------------------------------------------
    //  Protected Helper methods
    // -----------------------------------------------------------------------
    void prepare();
    int parseOptions(const XMLCh* const options);

    /**
     * Matching helpers
     */
    int match(Context* const context, const Op* const operations, XMLSize_t offset) const;
    bool matchIgnoreCase(const XMLInt32 ch1, const XMLInt32 ch2) const;

    /**
     * Helper methods used by match(Context* ...)
     */
    bool matchChar(Context* const context, const XMLInt32 ch, XMLSize_t& offset,
                   const bool ignoreCase) const;
    bool matchDot(Context* const context, XMLSize_t& offset) const;
    bool matchRange(Context* const context, const Op* const op,
                    XMLSize_t& offset, const bool ignoreCase) const;
    bool matchAnchor(Context* const context, const XMLInt32 ch,
                     const XMLSize_t offset) const;
    bool matchBackReference(Context* const context, const XMLInt32 ch,
                            XMLSize_t& offset, const bool ignoreCase) const;
    bool matchString(Context* const context, const XMLCh* const literal,
                     XMLSize_t& offset, const bool ignoreCase) const;
    int matchUnion(Context* const context, const Op* const op, XMLSize_t offset) const;
    int matchCapture(Context* const context, const Op* const op, XMLSize_t offset) const;

    /**
     * Replace helpers
     */
    void subInExp(const XMLCh* const repString,
                  const XMLCh* const origString,
                  const Match* subEx,
                  XMLBuffer &result,
                  MemoryManager* const manager) const;
    /**
     * Converts a token tree into an operation tree
     */
    void compile(const Token* const token);
    Op* compile(const Token* const token, Op* const next,
                const bool reverse);
    /**
     * Helper methods used by compile
     */
    Op* compileUnion(const Token* const token, Op* const next,
                     const bool reverse);
    Op* compileParenthesis(const Token* const token, Op* const next,
                           const bool reverse);
    Op* compileConcat(const Token* const token, Op* const next,
                      const bool reverse);
    Op* compileClosure(const Token* const token, Op* const next,
                       const bool reverse, const Token::tokType tkType);

    bool doTokenOverlap(const Op* op, Token* token);

    // -----------------------------------------------------------------------
    //  Protected data members
    // -----------------------------------------------------------------------
    bool               fHasBackReferences;
    bool               fFixedStringOnly;
    int                fNoGroups;
    XMLSize_t          fMinLength;
    unsigned int       fNoClosures;
    unsigned int       fOptions;
    const BMPattern*   fBMPattern;
    XMLCh*             fPattern;
    XMLCh*             fFixedString;
    const Op*          fOperations;
    Token*             fTokenTree;
    RangeToken*        fFirstChar;
    static RangeToken* fWordRange;
    OpFactory          fOpFactory;
    TokenFactory*      fTokenFactory;
    MemoryManager*     fMemoryManager;
};



// -----------------------------------------------------------------------
//  RegularExpression: Static initialize and cleanup methods
// -----------------------------------------------------------------------
// Only resets the static fWordRange pointer to null; nothing is deleted here.
inline void RegularExpression::staticCleanup()
{
    fWordRange = 0;
}

// ---------------------------------------------------------------------------
//  RegularExpression: Cleanup methods
// ---------------------------------------------------------------------------
// Returns fPattern and fFixedString to fMemoryManager and deletes the
// fBMPattern and fTokenFactory objects. The pointers themselves are left
// unchanged afterwards.
inline void RegularExpression::cleanUp() {

    fMemoryManager->deallocate(fPattern);//delete [] fPattern;
    fMemoryManager->deallocate(fFixedString);//delete [] fFixedString;
    delete fBMPattern;
    delete fTokenFactory;
}

// ---------------------------------------------------------------------------
//  RegularExpression: Helper methods
// ---------------------------------------------------------------------------
// True only when all bits of flag are present in options (so a multi-bit flag
// must match completely, and a flag of 0 always yields true).
inline bool RegularExpression::isSet(const int options, const int flag) {

    return (options & flag) == flag;
}


// Creates a UnionOp sized for the token's children and adds one element per
// child: the result of compiling that child with the same next op and the
// same reverse setting.
inline Op* RegularExpression::compileUnion(const Token* const token,
                                           Op* const next,
                                           const bool reverse) {

    XMLSize_t tokSize = token->size();
    UnionOp* uniOp = fOpFactory.createUnionOp(tokSize);

    for (XMLSize_t i=0; i<tokSize; i++) {

        uniOp->addElement(compile(token->getChild(i), next, reverse));
    }

    return uniOp;
}


inline Op* RegularExpression::compileParenthesis(const Token* const token, 0647 Op* const next, 0648 const bool reverse) { 0649 0650 if (token->getNoParen() == 0) 0651 return compile(token->getChild(0), next, reverse); 0652 0653 Op* captureOp = 0; 0654 0655 if (reverse) { 0656 0657 captureOp = fOpFactory.createCaptureOp(token->getNoParen(), next); 0658 captureOp = compile(token->getChild(0), captureOp, reverse); 0659 0660 return fOpFactory.createCaptureOp(-token->getNoParen(), captureOp); 0661 } 0662 0663 captureOp = fOpFactory.createCaptureOp(-token->getNoParen(), next); 0664 captureOp = compile(token->getChild(0), captureOp, reverse); 0665 0666 return fOpFactory.createCaptureOp(token->getNoParen(), captureOp); 0667 } 0668 0669 inline Op* RegularExpression::compileConcat(const Token* const token, 0670 Op* const next, 0671 const bool reverse) { 0672 0673 Op* ret = next; 0674 XMLSize_t tokSize = token->size(); 0675 0676 if (!reverse) { 0677 0678 for (XMLSize_t i= tokSize; i>0; i--) { 0679 ret = compile(token->getChild(i-1), ret, false); 0680 } 0681 } 0682 else { 0683 0684 for (XMLSize_t i= 0; i< tokSize; i++) { 0685 ret = compile(token->getChild(i), ret, true); 0686 } 0687 } 0688 0689 return ret; 0690 } 0691 0692 inline Op* RegularExpression::compileClosure(const Token* const token, 0693 Op* const next, 0694 const bool reverse, 0695 const Token::tokType tkType) { 0696 0697 Op* ret = 0; 0698 Token* childTok = token->getChild(0); 0699 int min = token->getMin(); 0700 int max = token->getMax(); 0701 0702 if (min >= 0 && min == max) { 0703 0704 ret = next; 0705 for (int i=0; i< min; i++) { 0706 ret = compile(childTok, ret, reverse); 0707 } 0708 0709 return ret; 0710 } 0711 0712 if (min > 0 && max > 0) 0713 max -= min; 0714 0715 if (max > 0) { 0716 0717 ret = next; 0718 for (int i=0; i<max; i++) { 0719 0720 ChildOp* childOp = fOpFactory.createQuestionOp( 0721 tkType == Token::T_NONGREEDYCLOSURE); 0722 0723 childOp->setNextOp(next); 0724 childOp->setChild(compile(childTok, 
ret, reverse)); 0725 ret = childOp; 0726 } 0727 } 0728 else { 0729 0730 ChildOp* childOp = 0; 0731 0732 if (tkType == Token::T_NONGREEDYCLOSURE) { 0733 childOp = fOpFactory.createNonGreedyClosureOp(); 0734 } 0735 else { 0736 0737 if (childTok->getMinLength() == 0) 0738 childOp = fOpFactory.createClosureOp(fNoClosures++); 0739 else 0740 childOp = fOpFactory.createClosureOp(-1); 0741 } 0742 0743 childOp->setNextOp(next); 0744 if(next==NULL || !doTokenOverlap(next, childTok)) 0745 { 0746 childOp->setOpType(tkType == Token::T_NONGREEDYCLOSURE?Op::O_FINITE_NONGREEDYCLOSURE:Op::O_FINITE_CLOSURE); 0747 childOp->setChild(compile(childTok, NULL, reverse)); 0748 } 0749 else 0750 { 0751 childOp->setChild(compile(childTok, childOp, reverse)); 0752 } 0753 ret = childOp; 0754 } 0755 0756 if (min > 0) { 0757 0758 for (int i=0; i< min; i++) { 0759 ret = compile(childTok, ret, reverse); 0760 } 0761 } 0762 0763 return ret; 0764 } 0765 0766 XERCES_CPP_NAMESPACE_END 0767 0768 #endif 0769 /** 0770 * End of file RegularExpression.hpp 0771 */ 0772
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |