Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:15:01

0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *      http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 /*
0019  * $Id$
0020  */
0021 
0022 #if !defined(XERCESC_INCLUDE_GUARD_REGULAREXPRESSION_HPP)
0023 #define XERCESC_INCLUDE_GUARD_REGULAREXPRESSION_HPP
0024 
0025 // ---------------------------------------------------------------------------
0026 //  Includes
0027 // ---------------------------------------------------------------------------
0028 #include <xercesc/util/RefArrayVectorOf.hpp>
0029 #include <xercesc/util/XMLString.hpp>
0030 #include <xercesc/util/Janitor.hpp>
0031 #include <xercesc/util/regx/Op.hpp>
0032 #include <xercesc/util/regx/TokenFactory.hpp>
0033 #include <xercesc/util/regx/BMPattern.hpp>
0034 #include <xercesc/util/regx/OpFactory.hpp>
0035 #include <xercesc/util/regx/RegxUtil.hpp>
0036 
0037 XERCES_CPP_NAMESPACE_BEGIN
0038 
0039 // ---------------------------------------------------------------------------
0040 //  Forward Declaration
0041 // ---------------------------------------------------------------------------
0042 class RangeToken;
0043 class Match;
0044 class RegxParser;
0045 
0046 /**
0047  * The RegularExpression class represents a parsed executable regular expression.
0048  * This class is thread safe. Two similar regular expression syntaxes are
0049  * supported:
0050  *
0051  * <ol>
0052  * <li><a href="http://www.w3.org/TR/xpath-functions/#regex-syntax">The XPath 2.0 / XQuery regular expression syntax.</a></li>
0053  * <li><a href="http://www.w3.org/TR/xmlschema-2/#regexs">The XML Schema regular expression syntax.</a></li>
0054  * </ol>
0055  * 
0056  * XPath 2.0 regular expression syntax is used unless the "X" option is specified during construction.
0057  *
0058  * Options can be specified during construction to change the way that the regular expression is handled.
0059  * Options are specified by a string consisting of any number of the following characters:
0060  *
0061  * <table border='1'>
0062  * <tr>
0063  * <th>Character</th>
0064  * <th>Meaning</th>
0065  * </tr>
0066  * <tr>
0067  * <td valign='top' rowspan='1' colspan='1'>i</td>
0068  * <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags">
0069  * Ignore case</a> when matching the regular expression.</td>
0070  * </tr>
0071  * <tr>
0072  * <td valign='top' rowspan='1' colspan='1'>m</td>
0073  * <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags">
0074  * Multi-line mode</a>. The meta characters "^" and "$" will match the beginning and end of lines.</td>
0075  * </tr>
0076  * <tr>
0077  * <td valign='top' rowspan='1' colspan='1'>s</td>
0078  * <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags">
0079  * Single-line mode</a>. The meta character "." will match a newline character.</td>
0080  * </tr>
0081  * <tr>
0082  * <td valign='top' rowspan='1' colspan='1'>x</td>
0083  * <td valign='top' rowspan='1' colspan='1'>Allow extended comments.</td>
0084  * </tr>
0085  * <tr>
0086  * <td valign='top' rowspan='1' colspan='1'>F</td>
0087  * <td valign='top' rowspan='1' colspan='1'>Prohibit the fixed string optimization.</td>
0088  * </tr>
0089  * <tr>
0090  * <td valign='top' rowspan='1' colspan='1'>H</td>
0091  * <td valign='top' rowspan='1' colspan='1'>Prohibit the head character optimization.</td>
0092  * </tr>
0093  * <tr>
0094  * <td valign='top' rowspan='1' colspan='1'>X</td>
0095  * <td valign='top' rowspan='1' colspan='1'>Parse the regular expression according to the
0096  * <a href="http://www.w3.org/TR/xmlschema-2/#regexs">XML Schema regular expression syntax</a>.</td>
0097  * </tr>
0098  * </table>
0099  */
0100 class XMLUTIL_EXPORT RegularExpression : public XMemory
0101 {
0102 public:
0103     // -----------------------------------------------------------------------
0104     //  Public Constructors and Destructor
0105     // -----------------------------------------------------------------------
0106 
0107     /** @name Constructors and destructor */
0108     //@{
0109 
0110     /** Parses the given regular expression.
0111       *
0112       * @param pattern the regular expression in the local code page
0113       * @param manager the memory manager to use
0114       */
0115     RegularExpression
0116     (
0117         const char* const pattern
0118         , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
0119     );
0120 
0121     /** Parses the given regular expression using the options specified.
0122       *
0123       * @param pattern the regular expression in the local code page
0124       * @param options the options string in the local code page
0125       * @param manager the memory manager to use
0126       */
0127     RegularExpression
0128     (
0129         const char* const pattern
0130         , const char* const options
0131         , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
0132     );
0133 
0134     /** Parses the given regular expression.
0135       *
0136       * @param pattern the regular expression
0137       * @param manager the memory manager to use
0138       */
0139     RegularExpression
0140     (
0141         const XMLCh* const pattern
0142         , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
0143     );
0144 
0145     /** Parses the given regular expression using the options specified.
0146       *
0147       * @param pattern the regular expression
0148       * @param options the options string
0149       * @param manager the memory manager to use
0150       */
0151     RegularExpression
0152     (
0153         const XMLCh* const pattern
0154         , const XMLCh* const options
0155         , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
0156     );
0157 
0158     virtual ~RegularExpression();
0159 
0160     //@}
0161 
0162     // -----------------------------------------------------------------------
0163     //  Public Constants
0164     // -----------------------------------------------------------------------
         // Option flag values; only declared here, the values are defined outside
         // this header. They are meant to be tested with isSet().
         // NOTE(review): presumably one constant per option character listed in
         // the class description ("i", "s", "m", "x", "H", "F", "X") - confirm
         // against the implementation file.
0165     static const unsigned int   IGNORE_CASE;
0166     static const unsigned int   SINGLE_LINE;
0167     static const unsigned int   MULTIPLE_LINE;
0168     static const unsigned int   EXTENDED_COMMENT;
0169     static const unsigned int   PROHIBIT_HEAD_CHARACTER_OPTIMIZATION;
0170     static const unsigned int   PROHIBIT_FIXED_STRING_OPTIMIZATION;
0171     static const unsigned int   XMLSCHEMA_MODE;
         // Character classification values.
         // NOTE(review): not used anywhere in this header; presumably consumed by
         // the word-boundary handling in the implementation - confirm.
0172     typedef enum
0173     {
0174         wordTypeIgnore = 0,
0175         wordTypeLetter = 1,
0176         wordTypeOther = 2
0177     } wordType;
0178 
0179     // -----------------------------------------------------------------------
0180     //  Public Helper methods
0181     // -----------------------------------------------------------------------
0182 
0183     /** @name Public helper methods */
0184     //@{
0185 
         /** Returns the option value that belongs to the option character ch.
           *
           * NOTE(review): defined outside this header; presumably maps the option
           * characters from the class description to the constants above - confirm.
           */
0186     static int getOptionValue(const XMLCh ch);
         /** Tests whether every bit of flag is also set in options.
           *
           * @param options the option bit set to inspect
           * @param flag    the bit (or combination of bits) to look for
           *
           * @return true when (options & flag) == flag; note that a flag of 0
           * therefore always yields true.
           */
0187     static bool isSet(const int options, const int flag);
0188 
0189     //@}
0190 
0191     // -----------------------------------------------------------------------
0192     //  Matching methods
0193     // -----------------------------------------------------------------------
0194 
0195     /** @name Matching methods */
0196     //@{
0197 
0198     /** Tries to match the given null terminated string against the regular expression, returning
0199       * true if successful.
0200       *
0201       * @param matchString the string to match in the local code page
0202       * @param manager     the memory manager to use
0203       *
0204       * @return Whether the string matched the regular expression or not.
0205       */
0206     bool matches(const char* const matchString,
0207                  MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0208 
0209     /** Tries to match the given string between the specified start and end offsets
0210       * against the regular expression, returning true if successful.
0211       *
0212       * @param matchString the string to match in the local code page
0213       * @param start       the offset of the start of the string
0214       * @param end         the offset of the end of the string
0215       * @param manager     the memory manager to use
0216       *
0217       * @return Whether the string matched the regular expression or not.
0218       */
0219     bool matches(const char* const matchString, const XMLSize_t start, const XMLSize_t end,
0220                  MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0221 
0222     /** Tries to match the given null terminated string against the regular expression, returning
0223       * true if successful.
0224       *
0225       * @param matchString the string to match in the local code page
0226       * @param pMatch      a Match object, which will be populated with the offsets for the
0227       * regular expression match and sub-matches.
0228       * @param manager     the memory manager to use
0229       *
0230       * @return Whether the string matched the regular expression or not.
0231       */
0232     bool matches(const char* const matchString, Match* const pMatch,
0233                  MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0234 
0235     /** Tries to match the given string between the specified start and end offsets
0236       * against the regular expression, returning true if successful.
0237       *
0238       * @param matchString the string to match in the local code page
0239       * @param start       the offset of the start of the string
0240       * @param end         the offset of the end of the string
0241       * @param pMatch      a Match object, which will be populated with the offsets for the
0242       * regular expression match and sub-matches.
0243       * @param manager     the memory manager to use
0244       *
0245       * @return Whether the string matched the regular expression or not.
0246       */
0247     bool matches(const char* const matchString, const XMLSize_t start, const XMLSize_t end,
0248                  Match* const pMatch, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0249 
0250     /** Tries to match the given null terminated string against the regular expression, returning
0251       * true if successful.
0252       *
0253       * @param matchString the string to match
0254       * @param manager     the memory manager to use
0255       *
0256       * @return Whether the string matched the regular expression or not.
0257       */
0258     bool matches(const XMLCh* const matchString,
0259                  MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0260 
0261     /** Tries to match the given string between the specified start and end offsets
0262       * against the regular expression, returning true if successful.
0263       *
0264       * @param matchString the string to match
0265       * @param start       the offset of the start of the string
0266       * @param end         the offset of the end of the string
0267       * @param manager     the memory manager to use
0268       *
0269       * @return Whether the string matched the regular expression or not.
0270       */
0271     bool matches(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end,
0272                  MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0273 
0274     /** Tries to match the given null terminated string against the regular expression, returning
0275       * true if successful.
0276       *
0277       * @param matchString the string to match
0278       * @param pMatch      a Match object, which will be populated with the offsets for the
0279       * regular expression match and sub-matches.
0280       * @param manager     the memory manager to use
0281       *
0282       * @return Whether the string matched the regular expression or not.
0283       */
0284     bool matches(const XMLCh* const matchString, Match* const pMatch,
0285                  MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0286 
0287     /** Tries to match the given string between the specified start and end offsets
0288       * against the regular expression, returning true if successful.
0289       *
0290       * @param matchString the string to match
0291       * @param start       the offset of the start of the string
0292       * @param end         the offset of the end of the string
0293       * @param pMatch      a Match object, which will be populated with the offsets for the
0294       * regular expression match and sub-matches.
0295       * @param manager     the memory manager to use
0296       *
0297       * @return Whether the string matched the regular expression or not.
0298       */
0299     bool matches(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end,
0300                  Match* const pMatch, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0301 
0302     /** Tries to match the given string between the specified start and end offsets
0303       * against the regular expression. The subEx vector is populated with the details
0304       * for every non-overlapping occurrence of a match in the string.
0305       *
0306       * @param matchString the string to match
0307       * @param start       the offset of the start of the string
0308       * @param end         the offset of the end of the string
0309       * @param subEx       a RefVectorOf Match objects, populated with the offsets for the
0310       * regular expression match and sub-matches.
0311       * @param manager     the memory manager to use
0312       */
0313     void allMatches(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end,
0314                     RefVectorOf<Match> *subEx, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0315 
0316     //@}
0317 
0318     // -----------------------------------------------------------------------
0319     //  Tokenize methods
0320     // -----------------------------------------------------------------------
0321     // Note: The caller owns the string vector that is returned, and is responsible
0322     //       for deleting it.
0323 
0324     /** @name Tokenize methods */
0325     //@{
0326 
0327     /** Tokenizes the null terminated string according to the regular expression, returning
0328       * the parts of the string that do not match the regular expression.
0329       *
0330       * @param matchString the string to match in the local code page
0331       * @param manager     the memory manager to use
0332       *
0333       * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the
0334       * given MemoryManager. The caller owns the string vector that is returned, and is responsible for
0335       * deleting it.
0336       */
0337     RefArrayVectorOf<XMLCh> *tokenize(const char* const matchString,
0338                                       MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0339 
0340     /** Tokenizes the string between the specified start and end offsets according to the regular
0341       * expression, returning the parts of the string that do not match the regular expression.
0342       *
0343       * @param matchString the string to match in the local code page
0344       * @param start       the offset of the start of the string
0345       * @param end         the offset of the end of the string
0346       * @param manager     the memory manager to use
0347       *
0348       * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the
0349       * given MemoryManager. The caller owns the string vector that is returned, and is responsible for
0350       * deleting it.
0351       */
0352     RefArrayVectorOf<XMLCh> *tokenize(const char* const matchString, const XMLSize_t start, const XMLSize_t end,
0353                                       MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0354 
0355     /** Tokenizes the null terminated string according to the regular expression, returning
0356       * the parts of the string that do not match the regular expression.
0357       *
0358       * @param matchString the string to match
0359       * @param manager     the memory manager to use
0360       *
0361       * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the
0362       * given MemoryManager. The caller owns the string vector that is returned, and is responsible for
0363       * deleting it.
0364       */
0365     RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString,
0366                                       MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0367 
0368     /** Tokenizes the string between the specified start and end offsets according to the regular
0369       * expression, returning the parts of the string that do not match the regular expression.
0370       *
0371       * @param matchString the string to match
0372       * @param start       the offset of the start of the string
0373       * @param end         the offset of the end of the string
0374       * @param manager     the memory manager to use
0375       *
0376       * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the
0377       * given MemoryManager. The caller owns the string vector that is returned, and is responsible for
0378       * deleting it.
0379       */
0380     RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end,
0381                                       MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0382 
0383     //@}
0384 
0385     // -----------------------------------------------------------------------
0386     //  Replace methods
0387     // -----------------------------------------------------------------------
0388     // Note: The caller owns the XMLCh* that is returned, and is responsible for
0389     //       deleting it.
0390 
0391     /** @name Replace methods */
0392     //@{
0393 
0394     /** Performs a search and replace on the given null terminated string, replacing
0395       * any substring that matches the regular expression with a string derived from
0396       * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
0397       *
0398       * @param matchString   the string to match in the local code page
0399       * @param replaceString the string to replace in the local code page
0400       * @param manager       the memory manager to use
0401       *
0402       * @return The resulting string allocated using the given MemoryManager. The caller owns the string
0403       * that is returned, and is responsible for deleting it.
0404       */
0405     XMLCh *replace(const char* const matchString, const char* const replaceString,
0406                    MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0407 
0408     /** Performs a search and replace on the given string between the specified start and end offsets, replacing
0409       * any substring that matches the regular expression with a string derived from
0410       * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
0411       *
0412       * @param matchString   the string to match in the local code page
0413       * @param replaceString the string to replace in the local code page
0414       * @param start         the offset of the start of the string
0415       * @param end           the offset of the end of the string
0416       * @param manager       the memory manager to use
0417       *
0418       * @return The resulting string allocated using the given MemoryManager. The caller owns the string
0419       * that is returned, and is responsible for deleting it.
0420       */
0421     XMLCh *replace(const char* const matchString, const char* const replaceString,
0422                    const XMLSize_t start, const XMLSize_t end,
0423                    MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0424 
0425     /** Performs a search and replace on the given null terminated string, replacing
0426       * any substring that matches the regular expression with a string derived from
0427       * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
0428       *
0429       * @param matchString   the string to match
0430       * @param replaceString the string to replace
0431       * @param manager       the memory manager to use
0432       *
0433       * @return The resulting string allocated using the given MemoryManager. The caller owns the string
0434       * that is returned, and is responsible for deleting it.
0435       */
0436     XMLCh *replace(const XMLCh* const matchString, const XMLCh* const replaceString,
0437                    MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0438 
0439     /** Performs a search and replace on the given string between the specified start and end offsets, replacing
0440       * any substring that matches the regular expression with a string derived from
0441       * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
0442       *
0443       * @param matchString   the string to match
0444       * @param replaceString the string to replace
0445       * @param start         the offset of the start of the string
0446       * @param end           the offset of the end of the string
0447       * @param manager       the memory manager to use
0448       *
0449       * @return The resulting string allocated using the given MemoryManager. The caller owns the string
0450       * that is returned, and is responsible for deleting it.
0451       */
0452     XMLCh *replace(const XMLCh* const matchString, const XMLCh* const replaceString,
0453                    const XMLSize_t start, const XMLSize_t end,
0454                    MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
0455 
0456     //@}
0457 
0458     // -----------------------------------------------------------------------
0459     //  Static initialize and cleanup methods
0460     // -----------------------------------------------------------------------
0461 
0462     /** @name Static initialize and cleanup methods */
0463     //@{
0464 
         /** Class-wide initialisation hook.
           *
           * NOTE(review): defined outside this header; presumably sets up the
           * shared static state (such as fWordRange) - confirm.
           *
           * @param memoryManager the memory manager handed to the initialisation
           */
0465     static void
0466     staticInitialize(MemoryManager*  memoryManager);
0467 
         /** Class-wide cleanup hook: resets the static fWordRange pointer to 0.
           * The object it pointed to is not deleted by this method.
           */
0468     static void
0469     staticCleanup();
0470 
0471     //@}
0472 
0473 protected:
         // Virtual hook returning a RegxParser for the given options.
         // NOTE(review): defined outside this header; ownership of the returned
         // parser is not visible here - confirm before relying on it.
0474     virtual RegxParser* getRegexParser(const int options, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
0475 
0476     // -----------------------------------------------------------------------
0477     //  Cleanup methods
0478     // -----------------------------------------------------------------------
         // Hands fPattern and fFixedString back to fMemoryManager and deletes
         // fBMPattern and fTokenFactory.
0479     void cleanUp();
0480 
0481     // -----------------------------------------------------------------------
0482     //  Setter methods
0483     // -----------------------------------------------------------------------
         // NOTE(review): defined outside this header; presumably parses the given
         // pattern/options and fills in the data members below - confirm.
0484     void setPattern(const XMLCh* const pattern, const XMLCh* const options=0);
0485 
0486     // -----------------------------------------------------------------------
0487     //  Protected data types
0488     // -----------------------------------------------------------------------
         // State object passed as the Context* argument of the match helpers
         // declared further down. All data members are public, and only
         // getString() is defined inline.
         // NOTE(review): presumably holds the subject string, the offsets being
         // matched and the Match object being filled in - confirm against the
         // implementation file.
0489     class XMLUTIL_EXPORT Context : public XMemory
0490     {
0491         public :
0492             Context(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
0493             Context(Context* src);
0494             ~Context();
0495 
0496             Context& operator= (const Context& other);
                 // Returns the fString pointer as stored.
0497             inline const XMLCh* getString() const { return fString; }
0498             void reset(const XMLCh* const string, const XMLSize_t stringLen,
0499                        const XMLSize_t start, const XMLSize_t limit, const int noClosures,
0500                        const unsigned int options);
0501             bool nextCh(XMLInt32& ch, XMLSize_t& offset);
0502 
0503             bool           fAdoptMatch;
0504             XMLSize_t      fStart;
0505             XMLSize_t      fLimit;
0506             XMLSize_t      fLength;    // fLimit - fStart
0507             int            fSize;
0508             XMLSize_t      fStringMaxLen;
0509             int*           fOffsets;
0510             Match*         fMatch;
0511             const XMLCh*   fString;
0512             unsigned int   fOptions;
0513             MemoryManager* fMemoryManager;
0514     };
0515 
0516     // -----------------------------------------------------------------------
0517     //  Unimplemented constructors and operators
0518     // -----------------------------------------------------------------------
         // Declared in the protected section and, per the heading above, left
         // without a definition: RegularExpression objects are not meant to be
         // copied or assigned.
0519     RegularExpression(const RegularExpression&);
0520     RegularExpression& operator=(const RegularExpression&);
0521 
0522     // -----------------------------------------------------------------------
0523     //  Protected Helper methods
0524     // -----------------------------------------------------------------------
0525     void prepare();
0526     int parseOptions(const XMLCh* const options);
0527 
0528     /**
0529       *    Matching helpers
0530       */
0531     int match(Context* const context, const Op* const operations, XMLSize_t offset) const;
0532     bool matchIgnoreCase(const XMLInt32 ch1, const XMLInt32 ch2) const;
0533 
0534     /**
0535       *    Helper methods used by match(Context* ...)
0536       */
0537     bool matchChar(Context* const context, const XMLInt32 ch, XMLSize_t& offset,
0538                    const bool ignoreCase) const;
0539     bool matchDot(Context* const context, XMLSize_t& offset) const;
0540     bool matchRange(Context* const context, const Op* const op,
0541                     XMLSize_t& offset, const bool ignoreCase) const;
0542     bool matchAnchor(Context* const context, const XMLInt32 ch,
0543                      const XMLSize_t offset) const;
0544     bool matchBackReference(Context* const context, const XMLInt32 ch,
0545                             XMLSize_t& offset, const bool ignoreCase) const;
0546     bool matchString(Context* const context, const XMLCh* const literal,
0547                      XMLSize_t& offset, const bool ignoreCase) const;
0548     int  matchUnion(Context* const context, const Op* const op, XMLSize_t offset) const;
0549     int matchCapture(Context* const context, const Op* const op, XMLSize_t offset) const;
0550 
0551     /**
0552      *    Replace helpers
0553      */
0554     void subInExp(const XMLCh* const repString,
0555                   const XMLCh* const origString,
0556                   const Match* subEx,
0557                   XMLBuffer &result,
0558                   MemoryManager* const manager) const;
0559     /**
0560      *    Converts a token tree into an operation tree
0561      */
0562     void compile(const Token* const token);
0563     Op*  compile(const Token* const token, Op* const next,
0564                  const bool reverse);
0565     /**
0566       *    Helper methods used by compile
0567       *
           *    All four are defined inline below the class. Each receives the
           *    token to translate, the operation that follows it (next) and the
           *    reverse flag, and returns the first operation of the compiled
           *    fragment.
0568       */
0568     Op* compileUnion(const Token* const token, Op* const next,
0569                      const bool reverse);
0570     Op* compileParenthesis(const Token* const token, Op* const next,
0571                            const bool reverse);
0572     Op* compileConcat(const Token* const token, Op* const next,
0573                       const bool reverse);
0574     Op* compileClosure(const Token* const token, Op* const next,
0575                        const bool reverse, const Token::tokType tkType);
0576 
0577     bool doTokenOverlap(const Op* op, Token* token);
0578 
0579     // -----------------------------------------------------------------------
0580     //  Protected data members
0581     // -----------------------------------------------------------------------
         // Ownership visible in this header: cleanUp() releases fPattern and
         // fFixedString through fMemoryManager and deletes fBMPattern and
         // fTokenFactory. fWordRange is static and is only reset (not deleted)
         // by staticCleanup().
0582     bool               fHasBackReferences;
0583     bool               fFixedStringOnly;
0584     int                fNoGroups;
0585     XMLSize_t          fMinLength;
0586     unsigned int       fNoClosures;
0587     unsigned int       fOptions;
0588     const BMPattern*   fBMPattern;
0589     XMLCh*             fPattern;
0590     XMLCh*             fFixedString;
0591     const Op*          fOperations;
0592     Token*             fTokenTree;
0593     RangeToken*        fFirstChar;
0594     static RangeToken* fWordRange;
0595     OpFactory          fOpFactory;
0596     TokenFactory*      fTokenFactory;
0597     MemoryManager*     fMemoryManager;
0598 };
0599 
0600 
0601 
0602   // -----------------------------------------------------------------------
0603   //  RegularExpression: Static initialize and cleanup methods
0604   // -----------------------------------------------------------------------
0605   inline void RegularExpression::staticCleanup()
0606   {
0607       fWordRange = 0;
0608   }
0609 
0610   // ---------------------------------------------------------------------------
0611   //  RegularExpression: Cleanup methods
0612   // ---------------------------------------------------------------------------
0613   inline void RegularExpression::cleanUp() {
0614 
0615       fMemoryManager->deallocate(fPattern);//delete [] fPattern;
0616       fMemoryManager->deallocate(fFixedString);//delete [] fFixedString;
0617       delete fBMPattern;
0618       delete fTokenFactory;
0619   }
0620 
0621   // ---------------------------------------------------------------------------
0622   //  RegularExpression: Helper methods
0623   // ---------------------------------------------------------------------------
0624   inline bool RegularExpression::isSet(const int options, const int flag) {
0625 
0626       return (options & flag) == flag;
0627   }
0628 
0629 
0630   inline Op* RegularExpression::compileUnion(const Token* const token,
0631                                              Op* const next,
0632                                              const bool reverse) {
0633 
0634       XMLSize_t tokSize = token->size();
0635       UnionOp* uniOp = fOpFactory.createUnionOp(tokSize);
0636 
0637       for (XMLSize_t i=0; i<tokSize; i++) {
0638 
0639           uniOp->addElement(compile(token->getChild(i), next, reverse));
0640       }
0641 
0642       return uniOp;
0643   }
0644 
0645 
0646   inline Op* RegularExpression::compileParenthesis(const Token* const token,
0647                                                    Op* const next,
0648                                                    const bool reverse) {
0649 
0650       if (token->getNoParen() == 0)
0651           return compile(token->getChild(0), next, reverse);
0652 
0653       Op* captureOp    = 0;
0654 
0655       if (reverse) {
0656 
0657           captureOp = fOpFactory.createCaptureOp(token->getNoParen(), next);
0658           captureOp = compile(token->getChild(0), captureOp, reverse);
0659 
0660           return fOpFactory.createCaptureOp(-token->getNoParen(), captureOp);
0661       }
0662 
0663       captureOp = fOpFactory.createCaptureOp(-token->getNoParen(), next);
0664       captureOp = compile(token->getChild(0), captureOp, reverse);
0665 
0666       return fOpFactory.createCaptureOp(token->getNoParen(), captureOp);
0667   }
0668 
0669   inline Op* RegularExpression::compileConcat(const Token* const token,
0670                                               Op*  const next,
0671                                               const bool reverse) {
0672 
0673       Op* ret = next;
0674       XMLSize_t tokSize = token->size();
0675 
0676       if (!reverse) {
0677 
0678           for (XMLSize_t i= tokSize; i>0; i--) {
0679               ret = compile(token->getChild(i-1), ret, false);
0680           }
0681       }
0682       else {
0683 
0684           for (XMLSize_t i= 0; i< tokSize; i++) {
0685               ret = compile(token->getChild(i), ret, true);
0686           }
0687       }
0688 
0689       return ret;
0690   }
0691 
0692   inline Op* RegularExpression::compileClosure(const Token* const token,
0693                                                Op* const next,
0694                                                const bool reverse,
0695                                                const Token::tokType tkType) {
0696 
0697       Op*    ret      = 0;
0698       Token* childTok = token->getChild(0);
0699       int    min      = token->getMin();
0700       int    max      = token->getMax();
0701 
0702       if (min >= 0 && min == max) {
0703 
0704           ret = next;
0705           for (int i=0; i< min; i++) {
0706               ret = compile(childTok, ret, reverse);
0707           }
0708 
0709           return ret;
0710       }
0711 
0712       if (min > 0 && max > 0)
0713           max -= min;
0714 
0715       if (max > 0) {
0716 
0717           ret = next;
0718           for (int i=0; i<max; i++) {
0719 
0720               ChildOp* childOp = fOpFactory.createQuestionOp(
0721                   tkType == Token::T_NONGREEDYCLOSURE);
0722 
0723               childOp->setNextOp(next);
0724               childOp->setChild(compile(childTok, ret, reverse));
0725               ret = childOp;
0726           }
0727       }
0728       else {
0729 
0730           ChildOp* childOp = 0;
0731 
0732           if (tkType == Token::T_NONGREEDYCLOSURE) {
0733               childOp = fOpFactory.createNonGreedyClosureOp();
0734           }
0735           else {
0736 
0737               if (childTok->getMinLength() == 0)
0738                   childOp = fOpFactory.createClosureOp(fNoClosures++);
0739               else
0740                   childOp = fOpFactory.createClosureOp(-1);
0741           }
0742 
0743           childOp->setNextOp(next);
0744           if(next==NULL || !doTokenOverlap(next, childTok))
0745           {
0746               childOp->setOpType(tkType == Token::T_NONGREEDYCLOSURE?Op::O_FINITE_NONGREEDYCLOSURE:Op::O_FINITE_CLOSURE);
0747               childOp->setChild(compile(childTok, NULL, reverse));
0748           }
0749           else
0750           {
0751               childOp->setChild(compile(childTok, childOp, reverse));
0752           }
0753           ret = childOp;
0754       }
0755 
0756       if (min > 0) {
0757 
0758           for (int i=0; i< min; i++) {
0759               ret = compile(childTok, ret, reverse);
0760           }
0761       }
0762 
0763       return ret;
0764   }
0765 
0766 XERCES_CPP_NAMESPACE_END
0767 
0768 #endif
0769 /**
0770   * End of file RegularExpression.hpp
0771   */
0772