Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2024-05-18 08:30:10

0001 // Copyright (c) 2005, Google Inc.
0002 // All rights reserved.
0003 //
0004 // Redistribution and use in source and binary forms, with or without
0005 // modification, are permitted provided that the following conditions are
0006 // met:
0007 //
0008 //     * Redistributions of source code must retain the above copyright
0009 // notice, this list of conditions and the following disclaimer.
0010 //     * Redistributions in binary form must reproduce the above
0011 // copyright notice, this list of conditions and the following disclaimer
0012 // in the documentation and/or other materials provided with the
0013 // distribution.
0014 //     * Neither the name of Google Inc. nor the names of its
0015 // contributors may be used to endorse or promote products derived from
0016 // this software without specific prior written permission.
0017 //
0018 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
0019 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
0020 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0021 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
0022 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0023 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0024 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
0025 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
0026 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0027 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
0028 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0029 //
0030 // Author: Sanjay Ghemawat
0031 // Support for PCRE_XXX modifiers added by Giuseppe Maxia, July 2005
0032 
0033 #ifndef _PCRECPP_H
0034 #define _PCRECPP_H
0035 
0036 // C++ interface to the pcre regular-expression library.  RE supports
0037 // Perl-style regular expressions (with extensions like \d, \w, \s,
0038 // ...).
0039 //
0040 // -----------------------------------------------------------------------
0041 // REGEXP SYNTAX:
0042 //
0043 // This module is part of the pcre library and hence supports its syntax
0044 // for regular expressions.
0045 //
0046 // The syntax is pretty similar to Perl's.  For those not familiar
0047 // with Perl's regular expressions, here are some examples of the most
0048 // commonly used extensions:
0049 //
0050 //   "hello (\\w+) world"  -- \w matches a "word" character
0051 //   "version (\\d+)"      -- \d matches a digit
0052 //   "hello\\s+world"      -- \s matches any whitespace character
0053 //   "\\b(\\w+)\\b"        -- \b matches empty string at a word boundary
0054 //   "(?i)hello"           -- (?i) turns on case-insensitive matching
0055 //   "/\\*(.*?)\\*/"       -- .*? matches . minimum no. of times possible
0056 //
0057 // -----------------------------------------------------------------------
0058 // MATCHING INTERFACE:
0059 //
0060 // The "FullMatch" operation checks that supplied text matches a
0061 // supplied pattern exactly.
0062 //
0063 // Example: successful match
0064 //    pcrecpp::RE re("h.*o");
0065 //    re.FullMatch("hello");
0066 //
0067 // Example: unsuccessful match (requires full match):
0068 //    pcrecpp::RE re("e");
0069 //    !re.FullMatch("hello");
0070 //
0071 // Example: creating a temporary RE object:
0072 //    pcrecpp::RE("h.*o").FullMatch("hello");
0073 //
0074 // You can pass in a "const char*" or a "string" for "text".  The
0075 // examples below tend to use a const char*.
0076 //
0077 // You can, as in the different examples above, store the RE object
0078 // explicitly in a variable or use a temporary RE object.  The
0079 // examples below use one mode or the other arbitrarily.  Either
0080 // could correctly be used for any of these examples.
0081 //
0082 // -----------------------------------------------------------------------
0083 // MATCHING WITH SUB-STRING EXTRACTION:
0084 //
0085 // You can supply extra pointer arguments to extract matched subpieces.
0086 //
0087 // Example: extracts "ruby" into "s" and 1234 into "i"
0088 //    int i;
0089 //    string s;
0090 //    pcrecpp::RE re("(\\w+):(\\d+)");
0091 //    re.FullMatch("ruby:1234", &s, &i);
0092 //
0093 // Example: does not try to extract any extra sub-patterns
0094 //    re.FullMatch("ruby:1234", &s);
0095 //
0096 // Example: does not try to extract into NULL
0097 //    re.FullMatch("ruby:1234", NULL, &i);
0098 //
0099 // Example: integer overflow causes failure
0100 //    !re.FullMatch("ruby:1234567891234", NULL, &i);
0101 //
0102 // Example: fails because there aren't enough sub-patterns:
0103 //    !pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s);
0104 //
0105 // Example: fails because string cannot be stored in integer
0106 //    !pcrecpp::RE("(.*)").FullMatch("ruby", &i);
0107 //
0108 // The provided pointer arguments can be pointers to any scalar numeric
0109 // type, or one of
0110 //    string        (matched piece is copied to string)
0111 //    StringPiece   (StringPiece is mutated to point to matched piece)
0112 //    T             (where "bool T::ParseFrom(const char*, int)" exists)
0113 //    NULL          (the corresponding matched sub-pattern is not copied)
0114 //
0115 // CAVEAT: An optional sub-pattern that does not exist in the matched
0116 // string is assigned the empty string.  Therefore, the following will
0117 // return false (because the empty string is not a valid number):
0118 //    int number;
0119 //    pcrecpp::RE("[a-z]+(\\d+)?").FullMatch("abc", &number);
0120 //
0121 // -----------------------------------------------------------------------
0122 // DO_MATCH
0123 //
0124 // The matching interface supports at most 16 arguments per call.
0125 // If you need more, consider using the more general interface
0126 // pcrecpp::RE::DoMatch().  See its declaration below for the signature.
0127 //
0128 // -----------------------------------------------------------------------
0129 // PARTIAL MATCHES
0130 //
0131 // You can use the "PartialMatch" operation when you want the pattern
0132 // to match any substring of the text.
0133 //
0134 // Example: simple search for a string:
0135 //    pcrecpp::RE("ell").PartialMatch("hello");
0136 //
0137 // Example: find first number in a string:
0138 //    int number;
0139 //    pcrecpp::RE re("(\\d+)");
0140 //    re.PartialMatch("x*100 + 20", &number);
0141 //    assert(number == 100);
0142 //
0143 // -----------------------------------------------------------------------
0144 // UTF-8 AND THE MATCHING INTERFACE:
0145 //
0146 // By default, pattern and text are plain text, one byte per character.
0147 // The UTF8 flag, passed to the constructor, causes both pattern
0148 // and string to be treated as UTF-8 text, still a byte stream but
0149 // potentially multiple bytes per character. In practice, the text
0150 // is likelier to be UTF-8 than the pattern, but the match returned
0151 // may depend on the UTF8 flag, so always use it when matching
0152 // UTF8 text.  E.g., "." will match one byte normally but with UTF8
0153 // set may match up to three bytes of a multi-byte character.
0154 //
0155 // Example:
0156 //    pcrecpp::RE_Options options;
0157 //    options.set_utf8(true);
0158 //    pcrecpp::RE re(utf8_pattern, options);
0159 //    re.FullMatch(utf8_string);
0160 //
0161 // Example: using the convenience function UTF8():
0162 //    pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8());
0163 //    re.FullMatch(utf8_string);
0164 //
0165 // NOTE: The UTF8 option is ignored if pcre was not configured with the
0166 //       --enable-utf8 flag.
0167 //
0168 // -----------------------------------------------------------------------
0169 // PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE
0170 //
0171 // PCRE defines some modifiers to change the behavior of the regular
0172 // expression engine.
0173 // The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle
0174 // to pass such modifiers to a RE class.
0175 //
0176 // Currently, the following modifiers are supported
0177 //
0178 //    modifier              description               Perl corresponding
0179 //
0180 //    PCRE_CASELESS         case insensitive match    /i
0181 //    PCRE_MULTILINE        multiple lines match      /m
0182 //    PCRE_DOTALL           dot matches newlines      /s
0183 //    PCRE_DOLLAR_ENDONLY   $ matches only at end     N/A
0184 //    PCRE_EXTRA            strict escape parsing     N/A
0185 //    PCRE_EXTENDED         ignore whitespaces        /x
0186 //    PCRE_UTF8             handles UTF8 chars        built-in
0187 //    PCRE_UNGREEDY         reverses * and *?         N/A
0188 //    PCRE_NO_AUTO_CAPTURE  disables capturing parens N/A (*)
0189 //
0190 // (For a full account on how each modifier works, please check the
0191 // PCRE API reference manual).
0192 //
0193 // (*) Both Perl and PCRE allow non-capturing parentheses by means of the
0194 // "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
0195 // capture, while (ab|cd) does.
0196 //
0197 // For each modifier, there are two member functions whose name is made
0198 // out of the modifier in lowercase, without the "PCRE_" prefix. For
0199 // instance, PCRE_CASELESS is handled by
0200 //    bool caseless(),
0201 // which returns true if the modifier is set, and
0202 //    RE_Options & set_caseless(bool),
0203 // which sets or unsets the modifier.
0204 //
0205 // Moreover, PCRE_EXTRA_MATCH_LIMIT can be accessed through the
0206 // set_match_limit() and match_limit() member functions.
0207 // Setting match_limit to a non-zero value will limit the execution of
0208 // pcre to keep it from doing bad things like blowing the stack or taking
0209 // an eternity to return a result.  A value of 5000 is good enough to stop
0210 // stack blowup in a 2MB thread stack.  Setting match_limit to zero will
0211 // disable match limiting.  Alternately, you can set match_limit_recursion()
0212 // which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much pcre
0213 // recurses.  match_limit() caps the number of matches pcre does;
0214 // match_limit_recursion() caps the depth of recursion.
0215 //
0216 // Normally, to pass one or more modifiers to a RE class, you declare
0217 // a RE_Options object, set the appropriate options, and pass this
0218 // object to a RE constructor. Example:
0219 //
0220 //    RE_Options opt;
0221 //    opt.set_caseless(true);
0222 //
0223 //    if (RE("HELLO", opt).PartialMatch("hello world")) ...
0224 //
0225 // RE_Options has two constructors. The default constructor takes no
0226 // arguments and creates a set of flags that are off by default.
0227 //
0228 // The optional parameter 'option_flags' is to facilitate transfer
0229 // of legacy code from C programs.  This lets you do
0230 //    RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
0231 //
0232 // But new code is better off doing
0233 //    RE(pattern,
0234 //      RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
0235 // (See below)
0236 //
0237 // If you are going to pass one of the most used modifiers, there are some
0238 // convenience functions that return a RE_Options class with the
0239 // appropriate modifier already set:
0240 // CASELESS(), UTF8(), MULTILINE(), DOTALL(), EXTENDED()
0241 //
0242 // If you need to set several options at once, and you don't want to go
0243 // through the pains of declaring a RE_Options object and setting several
0244 // options, there is a parallel method that gives you such ability on the
0245 // fly. You can concatenate several set_xxxxx member functions, since each
0246 // of them returns a reference to its class object.  e.g.: to pass
0247 // PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one
0248 // statement, you may write
0249 //
0250 //    RE(" ^ xyz \\s+ .* blah$", RE_Options()
0251 //                            .set_caseless(true)
0252 //                            .set_extended(true)
0253 //                            .set_multiline(true)).PartialMatch(sometext);
0254 //
0255 // -----------------------------------------------------------------------
0256 // SCANNING TEXT INCREMENTALLY
0257 //
0258 // The "Consume" operation may be useful if you want to repeatedly
0259 // match regular expressions at the front of a string and skip over
0260 // them as they match.  This requires use of the "StringPiece" type,
0261 // which represents a sub-range of a real string.  Like RE, StringPiece
0262 // is defined in the pcrecpp namespace.
0263 //
0264 // Example: read lines of the form "var = value" from a string.
0265 //    string contents = ...;                 // Fill string somehow
0266 //    pcrecpp::StringPiece input(contents);  // Wrap in a StringPiece
0267 //
0268 //    string var;
0269 //    int value;
0270 //    pcrecpp::RE re("(\\w+) = (\\d+)\n");
0271 //    while (re.Consume(&input, &var, &value)) {
0272 //      ...;
0273 //    }
0274 //
0275 // Each successful call to "Consume" will set "var/value", and also
0276 // advance "input" so it points past the matched text.
0277 //
0278 // The "FindAndConsume" operation is similar to "Consume" but does not
0279 // anchor your match at the beginning of the string.  For example, you
0280 // could extract all words from a string by repeatedly calling
0281 //     pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word)
0282 //
0283 // -----------------------------------------------------------------------
0284 // PARSING HEX/OCTAL/C-RADIX NUMBERS
0285 //
0286 // By default, if you pass a pointer to a numeric value, the
0287 // corresponding text is interpreted as a base-10 number.  You can
0288 // instead wrap the pointer with a call to one of the operators Hex(),
0289 // Octal(), or CRadix() to interpret the text in another base.  The
0290 // CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
0291 // prefixes, but defaults to base-10.
0292 //
0293 // Example:
0294 //   int a, b, c, d;
0295 //   pcrecpp::RE re("(.*) (.*) (.*) (.*)");
0296 //   re.FullMatch("100 40 0100 0x40",
0297 //                pcrecpp::Octal(&a), pcrecpp::Hex(&b),
0298 //                pcrecpp::CRadix(&c), pcrecpp::CRadix(&d));
0299 // will leave 64 in a, b, c, and d.
0300 //
0301 // -----------------------------------------------------------------------
0302 // REPLACING PARTS OF STRINGS
0303 //
0304 // You can replace the first match of "pattern" in "str" with
0305 // "rewrite".  Within "rewrite", backslash-escaped digits (\1 to \9)
0306 // can be used to insert text matching corresponding parenthesized
0307 // group from the pattern.  \0 in "rewrite" refers to the entire
0308 // matching text.  E.g.,
0309 //
0310 //   string s = "yabba dabba doo";
0311 //   pcrecpp::RE("b+").Replace("d", &s);
0312 //
0313 // will leave "s" containing "yada dabba doo".  The result is true if
0314 // the pattern matches and a replacement occurs, or false otherwise.
0315 //
0316 // GlobalReplace() is like Replace(), except that it replaces all
0317 // occurrences of the pattern in the string with the rewrite.
0318 // Replacements are not subject to re-matching.  E.g.,
0319 //
0320 //   string s = "yabba dabba doo";
0321 //   pcrecpp::RE("b+").GlobalReplace("d", &s);
0322 //
0323 // will leave "s" containing "yada dada doo".  It returns the number
0324 // of replacements made.
0325 //
0326 // Extract() is like Replace(), except that if the pattern matches,
0327 // "rewrite" is copied into "out" (an additional argument) with
0328 // substitutions.  The non-matching portions of "text" are ignored.
0329 // Returns true iff a match occurred and the extraction happened
0330 // successfully.  If no match occurs, the string is left unaffected.
0331 
0332 
0333 #include <string>
0334 #include <pcre.h>
0335 #include <pcrecpparg.h>   // defines the Arg class
0336 // This isn't technically needed here, but we include it
0337 // anyway so folks who include pcrecpp.h don't have to.
0338 #include <pcre_stringpiece.h>
0339 
0340 namespace pcrecpp {
0341 
     // Helper for the RE_Options set_xxx() mutators.  Sets the bits of "o" in
     // all_options_ when "b" is true, clears them otherwise, and then returns
     // *this from the enclosing member function.
     // NOTE(review): this expands to several statements (an if/else followed
     // by a return) and is not wrapped in a block, so it is only safe as the
     // entire body of the function that uses it.
0342 #define PCRE_SET_OR_CLEAR(b, o) \
0343     if (b) all_options_ |= (o); else all_options_ &= ~(o); \
0344     return *this
0345 
     // Helper for the RE_Options accessors: true iff every bit of "o" is set
     // in all_options_.
     // NOTE(review): neither the expansion as a whole nor the argument "o" is
     // parenthesized.  Pass a single flag name only and use the result as a
     // complete expression (as in "return PCRE_IS_SET(X);"); combining it with
     // other operators, or passing an expression such as A|B, would not
     // evaluate as intended.
0346 #define PCRE_IS_SET(o)  \
0347         (all_options_ & o) == o
0348 
0349 /***** Compiling regular expressions: the RE class *****/
0350 
0351 // RE_Options allow you to set options to be passed along to pcre,
0352 // along with other options we put on top of pcre.
0353 // Only 9 modifiers, plus match_limit and match_limit_recursion,
0354 // are supported now.
     //
     // The modifiers are kept together as bits of all_options_; the two limits
     // are kept as separate ints.  Every set_xxx() mutator returns *this, so
     // calls can be chained.
0355 class PCRECPP_EXP_DEFN RE_Options {
0356  public:
0357   // Default constructor: no modifier bits set and both limits zero.
0358   RE_Options() : match_limit_(0), match_limit_recursion_(0), all_options_(0) {}
0359 
0360   // alternative constructor.
0361   // To facilitate transfer of legacy code from C programs
0362   //
0363   // This lets you do
0364   //    RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
0365   // But new code is better off doing
0366   //    RE(pattern,
0367   //      RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
       //
       // "option_flags" is stored verbatim as the modifier bit mask; both limits
       // start at zero.  The constructor is not marked explicit, so a plain int
       // converts implicitly to RE_Options.
0368   RE_Options(int option_flags) : match_limit_(0), match_limit_recursion_(0),
0369                                  all_options_(option_flags) {}
0370   // we're fine with the default destructor, copy constructor, etc.
0371 
0372   // accessors and mutators
       // match_limit: the file header describes zero as "match limiting
       // disabled".  This class only stores the value.
0373   int match_limit() const { return match_limit_; };
0374   RE_Options &set_match_limit(int limit) {
0375     match_limit_ = limit;
0376     return *this;
0377   }
0378 
       // match_limit_recursion: stored separately from match_limit.
0379   int match_limit_recursion() const { return match_limit_recursion_; };
0380   RE_Options &set_match_limit_recursion(int limit) {
0381     match_limit_recursion_ = limit;
0382     return *this;
0383   }
0384 
       // Each pair below reads or writes one PCRE_xxx bit of all_options_.
       // The getter reports whether the bit is set; the setter sets it when
       // "x" is true, clears it otherwise, and returns *this (the return is
       // part of the PCRE_SET_OR_CLEAR expansion).
       //
       // PCRE_CASELESS: case insensitive match (Perl /i).
0385   bool caseless() const {
0386     return PCRE_IS_SET(PCRE_CASELESS);
0387   }
0388   RE_Options &set_caseless(bool x) {
0389     PCRE_SET_OR_CLEAR(x, PCRE_CASELESS);
0390   }
0391 
       // PCRE_MULTILINE: multiple lines match (Perl /m).
0392   bool multiline() const {
0393     return PCRE_IS_SET(PCRE_MULTILINE);
0394   }
0395   RE_Options &set_multiline(bool x) {
0396     PCRE_SET_OR_CLEAR(x, PCRE_MULTILINE);
0397   }
0398 
       // PCRE_DOTALL: dot matches newlines (Perl /s).
0399   bool dotall() const {
0400     return PCRE_IS_SET(PCRE_DOTALL);
0401   }
0402   RE_Options &set_dotall(bool x) {
0403     PCRE_SET_OR_CLEAR(x, PCRE_DOTALL);
0404   }
0405 
       // PCRE_EXTENDED: ignore whitespace in the pattern (Perl /x).
0406   bool extended() const {
0407     return PCRE_IS_SET(PCRE_EXTENDED);
0408   }
0409   RE_Options &set_extended(bool x) {
0410     PCRE_SET_OR_CLEAR(x, PCRE_EXTENDED);
0411   }
0412 
       // PCRE_DOLLAR_ENDONLY: $ matches only at end.
0413   bool dollar_endonly() const {
0414     return PCRE_IS_SET(PCRE_DOLLAR_ENDONLY);
0415   }
0416   RE_Options &set_dollar_endonly(bool x) {
0417     PCRE_SET_OR_CLEAR(x, PCRE_DOLLAR_ENDONLY);
0418   }
0419 
       // PCRE_EXTRA: strict escape parsing.
0420   bool extra() const {
0421     return PCRE_IS_SET(PCRE_EXTRA);
0422   }
0423   RE_Options &set_extra(bool x) {
0424     PCRE_SET_OR_CLEAR(x, PCRE_EXTRA);
0425   }
0426 
       // PCRE_UNGREEDY: reverses the meaning of * and *?.
0427   bool ungreedy() const {
0428     return PCRE_IS_SET(PCRE_UNGREEDY);
0429   }
0430   RE_Options &set_ungreedy(bool x) {
0431     PCRE_SET_OR_CLEAR(x, PCRE_UNGREEDY);
0432   }
0433 
       // PCRE_UTF8: treat pattern and text as UTF-8.  Note that set_utf8()
       // requires an explicit bool argument.
0434   bool utf8() const {
0435     return PCRE_IS_SET(PCRE_UTF8);
0436   }
0437   RE_Options &set_utf8(bool x) {
0438     PCRE_SET_OR_CLEAR(x, PCRE_UTF8);
0439   }
0440 
       // PCRE_NO_AUTO_CAPTURE: plain parentheses do not capture.
0441   bool no_auto_capture() const {
0442     return PCRE_IS_SET(PCRE_NO_AUTO_CAPTURE);
0443   }
0444   RE_Options &set_no_auto_capture(bool x) {
0445     PCRE_SET_OR_CLEAR(x, PCRE_NO_AUTO_CAPTURE);
0446   }
0447 
       // Replaces the whole modifier bit mask with "opt" (any bits set earlier
       // are discarded).  The two limits are left untouched.
0448   RE_Options &set_all_options(int opt) {
0449     all_options_ = opt;
0450     return *this;
0451   }
       // Returns the raw modifier bit mask.
0452   int all_options() const {
0453     return all_options_ ;
0454   }
0455 
0456   // TODO: add other pcre flags
0457 
0458  private:
0459   int match_limit_;            // value set by set_match_limit(); 0 initially
0460   int match_limit_recursion_;  // value set by set_match_limit_recursion()
0461   int all_options_;            // bitwise OR of the PCRE_xxx modifier flags
0462 };
0463 
0464 // These functions return some common RE_Options
     // Each one builds a default RE_Options (no modifiers, limits zero), turns
     // on exactly one modifier, and returns the result by value.  They are
     // meant to be passed straight to an RE constructor, e.g.
     //    pcrecpp::RE re(pattern, pcrecpp::UTF8());

     // Options with only PCRE_UTF8 set.
0465 static inline RE_Options UTF8() {
0466   return RE_Options().set_utf8(true);
0467 }
0468 
     // Options with only PCRE_CASELESS set.
0469 static inline RE_Options CASELESS() {
0470   return RE_Options().set_caseless(true);
0471 }
     // Options with only PCRE_MULTILINE set.
0472 static inline RE_Options MULTILINE() {
0473   return RE_Options().set_multiline(true);
0474 }
0475 
     // Options with only PCRE_DOTALL set.
0476 static inline RE_Options DOTALL() {
0477   return RE_Options().set_dotall(true);
0478 }
0479 
     // Options with only PCRE_EXTENDED set.
0480 static inline RE_Options EXTENDED() {
0481   return RE_Options().set_extended(true);
0482 }
0483 
0484 // Interface for regular expression matching.  Also corresponds to a
0485 // pre-compiled regular expression.  An "RE" object is safe for
0486 // concurrent use by multiple threads.
0487 class PCRECPP_EXP_DEFN RE {
0488  public:
0489   // We provide implicit conversions from strings so that users can
0490   // pass in a string or a "const char*" wherever an "RE" is expected.
       // Every constructor forwards to Init(); the overloads without an
       // RE_Options argument pass NULL for the options.
       // NOTE(review): "string" is used unqualified here; presumably it is
       // brought into scope by one of the included pcre headers -- confirm.
0491   RE(const string& pat) { Init(pat, NULL); }
0492   RE(const string& pat, const RE_Options& option) { Init(pat, &option); }
0493   RE(const char* pat) { Init(pat, NULL); }
0494   RE(const char* pat, const RE_Options& option) { Init(pat, &option); }
0495   RE(const unsigned char* pat) {
0496     Init(reinterpret_cast<const char*>(pat), NULL);
0497   }
0498   RE(const unsigned char* pat, const RE_Options& option) {
0499     Init(reinterpret_cast<const char*>(pat), &option);
0500   }
0501 
0502   // Copy constructor & assignment - note that these are expensive
0503   // because they recompile the expression.
       // Both re-run Init() with the source object's pattern and options.
0504   RE(const RE& re) { Init(re.pattern_, &re.options_); }
       // Self-assignment is a no-op; otherwise the current state is released
       // with Cleanup() before Init() is run again.  Returns a const
       // reference to *this.
0505   const RE& operator=(const RE& re) {
0506     if (this != &re) {
0507       Cleanup();
0508 
0509       // This is the code that originally came from Google
0510       // Init(re.pattern_.c_str(), &re.options_);
0511 
0512       // This is the replacement from Ari Pollak
0513       Init(re.pattern_, &re.options_);
0514     }
0515     return *this;
0516   }
0517 
0518 
0519   ~RE();
0520 
0521   // The string specification for this RE.  E.g.
0522   //   RE re("ab*c?d+");
0523   //   re.pattern();    // "ab*c?d+"
0524   const string& pattern() const { return pattern_; }
0525 
0526   // If RE could not be created properly, returns an error string.
0527   // Else returns the empty string.
0528   const string& error() const { return *error_; }
0529 
0530   /***** The useful part: the matching interface *****/
0531 
0532   // This is provided so one can do pattern.ReplaceAll() just as
0533   // easily as ReplaceAll(pattern-text, ....)
       // NOTE(review): this class declares no ReplaceAll(); the comment above
       // looks stale (see Replace() and GlobalReplace() below).
0534 
       // In the four functions below, ptr1..ptr16 optionally receive the
       // matched sub-patterns in order; every slot defaults to no_arg.
       //
       // FullMatch: checks that the pattern matches "text" exactly (see
       // MATCHING INTERFACE in the file header).
0535   bool FullMatch(const StringPiece& text,
0536                  const Arg& ptr1 = no_arg,
0537                  const Arg& ptr2 = no_arg,
0538                  const Arg& ptr3 = no_arg,
0539                  const Arg& ptr4 = no_arg,
0540                  const Arg& ptr5 = no_arg,
0541                  const Arg& ptr6 = no_arg,
0542                  const Arg& ptr7 = no_arg,
0543                  const Arg& ptr8 = no_arg,
0544                  const Arg& ptr9 = no_arg,
0545                  const Arg& ptr10 = no_arg,
0546                  const Arg& ptr11 = no_arg,
0547                  const Arg& ptr12 = no_arg,
0548                  const Arg& ptr13 = no_arg,
0549                  const Arg& ptr14 = no_arg,
0550                  const Arg& ptr15 = no_arg,
0551                  const Arg& ptr16 = no_arg) const;
0552 
       // PartialMatch: like FullMatch(), but the pattern may match any
       // substring of "text" (see PARTIAL MATCHES in the file header).
0553   bool PartialMatch(const StringPiece& text,
0554                     const Arg& ptr1 = no_arg,
0555                     const Arg& ptr2 = no_arg,
0556                     const Arg& ptr3 = no_arg,
0557                     const Arg& ptr4 = no_arg,
0558                     const Arg& ptr5 = no_arg,
0559                     const Arg& ptr6 = no_arg,
0560                     const Arg& ptr7 = no_arg,
0561                     const Arg& ptr8 = no_arg,
0562                     const Arg& ptr9 = no_arg,
0563                     const Arg& ptr10 = no_arg,
0564                     const Arg& ptr11 = no_arg,
0565                     const Arg& ptr12 = no_arg,
0566                     const Arg& ptr13 = no_arg,
0567                     const Arg& ptr14 = no_arg,
0568                     const Arg& ptr15 = no_arg,
0569                     const Arg& ptr16 = no_arg) const;
0570 
       // Consume: matches the pattern at the front of "*input"; each
       // successful call also advances "*input" past the matched text (see
       // SCANNING TEXT INCREMENTALLY in the file header).
0571   bool Consume(StringPiece* input,
0572                const Arg& ptr1 = no_arg,
0573                const Arg& ptr2 = no_arg,
0574                const Arg& ptr3 = no_arg,
0575                const Arg& ptr4 = no_arg,
0576                const Arg& ptr5 = no_arg,
0577                const Arg& ptr6 = no_arg,
0578                const Arg& ptr7 = no_arg,
0579                const Arg& ptr8 = no_arg,
0580                const Arg& ptr9 = no_arg,
0581                const Arg& ptr10 = no_arg,
0582                const Arg& ptr11 = no_arg,
0583                const Arg& ptr12 = no_arg,
0584                const Arg& ptr13 = no_arg,
0585                const Arg& ptr14 = no_arg,
0586                const Arg& ptr15 = no_arg,
0587                const Arg& ptr16 = no_arg) const;
0588 
       // FindAndConsume: like Consume(), but the match is not anchored at
       // the beginning of "*input".
0589   bool FindAndConsume(StringPiece* input,
0590                       const Arg& ptr1 = no_arg,
0591                       const Arg& ptr2 = no_arg,
0592                       const Arg& ptr3 = no_arg,
0593                       const Arg& ptr4 = no_arg,
0594                       const Arg& ptr5 = no_arg,
0595                       const Arg& ptr6 = no_arg,
0596                       const Arg& ptr7 = no_arg,
0597                       const Arg& ptr8 = no_arg,
0598                       const Arg& ptr9 = no_arg,
0599                       const Arg& ptr10 = no_arg,
0600                       const Arg& ptr11 = no_arg,
0601                       const Arg& ptr12 = no_arg,
0602                       const Arg& ptr13 = no_arg,
0603                       const Arg& ptr14 = no_arg,
0604                       const Arg& ptr15 = no_arg,
0605                       const Arg& ptr16 = no_arg) const;
0606 
       // Replaces the first match of this pattern in "*str" with "rewrite".
       // Backslash-digit escapes in "rewrite" insert the corresponding
       // group (0 = the entire match).  Returns true if the pattern matched
       // and a replacement occurred, false otherwise.
0607   bool Replace(const StringPiece& rewrite,
0608                string *str) const;
0609 
       // Like Replace(), but replaces all occurrences of the pattern in
       // "*str".  Returns the number of replacements made.
0610   int GlobalReplace(const StringPiece& rewrite,
0611                     string *str) const;
0612 
       // Like Replace(), except that on a match "rewrite" (with
       // substitutions) is copied into "*out"; non-matching portions of
       // "text" are ignored.  Returns true iff a match occurred and the
       // extraction succeeded.
0613   bool Extract(const StringPiece &rewrite,
0614                const StringPiece &text,
0615                string *out) const;
0616 
0617   // Escapes all potentially meaningful regexp characters in
0618   // 'unquoted'.  The returned string, used as a regular expression,
0619   // will exactly match the original string.  For example,
0620   //           1.5-2.0?
0621   // may become:
0622   //           1\.5\-2\.0\?
0623   // Note QuoteMeta behaves the same as perl's QuoteMeta function,
0624   // *except* that it escapes the NUL character (\0) as backslash + 0,
0625   // rather than backslash + NUL.
0626   static string QuoteMeta(const StringPiece& unquoted);
0627 
0628 
0629   /***** Generic matching interface *****/
0630 
0631   // Type of match (TODO: Should be restructured as part of RE_Options)
0632   enum Anchor {
0633     UNANCHORED,         // No anchoring
0634     ANCHOR_START,       // Anchor at start only
0635     ANCHOR_BOTH         // Anchor at start and end
0636   };
0637 
0638   // General matching routine.  Stores the length of the match in
0639   // "*consumed" if successful.
       // This is the interface the file header recommends when more than 16
       // output arguments are needed.
       // NOTE(review): "args" presumably points to "n" output arguments, one
       // per sub-pattern to extract -- confirm against the implementation.
0640   bool DoMatch(const StringPiece& text,
0641                Anchor anchor,
0642                int* consumed,
0643                const Arg* const* args, int n) const;
0644 
0645   // Return the number of capturing subpatterns, or -1 if the
0646   // regexp wasn't valid on construction.
0647   int NumberOfCapturingGroups() const;
0648 
0649   // The default value for an argument, to indicate the end of the argument
0650   // list. This must be used only in optional argument defaults. It should NOT
0651   // be passed explicitly. Some people have tried to use it like this:
0652   //
0653   //   FullMatch(x, y, &z, no_arg, &w);
0654   //
0655   // This is a mistake, and will not work.
0656   static Arg no_arg;
0657 
0658  private:
0659 
       // Init() is called by every constructor and by operator=; "options"
       // may be NULL.  Cleanup() is called by operator= before it runs
       // Init() again.  Both are defined outside this header.
0660   void Init(const string& pattern, const RE_Options* options);
0661   void Cleanup();
0662 
0663   // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
0664   // pairs of integers for the beginning and end positions of matched
0665   // text.  The first pair corresponds to the entire matched text;
0666   // subsequent pairs correspond, in order, to parentheses-captured
0667   // matches.  Returns the number of pairs (one more than the number of
0668   // the last subpattern with a match) if matching was successful
0669   // and zero if the match failed.
0670   // I.e. for RE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
0671   // against "foo", "bar", and "baz" respectively.
0672   // When matching RE("(foo)|hello") against "hello", it will return 1.
0673   // But the values for all subpattern are filled in into "vec".
0674   int TryMatch(const StringPiece& text,
0675                int startpos,
0676                Anchor anchor,
0677                bool empty_ok,
0678                int *vec,
0679                int vecsize) const;
0680 
0681   // Append the "rewrite" string, with backslash substitutions from "text"
0682   // and "vec", to string "out".
0683   bool Rewrite(string *out,
0684                const StringPiece& rewrite,
0685                const StringPiece& text,
0686                int *vec,
0687                int veclen) const;
0688 
0689   // internal implementation for DoMatch
       // Takes the same arguments as DoMatch() plus a caller-supplied
       // "vec"/"vecsize" buffer.
0690   bool DoMatchImpl(const StringPiece& text,
0691                    Anchor anchor,
0692                    int* consumed,
0693                    const Arg* const args[],
0694                    int n,
0695                    int* vec,
0696                    int vecsize) const;
0697 
0698   // Compile the regexp for the specified anchoring mode
0699   pcre* Compile(Anchor anchor);
0700 
0701   string        pattern_;       // Pattern text; returned by pattern()
0702   RE_Options    options_;       // Options; reused when the RE is copied
0703   pcre*         re_full_;       // For full matches
0704   pcre*         re_partial_;    // For partial matches
0705   const string* error_;         // Error indicator (or points to empty string)
0706 };
0707 
0708 }   // namespace pcrecpp
0709 
0710 #endif /* _PCRECPP_H */