Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /include/unicode/uregex.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 // © 2016 and later: Unicode, Inc. and others.
0002 // License & terms of use: http://www.unicode.org/copyright.html
0003 /*
0004 **********************************************************************
0005 *   Copyright (C) 2004-2016, International Business Machines
0006 *   Corporation and others.  All Rights Reserved.
0007 **********************************************************************
0008 *   file name:  uregex.h
0009 *   encoding:   UTF-8
0010 *   indentation:4
0011 *
0012 *   created on: 2004mar09
0013 *   created by: Andy Heninger
0014 *
0015 *   ICU Regular Expressions, API for C
0016 */
0017 
0018 /**
0019  * \file
0020  * \brief C API: Regular Expressions
0021  *
0022  * <p>This is a C wrapper around the C++ RegexPattern and RegexMatcher classes.</p>
0023  */
0024 
0025 #ifndef UREGEX_H
0026 #define UREGEX_H
0027 
0028 #include "unicode/utext.h"
0029 #include "unicode/utypes.h"
0030 
0031 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
0032 
0033 #include "unicode/parseerr.h"
0034 
0035 #if U_SHOW_CPLUSPLUS_API
0036 #include "unicode/localpointer.h"
0037 #endif   // U_SHOW_CPLUSPLUS_API
0038 
0039 struct URegularExpression;
0040 /**
0041   * Structure representing a compiled regular expression, plus the results
0042   *    of a match operation.
0043   * @stable ICU 3.0
0044   */
0045 typedef struct URegularExpression URegularExpression;
0046 
0047 
0048 /**
0049  * Constants for Regular Expression Match Modes.  Each constant is a distinct single bit, so several modes can be combined with bitwise OR.
0050  * @stable ICU 2.4
0051  */
0052 typedef enum URegexpFlag{
0053 
0054 #ifndef U_HIDE_DRAFT_API 
0055     /** Forces normalization of pattern and strings. 
0056     Not implemented yet, just a placeholder, hence draft. 
0057     @draft ICU 2.4 */
0058     UREGEX_CANON_EQ         = 128,
0059 #endif /* U_HIDE_DRAFT_API */
0060     /**  Enable case insensitive matching.  @stable ICU 2.4 */
0061     UREGEX_CASE_INSENSITIVE = 2,
0062 
0063     /**  Allow white space and comments within patterns.  @stable ICU 2.4 */
0064     UREGEX_COMMENTS         = 4,
0065 
0066     /**  If set, '.' matches line terminators,  otherwise '.' matching stops at line end.
0067       *  @stable ICU 2.4 */
0068     UREGEX_DOTALL           = 32,
0069     
0070     /**  If set, treat the entire pattern as a literal string.  
0071       *  Metacharacters or escape sequences in the input sequence will be given 
0072       *  no special meaning. 
0073       *
0074       *  The flag UREGEX_CASE_INSENSITIVE retains its impact
0075       *  on matching when used in conjunction with this flag.
0076       *  The other flags become superfluous.
0077       *
0078       * @stable ICU 4.0
0079       */
0080     UREGEX_LITERAL = 16,
0081 
0082     /**   Control behavior of "$" and "^".
0083       *    If set, recognize line terminators within string,
0084       *    otherwise, match only at start and end of input string.
0085       *   @stable ICU 2.4 */
0086     UREGEX_MULTILINE        = 8,
0087     
0088     /**   Unix-only line endings.
0089       *   When this mode is enabled, only \\u000a is recognized as a line ending
0090       *    in the behavior of ., ^, and $.
0091       *   @stable ICU 4.0
0092       */
0093     UREGEX_UNIX_LINES = 1,
0094 
0095     /**  Unicode word boundaries.
0096       *     If set, \b uses the Unicode TR 29 definition of word boundaries.
0097       *     Warning: Unicode word boundaries are quite different from
0098       *     traditional regular expression word boundaries.  See
0099       *     http://unicode.org/reports/tr29/#Word_Boundaries
0100       *     @stable ICU 2.8
0101       */
0102     UREGEX_UWORD            = 256,
0103 
0104      /**  Error on unrecognized backslash escapes.
0105        *     If set, fail with an error on patterns that contain
0106        *     backslash-escaped ASCII letters without a known special
0107        *     meaning.  If this flag is not set, these
0108        *     escaped letters represent themselves.
0109        *     @stable ICU 4.0
0110        */
0111      UREGEX_ERROR_ON_UNKNOWN_ESCAPES = 512
0112 
0113 }  URegexpFlag;
0114 
0115 /**
0116   *  Open (compile) an ICU regular expression.  Compiles the regular expression in
0117   *  string form into an internal representation using the specified match mode flags.
0118   *  The resulting regular expression handle can then be used to perform various
0119   *   matching operations.
0120   *
0121   *
0122   * @param pattern        The Regular Expression pattern to be compiled.
0123   * @param patternLength  The length of the pattern, or -1 if the pattern is
0124   *                       NUL terminated.
0125   * @param flags          Flags that alter the default matching behavior for
0126   *                       the regular expression, UREGEX_CASE_INSENSITIVE, for
0127   *                       example.  For default behavior, set this parameter to zero.
0128   *                       See <code>enum URegexpFlag</code>.  All desired flags
0129   *                       are bitwise-ORed together.
0130   * @param pe             Receives the position (line and column numbers) of any syntax
0131   *                       error within the source regular expression string.  If this
0132   *                       information is not wanted, pass NULL for this parameter.
0133   * @param status         Receives errors detected by this function.
0134   * @return               The URegularExpression object representing the compiled pattern.
0135   * @stable ICU 3.0
0136   */
0137 U_CAPI URegularExpression * U_EXPORT2
0138 uregex_open( const  UChar          *pattern,
0139                     int32_t         patternLength,
0140                     uint32_t        flags,
0141                     UParseError    *pe,
0142                     UErrorCode     *status);
0143 
0144 /**
0145   *  Open (compile) an ICU regular expression.  Compiles the regular expression in
0146   *  string form into an internal representation using the specified match mode flags.
0147   *  The resulting regular expression handle can then be used to perform various
0148   *   matching operations.
0149   *  <p>
0150   *  The contents of the pattern UText will be extracted and saved. Ownership of the
0151   *   UText struct itself remains with the caller. This is to match the behavior of
0152   *   uregex_open().
0153   *
0154   * @param pattern        The Regular Expression pattern to be compiled.
0155   * @param flags          Flags that alter the default matching behavior for
0156   *                       the regular expression, UREGEX_CASE_INSENSITIVE, for
0157   *                       example.  For default behavior, set this parameter to zero.
0158   *                       See <code>enum URegexpFlag</code>.  All desired flags
0159   *                       are bitwise-ORed together.
0160   * @param pe             Receives the position (line and column numbers) of any syntax
0161   *                       error within the source regular expression string.  If this
0162   *                       information is not wanted, pass NULL for this parameter.
0163   * @param status         Receives errors detected by this function.
0164   * @return               The URegularExpression object representing the compiled pattern.
0165   * @stable ICU 4.6
0166   */
0167 U_CAPI URegularExpression *  U_EXPORT2
0168 uregex_openUText(UText          *pattern,
0169                  uint32_t        flags,
0170                  UParseError    *pe,
0171                  UErrorCode     *status);
0172 
0173 #if !UCONFIG_NO_CONVERSION
0174 /**
0175   *  Open (compile) an ICU regular expression.  The resulting regular expression
0176   *   handle can then be used to perform various matching operations.
0177   *  <p>
0178   *   This function is the same as uregex_open, except that the pattern
0179   *   is supplied as an 8 bit char * string in the default code page.
0180   *
0181   * @param pattern        The Regular Expression pattern to be compiled,
0182   *                       NUL terminated.
0183   * @param flags          Flags that alter the default matching behavior for
0184   *                       the regular expression, UREGEX_CASE_INSENSITIVE, for
0185   *                       example.  For default behavior, set this parameter to zero.
0186   *                       See <code>enum URegexpFlag</code>.  All desired flags
0187   *                       are bitwise-ORed together.
0188   * @param pe             Receives the position (line and column numbers) of any syntax
0189   *                       error within the source regular expression string.  If this
0190   *                       information is not wanted, pass NULL for this parameter.
0191   * @param status         Receives errors detected by this function.
0192   * @return               The URegularExpression object representing the compiled
0193   *                       pattern.
0194   *
0195   * @stable ICU 3.0
0196   */
0197 U_CAPI URegularExpression * U_EXPORT2
0198 uregex_openC( const char           *pattern,
0199                     uint32_t        flags,
0200                     UParseError    *pe,
0201                     UErrorCode     *status);
0202 #endif /* !UCONFIG_NO_CONVERSION */
0203 
0204 
0205 
0206 /**
0207   *  Close the regular expression, recovering all resources (memory) it
0208   *   was holding.
0209   *
0210   * @param regexp   The regular expression to be closed.
0211   * @stable ICU 3.0
0212   */
0213 U_CAPI void U_EXPORT2 
0214 uregex_close(URegularExpression *regexp);
0215 
0216 #if U_SHOW_CPLUSPLUS_API
0217 
0218 U_NAMESPACE_BEGIN
0219 
0220 /**
0221  * \class LocalURegularExpressionPointer
0222  * "Smart pointer" class, closes a URegularExpression via uregex_close().
0223  * For most methods see the LocalPointerBase base class.
0224  *
0225  * @see LocalPointerBase
0226  * @see LocalPointer
0227  * @stable ICU 4.4
0228  */
0229 U_DEFINE_LOCAL_OPEN_POINTER(LocalURegularExpressionPointer, URegularExpression, uregex_close);
0230 
0231 U_NAMESPACE_END
0232 
0233 #endif   // U_SHOW_CPLUSPLUS_API
0234 
0235 /**
0236  * Make a copy of a compiled regular expression.  Cloning a regular
0237  * expression is faster than opening a second instance from the source
0238  * form of the expression, and requires less memory.
0239  * <p>
0240  * Note that the current input string and the position of any matched text
0241  *  within it are not cloned; only the pattern itself and the
0242  *  match mode flags are copied.
0243  * <p>
0244  * Cloning can be particularly useful to threaded applications that perform
0245  * multiple match operations in parallel.  Each concurrent RE
0246  * operation requires its own instance of a URegularExpression.
0247  *
0248  * @param regexp   The compiled regular expression to be cloned.
0249  * @param status   Receives indication of any errors encountered
0250  * @return the cloned copy of the compiled regular expression.
0251  * @stable ICU 3.0
0252  */
0253 U_CAPI URegularExpression * U_EXPORT2 
0254 uregex_clone(const URegularExpression *regexp, UErrorCode *status);
0255 
0256 /**
0257  *  Returns a pointer to the source form of the pattern for this regular expression.
0258  *  This function will work even if the pattern was originally specified as a UText.
0259  *
0260  * @param regexp     The compiled regular expression.
0261  * @param patLength  This output parameter will be set to the length of the
0262  *                   pattern string.  A NULL pointer may be used here if the
0263  *                   pattern length is not needed, as would be the case if
0264  *                   the pattern is known in advance to be a NUL terminated
0265  *                   string.
0266  * @param status     Receives errors detected by this function.
0267  * @return a pointer to the pattern string.  The storage for the string is
0268  *                   owned by the regular expression object, and must not be
0269  *                   altered or deleted by the application.  The returned string
0270  *                   will remain valid until the regular expression is closed.
0271  * @stable ICU 3.0
0272  */
0273 U_CAPI const UChar * U_EXPORT2 
0274 uregex_pattern(const URegularExpression *regexp,
0275                      int32_t            *patLength,
0276                      UErrorCode         *status);
0277 
0278 /**
0279  *  Returns the source text of the pattern for this regular expression.
0280  *  This function will work even if the pattern was originally specified as a UChar string.
0281  *
0282  * @param regexp     The compiled regular expression.
0283  * @param status     Receives errors detected by this function.
0284  * @return the pattern text.  The storage for the text is owned by the regular expression
0285  *                   object, and must not be altered or deleted.
0286  *
0287  * @stable ICU 4.6
0288  */
0289 U_CAPI UText * U_EXPORT2 
0290 uregex_patternUText(const URegularExpression *regexp,
0291                           UErrorCode         *status);
0292 
0293 /**
0294   * Get the match mode flags that were specified when compiling this regular expression.
0295   * @param regexp   The compiled regular expression.
0296   * @param status   Receives errors detected by this function.
0297   * @return         The match mode flags, a bitwise-OR of URegexpFlag values.
0298   * @see URegexpFlag
0299   * @stable ICU 3.0
0300   */
0301 U_CAPI int32_t U_EXPORT2 
0302 uregex_flags(const  URegularExpression   *regexp,
0303                     UErrorCode           *status);
0304 
0305 
0306 /**
0307   *  Set the subject text string upon which the regular expression will look for matches.
0308   *  This function may be called any number of times, allowing the regular
0309   *  expression pattern to be applied to different strings.
0310   *  <p>
0311   *  Regular expression matching operations work directly on the application's
0312   *  string data.  No copy is made.  The subject string data must not be
0313   *  altered after calling this function until after all regular expression
0314   *  operations involving this string data are completed.  
0315   *  <p>
0316   *  Zero length strings are permitted.  In this case, no subsequent match
0317   *  operation will dereference the text string pointer.
0318   *
0319   * @param regexp     The compiled regular expression.
0320   * @param text       The subject text string.
0321   * @param textLength The length of the subject text, or -1 if the string
0322   *                   is NUL terminated.
0323   * @param status     Receives errors detected by this function.
0324   * @stable ICU 3.0
0325   */
0326 U_CAPI void U_EXPORT2 
0327 uregex_setText(URegularExpression *regexp,
0328                const UChar        *text,
0329                int32_t             textLength,
0330                UErrorCode         *status);
0331 
0332 
0333 /**
0334   *  Set the subject text string upon which the regular expression will look for matches.
0335   *  This function may be called any number of times, allowing the regular
0336   *  expression pattern to be applied to different strings.
0337   *  <p>
0338   *  Regular expression matching operations work directly on the application's
0339   *  string data; only a shallow clone is made.  The subject string data must not be
0340   *  altered after calling this function until after all regular expression
0341   *  operations involving this string data are completed.  
0342   *
0343   * @param regexp     The compiled regular expression.
0344   * @param text       The subject text string.
0345   * @param status     Receives errors detected by this function.
0346   *
0347   * @stable ICU 4.6
0348   */
0349 U_CAPI void U_EXPORT2 
0350 uregex_setUText(URegularExpression *regexp,
0351                 UText              *text,
0352                 UErrorCode         *status);
0353 
0354 /**
0355   *  Get the subject text that is currently associated with this 
0356   *   regular expression object.  If the input was supplied using uregex_setText(),
0357   *   that pointer will be returned.  Otherwise, the characters in the input will
0358   *   be extracted to a buffer and returned.  In either case, ownership remains
0359   *   with the regular expression object.
0360   *
0361   *  This function will work even if the input was originally specified as a UText.
0362   *
0363   * @param regexp      The compiled regular expression.
0364   * @param textLength  The length of the string is returned in this output parameter. 
0365   *                    A NULL pointer may be used here if the
0366   *                    text length is not needed, as would be the case if
0367   *                    the text is known in advance to be a NUL terminated
0368   *                    string.
0369   * @param status      Receives errors detected by this function.
0370   * @return            Pointer to the subject text string currently associated with
0371   *                    this regular expression.
0372   * @stable ICU 3.0
0373   */
0374 U_CAPI const UChar * U_EXPORT2 
0375 uregex_getText(URegularExpression *regexp,
0376                int32_t            *textLength,
0377                UErrorCode         *status);
0378 
0379 /**
0380   *  Get the subject text that is currently associated with this 
0381   *   regular expression object.
0382   *
0383   *  This function will work even if the input was originally specified as a UChar string.
0384   *
0385   * @param regexp      The compiled regular expression.
0386   * @param dest        A mutable UText in which to store the current input.
0387   *                    If NULL, a new UText will be created as an immutable shallow clone
0388   *                    of the actual input string.
0389   * @param status      Receives errors detected by this function.
0390   * @return            The subject text currently associated with this regular expression.
0391   *                    If a pre-allocated UText was provided, it will always be used and returned.
0392   *
0393   * @stable ICU 4.6
0394   */
0395 U_CAPI UText * U_EXPORT2 
0396 uregex_getUText(URegularExpression *regexp,
0397                 UText              *dest,
0398                 UErrorCode         *status);
0399 
0400 /**
0401   *  Set the subject text string upon which the regular expression is looking for matches
0402   *  without changing any other aspect of the matching state.
0403   *  The new and previous text strings must have the same content.
0404   *
0405   *  This function is intended for use in environments where ICU is operating on 
0406   *  strings that may move around in memory.  It provides a mechanism for notifying
0407   *  ICU that the string has been relocated, and providing a new UText to access the
0408   *  string in its new position.
0409   *
0410   *  Note that the regular expression implementation never copies the underlying text
0411   *  of a string being matched, but always operates directly on the original text 
0412   *  provided by the user. Refreshing simply drops the references to the old text 
0413   *  and replaces them with references to the new.
0414   *
0415   *  Caution:  this function is normally used only by very specialized
0416   *            system-level code.   One example use case is with garbage collection 
0417   *            that moves the text in memory. 
0418   *
0419   * @param regexp     The compiled regular expression.
0420   * @param text       The new (moved) text string.
0421   * @param status     Receives errors detected by this function.
0422   *
0423   * @stable ICU 4.8
0424   */
0425 U_CAPI void U_EXPORT2 
0426 uregex_refreshUText(URegularExpression *regexp,
0427                     UText              *text,
0428                     UErrorCode         *status);
0429 
0430 /**
0431   *   Attempts to match the input string against the pattern.
0432   *   To succeed, the match must extend to the end of the string,
0433   *   or cover the complete match region.
0434   *
0435   *   If startIndex >= zero the match operation starts at the specified
0436   *   index and must extend to the end of the input string.  Any region
0437   *   that has been specified is reset.
0438   *
0439   *   If startIndex == -1 the match must cover the input region, or the entire
0440   *   input string if no region has been set.  This directly corresponds to
0441   *   Matcher.matches() in Java.
0442   *
0443   *    @param  regexp      The compiled regular expression.
0444   *    @param  startIndex  The input string (native) index at which to begin matching, or -1
0445   *                        to match the input Region.
0446   *    @param  status      Receives errors detected by this function.
0447   *    @return             true if there is a match
0448   *    @stable ICU 3.0
0449   */
0450 U_CAPI UBool U_EXPORT2 
0451 uregex_matches(URegularExpression *regexp,
0452                 int32_t            startIndex,
0453                 UErrorCode        *status);
0454 
0455 /**
0456   *   64bit version of uregex_matches.
0457   *   Attempts to match the input string against the pattern.
0458   *   To succeed, the match must extend to the end of the string,
0459   *   or cover the complete match region.
0460   *
0461   *   If startIndex >= zero the match operation starts at the specified
0462   *   index and must extend to the end of the input string.  Any region
0463   *   that has been specified is reset.
0464   *
0465   *   If startIndex == -1 the match must cover the input region, or the entire
0466   *   input string if no region has been set.  This directly corresponds to
0467   *   Matcher.matches() in Java.
0468   *
0469   *    @param  regexp      The compiled regular expression.
0470   *    @param  startIndex  The input string (native) index at which to begin matching, or -1
0471   *                        to match the input Region.
0472   *    @param  status      Receives errors detected by this function.
0473   *    @return             true if there is a match
0474   *    @stable ICU 4.6
0475   */
0476 U_CAPI UBool U_EXPORT2 
0477 uregex_matches64(URegularExpression *regexp,
0478                  int64_t            startIndex,
0479                  UErrorCode        *status);
0480 
0481 /**
0482   *   Attempts to match the input string, starting from the specified index, against the pattern.
0483   *   The match may be of any length, and is not required to extend to the end
0484   *   of the input string.  Contrast with uregex_matches().
0485   *
0486   *   <p>If startIndex is >= 0 any input region that was set for this
0487   *   URegularExpression is reset before the operation begins.
0488   *
0489   *   <p>If the specified starting index == -1 the match begins at the start of the input 
0490   *   region, or at the start of the full string if no region has been specified.
0491   *   This corresponds directly with Matcher.lookingAt() in Java.
0492   *
0493   *   <p>If the match succeeds then more information can be obtained via the
0494   *    <code>uregex_start()</code>, <code>uregex_end()</code>,
0495   *    and <code>uregex_group()</code> functions.</p>
0496   *
0497   *    @param   regexp      The compiled regular expression.
0498   *    @param   startIndex  The input string (native) index at which to begin matching, or
0499   *                         -1 to match the Input Region
0500   *    @param   status      A reference to a UErrorCode to receive any errors.
0501   *    @return  true if there is a match.
0502   *    @stable ICU 3.0
0503   */
0504 U_CAPI UBool U_EXPORT2 
0505 uregex_lookingAt(URegularExpression *regexp,
0506                  int32_t             startIndex,
0507                  UErrorCode         *status);
0508 
0509 /**
0510   *   64bit version of uregex_lookingAt.
0511   *   Attempts to match the input string, starting from the specified index, against the pattern.
0512   *   The match may be of any length, and is not required to extend to the end
0513   *   of the input string.  Contrast with uregex_matches().
0514   *
0515   *   <p>If startIndex is >= 0 any input region that was set for this
0516   *   URegularExpression is reset before the operation begins.
0517   *
0518   *   <p>If the specified starting index == -1 the match begins at the start of the input 
0519   *   region, or at the start of the full string if no region has been specified.
0520   *   This corresponds directly with Matcher.lookingAt() in Java.
0521   *
0522   *   <p>If the match succeeds then more information can be obtained via the
0523   *    <code>uregex_start()</code>, <code>uregex_end()</code>,
0524   *    and <code>uregex_group()</code> functions.</p>
0525   *
0526   *    @param   regexp      The compiled regular expression.
0527   *    @param   startIndex  The input string (native) index at which to begin matching, or
0528   *                         -1 to match the Input Region
0529   *    @param   status      A reference to a UErrorCode to receive any errors.
0530   *    @return  true if there is a match.
0531   *    @stable ICU 4.6
0532   */
0533 U_CAPI UBool U_EXPORT2 
0534 uregex_lookingAt64(URegularExpression *regexp,
0535                    int64_t             startIndex,
0536                    UErrorCode         *status);
0537 
0538 /**
0539   *   Find the first matching substring of the input string that matches the pattern.
0540   *   If startIndex is >= zero the search for a match begins at the specified index,
0541   *          and any match region is reset.  This corresponds directly with
0542   *          Matcher.find(startIndex) in Java.
0543   *
0544   *   If startIndex == -1 the search begins at the start of the input region,
0545   *           or at the start of the full string if no region has been specified.
0546   *
0547   *   If a match is found, <code>uregex_start(), uregex_end()</code>, and
0548   *   <code>uregex_group()</code> will provide more information regarding the match.
0549   *
0550   *   @param   regexp      The compiled regular expression.
0551   *   @param   startIndex  The position (native) in the input string to begin the search, or
0552   *                        -1 to search within the Input Region.
0553   *   @param   status      A reference to a UErrorCode to receive any errors.
0554   *   @return              true if a match is found.
0555   *   @stable ICU 3.0
0556   */
0557 U_CAPI UBool U_EXPORT2 
0558 uregex_find(URegularExpression *regexp,
0559             int32_t             startIndex, 
0560             UErrorCode         *status);
0561 
0562 /**
0563   *   64bit version of uregex_find.
0564   *   Find the first matching substring of the input string that matches the pattern.
0565   *   If startIndex is >= zero the search for a match begins at the specified index,
0566   *          and any match region is reset.  This corresponds directly with
0567   *          Matcher.find(startIndex) in Java.
0568   *
0569   *   If startIndex == -1 the search begins at the start of the input region,
0570   *           or at the start of the full string if no region has been specified.
0571   *
0572   *   If a match is found, <code>uregex_start(), uregex_end()</code>, and
0573   *   <code>uregex_group()</code> will provide more information regarding the match.
0574   *
0575   *   @param   regexp      The compiled regular expression.
0576   *   @param   startIndex  The position (native) in the input string to begin the search, or
0577   *                        -1 to search within the Input Region.
0578   *   @param   status      A reference to a UErrorCode to receive any errors.
0579   *   @return              true if a match is found.
0580   *   @stable ICU 4.6
0581   */
0582 U_CAPI UBool U_EXPORT2 
0583 uregex_find64(URegularExpression *regexp,
0584               int64_t             startIndex, 
0585               UErrorCode         *status);
0586 
0587 /**
0588   *  Find the next pattern match in the input string.  Begin searching 
0589   *  the input at the location following the end of the previous match, 
0590   *  or at the start of the string (or region) if there is no 
0591   *  previous match.  If a match is found, <code>uregex_start(), uregex_end()</code>, and
0592   *  <code>uregex_group()</code> will provide more information regarding the match.
0593   *
0594   *  @param   regexp      The compiled regular expression.
0595   *  @param   status      A reference to a UErrorCode to receive any errors.
0596   *  @return              true if a match is found.
0597   *  @see uregex_reset
0598   *  @stable ICU 3.0
0599   */
0600 U_CAPI UBool U_EXPORT2 
0601 uregex_findNext(URegularExpression *regexp,
0602                 UErrorCode         *status);
0603 
0604 /**
0605   *   Get the number of capturing groups in this regular expression's pattern.
0606   *   @param   regexp      The compiled regular expression.
0607   *   @param   status      A reference to a UErrorCode to receive any errors.
0608   *   @return the number of capture groups
0609   *   @stable ICU 3.0
0610   */
0611 U_CAPI int32_t U_EXPORT2 
0612 uregex_groupCount(URegularExpression *regexp,
0613                   UErrorCode         *status);
0614 
0615 /**
0616   * Get the group number corresponding to a named capture group.
0617   * The returned number can be used with any function that accesses
0618   * capture groups by number.
0619   *
0620   * The function returns an error status if the specified name does not
0621   * appear in the pattern.
0622   *
0623   * @param  regexp      The compiled regular expression.
0624   * @param  groupName   The capture group name.
0625   * @param  nameLength  The length of the name, or -1 if the name is a
0626   *                     nul-terminated string.
0627   * @param  status      A pointer to a UErrorCode to receive any errors.
0628   * @return             The group number corresponding to the named capture group.
0629   * @stable ICU 55
0630   */
0631 U_CAPI int32_t U_EXPORT2
0632 uregex_groupNumberFromName(URegularExpression *regexp,
0633                            const UChar        *groupName,
0634                            int32_t             nameLength,
0635                            UErrorCode          *status);
0636 
0637 
0638 /**
0639   * Get the group number corresponding to a named capture group.
0640   * The returned number can be used with any function that accesses
0641   * capture groups by number.
0642   *
0643   * The function returns an error status if the specified name does not
0644   * appear in the pattern.
0645   *
0646   * @param  regexp      The compiled regular expression.
0647   * @param  groupName   The capture group name,
0648   *                     platform invariant characters only.
0649   * @param  nameLength  The length of the name, or -1 if the name is
0650   *                     nul-terminated.
0651   * @param  status      A pointer to a UErrorCode to receive any errors.
0652   * @return             The group number corresponding to the named capture group.
0653   * @stable ICU 55
0654   */
0655 U_CAPI int32_t U_EXPORT2
0656 uregex_groupNumberFromCName(URegularExpression *regexp,
0657                             const char         *groupName,
0658                             int32_t             nameLength,
0659                             UErrorCode          *status);
0660 
0661 /** Extract the string for the specified matching expression or subexpression.
0662   * Group #0 is the complete string of matched text.
0663   * Group #1 is the text matched by the first set of capturing parentheses.
0664   *
0665   *   @param   regexp       The compiled regular expression.
0666   *   @param   groupNum     The capture group to extract.  Group 0 is the complete
0667   *                         match.  The value of this parameter must be
0668   *                         less than or equal to the number of capture groups in
0669   *                         the pattern.
0670   *   @param   dest         Buffer to receive the matching string data
0671   *   @param   destCapacity Capacity of the dest buffer.
0672   *   @param   status       A reference to a UErrorCode to receive any errors.
0673   *   @return               Length of matching data,
0674   *                         or -1 if no applicable match.
0675   *   @stable ICU 3.0
0676   */
0677 U_CAPI int32_t U_EXPORT2 
0678 uregex_group(URegularExpression *regexp,
0679              int32_t             groupNum,
0680              UChar              *dest,
0681              int32_t             destCapacity,
0682              UErrorCode          *status);
0683 
0684 /** Returns a shallow immutable clone of the entire input string with the current index set
0685   *   to the beginning of the requested capture group.  The capture group length is also
0686   *   returned via groupLength.
0687   * Group #0 is the complete string of matched text.
0688   * Group #1 is the text matched by the first set of capturing parentheses.
0689   *
0690   *   @param   regexp       The compiled regular expression.
0691   *   @param   groupNum     The capture group to extract.  Group 0 is the complete
0692   *                         match.  The value of this parameter must be
0693   *                         less than or equal to the number of capture groups in
0694   *                         the pattern.
0695   *   @param   dest         A mutable UText in which to store the current input.
0696   *                         If NULL, a new UText will be created as an immutable shallow clone
0697   *                         of the entire input string.
0698   *   @param   groupLength  The group length of the desired capture group. Output parameter.
0699   *   @param   status       A reference to a UErrorCode to receive any errors.
0700   *   @return               The subject text currently associated with this regular expression.
0701   *                         If a pre-allocated UText was provided, it will always be used and returned.
0702   *
0703   *
0704   *   @stable ICU 4.6
0705   */
0706 U_CAPI UText * U_EXPORT2 
0707 uregex_groupUText(URegularExpression *regexp,
0708                   int32_t             groupNum,
0709                   UText              *dest,
0710                   int64_t            *groupLength,
0711                   UErrorCode         *status);
0712 
0713 /**
0714   *   Returns the index in the input string of the start of the text matched by the
0715   *   specified capture group during the previous match operation.  Return -1 if
0716   *   the capture group was not part of the last match.
0717   *   Group #0 refers to the complete range of matched text.
0718   *   Group #1 refers to the text matched by the first set of capturing parentheses.
0719   *
0720   *    @param   regexp      The compiled regular expression.
0721   *    @param   groupNum    The capture group number
0722   *    @param   status      A reference to a UErrorCode to receive any errors.
0723   *    @return              the starting (native) position in the input of the text matched 
0724   *                         by the specified group.
0725   *    @stable ICU 3.0
0726   */
0727 U_CAPI int32_t U_EXPORT2 
0728 uregex_start(URegularExpression *regexp,
0729              int32_t             groupNum,
0730              UErrorCode          *status);
0731 
0732 /**
0733   *   64bit version of uregex_start.
0734   *   Returns the index in the input string of the start of the text matched by the
0735   *   specified capture group during the previous match operation.  Return -1 if
0736   *   the capture group was not part of the last match.
0737   *   Group #0 refers to the complete range of matched text.
0738   *   Group #1 refers to the text matched by the first set of capturing parentheses.
0739   *
0740   *    @param   regexp      The compiled regular expression.
0741   *    @param   groupNum    The capture group number
0742   *    @param   status      A reference to a UErrorCode to receive any errors.
0743   *    @return              the starting (native) position in the input of the text matched 
0744   *                         by the specified group.
0745   *   @stable ICU 4.6
0746   */
0747 U_CAPI int64_t U_EXPORT2 
0748 uregex_start64(URegularExpression *regexp,
0749                int32_t             groupNum,
0750                UErrorCode          *status);
0751 
0752 /**
0753   *   Returns the index in the input string of the position following the end
0754   *   of the text matched by the specified capture group.
0755   *   Return -1 if the capture group was not part of the last match.
0756   *   Group #0 refers to the complete range of matched text.
0757   *   Group #1 refers to the text matched by the first set of capturing parentheses.
0758   *
0759   *    @param   regexp      The compiled regular expression.
0760   *    @param   groupNum    The capture group number
0761   *    @param   status      A reference to a UErrorCode to receive any errors.
0762   *    @return              the (native) index of the position following the last matched character.
0763   *    @stable ICU 3.0
0764   */
0765 U_CAPI int32_t U_EXPORT2 
0766 uregex_end(URegularExpression   *regexp,
0767            int32_t               groupNum,
0768            UErrorCode           *status);
0769 
0770 /**
0771   *   64bit version of uregex_end.
0772   *   Returns the index in the input string of the position following the end
0773   *   of the text matched by the specified capture group.
0774   *   Return -1 if the capture group was not part of the last match.
0775   *   Group #0 refers to the complete range of matched text.
0776   *   Group #1 refers to the text matched by the first set of capturing parentheses.
0777   *
0778   *    @param   regexp      The compiled regular expression.
0779   *    @param   groupNum    The capture group number
0780   *    @param   status      A reference to a UErrorCode to receive any errors.
0781   *    @return              the (native) index of the position following the last matched character.
0782   *   @stable ICU 4.6
0783   */
0784 U_CAPI int64_t U_EXPORT2 
0785 uregex_end64(URegularExpression *regexp,
0786              int32_t               groupNum,
0787              UErrorCode           *status);
0788 
0789 /**
0790   *  Reset any saved state from the previous match.  Has the effect of
0791   *  causing uregex_findNext to begin at the specified index, and causing
0792   *  uregex_start(), uregex_end() and uregex_group() to return an error 
0793   *  indicating that there is no match information available.  Clears any
0794   *  match region that may have been set.
0795   *
0796   *    @param   regexp      The compiled regular expression.
0797   *    @param   index       The position (native) in the text at which a
0798   *                         uregex_findNext() should begin searching.
0799   *    @param   status      A reference to a UErrorCode to receive any errors.
0800   *    @stable ICU 3.0
0801   */
0802 U_CAPI void U_EXPORT2 
0803 uregex_reset(URegularExpression    *regexp,
0804              int32_t               index,
0805              UErrorCode            *status);
0806 
0807 /**
0808   *  64bit version of uregex_reset.
0809   *  Reset any saved state from the previous match.  Has the effect of
0810   *  causing uregex_findNext to begin at the specified index, and causing
0811   *  uregex_start(), uregex_end() and uregex_group() to return an error 
0812   *  indicating that there is no match information available.  Clears any
0813   *  match region that may have been set.
0814   *
0815   *    @param   regexp      The compiled regular expression.
0816   *    @param   index       The position (native) in the text at which a
0817   *                         uregex_findNext() should begin searching.
0818   *    @param   status      A reference to a UErrorCode to receive any errors.
0819   *    @stable ICU 4.6
0820   */
0821 U_CAPI void U_EXPORT2 
0822 uregex_reset64(URegularExpression  *regexp,
0823                int64_t               index,
0824                UErrorCode            *status);
0825 
0826 /**
0827   * Sets the limits of the matching region for this URegularExpression.
0828   * The region is the part of the input string that will be considered when matching.
0829   * Invoking this function resets any saved state from the previous match, 
0830   * then sets the region to start at the index specified by the regionStart parameter
0831   * and end at the index specified by the regionLimit parameter.
0832   *
0833   * Depending on the transparency and anchoring being used (see uregex_useTransparentBounds()
0834   * and uregex_useAnchoringBounds()), certain constructs such as anchors may behave differently
0835   * at or around the boundaries of the region.
0836   *
0837   * The function will fail if regionStart is greater than regionLimit, or if either index
0838   *  is less than zero or greater than the length of the string being matched.
0839   *
0840   * @param regexp The compiled regular expression.
0841   * @param regionStart  The (native) index to begin searches at.
0842   * @param regionLimit  The (native) index to end searches at (exclusive).
0843   * @param status A pointer to a UErrorCode to receive any errors.
0844   * @stable ICU 4.0
0845   */
0846 U_CAPI void U_EXPORT2
0847 uregex_setRegion(URegularExpression   *regexp,
0848                  int32_t               regionStart,
0849                  int32_t               regionLimit,
0850                  UErrorCode           *status);
0851 
0852 /**
0853   * 64bit version of uregex_setRegion.
0854   * Sets the limits of the matching region for this URegularExpression.
0855   * The region is the part of the input string that will be considered when matching.
0856   * Invoking this function resets any saved state from the previous match, 
0857   * then sets the region to start at the index specified by the regionStart parameter
0858   * and end at the index specified by the regionLimit parameter.
0859   *
0860   * Depending on the transparency and anchoring being used (see uregex_useTransparentBounds()
0861   * and uregex_useAnchoringBounds()), certain constructs such as anchors may behave differently
0862   * at or around the boundaries of the region.
0863   *
0864   * The function will fail if regionStart is greater than regionLimit, or if either index
0865   *  is less than zero or greater than the length of the string being matched.
0866   *
0867   * @param regexp The compiled regular expression.
0868   * @param regionStart  The (native) index to begin searches at.
0869   * @param regionLimit  The (native) index to end searches at (exclusive).
0870   * @param status A pointer to a UErrorCode to receive any errors.
0871   * @stable ICU 4.6
0872   */
0873 U_CAPI void U_EXPORT2 
0874 uregex_setRegion64(URegularExpression *regexp,
0875                  int64_t               regionStart,
0876                  int64_t               regionLimit,
0877                  UErrorCode           *status);
0878 
0879 /**
0880   *  Set the matching region and the starting index for subsequent matches
0881   *  in a single operation.
0882   *  This is useful because the usual function for setting the starting
0883   *  index, uregex_reset(), also resets any region limits.
0884   *
0885   * @param regexp The compiled regular expression.
0886   * @param regionStart  The (native) index to begin searches at.
0887   * @param regionLimit  The (native) index to end searches at (exclusive).
0888   * @param startIndex   The index in the input text at which the next 
0889   *                     match operation should begin.
0890   * @param status A pointer to a UErrorCode to receive any errors.
0891   * @stable ICU 4.6
0892   */
0893 U_CAPI void U_EXPORT2 
0894 uregex_setRegionAndStart(URegularExpression *regexp,
0895                  int64_t               regionStart,
0896                  int64_t               regionLimit,
0897                  int64_t               startIndex,
0898                  UErrorCode           *status);
0899 
0900 /**
0901   * Reports the start index of the matching region. Any matches found are limited to
0902   * the region bounded by regionStart (inclusive) and regionEnd (exclusive).
0903   *
0904   * @param regexp The compiled regular expression.
0905   * @param status A pointer to a UErrorCode to receive any errors.
0906   * @return The starting (native) index of this matcher's region.
0907   * @stable ICU 4.0
0908   */
0909 U_CAPI int32_t U_EXPORT2
0910 uregex_regionStart(const  URegularExpression   *regexp,
0911                           UErrorCode           *status);
0912 
0913 /**
0914   * 64bit version of uregex_regionStart.
0915   * Reports the start index of the matching region. Any matches found are limited to
0916   * the region bounded by regionStart (inclusive) and regionEnd (exclusive).
0917   *
0918   * @param regexp The compiled regular expression.
0919   * @param status A pointer to a UErrorCode to receive any errors.
0920   * @return The starting (native) index of this matcher's region.
0921   * @stable ICU 4.6
0922   */
0923 U_CAPI int64_t U_EXPORT2 
0924 uregex_regionStart64(const  URegularExpression   *regexp,
0925                             UErrorCode           *status);
0926 
0927 /**
0928   * Reports the end index (exclusive) of the matching region for this URegularExpression.
0929   * Any matches found are limited to the region bounded by regionStart (inclusive)
0930   * and regionEnd (exclusive).
0931   *
0932   * @param regexp The compiled regular expression.
0933   * @param status A pointer to a UErrorCode to receive any errors.
0934   * @return The ending point (native) of this matcher's region.
0935   * @stable ICU 4.0
0936   */
0937 U_CAPI int32_t U_EXPORT2
0938 uregex_regionEnd(const  URegularExpression   *regexp,
0939                         UErrorCode           *status);
0940 
0941 /**
0942   * 64bit version of uregex_regionEnd.
0943   * Reports the end index (exclusive) of the matching region for this URegularExpression.
0944   * Any matches found are limited to the region bounded by regionStart (inclusive)
0945   * and regionEnd (exclusive).
0946   *
0947   * @param regexp The compiled regular expression.
0948   * @param status A pointer to a UErrorCode to receive any errors.
0949   * @return The ending point (native) of this matcher's region.
0950   * @stable ICU 4.6
0951   */
0952 U_CAPI int64_t U_EXPORT2 
0953 uregex_regionEnd64(const  URegularExpression   *regexp,
0954                           UErrorCode           *status);
0955 
0956 /**
0957   * Queries the transparency of region bounds for this URegularExpression.
0958   * See uregex_useTransparentBounds() for a description of transparent and opaque bounds.
0959   * By default, matching boundaries are opaque.
0960   *
0961   * @param regexp The compiled regular expression.
0962   * @param status A pointer to a UErrorCode to receive any errors.
0963   * @return true if this matcher is using transparent bounds, false if it is not.
0964   * @stable ICU 4.0
0965   */
0966 U_CAPI UBool U_EXPORT2
0967 uregex_hasTransparentBounds(const  URegularExpression   *regexp,
0968                                    UErrorCode           *status);
0969 
0970 
0971 /**
0972   * Sets the transparency of region bounds for this URegularExpression.
0973   * Invoking this function with an argument of true will set matches to use transparent bounds.
0974   * If the boolean argument is false, then opaque bounds will be used.
0975   *
0976   * Using transparent bounds, the boundaries of the matching region are transparent
0977   * to lookahead, lookbehind, and boundary matching constructs. Those constructs can
0978   * see text beyond the boundaries of the region while checking for a match.
0979   *
0980   * With opaque bounds, no text outside of the matching region is visible to lookahead,
0981   * lookbehind, and boundary matching constructs.
0982   *
0983   * By default, opaque bounds are used.
0984   *
0985   * @param   regexp The compiled regular expression.
0986   * @param   b      true for transparent bounds; false for opaque bounds
0987   * @param   status A pointer to a UErrorCode to receive any errors.
0988   * @stable ICU 4.0
0989   **/
0990 U_CAPI void U_EXPORT2  
0991 uregex_useTransparentBounds(URegularExpression   *regexp, 
0992                             UBool                b,
0993                             UErrorCode           *status);
0994 
0995 
0996 /**
0997   * Return true if this URegularExpression is using anchoring bounds.
0998   * By default, anchoring region bounds are used.
0999   *
1000   * @param  regexp The compiled regular expression.
1001   * @param  status A pointer to a UErrorCode to receive any errors.
1002   * @return true if this matcher is using anchoring bounds.
1003   * @stable ICU 4.0
1004   */
1005 U_CAPI UBool U_EXPORT2
1006 uregex_hasAnchoringBounds(const  URegularExpression   *regexp,
1007                                  UErrorCode           *status);
1008 
1009 
1010 /**
1011   * Set whether this URegularExpression is using Anchoring Bounds for its region.
1012   * With anchoring bounds, pattern anchors such as ^ and $ will match at the start
1013   * and end of the region.  Without Anchoring Bounds, anchors will only match at
1014   * the positions they would in the complete text.
1015   *
1016   * Anchoring Bounds are the default for regions.
1017   *
1018   * @param regexp The compiled regular expression.
1019   * @param b      true to enable anchoring bounds; false to disable them.
1020   * @param status A pointer to a UErrorCode to receive any errors.
1021   * @stable ICU 4.0
1022   */
1023 U_CAPI void U_EXPORT2
1024 uregex_useAnchoringBounds(URegularExpression   *regexp,
1025                           UBool                 b,
1026                           UErrorCode           *status);
1027 
1028 /**
1029   * Return true if the most recent matching operation touched the
1030   *  end of the text being processed.  In this case, additional input text could
1031   *  change the results of that match.
1032   *
1033   *  @param regexp The compiled regular expression.
1034   *  @param status A pointer to a UErrorCode to receive any errors.
1035   *  @return  true if the most recent match hit the end of input
1036   *  @stable ICU 4.0
1037   */
1038 U_CAPI UBool U_EXPORT2
1039 uregex_hitEnd(const  URegularExpression   *regexp,
1040                      UErrorCode           *status);
1041 
1042 /**
1043   * Return true if the most recent match succeeded and additional input could cause
1044   * it to fail. If this function returns false and a match was found, then more input
1045   * might change the match but the match won't be lost. If a match was not found,
1046   * then requireEnd has no meaning.
1047   *
1048   * @param regexp The compiled regular expression.
1049   * @param status A pointer to a UErrorCode to receive any errors.
1050   * @return true  if more input could cause the most recent match to no longer match.
1051   * @stable ICU 4.0
1052   */
1053 U_CAPI UBool U_EXPORT2   
1054 uregex_requireEnd(const  URegularExpression   *regexp,
1055                          UErrorCode           *status);
1056 
1057 
1058 
1059 
1060 
1061 /**
1062   *    Replaces every substring of the input that matches the pattern
1063   *    with the given replacement string.  This is a convenience function that
1064   *    provides a complete find-and-replace-all operation.
1065   *
1066   *    This method scans the input string looking for matches of the pattern. 
1067   *    Input that is not part of any match is copied unchanged to the
1068   *    destination buffer.  Matched regions are replaced in the output
1069   *    buffer by the replacement string.   The replacement string may contain
1070   *    references to capture groups; these take the form of $1, $2, etc.
1071   *
1072   *    @param   regexp             The compiled regular expression.
1073   *    @param   replacementText    A string containing the replacement text.
1074   *    @param   replacementLength  The length of the replacement string, or
1075   *                                -1 if it is NUL terminated.
1076   *    @param   destBuf            A (UChar *) buffer that will receive the result.
1077   *    @param   destCapacity       The capacity of the destination buffer.
1078   *    @param   status             A reference to a UErrorCode to receive any errors.
1079   *    @return                     The length of the string resulting from the find
1080   *                                and replace operation.  In the event that the
1081   *                                destination capacity is inadequate, the return value
1082   *                                is still the full length of the untruncated string.
1083   *    @stable ICU 3.0
1084   */
1085 U_CAPI int32_t U_EXPORT2 
1086 uregex_replaceAll(URegularExpression    *regexp,
1087                   const UChar           *replacementText,
1088                   int32_t                replacementLength,
1089                   UChar                 *destBuf,
1090                   int32_t                destCapacity,
1091                   UErrorCode            *status);
1092 
1093 /**
1094   *    Replaces every substring of the input that matches the pattern
1095   *    with the given replacement string.  This is a convenience function that
1096   *    provides a complete find-and-replace-all operation.
1097   *
1098   *    This method scans the input string looking for matches of the pattern. 
1099   *    Input that is not part of any match is copied unchanged to the
1100   *    destination buffer.  Matched regions are replaced in the output
1101   *    buffer by the replacement string.   The replacement string may contain
1102   *    references to capture groups; these take the form of $1, $2, etc.
1103   *
1104   *    @param   regexp         The compiled regular expression.
1105   *    @param   replacement    A string containing the replacement text.
1106   *    @param   dest           A mutable UText that will receive the result.
1107   *                             If NULL, a new UText will be created (which may not be mutable).
1108   *    @param   status         A reference to a UErrorCode to receive any errors.
1109   *    @return                 A UText containing the results of the find and replace.
1110   *                             If a pre-allocated UText was provided, it will always be used and returned.
1111   *
1112   *    @stable ICU 4.6
1113   */
1114 U_CAPI UText * U_EXPORT2 
1115 uregex_replaceAllUText(URegularExpression *regexp,
1116                        UText              *replacement,
1117                        UText              *dest,
1118                        UErrorCode         *status);
1119 
1120 /**
1121   *    Replaces the first substring of the input that matches the pattern
1122   *    with the given replacement string.  This is a convenience function that
1123   *    provides a complete find-and-replace operation.
1124   *
1125   *    This method scans the input string looking for a match of the pattern. 
1126   *    All input that is not part of the match is copied unchanged to the
1127   *    destination buffer.  The matched region is replaced in the output
1128   *    buffer by the replacement string.   The replacement string may contain
1129   *    references to capture groups; these take the form of $1, $2, etc.
1130   *
1131   *    @param   regexp             The compiled regular expression.
1132   *    @param   replacementText    A string containing the replacement text.
1133   *    @param   replacementLength  The length of the replacement string, or
1134   *                                -1 if it is NUL terminated.
1135   *    @param   destBuf            A (UChar *) buffer that will receive the result.
1136   *    @param   destCapacity       The capacity of the destination buffer.
1137   *    @param   status             a reference to a UErrorCode to receive any errors.
1138   *    @return                     The length of the string resulting from the find
1139   *                                and replace operation.  In the event that the
1140   *                                destination capacity is inadequate, the return value
1141   *                                is still the full length of the untruncated string.
1142   *    @stable ICU 3.0
1143   */
1144 U_CAPI int32_t U_EXPORT2 
1145 uregex_replaceFirst(URegularExpression  *regexp,
1146                     const UChar         *replacementText,
1147                     int32_t              replacementLength,
1148                     UChar               *destBuf,
1149                     int32_t              destCapacity,
1150                     UErrorCode          *status);
1151 
1152 /**
1153   *    Replaces the first substring of the input that matches the pattern
1154   *    with the given replacement string.  This is a convenience function that
1155   *    provides a complete find-and-replace operation.
1156   *
1157   *    This method scans the input string looking for a match of the pattern. 
1158   *    All input that is not part of the match is copied unchanged to the
1159   *    destination buffer.  The matched region is replaced in the output
1160   *    buffer by the replacement string.   The replacement string may contain
1161   *    references to capture groups; these take the form of $1, $2, etc.
1162   *
1163   *    @param   regexp         The compiled regular expression.
1164   *    @param   replacement    A string containing the replacement text.
1165   *    @param   dest           A mutable UText that will receive the result.
1166   *                             If NULL, a new UText will be created (which may not be mutable).
1167   *    @param   status         A reference to a UErrorCode to receive any errors.
1168   *    @return                 A UText containing the results of the find and replace.
1169   *                             If a pre-allocated UText was provided, it will always be used and returned.
1170   *
1171   *    @stable ICU 4.6
1172   */
1173 U_CAPI UText * U_EXPORT2 
1174 uregex_replaceFirstUText(URegularExpression *regexp,
1175                          UText              *replacement,
1176                          UText              *dest,
1177                          UErrorCode         *status);
1178 
1179 /**
1180   *   Implements a replace operation intended to be used as part of an
1181   *   incremental find-and-replace.
1182   *
1183   *   <p>The input string, starting from the end of the previous match and ending at
1184   *   the start of the current match, is appended to the destination string.  Then the
1185   *   replacement string is appended to the output string,
1186   *   including handling any substitutions of captured text.</p>
1187   *
1188   *   <p>A note on preflight computation of buffer size and error handling:
1189   *   Calls to uregex_appendReplacement() and uregex_appendTail() are
1190   *   designed to be chained, one after another, with the destination
1191   *   buffer pointer and buffer capacity updated after each in preparation
1192   *   for the next.  If the destination buffer is exhausted partway through such a
1193   *   sequence, a U_BUFFER_OVERFLOW_ERROR status will be returned.  Normal
1194   *   ICU conventions are for a function to perform no action if it is
1195   *   called with an error status, but for this one case, uregex_appendReplacement()
1196   *   will operate normally so that buffer size computations will complete
1197   *   correctly.</p>
1198   *
1199   *   <p>For simple, prepackaged, non-incremental find-and-replace
1200   *      operations, see uregex_replaceFirst() or uregex_replaceAll().</p>
1201   *
1202   *   @param   regexp      The regular expression object.  
1203   *   @param   replacementText The string that will replace the matched portion of the
1204   *                        input string as it is copied to the destination buffer.
1205   *                        The replacement text may contain references ($1, for
1206   *                        example) to capture groups from the match.
1207   *   @param   replacementLength  The length of the replacement text string,
1208   *                        or -1 if the string is NUL terminated.
1209   *   @param   destBuf     The buffer into which the results of the
1210   *                        find-and-replace are placed.  On return, this pointer
1211   *                        will be updated to refer to the beginning of the
1212   *                        unused portion of buffer, leaving it in position for
1213   *                        a subsequent call to this function.
1214   *   @param   destCapacity The size of the output buffer.  On return, this
1215   *                        parameter will be updated to reflect the space remaining
1216   *                        unused in the output buffer.
1217   *   @param   status      A reference to a UErrorCode to receive any errors. 
1218   *   @return              The length of the result string.  In the event that
1219   *                        destCapacity is inadequate, the full length of the
1220   *                        untruncated output string is returned.
1221   *
1222   *   @stable ICU 3.0
1223   *
1224   */
1225 U_CAPI int32_t U_EXPORT2 
1226 uregex_appendReplacement(URegularExpression    *regexp,
1227                          const UChar           *replacementText,
1228                          int32_t                replacementLength,
1229                          UChar                **destBuf,
1230                          int32_t               *destCapacity,
1231                          UErrorCode            *status);
1232 
1233 /**
1234   *   Implements a replace operation intended to be used as part of an
1235   *   incremental find-and-replace.
1236   *
1237   *   <p>The input string, starting from the end of the previous match and ending at
1238   *   the start of the current match, is appended to the destination string.  Then the
1239   *   replacement string is appended to the output string,
1240   *   including handling any substitutions of captured text.</p>
1241   *
1242   *   <p>For simple, prepackaged, non-incremental find-and-replace
1243   *      operations, see uregex_replaceFirstUText() or uregex_replaceAllUText().</p>
1244   *
1245   *   @param   regexp      The regular expression object.  
1246   *   @param   replacementText The string that will replace the matched portion of the
1247   *                        input string as it is copied to the destination buffer.
1248   *                        The replacement text may contain references ($1, for
1249   *                        example) to capture groups from the match.
1250   *   @param   dest        A mutable UText that will receive the result. Must not be NULL.
1251   *   @param   status      A reference to a UErrorCode to receive any errors. 
1252   *
1253   *   @stable ICU 4.6
1254   */
1255 U_CAPI void U_EXPORT2 
1256 uregex_appendReplacementUText(URegularExpression    *regexp,
1257                               UText                 *replacementText,
1258                               UText                 *dest,
1259                               UErrorCode            *status);
1260 
1261 /**
1262   * As the final step in a find-and-replace operation, append the remainder
1263   * of the input string, starting at the position following the last match,
1264   * to the destination string. <code>uregex_appendTail()</code> is intended 
1265   *  to be invoked after one or more invocations of the
1266   *  <code>uregex_appendReplacement()</code> function.
1267   *
1268   *   @param   regexp      The regular expression object.  This is needed to 
1269   *                        obtain the input string along with the position
1270   *                        of the last match within it.
1271   *   @param   destBuf     The buffer in which the results of the
1272   *                        find-and-replace are placed.  On return, the pointer
1273   *                        will be updated to refer to the beginning of the
1274   *                        unused portion of buffer.
1275   *   @param   destCapacity The size of the output buffer.  On return, this
1276   *                        value will be updated to reflect the space remaining
1277   *                        unused in the output buffer.
1278   *   @param   status      A reference to a UErrorCode to receive any errors. 
1279   *   @return              The length of the result string.  In the event that
1280   *                        destCapacity is inadequate, the full length of the
1281   *                        untruncated output string is returned.
1282   *
1283   *   @stable ICU 3.0
1284   */
1285 U_CAPI int32_t U_EXPORT2 
1286 uregex_appendTail(URegularExpression    *regexp,
1287                   UChar                **destBuf,
1288                   int32_t               *destCapacity,
1289                   UErrorCode            *status);
1290 
1291 /**
1292   * As the final step in a find-and-replace operation, append the remainder
1293   * of the input string, starting at the position following the last match,
1294   * to the destination string. <code>uregex_appendTailUText()</code> is intended
1295   *  to be invoked after one or more invocations of the
1296   *  <code>uregex_appendReplacementUText()</code> function.
1297   *
1298   *   @param   regexp      The regular expression object.  This is needed to
1299   *                        obtain the input string along with the position
1300   *                        of the last match within it.
1301   *   @param   dest        A mutable UText that will receive the result. Must not be NULL.
1302   *
1303   *   @param   status      A reference to a UErrorCode to receive any errors.
1304   *
1305   *   @return              The destination UText.
1306   *
1307   *   @stable ICU 4.6
1308   */
1309 U_CAPI UText * U_EXPORT2 
1310 uregex_appendTailUText(URegularExpression    *regexp,
1311                        UText                 *dest,
1312                        UErrorCode            *status);
1313 
1314  /**
1315    * Split a string into fields.  Somewhat like split() from Perl.
1316    *  The pattern matches identify delimiters that separate the input
1317    *  into fields.  The input data between the matches becomes the
1318    *  fields themselves.
1319    *
1320    *  Each of the fields is copied from the input string to the destination
1321    *  buffer, and NUL terminated.  The position of each field within
1322    *  the destination buffer is returned in the destFields array.
1323    *
1324    *  If the delimiter pattern includes capture groups, the captured text will
1325    *  also appear in the destination array of output strings, interspersed
1326    *  with the fields.  This is similar to Perl, but differs from Java,
1327    *  which ignores the presence of capture groups in the pattern.
1328    *
1329    *  Trailing empty fields will always be returned, assuming sufficient
1330    *  destination capacity.  This differs from the default behavior for Java
1331    *  and Perl where trailing empty fields are not returned.
1332    *
1333    *  The number of strings produced by the split operation is returned.
1334    *  This count includes the strings from capture groups in the delimiter pattern.
1335    *  This behavior differs from Java, which ignores capture groups.
1336    *
1337    *    @param   regexp      The compiled regular expression.
1338    *    @param   destBuf     A (UChar *) buffer to receive the fields that
1339    *                         are extracted from the input string.  The field
1340    *                         pointers stored in destFields will refer to positions
1341    *                         within this buffer, which is supplied by the caller.
1342    *                         Any extra positions within the destFields array will
1343    *                         be set to NULL.
1344    *    @param   destCapacity The capacity of the destBuf.
1345    *    @param   requiredCapacity  The actual capacity required of the destBuf.
1346    *                         If destCapacity is too small, requiredCapacity will return
1347    *                         the total capacity required to hold all of the output, and
1348    *                         a U_BUFFER_OVERFLOW_ERROR will be returned.
1349    *    @param   destFields  An array to be filled with the position of each
1350    *                         of the extracted fields within destBuf.
1351    *    @param   destFieldsCapacity  The number of elements in the destFields array.
1352    *                If the number of fields found is less than destFieldsCapacity,
1353    *                the extra destFields elements are set to zero.
1354    *                If destFieldsCapacity is too small, the trailing part of the
1355    *                input, including any field delimiters, is treated as if it
1356    *                were the last field - it is copied to the destBuf, and
1357    *                its position in the destBuf is stored in the last element
1358    *                of destFields.  This behavior mimics that of Perl.  It is not
1359    *                an error condition, and no error status is returned when all
1360    *                destFields positions are used.
1361    * @param status  A reference to a UErrorCode to receive any errors.
1362    * @return        The number of fields into which the input string was split.
1363    * @stable ICU 3.0
1364    */
1365 U_CAPI int32_t U_EXPORT2 
1366 uregex_split(   URegularExpression      *regexp,
1367                   UChar                 *destBuf,
1368                   int32_t                destCapacity,
1369                   int32_t               *requiredCapacity,
1370                   UChar                 *destFields[],
1371                   int32_t                destFieldsCapacity,
1372                   UErrorCode            *status);
1373 
1374   /**
1375    * Split a string into fields.  Somewhat like split() from Perl.
1376    * The pattern matches identify delimiters that separate the input
1377    *  into fields.  The input data between the matches becomes the
1378    *  fields themselves.
1379    * <p>
1380    * The behavior of this function is not very closely aligned with uregex_split();
1381    * instead, it is based on (and implemented directly on top of) the C++ split method.
1382    *
1383    * @param regexp  The compiled regular expression.
1384    * @param destFields    An array of mutable UText structs to receive the results of the split.
1385    *                If a field is NULL, a new UText is allocated to contain the results for
1386    *                that field. This new UText is not guaranteed to be mutable.
1387    * @param destFieldsCapacity  The number of elements in the destination array.
1388    *                If the number of fields found is less than destFieldsCapacity, the
1389    *                extra strings in the destination array are not altered.
1390    *                If the number of destination strings is less than the number
1391    *                of fields, the trailing part of the input string, including any
1392    *                field delimiters, is placed in the last destination string.
1393    *                This behavior mimics that of Perl.  It is not an error condition, and no
1394    *                error status is returned when all destFields positions are used.
1395    * @param status  A reference to a UErrorCode to receive any errors.
1396    * @return        The number of fields into which the input string was split.
1397    *
1398    * @stable ICU 4.6
1399    */
1400 U_CAPI int32_t U_EXPORT2 
1401 uregex_splitUText(URegularExpression    *regexp,
1402                   UText                 *destFields[],
1403                   int32_t                destFieldsCapacity,
1404                   UErrorCode            *status);
1405 
1406 /**
1407  * Set a processing time limit for match operations with this URegularExpression.
1408  *
1409  * Some patterns, when matching certain strings, can run in exponential time.
1410  * For practical purposes, the match operation may appear to be in an
1411  * infinite loop.
1412  * When a limit is set, a match operation will fail with an error if the
1413  * limit is exceeded.
1414  * <p>
1415  * The units of the limit are steps of the match engine.
1416  * Correspondence with actual processor time will depend on the speed
1417  * of the processor and the details of the specific pattern, but will
1418  * typically be on the order of milliseconds.
1419  * <p>
1420  * By default, the matching time is not limited.
1421  *
1422  *
1423  * @param   regexp      The compiled regular expression.
1424  * @param   limit       The limit value, or 0 for no limit.
1425  * @param   status      A reference to a UErrorCode to receive any errors.
1426  * @stable ICU 4.0
1427  */
1428 U_CAPI void U_EXPORT2
1429 uregex_setTimeLimit(URegularExpression      *regexp,
1430                     int32_t                  limit,
1431                     UErrorCode              *status);
1432 
1433 /**
1434  * Get the time limit for matches with this URegularExpression.
1435  * A return value of zero indicates that there is no limit.
1436  *
1437  * @param   regexp      The compiled regular expression.
1438  * @param   status      A reference to a UErrorCode to receive any errors.
1439  * @return the maximum allowed time for a match, in units of processing steps.
1440  * @stable ICU 4.0
1441  */
1442 U_CAPI int32_t U_EXPORT2
1443 uregex_getTimeLimit(const URegularExpression      *regexp,
1444                           UErrorCode              *status);
1445 
1446 /**
1447  * Set the amount of heap storage available for use by the match backtracking stack.
1448  * <p>
1449  * ICU uses a backtracking regular expression engine, with the backtrack stack
1450  * maintained on the heap.  This function sets the limit to the amount of memory
1451  * that can be used for this purpose.  A backtracking stack overflow will
1452  * result in an error from the match operation that caused it.
1453  * <p>
1454  * A limit is desirable because a malicious or poorly designed pattern can use
1455  * excessive memory, potentially crashing the process.  A limit is enabled
1456  * by default.
1457  *
1458  * @param   regexp      The compiled regular expression.
1459  * @param   limit       The maximum size, in bytes, of the matching backtrack stack.
1460  *                      A value of zero means no limit.
1461  *                      The limit must be greater than or equal to zero.
1462  * @param   status      A reference to a UErrorCode to receive any errors.
1463  *
1464  * @stable ICU 4.0
1465  */
1466 U_CAPI void U_EXPORT2
1467 uregex_setStackLimit(URegularExpression      *regexp,
1468                      int32_t                  limit,
1469                      UErrorCode              *status);
1470 
1471 /**
1472  * Get the size of the heap storage available for use by the backtracking stack.
1473  * @param   regexp      The compiled regular expression.
1474  * @param   status      A reference to a UErrorCode to receive any errors.
1475  * @return  the maximum backtracking stack size, in bytes, or zero if the stack size is unlimited.
1476  * @stable ICU 4.0
1477  */
1478 U_CAPI int32_t U_EXPORT2
1479 uregex_getStackLimit(const URegularExpression      *regexp,
1480                            UErrorCode              *status);
1481 
1482 
1483 /**
1484  * Function pointer for a regular expression matching callback function.
1485  * When set, a callback function will be called periodically during matching
1486  * operations.  If the callback function returns false, the matching
1487  * operation will be terminated early.
1488  *
1489  * Note:  the callback function must not call other functions on this
1490  *        URegularExpression.
1491  *
1492  * @param context  context pointer.  The callback function will be invoked
1493  *                 with the context specified at the time that
1494  *                 uregex_setMatchCallback() is called.
1495  * @param steps    the accumulated processing time, in match steps,
1496  *                 for this matching operation.
1497  * @return         true to continue the matching operation.
1498  *                 false to terminate the matching operation.
1499  * @stable ICU 4.0
1500  */
1501 U_CDECL_BEGIN
1502 typedef UBool U_CALLCONV URegexMatchCallback (
1503                    const void *context,
1504                    int32_t     steps);
1505 U_CDECL_END
1506 
1507 /**
1508  * Set a match callback function for this URegularExpression.
1509  * During matching operations the function will be called periodically,
1510  * giving the application the opportunity to terminate a long-running
1511  * match.
1512  *
1513  * @param   regexp      The compiled regular expression.
1514  * @param   callback    A pointer to the user-supplied URegexMatchCallback function.
1515  * @param   context     User context pointer.  The value supplied at the
1516  *                      time the callback function is set will be saved
1517  *                      and passed to the callback each time that it is called.
1518  * @param   status      A reference to a UErrorCode to receive any errors.
1519  * @stable ICU 4.0
1520  */
1521 U_CAPI void U_EXPORT2
1522 uregex_setMatchCallback(URegularExpression      *regexp,
1523                         URegexMatchCallback     *callback,
1524                         const void              *context,
1525                         UErrorCode              *status);
1526 
1527 
1528 /**
1529  * Get the match callback function for this URegularExpression.
1530  *
1531  * @param   regexp      The compiled regular expression.
1532  * @param   callback    Out parameter, receives a pointer to the user-supplied
1533  *                      callback function.
1534  * @param   context     Out parameter, receives the user context pointer that
1535  *                      was set when uregex_setMatchCallback() was called.
1536  * @param   status      A reference to a UErrorCode to receive any errors.
1537  * @stable ICU 4.0
1538  */
1539 U_CAPI void U_EXPORT2
1540 uregex_getMatchCallback(const URegularExpression    *regexp,
1541                         URegexMatchCallback        **callback,
1542                         const void                 **context,
1543                         UErrorCode                  *status);
1544 
1545 /**
1546  * Function pointer for a regular expression find callback function.
1547  *
1548  * When set, a callback function will be called during a find operation
1549  * and for operations that depend on find, such as findNext, split and some replace
1550  * operations like replaceFirst.
1551  * The callback will usually be called after each attempt at a match, but this is not a
1552  * guarantee that the callback will be invoked at each character.  For finds where the
1553  * match engine is invoked at each character, this may be close to true, but less likely
1554  * for more optimized loops where the pattern is known to only start, and the match
1555  * engine invoked, at certain characters.
1556  * When invoked, this callback will specify the index at which a match operation is about
1557  * to be attempted, giving the application the opportunity to terminate a long-running
1558  * find operation.
1559  *
1560  * If the callback function returns false, the find operation will be terminated early.
1561  *
1562  * Note:  the callback function must not call other functions on this
1563  *        URegularExpression.
1564  *
1565  * @param context  context pointer.  The callback function will be invoked
1566  *                 with the context specified at the time that
1567  *                 uregex_setFindProgressCallback() is called.
1568  * @param matchIndex  the next index at which a match will be attempted for this
1569  *                 find operation.  If this callback interrupts the search, this is the
1570  *                 index at which a find/findNext operation may be re-initiated.
1571  * @return         true to continue the matching operation.
1572  *                 false to terminate the matching operation.
1573  * @stable ICU 4.6
1574  */
1575 U_CDECL_BEGIN
1576 typedef UBool U_CALLCONV URegexFindProgressCallback (
1577                    const void *context,
1578                    int64_t     matchIndex);
1579 U_CDECL_END
1580 
1581 
1582 /**
1583  * Set the find progress callback function for this URegularExpression.
1584  *
1585  * @param   regexp      The compiled regular expression.
1586  * @param   callback    A pointer to the user-supplied URegexFindProgressCallback function.
1587  * @param   context     User context pointer.  The value supplied at the
1588  *                      time the callback function is set will be saved
1589  *                      and passed to the callback each time that it is called.
1590  * @param   status      A reference to a UErrorCode to receive any errors.
1591  * @stable ICU 4.6
1592  */
1593 U_CAPI void U_EXPORT2
1594 uregex_setFindProgressCallback(URegularExpression              *regexp,
1595                                 URegexFindProgressCallback      *callback,
1596                                 const void                      *context,
1597                                 UErrorCode                      *status);
1598 
1599 /**
1600  * Get the find progress callback function for this URegularExpression.
1601  *
1602  * @param   regexp      The compiled regular expression.
1603  * @param   callback    Out parameter, receives a pointer to the user-supplied
1604  *                      find progress callback function.
1605  * @param   context     Out parameter, receives the user context pointer that
1606  *                      was set when uregex_setFindProgressCallback() was called.
1607  * @param   status      A reference to a UErrorCode to receive any errors.
1608  * @stable ICU 4.6
1609  */
1610 U_CAPI void U_EXPORT2
1611 uregex_getFindProgressCallback(const URegularExpression          *regexp,
1612                                 URegexFindProgressCallback        **callback,
1613                                 const void                        **context,
1614                                 UErrorCode                        *status);
1615 
1616 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS  */
1617 #endif   /*  UREGEX_H  */