|
||||
File indexing completed on 2025-01-18 10:13:12
0001 // © 2016 and later: Unicode, Inc. and others. 0002 // License & terms of use: http://www.unicode.org/copyright.html 0003 /* 0004 * Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved. 0005 ********************************************************************** 0006 * Date Name Description 0007 * 07/18/01 aliu Creation. 0008 ********************************************************************** 0009 */ 0010 #ifndef UNIMATCH_H 0011 #define UNIMATCH_H 0012 0013 #include "unicode/utypes.h" 0014 0015 /** 0016 * \file 0017 * \brief C++ API: Unicode Matcher 0018 */ 0019 0020 #if U_SHOW_CPLUSPLUS_API 0021 0022 U_NAMESPACE_BEGIN 0023 0024 class Replaceable; 0025 class UnicodeString; 0026 class UnicodeSet; 0027 0028 /** 0029 * Constants returned by <code>UnicodeMatcher::matches()</code> 0030 * indicating the degree of match. 0031 * @stable ICU 2.4 0032 */ 0033 enum UMatchDegree { 0034 /** 0035 * Constant returned by <code>matches()</code> indicating a 0036 * mismatch between the text and this matcher. The text contains 0037 * a character which does not match, or the text does not contain 0038 * all desired characters for a non-incremental match. 0039 * @stable ICU 2.4 0040 */ 0041 U_MISMATCH, 0042 0043 /** 0044 * Constant returned by <code>matches()</code> indicating a 0045 * partial match between the text and this matcher. This value is 0046 * only returned for incremental match operations. All characters 0047 * of the text match, but more characters are required for a 0048 * complete match. Alternatively, for variable-length matchers, 0049 * all characters of the text match, and if more characters were 0050 * supplied at limit, they might also match. 0051 * @stable ICU 2.4 0052 */ 0053 U_PARTIAL_MATCH, 0054 0055 /** 0056 * Constant returned by <code>matches()</code> indicating a 0057 * complete match between the text and this matcher. For an 0058 * incremental variable-length match, this value is returned if 0059 * the given text matches, and it is known that additional 0060 * characters would not alter the extent of the match. 0061 * @stable ICU 2.4 0062 */ 0063 U_MATCH 0064 }; 0065 0066 /** 0067 * <code>UnicodeMatcher</code> defines a protocol for objects that can 0068 * match a range of characters in a Replaceable string. 0069 * @stable ICU 2.4 0070 */ 0071 class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ { 0072 0073 public: 0074 /** 0075 * Destructor. 0076 * @stable ICU 2.4 0077 */ 0078 virtual ~UnicodeMatcher(); 0079 0080 /** 0081 * Return a UMatchDegree value indicating the degree of match for 0082 * the given text at the given offset. Zero, one, or more 0083 * characters may be matched. 0084 * 0085 * Matching in the forward direction is indicated by limit > 0086 * offset. Characters from offset forwards to limit-1 will be 0087 * considered for matching. 0088 * 0089 * Matching in the reverse direction is indicated by limit < 0090 * offset. Characters from offset backwards to limit+1 will be 0091 * considered for matching. 0092 * 0093 * If limit == offset then the only match possible is a zero 0094 * character match (which subclasses may implement if desired). 0095 * 0096 * As a side effect, advance the offset parameter to the limit of 0097 * the matched substring. In the forward direction, this will be 0098 * the index of the last matched character plus one. In the 0099 * reverse direction, this will be the index of the last matched 0100 * character minus one. 0101 * 0102 * <p>Note: This method is not const because some classes may 0103 * modify their state as the result of a match. 0104 * 0105 * @param text the text to be matched 0106 * @param offset on input, the index into text at which to begin 0107 * matching. On output, the limit of the matched text. The 0108 * number of matched characters is the output value of offset 0109 * minus the input value. Offset should always point to the 0110 * HIGH SURROGATE (leading code unit) of a pair of surrogates, 0111 * both on entry and upon return. 0112 * @param limit the limit index of text to be matched. Greater 0113 * than offset for a forward direction match, less than offset for 0114 * a backward direction match. The last character to be 0115 * considered for matching will be text.charAt(limit-1) in the 0116 * forward direction or text.charAt(limit+1) in the backward 0117 * direction. 0118 * @param incremental if true, then assume further characters may 0119 * be inserted at limit and check for partial matching. Otherwise 0120 * assume the text as given is complete. 0121 * @return a match degree value indicating a full match, a partial 0122 * match, or a mismatch. If incremental is false then 0123 * U_PARTIAL_MATCH should never be returned. 0124 * @stable ICU 2.4 0125 */ 0126 virtual UMatchDegree matches(const Replaceable& text, 0127 int32_t& offset, 0128 int32_t limit, 0129 UBool incremental) = 0; 0130 0131 /** 0132 * Returns a string representation of this matcher. If the result of 0133 * calling this function is passed to the appropriate parser, it 0134 * will produce another matcher that is equal to this one. 0135 * @param result the string to receive the pattern. Previous 0136 * contents will be deleted. 0137 * @param escapeUnprintable if true then convert unprintable 0138 * character to their hex escape representations, \\uxxxx or 0139 * \\Uxxxxxxxx. Unprintable characters are those other than 0140 * U+000A, U+0020..U+007E. 0141 * @stable ICU 2.4 0142 */ 0143 virtual UnicodeString& toPattern(UnicodeString& result, 0144 UBool escapeUnprintable = false) const = 0; 0145 0146 /** 0147 * Returns true if this matcher will match a character c, where c 0148 * & 0xFF == v, at offset, in the forward direction (with limit > 0149 * offset). This is used by <tt>RuleBasedTransliterator</tt> for 0150 * indexing. 0151 * @stable ICU 2.4 0152 */ 0153 virtual UBool matchesIndexValue(uint8_t v) const = 0; 0154 0155 /** 0156 * Union the set of all characters that may be matched by this object 0157 * into the given set. 0158 * @param toUnionTo the set into which to union the source characters 0159 * @stable ICU 2.4 0160 */ 0161 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0; 0162 }; 0163 0164 U_NAMESPACE_END 0165 0166 #endif /* U_SHOW_CPLUSPLUS_API */ 0167 0168 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |