|
||||
Warning, file /include/unicode/caniter.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 // © 2016 and later: Unicode, Inc. and others. 0002 // License & terms of use: http://www.unicode.org/copyright.html 0003 /* 0004 ******************************************************************************* 0005 * Copyright (C) 1996-2014, International Business Machines Corporation and 0006 * others. All Rights Reserved. 0007 ******************************************************************************* 0008 */ 0009 0010 #ifndef CANITER_H 0011 #define CANITER_H 0012 0013 #include "unicode/utypes.h" 0014 0015 #if U_SHOW_CPLUSPLUS_API 0016 0017 #if !UCONFIG_NO_NORMALIZATION 0018 0019 #include "unicode/uobject.h" 0020 #include "unicode/unistr.h" 0021 0022 /** 0023 * \file 0024 * \brief C++ API: Canonical Iterator 0025 */ 0026 0027 /** Should permutation skip characters with combining class zero 0028 * Should be either true or false. This is a compile time option 0029 * @stable ICU 2.4 0030 */ 0031 #ifndef CANITER_SKIP_ZEROES 0032 #define CANITER_SKIP_ZEROES true 0033 #endif 0034 0035 U_NAMESPACE_BEGIN 0036 0037 class Hashtable; 0038 class Normalizer2; 0039 class Normalizer2Impl; 0040 0041 /** 0042 * This class allows one to iterate through all the strings that are canonically equivalent to a given 0043 * string. For example, here are some sample results: 0044 Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} 0045 1: \\u0041\\u030A\\u0064\\u0307\\u0327 0046 = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} 0047 2: \\u0041\\u030A\\u0064\\u0327\\u0307 0048 = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} 0049 3: \\u0041\\u030A\\u1E0B\\u0327 0050 = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} 0051 4: \\u0041\\u030A\\u1E11\\u0307 0052 = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} 0053 5: \\u00C5\\u0064\\u0307\\u0327 0054 = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} 0055 6: \\u00C5\\u0064\\u0327\\u0307 0056 = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} 0057 7: \\u00C5\\u1E0B\\u0327 0058 = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} 0059 8: \\u00C5\\u1E11\\u0307 0060 = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} 0061 9: \\u212B\\u0064\\u0307\\u0327 0062 = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} 0063 10: \\u212B\\u0064\\u0327\\u0307 0064 = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} 0065 11: \\u212B\\u1E0B\\u0327 0066 = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} 0067 12: \\u212B\\u1E11\\u0307 0068 = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} 0069 *<br>Note: the code is intended for use with small strings, and is not suitable for larger ones, 0070 * since it has not been optimized for that situation. 0071 * Note, CanonicalIterator is not intended to be subclassed. 0072 * @author M. Davis 0073 * @author C++ port by V. Weinstein 0074 * @stable ICU 2.4 0075 */ 0076 class U_COMMON_API CanonicalIterator final : public UObject { 0077 public: 0078 /** 0079 * Construct a CanonicalIterator object 0080 * @param source string to get results for 0081 * @param status Fill-in parameter which receives the status of this operation. 0082 * @stable ICU 2.4 0083 */ 0084 CanonicalIterator(const UnicodeString &source, UErrorCode &status); 0085 0086 /** Destructor 0087 * Cleans pieces 0088 * @stable ICU 2.4 0089 */ 0090 virtual ~CanonicalIterator(); 0091 0092 /** 0093 * Gets the NFD form of the current source we are iterating over. 0094 * @return gets the source: NOTE: it is the NFD form of source 0095 * @stable ICU 2.4 0096 */ 0097 UnicodeString getSource(); 0098 0099 /** 0100 * Resets the iterator so that one can start again from the beginning. 0101 * @stable ICU 2.4 0102 */ 0103 void reset(); 0104 0105 /** 0106 * Get the next canonically equivalent string. 0107 * <br><b>Warning: The strings are not guaranteed to be in any particular order.</b> 0108 * @return the next string that is canonically equivalent. A bogus string is returned when 0109 * the iteration is done. 0110 * @stable ICU 2.4 0111 */ 0112 UnicodeString next(); 0113 0114 /** 0115 * Set a new source for this iterator. Allows object reuse. 0116 * @param newSource the source string to iterate against. This allows the same iterator to be used 0117 * while changing the source string, saving object creation. 0118 * @param status Fill-in parameter which receives the status of this operation. 0119 * @stable ICU 2.4 0120 */ 0121 void setSource(const UnicodeString &newSource, UErrorCode &status); 0122 0123 #ifndef U_HIDE_INTERNAL_API 0124 /** 0125 * Dumb recursive implementation of permutation. 0126 * TODO: optimize 0127 * @param source the string to find permutations for 0128 * @param skipZeros determine if skip zeros 0129 * @param result the results in a set. 0130 * @param status Fill-in parameter which receives the status of this operation. 0131 * @internal 0132 */ 0133 static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status); 0134 #endif /* U_HIDE_INTERNAL_API */ 0135 0136 /** 0137 * ICU "poor man's RTTI", returns a UClassID for this class. 0138 * 0139 * @stable ICU 2.2 0140 */ 0141 static UClassID U_EXPORT2 getStaticClassID(); 0142 0143 /** 0144 * ICU "poor man's RTTI", returns a UClassID for the actual class. 0145 * 0146 * @stable ICU 2.2 0147 */ 0148 virtual UClassID getDynamicClassID() const override; 0149 0150 private: 0151 // ===================== PRIVATES ============================== 0152 // private default constructor 0153 CanonicalIterator() = delete; 0154 0155 0156 /** 0157 * Copy constructor. Private for now. 0158 * @internal (private) 0159 */ 0160 CanonicalIterator(const CanonicalIterator& other) = delete; 0161 0162 /** 0163 * Assignment operator. Private for now. 0164 * @internal (private) 0165 */ 0166 CanonicalIterator& operator=(const CanonicalIterator& other) = delete; 0167 0168 // fields 0169 UnicodeString source; 0170 UBool done; 0171 0172 // 2 dimensional array holds the pieces of the string with 0173 // their different canonically equivalent representations 0174 UnicodeString **pieces; 0175 int32_t pieces_length; 0176 int32_t *pieces_lengths; 0177 0178 // current is used in iterating to combine pieces 0179 int32_t *current; 0180 int32_t current_length; 0181 0182 // transient fields 0183 UnicodeString buffer; 0184 0185 const Normalizer2 &nfd; 0186 const Normalizer2Impl &nfcImpl; 0187 0188 // we have a segment, in NFD. Find all the strings that are canonically equivalent to it. 0189 UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment) 0190 0191 //Set getEquivalents2(String segment); 0192 Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status); 0193 //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status); 0194 0195 /** 0196 * See if the decomposition of cp2 is at segment starting at segmentPos 0197 * (with canonical rearrangement!) 0198 * If so, take the remainder, and return the equivalents 0199 */ 0200 //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer); 0201 Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); 0202 //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); 0203 0204 void cleanPieces(); 0205 0206 }; 0207 0208 U_NAMESPACE_END 0209 0210 #endif /* #if !UCONFIG_NO_NORMALIZATION */ 0211 0212 #endif /* U_SHOW_CPLUSPLUS_API */ 0213 0214 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |