|
||||
File indexing completed on 2025-01-18 10:13:03
0001 // © 2016 and later: Unicode, Inc. and others. 0002 // License & terms of use: http://www.unicode.org/copyright.html 0003 /* 0004 ****************************************************************************** 0005 * Copyright (C) 1997-2014, International Business Machines 0006 * Corporation and others. All Rights Reserved. 0007 ****************************************************************************** 0008 */ 0009 0010 /** 0011 * \file 0012 * \brief C++ API: Collation Element Iterator. 0013 */ 0014 0015 /** 0016 * File coleitr.h 0017 * 0018 * Created by: Helena Shih 0019 * 0020 * Modification History: 0021 * 0022 * Date Name Description 0023 * 0024 * 8/18/97 helena Added internal API documentation. 0025 * 08/03/98 erm Synched with 1.2 version CollationElementIterator.java 0026 * 12/10/99 aliu Ported Thai collation support from Java. 0027 * 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h) 0028 * 02/19/01 swquek Removed CollationElementsIterator() since it is 0029 * private constructor and no calls are made to it 0030 * 2012-2014 markus Rewritten in C++ again. 0031 */ 0032 0033 #ifndef COLEITR_H 0034 #define COLEITR_H 0035 0036 #include "unicode/utypes.h" 0037 0038 #if U_SHOW_CPLUSPLUS_API 0039 0040 #if !UCONFIG_NO_COLLATION 0041 0042 #include "unicode/unistr.h" 0043 #include "unicode/uobject.h" 0044 0045 struct UCollationElements; 0046 struct UHashtable; 0047 0048 U_NAMESPACE_BEGIN 0049 0050 struct CollationData; 0051 0052 class CharacterIterator; 0053 class CollationIterator; 0054 class RuleBasedCollator; 0055 class UCollationPCE; 0056 class UVector32; 0057 0058 /** 0059 * The CollationElementIterator class is used as an iterator to walk through 0060 * each character of an international string. Use the iterator to return the 0061 * ordering priority of the positioned character. The ordering priority of a 0062 * character, which we refer to as a key, defines how a character is collated in 0063 * the given collation object. 0064 * For example, consider the following in Slovak and in traditional Spanish collation: 0065 * <pre> 0066 * "ca" -> the first key is key('c') and second key is key('a'). 0067 * "cha" -> the first key is key('ch') and second key is key('a').</pre> 0068 * And in German phonebook collation, 0069 * <pre> \htmlonly "æb"-> the first key is key('a'), the second key is key('e'), and 0070 * the third key is key('b'). \endhtmlonly </pre> 0071 * The key of a character, is an integer composed of primary order(short), 0072 * secondary order(char), and tertiary order(char). Java strictly defines the 0073 * size and signedness of its primitive data types. Therefore, the static 0074 * functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return 0075 * int32_t to ensure the correctness of the key value. 0076 * <p>Example of the iterator usage: (without error checking) 0077 * <pre> 0078 * \code 0079 * void CollationElementIterator_Example() 0080 * { 0081 * UnicodeString str = "This is a test"; 0082 * UErrorCode success = U_ZERO_ERROR; 0083 * RuleBasedCollator* rbc = 0084 * (RuleBasedCollator*) RuleBasedCollator::createInstance(success); 0085 * CollationElementIterator* c = 0086 * rbc->createCollationElementIterator( str ); 0087 * int32_t order = c->next(success); 0088 * c->reset(); 0089 * order = c->previous(success); 0090 * delete c; 0091 * delete rbc; 0092 * } 0093 * \endcode 0094 * </pre> 0095 * <p> 0096 * The method next() returns the collation order of the next character based on 0097 * the comparison level of the collator. The method previous() returns the 0098 * collation order of the previous character based on the comparison level of 0099 * the collator. The Collation Element Iterator moves only in one direction 0100 * between calls to reset(), setOffset(), or setText(). That is, next() 0101 * and previous() can not be inter-used. Whenever previous() is to be called after 0102 * next() or vice versa, reset(), setOffset() or setText() has to be called first 0103 * to reset the status, shifting pointers to either the end or the start of 0104 * the string (reset() or setText()), or the specified position (setOffset()). 0105 * Hence at the next call of next() or previous(), the first or last collation order, 0106 * or collation order at the specified position will be returned. If a change of 0107 * direction is done without one of these calls, the result is undefined. 0108 * <p> 0109 * The result of a forward iterate (next()) and reversed result of the backward 0110 * iterate (previous()) on the same string are equivalent, if collation orders 0111 * with the value 0 are ignored. 0112 * Character based on the comparison level of the collator. A collation order 0113 * consists of primary order, secondary order and tertiary order. The data 0114 * type of the collation order is <strong>int32_t</strong>. 0115 * 0116 * Note, CollationElementIterator should not be subclassed. 0117 * @see Collator 0118 * @see RuleBasedCollator 0119 * @version 1.8 Jan 16 2001 0120 */ 0121 class U_I18N_API CollationElementIterator final : public UObject { 0122 public: 0123 0124 // CollationElementIterator public data member ------------------------------ 0125 0126 enum { 0127 /** 0128 * NULLORDER indicates that an error has occurred while processing 0129 * @stable ICU 2.0 0130 */ 0131 NULLORDER = (int32_t)0xffffffff 0132 }; 0133 0134 // CollationElementIterator public constructor/destructor ------------------- 0135 0136 /** 0137 * Copy constructor. 0138 * 0139 * @param other the object to be copied from 0140 * @stable ICU 2.0 0141 */ 0142 CollationElementIterator(const CollationElementIterator& other); 0143 0144 /** 0145 * Destructor 0146 * @stable ICU 2.0 0147 */ 0148 virtual ~CollationElementIterator(); 0149 0150 // CollationElementIterator public methods ---------------------------------- 0151 0152 /** 0153 * Returns true if "other" is the same as "this" 0154 * 0155 * @param other the object to be compared 0156 * @return true if "other" is the same as "this" 0157 * @stable ICU 2.0 0158 */ 0159 bool operator==(const CollationElementIterator& other) const; 0160 0161 /** 0162 * Returns true if "other" is not the same as "this". 0163 * 0164 * @param other the object to be compared 0165 * @return true if "other" is not the same as "this" 0166 * @stable ICU 2.0 0167 */ 0168 bool operator!=(const CollationElementIterator& other) const; 0169 0170 /** 0171 * Resets the cursor to the beginning of the string. 0172 * @stable ICU 2.0 0173 */ 0174 void reset(void); 0175 0176 /** 0177 * Gets the ordering priority of the next character in the string. 0178 * @param status the error code status. 0179 * @return the next character's ordering. otherwise returns NULLORDER if an 0180 * error has occurred or if the end of string has been reached 0181 * @stable ICU 2.0 0182 */ 0183 int32_t next(UErrorCode& status); 0184 0185 /** 0186 * Get the ordering priority of the previous collation element in the string. 0187 * @param status the error code status. 0188 * @return the previous element's ordering. otherwise returns NULLORDER if an 0189 * error has occurred or if the start of string has been reached 0190 * @stable ICU 2.0 0191 */ 0192 int32_t previous(UErrorCode& status); 0193 0194 /** 0195 * Gets the primary order of a collation order. 0196 * @param order the collation order 0197 * @return the primary order of a collation order. 0198 * @stable ICU 2.0 0199 */ 0200 static inline int32_t primaryOrder(int32_t order); 0201 0202 /** 0203 * Gets the secondary order of a collation order. 0204 * @param order the collation order 0205 * @return the secondary order of a collation order. 0206 * @stable ICU 2.0 0207 */ 0208 static inline int32_t secondaryOrder(int32_t order); 0209 0210 /** 0211 * Gets the tertiary order of a collation order. 0212 * @param order the collation order 0213 * @return the tertiary order of a collation order. 0214 * @stable ICU 2.0 0215 */ 0216 static inline int32_t tertiaryOrder(int32_t order); 0217 0218 /** 0219 * Return the maximum length of any expansion sequences that end with the 0220 * specified comparison order. 0221 * @param order a collation order returned by previous or next. 0222 * @return maximum size of the expansion sequences ending with the collation 0223 * element or 1 if collation element does not occur at the end of any 0224 * expansion sequence 0225 * @stable ICU 2.0 0226 */ 0227 int32_t getMaxExpansion(int32_t order) const; 0228 0229 /** 0230 * Gets the comparison order in the desired strength. Ignore the other 0231 * differences. 0232 * @param order The order value 0233 * @stable ICU 2.0 0234 */ 0235 int32_t strengthOrder(int32_t order) const; 0236 0237 /** 0238 * Sets the source string. 0239 * @param str the source string. 0240 * @param status the error code status. 0241 * @stable ICU 2.0 0242 */ 0243 void setText(const UnicodeString& str, UErrorCode& status); 0244 0245 /** 0246 * Sets the source string. 0247 * @param str the source character iterator. 0248 * @param status the error code status. 0249 * @stable ICU 2.0 0250 */ 0251 void setText(CharacterIterator& str, UErrorCode& status); 0252 0253 /** 0254 * Checks if a comparison order is ignorable. 0255 * @param order the collation order. 0256 * @return true if a character is ignorable, false otherwise. 0257 * @stable ICU 2.0 0258 */ 0259 static inline UBool isIgnorable(int32_t order); 0260 0261 /** 0262 * Gets the offset of the currently processed character in the source string. 0263 * @return the offset of the character. 0264 * @stable ICU 2.0 0265 */ 0266 int32_t getOffset(void) const; 0267 0268 /** 0269 * Sets the offset of the currently processed character in the source string. 0270 * @param newOffset the new offset. 0271 * @param status the error code status. 0272 * @return the offset of the character. 0273 * @stable ICU 2.0 0274 */ 0275 void setOffset(int32_t newOffset, UErrorCode& status); 0276 0277 /** 0278 * ICU "poor man's RTTI", returns a UClassID for the actual class. 0279 * 0280 * @stable ICU 2.2 0281 */ 0282 virtual UClassID getDynamicClassID() const override; 0283 0284 /** 0285 * ICU "poor man's RTTI", returns a UClassID for this class. 0286 * 0287 * @stable ICU 2.2 0288 */ 0289 static UClassID U_EXPORT2 getStaticClassID(); 0290 0291 #ifndef U_HIDE_INTERNAL_API 0292 /** @internal */ 0293 static inline CollationElementIterator *fromUCollationElements(UCollationElements *uc) { 0294 return reinterpret_cast<CollationElementIterator *>(uc); 0295 } 0296 /** @internal */ 0297 static inline const CollationElementIterator *fromUCollationElements(const UCollationElements *uc) { 0298 return reinterpret_cast<const CollationElementIterator *>(uc); 0299 } 0300 /** @internal */ 0301 inline UCollationElements *toUCollationElements() { 0302 return reinterpret_cast<UCollationElements *>(this); 0303 } 0304 /** @internal */ 0305 inline const UCollationElements *toUCollationElements() const { 0306 return reinterpret_cast<const UCollationElements *>(this); 0307 } 0308 #endif // U_HIDE_INTERNAL_API 0309 0310 private: 0311 friend class RuleBasedCollator; 0312 friend class UCollationPCE; 0313 0314 /** 0315 * CollationElementIterator constructor. This takes the source string and the 0316 * collation object. The cursor will walk thru the source string based on the 0317 * predefined collation rules. If the source string is empty, NULLORDER will 0318 * be returned on the calls to next(). 0319 * @param sourceText the source string. 0320 * @param order the collation object. 0321 * @param status the error code status. 0322 */ 0323 CollationElementIterator(const UnicodeString& sourceText, 0324 const RuleBasedCollator* order, UErrorCode& status); 0325 // Note: The constructors should take settings & tailoring, not a collator, 0326 // to avoid circular dependencies. 0327 // However, for operator==() we would need to be able to compare tailoring data for equality 0328 // without making CollationData or CollationTailoring depend on TailoredSet. 0329 // (See the implementation of RuleBasedCollator::operator==().) 0330 // That might require creating an intermediate class that would be used 0331 // by both CollationElementIterator and RuleBasedCollator 0332 // but only contain the part of RBC== related to data and rules. 0333 0334 /** 0335 * CollationElementIterator constructor. This takes the source string and the 0336 * collation object. The cursor will walk thru the source string based on the 0337 * predefined collation rules. If the source string is empty, NULLORDER will 0338 * be returned on the calls to next(). 0339 * @param sourceText the source string. 0340 * @param order the collation object. 0341 * @param status the error code status. 0342 */ 0343 CollationElementIterator(const CharacterIterator& sourceText, 0344 const RuleBasedCollator* order, UErrorCode& status); 0345 0346 /** 0347 * Assignment operator 0348 * 0349 * @param other the object to be copied 0350 */ 0351 const CollationElementIterator& 0352 operator=(const CollationElementIterator& other); 0353 0354 CollationElementIterator() = delete; // default constructor not implemented 0355 0356 /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()). */ 0357 inline int8_t normalizeDir() const { return dir_ == 1 ? 0 : dir_; } 0358 0359 static UHashtable *computeMaxExpansions(const CollationData *data, UErrorCode &errorCode); 0360 0361 static int32_t getMaxExpansion(const UHashtable *maxExpansions, int32_t order); 0362 0363 // CollationElementIterator private data members ---------------------------- 0364 0365 CollationIterator *iter_; // owned 0366 const RuleBasedCollator *rbc_; // aliased 0367 uint32_t otherHalf_; 0368 /** 0369 * <0: backwards; 0: just after reset() (previous() begins from end); 0370 * 1: just after setOffset(); >1: forward 0371 */ 0372 int8_t dir_; 0373 /** 0374 * Stores offsets from expansions and from unsafe-backwards iteration, 0375 * so that getOffset() returns intermediate offsets for the CEs 0376 * that are consistent with forward iteration. 0377 */ 0378 UVector32 *offsets_; 0379 0380 UnicodeString string_; 0381 }; 0382 0383 // CollationElementIterator inline method definitions -------------------------- 0384 0385 inline int32_t CollationElementIterator::primaryOrder(int32_t order) 0386 { 0387 return (order >> 16) & 0xffff; 0388 } 0389 0390 inline int32_t CollationElementIterator::secondaryOrder(int32_t order) 0391 { 0392 return (order >> 8) & 0xff; 0393 } 0394 0395 inline int32_t CollationElementIterator::tertiaryOrder(int32_t order) 0396 { 0397 return order & 0xff; 0398 } 0399 0400 inline UBool CollationElementIterator::isIgnorable(int32_t order) 0401 { 0402 return (order & 0xffff0000) == 0; 0403 } 0404 0405 U_NAMESPACE_END 0406 0407 #endif /* #if !UCONFIG_NO_COLLATION */ 0408 0409 #endif /* U_SHOW_CPLUSPLUS_API */ 0410 0411 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |