|
||||
File indexing completed on 2025-01-18 10:13:08
0001 // © 2016 and later: Unicode, Inc. and others. 0002 // License & terms of use: http://www.unicode.org/copyright.html 0003 /* 0004 ***************************************************************************** 0005 * Copyright (C) 1996-2014, International Business Machines Corporation and others. 0006 * All Rights Reserved. 0007 ***************************************************************************** 0008 * 0009 * File sortkey.h 0010 * 0011 * Created by: Helena Shih 0012 * 0013 * Modification History: 0014 * 0015 * Date Name Description 0016 * 0017 * 6/20/97 helena Java class name change. 0018 * 8/18/97 helena Added internal API documentation. 0019 * 6/26/98 erm Changed to use byte arrays and memcmp. 0020 ***************************************************************************** 0021 */ 0022 0023 #ifndef SORTKEY_H 0024 #define SORTKEY_H 0025 0026 #include "unicode/utypes.h" 0027 0028 #if U_SHOW_CPLUSPLUS_API 0029 0030 /** 0031 * \file 0032 * \brief C++ API: Keys for comparing strings multiple times. 0033 */ 0034 0035 #if !UCONFIG_NO_COLLATION 0036 0037 #include "unicode/uobject.h" 0038 #include "unicode/unistr.h" 0039 #include "unicode/coll.h" 0040 0041 U_NAMESPACE_BEGIN 0042 0043 /* forward declaration */ 0044 class RuleBasedCollator; 0045 class CollationKeyByteSink; 0046 0047 /** 0048 * 0049 * Collation keys are generated by the Collator class. Use the CollationKey objects 0050 * instead of Collator to compare strings multiple times. A CollationKey 0051 * preprocesses the comparison information from the Collator object to 0052 * make the comparison faster. If you are not going to comparing strings 0053 * multiple times, then using the Collator object is generally faster, 0054 * since it only processes as much of the string as needed to make a 0055 * comparison. 0056 * <p> For example (with strength == tertiary) 0057 * <p>When comparing "Abernathy" to "Baggins-Smythworthy", Collator 0058 * only needs to process a couple of characters, while a comparison 0059 * with CollationKeys will process all of the characters. On the other hand, 0060 * if you are doing a sort of a number of fields, it is much faster to use 0061 * CollationKeys, since you will be comparing strings multiple times. 0062 * <p>Typical use of CollationKeys are in databases, where you store a CollationKey 0063 * in a hidden field, and use it for sorting or indexing. 0064 * 0065 * <p>Example of use: 0066 * <pre> 0067 * \code 0068 * UErrorCode success = U_ZERO_ERROR; 0069 * Collator* myCollator = Collator::createInstance(success); 0070 * CollationKey* keys = new CollationKey [3]; 0071 * myCollator->getCollationKey("Tom", keys[0], success ); 0072 * myCollator->getCollationKey("Dick", keys[1], success ); 0073 * myCollator->getCollationKey("Harry", keys[2], success ); 0074 * 0075 * // Inside body of sort routine, compare keys this way: 0076 * CollationKey tmp; 0077 * if(keys[0].compareTo( keys[1] ) > 0 ) { 0078 * tmp = keys[0]; keys[0] = keys[1]; keys[1] = tmp; 0079 * } 0080 * //... 0081 * \endcode 0082 * </pre> 0083 * <p>Because Collator::compare()'s algorithm is complex, it is faster to sort 0084 * long lists of words by retrieving collation keys with Collator::getCollationKey(). 0085 * You can then cache the collation keys and compare them using CollationKey::compareTo(). 0086 * <p> 0087 * <strong>Note:</strong> <code>Collator</code>s with different Locale, 0088 * CollationStrength and DecompositionMode settings will return different 0089 * CollationKeys for the same set of strings. Locales have specific 0090 * collation rules, and the way in which secondary and tertiary differences 0091 * are taken into account, for example, will result in different CollationKeys 0092 * for same strings. 0093 * <p> 0094 0095 * @see Collator 0096 * @see RuleBasedCollator 0097 * @version 1.3 12/18/96 0098 * @author Helena Shih 0099 * @stable ICU 2.0 0100 */ 0101 class U_I18N_API CollationKey : public UObject { 0102 public: 0103 /** 0104 * This creates an empty collation key based on the null string. An empty 0105 * collation key contains no sorting information. When comparing two empty 0106 * collation keys, the result is Collator::EQUAL. Comparing empty collation key 0107 * with non-empty collation key is always Collator::LESS. 0108 * @stable ICU 2.0 0109 */ 0110 CollationKey(); 0111 0112 0113 /** 0114 * Creates a collation key based on the collation key values. 0115 * @param values the collation key values 0116 * @param count number of collation key values, including trailing nulls. 0117 * @stable ICU 2.0 0118 */ 0119 CollationKey(const uint8_t* values, 0120 int32_t count); 0121 0122 /** 0123 * Copy constructor. 0124 * @param other the object to be copied. 0125 * @stable ICU 2.0 0126 */ 0127 CollationKey(const CollationKey& other); 0128 0129 /** 0130 * Sort key destructor. 0131 * @stable ICU 2.0 0132 */ 0133 virtual ~CollationKey(); 0134 0135 /** 0136 * Assignment operator 0137 * @param other the object to be copied. 0138 * @stable ICU 2.0 0139 */ 0140 const CollationKey& operator=(const CollationKey& other); 0141 0142 /** 0143 * Compare if two collation keys are the same. 0144 * @param source the collation key to compare to. 0145 * @return Returns true if two collation keys are equal, false otherwise. 0146 * @stable ICU 2.0 0147 */ 0148 bool operator==(const CollationKey& source) const; 0149 0150 /** 0151 * Compare if two collation keys are not the same. 0152 * @param source the collation key to compare to. 0153 * @return Returns true if two collation keys are different, false otherwise. 0154 * @stable ICU 2.0 0155 */ 0156 bool operator!=(const CollationKey& source) const; 0157 0158 0159 /** 0160 * Test to see if the key is in an invalid state. The key will be in an 0161 * invalid state if it couldn't allocate memory for some operation. 0162 * @return Returns true if the key is in an invalid, false otherwise. 0163 * @stable ICU 2.0 0164 */ 0165 UBool isBogus(void) const; 0166 0167 /** 0168 * Returns a pointer to the collation key values. The storage is owned 0169 * by the collation key and the pointer will become invalid if the key 0170 * is deleted. 0171 * @param count the output parameter of number of collation key values, 0172 * including any trailing nulls. 0173 * @return a pointer to the collation key values. 0174 * @stable ICU 2.0 0175 */ 0176 const uint8_t* getByteArray(int32_t& count) const; 0177 0178 #ifdef U_USE_COLLATION_KEY_DEPRECATES 0179 /** 0180 * Extracts the collation key values into a new array. The caller owns 0181 * this storage and should free it. 0182 * @param count the output parameter of number of collation key values, 0183 * including any trailing nulls. 0184 * @obsolete ICU 2.6. Use getByteArray instead since this API will be removed in that release. 0185 */ 0186 uint8_t* toByteArray(int32_t& count) const; 0187 #endif 0188 0189 #ifndef U_HIDE_DEPRECATED_API 0190 /** 0191 * Convenience method which does a string(bit-wise) comparison of the 0192 * two collation keys. 0193 * @param target target collation key to be compared with 0194 * @return Returns Collator::LESS if sourceKey < targetKey, 0195 * Collator::GREATER if sourceKey > targetKey and Collator::EQUAL 0196 * otherwise. 0197 * @deprecated ICU 2.6 use the overload with error code 0198 */ 0199 Collator::EComparisonResult compareTo(const CollationKey& target) const; 0200 #endif /* U_HIDE_DEPRECATED_API */ 0201 0202 /** 0203 * Convenience method which does a string(bit-wise) comparison of the 0204 * two collation keys. 0205 * @param target target collation key to be compared with 0206 * @param status error code 0207 * @return Returns UCOL_LESS if sourceKey < targetKey, 0208 * UCOL_GREATER if sourceKey > targetKey and UCOL_EQUAL 0209 * otherwise. 0210 * @stable ICU 2.6 0211 */ 0212 UCollationResult compareTo(const CollationKey& target, UErrorCode &status) const; 0213 0214 /** 0215 * Creates an integer that is unique to the collation key. NOTE: this 0216 * is not the same as String.hashCode. 0217 * <p>Example of use: 0218 * <pre> 0219 * . UErrorCode status = U_ZERO_ERROR; 0220 * . Collator *myCollation = Collator::createInstance(Locale::US, status); 0221 * . if (U_FAILURE(status)) return; 0222 * . CollationKey key1, key2; 0223 * . UErrorCode status1 = U_ZERO_ERROR, status2 = U_ZERO_ERROR; 0224 * . myCollation->getCollationKey("abc", key1, status1); 0225 * . if (U_FAILURE(status1)) { delete myCollation; return; } 0226 * . myCollation->getCollationKey("ABC", key2, status2); 0227 * . if (U_FAILURE(status2)) { delete myCollation; return; } 0228 * . // key1.hashCode() != key2.hashCode() 0229 * </pre> 0230 * @return the hash value based on the string's collation order. 0231 * @see UnicodeString#hashCode 0232 * @stable ICU 2.0 0233 */ 0234 int32_t hashCode(void) const; 0235 0236 /** 0237 * ICU "poor man's RTTI", returns a UClassID for the actual class. 0238 * @stable ICU 2.2 0239 */ 0240 virtual UClassID getDynamicClassID() const override; 0241 0242 /** 0243 * ICU "poor man's RTTI", returns a UClassID for this class. 0244 * @stable ICU 2.2 0245 */ 0246 static UClassID U_EXPORT2 getStaticClassID(); 0247 0248 private: 0249 /** 0250 * Replaces the current bytes buffer with a new one of newCapacity 0251 * and copies length bytes from the old buffer to the new one. 0252 * @return the new buffer, or nullptr if the allocation failed 0253 */ 0254 uint8_t *reallocate(int32_t newCapacity, int32_t length); 0255 /** 0256 * Set a new length for a new sort key in the existing fBytes. 0257 */ 0258 void setLength(int32_t newLength); 0259 0260 uint8_t *getBytes() { 0261 return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes; 0262 } 0263 const uint8_t *getBytes() const { 0264 return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes; 0265 } 0266 int32_t getCapacity() const { 0267 return (fFlagAndLength >= 0) ? (int32_t)sizeof(fUnion) : fUnion.fFields.fCapacity; 0268 } 0269 int32_t getLength() const { return fFlagAndLength & 0x7fffffff; } 0270 0271 /** 0272 * Set the CollationKey to a "bogus" or invalid state 0273 * @return this CollationKey 0274 */ 0275 CollationKey& setToBogus(void); 0276 /** 0277 * Resets this CollationKey to an empty state 0278 * @return this CollationKey 0279 */ 0280 CollationKey& reset(void); 0281 0282 /** 0283 * Allow private access to RuleBasedCollator 0284 */ 0285 friend class RuleBasedCollator; 0286 friend class CollationKeyByteSink; 0287 0288 // Class fields. sizeof(CollationKey) is intended to be 48 bytes 0289 // on a machine with 64-bit pointers. 0290 // We use a union to maximize the size of the internal buffer, 0291 // similar to UnicodeString but not as tight and complex. 0292 0293 // (implicit) *vtable; 0294 /** 0295 * Sort key length and flag. 0296 * Bit 31 is set if the buffer is heap-allocated. 0297 * Bits 30..0 contain the sort key length. 0298 */ 0299 int32_t fFlagAndLength; 0300 /** 0301 * Unique hash value of this CollationKey. 0302 * Special value 2 if the key is bogus. 0303 */ 0304 mutable int32_t fHashCode; 0305 /** 0306 * fUnion provides 32 bytes for the internal buffer or for 0307 * pointer+capacity. 0308 */ 0309 union StackBufferOrFields { 0310 /** fStackBuffer is used iff fFlagAndLength>=0, else fFields is used */ 0311 uint8_t fStackBuffer[32]; 0312 struct { 0313 uint8_t *fBytes; 0314 int32_t fCapacity; 0315 } fFields; 0316 } fUnion; 0317 }; 0318 0319 inline bool 0320 CollationKey::operator!=(const CollationKey& other) const 0321 { 0322 return !(*this == other); 0323 } 0324 0325 inline UBool 0326 CollationKey::isBogus() const 0327 { 0328 return fHashCode == 2; // kBogusHashCode 0329 } 0330 0331 inline const uint8_t* 0332 CollationKey::getByteArray(int32_t &count) const 0333 { 0334 count = getLength(); 0335 return getBytes(); 0336 } 0337 0338 U_NAMESPACE_END 0339 0340 #endif /* #if !UCONFIG_NO_COLLATION */ 0341 0342 #endif /* U_SHOW_CPLUSPLUS_API */ 0343 0344 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |