Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:13:08

0001 // © 2016 and later: Unicode, Inc. and others.
0002 // License & terms of use: http://www.unicode.org/copyright.html
0003 /*
0004  *****************************************************************************
0005  * Copyright (C) 1996-2014, International Business Machines Corporation and others.
0006  * All Rights Reserved.
0007  *****************************************************************************
0008  *
0009  * File sortkey.h
0010  *
0011  * Created by: Helena Shih
0012  *
0013  * Modification History:
0014  *
0015  *  Date         Name          Description
0016  *
0017  *  6/20/97     helena      Java class name change.
0018  *  8/18/97     helena      Added internal API documentation.
0019  *  6/26/98     erm         Changed to use byte arrays and memcmp.
0020  *****************************************************************************
0021  */
0022 
0023 #ifndef SORTKEY_H
0024 #define SORTKEY_H
0025 
0026 #include "unicode/utypes.h"
0027 
0028 #if U_SHOW_CPLUSPLUS_API
0029 
0030 /**
0031  * \file 
0032  * \brief C++ API: Keys for comparing strings multiple times. 
0033  */
0034  
0035 #if !UCONFIG_NO_COLLATION
0036 
0037 #include "unicode/uobject.h"
0038 #include "unicode/unistr.h"
0039 #include "unicode/coll.h"
0040 
0041 U_NAMESPACE_BEGIN
0042 
0043 /* forward declaration */
0044 class RuleBasedCollator;
0045 class CollationKeyByteSink;
0046 
0047 /**
0048  *
0049  * Collation keys are generated by the Collator class.  Use the CollationKey objects
0050  * instead of Collator to compare strings multiple times.  A CollationKey
0051  * preprocesses the comparison information from the Collator object to
0052  * make the comparison faster.  If you are not going to compare strings
0053  * multiple times, then using the Collator object is generally faster,
0054  * since it only processes as much of the string as needed to make a
0055  * comparison.
0056  * <p> For example (with strength == tertiary)
0057  * <p>When comparing "Abernathy" to "Baggins-Smythworthy", Collator
0058  * only needs to process a couple of characters, while a comparison
0059  * with CollationKeys will process all of the characters.  On the other hand,
0060  * if you are doing a sort of a number of fields, it is much faster to use
0061  * CollationKeys, since you will be comparing strings multiple times.
0062  * <p>A typical use of CollationKeys is in databases, where you store a CollationKey
0063  * in a hidden field, and use it for sorting or indexing.
0064  *
0065  * <p>Example of use:
0066  * <pre>
0067  * \code
0068  *     UErrorCode success = U_ZERO_ERROR;
0069  *     Collator* myCollator = Collator::createInstance(success);
0070  *     CollationKey* keys = new CollationKey [3];
0071  *     myCollator->getCollationKey("Tom", keys[0], success );
0072  *     myCollator->getCollationKey("Dick", keys[1], success );
0073  *     myCollator->getCollationKey("Harry", keys[2], success );
0074  *
0075  *     // Inside body of sort routine, compare keys this way:
0076  *     CollationKey tmp;
0077  *     if(keys[0].compareTo( keys[1] ) > 0 ) {
0078  *         tmp = keys[0]; keys[0] = keys[1]; keys[1] = tmp;
0079  *     }
0080  *     //...
0081  * \endcode
0082  * </pre>
0083  * <p>Because Collator::compare()'s algorithm is complex, it is faster to sort
0084  * long lists of words by retrieving collation keys with Collator::getCollationKey().
0085  * You can then cache the collation keys and compare them using CollationKey::compareTo().
0086  * <p>
0087  * <strong>Note:</strong> <code>Collator</code>s with different Locale,
0088  * CollationStrength and DecompositionMode settings will return different
0089  * CollationKeys for the same set of strings. Locales have specific
0090  * collation rules, and the way in which secondary and tertiary differences
0091  * are taken into account, for example, will result in different CollationKeys
0092  * for same strings.
0093  * <p>
0094 
0095  * @see          Collator
0096  * @see          RuleBasedCollator
0097  * @version      1.3 12/18/96
0098  * @author       Helena Shih
0099  * @stable ICU 2.0
0100  */
0101 class U_I18N_API CollationKey : public UObject {
0102 public:
0103     /**
0104     * This creates an empty collation key based on the null string.  An empty
0105     * collation key contains no sorting information.  When comparing two empty
0106     * collation keys, the result is Collator::EQUAL.  Comparing empty collation key
0107     * with non-empty collation key is always Collator::LESS.
0108     * @stable ICU 2.0
0109     */
0110     CollationKey();
0111 
0112 
0113     /**
0114     * Creates a collation key based on the collation key values.
0115     * @param values the collation key values
0116     * @param count number of collation key values, including trailing nulls.
0117     * @stable ICU 2.0
0118     */
0119     CollationKey(const  uint8_t*    values,
0120                 int32_t     count);
0121 
0122     /**
0123     * Copy constructor.
0124     * @param other    the object to be copied.
0125     * @stable ICU 2.0
0126     */
0127     CollationKey(const CollationKey& other);
0128 
0129     /**
0130     * Sort key destructor.
0131     * @stable ICU 2.0
0132     */
0133     virtual ~CollationKey();
0134 
0135     /**
0136     * Assignment operator
0137     * @param other    the object to be copied.
0138     * @stable ICU 2.0
0139     */
0140     const   CollationKey&   operator=(const CollationKey& other);
0141 
0142     /**
0143     * Compare if two collation keys are the same.
0144     * @param source the collation key to compare to.
0145     * @return Returns true if two collation keys are equal, false otherwise.
0146     * @stable ICU 2.0
0147     */
0148     bool                    operator==(const CollationKey& source) const;
0149 
0150     /**
0151     * Compare if two collation keys are not the same.
0152     * @param source the collation key to compare to.
0153     * @return Returns true if two collation keys are different, false otherwise.
0154     * @stable ICU 2.0
0155     */
0156     bool                    operator!=(const CollationKey& source) const;
0157 
0158 
0159     /**
0160     * Test to see if the key is in an invalid state. The key will be in an
0161     * invalid state if it couldn't allocate memory for some operation.
0162     * @return Returns true if the key is in an invalid, false otherwise.
0163     * @stable ICU 2.0
0164     */
0165     UBool                   isBogus(void) const;
0166 
0167     /**
0168     * Returns a pointer to the collation key values. The storage is owned
0169     * by the collation key and the pointer will become invalid if the key
0170     * is deleted.
0171     * @param count the output parameter of number of collation key values,
0172     * including any trailing nulls.
0173     * @return a pointer to the collation key values.
0174     * @stable ICU 2.0
0175     */
0176     const    uint8_t*       getByteArray(int32_t& count) const;
0177 
0178 #ifdef U_USE_COLLATION_KEY_DEPRECATES
0179     /**
0180     * Extracts the collation key values into a new array. The caller owns
0181     * this storage and should free it.
0182     * @param count the output parameter of number of collation key values,
0183     * including any trailing nulls.
0184     * @obsolete ICU 2.6. Use getByteArray instead since this API will be removed in that release.
0185     */
0186     uint8_t*                toByteArray(int32_t& count) const;
0187 #endif
0188 
0189 #ifndef U_HIDE_DEPRECATED_API 
0190     /**
0191     * Convenience method which does a string(bit-wise) comparison of the
0192     * two collation keys.
0193     * @param target target collation key to be compared with
0194     * @return Returns Collator::LESS if sourceKey &lt; targetKey,
0195     * Collator::GREATER if sourceKey > targetKey and Collator::EQUAL
0196     * otherwise.
0197     * @deprecated ICU 2.6 use the overload with error code
0198     */
0199     Collator::EComparisonResult compareTo(const CollationKey& target) const;
0200 #endif  /* U_HIDE_DEPRECATED_API */
0201 
0202     /**
0203     * Convenience method which does a string(bit-wise) comparison of the
0204     * two collation keys.
0205     * @param target target collation key to be compared with
0206     * @param status error code
0207     * @return Returns UCOL_LESS if sourceKey &lt; targetKey,
0208     * UCOL_GREATER if sourceKey > targetKey and UCOL_EQUAL
0209     * otherwise.
0210     * @stable ICU 2.6
0211     */
0212     UCollationResult compareTo(const CollationKey& target, UErrorCode &status) const;
0213 
0214     /**
0215     * Creates an integer that is unique to the collation key.  NOTE: this
0216     * is not the same as String.hashCode.
0217     * <p>Example of use:
0218     * <pre>
0219     * .    UErrorCode status = U_ZERO_ERROR;
0220     * .    Collator *myCollation = Collator::createInstance(Locale::US, status);
0221     * .    if (U_FAILURE(status)) return;
0222     * .    CollationKey key1, key2;
0223     * .    UErrorCode status1 = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
0224     * .    myCollation->getCollationKey("abc", key1, status1);
0225     * .    if (U_FAILURE(status1)) { delete myCollation; return; }
0226     * .    myCollation->getCollationKey("ABC", key2, status2);
0227     * .    if (U_FAILURE(status2)) { delete myCollation; return; }
0228     * .    // key1.hashCode() != key2.hashCode()
0229     * </pre>
0230     * @return the hash value based on the string's collation order.
0231     * @see UnicodeString#hashCode
0232     * @stable ICU 2.0
0233     */
0234     int32_t                 hashCode(void) const;
0235 
0236     /**
0237      * ICU "poor man's RTTI", returns a UClassID for the actual class.
0238      * @stable ICU 2.2
0239      */
0240     virtual UClassID getDynamicClassID() const override;
0241 
0242     /**
0243      * ICU "poor man's RTTI", returns a UClassID for this class.
0244      * @stable ICU 2.2
0245      */
0246     static UClassID U_EXPORT2 getStaticClassID();
0247 
0248 private:
0249     /**
0250      * Replaces the current bytes buffer with a new one of newCapacity
0251      * and copies length bytes from the old buffer to the new one.
0252      * @return the new buffer, or nullptr if the allocation failed
0253      */
0254     uint8_t *reallocate(int32_t newCapacity, int32_t length);
0255     /**
0256      * Set a new length for a new sort key in the existing fBytes.
0257      */
0258     void setLength(int32_t newLength);
0259 
0260     uint8_t *getBytes() {
0261         return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes;
0262     }
0263     const uint8_t *getBytes() const {
0264         return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes;
0265     }
0266     int32_t getCapacity() const {
0267         return (fFlagAndLength >= 0) ? (int32_t)sizeof(fUnion) : fUnion.fFields.fCapacity;
0268     }
0269     int32_t getLength() const { return fFlagAndLength & 0x7fffffff; }
0270 
0271     /**
0272     * Set the CollationKey to a "bogus" or invalid state
0273     * @return this CollationKey
0274     */
0275     CollationKey&           setToBogus(void);
0276     /**
0277     * Resets this CollationKey to an empty state
0278     * @return this CollationKey
0279     */
0280     CollationKey&           reset(void);
0281 
0282     /**
0283     * Allow private access to RuleBasedCollator
0284     */
0285     friend  class           RuleBasedCollator;
0286     friend  class           CollationKeyByteSink;
0287 
0288     // Class fields. sizeof(CollationKey) is intended to be 48 bytes
0289     // on a machine with 64-bit pointers.
0290     // We use a union to maximize the size of the internal buffer,
0291     // similar to UnicodeString but not as tight and complex.
0292 
0293     // (implicit) *vtable;
0294     /**
0295      * Sort key length and flag.
0296      * Bit 31 is set if the buffer is heap-allocated.
0297      * Bits 30..0 contain the sort key length.
0298      */
0299     int32_t fFlagAndLength;
0300     /**
0301     * Unique hash value of this CollationKey.
0302     * Special value 2 if the key is bogus.
0303     */
0304     mutable int32_t fHashCode;
0305     /**
0306      * fUnion provides 32 bytes for the internal buffer or for
0307      * pointer+capacity.
0308      */
0309     union StackBufferOrFields {
0310         /** fStackBuffer is used iff fFlagAndLength>=0, else fFields is used */
0311         uint8_t fStackBuffer[32];
0312         struct {
0313             uint8_t *fBytes;
0314             int32_t fCapacity;
0315         } fFields;
0316     } fUnion;
0317 };
0318 
0319 inline bool
0320 CollationKey::operator!=(const CollationKey& other) const
0321 {
0322     return !(*this == other);
0323 }
0324 
0325 inline UBool
0326 CollationKey::isBogus() const
0327 {
0328     return fHashCode == 2;  // kBogusHashCode
0329 }
0330 
0331 inline const uint8_t*
0332 CollationKey::getByteArray(int32_t &count) const
0333 {
0334     count = getLength();
0335     return getBytes();
0336 }
0337 
0338 U_NAMESPACE_END
0339 
0340 #endif /* #if !UCONFIG_NO_COLLATION */
0341 
0342 #endif /* U_SHOW_CPLUSPLUS_API */
0343 
0344 #endif