|
||||
Warning, file /include/unicode/tblcoll.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 // © 2016 and later: Unicode, Inc. and others. 0002 // License & terms of use: http://www.unicode.org/copyright.html 0003 /* 0004 ****************************************************************************** 0005 * Copyright (C) 1996-2016, International Business Machines Corporation and 0006 * others. All Rights Reserved. 0007 ****************************************************************************** 0008 */ 0009 0010 /** 0011 * \file 0012 * \brief C++ API: The RuleBasedCollator class implements the Collator abstract base class. 0013 */ 0014 0015 /** 0016 * File tblcoll.h 0017 * 0018 * Created by: Helena Shih 0019 * 0020 * Modification History: 0021 * 0022 * Date Name Description 0023 * 2/5/97 aliu Added streamIn and streamOut methods. Added 0024 * constructor which reads RuleBasedCollator object from 0025 * a binary file. Added writeToFile method which streams 0026 * RuleBasedCollator out to a binary file. The streamIn 0027 * and streamOut methods use istream and ostream objects 0028 * in binary mode. 0029 * 2/12/97 aliu Modified to use TableCollationData sub-object to 0030 * hold invariant data. 0031 * 2/13/97 aliu Moved several methods into this class from Collation. 0032 * Added a private RuleBasedCollator(Locale&) constructor, 0033 * to be used by Collator::createDefault(). General 0034 * clean up. 0035 * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy 0036 * constructor and getDynamicClassID. 0037 * 3/5/97 aliu Modified constructFromFile() to add parameter 0038 * specifying whether or not binary loading is to be 0039 * attempted. This is required for dynamic rule loading. 0040 * 05/07/97 helena Added memory allocation error detection. 0041 * 6/17/97 helena Added IDENTICAL strength for compare, changed getRules to 0042 * use MergeCollation::getPattern. 0043 * 6/20/97 helena Java class name change. 0044 * 8/18/97 helena Added internal API documentation. 0045 * 09/03/97 helena Added createCollationKeyValues(). 0046 * 02/10/98 damiba Added compare with "length" parameter 0047 * 08/05/98 erm Synched with 1.2 version of RuleBasedCollator.java 0048 * 04/23/99 stephen Removed EDecompositionMode, merged with 0049 * Normalizer::EMode 0050 * 06/14/99 stephen Removed kResourceBundleSuffix 0051 * 11/02/99 helena Collator performance enhancements. Eliminates the 0052 * UnicodeString construction and special case for NO_OP. 0053 * 11/23/99 srl More performance enhancements. Updates to NormalizerIterator 0054 * internal state management. 0055 * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator 0056 * to implementation file. 0057 * 01/29/01 synwee Modified into a C++ wrapper which calls C API 0058 * (ucol.h) 0059 * 2012-2014 markus Rewritten in C++ again. 0060 */ 0061 0062 #ifndef TBLCOLL_H 0063 #define TBLCOLL_H 0064 0065 #include "unicode/utypes.h" 0066 0067 #if U_SHOW_CPLUSPLUS_API 0068 0069 #if !UCONFIG_NO_COLLATION 0070 0071 #include "unicode/coll.h" 0072 #include "unicode/locid.h" 0073 #include "unicode/uiter.h" 0074 #include "unicode/ucol.h" 0075 0076 U_NAMESPACE_BEGIN 0077 0078 struct CollationCacheEntry; 0079 struct CollationData; 0080 struct CollationSettings; 0081 struct CollationTailoring; 0082 /** 0083 * @stable ICU 2.0 0084 */ 0085 class StringSearch; 0086 /** 0087 * @stable ICU 2.0 0088 */ 0089 class CollationElementIterator; 0090 class CollationKey; 0091 class SortKeyByteSink; 0092 class UnicodeSet; 0093 class UnicodeString; 0094 class UVector64; 0095 0096 /** 0097 * The RuleBasedCollator class provides the implementation of 0098 * Collator, using data-driven tables. The user can create a customized 0099 * table-based collation. 0100 * <p> 0101 * For more information about the collation service see 0102 * <a href="https://unicode-org.github.io/icu/userguide/collation">the User Guide</a>. 0103 * <p> 0104 * Collation service provides correct sorting orders for most locales supported in ICU. 0105 * If specific data for a locale is not available, the orders eventually falls back 0106 * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>. 0107 * <p> 0108 * Sort ordering may be customized by providing your own set of rules. For more on 0109 * this subject see the <a href="https://unicode-org.github.io/icu/userguide/collation/customization"> 0110 * Collation Customization</a> section of the User Guide. 0111 * <p> 0112 * Note, RuleBasedCollator is not to be subclassed. 0113 * @see Collator 0114 */ 0115 class U_I18N_API RuleBasedCollator final : public Collator { 0116 public: 0117 /** 0118 * RuleBasedCollator constructor. This takes the table rules and builds a 0119 * collation table out of them. Please see RuleBasedCollator class 0120 * description for more details on the collation rule syntax. 0121 * @param rules the collation rules to build the collation table from. 0122 * @param status reporting a success or an error. 0123 * @stable ICU 2.0 0124 */ 0125 RuleBasedCollator(const UnicodeString& rules, UErrorCode& status); 0126 0127 /** 0128 * RuleBasedCollator constructor. This takes the table rules and builds a 0129 * collation table out of them. Please see RuleBasedCollator class 0130 * description for more details on the collation rule syntax. 0131 * @param rules the collation rules to build the collation table from. 0132 * @param collationStrength strength for comparison 0133 * @param status reporting a success or an error. 0134 * @stable ICU 2.0 0135 */ 0136 RuleBasedCollator(const UnicodeString& rules, 0137 ECollationStrength collationStrength, 0138 UErrorCode& status); 0139 0140 /** 0141 * RuleBasedCollator constructor. This takes the table rules and builds a 0142 * collation table out of them. Please see RuleBasedCollator class 0143 * description for more details on the collation rule syntax. 0144 * @param rules the collation rules to build the collation table from. 0145 * @param decompositionMode the normalisation mode 0146 * @param status reporting a success or an error. 0147 * @stable ICU 2.0 0148 */ 0149 RuleBasedCollator(const UnicodeString& rules, 0150 UColAttributeValue decompositionMode, 0151 UErrorCode& status); 0152 0153 /** 0154 * RuleBasedCollator constructor. This takes the table rules and builds a 0155 * collation table out of them. Please see RuleBasedCollator class 0156 * description for more details on the collation rule syntax. 0157 * @param rules the collation rules to build the collation table from. 0158 * @param collationStrength strength for comparison 0159 * @param decompositionMode the normalisation mode 0160 * @param status reporting a success or an error. 0161 * @stable ICU 2.0 0162 */ 0163 RuleBasedCollator(const UnicodeString& rules, 0164 ECollationStrength collationStrength, 0165 UColAttributeValue decompositionMode, 0166 UErrorCode& status); 0167 0168 #ifndef U_HIDE_INTERNAL_API 0169 /** 0170 * TODO: document & propose as public API 0171 * @internal 0172 */ 0173 RuleBasedCollator(const UnicodeString &rules, 0174 UParseError &parseError, UnicodeString &reason, 0175 UErrorCode &errorCode); 0176 #endif /* U_HIDE_INTERNAL_API */ 0177 0178 /** 0179 * Copy constructor. 0180 * @param other the RuleBasedCollator object to be copied 0181 * @stable ICU 2.0 0182 */ 0183 RuleBasedCollator(const RuleBasedCollator& other); 0184 0185 0186 /** Opens a collator from a collator binary image created using 0187 * cloneBinary. Binary image used in instantiation of the 0188 * collator remains owned by the user and should stay around for 0189 * the lifetime of the collator. The API also takes a base collator 0190 * which must be the root collator. 0191 * @param bin binary image owned by the user and required through the 0192 * lifetime of the collator 0193 * @param length size of the image. If negative, the API will try to 0194 * figure out the length of the image 0195 * @param base Base collator, for lookup of untailored characters. 0196 * Must be the root collator, must not be nullptr. 0197 * The base is required to be present through the lifetime of the collator. 0198 * @param status for catching errors 0199 * @return newly created collator 0200 * @see cloneBinary 0201 * @stable ICU 3.4 0202 */ 0203 RuleBasedCollator(const uint8_t *bin, int32_t length, 0204 const RuleBasedCollator *base, 0205 UErrorCode &status); 0206 0207 /** 0208 * Destructor. 0209 * @stable ICU 2.0 0210 */ 0211 virtual ~RuleBasedCollator(); 0212 0213 /** 0214 * Assignment operator. 0215 * @param other other RuleBasedCollator object to copy from. 0216 * @stable ICU 2.0 0217 */ 0218 RuleBasedCollator& operator=(const RuleBasedCollator& other); 0219 0220 /** 0221 * Returns true if argument is the same as this object. 0222 * @param other Collator object to be compared. 0223 * @return true if arguments is the same as this object. 0224 * @stable ICU 2.0 0225 */ 0226 virtual bool operator==(const Collator& other) const override; 0227 0228 /** 0229 * Makes a copy of this object. 0230 * @return a copy of this object, owned by the caller 0231 * @stable ICU 2.0 0232 */ 0233 virtual RuleBasedCollator* clone() const override; 0234 0235 /** 0236 * Creates a collation element iterator for the source string. The caller of 0237 * this method is responsible for the memory management of the return 0238 * pointer. 0239 * @param source the string over which the CollationElementIterator will 0240 * iterate. 0241 * @return the collation element iterator of the source string using this as 0242 * the based Collator. 0243 * @stable ICU 2.2 0244 */ 0245 virtual CollationElementIterator* createCollationElementIterator( 0246 const UnicodeString& source) const; 0247 0248 /** 0249 * Creates a collation element iterator for the source. The caller of this 0250 * method is responsible for the memory management of the returned pointer. 0251 * @param source the CharacterIterator which produces the characters over 0252 * which the CollationElementItgerator will iterate. 0253 * @return the collation element iterator of the source using this as the 0254 * based Collator. 0255 * @stable ICU 2.2 0256 */ 0257 virtual CollationElementIterator* createCollationElementIterator( 0258 const CharacterIterator& source) const; 0259 0260 // Make deprecated versions of Collator::compare() visible. 0261 using Collator::compare; 0262 0263 /** 0264 * The comparison function compares the character data stored in two 0265 * different strings. Returns information about whether a string is less 0266 * than, greater than or equal to another string. 0267 * @param source the source string to be compared with. 0268 * @param target the string that is to be compared with the source string. 0269 * @param status possible error code 0270 * @return Returns an enum value. UCOL_GREATER if source is greater 0271 * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less 0272 * than target 0273 * @stable ICU 2.6 0274 **/ 0275 virtual UCollationResult compare(const UnicodeString& source, 0276 const UnicodeString& target, 0277 UErrorCode &status) const override; 0278 0279 /** 0280 * Does the same thing as compare but limits the comparison to a specified 0281 * length 0282 * @param source the source string to be compared with. 0283 * @param target the string that is to be compared with the source string. 0284 * @param length the length the comparison is limited to 0285 * @param status possible error code 0286 * @return Returns an enum value. UCOL_GREATER if source (up to the specified 0287 * length) is greater than target; UCOL_EQUAL if source (up to specified 0288 * length) is equal to target; UCOL_LESS if source (up to the specified 0289 * length) is less than target. 0290 * @stable ICU 2.6 0291 */ 0292 virtual UCollationResult compare(const UnicodeString& source, 0293 const UnicodeString& target, 0294 int32_t length, 0295 UErrorCode &status) const override; 0296 0297 /** 0298 * The comparison function compares the character data stored in two 0299 * different string arrays. Returns information about whether a string array 0300 * is less than, greater than or equal to another string array. 0301 * @param source the source string array to be compared with. 0302 * @param sourceLength the length of the source string array. If this value 0303 * is equal to -1, the string array is null-terminated. 0304 * @param target the string that is to be compared with the source string. 0305 * @param targetLength the length of the target string array. If this value 0306 * is equal to -1, the string array is null-terminated. 0307 * @param status possible error code 0308 * @return Returns an enum value. UCOL_GREATER if source is greater 0309 * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less 0310 * than target 0311 * @stable ICU 2.6 0312 */ 0313 virtual UCollationResult compare(const char16_t* source, int32_t sourceLength, 0314 const char16_t* target, int32_t targetLength, 0315 UErrorCode &status) const override; 0316 0317 /** 0318 * Compares two strings using the Collator. 0319 * Returns whether the first one compares less than/equal to/greater than 0320 * the second one. 0321 * This version takes UCharIterator input. 0322 * @param sIter the first ("source") string iterator 0323 * @param tIter the second ("target") string iterator 0324 * @param status ICU status 0325 * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER 0326 * @stable ICU 4.2 0327 */ 0328 virtual UCollationResult compare(UCharIterator &sIter, 0329 UCharIterator &tIter, 0330 UErrorCode &status) const override; 0331 0332 /** 0333 * Compares two UTF-8 strings using the Collator. 0334 * Returns whether the first one compares less than/equal to/greater than 0335 * the second one. 0336 * This version takes UTF-8 input. 0337 * Note that a StringPiece can be implicitly constructed 0338 * from a std::string or a NUL-terminated const char * string. 0339 * @param source the first UTF-8 string 0340 * @param target the second UTF-8 string 0341 * @param status ICU status 0342 * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER 0343 * @stable ICU 51 0344 */ 0345 virtual UCollationResult compareUTF8(const StringPiece &source, 0346 const StringPiece &target, 0347 UErrorCode &status) const override; 0348 0349 /** 0350 * Transforms the string into a series of characters 0351 * that can be compared with CollationKey.compare(). 0352 * 0353 * Note that sort keys are often less efficient than simply doing comparison. 0354 * For more details, see the ICU User Guide. 0355 * 0356 * @param source the source string. 0357 * @param key the transformed key of the source string. 0358 * @param status the error code status. 0359 * @return the transformed key. 0360 * @see CollationKey 0361 * @stable ICU 2.0 0362 */ 0363 virtual CollationKey& getCollationKey(const UnicodeString& source, 0364 CollationKey& key, 0365 UErrorCode& status) const override; 0366 0367 /** 0368 * Transforms a specified region of the string into a series of characters 0369 * that can be compared with CollationKey.compare. 0370 * 0371 * Note that sort keys are often less efficient than simply doing comparison. 0372 * For more details, see the ICU User Guide. 0373 * 0374 * @param source the source string. 0375 * @param sourceLength the length of the source string. 0376 * @param key the transformed key of the source string. 0377 * @param status the error code status. 0378 * @return the transformed key. 0379 * @see CollationKey 0380 * @stable ICU 2.0 0381 */ 0382 virtual CollationKey& getCollationKey(const char16_t *source, 0383 int32_t sourceLength, 0384 CollationKey& key, 0385 UErrorCode& status) const override; 0386 0387 /** 0388 * Generates the hash code for the rule-based collation object. 0389 * @return the hash code. 0390 * @stable ICU 2.0 0391 */ 0392 virtual int32_t hashCode() const override; 0393 0394 #ifndef U_FORCE_HIDE_DEPRECATED_API 0395 /** 0396 * Gets the locale of the Collator 0397 * @param type can be either requested, valid or actual locale. For more 0398 * information see the definition of ULocDataLocaleType in 0399 * uloc.h 0400 * @param status the error code status. 0401 * @return locale where the collation data lives. If the collator 0402 * was instantiated from rules, locale is empty. 0403 * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback 0404 */ 0405 virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const override; 0406 #endif // U_FORCE_HIDE_DEPRECATED_API 0407 0408 /** 0409 * Gets the tailoring rules for this collator. 0410 * @return the collation tailoring from which this collator was created 0411 * @stable ICU 2.0 0412 */ 0413 const UnicodeString& getRules() const; 0414 0415 /** 0416 * Gets the version information for a Collator. 0417 * @param info the version # information, the result will be filled in 0418 * @stable ICU 2.0 0419 */ 0420 virtual void getVersion(UVersionInfo info) const override; 0421 0422 #ifndef U_HIDE_DEPRECATED_API 0423 /** 0424 * Returns the maximum length of any expansion sequences that end with the 0425 * specified comparison order. 0426 * 0427 * This is specific to the kind of collation element values and sequences 0428 * returned by the CollationElementIterator. 0429 * Call CollationElementIterator::getMaxExpansion() instead. 0430 * 0431 * @param order a collation order returned by CollationElementIterator::previous 0432 * or CollationElementIterator::next. 0433 * @return maximum size of the expansion sequences ending with the collation 0434 * element, or 1 if the collation element does not occur at the end of 0435 * any expansion sequence 0436 * @see CollationElementIterator#getMaxExpansion 0437 * @deprecated ICU 51 Use CollationElementIterator::getMaxExpansion() instead. 0438 */ 0439 int32_t getMaxExpansion(int32_t order) const; 0440 #endif /* U_HIDE_DEPRECATED_API */ 0441 0442 /** 0443 * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This 0444 * method is to implement a simple version of RTTI, since not all C++ 0445 * compilers support genuine RTTI. Polymorphic operator==() and clone() 0446 * methods call this method. 0447 * @return The class ID for this object. All objects of a given class have 0448 * the same class ID. Objects of other classes have different class 0449 * IDs. 0450 * @stable ICU 2.0 0451 */ 0452 virtual UClassID getDynamicClassID(void) const override; 0453 0454 /** 0455 * Returns the class ID for this class. This is useful only for comparing to 0456 * a return value from getDynamicClassID(). For example: 0457 * <pre> 0458 * Base* polymorphic_pointer = createPolymorphicObject(); 0459 * if (polymorphic_pointer->getDynamicClassID() == 0460 * Derived::getStaticClassID()) ... 0461 * </pre> 0462 * @return The class ID for all objects of this class. 0463 * @stable ICU 2.0 0464 */ 0465 static UClassID U_EXPORT2 getStaticClassID(void); 0466 0467 #ifndef U_HIDE_DEPRECATED_API 0468 /** 0469 * Do not use this method: The caller and the ICU library might use different heaps. 0470 * Use cloneBinary() instead which writes to caller-provided memory. 0471 * 0472 * Returns a binary format of this collator. 0473 * @param length Returns the length of the data, in bytes 0474 * @param status the error code status. 0475 * @return memory, owned by the caller, of size 'length' bytes. 0476 * @deprecated ICU 52. Use cloneBinary() instead. 0477 */ 0478 uint8_t *cloneRuleData(int32_t &length, UErrorCode &status) const; 0479 #endif /* U_HIDE_DEPRECATED_API */ 0480 0481 /** Creates a binary image of a collator. This binary image can be stored and 0482 * later used to instantiate a collator using ucol_openBinary. 0483 * This API supports preflighting. 0484 * @param buffer a fill-in buffer to receive the binary image 0485 * @param capacity capacity of the destination buffer 0486 * @param status for catching errors 0487 * @return size of the image 0488 * @see ucol_openBinary 0489 * @stable ICU 3.4 0490 */ 0491 int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const; 0492 0493 /** 0494 * Returns current rules. Delta defines whether full rules are returned or 0495 * just the tailoring. 0496 * 0497 * getRules(void) should normally be used instead. 0498 * See https://unicode-org.github.io/icu/userguide/collation/customization#building-on-existing-locales 0499 * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. 0500 * @param buffer UnicodeString to store the result rules 0501 * @stable ICU 2.2 0502 * @see UCOL_FULL_RULES 0503 */ 0504 void getRules(UColRuleOption delta, UnicodeString &buffer) const; 0505 0506 /** 0507 * Universal attribute setter 0508 * @param attr attribute type 0509 * @param value attribute value 0510 * @param status to indicate whether the operation went on smoothly or there were errors 0511 * @stable ICU 2.2 0512 */ 0513 virtual void setAttribute(UColAttribute attr, UColAttributeValue value, 0514 UErrorCode &status) override; 0515 0516 /** 0517 * Universal attribute getter. 0518 * @param attr attribute type 0519 * @param status to indicate whether the operation went on smoothly or there were errors 0520 * @return attribute value 0521 * @stable ICU 2.2 0522 */ 0523 virtual UColAttributeValue getAttribute(UColAttribute attr, 0524 UErrorCode &status) const override; 0525 0526 /** 0527 * Sets the variable top to the top of the specified reordering group. 0528 * The variable top determines the highest-sorting character 0529 * which is affected by UCOL_ALTERNATE_HANDLING. 0530 * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect. 0531 * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, 0532 * UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY; 0533 * or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group 0534 * @param errorCode Standard ICU error code. Its input value must 0535 * pass the U_SUCCESS() test, or else the function returns 0536 * immediately. Check for U_FAILURE() on output or use with 0537 * function chaining. (See User Guide for details.) 0538 * @return *this 0539 * @see getMaxVariable 0540 * @stable ICU 53 0541 */ 0542 virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode) override; 0543 0544 /** 0545 * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING. 0546 * @return the maximum variable reordering group. 0547 * @see setMaxVariable 0548 * @stable ICU 53 0549 */ 0550 virtual UColReorderCode getMaxVariable() const override; 0551 0552 #ifndef U_FORCE_HIDE_DEPRECATED_API 0553 /** 0554 * Sets the variable top to the primary weight of the specified string. 0555 * 0556 * Beginning with ICU 53, the variable top is pinned to 0557 * the top of one of the supported reordering groups, 0558 * and it must not be beyond the last of those groups. 0559 * See setMaxVariable(). 0560 * @param varTop one or more (if contraction) char16_ts to which the variable top should be set 0561 * @param len length of variable top string. If -1 it is considered to be zero terminated. 0562 * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> 0563 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> 0564 * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond 0565 * the last reordering group supported by setMaxVariable() 0566 * @return variable top primary weight 0567 * @deprecated ICU 53 Call setMaxVariable() instead. 0568 */ 0569 virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status) override; 0570 0571 /** 0572 * Sets the variable top to the primary weight of the specified string. 0573 * 0574 * Beginning with ICU 53, the variable top is pinned to 0575 * the top of one of the supported reordering groups, 0576 * and it must not be beyond the last of those groups. 0577 * See setMaxVariable(). 0578 * @param varTop a UnicodeString size 1 or more (if contraction) of char16_ts to which the variable top should be set 0579 * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> 0580 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> 0581 * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond 0582 * the last reordering group supported by setMaxVariable() 0583 * @return variable top primary weight 0584 * @deprecated ICU 53 Call setMaxVariable() instead. 0585 */ 0586 virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status) override; 0587 0588 /** 0589 * Sets the variable top to the specified primary weight. 0590 * 0591 * Beginning with ICU 53, the variable top is pinned to 0592 * the top of one of the supported reordering groups, 0593 * and it must not be beyond the last of those groups. 0594 * See setMaxVariable(). 0595 * @param varTop primary weight, as returned by setVariableTop or ucol_getVariableTop 0596 * @param status error code 0597 * @deprecated ICU 53 Call setMaxVariable() instead. 0598 */ 0599 virtual void setVariableTop(uint32_t varTop, UErrorCode &status) override; 0600 #endif // U_FORCE_HIDE_DEPRECATED_API 0601 0602 /** 0603 * Gets the variable top value of a Collator. 0604 * @param status error code (not changed by function). If error code is set, the return value is undefined. 0605 * @return the variable top primary weight 0606 * @see getMaxVariable 0607 * @stable ICU 2.0 0608 */ 0609 virtual uint32_t getVariableTop(UErrorCode &status) const override; 0610 0611 /** 0612 * Get a UnicodeSet that contains all the characters and sequences tailored in 0613 * this collator. 0614 * @param status error code of the operation 0615 * @return a pointer to a UnicodeSet object containing all the 0616 * code points and sequences that may sort differently than 0617 * in the root collator. The object must be disposed of by using delete 0618 * @stable ICU 2.4 0619 */ 0620 virtual UnicodeSet *getTailoredSet(UErrorCode &status) const override; 0621 0622 /** 0623 * Get the sort key as an array of bytes from a UnicodeString. 0624 * 0625 * Note that sort keys are often less efficient than simply doing comparison. 0626 * For more details, see the ICU User Guide. 0627 * 0628 * @param source string to be processed. 0629 * @param result buffer to store result in. If nullptr, number of bytes needed 0630 * will be returned. 0631 * @param resultLength length of the result buffer. If if not enough the 0632 * buffer will be filled to capacity. 0633 * @return Number of bytes needed for storing the sort key 0634 * @stable ICU 2.0 0635 */ 0636 virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result, 0637 int32_t resultLength) const override; 0638 0639 /** 0640 * Get the sort key as an array of bytes from a char16_t buffer. 0641 * 0642 * Note that sort keys are often less efficient than simply doing comparison. 0643 * For more details, see the ICU User Guide. 0644 * 0645 * @param source string to be processed. 0646 * @param sourceLength length of string to be processed. If -1, the string 0647 * is 0 terminated and length will be decided by the function. 0648 * @param result buffer to store result in. If nullptr, number of bytes needed 0649 * will be returned. 0650 * @param resultLength length of the result buffer. If if not enough the 0651 * buffer will be filled to capacity. 0652 * @return Number of bytes needed for storing the sort key 0653 * @stable ICU 2.2 0654 */ 0655 virtual int32_t getSortKey(const char16_t *source, int32_t sourceLength, 0656 uint8_t *result, int32_t resultLength) const override; 0657 0658 /** 0659 * Retrieves the reordering codes for this collator. 0660 * @param dest The array to fill with the script ordering. 0661 * @param destCapacity The length of dest. If it is 0, then dest may be nullptr and the function 0662 * will only return the length of the result without writing any codes (pre-flighting). 0663 * @param status A reference to an error code value, which must not indicate 0664 * a failure before the function call. 0665 * @return The length of the script ordering array. 0666 * @see ucol_setReorderCodes 0667 * @see Collator#getEquivalentReorderCodes 0668 * @see Collator#setReorderCodes 0669 * @stable ICU 4.8 0670 */ 0671 virtual int32_t getReorderCodes(int32_t *dest, 0672 int32_t destCapacity, 0673 UErrorCode& status) const override; 0674 0675 /** 0676 * Sets the ordering of scripts for this collator. 0677 * @param reorderCodes An array of script codes in the new order. This can be nullptr if the 0678 * length is also set to 0. An empty array will clear any reordering codes on the collator. 0679 * @param reorderCodesLength The length of reorderCodes. 0680 * @param status error code 0681 * @see ucol_setReorderCodes 0682 * @see Collator#getReorderCodes 0683 * @see Collator#getEquivalentReorderCodes 0684 * @stable ICU 4.8 0685 */ 0686 virtual void setReorderCodes(const int32_t* reorderCodes, 0687 int32_t reorderCodesLength, 0688 UErrorCode& status) override; 0689 0690 /** 0691 * Implements ucol_strcollUTF8(). 0692 * @internal 0693 */ 0694 virtual UCollationResult internalCompareUTF8( 0695 const char *left, int32_t leftLength, 0696 const char *right, int32_t rightLength, 0697 UErrorCode &errorCode) const override; 0698 0699 /** Get the short definition string for a collator. This internal API harvests the collator's 0700 * locale and the attribute set and produces a string that can be used for opening 0701 * a collator with the same attributes using the ucol_openFromShortString API. 0702 * This string will be normalized. 0703 * The structure and the syntax of the string is defined in the "Naming collators" 0704 * section of the users guide: 0705 * https://unicode-org.github.io/icu/userguide/collation/concepts#collator-naming-scheme 0706 * This function supports preflighting. 0707 * 0708 * This is internal, and intended to be used with delegate converters. 0709 * 0710 * @param locale a locale that will appear as a collators locale in the resulting 0711 * short string definition. If nullptr, the locale will be harvested 0712 * from the collator. 0713 * @param buffer space to hold the resulting string 0714 * @param capacity capacity of the buffer 0715 * @param status for returning errors. All the preflighting errors are featured 0716 * @return length of the resulting string 0717 * @see ucol_openFromShortString 0718 * @see ucol_normalizeShortDefinitionString 0719 * @see ucol_getShortDefinitionString 0720 * @internal 0721 */ 0722 virtual int32_t internalGetShortDefinitionString(const char *locale, 0723 char *buffer, 0724 int32_t capacity, 0725 UErrorCode &status) const override; 0726 0727 /** 0728 * Implements ucol_nextSortKeyPart(). 0729 * @internal 0730 */ 0731 virtual int32_t internalNextSortKeyPart( 0732 UCharIterator *iter, uint32_t state[2], 0733 uint8_t *dest, int32_t count, UErrorCode &errorCode) const override; 0734 0735 // Do not enclose the default constructor with #ifndef U_HIDE_INTERNAL_API 0736 /** 0737 * Only for use in ucol_openRules(). 0738 * @internal 0739 */ 0740 RuleBasedCollator(); 0741 0742 #ifndef U_HIDE_INTERNAL_API 0743 /** 0744 * Implements ucol_getLocaleByType(). 0745 * Needed because the lifetime of the locale ID string must match that of the collator. 0746 * getLocale() returns a copy of a Locale, with minimal lifetime in a C wrapper. 0747 * @internal 0748 */ 0749 const char *internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const; 0750 0751 /** 0752 * Implements ucol_getContractionsAndExpansions(). 0753 * Gets this collator's sets of contraction strings and/or 0754 * characters and strings that map to multiple collation elements (expansions). 0755 * If addPrefixes is true, then contractions that are expressed as 0756 * prefix/pre-context rules are included. 0757 * @param contractions if not nullptr, the set to hold the contractions 0758 * @param expansions if not nullptr, the set to hold the expansions 0759 * @param addPrefixes include prefix contextual mappings 0760 * @param errorCode in/out ICU error code 0761 * @internal 0762 */ 0763 void internalGetContractionsAndExpansions( 0764 UnicodeSet *contractions, UnicodeSet *expansions, 0765 UBool addPrefixes, UErrorCode &errorCode) const; 0766 0767 /** 0768 * Adds the contractions that start with character c to the set. 0769 * Ignores prefixes. Used by AlphabeticIndex. 0770 * @internal 0771 */ 0772 void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const; 0773 0774 /** 0775 * Implements from-rule constructors, and ucol_openRules(). 0776 * @internal 0777 */ 0778 void internalBuildTailoring( 0779 const UnicodeString &rules, 0780 int32_t strength, 0781 UColAttributeValue decompositionMode, 0782 UParseError *outParseError, UnicodeString *outReason, 0783 UErrorCode &errorCode); 0784 0785 /** @internal */ 0786 static inline RuleBasedCollator *rbcFromUCollator(UCollator *uc) { 0787 return dynamic_cast<RuleBasedCollator *>(fromUCollator(uc)); 0788 } 0789 /** @internal */ 0790 static inline const RuleBasedCollator *rbcFromUCollator(const UCollator *uc) { 0791 return dynamic_cast<const RuleBasedCollator *>(fromUCollator(uc)); 0792 } 0793 0794 /** 0795 * Appends the CEs for the string to the vector. 0796 * @internal for tests & tools 0797 */ 0798 void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const; 0799 #endif // U_HIDE_INTERNAL_API 0800 0801 protected: 0802 /** 0803 * Used internally by registration to define the requested and valid locales. 0804 * @param requestedLocale the requested locale 0805 * @param validLocale the valid locale 0806 * @param actualLocale the actual locale 0807 * @internal 0808 */ 0809 virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) override; 0810 0811 private: 0812 friend class CollationElementIterator; 0813 friend class Collator; 0814 0815 RuleBasedCollator(const CollationCacheEntry *entry); 0816 0817 /** 0818 * Enumeration of attributes that are relevant for short definition strings 0819 * (e.g., ucol_getShortDefinitionString()). 0820 * Effectively extends UColAttribute. 0821 */ 0822 enum Attributes { 0823 ATTR_VARIABLE_TOP = UCOL_ATTRIBUTE_COUNT, 0824 ATTR_LIMIT 0825 }; 0826 0827 void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode); 0828 0829 // Both lengths must be <0 or else both must be >=0. 0830 UCollationResult doCompare(const char16_t *left, int32_t leftLength, 0831 const char16_t *right, int32_t rightLength, 0832 UErrorCode &errorCode) const; 0833 UCollationResult doCompare(const uint8_t *left, int32_t leftLength, 0834 const uint8_t *right, int32_t rightLength, 0835 UErrorCode &errorCode) const; 0836 0837 void writeSortKey(const char16_t *s, int32_t length, 0838 SortKeyByteSink &sink, UErrorCode &errorCode) const; 0839 0840 void writeIdenticalLevel(const char16_t *s, const char16_t *limit, 0841 SortKeyByteSink &sink, UErrorCode &errorCode) const; 0842 0843 const CollationSettings &getDefaultSettings() const; 0844 0845 void setAttributeDefault(int32_t attribute) { 0846 explicitlySetAttributes &= ~((uint32_t)1 << attribute); 0847 } 0848 void setAttributeExplicitly(int32_t attribute) { 0849 explicitlySetAttributes |= (uint32_t)1 << attribute; 0850 } 0851 UBool attributeHasBeenSetExplicitly(int32_t attribute) const { 0852 // assert(0 <= attribute < ATTR_LIMIT); 0853 return (UBool)((explicitlySetAttributes & ((uint32_t)1 << attribute)) != 0); 0854 } 0855 0856 /** 0857 * Tests whether a character is "unsafe" for use as a collation starting point. 0858 * 0859 * @param c code point or code unit 0860 * @return true if c is unsafe 0861 * @see CollationElementIterator#setOffset(int) 0862 */ 0863 UBool isUnsafe(UChar32 c) const; 0864 0865 static void U_CALLCONV computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode); 0866 UBool initMaxExpansions(UErrorCode &errorCode) const; 0867 0868 void setFastLatinOptions(CollationSettings &ownedSettings) const; 0869 0870 const CollationData *data; 0871 const CollationSettings *settings; // reference-counted 0872 const CollationTailoring *tailoring; // alias of cacheEntry->tailoring 0873 const CollationCacheEntry *cacheEntry; // reference-counted 0874 Locale validLocale; 0875 uint32_t explicitlySetAttributes; 0876 0877 UBool actualLocaleIsSameAsValid; 0878 }; 0879 0880 U_NAMESPACE_END 0881 0882 #endif // !UCONFIG_NO_COLLATION 0883 0884 #endif /* U_SHOW_CPLUSPLUS_API */ 0885 0886 #endif // TBLCOLL_H
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |