|
||||
Warning, file /include/unicode/ucol.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 // © 2016 and later: Unicode, Inc. and others. 0002 // License & terms of use: http://www.unicode.org/copyright.html 0003 /* 0004 ******************************************************************************* 0005 * Copyright (c) 1996-2015, International Business Machines Corporation and others. 0006 * All Rights Reserved. 0007 ******************************************************************************* 0008 */ 0009 0010 #ifndef UCOL_H 0011 #define UCOL_H 0012 0013 #include "unicode/utypes.h" 0014 0015 #if !UCONFIG_NO_COLLATION 0016 0017 #include "unicode/unorm.h" 0018 #include "unicode/parseerr.h" 0019 #include "unicode/uloc.h" 0020 #include "unicode/uset.h" 0021 #include "unicode/uscript.h" 0022 0023 #if U_SHOW_CPLUSPLUS_API 0024 #include "unicode/localpointer.h" 0025 #endif // U_SHOW_CPLUSPLUS_API 0026 0027 /** 0028 * \file 0029 * \brief C API: Collator 0030 * 0031 * <h2> Collator C API </h2> 0032 * 0033 * The C API for Collator performs locale-sensitive 0034 * string comparison. You use this service to build 0035 * searching and sorting routines for natural language text. 0036 * <p> 0037 * For more information about the collation service see 0038 * <a href="https://unicode-org.github.io/icu/userguide/collation">the User Guide</a>. 0039 * <p> 0040 * Collation service provides correct sorting orders for most locales supported in ICU. 0041 * If specific data for a locale is not available, the orders eventually falls back 0042 * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>. 0043 * <p> 0044 * Sort ordering may be customized by providing your own set of rules. For more on 0045 * this subject see the <a href="https://unicode-org.github.io/icu/userguide/collation/customization"> 0046 * Collation Customization</a> section of the User Guide. 
0047 * <p> 0048 * @see UCollationResult 0049 * @see UNormalizationMode 0050 * @see UCollationStrength 0051 * @see UCollationElements 0052 */ 0053 0054 /** A collator. 0055 * For usage in C programs. 0056 */ 0057 struct UCollator; 0058 /** structure representing a collator object instance 0059 * @stable ICU 2.0 0060 */ 0061 typedef struct UCollator UCollator; 0062 0063 0064 /** 0065 * UCOL_LESS is returned if source string is compared to be less than target 0066 * string in the ucol_strcoll() method. 0067 * UCOL_EQUAL is returned if source string is compared to be equal to target 0068 * string in the ucol_strcoll() method. 0069 * UCOL_GREATER is returned if source string is compared to be greater than 0070 * target string in the ucol_strcoll() method. 0071 * @see ucol_strcoll() 0072 * <p> 0073 * Possible values for a comparison result 0074 * @stable ICU 2.0 0075 */ 0076 typedef enum { 0077 /** string a == string b */ 0078 UCOL_EQUAL = 0, 0079 /** string a > string b */ 0080 UCOL_GREATER = 1, 0081 /** string a < string b */ 0082 UCOL_LESS = -1 0083 } UCollationResult ; 0084 0085 0086 /** Enum containing attribute values for controlling collation behavior. 0087 * Here are all the allowable values. Not every attribute can take every value. 
The only 0088 * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined 0089 * value for that locale 0090 * @stable ICU 2.0 0091 */ 0092 typedef enum { 0093 /** accepted by most attributes */ 0094 UCOL_DEFAULT = -1, 0095 0096 /** Primary collation strength */ 0097 UCOL_PRIMARY = 0, 0098 /** Secondary collation strength */ 0099 UCOL_SECONDARY = 1, 0100 /** Tertiary collation strength */ 0101 UCOL_TERTIARY = 2, 0102 /** Default collation strength */ 0103 UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY, 0104 UCOL_CE_STRENGTH_LIMIT, 0105 /** Quaternary collation strength */ 0106 UCOL_QUATERNARY=3, 0107 /** Identical collation strength */ 0108 UCOL_IDENTICAL=15, 0109 UCOL_STRENGTH_LIMIT, 0110 0111 /** Turn the feature off - works for UCOL_FRENCH_COLLATION, 0112 UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE 0113 & UCOL_DECOMPOSITION_MODE*/ 0114 UCOL_OFF = 16, 0115 /** Turn the feature on - works for UCOL_FRENCH_COLLATION, 0116 UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE 0117 & UCOL_DECOMPOSITION_MODE*/ 0118 UCOL_ON = 17, 0119 0120 /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */ 0121 UCOL_SHIFTED = 20, 0122 /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */ 0123 UCOL_NON_IGNORABLE = 21, 0124 0125 /** Valid for UCOL_CASE_FIRST - 0126 lower case sorts before upper case */ 0127 UCOL_LOWER_FIRST = 24, 0128 /** upper case sorts before lower case */ 0129 UCOL_UPPER_FIRST = 25, 0130 0131 #ifndef U_HIDE_DEPRECATED_API 0132 /** 0133 * One more than the highest normal UColAttributeValue value. 0134 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 0135 */ 0136 UCOL_ATTRIBUTE_VALUE_COUNT 0137 #endif /* U_HIDE_DEPRECATED_API */ 0138 } UColAttributeValue; 0139 0140 /** 0141 * Enum containing the codes for reordering segments of the collation table that are not script 0142 * codes. These reordering codes are to be used in conjunction with the script codes. 
0143 * @see ucol_getReorderCodes 0144 * @see ucol_setReorderCodes 0145 * @see ucol_getEquivalentReorderCodes 0146 * @see UScriptCode 0147 * @stable ICU 4.8 0148 */ 0149 typedef enum { 0150 /** 0151 * A special reordering code that is used to specify the default 0152 * reordering codes for a locale. 0153 * @stable ICU 4.8 0154 */ 0155 UCOL_REORDER_CODE_DEFAULT = -1, 0156 /** 0157 * A special reordering code that is used to specify no reordering codes. 0158 * @stable ICU 4.8 0159 */ 0160 UCOL_REORDER_CODE_NONE = USCRIPT_UNKNOWN, 0161 /** 0162 * A special reordering code that is used to specify all other codes used for 0163 * reordering except for the codes listed as UColReorderCode values and those 0164 * listed explicitly in a reordering. 0165 * @stable ICU 4.8 0166 */ 0167 UCOL_REORDER_CODE_OTHERS = USCRIPT_UNKNOWN, 0168 /** 0169 * Characters with the space property. 0170 * This is equivalent to the rule value "space". 0171 * @stable ICU 4.8 0172 */ 0173 UCOL_REORDER_CODE_SPACE = 0x1000, 0174 /** 0175 * The first entry in the enumeration of reordering groups. This is intended for use in 0176 * range checking and enumeration of the reorder codes. 0177 * @stable ICU 4.8 0178 */ 0179 UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE, 0180 /** 0181 * Characters with the punctuation property. 0182 * This is equivalent to the rule value "punct". 0183 * @stable ICU 4.8 0184 */ 0185 UCOL_REORDER_CODE_PUNCTUATION = 0x1001, 0186 /** 0187 * Characters with the symbol property. 0188 * This is equivalent to the rule value "symbol". 0189 * @stable ICU 4.8 0190 */ 0191 UCOL_REORDER_CODE_SYMBOL = 0x1002, 0192 /** 0193 * Characters with the currency property. 0194 * This is equivalent to the rule value "currency". 0195 * @stable ICU 4.8 0196 */ 0197 UCOL_REORDER_CODE_CURRENCY = 0x1003, 0198 /** 0199 * Characters with the digit property. 0200 * This is equivalent to the rule value "digit". 
0201 * @stable ICU 4.8 0202 */ 0203 UCOL_REORDER_CODE_DIGIT = 0x1004, 0204 #ifndef U_HIDE_DEPRECATED_API 0205 /** 0206 * One more than the highest normal UColReorderCode value. 0207 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 0208 */ 0209 UCOL_REORDER_CODE_LIMIT = 0x1005 0210 #endif /* U_HIDE_DEPRECATED_API */ 0211 } UColReorderCode; 0212 0213 /** 0214 * Base letter represents a primary difference. Set comparison 0215 * level to UCOL_PRIMARY to ignore secondary and tertiary differences. 0216 * Use this to set the strength of a Collator object. 0217 * Example of primary difference, "abc" < "abd" 0218 * 0219 * Diacritical differences on the same base letter represent a secondary 0220 * difference. Set comparison level to UCOL_SECONDARY to ignore tertiary 0221 * differences. Use this to set the strength of a Collator object. 0222 * Example of secondary difference, "ä" >> "a". 0223 * 0224 * Uppercase and lowercase versions of the same character represents a 0225 * tertiary difference. Set comparison level to UCOL_TERTIARY to include 0226 * all comparison differences. Use this to set the strength of a Collator 0227 * object. 0228 * Example of tertiary difference, "abc" <<< "ABC". 0229 * 0230 * Two characters are considered "identical" when they have the same 0231 * unicode spellings. UCOL_IDENTICAL. 0232 * For example, "ä" == "ä". 0233 * 0234 * UCollationStrength is also used to determine the strength of sort keys 0235 * generated from UCollator objects 0236 * These values can be now found in the UColAttributeValue enum. 0237 * @stable ICU 2.0 0238 **/ 0239 typedef UColAttributeValue UCollationStrength; 0240 0241 /** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT 0242 * value, as well as the values specific to each one. 0243 * @stable ICU 2.0 0244 */ 0245 typedef enum { 0246 /** Attribute for direction of secondary weights - used in Canadian French. 
0247 * Acceptable values are UCOL_ON, which results in secondary weights 0248 * being considered backwards and UCOL_OFF which treats secondary 0249 * weights in the order they appear. 0250 * @stable ICU 2.0 0251 */ 0252 UCOL_FRENCH_COLLATION, 0253 /** Attribute for handling variable elements. 0254 * Acceptable values are UCOL_NON_IGNORABLE 0255 * which treats all the codepoints with non-ignorable 0256 * primary weights in the same way, 0257 * and UCOL_SHIFTED which causes codepoints with primary 0258 * weights that are equal or below the variable top value 0259 * to be ignored on primary level and moved to the quaternary 0260 * level. The default setting in a Collator object depends on the 0261 * locale data loaded from the resources. For most locales, the 0262 * default is UCOL_NON_IGNORABLE, but for others, such as "th", 0263 * the default could be UCOL_SHIFTED. 0264 * @stable ICU 2.0 0265 */ 0266 UCOL_ALTERNATE_HANDLING, 0267 /** Controls the ordering of upper and lower case letters. 0268 * Acceptable values are UCOL_OFF, which orders 0269 * upper and lower case letters in accordance to their tertiary 0270 * weights, UCOL_UPPER_FIRST which forces upper case letters to 0271 * sort before lower case letters, and UCOL_LOWER_FIRST which does 0272 * the opposite. The default setting in a Collator object depends on the 0273 * locale data loaded from the resources. For most locales, the 0274 * default is UCOL_OFF, but for others, such as "da" or "mt", 0275 * the default could be UCOL_UPPER_FIRST. 0276 * @stable ICU 2.0 0277 */ 0278 UCOL_CASE_FIRST, 0279 /** Controls whether an extra case level (positioned before the third 0280 * level) is generated or not. Acceptable values are UCOL_OFF, 0281 * when case level is not generated, and UCOL_ON which causes the case 0282 * level to be generated. Contents of the case level are affected by 0283 * the value of UCOL_CASE_FIRST attribute. 
A simple way to ignore 0284 * accent differences in a string is to set the strength to UCOL_PRIMARY 0285 * and enable case level. The default setting in a Collator object depends 0286 * on the locale data loaded from the resources. 0287 * @stable ICU 2.0 0288 */ 0289 UCOL_CASE_LEVEL, 0290 /** Controls whether the normalization check and necessary normalizations 0291 * are performed. When set to UCOL_OFF no normalization check 0292 * is performed. The correctness of the result is guaranteed only if the 0293 * input data is in so-called FCD form (see users manual for more info). 0294 * When set to UCOL_ON, an incremental check is performed to see whether 0295 * the input data is in the FCD form. If the data is not in the FCD form, 0296 * incremental NFD normalization is performed. The default setting in a 0297 * Collator object depends on the locale data loaded from the resources. 0298 * For many locales, the default is UCOL_OFF, but for others, such as "hi", 0299 * "vi", or "bn", the default could be UCOL_ON. 0300 * @stable ICU 2.0 0301 */ 0302 UCOL_NORMALIZATION_MODE, 0303 /** An alias for UCOL_NORMALIZATION_MODE attribute. 0304 * @stable ICU 2.0 0305 */ 0306 UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE, 0307 /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY, 0308 * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength 0309 * for most locales (except Japanese) is tertiary. 0310 * 0311 * Quaternary strength 0312 * is useful when combined with shifted setting for alternate handling 0313 * attribute and for JIS X 4061 collation, when it is used to distinguish 0314 * between Katakana and Hiragana. 0315 * Otherwise, quaternary level 0316 * is affected only by the number of non-ignorable code points in 0317 * the string. 0318 * 0319 * Identical strength is rarely useful, as it amounts 0320 * to codepoints of the NFD form of the string. 
0321 * @stable ICU 2.0 0322 */ 0323 UCOL_STRENGTH, 0324 #ifndef U_HIDE_DEPRECATED_API 0325 /** When turned on, this attribute positions Hiragana before all 0326 * non-ignorables on quaternary level This is a sneaky way to produce JIS 0327 * sort order. 0328 * 0329 * This attribute was an implementation detail of the CLDR Japanese tailoring. 0330 * Since ICU 50, this attribute is not settable any more via API functions. 0331 * Since CLDR 25/ICU 53, explicit quaternary relations are used 0332 * to achieve the same Japanese sort order. 0333 * 0334 * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation. 0335 */ 0336 UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1, 0337 #endif /* U_HIDE_DEPRECATED_API */ 0338 /** 0339 * When turned on, this attribute makes 0340 * substrings of digits sort according to their numeric values. 0341 * 0342 * This is a way to get '100' to sort AFTER '2'. Note that the longest 0343 * digit substring that can be treated as a single unit is 0344 * 254 digits (not counting leading zeros). If a digit substring is 0345 * longer than that, the digits beyond the limit will be treated as a 0346 * separate digit substring. 0347 * 0348 * A "digit" in this sense is a code point with General_Category=Nd, 0349 * which does not include circled numbers, roman numerals, etc. 0350 * Only a contiguous digit substring is considered, that is, 0351 * non-negative integers without separators. 0352 * There is no support for plus/minus signs, decimals, exponents, etc. 0353 * 0354 * @stable ICU 2.8 0355 */ 0356 UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2, 0357 0358 /* Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API, 0359 * it is needed for layout of RuleBasedCollator object. */ 0360 #ifndef U_FORCE_HIDE_DEPRECATED_API 0361 /** 0362 * One more than the highest normal UColAttribute value. 0363 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
0364 */ 0365 UCOL_ATTRIBUTE_COUNT 0366 #endif // U_FORCE_HIDE_DEPRECATED_API 0367 } UColAttribute; 0368 0369 /** Options for retrieving the rule string 0370 * @stable ICU 2.0 0371 */ 0372 typedef enum { 0373 /** 0374 * Retrieves the tailoring rules only. 0375 * Same as calling the version of getRules() without UColRuleOption. 0376 * @stable ICU 2.0 0377 */ 0378 UCOL_TAILORING_ONLY, 0379 /** 0380 * Retrieves the "UCA rules" concatenated with the tailoring rules. 0381 * The "UCA rules" are an <i>approximation</i> of the root collator's sort order. 0382 * They are almost never used or useful at runtime and can be removed from the data. 0383 * See https://unicode-org.github.io/icu/userguide/collation/customization#building-on-existing-locales 0384 * @stable ICU 2.0 0385 */ 0386 UCOL_FULL_RULES 0387 } UColRuleOption ; 0388 0389 /** 0390 * Open a UCollator for comparing strings. 0391 * 0392 * For some languages, multiple collation types are available; 0393 * for example, "de@collation=phonebook". 0394 * Starting with ICU 54, collation attributes can be specified via locale keywords as well, 0395 * in the old locale extension syntax ("el@colCaseFirst=upper") 0396 * or in language tag syntax ("el-u-kf-upper"). 0397 * See <a href="https://unicode-org.github.io/icu/userguide/collation/api">User Guide: Collation API</a>. 0398 * 0399 * The UCollator pointer is used in all the calls to the Collation 0400 * service. After finished, collator must be disposed of by calling 0401 * {@link #ucol_close }. 0402 * @param loc The locale containing the required collation rules. 0403 * Special values for locales can be passed in - 0404 * if NULL is passed for the locale, the default locale 0405 * collation rules will be used. If empty string ("") or 0406 * "root" are passed, the root collator will be returned. 0407 * @param status A pointer to a UErrorCode to receive any errors 0408 * @return A pointer to a UCollator, or 0 if an error occurred. 
0409 * @see ucol_openRules 0410 * @see ucol_clone 0411 * @see ucol_close 0412 * @stable ICU 2.0 0413 */ 0414 U_CAPI UCollator* U_EXPORT2 0415 ucol_open(const char *loc, UErrorCode *status); 0416 0417 /** 0418 * Produce a UCollator instance according to the rules supplied. 0419 * The rules are used to change the default ordering, defined in the 0420 * UCA in a process called tailoring. The resulting UCollator pointer 0421 * can be used in the same way as the one obtained by {@link #ucol_open }. 0422 * @param rules A string describing the collation rules. For the syntax 0423 * of the rules please see users guide. 0424 * @param rulesLength The length of rules, or -1 if null-terminated. 0425 * @param normalizationMode The normalization mode: One of 0426 * UCOL_OFF (expect the text to not need normalization), 0427 * UCOL_ON (normalize), or 0428 * UCOL_DEFAULT (set the mode according to the rules) 0429 * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, 0430 * UCOL_TERTIARY, UCOL_IDENTICAL, UCOL_DEFAULT_STRENGTH - can be also set in the rules. 0431 * @param parseError A pointer to UParseError to receive information about errors 0432 * that occurred during parsing. This argument can currently be set 0433 * to NULL, but at the user's own risk. Please provide a real structure. 0434 * @param status A pointer to a UErrorCode to receive any errors 0435 * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case 0436 * of error - please use status argument to check for errors. 0437 * @see ucol_open 0438 * @see ucol_clone 0439 * @see ucol_close 0440 * @stable ICU 2.0 0441 */ 0442 U_CAPI UCollator* U_EXPORT2 0443 ucol_openRules( const UChar *rules, 0444 int32_t rulesLength, 0445 UColAttributeValue normalizationMode, 0446 UCollationStrength strength, 0447 UParseError *parseError, 0448 UErrorCode *status); 0449 0450 #ifndef U_HIDE_DEPRECATED_API 0451 /** 0452 * Open a collator defined by a short form string. 
0453 * The structure and the syntax of the string is defined in the "Naming collators" 0454 * section of the users guide: 0455 * https://unicode-org.github.io/icu/userguide/collation/concepts#collator-naming-scheme 0456 * Attributes are overridden by the subsequent attributes. So, for "S2_S3", final 0457 * strength will be 3. 3066bis locale overrides individual locale parts. 0458 * The call to this function is equivalent to a call to ucol_open, followed by a 0459 * series of calls to ucol_setAttribute and ucol_setVariableTop. 0460 * @param definition A short string containing a locale and a set of attributes. 0461 * Attributes not explicitly mentioned are left at the default 0462 * state for a locale. 0463 * @param parseError if not NULL, structure that will get filled with error's pre 0464 * and post context in case of error. 0465 * @param forceDefaults if false, the settings that are the same as the collator 0466 * default settings will not be applied (for example, setting 0467 * French secondary on a French collator would not be executed). 0468 * If true, all the settings will be applied regardless of the 0469 * collator default value. If the definition 0470 * strings are to be cached, should be set to false. 0471 * @param status Error code. Apart from regular error conditions connected to 0472 * instantiating collators (like out of memory or similar), this 0473 * API will return an error if an invalid attribute or attribute/value 0474 * combination is specified. 0475 * @return A pointer to a UCollator or 0 if an error occurred (including an 0476 * invalid attribute). 0477 * @see ucol_open 0478 * @see ucol_setAttribute 0479 * @see ucol_setVariableTop 0480 * @see ucol_getShortDefinitionString 0481 * @see ucol_normalizeShortDefinitionString 0482 * @deprecated ICU 54 Use ucol_open() with language tag collation keywords instead. 
0483 */ 0484 U_DEPRECATED UCollator* U_EXPORT2 0485 ucol_openFromShortString( const char *definition, 0486 UBool forceDefaults, 0487 UParseError *parseError, 0488 UErrorCode *status); 0489 #endif /* U_HIDE_DEPRECATED_API */ 0490 0491 #ifndef U_HIDE_DEPRECATED_API 0492 /** 0493 * Get a set containing the contractions defined by the collator. The set includes 0494 * both the root collator's contractions and the contractions defined by the collator. This set 0495 * will contain only strings. If a tailoring explicitly suppresses contractions from 0496 * the root collator (like Russian), removed contractions will not be in the resulting set. 0497 * @param coll collator 0498 * @param conts the set to hold the result. It gets emptied before 0499 * contractions are added. 0500 * @param status to hold the error code 0501 * @return the size of the contraction set 0502 * 0503 * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead 0504 */ 0505 U_DEPRECATED int32_t U_EXPORT2 0506 ucol_getContractions( const UCollator *coll, 0507 USet *conts, 0508 UErrorCode *status); 0509 #endif /* U_HIDE_DEPRECATED_API */ 0510 0511 /** 0512 * Get a set containing the expansions defined by the collator. The set includes 0513 * both the root collator's expansions and the expansions defined by the tailoring 0514 * @param coll collator 0515 * @param contractions if not NULL, the set to hold the contractions 0516 * @param expansions if not NULL, the set to hold the expansions 0517 * @param addPrefixes add the prefix contextual elements to contractions 0518 * @param status to hold the error code 0519 * 0520 * @stable ICU 3.4 0521 */ 0522 U_CAPI void U_EXPORT2 0523 ucol_getContractionsAndExpansions( const UCollator *coll, 0524 USet *contractions, USet *expansions, 0525 UBool addPrefixes, UErrorCode *status); 0526 0527 /** 0528 * Close a UCollator. 0529 * Once closed, a UCollator should not be used. Every open collator should 0530 * be closed. Otherwise, a memory leak will result. 
0531 * @param coll The UCollator to close. 0532 * @see ucol_open 0533 * @see ucol_openRules 0534 * @see ucol_clone 0535 * @stable ICU 2.0 0536 */ 0537 U_CAPI void U_EXPORT2 0538 ucol_close(UCollator *coll); 0539 0540 #if U_SHOW_CPLUSPLUS_API 0541 0542 U_NAMESPACE_BEGIN 0543 0544 /** 0545 * \class LocalUCollatorPointer 0546 * "Smart pointer" class, closes a UCollator via ucol_close(). 0547 * For most methods see the LocalPointerBase base class. 0548 * 0549 * @see LocalPointerBase 0550 * @see LocalPointer 0551 * @stable ICU 4.4 0552 */ 0553 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close); 0554 0555 U_NAMESPACE_END 0556 0557 #endif 0558 0559 /** 0560 * Compare two strings. 0561 * The strings will be compared using the options already specified. 0562 * @param coll The UCollator containing the comparison rules. 0563 * @param source The source string. 0564 * @param sourceLength The length of source, or -1 if null-terminated. 0565 * @param target The target string. 0566 * @param targetLength The length of target, or -1 if null-terminated. 0567 * @return The result of comparing the strings; one of UCOL_EQUAL, 0568 * UCOL_GREATER, UCOL_LESS 0569 * @see ucol_greater 0570 * @see ucol_greaterOrEqual 0571 * @see ucol_equal 0572 * @stable ICU 2.0 0573 */ 0574 U_CAPI UCollationResult U_EXPORT2 0575 ucol_strcoll( const UCollator *coll, 0576 const UChar *source, 0577 int32_t sourceLength, 0578 const UChar *target, 0579 int32_t targetLength); 0580 0581 /** 0582 * Compare two strings in UTF-8. 0583 * The strings will be compared using the options already specified. 0584 * Note: When an input string contains a malformed UTF-8 byte sequence, 0585 * this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD). 0586 * @param coll The UCollator containing the comparison rules. 0587 * @param source The source UTF-8 string. 0588 * @param sourceLength The length of source, or -1 if null-terminated. 0589 * @param target The target UTF-8 string. 
0590 * @param targetLength The length of target, or -1 if null-terminated. 0591 * @param status A pointer to a UErrorCode to receive any errors 0592 * @return The result of comparing the strings; one of UCOL_EQUAL, 0593 * UCOL_GREATER, UCOL_LESS 0594 * @see ucol_greater 0595 * @see ucol_greaterOrEqual 0596 * @see ucol_equal 0597 * @stable ICU 50 0598 */ 0599 U_CAPI UCollationResult U_EXPORT2 0600 ucol_strcollUTF8( 0601 const UCollator *coll, 0602 const char *source, 0603 int32_t sourceLength, 0604 const char *target, 0605 int32_t targetLength, 0606 UErrorCode *status); 0607 0608 /** 0609 * Determine if one string is greater than another. 0610 * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER 0611 * @param coll The UCollator containing the comparison rules. 0612 * @param source The source string. 0613 * @param sourceLength The length of source, or -1 if null-terminated. 0614 * @param target The target string. 0615 * @param targetLength The length of target, or -1 if null-terminated. 0616 * @return true if source is greater than target, false otherwise. 0617 * @see ucol_strcoll 0618 * @see ucol_greaterOrEqual 0619 * @see ucol_equal 0620 * @stable ICU 2.0 0621 */ 0622 U_CAPI UBool U_EXPORT2 0623 ucol_greater(const UCollator *coll, 0624 const UChar *source, int32_t sourceLength, 0625 const UChar *target, int32_t targetLength); 0626 0627 /** 0628 * Determine if one string is greater than or equal to another. 0629 * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS 0630 * @param coll The UCollator containing the comparison rules. 0631 * @param source The source string. 0632 * @param sourceLength The length of source, or -1 if null-terminated. 0633 * @param target The target string. 0634 * @param targetLength The length of target, or -1 if null-terminated. 0635 * @return true if source is greater than or equal to target, false otherwise. 
0636 * @see ucol_strcoll 0637 * @see ucol_greater 0638 * @see ucol_equal 0639 * @stable ICU 2.0 0640 */ 0641 U_CAPI UBool U_EXPORT2 0642 ucol_greaterOrEqual(const UCollator *coll, 0643 const UChar *source, int32_t sourceLength, 0644 const UChar *target, int32_t targetLength); 0645 0646 /** 0647 * Compare two strings for equality. 0648 * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL 0649 * @param coll The UCollator containing the comparison rules. 0650 * @param source The source string. 0651 * @param sourceLength The length of source, or -1 if null-terminated. 0652 * @param target The target string. 0653 * @param targetLength The length of target, or -1 if null-terminated. 0654 * @return true if source is equal to target, false otherwise 0655 * @see ucol_strcoll 0656 * @see ucol_greater 0657 * @see ucol_greaterOrEqual 0658 * @stable ICU 2.0 0659 */ 0660 U_CAPI UBool U_EXPORT2 0661 ucol_equal(const UCollator *coll, 0662 const UChar *source, int32_t sourceLength, 0663 const UChar *target, int32_t targetLength); 0664 0665 /** 0666 * Compare two strings supplied through UCharIterator objects. 0667 * The strings will be compared using the options already specified. 0668 * @param coll The UCollator containing the comparison rules. 0669 * @param sIter The source string iterator. 0670 * @param tIter The target string iterator. 0671 * @return The result of comparing the strings; one of UCOL_EQUAL, 0672 * UCOL_GREATER, UCOL_LESS 0673 * @param status A pointer to a UErrorCode to receive any errors 0674 * @see ucol_strcoll 0675 * @stable ICU 2.6 0676 */ 0677 U_CAPI UCollationResult U_EXPORT2 0678 ucol_strcollIter( const UCollator *coll, 0679 UCharIterator *sIter, 0680 UCharIterator *tIter, 0681 UErrorCode *status); 0682 0683 /** 0684 * Get the collation strength used in a UCollator. 0685 * The strength influences how strings are compared. 0686 * @param coll The UCollator to query. 
0687 * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, 0688 * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL 0689 * @see ucol_setStrength 0690 * @stable ICU 2.0 0691 */ 0692 U_CAPI UCollationStrength U_EXPORT2 0693 ucol_getStrength(const UCollator *coll); 0694 0695 /** 0696 * Set the collation strength used in a UCollator. 0697 * The strength influences how strings are compared. 0698 * @param coll The UCollator to set. 0699 * @param strength The desired collation strength; one of UCOL_PRIMARY, 0700 * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT 0701 * @see ucol_getStrength 0702 * @stable ICU 2.0 0703 */ 0704 U_CAPI void U_EXPORT2 0705 ucol_setStrength(UCollator *coll, 0706 UCollationStrength strength); 0707 0708 /** 0709 * Retrieves the reordering codes for this collator. 0710 * These reordering codes are a combination of UScript codes and UColReorderCode entries. 0711 * @param coll The UCollator to query. 0712 * @param dest The array to fill with the script ordering. 0713 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function 0714 * will only return the length of the result without writing any codes (pre-flighting). 0715 * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a 0716 * failure before the function call. 0717 * @return The number of reordering codes written to the dest array. 0718 * @see ucol_setReorderCodes 0719 * @see ucol_getEquivalentReorderCodes 0720 * @see UScriptCode 0721 * @see UColReorderCode 0722 * @stable ICU 4.8 0723 */ 0724 U_CAPI int32_t U_EXPORT2 0725 ucol_getReorderCodes(const UCollator* coll, 0726 int32_t* dest, 0727 int32_t destCapacity, 0728 UErrorCode *pErrorCode); 0729 /** 0730 * Sets the reordering codes for this collator. 0731 * Collation reordering allows scripts and some other groups of characters 0732 * to be moved relative to each other. 
This reordering is done on top of 0733 * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed 0734 * at the start and/or the end of the collation order. These groups are specified using 0735 * UScript codes and UColReorderCode entries. 0736 * 0737 * <p>By default, reordering codes specified for the start of the order are placed in the 0738 * order given after several special non-script blocks. These special groups of characters 0739 * are space, punctuation, symbol, currency, and digit. These special groups are represented with 0740 * UColReorderCode entries. Script groups can be intermingled with 0741 * these special non-script groups if those special groups are explicitly specified in the reordering. 0742 * 0743 * <p>The special code OTHERS stands for any script that is not explicitly 0744 * mentioned in the list of reordering codes given. Anything that is after OTHERS 0745 * will go at the very end of the reordering in the order given. 0746 * 0747 * <p>The special reorder code DEFAULT will reset the reordering for this collator 0748 * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that 0749 * was specified when this collator was created from resource data or from rules. The 0750 * DEFAULT code <b>must</b> be the sole code supplied when it is used. 0751 * If not, then U_ILLEGAL_ARGUMENT_ERROR will be set. 0752 * 0753 * <p>The special reorder code NONE will remove any reordering for this collator. 0754 * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The 0755 * NONE code <b>must</b> be the sole code supplied when it is used. 0756 * 0757 * @param coll The UCollator to set. 0758 * @param reorderCodes An array of script codes in the new order. This can be NULL if the 0759 * length is also set to 0. An empty array will clear any reordering codes on the collator. 0760 * @param reorderCodesLength The length of reorderCodes. 
0761 * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a 0762 * failure before the function call. 0763 * @see ucol_getReorderCodes 0764 * @see ucol_getEquivalentReorderCodes 0765 * @see UScriptCode 0766 * @see UColReorderCode 0767 * @stable ICU 4.8 0768 */ 0769 U_CAPI void U_EXPORT2 0770 ucol_setReorderCodes(UCollator* coll, 0771 const int32_t* reorderCodes, 0772 int32_t reorderCodesLength, 0773 UErrorCode *pErrorCode); 0774 0775 /** 0776 * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder 0777 * codes will be grouped and must reorder together. 0778 * Beginning with ICU 55, scripts only reorder together if they are primary-equal, 0779 * for example Hiragana and Katakana. 0780 * 0781 * @param reorderCode The reorder code to determine equivalence for. 0782 * @param dest The array to fill with the script ordering. 0783 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function 0784 * will only return the length of the result without writing any codes (pre-flighting). 0785 * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate 0786 * a failure before the function call. 0787 * @return The number of reordering codes written to the dest array. 0788 * @see ucol_setReorderCodes 0789 * @see ucol_getReorderCodes 0790 * @see UScriptCode 0791 * @see UColReorderCode 0792 * @stable ICU 4.8 0793 */ 0794 U_CAPI int32_t U_EXPORT2 0795 ucol_getEquivalentReorderCodes(int32_t reorderCode, 0796 int32_t* dest, 0797 int32_t destCapacity, 0798 UErrorCode *pErrorCode); 0799 0800 /** 0801 * Get the display name for a UCollator. 0802 * The display name is suitable for presentation to a user. 0803 * @param objLoc The locale of the collator in question. 0804 * @param dispLoc The locale for display. 0805 * @param result A pointer to a buffer to receive the display name. 0806 * @param resultLength The maximum size of result. 
0807 * @param status A pointer to a UErrorCode to receive any errors 0808 * @return The total buffer size needed; if greater than resultLength, 0809 * the output was truncated. 0810 * @stable ICU 2.0 0811 */ 0812 U_CAPI int32_t U_EXPORT2 0813 ucol_getDisplayName( const char *objLoc, 0814 const char *dispLoc, 0815 UChar *result, 0816 int32_t resultLength, 0817 UErrorCode *status); 0818 0819 /** 0820 * Get a locale for which collation rules are available. 0821 * A UCollator in a locale returned by this function will perform the correct 0822 * collation for the locale. 0823 * @param localeIndex The index of the desired locale. 0824 * @return A locale for which collation rules are available, or NULL if none. 0825 * @see ucol_countAvailable 0826 * @stable ICU 2.0 0827 */ 0828 U_CAPI const char* U_EXPORT2 0829 ucol_getAvailable(int32_t localeIndex); 0830 0831 /** 0832 * Determine how many locales have collation rules available. 0833 * This function is most useful for determining the loop ending condition for 0834 * calls to {@link #ucol_getAvailable }. 0835 * @return The number of locales for which collation rules are available. 0836 * @see ucol_getAvailable 0837 * @stable ICU 2.0 0838 */ 0839 U_CAPI int32_t U_EXPORT2 0840 ucol_countAvailable(void); 0841 0842 #if !UCONFIG_NO_SERVICE 0843 /** 0844 * Create a string enumerator of all locales for which a valid 0845 * collator may be opened. 0846 * @param status input-output error code 0847 * @return a string enumeration over locale strings. The caller is 0848 * responsible for closing the result. 0849 * @stable ICU 3.0 0850 */ 0851 U_CAPI UEnumeration* U_EXPORT2 0852 ucol_openAvailableLocales(UErrorCode *status); 0853 #endif 0854 0855 /** 0856 * Create a string enumerator of all possible keywords that are relevant to 0857 * collation. At this point, the only recognized keyword for this 0858 * service is "collation". 0859 * @param status input-output error code 0860 * @return a string enumeration over keyword strings. 
The caller is 0861 * responsible for closing the result. 0862 * @stable ICU 3.0 0863 */ 0864 U_CAPI UEnumeration* U_EXPORT2 0865 ucol_getKeywords(UErrorCode *status); 0866 0867 /** 0868 * Given a keyword, create a string enumeration of all values 0869 * for that keyword that are currently in use. 0870 * @param keyword a particular keyword as enumerated by 0871 * ucol_getKeywords. If any other keyword is passed in, *status is set 0872 * to U_ILLEGAL_ARGUMENT_ERROR. 0873 * @param status input-output error code 0874 * @return a string enumeration over collation keyword values, or NULL 0875 * upon error. The caller is responsible for closing the result. 0876 * @stable ICU 3.0 0877 */ 0878 U_CAPI UEnumeration* U_EXPORT2 0879 ucol_getKeywordValues(const char *keyword, UErrorCode *status); 0880 0881 /** 0882 * Given a key and a locale, returns an array of string values in a preferred 0883 * order that would make a difference. These are all and only those values where 0884 * the open (creation) of the service with the locale formed from the input locale 0885 * plus input keyword and that value has different behavior than creation with the 0886 * input locale alone. 0887 * @param key one of the keys supported by this service. For now, only 0888 * "collation" is supported. 0889 * @param locale the locale 0890 * @param commonlyUsed if set to true it will return only commonly used values 0891 * with the given locale in preferred order. Otherwise, 0892 * it will return all the available values for the locale. 0893 * @param status error status 0894 * @return a string enumeration over keyword values for the given key and the locale. 
0895 * @stable ICU 4.2 0896 */ 0897 U_CAPI UEnumeration* U_EXPORT2 0898 ucol_getKeywordValuesForLocale(const char* key, 0899 const char* locale, 0900 UBool commonlyUsed, 0901 UErrorCode* status); 0902 0903 /** 0904 * Return the functionally equivalent locale for the specified 0905 * input locale, with respect to given keyword, for the 0906 * collation service. If two different input locale + keyword 0907 * combinations produce the same result locale, then collators 0908 * instantiated for these two different input locales will behave 0909 * equivalently. The converse is not always true; two collators 0910 * may in fact be equivalent, but return different results, due to 0911 * internal details. The return result has no other meaning than 0912 * that stated above, and implies nothing as to the relationship 0913 * between the two locales. This is intended for use by 0914 * applications who wish to cache collators, or otherwise reuse 0915 * collators when possible. The functional equivalent may change 0916 * over time. For more information, please see the <a 0917 * href="https://unicode-org.github.io/icu/userguide/locale#locales-and-services"> 0918 * Locales and Services</a> section of the ICU User Guide. 0919 * @param result fillin for the functionally equivalent result locale 0920 * @param resultCapacity capacity of the fillin buffer 0921 * @param keyword a particular keyword as enumerated by 0922 * ucol_getKeywords. 0923 * @param locale the specified input locale 0924 * @param isAvailable if non-NULL, pointer to a fillin parameter that 0925 * on return indicates whether the specified input locale was 'available' 0926 * to the collation service. A locale is defined as 'available' if it 0927 * physically exists within the collation locale data. 0928 * @param status pointer to input-output error code 0929 * @return the actual buffer size needed for the locale. 
If greater 0930 * than resultCapacity, the returned full name will be truncated and 0931 * an error code will be returned. 0932 * @stable ICU 3.0 0933 */ 0934 U_CAPI int32_t U_EXPORT2 0935 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, 0936 const char* keyword, const char* locale, 0937 UBool* isAvailable, UErrorCode* status); 0938 0939 /** 0940 * Get the collation tailoring rules from a UCollator. 0941 * The rules will follow the rule syntax. 0942 * @param coll The UCollator to query. 0943 * @param length Output parameter; presumably receives the length of the returned rules (TODO confirm against the implementation). 0944 * @return The collation tailoring rules. 0945 * @stable ICU 2.0 0946 */ 0947 U_CAPI const UChar* U_EXPORT2 0948 ucol_getRules( const UCollator *coll, 0949 int32_t *length); 0950 0951 #ifndef U_HIDE_DEPRECATED_API 0952 /** Get the short definition string for a collator. This API harvests the collator's 0953 * locale and the attribute set and produces a string that can be used for opening 0954 * a collator with the same attributes using the ucol_openFromShortString API. 0955 * This string will be normalized. 0956 * The structure and the syntax of the string are defined in the "Naming collators" 0957 * section of the User Guide: 0958 * https://unicode-org.github.io/icu/userguide/collation/concepts#collator-naming-scheme 0959 * This API supports preflighting. 0960 * @param coll a collator 0961 * @param locale a locale that will appear as the collator's locale in the resulting 0962 * short string definition. If NULL, the locale will be harvested 0963 * from the collator. 0964 * @param buffer space to hold the resulting string 0965 * @param capacity capacity of the buffer 0966 * @param status for returning errors. 
Preflighting errors are reported here as well 0967 * @return length of the resulting string 0968 * @see ucol_openFromShortString 0969 * @see ucol_normalizeShortDefinitionString 0970 * @deprecated ICU 54 0971 */ 0972 U_DEPRECATED int32_t U_EXPORT2 0973 ucol_getShortDefinitionString(const UCollator *coll, 0974 const char *locale, 0975 char *buffer, 0976 int32_t capacity, 0977 UErrorCode *status); 0978 0979 /** Verifies and normalizes a short definition string. 0980 * A normalized short definition string has all the options sorted by the argument name, 0981 * so that equivalent definition strings are the same. 0982 * This API supports preflighting. 0983 * @param source definition string 0984 * @param destination space to hold the resulting string 0985 * @param capacity capacity of the buffer 0986 * @param parseError if not NULL, structure that will get filled with error's pre 0987 * and post context in case of error. 0988 * @param status Error code. This API will return an error if an invalid attribute 0989 * or attribute/value combination is specified. Preflighting 0990 * errors are also reported here 0991 * @return length of the resulting normalized string. 0992 * 0993 * @see ucol_openFromShortString 0994 * @see ucol_getShortDefinitionString 0995 * 0996 * @deprecated ICU 54 0997 */ 0998 U_DEPRECATED int32_t U_EXPORT2 0999 ucol_normalizeShortDefinitionString(const char *source, 1000 char *destination, 1001 int32_t capacity, 1002 UParseError *parseError, 1003 UErrorCode *status); 1004 #endif /* U_HIDE_DEPRECATED_API */ 1005 1006 1007 /** 1008 * Get a sort key for a string from a UCollator. 1009 * Sort keys may be compared using <TT>strcmp</TT>. 1010 * 1011 * Note that sort keys are often less efficient than simply doing comparison. 1012 * For more details, see the ICU User Guide. 1013 * 1014 * Like ICU functions that write to an output buffer, the buffer contents 1015 * are undefined if the buffer capacity (resultLength parameter) is too small. 
1016 * Unlike ICU functions that write a string to an output buffer, 1017 * the terminating zero byte is counted in the sort key length. 1018 * @param coll The UCollator containing the collation rules. 1019 * @param source The string to transform. 1020 * @param sourceLength The length of source, or -1 if null-terminated. 1021 * @param result A pointer to a buffer to receive the sort key. 1022 * @param resultLength The maximum size of result. 1023 * @return The size needed to fully store the sort key. 1024 * If there was an internal error generating the sort key, 1025 * a zero value is returned. 1026 * @see ucol_keyHashCode 1027 * @stable ICU 2.0 1028 */ 1029 U_CAPI int32_t U_EXPORT2 1030 ucol_getSortKey(const UCollator *coll, 1031 const UChar *source, 1032 int32_t sourceLength, 1033 uint8_t *result, 1034 int32_t resultLength); 1035 1036 1037 /** Gets the next count bytes of a sort key. Caller needs 1038 * to preserve state array between calls and to provide 1039 * the same type of UCharIterator set with the same string. 1040 * The destination buffer provided must be big enough to store 1041 * the number of requested bytes. 1042 * 1043 * The generated sort key may or may not be compatible with 1044 * sort keys generated using ucol_getSortKey(). 1045 * @param coll The UCollator containing the collation rules. 1046 * @param iter UCharIterator containing the string we need 1047 * the sort key to be calculated for. 1048 * @param state Opaque state of sortkey iteration. 1049 * @param dest Buffer to hold the resulting sortkey part 1050 * @param count number of sort key bytes required. 1051 * @param status error code indicator. 1052 * @return the actual number of bytes of a sortkey. It can be 1053 * smaller than count if we have reached the end of 1054 * the sort key. 
1055 * @stable ICU 2.6 1056 */ 1057 U_CAPI int32_t U_EXPORT2 1058 ucol_nextSortKeyPart(const UCollator *coll, 1059 UCharIterator *iter, 1060 uint32_t state[2], 1061 uint8_t *dest, int32_t count, 1062 UErrorCode *status); 1063 1064 /** enum that is taken by ucol_getBound API 1065 * See below for explanation 1066 * do not change the values assigned to the 1067 * members of this enum. Underlying code 1068 * depends on them having these numbers 1069 * @stable ICU 2.0 1070 */ 1071 typedef enum { 1072 /** lower bound */ 1073 UCOL_BOUND_LOWER = 0, 1074 /** upper bound that will match strings of exact size */ 1075 UCOL_BOUND_UPPER = 1, 1076 /** upper bound that will match all the strings that have the same initial substring as the given string */ 1077 UCOL_BOUND_UPPER_LONG = 2, 1078 #ifndef U_HIDE_DEPRECATED_API 1079 /** 1080 * One more than the highest normal UColBoundMode value. 1081 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 1082 */ 1083 UCOL_BOUND_VALUE_COUNT 1084 #endif /* U_HIDE_DEPRECATED_API */ 1085 } UColBoundMode; 1086 1087 /** 1088 * Produce a bound for a given sortkey and a number of levels. 1089 * Return value is always the number of bytes needed, regardless of 1090 * whether the result buffer was big enough or even valid.<br> 1091 * Resulting bounds can be used to produce a range of strings that are 1092 * between upper and lower bounds. For example, if bounds are produced 1093 * for a sortkey of string "smith", strings between upper and lower 1094 * bounds with one level would include "Smith", "SMITH", "sMiTh".<br> 1095 * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER 1096 * is produced, strings matched would be as above. However, if bound 1097 * produced using UCOL_BOUND_UPPER_LONG is used, the above example will 1098 * also match "Smithsonian" and similar.<br> 1099 * For more on usage, see example in cintltst/capitst.c in procedure 1100 * TestBounds. 
1101 * Sort keys may be compared using <TT>strcmp</TT>. 1102 * @param source The source sortkey. 1103 * @param sourceLength The length of source, or -1 if null-terminated. 1104 * (If an unmodified sortkey is passed, it is always null 1105 * terminated). 1106 * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which 1107 * produces a lower inclusive bound, UCOL_BOUND_UPPER, that 1108 * produces upper bound that matches strings of the same length 1109 * or UCOL_BOUND_UPPER_LONG that matches strings that have the 1110 * same starting substring as the source string. 1111 * @param noOfLevels Number of levels required in the resulting bound (for most 1112 * uses, the recommended value is 1). See users guide for 1113 * explanation on number of levels a sortkey can have. 1114 * @param result A pointer to a buffer to receive the resulting sortkey. 1115 * @param resultLength The maximum size of result. 1116 * @param status Used for returning error code if something went wrong. If the 1117 * number of levels requested is higher than the number of levels 1118 * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is 1119 * issued. 1120 * @return The size needed to fully store the bound. 1121 * @see ucol_keyHashCode 1122 * @stable ICU 2.1 1123 */ 1124 U_CAPI int32_t U_EXPORT2 1125 ucol_getBound(const uint8_t *source, 1126 int32_t sourceLength, 1127 UColBoundMode boundType, 1128 uint32_t noOfLevels, 1129 uint8_t *result, 1130 int32_t resultLength, 1131 UErrorCode *status); 1132 1133 /** 1134 * Gets the version information for a Collator. Version is currently 1135 * an opaque 32-bit number which depends, among other things, on major 1136 * versions of the collator tailoring and UCA. 1137 * @param coll The UCollator to query. 
1138 * @param info the version # information, the result will be filled in 1139 * @stable ICU 2.0 1140 */ 1141 U_CAPI void U_EXPORT2 1142 ucol_getVersion(const UCollator* coll, UVersionInfo info); 1143 1144 /** 1145 * Gets the UCA version information for a Collator. Version is the 1146 * UCA version number (3.1.1, 4.0). 1147 * @param coll The UCollator to query. 1148 * @param info the version # information, the result will be filled in 1149 * @stable ICU 2.8 1150 */ 1151 U_CAPI void U_EXPORT2 1152 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info); 1153 1154 /** 1155 * Merges two sort keys. The levels are merged with their corresponding counterparts 1156 * (primaries with primaries, secondaries with secondaries etc.). Between the values 1157 * from the same level a separator is inserted. 1158 * 1159 * This is useful, for example, for combining sort keys from first and last names 1160 * to sort such pairs. 1161 * See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys 1162 * 1163 * The recommended way to achieve "merged" sorting is by 1164 * concatenating strings with U+FFFE between them. 1165 * The concatenation has the same sort order as the merged sort keys, 1166 * but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\\uFFFE' + str2). 1167 * Using strings with U+FFFE may yield shorter sort keys. 1168 * 1169 * For details about Sort Key Features see 1170 * https://unicode-org.github.io/icu/userguide/collation/api#sort-key-features 1171 * 1172 * It is possible to merge multiple sort keys by consecutively merging 1173 * another one with the intermediate result. 1174 * 1175 * The length of the merge result is the sum of the lengths of the input sort keys. 
1176 * 1177 * Example (uncompressed): 1178 * <pre>191B1D 01 050505 01 910505 00 1179 * 1F2123 01 050505 01 910505 00</pre> 1180 * will be merged as 1181 * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre> 1182 * 1183 * If the destination buffer is not big enough, then its contents are undefined. 1184 * If any of source lengths are zero or any of the source pointers are NULL/undefined, 1185 * the result is of size zero. 1186 * 1187 * @param src1 the first sort key 1188 * @param src1Length the length of the first sort key, including the zero byte at the end; 1189 * can be -1 if the function is to find the length 1190 * @param src2 the second sort key 1191 * @param src2Length the length of the second sort key, including the zero byte at the end; 1192 * can be -1 if the function is to find the length 1193 * @param dest the buffer where the merged sort key is written, 1194 * can be NULL if destCapacity==0 1195 * @param destCapacity the number of bytes in the dest buffer 1196 * @return the length of the merged sort key, src1Length+src2Length; 1197 * can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments), 1198 * in which cases the contents of dest is undefined 1199 * @stable ICU 2.0 1200 */ 1201 U_CAPI int32_t U_EXPORT2 1202 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, 1203 const uint8_t *src2, int32_t src2Length, 1204 uint8_t *dest, int32_t destCapacity); 1205 1206 /** 1207 * Universal attribute setter 1208 * @param coll collator whose attributes are to be changed 1209 * @param attr attribute type 1210 * @param value attribute value 1211 * @param status to indicate whether the operation went on smoothly or there were errors 1212 * @see UColAttribute 1213 * @see UColAttributeValue 1214 * @see ucol_getAttribute 1215 * @stable ICU 2.0 1216 */ 1217 U_CAPI void U_EXPORT2 1218 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status); 1219 1220 /** 1221 * Universal 
attribute getter 1222 * @param coll collator whose attributes are to be queried 1223 * @param attr attribute type 1224 * @param status to indicate whether the operation went on smoothly or there were errors 1225 * @return attribute value 1226 * @see UColAttribute 1227 * @see UColAttributeValue 1228 * @see ucol_setAttribute 1229 * @stable ICU 2.0 1230 */ 1231 U_CAPI UColAttributeValue U_EXPORT2 1232 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status); 1233 1234 /** 1235 * Sets the variable top to the top of the specified reordering group. 1236 * The variable top determines the highest-sorting character 1237 * which is affected by UCOL_ALTERNATE_HANDLING. 1238 * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect. 1239 * @param coll the collator 1240 * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, 1241 * UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY; 1242 * or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group 1243 * @param pErrorCode Standard ICU error code. Its input value must 1244 * pass the U_SUCCESS() test, or else the function returns 1245 * immediately. Check for U_FAILURE() on output or use with 1246 * function chaining. (See User Guide for details.) 1247 * @see ucol_getMaxVariable 1248 * @stable ICU 53 1249 */ 1250 U_CAPI void U_EXPORT2 1251 ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode); 1252 1253 /** 1254 * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING. 1255 * @param coll the collator 1256 * @return the maximum variable reordering group. 1257 * @see ucol_setMaxVariable 1258 * @stable ICU 53 1259 */ 1260 U_CAPI UColReorderCode U_EXPORT2 1261 ucol_getMaxVariable(const UCollator *coll); 1262 1263 #ifndef U_HIDE_DEPRECATED_API 1264 /** 1265 * Sets the variable top to the primary weight of the specified string. 
1266 * 1267 * Beginning with ICU 53, the variable top is pinned to 1268 * the top of one of the supported reordering groups, 1269 * and it must not be beyond the last of those groups. 1270 * See ucol_setMaxVariable(). 1271 * @param coll the collator 1272 * @param varTop one or more (if contraction) UChars to which the variable top should be set 1273 * @param len length of variable top string. If -1 it is considered to be zero terminated. 1274 * @param status error code. If error code is set, the return value is undefined. 1275 * Errors set by this function are:<br> 1276 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> 1277 * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond 1278 * the last reordering group supported by ucol_setMaxVariable() 1279 * @return variable top primary weight 1280 * @see ucol_getVariableTop 1281 * @see ucol_restoreVariableTop 1282 * @deprecated ICU 53 Call ucol_setMaxVariable() instead. 1283 */ 1284 U_DEPRECATED uint32_t U_EXPORT2 1285 ucol_setVariableTop(UCollator *coll, 1286 const UChar *varTop, int32_t len, 1287 UErrorCode *status); 1288 #endif /* U_HIDE_DEPRECATED_API */ 1289 1290 /** 1291 * Gets the variable top value of a Collator. 1292 * @param coll collator which variable top needs to be retrieved 1293 * @param status error code (not changed by function). If error code is set, 1294 * the return value is undefined. 1295 * @return the variable top primary weight 1296 * @see ucol_getMaxVariable 1297 * @see ucol_setVariableTop 1298 * @see ucol_restoreVariableTop 1299 * @stable ICU 2.0 1300 */ 1301 U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status); 1302 1303 #ifndef U_HIDE_DEPRECATED_API 1304 /** 1305 * Sets the variable top to the specified primary weight. 1306 * 1307 * Beginning with ICU 53, the variable top is pinned to 1308 * the top of one of the supported reordering groups, 1309 * and it must not be beyond the last of those groups. 
1310 * See ucol_setMaxVariable(). 1311 * @param coll collator to be set 1312 * @param varTop primary weight, as returned by ucol_setVariableTop or ucol_getVariableTop 1313 * @param status error code 1314 * @see ucol_getVariableTop 1315 * @see ucol_setVariableTop 1316 * @deprecated ICU 53 Call ucol_setMaxVariable() instead. 1317 */ 1318 U_DEPRECATED void U_EXPORT2 1319 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status); 1320 #endif /* U_HIDE_DEPRECATED_API */ 1321 1322 /** 1323 * Thread safe cloning operation. The result is a clone of a given collator. 1324 * @param coll collator to be cloned 1325 * @param status to indicate whether the operation went on smoothly or there were errors 1326 * @return pointer to the new clone 1327 * @see ucol_open 1328 * @see ucol_openRules 1329 * @see ucol_close 1330 * @stable ICU 71 1331 */ 1332 U_CAPI UCollator* U_EXPORT2 ucol_clone(const UCollator *coll, UErrorCode *status); 1333 1334 #ifndef U_HIDE_DEPRECATED_API 1335 1336 /** 1337 * Thread safe cloning operation. The result is a clone of a given collator. 1338 * @param coll collator to be cloned 1339 * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br> 1340 * user allocated space for the new clone. 1341 * If NULL new memory will be allocated. 1342 * If buffer is not large enough, new memory will be allocated. 1343 * Clients can use the U_COL_SAFECLONE_BUFFERSIZE. 1344 * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br> 1345 * pointer to size of allocated space. 1346 * If *pBufferSize == 0, a sufficient size for use in cloning will 1347 * be returned ('pre-flighting') 1348 * If *pBufferSize is not enough for a stack-based safe clone, 1349 * new memory will be allocated. 
1350 * @param status to indicate whether the operation went on smoothly or there were errors 1351 * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used 1352 * if pBufferSize != NULL and any allocations were necessary 1353 * @return pointer to the new clone 1354 * @see ucol_open 1355 * @see ucol_openRules 1356 * @see ucol_close 1357 * @deprecated ICU 71 Use ucol_clone() instead. 1358 */ 1359 U_DEPRECATED UCollator* U_EXPORT2 1360 ucol_safeClone(const UCollator *coll, 1361 void *stackBuffer, 1362 int32_t *pBufferSize, 1363 UErrorCode *status); 1364 1365 1366 /** default memory size for the new clone. 1367 * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer. 1368 */ 1369 #define U_COL_SAFECLONE_BUFFERSIZE 1 1370 1371 #endif /* U_HIDE_DEPRECATED_API */ 1372 1373 /** 1374 * Copies the current rules into the caller's buffer. Delta defines whether full rules are returned or just the tailoring. 1375 * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough 1376 * to store rules, will store up to available space. 1377 * 1378 * ucol_getRules() should normally be used instead. 1379 * See https://unicode-org.github.io/icu/userguide/collation/customization#building-on-existing-locales 1380 * @param coll collator to get the rules from 1381 * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. 1382 * @param buffer buffer to store the result in. If NULL, you'll get no rules. 1383 * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in. 1384 * @return the number of UChars needed to store the rules 1385 * @stable ICU 2.0 1386 * @see UCOL_FULL_RULES 1387 */ 1388 U_CAPI int32_t U_EXPORT2 1389 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen); 1390 1391 #ifndef U_HIDE_DEPRECATED_API 1392 /** 1393 * gets the locale name of the collator. 
If the collator 1394 * is instantiated from the rules, then this function returns 1395 * NULL. 
1396 * @param coll The UCollator for which the locale is needed 1397 * @param type You can choose between requested, valid and actual 1398 * locale. For description see the definition of 1399 * ULocDataLocaleType in uloc.h 1400 * @param status error code of the operation 1401 * @return real locale name from which the collation data comes. 1402 * If the collator was instantiated from rules, returns 1403 * NULL. 1404 * @deprecated ICU 2.8 Use ucol_getLocaleByType instead 1405 */ 1406 U_DEPRECATED const char * U_EXPORT2 1407 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); 1408 #endif /* U_HIDE_DEPRECATED_API */ 1409 1410 /** 1411 * Gets the locale name of the collator. If the collator 1412 * is instantiated from the rules, then this function returns 1413 * NULL. 1414 * @param coll The UCollator for which the locale is needed 1415 * @param type You can choose between requested, valid and actual 1416 * locale. For description see the definition of 1417 * ULocDataLocaleType in uloc.h 1418 * @param status error code of the operation 1419 * @return real locale name from which the collation data comes. 1420 * If the collator was instantiated from rules, returns 1421 * NULL. 1422 * @stable ICU 2.8 1423 */ 1424 U_CAPI const char * U_EXPORT2 1425 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); 1426 1427 /** 1428 * Get a Unicode set that contains all the characters and sequences tailored in 1429 * this collator. The result must be disposed of by using uset_close. 1430 * @param coll The UCollator for which we want to get tailored chars 1431 * @param status error code of the operation 1432 * @return a pointer to a newly created USet. 
Must be disposed of by using uset_close 1433 * @see ucol_openRules 1434 * @see uset_close 1435 * @stable ICU 2.4 1436 */ 1437 U_CAPI USet * U_EXPORT2 1438 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status); 1439 1440 #ifndef U_HIDE_INTERNAL_API 1441 /** Calculates the set of unsafe code points, given a collator. 1442 * A character is unsafe if you could append any character and cause the ordering to alter significantly. 1443 * Collation sorts in normalized order, so anything that rearranges in normalization can cause this. 1444 * Thus if you have a character like a_umlaut, and you add a lower_dot to it, 1445 * then it normalizes to a_lower_dot + umlaut, and sorts differently. 1446 * @param coll Collator 1447 * @param unsafe a fill-in set to receive the unsafe points 1448 * @param status for catching errors 1449 * @return number of elements in the set 1450 * @internal ICU 3.0 1451 */ 1452 U_CAPI int32_t U_EXPORT2 1453 ucol_getUnsafeSet( const UCollator *coll, 1454 USet *unsafe, 1455 UErrorCode *status); 1456 1457 /** Touches all resources needed for instantiating a collator from a short string definition, 1458 * thus filling up the cache. 1459 * @param definition A short string containing a locale and a set of attributes. 1460 * Attributes not explicitly mentioned are left at the default 1461 * state for a locale. 1462 * @param forceDefaults if false, the settings that are the same as the collator 1463 * default settings will not be applied (for example, setting 1464 * French secondary on a French collator would not be executed). 1465 * If true, all the settings will be applied regardless of the 1466 * collator default value. If the definition 1467 * strings are to be cached, should be set to false. 1468 * @param parseError if not NULL, structure that will get filled with error's pre 1469 * and post context in case of error. 1470 * @param status Error code. 
Apart from regular error conditions connected to 1471 * instantiating collators (like out of memory or similar), this 1472 * API will return an error if an invalid attribute or attribute/value 1473 * combination is specified. 1474 * @see ucol_openFromShortString 1475 * @internal ICU 3.2.1 1476 */ 1477 U_CAPI void U_EXPORT2 1478 ucol_prepareShortStringOpen( const char *definition, 1479 UBool forceDefaults, 1480 UParseError *parseError, 1481 UErrorCode *status); 1482 #endif /* U_HIDE_INTERNAL_API */ 1483 1484 /** Creates a binary image of a collator. This binary image can be stored and 1485 * later used to instantiate a collator using ucol_openBinary. 1486 * This API supports preflighting. 1487 * @param coll Collator 1488 * @param buffer a fill-in buffer to receive the binary image 1489 * @param capacity capacity of the destination buffer 1490 * @param status for catching errors 1491 * @return size of the image 1492 * @see ucol_openBinary 1493 * @stable ICU 3.2 1494 */ 1495 U_CAPI int32_t U_EXPORT2 1496 ucol_cloneBinary(const UCollator *coll, 1497 uint8_t *buffer, int32_t capacity, 1498 UErrorCode *status); 1499 1500 /** Opens a collator from a collator binary image created using 1501 * ucol_cloneBinary. Binary image used in instantiation of the 1502 * collator remains owned by the user and should stay around for 1503 * the lifetime of the collator. The API also takes a base collator 1504 * which must be the root collator. 1505 * @param bin binary image owned by the user and required through the 1506 * lifetime of the collator 1507 * @param length size of the image. If negative, the API will try to 1508 * figure out the length of the image 1509 * @param base Base collator, for lookup of untailored characters. 1510 * Must be the root collator, must not be NULL. 1511 * The base is required to be present through the lifetime of the collator. 
1512 * @param status for catching errors 1513 * @return newly created collator 1514 * @see ucol_cloneBinary 1515 * @stable ICU 3.2 1516 */ 1517 U_CAPI UCollator* U_EXPORT2 1518 ucol_openBinary(const uint8_t *bin, int32_t length, 1519 const UCollator *base, 1520 UErrorCode *status); 1521 1522 1523 #endif /* #if !UCONFIG_NO_COLLATION */ 1524 1525 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |