Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /include/unicode/ucol.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 // © 2016 and later: Unicode, Inc. and others.
0002 // License & terms of use: http://www.unicode.org/copyright.html
0003 /*
0004 *******************************************************************************
0005 * Copyright (c) 1996-2015, International Business Machines Corporation and others.
0006 * All Rights Reserved.
0007 *******************************************************************************
0008 */
0009 
0010 #ifndef UCOL_H
0011 #define UCOL_H
0012 
0013 #include "unicode/utypes.h"
0014 
0015 #if !UCONFIG_NO_COLLATION
0016 
0017 #include "unicode/unorm.h"
0018 #include "unicode/parseerr.h"
0019 #include "unicode/uloc.h"
0020 #include "unicode/uset.h"
0021 #include "unicode/uscript.h"
0022 
0023 #if U_SHOW_CPLUSPLUS_API
0024 #include "unicode/localpointer.h"
0025 #endif   // U_SHOW_CPLUSPLUS_API
0026 
0027 /**
0028  * \file
0029  * \brief C API: Collator 
0030  *
0031  * <h2> Collator C API </h2>
0032  *
0033  * The C API for Collator performs locale-sensitive
0034  * string comparison. You use this service to build
0035  * searching and sorting routines for natural language text.
0036  * <p>
0037  * For more information about the collation service see 
0038  * <a href="https://unicode-org.github.io/icu/userguide/collation">the User Guide</a>.
0039  * <p>
0040  * Collation service provides correct sorting orders for most locales supported in ICU. 
0041  * If specific data for a locale is not available, the order eventually falls back
0042  * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>. 
0043  * <p>
0044  * Sort ordering may be customized by providing your own set of rules. For more on
0045  * this subject see the <a href="https://unicode-org.github.io/icu/userguide/collation/customization">
0046  * Collation Customization</a> section of the User Guide.
0047  * <p>
0048  * @see         UCollationResult
0049  * @see         UNormalizationMode
0050  * @see         UCollationStrength
0051  * @see         UCollationElements
0052  */
0053 
0054 /** A collator.
0055 *  For usage in C programs.
0056 */
0057 struct UCollator;
0058 /** structure representing a collator object instance 
0059  * @stable ICU 2.0
0060  */
0061 typedef struct UCollator UCollator;
0062 
0063 
0064 /**
0065  * UCOL_LESS is returned if source string is compared to be less than target
0066  * string in the ucol_strcoll() method.
0067  * UCOL_EQUAL is returned if source string is compared to be equal to target
0068  * string in the ucol_strcoll() method.
0069  * UCOL_GREATER is returned if source string is compared to be greater than
0070  * target string in the ucol_strcoll() method.
0071  * @see ucol_strcoll()
0072  * <p>
0073  * Possible values for a comparison result 
0074  * @stable ICU 2.0
0075  */
0076 typedef enum {
0077   /** the source string compares equal to the target string */
0078   UCOL_EQUAL    = 0,
0079   /** the source string compares greater than the target string */
0080   UCOL_GREATER    = 1,
0081   /** the source string compares less than the target string */
0082   UCOL_LESS    = -1
0083 } UCollationResult ;
0084 
0085 
0086 /** Enum containing attribute values for controlling collation behavior.
0087  * Here are all the allowable values. Not every attribute can take every value. The only
0088  * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined  
0089  * value for that locale 
0090  * @stable ICU 2.0
0091  */
0092 typedef enum {
0093   /** accepted by most attributes */
0094   UCOL_DEFAULT = -1,
0095 
0096   /** Primary collation strength */
0097   UCOL_PRIMARY = 0,
0098   /** Secondary collation strength */
0099   UCOL_SECONDARY = 1,
0100   /** Tertiary collation strength */
0101   UCOL_TERTIARY = 2,
0102   /** Default collation strength */
0103   UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
0104   UCOL_CE_STRENGTH_LIMIT, /* implicit value 3 (UCOL_TERTIARY + 1); numerically equal to UCOL_QUATERNARY */
0105   /** Quaternary collation strength */
0106   UCOL_QUATERNARY=3,
0107   /** Identical collation strength */
0108   UCOL_IDENTICAL=15,
0109   UCOL_STRENGTH_LIMIT, /* implicit value 16 (UCOL_IDENTICAL + 1); numerically equal to UCOL_OFF */
0110 
0111   /** Turn the feature off - works for UCOL_FRENCH_COLLATION, 
0112       UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
0113       & UCOL_DECOMPOSITION_MODE*/
0114   UCOL_OFF = 16,
0115   /** Turn the feature on - works for UCOL_FRENCH_COLLATION, 
0116       UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
0117       & UCOL_DECOMPOSITION_MODE*/
0118   UCOL_ON = 17,
0119   
0120   /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */
0121   UCOL_SHIFTED = 20,
0122   /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */
0123   UCOL_NON_IGNORABLE = 21,
0124 
0125   /** Valid for UCOL_CASE_FIRST - 
0126       lower case sorts before upper case */
0127   UCOL_LOWER_FIRST = 24,
0128   /** Valid for UCOL_CASE_FIRST - upper case sorts before lower case */
0129   UCOL_UPPER_FIRST = 25,
0130 
0131 #ifndef U_HIDE_DEPRECATED_API
0132     /**
0133      * One more than the highest normal UColAttributeValue value.
0134      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
0135      */
0136   UCOL_ATTRIBUTE_VALUE_COUNT
0137 #endif  /* U_HIDE_DEPRECATED_API */
0138 } UColAttributeValue;
0139 
0140 /**
0141  * Enum containing the codes for reordering segments of the collation table that are not script
0142  * codes. These reordering codes are to be used in conjunction with the script codes.
0143  * @see ucol_getReorderCodes
0144  * @see ucol_setReorderCodes
0145  * @see ucol_getEquivalentReorderCodes
0146  * @see UScriptCode
0147  * @stable ICU 4.8
0148  */
0149  typedef enum {
0150    /**
0151     * A special reordering code that is used to specify the default
0152     * reordering codes for a locale.
0153     * @stable ICU 4.8
0154     */   
0155     UCOL_REORDER_CODE_DEFAULT       = -1,
0156    /**
0157     * A special reordering code that is used to specify no reordering codes.
0158     * @stable ICU 4.8
0159     */   
0160     UCOL_REORDER_CODE_NONE          = USCRIPT_UNKNOWN,
0161    /**
0162     * A special reordering code that is used to specify all other codes used for
0163     * reordering except for the codes listed as UColReorderCode values and those
0164     * listed explicitly in a reordering. Defined with the same value (USCRIPT_UNKNOWN) as UCOL_REORDER_CODE_NONE.
0165     * @stable ICU 4.8
0166     */   
0167     UCOL_REORDER_CODE_OTHERS        = USCRIPT_UNKNOWN,
0168    /**
0169     * Characters with the space property.
0170     * This is equivalent to the rule value "space".
0171     * @stable ICU 4.8
0172     */    
0173     UCOL_REORDER_CODE_SPACE         = 0x1000,
0174    /**
0175     * The first entry in the enumeration of reordering groups. This is intended for use in
0176     * range checking and enumeration of the reorder codes.
0177     * @stable ICU 4.8
0178     */    
0179     UCOL_REORDER_CODE_FIRST         = UCOL_REORDER_CODE_SPACE,
0180    /**
0181     * Characters with the punctuation property.
0182     * This is equivalent to the rule value "punct".
0183     * @stable ICU 4.8
0184     */    
0185     UCOL_REORDER_CODE_PUNCTUATION   = 0x1001,
0186    /**
0187     * Characters with the symbol property.
0188     * This is equivalent to the rule value "symbol".
0189     * @stable ICU 4.8
0190     */    
0191     UCOL_REORDER_CODE_SYMBOL        = 0x1002,
0192    /**
0193     * Characters with the currency property.
0194     * This is equivalent to the rule value "currency".
0195     * @stable ICU 4.8
0196     */    
0197     UCOL_REORDER_CODE_CURRENCY      = 0x1003,
0198    /**
0199     * Characters with the digit property.
0200     * This is equivalent to the rule value "digit".
0201     * @stable ICU 4.8
0202     */    
0203     UCOL_REORDER_CODE_DIGIT         = 0x1004,
0204 #ifndef U_HIDE_DEPRECATED_API
0205     /**
0206      * One more than the highest normal UColReorderCode value.
0207      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
0208      */
0209     UCOL_REORDER_CODE_LIMIT         = 0x1005
0210 #endif  /* U_HIDE_DEPRECATED_API */
0211 } UColReorderCode;
0212 
0213 /**
0214  * Base letter represents a primary difference.  Set comparison
0215  * level to UCOL_PRIMARY to ignore secondary and tertiary differences.
0216  * Use this to set the strength of a Collator object.
0217  * Example of primary difference, "abc" &lt; "abd"
0218  * 
0219  * Diacritical differences on the same base letter represent a secondary
0220  * difference.  Set comparison level to UCOL_SECONDARY to ignore tertiary
0221  * differences. Use this to set the strength of a Collator object.
0222  * Example of secondary difference, "&auml;" >> "a".
0223  *
0224  * Uppercase and lowercase versions of the same character represent a
0225  * tertiary difference.  Set comparison level to UCOL_TERTIARY to include
0226  * all comparison differences. Use this to set the strength of a Collator
0227  * object.
0228  * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
0229  *
0230  * Two characters are considered "identical" when they have the same
0231  * unicode spellings.  UCOL_IDENTICAL.
0232  * For example, "&auml;" == "&auml;".
0233  *
0234  * UCollationStrength is also used to determine the strength of sort keys 
0235  * generated from UCollator objects
0236  * These values can be now found in the UColAttributeValue enum.
0237  * @stable ICU 2.0
0238  **/
0239 typedef UColAttributeValue UCollationStrength;
0240 
0241 /** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT
0242  * value, as well as the values specific to each one. 
0243  * @stable ICU 2.0
0244  */
0245 typedef enum {
0246      /** Attribute for direction of secondary weights - used in Canadian French.
0247       * Acceptable values are UCOL_ON, which results in secondary weights
0248       * being considered backwards and UCOL_OFF which treats secondary
0249       * weights in the order they appear.
0250       * @stable ICU 2.0
0251       */
0252      UCOL_FRENCH_COLLATION, 
0253      /** Attribute for handling variable elements.
0254       * Acceptable values are UCOL_NON_IGNORABLE
0255       * which treats all the codepoints with non-ignorable
0256       * primary weights in the same way,
0257       * and UCOL_SHIFTED which causes codepoints with primary
0258       * weights that are equal or below the variable top value
0259       * to be ignored on primary level and moved to the quaternary
0260       * level. The default setting in a Collator object depends on the
0261       * locale data loaded from the resources. For most locales, the
0262       * default is UCOL_NON_IGNORABLE, but for others, such as "th",
0263       * the default could be UCOL_SHIFTED.
0264       * @stable ICU 2.0
0265       */
0266      UCOL_ALTERNATE_HANDLING,
0267      /** Controls the ordering of upper and lower case letters.
0268       * Acceptable values are UCOL_OFF, which orders
0269       * upper and lower case letters in accordance to their tertiary
0270       * weights, UCOL_UPPER_FIRST which forces upper case letters to
0271       * sort before lower case letters, and UCOL_LOWER_FIRST which does
0272       * the opposite. The default setting in a Collator object depends on the
0273       * locale data loaded from the resources. For most locales, the
0274       * default is UCOL_OFF, but for others, such as "da" or "mt",
0275       * the default could be UCOL_UPPER_FIRST.
0276       * @stable ICU 2.0
0277       */
0278      UCOL_CASE_FIRST,
0279      /** Controls whether an extra case level (positioned before the third
0280       * level) is generated or not. Acceptable values are UCOL_OFF,
0281       * when case level is not generated, and UCOL_ON which causes the case
0282       * level to be generated. Contents of the case level are affected by
0283       * the value of UCOL_CASE_FIRST attribute. A simple way to ignore
0284       * accent differences in a string is to set the strength to UCOL_PRIMARY
0285       * and enable case level. The default setting in a Collator object depends
0286       * on the locale data loaded from the resources.
0287       * @stable ICU 2.0
0288       */
0289      UCOL_CASE_LEVEL,
0290      /** Controls whether the normalization check and necessary normalizations
0291       * are performed. When set to UCOL_OFF no normalization check
0292       * is performed. The correctness of the result is guaranteed only if the
0293       * input data is in so-called FCD form (see users manual for more info).
0294       * When set to UCOL_ON, an incremental check is performed to see whether
0295       * the input data is in the FCD form. If the data is not in the FCD form,
0296       * incremental NFD normalization is performed. The default setting in a
0297       * Collator object depends on the locale data loaded from the resources.
0298       * For many locales, the default is UCOL_OFF, but for others, such as "hi",
0299       * "vi", or "bn", the default could be UCOL_ON.
0300       * @stable ICU 2.0
0301       */
0302      UCOL_NORMALIZATION_MODE, 
0303      /** An alias for UCOL_NORMALIZATION_MODE attribute.
0304       * @stable ICU 2.0
0305       */
0306      UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
0307      /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY,
0308       * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength
0309       * for most locales (except Japanese) is tertiary.
0310       *
0311       * Quaternary strength 
0312       * is useful when combined with shifted setting for alternate handling
0313       * attribute and for JIS X 4061 collation, when it is used to distinguish
0314       * between Katakana and Hiragana.
0315       * Otherwise, quaternary level
0316       * is affected only by the number of non-ignorable code points in
0317       * the string.
0318       *
0319       * Identical strength is rarely useful, as it amounts 
0320       * to codepoints of the NFD form of the string.
0321       * @stable ICU 2.0
0322       */
0323      UCOL_STRENGTH,  
0324 #ifndef U_HIDE_DEPRECATED_API
0325      /** When turned on, this attribute positions Hiragana before all  
0326       * non-ignorables on quaternary level. This is a sneaky way to produce JIS
0327       * sort order.
0328       *
0329       * This attribute was an implementation detail of the CLDR Japanese tailoring.
0330       * Since ICU 50, this attribute is not settable any more via API functions.
0331       * Since CLDR 25/ICU 53, explicit quaternary relations are used
0332       * to achieve the same Japanese sort order.
0333       *
0334       * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
0335       */
0336      UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1,
0337 #endif  /* U_HIDE_DEPRECATED_API */
0338      /**
0339       * When turned on, this attribute makes
0340       * substrings of digits sort according to their numeric values.
0341       *
0342       * This is a way to get '100' to sort AFTER '2'. Note that the longest
0343       * digit substring that can be treated as a single unit is
0344       * 254 digits (not counting leading zeros). If a digit substring is
0345       * longer than that, the digits beyond the limit will be treated as a
0346       * separate digit substring.
0347       *
0348       * A "digit" in this sense is a code point with General_Category=Nd,
0349       * which does not include circled numbers, roman numerals, etc.
0350       * Only a contiguous digit substring is considered, that is,
0351       * non-negative integers without separators.
0352       * There is no support for plus/minus signs, decimals, exponents, etc.
0353       *
0354       * @stable ICU 2.8
0355       */
0356      UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2, 
0357 
0358     /* Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API,
0359      * it is needed for layout of RuleBasedCollator object. */
0360 #ifndef U_FORCE_HIDE_DEPRECATED_API
0361     /**
0362      * One more than the highest normal UColAttribute value.
0363      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
0364      */
0365      UCOL_ATTRIBUTE_COUNT
0366 #endif  // U_FORCE_HIDE_DEPRECATED_API
0367 } UColAttribute;
0368 
0369 /** Options for retrieving the rule string 
0370  *  @stable ICU 2.0
0371  */
0372 typedef enum {
0373   /**
0374    * Retrieves the tailoring rules only.
0375    * Same as calling the version of getRules() without UColRuleOption.
0376    * @stable ICU 2.0
0377    */
0378   UCOL_TAILORING_ONLY, 
0379   /**
0380    * Retrieves the "UCA rules" concatenated with the tailoring rules.
0381    * The "UCA rules" are an <i>approximation</i> of the root collator's sort order.
0382    * They are almost never used or useful at runtime and can be removed from the data.
0383    * See https://unicode-org.github.io/icu/userguide/collation/customization#building-on-existing-locales
0384    * @stable ICU 2.0
0385    */
0386   UCOL_FULL_RULES 
0387 } UColRuleOption ;
0388 
0389 /**
0390  * Open a UCollator for comparing strings.
0391  *
0392  * For some languages, multiple collation types are available;
0393  * for example, "de@collation=phonebook".
0394  * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
0395  * in the old locale extension syntax ("el@colCaseFirst=upper")
0396  * or in language tag syntax ("el-u-kf-upper").
0397  * See <a href="https://unicode-org.github.io/icu/userguide/collation/api">User Guide: Collation API</a>.
0398  *
0399  * The UCollator pointer is used in all the calls to the Collation 
0400  * service. When finished, the collator must be disposed of by calling
0401  * {@link #ucol_close }.
0402  * @param loc The locale containing the required collation rules. 
0403  *            Special values for locales can be passed in - 
0404  *            if NULL is passed for the locale, the default locale
0405  *            collation rules will be used. If empty string ("") or
0406  *            "root" are passed, the root collator will be returned.
0407  * @param status A pointer to a UErrorCode to receive any errors
0408  * @return A pointer to a UCollator, or 0 if an error occurred.
0409  * @see ucol_openRules
0410  * @see ucol_clone
0411  * @see ucol_close
0412  * @stable ICU 2.0
0413  */
0414 U_CAPI UCollator* U_EXPORT2 
0415 ucol_open(const char *loc, UErrorCode *status);
0416 
0417 /**
0418  * Produce a UCollator instance according to the rules supplied.
0419  * The rules are used to change the default ordering, defined in the
0420  * UCA in a process called tailoring. The resulting UCollator pointer
0421  * can be used in the same way as the one obtained by {@link #ucol_open }.
0422  * @param rules A string describing the collation rules. For the syntax
0423  *              of the rules please see users guide.
0424  * @param rulesLength The length of rules, or -1 if null-terminated.
0425  * @param normalizationMode The normalization mode: One of
0426  *             UCOL_OFF     (expect the text to not need normalization),
0427  *             UCOL_ON      (normalize), or
0428  *             UCOL_DEFAULT (set the mode according to the rules)
0429  * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
0430  * UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH - can be also set in the rules.
0431  * @param parseError  A pointer to UParseError to receive information about errors
0432  *                    occurred during parsing. This argument can currently be set
0433  *                    to NULL, but at users own risk. Please provide a real structure.
0434  * @param status A pointer to a UErrorCode to receive any errors
0435  * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case
0436  *         of error - please use status argument to check for errors.
0437  * @see ucol_open
0438  * @see ucol_clone
0439  * @see ucol_close
0440  * @stable ICU 2.0
0441  */
0442 U_CAPI UCollator* U_EXPORT2 
0443 ucol_openRules( const UChar        *rules,
0444                 int32_t            rulesLength,
0445                 UColAttributeValue normalizationMode,
0446                 UCollationStrength strength,
0447                 UParseError        *parseError,
0448                 UErrorCode         *status);
0449 
0450 #ifndef U_HIDE_DEPRECATED_API
0451 /** 
0452  * Open a collator defined by a short form string.
0453  * The structure and the syntax of the string is defined in the "Naming collators"
0454  * section of the users guide: 
0455  * https://unicode-org.github.io/icu/userguide/collation/concepts#collator-naming-scheme
0456  * Attributes are overridden by the subsequent attributes. So, for "S2_S3", final
0457  * strength will be 3. 3066bis locale overrides individual locale parts.
0458  * The call to this function is equivalent to a call to ucol_open, followed by a 
0459  * series of calls to ucol_setAttribute and ucol_setVariableTop.
0460  * @param definition A short string containing a locale and a set of attributes. 
0461  *                   Attributes not explicitly mentioned are left at the default
0462  *                   state for a locale.
0463  * @param parseError if not NULL, structure that will get filled with error's pre
0464  *                   and post context in case of error.
0465  * @param forceDefaults if false, the settings that are the same as the collator 
0466  *                   default settings will not be applied (for example, setting
0467  *                   French secondary on a French collator would not be executed). 
0468  *                   If true, all the settings will be applied regardless of the 
0469  *                   collator default value. If the definition
0470  *                   strings are to be cached, should be set to false.
0471  * @param status     Error code. Apart from regular error conditions connected to 
0472  *                   instantiating collators (like out of memory or similar), this
0473  *                   API will return an error if an invalid attribute or attribute/value
0474  *                   combination is specified.
0475  * @return           A pointer to a UCollator or 0 if an error occurred (including an 
0476  *                   invalid attribute).
0477  * @see ucol_open
0478  * @see ucol_setAttribute
0479  * @see ucol_setVariableTop
0480  * @see ucol_getShortDefinitionString
0481  * @see ucol_normalizeShortDefinitionString
0482  * @deprecated ICU 54 Use ucol_open() with language tag collation keywords instead.
0483  */
0484 U_DEPRECATED UCollator* U_EXPORT2
0485 ucol_openFromShortString( const char *definition,
0486                           UBool forceDefaults,
0487                           UParseError *parseError,
0488                           UErrorCode *status);
0489 #endif  /* U_HIDE_DEPRECATED_API */
0490 
0491 #ifndef U_HIDE_DEPRECATED_API
0492 /**
0493  * Get a set containing the contractions defined by the collator. The set includes
0494  * both the root collator's contractions and the contractions defined by the collator. This set
0495  * will contain only strings. If a tailoring explicitly suppresses contractions from 
0496  * the root collator (like Russian), removed contractions will not be in the resulting set.
0497  * @param coll collator 
0498  * @param conts the set to hold the result. It gets emptied before
0499  *              contractions are added. 
0500  * @param status to hold the error code
0501  * @return the size of the contraction set
0502  *
0503  * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead
0504  */
0505 U_DEPRECATED int32_t U_EXPORT2
0506 ucol_getContractions( const UCollator *coll,
0507                   USet *conts,
0508                   UErrorCode *status);
0509 #endif  /* U_HIDE_DEPRECATED_API */
0510 
0511 /**
0512  * Get sets containing the contractions and/or the expansions defined by the collator.
0513  * The expansion set includes both the root collator's expansions and the expansions defined by the tailoring.
0514  * @param coll collator
0515  * @param contractions if not NULL, the set to hold the contractions
0516  * @param expansions if not NULL, the set to hold the expansions
0517  * @param addPrefixes add the prefix contextual elements to contractions
0518  * @param status to hold the error code
0519  *
0520  * @stable ICU 3.4
0521  */
0522 U_CAPI void U_EXPORT2
0523 ucol_getContractionsAndExpansions( const UCollator *coll,
0524                   USet *contractions, USet *expansions,
0525                   UBool addPrefixes, UErrorCode *status);
0526 
0527 /** 
0528  * Close a UCollator.
0529  * Once closed, a UCollator should not be used. Every open collator should
0530  * be closed. Otherwise, a memory leak will result.
0531  * @param coll The UCollator to close.
0532  * @see ucol_open
0533  * @see ucol_openRules
0534  * @see ucol_clone
0535  * @stable ICU 2.0
0536  */
0537 U_CAPI void U_EXPORT2 
0538 ucol_close(UCollator *coll);
0539 
0540 #if U_SHOW_CPLUSPLUS_API
0541 
0542 U_NAMESPACE_BEGIN
0543 
0544 /**
0545  * \class LocalUCollatorPointer
0546  * "Smart pointer" class, closes a UCollator via ucol_close().
0547  * For most methods see the LocalPointerBase base class.
0548  *
0549  * @see LocalPointerBase
0550  * @see LocalPointer
0551  * @stable ICU 4.4
0552  */
0553 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close);
0554 
0555 U_NAMESPACE_END
0556 
0557 #endif
0558 
0559 /**
0560  * Compare two strings.
0561  * The strings will be compared using the options already specified.
0562  * @param coll The UCollator containing the comparison rules.
0563  * @param source The source string.
0564  * @param sourceLength The length of source, or -1 if null-terminated.
0565  * @param target The target string.
0566  * @param targetLength The length of target, or -1 if null-terminated.
0567  * @return The result of comparing the strings; one of UCOL_EQUAL,
0568  * UCOL_GREATER, UCOL_LESS
0569  * @see ucol_greater
0570  * @see ucol_greaterOrEqual
0571  * @see ucol_equal
0572  * @stable ICU 2.0
0573  */
0574 U_CAPI UCollationResult U_EXPORT2 
0575 ucol_strcoll(    const    UCollator    *coll,
0576         const    UChar        *source,
0577         int32_t            sourceLength,
0578         const    UChar        *target,
0579         int32_t            targetLength);
0580 
0581 /** 
0582 * Compare two strings in UTF-8. 
0583 * The strings will be compared using the options already specified. 
0584 * Note: When input string contains a malformed UTF-8 byte sequence, 
0585 * this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD).
0586 * @param coll The UCollator containing the comparison rules. 
0587 * @param source The source UTF-8 string. 
0588 * @param sourceLength The length of source, or -1 if null-terminated. 
0589 * @param target The target UTF-8 string. 
0590 * @param targetLength The length of target, or -1 if null-terminated. 
0591 * @param status A pointer to a UErrorCode to receive any errors 
0592 * @return The result of comparing the strings; one of UCOL_EQUAL, 
0593 * UCOL_GREATER, UCOL_LESS 
0594 * @see ucol_greater 
0595 * @see ucol_greaterOrEqual 
0596 * @see ucol_equal 
0597 * @stable ICU 50 
0598 */ 
0599 U_CAPI UCollationResult U_EXPORT2
0600 ucol_strcollUTF8(
0601         const UCollator *coll,
0602         const char      *source,
0603         int32_t         sourceLength,
0604         const char      *target,
0605         int32_t         targetLength,
0606         UErrorCode      *status);
0607 
0608 /**
0609  * Determine if one string is greater than another.
0610  * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER
0611  * @param coll The UCollator containing the comparison rules.
0612  * @param source The source string.
0613  * @param sourceLength The length of source, or -1 if null-terminated.
0614  * @param target The target string.
0615  * @param targetLength The length of target, or -1 if null-terminated.
0616  * @return true if source is greater than target, false otherwise.
0617  * @see ucol_strcoll
0618  * @see ucol_greaterOrEqual
0619  * @see ucol_equal
0620  * @stable ICU 2.0
0621  */
0622 U_CAPI UBool U_EXPORT2 
0623 ucol_greater(const UCollator *coll,
0624              const UChar     *source, int32_t sourceLength,
0625              const UChar     *target, int32_t targetLength);
0626 
0627 /**
0628  * Determine if one string is greater than or equal to another.
0629  * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS
0630  * @param coll The UCollator containing the comparison rules.
0631  * @param source The source string.
0632  * @param sourceLength The length of source, or -1 if null-terminated.
0633  * @param target The target string.
0634  * @param targetLength The length of target, or -1 if null-terminated.
0635  * @return true if source is greater than or equal to target, false otherwise.
0636  * @see ucol_strcoll
0637  * @see ucol_greater
0638  * @see ucol_equal
0639  * @stable ICU 2.0
0640  */
0641 U_CAPI UBool U_EXPORT2 
0642 ucol_greaterOrEqual(const UCollator *coll,
0643                     const UChar     *source, int32_t sourceLength,
0644                     const UChar     *target, int32_t targetLength);
0645 
0646 /**
0647  * Compare two strings for equality.
0648  * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL
0649  * @param coll The UCollator containing the comparison rules.
0650  * @param source The source string.
0651  * @param sourceLength The length of source, or -1 if null-terminated.
0652  * @param target The target string.
0653  * @param targetLength The length of target, or -1 if null-terminated.
0654  * @return true if source is equal to target, false otherwise
0655  * @see ucol_strcoll
0656  * @see ucol_greater
0657  * @see ucol_greaterOrEqual
0658  * @stable ICU 2.0
0659  */
0660 U_CAPI UBool U_EXPORT2 
0661 ucol_equal(const UCollator *coll,
0662            const UChar     *source, int32_t sourceLength,
0663            const UChar     *target, int32_t targetLength);
0664 
0665 /**
0666  * Compare two strings supplied through UCharIterator objects.
0667  * The strings will be compared using the options already specified.
0668  * @param coll The UCollator containing the comparison rules.
0669  * @param sIter The source string iterator.
0670  * @param tIter The target string iterator.
0671  * @param status A pointer to a UErrorCode to receive any errors
0672  * @return The result of comparing the strings; one of UCOL_EQUAL,
0673  * UCOL_GREATER, UCOL_LESS
0674  * @see ucol_strcoll
0675  * @stable ICU 2.6
0676  */
0677 U_CAPI UCollationResult U_EXPORT2 
0678 ucol_strcollIter(  const    UCollator    *coll,
0679                   UCharIterator *sIter,
0680                   UCharIterator *tIter,
0681                   UErrorCode *status);
0682 
0683 /**
0684  * Get the collation strength used in a UCollator.
0685  * The strength influences how strings are compared.
0686  * @param coll The UCollator to query.
0687  * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
0688  * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL
0689  * @see ucol_setStrength
0690  * @stable ICU 2.0
0691  */
0692 U_CAPI UCollationStrength U_EXPORT2 
0693 ucol_getStrength(const UCollator *coll);
0694 
0695 /**
0696  * Set the collation strength used in a UCollator.
0697  * The strength influences how strings are compared.
0698  * @param coll The UCollator to set.
0699  * @param strength The desired collation strength; one of UCOL_PRIMARY, 
0700  * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT
0701  * @see ucol_getStrength
0702  * @stable ICU 2.0
0703  */
0704 U_CAPI void U_EXPORT2 
0705 ucol_setStrength(UCollator *coll,
0706                  UCollationStrength strength);
0707 
0708 /**
0709  * Retrieves the reordering codes for this collator.
0710  * These reordering codes are a combination of UScript codes and UColReorderCode entries.
0711  * @param coll The UCollator to query.
0712  * @param dest The array to fill with the script ordering.
0713  * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function 
0714  * will only return the length of the result without writing any codes (pre-flighting).
0715  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a 
0716  * failure before the function call.
0717  * @return The number of reordering codes written to the dest array.
0718  * @see ucol_setReorderCodes
0719  * @see ucol_getEquivalentReorderCodes
0720  * @see UScriptCode
0721  * @see UColReorderCode
0722  * @stable ICU 4.8
0723  */
0724 U_CAPI int32_t U_EXPORT2 
0725 ucol_getReorderCodes(const UCollator* coll,
0726                     int32_t* dest,
0727                     int32_t destCapacity,
0728                     UErrorCode *pErrorCode);
0729 /** 
0730  * Sets the reordering codes for this collator.
0731  * Collation reordering allows scripts and some other groups of characters
0732  * to be moved relative to each other. This reordering is done on top of
0733  * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed 
0734  * at the start and/or the end of the collation order. These groups are specified using
0735  * UScript codes and UColReorderCode entries.
0736  *
0737  * <p>By default, reordering codes specified for the start of the order are placed in the 
0738  * order given after several special non-script blocks. These special groups of characters
0739  * are space, punctuation, symbol, currency, and digit. These special groups are represented with
0740  * UColReorderCode entries. Script groups can be intermingled with 
0741  * these special non-script groups if those special groups are explicitly specified in the reordering.
0742  *
0743  * <p>The special code OTHERS stands for any script that is not explicitly 
0744  * mentioned in the list of reordering codes given. Anything that is after OTHERS
0745  * will go at the very end of the reordering in the order given.
0746  *
0747  * <p>The special reorder code DEFAULT will reset the reordering for this collator
0748  * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
0749  * was specified when this collator was created from resource data or from rules. The 
0750  * DEFAULT code <b>must</b> be the sole code supplied when it is used.
0751  * If not, then U_ILLEGAL_ARGUMENT_ERROR will be set.
0752  *
0753  * <p>The special reorder code NONE will remove any reordering for this collator.
0754  * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The 
0755  * NONE code <b>must</b> be the sole code supplied when it is used.
0756  *
0757  * @param coll The UCollator to set.
0758  * @param reorderCodes An array of script codes in the new order. This can be NULL if the 
0759  * length is also set to 0. An empty array will clear any reordering codes on the collator.
0760  * @param reorderCodesLength The length of reorderCodes.
0761  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
0762  * failure before the function call.
0763  * @see ucol_getReorderCodes
0764  * @see ucol_getEquivalentReorderCodes
0765  * @see UScriptCode
0766  * @see UColReorderCode
0767  * @stable ICU 4.8
0768  */ 
0769 U_CAPI void U_EXPORT2 
0770 ucol_setReorderCodes(UCollator* coll,
0771                     const int32_t* reorderCodes,
0772                     int32_t reorderCodesLength,
0773                     UErrorCode *pErrorCode);
0774 
0775 /**
0776  * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
0777  * codes will be grouped and must reorder together.
0778  * Beginning with ICU 55, scripts only reorder together if they are primary-equal,
0779  * for example Hiragana and Katakana.
0780  *
0781  * @param reorderCode The reorder code to determine equivalence for.
0782  * @param dest The array to fill with the equivalent reorder codes.
0783  * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
0784  * will only return the length of the result without writing any codes (pre-flighting).
0785  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate 
0786  * a failure before the function call.
0787  * @return The number of reordering codes written to the dest array.
0788  * @see ucol_setReorderCodes
0789  * @see ucol_getReorderCodes
0790  * @see UScriptCode
0791  * @see UColReorderCode
0792  * @stable ICU 4.8
0793  */
0794 U_CAPI int32_t U_EXPORT2 
0795 ucol_getEquivalentReorderCodes(int32_t reorderCode,
0796                     int32_t* dest,
0797                     int32_t destCapacity,
0798                     UErrorCode *pErrorCode);
0799 
0800 /**
0801  * Get the display name for a UCollator.
0802  * The display name is suitable for presentation to a user.
0803  * @param objLoc The locale of the collator in question.
0804  * @param dispLoc The locale for display.
0805  * @param result A pointer to a buffer to receive the display name.
0806  * @param resultLength The maximum size of result.
0807  * @param status A pointer to a UErrorCode to receive any errors
0808  * @return The total buffer size needed; if greater than resultLength,
0809  * the output was truncated.
0810  * @stable ICU 2.0
0811  */
0812 U_CAPI int32_t U_EXPORT2 
0813 ucol_getDisplayName(    const    char        *objLoc,
0814             const    char        *dispLoc,
0815             UChar             *result,
0816             int32_t         resultLength,
0817             UErrorCode        *status);
0818 
0819 /**
0820  * Get a locale for which collation rules are available.
0821  * A UCollator in a locale returned by this function will perform the correct
0822  * collation for the locale.
0823  * @param localeIndex The index of the desired locale.
0824  * @return A locale for which collation rules are available, or NULL if none.
0825  * @see ucol_countAvailable
0826  * @stable ICU 2.0
0827  */
0828 U_CAPI const char* U_EXPORT2 
0829 ucol_getAvailable(int32_t localeIndex);
0830 
0831 /**
0832  * Determine how many locales have collation rules available.
0833  * This function is most useful as determining the loop ending condition for
0834  * calls to {@link #ucol_getAvailable }.
0835  * @return The number of locales for which collation rules are available.
0836  * @see ucol_getAvailable
0837  * @stable ICU 2.0
0838  */
0839 U_CAPI int32_t U_EXPORT2 
0840 ucol_countAvailable(void);
0841 
0842 #if !UCONFIG_NO_SERVICE
0843 /**
0844  * Create a string enumerator of all locales for which a valid
0845  * collator may be opened.
0846  * @param status input-output error code
0847  * @return a string enumeration over locale strings. The caller is
0848  * responsible for closing the result.
0849  * @stable ICU 3.0
0850  */
0851 U_CAPI UEnumeration* U_EXPORT2
0852 ucol_openAvailableLocales(UErrorCode *status);
0853 #endif
0854 
0855 /**
0856  * Create a string enumerator of all possible keywords that are relevant to
0857  * collation. At this point, the only recognized keyword for this
0858  * service is "collation".
0859  * @param status input-output error code
0860  * @return a string enumeration over locale strings. The caller is
0861  * responsible for closing the result.
0862  * @stable ICU 3.0
0863  */
0864 U_CAPI UEnumeration* U_EXPORT2
0865 ucol_getKeywords(UErrorCode *status);
0866 
0867 /**
0868  * Given a keyword, create a string enumeration of all values
0869  * for that keyword that are currently in use.
0870  * @param keyword a particular keyword as enumerated by
0871  * ucol_getKeywords. If any other keyword is passed in, *status is set
0872  * to U_ILLEGAL_ARGUMENT_ERROR.
0873  * @param status input-output error code
0874  * @return a string enumeration over collation keyword values, or NULL
0875  * upon error. The caller is responsible for closing the result.
0876  * @stable ICU 3.0
0877  */
0878 U_CAPI UEnumeration* U_EXPORT2
0879 ucol_getKeywordValues(const char *keyword, UErrorCode *status);
0880 
0881 /**
0882  * Given a key and a locale, returns an array of string values in a preferred
0883  * order that would make a difference. These are all and only those values where
0884  * the open (creation) of the service with the locale formed from the input locale
0885  * plus input keyword and that value has different behavior than creation with the
0886  * input locale alone.
0887  * @param key           one of the keys supported by this service.  For now, only
0888  *                      "collation" is supported.
0889  * @param locale        the locale
0890  * @param commonlyUsed  if set to true it will return only commonly used values
0891  *                      with the given locale in preferred order.  Otherwise,
0892  *                      it will return all the available values for the locale.
0893  * @param status error status
0894  * @return a string enumeration over keyword values for the given key and the locale.
0895  * @stable ICU 4.2
0896  */
0897 U_CAPI UEnumeration* U_EXPORT2
0898 ucol_getKeywordValuesForLocale(const char* key,
0899                                const char* locale,
0900                                UBool commonlyUsed,
0901                                UErrorCode* status);
0902 
0903 /**
0904  * Return the functionally equivalent locale for the specified
0905  * input locale, with respect to given keyword, for the
0906  * collation service. If two different input locale + keyword
0907  * combinations produce the same result locale, then collators
0908  * instantiated for these two different input locales will behave
0909  * equivalently. The converse is not always true; two collators
0910  * may in fact be equivalent, but return different results, due to
0911  * internal details. The return result has no other meaning than
0912  * that stated above, and implies nothing as to the relationship
0913  * between the two locales. This is intended for use by
0914  * applications who wish to cache collators, or otherwise reuse
0915  * collators when possible. The functional equivalent may change
0916  * over time. For more information, please see the <a
0917  * href="https://unicode-org.github.io/icu/userguide/locale#locales-and-services">
0918  * Locales and Services</a> section of the ICU User Guide.
0919  * @param result fillin for the functionally equivalent result locale
0920  * @param resultCapacity capacity of the fillin buffer
0921  * @param keyword a particular keyword as enumerated by
0922  * ucol_getKeywords.
0923  * @param locale the specified input locale
0924  * @param isAvailable if non-NULL, pointer to a fillin parameter that
0925  * on return indicates whether the specified input locale was 'available'
0926  * to the collation service. A locale is defined as 'available' if it
0927  * physically exists within the collation locale data.
0928  * @param status pointer to input-output error code
0929  * @return the actual buffer size needed for the locale. If greater
0930  * than resultCapacity, the returned full name will be truncated and
0931  * an error code will be returned.
0932  * @stable ICU 3.0
0933  */
0934 U_CAPI int32_t U_EXPORT2
0935 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
0936                              const char* keyword, const char* locale,
0937                              UBool* isAvailable, UErrorCode* status);
0938 
0939 /**
0940  * Get the collation tailoring rules from a UCollator.
0941  * The rules will follow the rule syntax.
0942  * @param coll The UCollator to query.
0943  * @param length Output parameter; presumably receives the length of the returned rules (TODO confirm).
0944  * @return The collation tailoring rules.
0945  * @stable ICU 2.0
0946  */
0947 U_CAPI const UChar* U_EXPORT2 
0948 ucol_getRules(    const    UCollator    *coll, 
0949         int32_t            *length);
0950 
0951 #ifndef U_HIDE_DEPRECATED_API
0952 /** Get the short definition string for a collator. This API harvests the collator's
0953  *  locale and the attribute set and produces a string that can be used for opening 
0954  *  a collator with the same attributes using the ucol_openFromShortString API.
0955  *  This string will be normalized.
0956  *  The structure and the syntax of the string are defined in the "Naming collators"
0957  *  section of the User Guide: 
0958  *  https://unicode-org.github.io/icu/userguide/collation/concepts#collator-naming-scheme
0959  *  This API supports preflighting.
0960  *  @param coll a collator
0961  *  @param locale a locale that will appear as the collator's locale in the resulting
0962  *                short string definition. If NULL, the locale will be harvested 
0963  *                from the collator.
0964  *  @param buffer space to hold the resulting string
0965  *  @param capacity capacity of the buffer
0966  *  @param status for returning errors. All the preflighting errors are featured
0967  *  @return length of the resulting string
0968  *  @see ucol_openFromShortString
0969  *  @see ucol_normalizeShortDefinitionString
0970  *  @deprecated ICU 54
0971  */
0972 U_DEPRECATED int32_t U_EXPORT2
0973 ucol_getShortDefinitionString(const UCollator *coll,
0974                               const char *locale,
0975                               char *buffer,
0976                               int32_t capacity,
0977                               UErrorCode *status);
0978 
0979 /** Verifies and normalizes a short definition string.
0980  *  A normalized short definition string has all the options sorted by the argument name,
0981  *  so that equivalent definition strings are the same. 
0982  *  This API supports preflighting.
0983  *  @param source definition string
0984  *  @param destination space to hold the resulting string
0985  *  @param capacity capacity of the buffer
0986  *  @param parseError if not NULL, structure that will get filled with error's pre
0987  *                   and post context in case of error.
0988  *  @param status     Error code. This API will return an error if an invalid attribute 
0989  *                    or attribute/value combination is specified. All the preflighting 
0990  *                    errors are also featured
0991  *  @return length of the resulting normalized string.
0992  *
0993  *  @see ucol_openFromShortString
0994  *  @see ucol_getShortDefinitionString
0995  * 
0996  *  @deprecated ICU 54
0997  */
0998 U_DEPRECATED int32_t U_EXPORT2
0999 ucol_normalizeShortDefinitionString(const char *source,
1000                                     char *destination,
1001                                     int32_t capacity,
1002                                     UParseError *parseError,
1003                                     UErrorCode *status);
1004 #endif  /* U_HIDE_DEPRECATED_API */
1005 
1006 
1007 /**
1008  * Get a sort key for a string from a UCollator.
1009  * Sort keys may be compared using <TT>strcmp</TT>.
1010  *
1011  * Note that sort keys are often less efficient than simply doing comparison.  
1012  * For more details, see the ICU User Guide.
1013  *
1014  * Like ICU functions that write to an output buffer, the buffer contents
1015  * are undefined if the buffer capacity (resultLength parameter) is too small.
1016  * Unlike ICU functions that write a string to an output buffer,
1017  * the terminating zero byte is counted in the sort key length.
1018  * @param coll The UCollator containing the collation rules.
1019  * @param source The string to transform.
1020  * @param sourceLength The length of source, or -1 if null-terminated.
1021  * @param result A pointer to a buffer to receive the sort key.
1022  * @param resultLength The maximum size of result.
1023  * @return The size needed to fully store the sort key.
1024  *      If there was an internal error generating the sort key,
1025  *      a zero value is returned.
1026  * @see ucol_keyHashCode
1027  * @stable ICU 2.0
1028  */
1029 U_CAPI int32_t U_EXPORT2 
1030 ucol_getSortKey(const    UCollator    *coll,
1031         const    UChar        *source,
1032         int32_t        sourceLength,
1033         uint8_t        *result,
1034         int32_t        resultLength);
1035 
1036 
1037 /** Gets the next count bytes of a sort key. Caller needs
1038  *  to preserve state array between calls and to provide
1039  *  the same type of UCharIterator set with the same string.
1040  *  The destination buffer provided must be big enough to store
1041  *  the number of requested bytes.
1042  *
1043  *  The generated sort key may or may not be compatible with
1044  *  sort keys generated using ucol_getSortKey().
1045  *  @param coll The UCollator containing the collation rules.
1046  *  @param iter UCharIterator containing the string we need 
1047  *              the sort key to be calculated for.
1048  *  @param state Opaque state of sortkey iteration.
1049  *  @param dest Buffer to hold the resulting sortkey part
1050  *  @param count number of sort key bytes required.
1051  *  @param status error code indicator.
1052  *  @return the actual number of bytes of a sortkey. It can be
1053  *          smaller than count if we have reached the end of 
1054  *          the sort key.
1055  *  @stable ICU 2.6
1056  */
1057 U_CAPI int32_t U_EXPORT2 
1058 ucol_nextSortKeyPart(const UCollator *coll,
1059                      UCharIterator *iter,
1060                      uint32_t state[2],
1061                      uint8_t *dest, int32_t count,
1062                      UErrorCode *status);
1063 
1064 /** Enum that is taken by the ucol_getBound API.
1065  * See below for an explanation.
1066  * Do not change the values assigned to the
1067  * members of this enum: underlying code
1068  * depends on them having these numbers.
1069  * @stable ICU 2.0
1070  */
1071 typedef enum {
1072   /** lower bound */
1073   UCOL_BOUND_LOWER = 0,
1074   /** upper bound that will match strings of exact size */
1075   UCOL_BOUND_UPPER = 1,
1076   /** upper bound that will match all the strings that have the same initial substring as the given string */
1077   UCOL_BOUND_UPPER_LONG = 2,
1078 #ifndef U_HIDE_DEPRECATED_API
1079     /**
1080      * One more than the highest normal UColBoundMode value.
1081      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
1082      */
1083     UCOL_BOUND_VALUE_COUNT
1084 #endif  /* U_HIDE_DEPRECATED_API */
1085 } UColBoundMode;
1086 
1087 /**
1088  * Produce a bound for a given sortkey and a number of levels.
1089  * Return value is always the number of bytes needed, regardless of 
1090  * whether the result buffer was big enough or even valid.<br>
1091  * Resulting bounds can be used to produce a range of strings that are
1092  * between upper and lower bounds. For example, if bounds are produced
1093  * for a sortkey of string "smith", strings between upper and lower 
1094  * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
1095  * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
1096  * is produced, strings matched would be as above. However, if bound
1097  * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
1098  * also match "Smithsonian" and similar.<br>
1099  * For more on usage, see example in cintltst/capitst.c in procedure
1100  * TestBounds.
1101  * Sort keys may be compared using <TT>strcmp</TT>.
1102  * @param source The source sortkey.
1103  * @param sourceLength The length of source, or -1 if null-terminated. 
1104  *                     (If an unmodified sortkey is passed, it is always null 
1105  *                      terminated).
1106  * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which 
1107  *                  produces a lower inclusive bound, UCOL_BOUND_UPPER, that 
1108  *                  produces upper bound that matches strings of the same length 
1109  *                  or UCOL_BOUND_UPPER_LONG that matches strings that have the 
1110  *                  same starting substring as the source string.
1111  * @param noOfLevels  Number of levels required in the resulting bound (for most 
1112  *                    uses, the recommended value is 1). See users guide for 
1113  *                    explanation on number of levels a sortkey can have.
1114  * @param result A pointer to a buffer to receive the resulting sortkey.
1115  * @param resultLength The maximum size of result.
1116  * @param status Used for returning error code if something went wrong. If the 
1117  *               number of levels requested is higher than the number of levels
1118  *               in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is 
1119  *               issued.
1120  * @return The size needed to fully store the bound. 
1121  * @see ucol_keyHashCode
1122  * @stable ICU 2.1
1123  */
1124 U_CAPI int32_t U_EXPORT2 
1125 ucol_getBound(const uint8_t       *source,
1126         int32_t             sourceLength,
1127         UColBoundMode       boundType,
1128         uint32_t            noOfLevels,
1129         uint8_t             *result,
1130         int32_t             resultLength,
1131         UErrorCode          *status);
1132         
1133 /**
1134  * Gets the version information for a Collator. Version is currently
1135  * an opaque 32-bit number which depends, among other things, on major
1136  * versions of the collator tailoring and UCA.
1137  * @param coll The UCollator to query.
1138  * @param info the version # information, the result will be filled in
1139  * @stable ICU 2.0
1140  */
1141 U_CAPI void U_EXPORT2
1142 ucol_getVersion(const UCollator* coll, UVersionInfo info);
1143 
1144 /**
1145  * Gets the UCA version information for a Collator. Version is the
1146  * UCA version number (3.1.1, 4.0).
1147  * @param coll The UCollator to query.
1148  * @param info the version # information, the result will be filled in
1149  * @stable ICU 2.8
1150  */
1151 U_CAPI void U_EXPORT2
1152 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info);
1153 
1154 /**
1155  * Merges two sort keys. The levels are merged with their corresponding counterparts
1156  * (primaries with primaries, secondaries with secondaries etc.). Between the values
1157  * from the same level a separator is inserted.
1158  *
1159  * This is useful, for example, for combining sort keys from first and last names
1160  * to sort such pairs.
1161  * See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys
1162  *
1163  * The recommended way to achieve "merged" sorting is by
1164  * concatenating strings with U+FFFE between them.
1165  * The concatenation has the same sort order as the merged sort keys,
1166  * but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\\uFFFE' + str2).
1167  * Using strings with U+FFFE may yield shorter sort keys.
1168  *
1169  * For details about Sort Key Features see
1170  * https://unicode-org.github.io/icu/userguide/collation/api#sort-key-features
1171  *
1172  * It is possible to merge multiple sort keys by consecutively merging
1173  * another one with the intermediate result.
1174  *
1175  * The length of the merge result is the sum of the lengths of the input sort keys.
1176  *
1177  * Example (uncompressed):
1178  * <pre>191B1D 01 050505 01 910505 00
1179  * 1F2123 01 050505 01 910505 00</pre>
1180  * will be merged as 
1181  * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre>
1182  *
1183  * If the destination buffer is not big enough, then its contents are undefined.
1184  * If any of the source lengths is zero or any of the source pointers is NULL/undefined,
1185  * the result is of size zero.
1186  *
1187  * @param src1 the first sort key
1188  * @param src1Length the length of the first sort key, including the zero byte at the end;
1189  *        can be -1 if the function is to find the length
1190  * @param src2 the second sort key
1191  * @param src2Length the length of the second sort key, including the zero byte at the end;
1192  *        can be -1 if the function is to find the length
1193  * @param dest the buffer where the merged sort key is written,
1194  *        can be NULL if destCapacity==0
1195  * @param destCapacity the number of bytes in the dest buffer
1196  * @return the length of the merged sort key, src1Length+src2Length;
1197  *         can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments),
1198  *         in which cases the contents of dest are undefined
1199  * @stable ICU 2.0
1200  */
1201 U_CAPI int32_t U_EXPORT2 
1202 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
1203                    const uint8_t *src2, int32_t src2Length,
1204                    uint8_t *dest, int32_t destCapacity);
1205 
1206 /**
1207  * Universal attribute setter
1208  * @param coll collator which attributes are to be changed
1209  * @param attr attribute type 
1210  * @param value attribute value
1211  * @param status to indicate whether the operation went on smoothly or there were errors
1212  * @see UColAttribute
1213  * @see UColAttributeValue
1214  * @see ucol_getAttribute
1215  * @stable ICU 2.0
1216  */
1217 U_CAPI void U_EXPORT2 
1218 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status);
1219 
1220 /**
1221  * Universal attribute getter
1222  * @param coll collator whose attributes are to be queried
1223  * @param attr attribute type
1224  * @param status to indicate whether the operation went on smoothly or there were errors
1225  * @return attribute value
1226  * @see UColAttribute
1227  * @see UColAttributeValue
1228  * @see ucol_setAttribute
1229  * @stable ICU 2.0
1230  */
1231 U_CAPI UColAttributeValue  U_EXPORT2 
1232 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status);
1233 
1234 /**
1235  * Sets the variable top to the top of the specified reordering group.
1236  * The variable top determines the highest-sorting character
1237  * which is affected by UCOL_ALTERNATE_HANDLING.
1238  * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
1239  * @param coll the collator
1240  * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION,
1241  *              UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY;
1242  *              or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group
1243  * @param pErrorCode Standard ICU error code. Its input value must
1244  *                   pass the U_SUCCESS() test, or else the function returns
1245  *                   immediately. Check for U_FAILURE() on output or use with
1246  *                   function chaining. (See User Guide for details.)
1247  * @see ucol_getMaxVariable
1248  * @stable ICU 53
1249  */
1250 U_CAPI void U_EXPORT2
1251 ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode);
1252 
1253 /**
1254  * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
1255  * @param coll the collator
1256  * @return the maximum variable reordering group.
1257  * @see ucol_setMaxVariable
1258  * @stable ICU 53
1259  */
1260 U_CAPI UColReorderCode U_EXPORT2
1261 ucol_getMaxVariable(const UCollator *coll);
1262 
1263 #ifndef U_HIDE_DEPRECATED_API
1264 /**
1265  * Sets the variable top to the primary weight of the specified string.
1266  *
1267  * Beginning with ICU 53, the variable top is pinned to
1268  * the top of one of the supported reordering groups,
1269  * and it must not be beyond the last of those groups.
1270  * See ucol_setMaxVariable().
1271  * @param coll the collator
1272  * @param varTop one or more (if contraction) UChars to which the variable top should be set
1273  * @param len length of variable top string. If -1 it is considered to be zero terminated.
1274  * @param status error code. If error code is set, the return value is undefined.
1275  *               Errors set by this function are:<br>
1276  *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
1277  *    U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
1278  *    the last reordering group supported by ucol_setMaxVariable()
1279  * @return variable top primary weight
1280  * @see ucol_getVariableTop
1281  * @see ucol_restoreVariableTop
1282  * @deprecated ICU 53 Call ucol_setMaxVariable() instead.
1283  */
1284 U_DEPRECATED uint32_t U_EXPORT2 
1285 ucol_setVariableTop(UCollator *coll, 
1286                     const UChar *varTop, int32_t len, 
1287                     UErrorCode *status);
1288 #endif  /* U_HIDE_DEPRECATED_API */
1289 
1290 /** 
1291  * Gets the variable top value of a Collator. 
1292  * @param coll collator whose variable top is to be retrieved
1293  * @param status error code (not changed by function). If error code is set, 
1294  *               the return value is undefined.
1295  * @return the variable top primary weight
1296  * @see ucol_getMaxVariable
1297  * @see ucol_setVariableTop
1298  * @see ucol_restoreVariableTop
1299  * @stable ICU 2.0
1300  */
1301 U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status);
1302 
1303 #ifndef U_HIDE_DEPRECATED_API
1304 /**
1305  * Sets the variable top to the specified primary weight.
1306  *
1307  * Beginning with ICU 53, the variable top is pinned to
1308  * the top of one of the supported reordering groups,
1309  * and it must not be beyond the last of those groups.
1310  * See ucol_setMaxVariable().
1311  * @param coll collator to be set
1312  * @param varTop primary weight, as returned by ucol_setVariableTop or ucol_getVariableTop
1313  * @param status error code
1314  * @see ucol_getVariableTop
1315  * @see ucol_setVariableTop
1316  * @deprecated ICU 53 Call ucol_setMaxVariable() instead.
1317  */
1318 U_DEPRECATED void U_EXPORT2 
1319 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status);
1320 #endif  /* U_HIDE_DEPRECATED_API */
1321 
1322 /**
1323  * Thread safe cloning operation. The result is a clone of a given collator.
1324  * @param coll collator to be cloned
1325  * @param status to indicate whether the operation went on smoothly or there were errors
1326  * @return pointer to the new clone
1327  * @see ucol_open
1328  * @see ucol_openRules
1329  * @see ucol_close
1330  * @stable ICU 71
1331  */
1332 U_CAPI UCollator* U_EXPORT2 ucol_clone(const UCollator *coll, UErrorCode *status);
1333 
1334 #ifndef U_HIDE_DEPRECATED_API
1335 
1336 /**
1337  * Thread safe cloning operation. The result is a clone of a given collator.
1338  * @param coll collator to be cloned
1339  * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
1340  * user allocated space for the new clone. 
1341  * If NULL new memory will be allocated. 
1342  *  If buffer is not large enough, new memory will be allocated.
1343  *  Clients can use the U_COL_SAFECLONE_BUFFERSIZE.
1344  * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
1345  *  pointer to size of allocated space. 
1346  *  If *pBufferSize == 0, a sufficient size for use in cloning will 
1347  *  be returned ('pre-flighting')
1348  *  If *pBufferSize is not enough for a stack-based safe clone, 
1349  *  new memory will be allocated.
1350  * @param status to indicate whether the operation went on smoothly or there were errors
1351  *    An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used
1352  * if pBufferSize != NULL and any allocations were necessary
1353  * @return pointer to the new clone
1354  * @see ucol_open
1355  * @see ucol_openRules
1356  * @see ucol_close
1357  * @deprecated ICU 71 Use ucol_clone() instead.
1358  */
1359 U_DEPRECATED UCollator* U_EXPORT2
1360 ucol_safeClone(const UCollator *coll,
1361                void            *stackBuffer,
1362                int32_t         *pBufferSize,
1363                UErrorCode      *status);
1364 
1365 
1366 /** Default memory size for the new clone created by ucol_safeClone().
1367  * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer.
1368  */
1369 #define U_COL_SAFECLONE_BUFFERSIZE 1
1370 
1371 #endif /* U_HIDE_DEPRECATED_API */
1372 
1373 /**
1374  * Gets the current rules. Delta defines whether the full rules are returned or just the tailoring.
1375  * Returns the number of UChars needed to store the rules. If buffer is NULL or bufferLen is not
1376  * enough to store the rules, only as much as fits in the available space is stored.
1377  *
1378  * ucol_getRules() should normally be used instead.
1379  * See https://unicode-org.github.io/icu/userguide/collation/customization#building-on-existing-locales
1380  * @param coll collator to get the rules from
1381  * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
1382  * @param buffer buffer to store the result in. If NULL, you'll get no rules.
1383  * @param bufferLen length of buffer to store rules in. If less than needed, you'll get only the part that fits.
1384  * @return the number of UChars needed to store the rules
1385  * @stable ICU 2.0
1386  * @see UCOL_FULL_RULES
1387  */
1388 U_CAPI int32_t U_EXPORT2 
1389 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen);
1390 
1391 #ifndef U_HIDE_DEPRECATED_API
1392 /**
1393  * Gets the locale name of the collator. If the collator
1394  * was instantiated from rules, this function returns
1395  * NULL.
1396  * @param coll The UCollator for which the locale is needed
1397  * @param type You can choose between the requested, valid and actual
1398  *             locale. For a description, see the definition of
1399  *             ULocDataLocaleType in uloc.h
1400  * @param status error code of the operation
1401  * @return the real locale name from which the collation data comes.
1402  *         If the collator was instantiated from rules, returns
1403  *         NULL.
1404  * @deprecated ICU 2.8 Use ucol_getLocaleByType instead
1405  */
1406 U_DEPRECATED const char * U_EXPORT2
1407 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
1408 #endif  /* U_HIDE_DEPRECATED_API */
1409 
1410 /**
1411  * Gets the locale name of the collator. If the collator
1412  * was instantiated from rules, this function returns
1413  * NULL.
1414  * @param coll The UCollator for which the locale is needed
1415  * @param type You can choose between the requested, valid and actual
1416  *             locale. For a description, see the definition of
1417  *             ULocDataLocaleType in uloc.h
1418  * @param status error code of the operation
1419  * @return the real locale name from which the collation data comes.
1420  *         If the collator was instantiated from rules, returns
1421  *         NULL.
1422  * @stable ICU 2.8
1423  */
1424 U_CAPI const char * U_EXPORT2
1425 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
1426 
1427 /**
1428  * Gets a Unicode set that contains all the characters and sequences tailored in
1429  * this collator. The result must be disposed of by using uset_close.
1430  * @param coll        The UCollator for which we want to get the tailored chars
1431  * @param status      error code of the operation
1432  * @return a pointer to the newly created USet. Must be disposed of by using uset_close
1433  * @see ucol_openRules
1434  * @see uset_close
1435  * @stable ICU 2.4
1436  */
1437 U_CAPI USet * U_EXPORT2
1438 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
1439 
1440 #ifndef U_HIDE_INTERNAL_API
1441 /** Calculates the set of unsafe code points, given a collator.
1442  *   A character is unsafe if appending some character to it could cause the ordering to alter significantly.
1443  *   Collation sorts in normalized order, so anything that rearranges in normalization can cause this.
1444  *   Thus, if you have a character like a_umlaut and you add a lower_dot to it,
1445  *   it normalizes to a_lower_dot + umlaut and sorts differently.
1446  *  @param coll Collator
1447  *  @param unsafe a fill-in set to receive the unsafe code points
1448  *  @param status for catching errors
1449  *  @return number of elements in the set
1450  *  @internal ICU 3.0
1451  */
1452 U_CAPI int32_t U_EXPORT2
1453 ucol_getUnsafeSet( const UCollator *coll,
1454                   USet *unsafe,
1455                   UErrorCode *status);
1456 
1457 /** Touches all resources needed for instantiating a collator from a short string definition,
1458  *  thus filling up the cache.
1459  * @param definition A short string containing a locale and a set of attributes.
1460  *                   Attributes not explicitly mentioned are left at the default
1461  *                   state for a locale.
1462  * @param forceDefaults if false, the settings that are the same as the collator
1463  *                   default settings will not be applied (for example, setting
1464  *                   French secondary on a French collator would not be executed).
1465  *                   If true, all the settings will be applied regardless of the
1466  *                   collator default value. If the definition
1467  *                   strings are to be cached, should be set to false.
1468  * @param parseError if not NULL, structure that will get filled with the error's pre
1469  *                   and post context in case of error.
1470  * @param status     Error code. Apart from regular error conditions connected to
1471  *                   instantiating collators (like out of memory or similar), this
1472  *                   API will return an error if an invalid attribute or attribute/value
1473  *                   combination is specified.
1474  * @see ucol_openFromShortString
1475  * @internal ICU 3.2.1
1476  */
1477 U_CAPI void U_EXPORT2
1478 ucol_prepareShortStringOpen( const char *definition,
1479                           UBool forceDefaults,
1480                           UParseError *parseError,
1481                           UErrorCode *status);
1482 #endif  /* U_HIDE_INTERNAL_API */
1483 
1484 /** Creates a binary image of a collator. This binary image can be stored and
1485  *  later used to instantiate a collator using ucol_openBinary.
1486  *  This API supports preflighting.
1487  *  @param coll Collator
1488  *  @param buffer a fill-in buffer to receive the binary image
1489  *  @param capacity capacity of the destination buffer
1490  *  @param status for catching errors
1491  *  @return size of the image
1492  *  @see ucol_openBinary
1493  *  @stable ICU 3.2
1494  */
1495 U_CAPI int32_t U_EXPORT2
1496 ucol_cloneBinary(const UCollator *coll,
1497                  uint8_t *buffer, int32_t capacity,
1498                  UErrorCode *status);
1499 
1500 /** Opens a collator from a collator binary image created using
1501  *  ucol_cloneBinary. The binary image used to instantiate the
1502  *  collator remains owned by the user and should stay around for
1503  *  the lifetime of the collator. The API also takes a base collator,
1504  *  which must be the root collator.
1505  *  @param bin binary image owned by the user and required through the
1506  *             lifetime of the collator
1507  *  @param length size of the image. If negative, the API will try to
1508  *                figure out the length of the image
1509  *  @param base Base collator, for lookup of untailored characters.
1510  *              Must be the root collator, must not be NULL.
1511  *              The base is required to be present through the lifetime of the collator.
1512  *  @param status for catching errors
1513  *  @return the newly created collator
1514  *  @see ucol_cloneBinary
1515  *  @stable ICU 3.2
1516  */
1517 U_CAPI UCollator* U_EXPORT2
1518 ucol_openBinary(const uint8_t *bin, int32_t length, 
1519                 const UCollator *base, 
1520                 UErrorCode *status);
1521 
1522 
1523 #endif /* #if !UCONFIG_NO_COLLATION */
1524 
1525 #endif