Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:13:15

0001 // © 2016 and later: Unicode, Inc. and others.
0002 // License & terms of use: http://www.unicode.org/copyright.html
0003 /*
0004 *******************************************************************************
0005 *
0006 *   Copyright (C) 2002-2014, International Business Machines
0007 *   Corporation and others.  All Rights Reserved.
0008 *
0009 *******************************************************************************
0010 *   file name:  uset.h
0011 *   encoding:   UTF-8
0012 *   tab size:   8 (not used)
0013 *   indentation:4
0014 *
0015 *   created on: 2002mar07
0016 *   created by: Markus W. Scherer
0017 *
0018 *   C version of UnicodeSet.
0019 */
0020 
0021 
0022 /**
0023  * \file
0024  * \brief C API: Unicode Set
0025  *
0026  * <p>This is a C wrapper around the C++ UnicodeSet class.</p>
0027  */
0028 
0029 #ifndef __USET_H__
0030 #define __USET_H__
0031 
0032 #include "unicode/utypes.h"
0033 #include "unicode/uchar.h"
0034 
0035 #if U_SHOW_CPLUSPLUS_API
0036 #include "unicode/localpointer.h"
0037 #endif   // U_SHOW_CPLUSPLUS_API
0038 
0039 #ifndef USET_DEFINED
0040 
0041 #ifndef U_IN_DOXYGEN
0042 #define USET_DEFINED
0043 #endif
0044 /**
0045  * USet is the C API type corresponding to C++ class UnicodeSet.
0046  * Use the uset_* API to manipulate.  Create with
0047  * uset_open*, and destroy with uset_close.
0048  * @stable ICU 2.4
0049  */
0050 typedef struct USet USet;
0051 #endif
0052 
0053 /**
0054  * Bitmask values to be passed to uset_openPatternOptions() or
0055  * uset_applyPattern() taking an option parameter.
0056  *
0057  * Use at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
0058  * These case options are mutually exclusive.
0059  *
0060  * Undefined options bits are ignored, and reserved for future use.
0061  *
0062  * @stable ICU 2.4
0063  */
0064 enum {
0065     /**
0066      * Ignore white space within patterns unless quoted or escaped.
0067      * @stable ICU 2.4
0068      */
0069     USET_IGNORE_SPACE = 1,
0070 
0071     /**
0072      * Enable case insensitive matching.  E.g., "[ab]" with this flag
0073      * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
0074      * match all except 'a', 'A', 'b', and 'B'. This performs a full
0075      * closure over case mappings, e.g. 'ſ' (U+017F long s) for 's'.
0076      *
0077      * The resulting set is a superset of the input for the code points but
0078      * not for the strings.
0079      * It performs a case mapping closure of the code points and adds
0080      * full case folding strings for the code points, and reduces strings of
0081      * the original set to their full case folding equivalents.
0082      *
0083      * This is designed for case-insensitive matches, for example
0084      * in regular expressions. The full code point case closure allows checking of
0085      * an input character directly against the closure set.
0086      * Strings are matched by comparing the case-folded form from the closure
0087      * set with an incremental case folding of the string in question.
0088      *
0089      * The closure set will also contain single code points if the original
0090      * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
0091      * This is not necessary (that is, redundant) for the above matching method
0092      * but results in the same closure sets regardless of whether the original
0093      * set contained the code point or a string.
0094      *
0095      * @stable ICU 2.4
0096      */
0097     USET_CASE_INSENSITIVE = 2,
0098 
0099     /**
0100      * Adds all case mappings for each element in the set.
0101      * This adds the full lower-, title-, and uppercase mappings as well as the full case folding
0102      * of each existing element in the set.
0103      *
0104      * Unlike the “case insensitive” options, this does not perform a closure.
0105      * For example, it does not add 'ſ' (U+017F long s) for 's',
0106      * 'K' (U+212A Kelvin sign) for 'k', or replace set strings by their case-folded versions.
0107      *
0108      * @stable ICU 3.2
0109      */
0110     USET_ADD_CASE_MAPPINGS = 4,
0111 
0112 #ifndef U_HIDE_DRAFT_API
0113     /**
0114      * Enable case insensitive matching.
0115      * Same as USET_CASE_INSENSITIVE but using only Simple_Case_Folding (scf) mappings,
0116      * which map each code point to one code point,
0117      * not full Case_Folding (cf) mappings, which map some code points to multiple code points.
0118      *
0119      * This is designed for case-insensitive matches, for example in certain
0120      * regular expression implementations where only Simple_Case_Folding mappings are used,
0121      * such as in ECMAScript (JavaScript) regular expressions.
0122      *
0123      * @draft ICU 73
0124      */
0125     USET_SIMPLE_CASE_INSENSITIVE = 6
0126 #endif  // U_HIDE_DRAFT_API
0127 };
0128 
0129 /**
0130  * Argument values for whether span() and similar functions continue while
0131  * the current character is contained vs. not contained in the set.
0132  *
0133  * The functionality is straightforward for sets with only single code points,
0134  * without strings (which is the common case):
0135  * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same.
0136  * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED.
0137  * - span() and spanBack() partition any string the same way when
0138  *   alternating between span(USET_SPAN_NOT_CONTAINED) and
0139  *   span(either "contained" condition).
0140  * - Using a complemented (inverted) set and the opposite span conditions
0141  *   yields the same results.
0142  *
0143  * When a set contains multi-code point strings, then these statements may not
0144  * be true, depending on the strings in the set (for example, whether they
0145  * overlap with each other) and the string that is processed.
0146  * For a set with strings:
0147  * - The complement of the set contains the opposite set of code points,
0148  *   but the same set of strings.
0149  *   Therefore, complementing both the set and the span conditions
0150  *   may yield different results.
0151  * - When starting spans at different positions in a string
0152  *   (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different
0153  *   because a set string may start before the later position.
0154  * - span(USET_SPAN_SIMPLE) may be shorter than
0155  *   span(USET_SPAN_CONTAINED) because it will not recursively try
0156  *   all possible paths.
0157  *   For example, with a set which contains the three strings "xy", "xya" and "ax",
0158  *   span("xyax", USET_SPAN_CONTAINED) will return 4 but
0159  *   span("xyax", USET_SPAN_SIMPLE) will return 3.
0160  *   span(USET_SPAN_SIMPLE) will never be longer than
0161  *   span(USET_SPAN_CONTAINED).
0162  * - With either "contained" condition, span() and spanBack() may partition
0163  *   a string in different ways.
0164  *   For example, with a set which contains the two strings "ab" and "ba",
0165  *   and when processing the string "aba",
0166  *   span() will yield contained/not-contained boundaries of { 0, 2, 3 }
0167  *   while spanBack() will yield boundaries of { 0, 1, 3 }.
0168  *
0169  * Note: If it is important to get the same boundaries whether iterating forward
0170  * or backward through a string, then either only span() should be used and
0171  * the boundaries cached for backward operation, or an ICU BreakIterator
0172  * could be used.
0173  *
0174  * Note: Unpaired surrogates are treated like surrogate code points.
0175  * Similarly, set strings match only on code point boundaries,
0176  * never in the middle of a surrogate pair.
0177  * Illegal UTF-8 sequences are treated like U+FFFD.
0178  * When processing UTF-8 strings, malformed set strings
0179  * (strings with unpaired surrogates which cannot be converted to UTF-8)
0180  * are ignored.
0181  *
0182  * @stable ICU 3.8
0183  */
0184 typedef enum USetSpanCondition {
0185     /**
0186      * Continues a span() while there is no set element at the current position.
0187      * Increments by one code point at a time.
0188      * Stops before the first set element (character or string).
0189      * (For code points only, this is like while contains(current)==false).
0190      *
0191      * When span() returns, the substring between where it started and the position
0192      * it returned consists only of characters that are not in the set,
0193      * and none of its strings overlap with the span.
0194      *
0195      * @stable ICU 3.8
0196      */
0197     USET_SPAN_NOT_CONTAINED = 0,
0198     /**
0199      * Spans the longest substring that is a concatenation of set elements (characters or strings).
0200      * (For characters only, this is like while contains(current)==true).
0201      *
0202      * When span() returns, the substring between where it started and the position
0203      * it returned consists only of set elements (characters or strings) that are in the set.
0204      *
0205      * If a set contains strings, then the span will be the longest substring for which there
0206      * exists at least one non-overlapping concatenation of set elements (characters or strings).
0207      * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>.
0208      * (Java/ICU/Perl regex stops at the first match of an OR.)
0209      *
0210      * @stable ICU 3.8
0211      */
0212     USET_SPAN_CONTAINED = 1,
0213     /**
0214      * Continues a span() while there is a set element at the current position.
0215      * Increments by the longest matching element at each position.
0216      * (For characters only, this is like while contains(current)==true).
0217      *
0218      * When span() returns, the substring between where it started and the position
0219      * it returned consists only of set elements (characters or strings) that are in the set.
0220      *
0221      * If a set only contains single characters, then this is the same
0222      * as USET_SPAN_CONTAINED.
0223      *
0224      * If a set contains strings, then the span will be the longest substring
0225      * with a match at each position with the longest single set element (character or string).
0226      *
0227      * Use this span condition together with other longest-match algorithms,
0228      * such as ICU converters (ucnv_getUnicodeSet()).
0229      *
0230      * @stable ICU 3.8
0231      */
0232     USET_SPAN_SIMPLE = 2,
0233 #ifndef U_HIDE_DEPRECATED_API
0234     /**
0235      * One more than the last span condition.
0236      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
0237      */
0238     USET_SPAN_CONDITION_COUNT
0239 #endif  // U_HIDE_DEPRECATED_API
0240 } USetSpanCondition;
0241 
0242 enum {
0243     /**
0244      * Capacity of USerializedSet::staticArray.
0245      * Enough for any single-code point set.
0246      * Also provides padding for nice sizeof(USerializedSet).
0247      * @stable ICU 2.4
0248      */
0249     USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
0250 };
0251 
0252 /**
0253  * A serialized form of a Unicode set.  Limited manipulations are
0254  * possible directly on a serialized set.  See below.
0255  * @stable ICU 2.4
0256  */
0257 typedef struct USerializedSet {
0258     /**
0259      * The serialized Unicode Set.
0260      * @stable ICU 2.4
0261      */
0262     const uint16_t *array;
0263     /**
0264      * The length of the array that contains BMP characters.
0265      * @stable ICU 2.4
0266      */
0267     int32_t bmpLength;
0268     /**
0269      * The total length of the array.
0270      * @stable ICU 2.4
0271      */
0272     int32_t length;
0273     /**
0274      * A small buffer for the array to reduce memory allocations.
0275      * @stable ICU 2.4
0276      */
0277     uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
0278 } USerializedSet;
0279 
0280 /*********************************************************************
0281  * USet API
0282  *********************************************************************/
0283 
0284 /**
0285  * Create an empty USet object.
0286  * Equivalent to uset_open(1, 0).
0287  * @return a newly created USet.  The caller must call uset_close() on
0288  * it when done.
0289  * @stable ICU 4.2
0290  */
0291 U_CAPI USet* U_EXPORT2
0292 uset_openEmpty(void);
0293 
0294 /**
0295  * Creates a USet object that contains the range of characters
0296  * start..end, inclusive.  If <code>start > end</code> 
0297  * then an empty set is created (same as using uset_openEmpty()).
0298  * @param start first character of the range, inclusive
0299  * @param end last character of the range, inclusive
0300  * @return a newly created USet.  The caller must call uset_close() on
0301  * it when done.
0302  * @stable ICU 2.4
0303  */
0304 U_CAPI USet* U_EXPORT2
0305 uset_open(UChar32 start, UChar32 end);
0306 
0307 /**
0308  * Creates a set from the given pattern.  See the UnicodeSet class
0309  * description for the syntax of the pattern language.
0310  * @param pattern a string specifying what characters are in the set
0311  * @param patternLength the length of the pattern, or -1 if null
0312  * terminated
0313  * @param ec the error code
0314  * @stable ICU 2.4
0315  */
0316 U_CAPI USet* U_EXPORT2
0317 uset_openPattern(const UChar* pattern, int32_t patternLength,
0318                  UErrorCode* ec);
0319 
0320 /**
0321  * Creates a set from the given pattern.  See the UnicodeSet class
0322  * description for the syntax of the pattern language.
0323  * @param pattern a string specifying what characters are in the set
0324  * @param patternLength the length of the pattern, or -1 if null
0325  * terminated
0326  * @param options bitmask for options to apply to the pattern.
0327  * Valid options are USET_IGNORE_SPACE and
0328  * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
0329  * These case options are mutually exclusive.
0330  * @param ec the error code
0331  * @stable ICU 2.4
0332  */
0333 U_CAPI USet* U_EXPORT2
0334 uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
0335                  uint32_t options,
0336                  UErrorCode* ec);
0337 
0338 /**
0339  * Disposes of the storage used by a USet object.  This function should
0340  * be called exactly once for objects returned by uset_open().
0341  * @param set the object to dispose of
0342  * @stable ICU 2.4
0343  */
0344 U_CAPI void U_EXPORT2
0345 uset_close(USet* set);
0346 
0347 #if U_SHOW_CPLUSPLUS_API
0348 
0349 U_NAMESPACE_BEGIN
0350 
0351 /**
0352  * \class LocalUSetPointer
0353  * "Smart pointer" class, closes a USet via uset_close().
0354  * For most methods see the LocalPointerBase base class.
0355  *
0356  * @see LocalPointerBase
0357  * @see LocalPointer
0358  * @stable ICU 4.4
0359  */
0360 U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close);
0361 
0362 U_NAMESPACE_END
0363 
0364 #endif
0365 
0366 /**
0367  * Returns a copy of this object.
0368  * If this set is frozen, then the clone will be frozen as well.
0369  * Use uset_cloneAsThawed() for a mutable clone of a frozen set.
0370  * @param set the original set
0371  * @return the newly allocated copy of the set
0372  * @see uset_cloneAsThawed
0373  * @stable ICU 3.8
0374  */
0375 U_CAPI USet * U_EXPORT2
0376 uset_clone(const USet *set);
0377 
0378 /**
0379  * Determines whether the set has been frozen (made immutable) or not.
0380  * See the ICU4J Freezable interface for details.
0381  * @param set the set
0382  * @return true/false for whether the set has been frozen
0383  * @see uset_freeze
0384  * @see uset_cloneAsThawed
0385  * @stable ICU 3.8
0386  */
0387 U_CAPI UBool U_EXPORT2
0388 uset_isFrozen(const USet *set);
0389 
0390 /**
0391  * Freeze the set (make it immutable).
0392  * Once frozen, it cannot be unfrozen and is therefore thread-safe
0393  * until it is deleted.
0394  * See the ICU4J Freezable interface for details.
0395  * Freezing the set may also make some operations faster, for example
0396  * uset_contains() and uset_span().
0397  * A frozen set will not be modified. (It remains frozen.)
0398  * The set is frozen in place; this C function returns no value.
0399  * @param set the set to freeze
0400  * @see uset_isFrozen
0401  * @see uset_cloneAsThawed
0402  * @stable ICU 3.8
0403  */
0404 U_CAPI void U_EXPORT2
0405 uset_freeze(USet *set);
0406 
0407 /**
0408  * Clone the set and make the clone mutable.
0409  * See the ICU4J Freezable interface for details.
0410  * @param set the set
0411  * @return the mutable clone
0412  * @see uset_freeze
0413  * @see uset_isFrozen
0414  * @see uset_clone
0415  * @stable ICU 3.8
0416  */
0417 U_CAPI USet * U_EXPORT2
0418 uset_cloneAsThawed(const USet *set);
0419 
0420 /**
0421  * Causes the USet object to represent the range <code>start - end</code>.
0422  * If <code>start > end</code> then this USet is set to an empty range.
0423  * A frozen set will not be modified.
0424  * @param set the object to set to the given range
0425  * @param start first character in the set, inclusive
0426  * @param end last character in the set, inclusive
0427  * @stable ICU 3.2
0428  */
0429 U_CAPI void U_EXPORT2
0430 uset_set(USet* set,
0431          UChar32 start, UChar32 end);
0432 
0433 /**
0434  * Modifies the set to represent the set specified by the given
0435  * pattern. See the UnicodeSet class description for the syntax of
0436  * the pattern language. See also the User Guide chapter about UnicodeSet.
0437  * <em>Empties the set passed before applying the pattern.</em>
0438  * A frozen set will not be modified.
0439  * @param set               The set to which the pattern is to be applied.
0440  * @param pattern           A pointer to UChar string specifying what characters are in the set.
0441  *                          The character at pattern[0] must be a '['.
0442  * @param patternLength     The length of the UChar string. -1 if NUL terminated.
0443  * @param options           A bitmask for options to apply to the pattern.
0444  *                          Valid options are USET_IGNORE_SPACE and
0445  *                          at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS,
0446  *                          USET_SIMPLE_CASE_INSENSITIVE.
0447  *                          These case options are mutually exclusive.
0448  * @param status            Returns an error if the pattern cannot be parsed.
0449  * @return                  Upon successful parse, the value is
0450  *                          the index of the character after the closing ']'
0451  *                          of the parsed pattern.
0452  *                          If the status code indicates failure, then the return value
0453  *                          is the index of the error in the source.
0454  *
0455  * @stable ICU 2.8
0456  */
0457 U_CAPI int32_t U_EXPORT2 
0458 uset_applyPattern(USet *set,
0459                   const UChar *pattern, int32_t patternLength,
0460                   uint32_t options,
0461                   UErrorCode *status);
0462 
0463 /**
0464  * Modifies the set to contain those code points which have the given value
0465  * for the given binary or enumerated property, as returned by
0466  * u_getIntPropertyValue.  Prior contents of this set are lost.
0467  * A frozen set will not be modified.
0468  *
0469  * @param set the object to contain the code points defined by the property
0470  *
0471  * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
0472  * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
0473  * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
0474  *
0475  * @param value a value in the range u_getIntPropertyMinValue(prop)..
0476  * u_getIntPropertyMaxValue(prop), with one exception.  If prop is
0477  * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
0478  * rather a mask value produced by U_GET_GC_MASK().  This allows grouped
0479  * categories such as [:L:] to be represented.
0480  *
0481  * @param ec error code input/output parameter
0482  *
0483  * @stable ICU 3.2
0484  */
0485 U_CAPI void U_EXPORT2
0486 uset_applyIntPropertyValue(USet* set,
0487                            UProperty prop, int32_t value, UErrorCode* ec);
0488 
0489 /**
0490  * Modifies the set to contain those code points which have the
0491  * given value for the given property.  Prior contents of this
0492  * set are lost.
0493  * A frozen set will not be modified.
0494  *
0495  * @param set the object to contain the code points defined by the given
0496  * property and value alias
0497  *
0498  * @param prop a string specifying a property alias, either short or long.
0499  * The name is matched loosely.  See PropertyAliases.txt for names and a
0500  * description of loose matching.  If the value string is empty, then this
0501  * string is interpreted as either a General_Category value alias, a Script
0502  * value alias, a binary property alias, or a special ID.  Special IDs are
0503  * matched loosely and correspond to the following sets:
0504  *
0505  * "ANY" = [\\u0000-\\U0010FFFF],
0506  * "ASCII" = [\\u0000-\\u007F],
0507  * "Assigned" = [:^Cn:].
0508  *
0509  * @param propLength the length of the prop, or -1 if NUL-terminated
0510  *
0511  * @param value a string specifying a value alias, either short or long.
0512  * The name is matched loosely.  See PropertyValueAliases.txt for names
0513  * and a description of loose matching.  In addition to aliases listed,
0514  * numeric values and canonical combining classes may be expressed
0515  * numerically, e.g., ("nv", "0.5") or ("ccc", "220").  The value string
0516  * may also be empty.
0517  *
0518  * @param valueLength the length of the value, or -1 if NUL-terminated
0519  *
0520  * @param ec error code input/output parameter
0521  *
0522  * @stable ICU 3.2
0523  */
0524 U_CAPI void U_EXPORT2
0525 uset_applyPropertyAlias(USet* set,
0526                         const UChar *prop, int32_t propLength,
0527                         const UChar *value, int32_t valueLength,
0528                         UErrorCode* ec);
0529 
0530 /**
0531  * Return true if the given position, in the given pattern, appears
0532  * to be the start of a UnicodeSet pattern.
0533  *
0534  * @param pattern a string specifying the pattern
0535  * @param patternLength the length of the pattern, or -1 if NUL-terminated
0536  * @param pos the given position
0537  * @stable ICU 3.2
0538  */
0539 U_CAPI UBool U_EXPORT2
0540 uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
0541                       int32_t pos);
0542 
0543 /**
0544  * Returns a string representation of this set.  If the result of
0545  * calling this function is passed to a uset_openPattern(), it
0546  * will produce another set that is equal to this one.
0547  * @param set the set
0548  * @param result the string to receive the rules, may be NULL
0549  * @param resultCapacity the capacity of result, may be 0 if result is NULL
0550  * @param escapeUnprintable if true then convert unprintable
0551  * characters to their hex escape representations, \\uxxxx or
0552  * \\Uxxxxxxxx.  Unprintable characters are those other than
0553  * U+000A, U+0020..U+007E.
0554  * @param ec error code.
0555  * @return length of string, possibly larger than resultCapacity
0556  * @stable ICU 2.4
0557  */
0558 U_CAPI int32_t U_EXPORT2
0559 uset_toPattern(const USet* set,
0560                UChar* result, int32_t resultCapacity,
0561                UBool escapeUnprintable,
0562                UErrorCode* ec);
0563 
0564 /**
0565  * Adds the given character to the given USet.  After this call,
0566  * uset_contains(set, c) will return true.
0567  * A frozen set will not be modified.
0568  * @param set the object to which to add the character
0569  * @param c the character to add
0570  * @stable ICU 2.4
0571  */
0572 U_CAPI void U_EXPORT2
0573 uset_add(USet* set, UChar32 c);
0574 
0575 /**
0576  * Adds all of the elements in the specified set to this set if
0577  * they're not already present.  This operation effectively
0578  * modifies this set so that its value is the <i>union</i> of the two
0579  * sets.  The behavior of this operation is unspecified if the specified
0580  * collection is modified while the operation is in progress.
0581  * A frozen set will not be modified.
0582  *
0583  * @param set the object to which to add the set
0584  * @param additionalSet the source set whose elements are to be added to this set.
0585  * @stable ICU 2.6
0586  */
0587 U_CAPI void U_EXPORT2
0588 uset_addAll(USet* set, const USet *additionalSet);
0589 
0590 /**
0591  * Adds the given range of characters to the given USet.  After this call,
0592  * uset_contains(set, c) will return true for every c in start..end.
0593  * A frozen set will not be modified.
0594  * @param set the object to which to add the range
0595  * @param start the first character of the range to add, inclusive
0596  * @param end the last character of the range to add, inclusive
0597  * @stable ICU 2.2
0598  */
0599 U_CAPI void U_EXPORT2
0600 uset_addRange(USet* set, UChar32 start, UChar32 end);
0601 
0602 /**
0603  * Adds the given string to the given USet.  After this call,
0604  * uset_containsString(set, str, strLen) will return true.
0605  * A frozen set will not be modified.
0606  * @param set the object to which to add the string
0607  * @param str the string to add
0608  * @param strLen the length of the string or -1 if null terminated.
0609  * @stable ICU 2.4
0610  */
0611 U_CAPI void U_EXPORT2
0612 uset_addString(USet* set, const UChar* str, int32_t strLen);
0613 
0614 /**
0615  * Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
0616  * If this set already contains any particular character, it has no effect on that character.
0617  * A frozen set will not be modified.
0618  * @param set the object to which to add the characters
0619  * @param str the source string
0620  * @param strLen the length of the string or -1 if null terminated.
0621  * @stable ICU 3.4
0622  */
0623 U_CAPI void U_EXPORT2
0624 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
0625 
0626 /**
0627  * Removes the given character from the given USet.  After this call,
0628  * uset_contains(set, c) will return false.
0629  * A frozen set will not be modified.
0630  * @param set the object from which to remove the character
0631  * @param c the character to remove
0632  * @stable ICU 2.4
0633  */
0634 U_CAPI void U_EXPORT2
0635 uset_remove(USet* set, UChar32 c);
0636 
0637 /**
0638  * Removes the given range of characters from the given USet.  After this call,
0639  * uset_contains(set, c) will return false for every c in start..end.
0640  * A frozen set will not be modified.
0641  * @param set the object from which to remove the range
0642  * @param start the first character of the range to remove, inclusive
0643  * @param end the last character of the range to remove, inclusive
0644  * @stable ICU 2.2
0645  */
0646 U_CAPI void U_EXPORT2
0647 uset_removeRange(USet* set, UChar32 start, UChar32 end);
0648 
0649 /**
0650  * Removes the given string from the given USet.  After this call,
0651  * uset_containsString(set, str, strLen) will return false.
0652  * A frozen set will not be modified.
0653  * @param set the object from which to remove the string
0654  * @param str the string to remove
0655  * @param strLen the length of the string or -1 if null terminated.
0656  * @stable ICU 2.4
0657  */
0658 U_CAPI void U_EXPORT2
0659 uset_removeString(USet* set, const UChar* str, int32_t strLen);
0660 
0661 /**
0662  * Removes EACH of the characters in this string. Note: "ch" == {"c", "h"}
0663  * A frozen set will not be modified.
0664  *
0665  * @param set the object to be modified
0666  * @param str the string
0667  * @param length the length of the string, or -1 if NUL-terminated
0668  * @stable ICU 69
0669  */
0670 U_CAPI void U_EXPORT2
0671 uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
0672 
0673 /**
0674  * Removes from this set all of its elements that are contained in the
0675  * specified set.  This operation effectively modifies this
0676  * set so that its value is the <i>asymmetric set difference</i> of
0677  * the two sets.
0678  * A frozen set will not be modified.
0679  * @param set the object from which the elements are to be removed
0680  * @param removeSet the object that defines which elements will be
0681  * removed from this set
0682  * @stable ICU 3.2
0683  */
0684 U_CAPI void U_EXPORT2
0685 uset_removeAll(USet* set, const USet* removeSet);
0686 
0687 /**
0688  * Retain only the elements in this set that are contained in the
0689  * specified range.  If <code>start > end</code> then an empty range is
0690  * retained, leaving the set empty.  This is equivalent to
0691  * a boolean logic AND, or a set INTERSECTION.
0692  * A frozen set will not be modified.
0693  *
0694  * @param set the object for which to retain only the specified range
0695  * @param start first character, inclusive, of range
0696  * @param end last character, inclusive, of range
0697  * @stable ICU 3.2
0698  */
0699 U_CAPI void U_EXPORT2
0700 uset_retain(USet* set, UChar32 start, UChar32 end);
0701 
0702 /**
0703  * Retains only the specified string from this set if it is present.
0704  * Upon return this set will be empty if it did not contain str, or
0705  * will only contain str if it did contain str.
0706  * A frozen set will not be modified.
0707  *
0708  * @param set the object to be modified
0709  * @param str the string
0710  * @param length the length of the string, or -1 if NUL-terminated
0711  * @stable ICU 69
0712  */
0713 U_CAPI void U_EXPORT2
0714 uset_retainString(USet *set, const UChar *str, int32_t length);
0715 
0716 /**
0717  * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
0718  * A frozen set will not be modified.
0719  *
0720  * @param set the object to be modified
0721  * @param str the string
0722  * @param length the length of the string, or -1 if NUL-terminated
0723  * @stable ICU 69
0724  */
0725 U_CAPI void U_EXPORT2
0726 uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
0727 
0728 /**
0729  * Retains only the elements in this set that are contained in the
0730  * specified set.  In other words, removes from this set all of
0731  * its elements that are not contained in the specified set.  This
0732  * operation effectively modifies this set so that its value is
0733  * the <i>intersection</i> of the two sets.
0734  * A frozen set will not be modified.
0735  *
0736  * @param set the object on which to perform the retain
0737  * @param retain set that defines which elements this set will retain
0738  * @stable ICU 3.2
0739  */
0740 U_CAPI void U_EXPORT2
0741 uset_retainAll(USet* set, const USet* retain);
0742 
0743 /**
0744  * Reallocate this object's internal structures to take up the least
0745  * possible space, without changing this object's value.
0746  * A frozen set will not be modified.
0747  *
0748  * @param set the object on which to perform the compact
0749  * @stable ICU 3.2
0750  */
0751 U_CAPI void U_EXPORT2
0752 uset_compact(USet* set);
0753 
0754 /**
0755  * This is equivalent to
0756  * <code>uset_complementRange(set, 0, 0x10FFFF)</code>.
0757  *
0758  * <strong>Note:</strong> This performs a symmetric difference with all code points
0759  * <em>and thus retains all multicharacter strings</em>.
0760  * In order to achieve a “code point complement” (all code points minus this set),
0761  * the easiest is to <code>uset_complement(set); uset_removeAllStrings(set);</code>.
0762  *
0763  * A frozen set will not be modified.
0764  * @param set the set
0765  * @stable ICU 2.4
0766  */
0767 U_CAPI void U_EXPORT2
0768 uset_complement(USet* set);
0769 
0770 /**
0771  * Complements the specified range in this set.  Any character in
0772  * the range will be removed if it is in this set, or will be
0773  * added if it is not in this set.  If <code>start > end</code>
0774  * then an empty range is complemented, leaving the set unchanged.
0775  * This is equivalent to a boolean logic XOR.
0776  * A frozen set will not be modified.
0777  *
0778  * @param set the object to be modified
0779  * @param start first character, inclusive, of range
0780  * @param end last character, inclusive, of range
0781  * @stable ICU 69
0782  */
0783 U_CAPI void U_EXPORT2
0784 uset_complementRange(USet *set, UChar32 start, UChar32 end);
0785 
0786 /**
0787  * Complements the specified string in this set.
0788  * The string will be removed if it is in this set, or will be added if it is not in this set.
0789  * A frozen set will not be modified.
0790  *
0791  * @param set the object to be modified
0792  * @param str the string
0793  * @param length the length of the string, or -1 if NUL-terminated
0794  * @stable ICU 69
0795  */
0796 U_CAPI void U_EXPORT2
0797 uset_complementString(USet *set, const UChar *str, int32_t length);
0798 
0799 /**
0800  * Complements EACH of the characters in this string. Note: "ch" == {"c", "h"}
0801  * A frozen set will not be modified.
0802  *
0803  * @param set the object to be modified
0804  * @param str the string
0805  * @param length the length of the string, or -1 if NUL-terminated
0806  * @stable ICU 69
0807  */
0808 U_CAPI void U_EXPORT2
0809 uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
0810 
0811 /**
0812  * Complements in this set all elements contained in the specified
0813  * set.  Any character in the other set will be removed if it is
0814  * in this set, or will be added if it is not in this set.
0815  * A frozen set will not be modified.
0816  *
0817  * @param set the set to be modified
0818  * @param complement set that defines which elements will be complemented
0819  * (XOR'ed) in this set.
0820  * @stable ICU 3.2
0821  */
0822 U_CAPI void U_EXPORT2
0823 uset_complementAll(USet* set, const USet* complement);
0824 
0825 /**
0826  * Removes all of the elements from this set.  This set will be
0827  * empty after this call returns.
0828  * A frozen set will not be modified.
0829  * @param set the set
0830  * @stable ICU 2.4
0831  */
0832 U_CAPI void U_EXPORT2
0833 uset_clear(USet* set);
0834 
0835 /**
0836  * Close this set over the given attribute.  For the attribute
0837  * USET_CASE_INSENSITIVE, the result is to modify this set so that:
0838  *
0839  * 1. For each character or string 'a' in this set, all strings or
0840  * characters 'b' such that foldCase(a) == foldCase(b) are added
0841  * to this set.
0842  *
0843  * 2. For each string 'e' in the resulting set, if e !=
0844  * foldCase(e), 'e' will be removed.
0845  *
0846  * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
0847  *
0848  * (Here foldCase(x) refers to the operation u_strFoldCase, and a
0849  * == b denotes that the contents are the same, not pointer
0850  * comparison.)
0851  *
0852  * A frozen set will not be modified.
0853  *
0854  * @param set the set
0855  *
0856  * @param attributes bitmask for attributes to close over.
0857  * Valid options:
0858  * At most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
0859  * These case options are mutually exclusive.
0860  * Unrelated options bits are ignored.
0861  * @stable ICU 4.2
0862  */
0863 U_CAPI void U_EXPORT2
0864 uset_closeOver(USet* set, int32_t attributes);
0865 
0866 /**
0867  * Remove all strings from this set.
0868  *
0869  * @param set the set
0870  * @stable ICU 4.2
0871  */
0872 U_CAPI void U_EXPORT2
0873 uset_removeAllStrings(USet* set);
0874 
0875 /**
0876  * Returns true if the given USet contains no characters and no
0877  * strings.
0878  * @param set the set
0879  * @return true if set is empty
0880  * @stable ICU 2.4
0881  */
0882 U_CAPI UBool U_EXPORT2
0883 uset_isEmpty(const USet* set);
0884 
0885 /**
0886  * @param set the set
0887  * @return true if this set contains multi-character strings or the empty string.
0888  * @stable ICU 70
0889  */
0890 U_CAPI UBool U_EXPORT2
0891 uset_hasStrings(const USet *set);
0892 
0893 /**
0894  * Returns true if the given USet contains the given character.
0895  * This function works faster with a frozen set.
0896  * @param set the set
0897  * @param c The codepoint to check for within the set
0898  * @return true if set contains c
0899  * @stable ICU 2.4
0900  */
0901 U_CAPI UBool U_EXPORT2
0902 uset_contains(const USet* set, UChar32 c);
0903 
0904 /**
0905  * Returns true if the given USet contains all characters c
0906  * where start <= c && c <= end.
0907  * @param set the set
0908  * @param start the first character of the range to test, inclusive
0909  * @param end the last character of the range to test, inclusive
0910  * @return true if set contains the range
0911  * @stable ICU 2.2
0912  */
0913 U_CAPI UBool U_EXPORT2
0914 uset_containsRange(const USet* set, UChar32 start, UChar32 end);
0915 
0916 /**
0917  * Returns true if the given USet contains the given string.
0918  * @param set the set
0919  * @param str the string
0920  * @param strLen the length of the string, or -1 if NUL-terminated.
0921  * @return true if set contains str
0922  * @stable ICU 2.4
0923  */
0924 U_CAPI UBool U_EXPORT2
0925 uset_containsString(const USet* set, const UChar* str, int32_t strLen);
0926 
0927 /**
0928  * Returns the index of the given character within this set, where
0929  * the set is ordered by ascending code point.  If the character
0930  * is not in this set, return -1.  The inverse of this method is
0931  * <code>uset_charAt()</code>.
0932  * @param set the set
0933  * @param c the character to obtain the index for
0934  * @return an index from 0..size()-1, or -1
0935  * @stable ICU 3.2
0936  */
0937 U_CAPI int32_t U_EXPORT2
0938 uset_indexOf(const USet* set, UChar32 c);
0939 
0940 /**
0941  * Returns the character at the given index within this set, where
0942  * the set is ordered by ascending code point.  If the index is
0943  * out of range for characters, returns (UChar32)-1.
0944  * The inverse of this method is <code>uset_indexOf()</code>.
0945  *
0946  * For iteration, this is slower than uset_getRangeCount()/uset_getItemCount()
0947  * with uset_getItem(), because for each call it skips linearly over <code>index</code>
0948  * characters in the ranges.
0949  *
0950  * @param set the set
0951  * @param charIndex an index from 0..size()-1 to obtain the char for
0952  * @return the character at the given index, or (UChar32)-1.
0953  * @stable ICU 3.2
0954  */
0955 U_CAPI UChar32 U_EXPORT2
0956 uset_charAt(const USet* set, int32_t charIndex);
0957 
0958 /**
0959  * Returns the number of characters and strings contained in this set.
0960  * The last (uset_getItemCount() - uset_getRangeCount()) items are strings.
0961  *
0962  * This is slower than uset_getRangeCount() and uset_getItemCount() because
0963  * it counts the code points of all ranges.
0964  *
0965  * @param set the set
0966  * @return a non-negative integer counting the characters and strings
0967  * contained in set
0968  * @stable ICU 2.4
0969  * @see uset_getRangeCount
0970  */
0971 U_CAPI int32_t U_EXPORT2
0972 uset_size(const USet* set);
0973 
0974 /**
0975  * @param set the set
0976  * @return the number of ranges in this set.
0977  * @stable ICU 70
0978  * @see uset_getItemCount
0979  * @see uset_getItem
0980  * @see uset_size
0981  */
0982 U_CAPI int32_t U_EXPORT2
0983 uset_getRangeCount(const USet *set);
0984 
0985 /**
0986  * Returns the number of items in this set.  An item is either a range
0987  * of characters or a single multicharacter string.
0988  * @param set the set
0989  * @return a non-negative integer counting the character ranges
0990  * and/or strings contained in set
0991  * @stable ICU 2.4
0992  */
0993 U_CAPI int32_t U_EXPORT2
0994 uset_getItemCount(const USet* set);
0995 
0996 /**
0997  * Returns an item of this set.  An item is either a range of
0998  * characters or a single multicharacter string (which can be the empty string).
0999  *
1000  * If <code>itemIndex</code> is less than uset_getRangeCount(), then this function returns 0,
1001  * and the range is <code>*start</code>..<code>*end</code>.
1002  *
1003  * If <code>itemIndex</code> is at least uset_getRangeCount() and less than uset_getItemCount(), then
1004  * this function copies the string into <code>str[strCapacity]</code> and
1005  * returns the length of the string (0 for the empty string).
1006  *
1007  * If <code>itemIndex</code> is out of range, then this function returns -1.
1008  *
1009  * Note that 0 is returned for each range as well as for the empty string.
1010  *
1011  * @param set the set
1012  * @param itemIndex a non-negative integer in the range 0..uset_getItemCount(set)-1
1013  * @param start pointer to variable to receive first character in range, inclusive;
1014  *              can be NULL for a string item
1015  * @param end pointer to variable to receive last character in range, inclusive;
1016  *            can be NULL for a string item
1017  * @param str buffer to receive the string, may be NULL
1018  * @param strCapacity capacity of str, or 0 if str is NULL
1019  * @param ec error code; U_INDEX_OUTOFBOUNDS_ERROR if the itemIndex is out of range
1020  * @return the length of the string (0 or >= 2), or 0 if the item is a range,
1021  *         or -1 if the itemIndex is out of range
1022  * @stable ICU 2.4
1023  */
1024 U_CAPI int32_t U_EXPORT2
1025 uset_getItem(const USet* set, int32_t itemIndex,
1026              UChar32* start, UChar32* end,
1027              UChar* str, int32_t strCapacity,
1028              UErrorCode* ec);
1029 
1030 /**
1031  * Returns true if set1 contains all the characters and strings
1032  * of set2. It answers the question, 'Is set1 a superset of set2?'
1033  * @param set1 the candidate superset
1034  * @param set2 the set whose characters and strings are looked up in set1
1035  * @return true if the test condition is met
1036  * @stable ICU 3.2
1037  */
1038 U_CAPI UBool U_EXPORT2
1039 uset_containsAll(const USet* set1, const USet* set2);
1040 
1041 /**
1042  * Returns true if this set contains all the characters
1043  * of the given string. This does not check containment of grapheme
1044  * clusters, like uset_containsString.
1045  * @param set set of characters to be checked for containment
1046  * @param str string containing codepoints to be checked for containment
1047  * @param strLen the length of the string, or -1 if NUL-terminated.
1048  * @return true if the test condition is met
1049  * @stable ICU 3.4
1050  */
1051 U_CAPI UBool U_EXPORT2
1052 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
1053 
1054 /**
1055  * Returns true if set1 contains none of the characters and strings
1056  * of set2. It answers the question, 'Are set1 and set2 disjoint?'
1057  * @param set1 set to be checked for containment
1058  * @param set2 set to be checked for containment
1059  * @return true if the test condition is met
1060  * @stable ICU 3.2
1061  */
1062 U_CAPI UBool U_EXPORT2
1063 uset_containsNone(const USet* set1, const USet* set2);
1064 
1065 /**
1066  * Returns true if set1 contains some of the characters and strings
1067  * of set2. It answers the question, 'Do set1 and set2 have an intersection?'
1068  * @param set1 set to be checked for containment
1069  * @param set2 set to be checked for containment
1070  * @return true if the test condition is met
1071  * @stable ICU 3.2
1072  */
1073 U_CAPI UBool U_EXPORT2
1074 uset_containsSome(const USet* set1, const USet* set2);
1075 
1076 /**
1077  * Returns the length of the initial substring of the input string which
1078  * consists only of characters and strings that are contained in this set
1079  * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
1080  * or only of characters and strings that are not contained
1081  * in this set (USET_SPAN_NOT_CONTAINED).
1082  * See USetSpanCondition for details.
1083  * Similar to the strspn() C library function.
1084  * Unpaired surrogates are treated according to contains() of their surrogate code points.
1085  * This function works faster with a frozen set and with a non-negative string length argument.
1086  * @param set the set
1087  * @param s start of the string
1088  * @param length of the string; can be -1 for NUL-terminated
1089  * @param spanCondition specifies the containment condition
1090  * @return the length of the initial substring according to the spanCondition;
1091  *         0 if the start of the string does not fit the spanCondition
1092  * @stable ICU 3.8
1093  * @see USetSpanCondition
1094  */
1095 U_CAPI int32_t U_EXPORT2
1096 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1097 
1098 /**
1099  * Returns the start of the trailing substring of the input string which
1100  * consists only of characters and strings that are contained in this set
1101  * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
1102  * or only of characters and strings that are not contained
1103  * in this set (USET_SPAN_NOT_CONTAINED).
1104  * See USetSpanCondition for details.
1105  * Unpaired surrogates are treated according to contains() of their surrogate code points.
1106  * This function works faster with a frozen set and with a non-negative string length argument.
1107  * @param set the set
1108  * @param s start of the string
1109  * @param length of the string; can be -1 for NUL-terminated
1110  * @param spanCondition specifies the containment condition
1111  * @return the start of the trailing substring according to the spanCondition;
1112  *         the string length if the end of the string does not fit the spanCondition
1113  * @stable ICU 3.8
1114  * @see USetSpanCondition
1115  */
1116 U_CAPI int32_t U_EXPORT2
1117 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1118 
1119 /**
1120  * Returns the length of the initial substring of the input string which
1121  * consists only of characters and strings that are contained in this set
1122  * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
1123  * or only of characters and strings that are not contained
1124  * in this set (USET_SPAN_NOT_CONTAINED).
1125  * See USetSpanCondition for details.
1126  * Similar to the strspn() C library function.
1127  * Malformed byte sequences are treated according to contains(0xfffd).
1128  * This function works faster with a frozen set and with a non-negative string length argument.
1129  * @param set the set
1130  * @param s start of the string (UTF-8)
1131  * @param length of the string; can be -1 for NUL-terminated
1132  * @param spanCondition specifies the containment condition
1133  * @return the length of the initial substring according to the spanCondition;
1134  *         0 if the start of the string does not fit the spanCondition
1135  * @stable ICU 3.8
1136  * @see USetSpanCondition
1137  */
1138 U_CAPI int32_t U_EXPORT2
1139 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1140 
1141 /**
1142  * Returns the start of the trailing substring of the input string which
1143  * consists only of characters and strings that are contained in this set
1144  * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
1145  * or only of characters and strings that are not contained
1146  * in this set (USET_SPAN_NOT_CONTAINED).
1147  * See USetSpanCondition for details.
1148  * Malformed byte sequences are treated according to contains(0xfffd).
1149  * This function works faster with a frozen set and with a non-negative string length argument.
1150  * @param set the set
1151  * @param s start of the string (UTF-8)
1152  * @param length of the string; can be -1 for NUL-terminated
1153  * @param spanCondition specifies the containment condition
1154  * @return the start of the trailing substring according to the spanCondition;
1155  *         the string length if the end of the string does not fit the spanCondition
1156  * @stable ICU 3.8
1157  * @see USetSpanCondition
1158  */
1159 U_CAPI int32_t U_EXPORT2
1160 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1161 
1162 /**
1163  * Returns true if set1 contains all of the characters and strings
1164  * of set2, and vice versa. It answers the question, 'Is set1 equal to set2?'
1165  * @param set1 set to be checked for containment
1166  * @param set2 set to be checked for containment
1167  * @return true if the test condition is met
1168  * @stable ICU 3.2
1169  */
1170 U_CAPI UBool U_EXPORT2
1171 uset_equals(const USet* set1, const USet* set2);
1172 
1173 /*********************************************************************
1174  * Serialized set API
1175  *********************************************************************/
1176 
1177 /**
1178  * Serializes this set into an array of 16-bit integers.  Serialization
1179  * (currently) only records the characters in the set; multicharacter
1180  * strings are ignored.
1181  *
1182  * The array
1183  * has the following format (each line is one 16-bit integer):
1184  *
1185  *  length     = (n+2*m) | (m!=0?0x8000:0)
1186  *  bmpLength  = n; present if m!=0
1187  *  bmp[0]
1188  *  bmp[1]
1189  *  ...
1190  *  bmp[n-1]
1191  *  supp-high[0]
1192  *  supp-low[0]
1193  *  supp-high[1]
1194  *  supp-low[1]
1195  *  ...
1196  *  supp-high[m-1]
1197  *  supp-low[m-1]
1198  *
1199  * The array starts with a header.  After the header are n bmp
1200  * code points, then m supplementary code points.  Either n or m
1201  * or both may be zero.  n+2*m is always <= 0x7FFF.
1202  *
1203  * If there are no supplementary characters (if m==0) then the
1204  * header is one 16-bit integer, 'length', with value n.
1205  *
1206  * If there are supplementary characters (if m!=0) then the header
1207  * is two 16-bit integers.  The first, 'length', has value
1208  * (n+2*m)|0x8000.  The second, 'bmpLength', has value n.
1209  *
1210  * After the header the code points are stored in ascending order.
1211  * Supplementary code points are stored as most significant 16
1212  * bits followed by least significant 16 bits.
1213  *
1214  * @param set the set
1215  * @param dest pointer to buffer of destCapacity 16-bit integers.
1216  * May be NULL only if destCapacity is zero.
1217  * @param destCapacity size of dest, or zero.  Must not be negative.
1218  * @param pErrorCode pointer to the error code.  Will be set to
1219  * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF.  Will be set to
1220  * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
1221  * @return the total length of the serialized format, including
1222  * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
1223  * than U_BUFFER_OVERFLOW_ERROR.
1224  * @stable ICU 2.4
1225  */
1226 U_CAPI int32_t U_EXPORT2
1227 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1228 
1229 /**
1230  * Given a serialized array, fill in the given serialized set object.
1231  * @param fillSet pointer to result
1232  * @param src pointer to start of array
1233  * @param srcLength length of array
1234  * @return true if the given array is valid, otherwise false
1235  * @stable ICU 2.4
1236  */
1237 U_CAPI UBool U_EXPORT2
1238 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1239 
1240 /**
1241  * Set the USerializedSet to contain the given character (and nothing
1242  * else).
1243  * @param fillSet pointer to result
1244  * @param c The codepoint to set
1245  * @stable ICU 2.4
1246  */
1247 U_CAPI void U_EXPORT2
1248 uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
1249 
1250 /**
1251  * Returns true if the given USerializedSet contains the given
1252  * character.
1253  * @param set the serialized set
1254  * @param c The codepoint to check for within the set
1255  * @return true if set contains c
1256  * @stable ICU 2.4
1257  */
1258 U_CAPI UBool U_EXPORT2
1259 uset_serializedContains(const USerializedSet* set, UChar32 c);
1260 
1261 /**
1262  * Returns the number of disjoint ranges of characters contained in
1263  * the given serialized set.  Ignores any strings contained in the
1264  * set.
1265  * @param set the serialized set
1266  * @return a non-negative integer counting the character ranges
1267  * contained in set
1268  * @stable ICU 2.4
1269  */
1270 U_CAPI int32_t U_EXPORT2
1271 uset_getSerializedRangeCount(const USerializedSet* set);
1272 
1273 /**
1274  * Returns a range of characters contained in the given serialized
1275  * set.
1276  * @param set the serialized set
1277  * @param rangeIndex a non-negative integer in the range 0..
1278  * uset_getSerializedRangeCount(set)-1
1279  * @param pStart pointer to variable to receive first character
1280  * in range, inclusive
1281  * @param pEnd pointer to variable to receive last character in range,
1282  * inclusive
1283  * @return true if rangeIndex is valid, otherwise false
1284  * @stable ICU 2.4
1285  */
1286 U_CAPI UBool U_EXPORT2
1287 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1288                         UChar32* pStart, UChar32* pEnd);
1289 
1290 #endif