|
||||
File indexing completed on 2025-01-18 10:13:15
0001 // © 2016 and later: Unicode, Inc. and others. 0002 // License & terms of use: http://www.unicode.org/copyright.html 0003 /* 0004 ******************************************************************************* 0005 * 0006 * Copyright (C) 2002-2014, International Business Machines 0007 * Corporation and others. All Rights Reserved. 0008 * 0009 ******************************************************************************* 0010 * file name: uset.h 0011 * encoding: UTF-8 0012 * tab size: 8 (not used) 0013 * indentation:4 0014 * 0015 * created on: 2002mar07 0016 * created by: Markus W. Scherer 0017 * 0018 * C version of UnicodeSet. 0019 */ 0020 0021 0022 /** 0023 * \file 0024 * \brief C API: Unicode Set 0025 * 0026 * <p>This is a C wrapper around the C++ UnicodeSet class.</p> 0027 */ 0028 0029 #ifndef __USET_H__ 0030 #define __USET_H__ 0031 0032 #include "unicode/utypes.h" 0033 #include "unicode/uchar.h" 0034 0035 #if U_SHOW_CPLUSPLUS_API 0036 #include "unicode/localpointer.h" 0037 #endif // U_SHOW_CPLUSPLUS_API 0038 0039 #ifndef USET_DEFINED 0040 0041 #ifndef U_IN_DOXYGEN 0042 #define USET_DEFINED 0043 #endif 0044 /** 0045 * USet is the C API type corresponding to C++ class UnicodeSet. 0046 * Use the uset_* API to manipulate. Create with 0047 * uset_open*, and destroy with uset_close. 0048 * @stable ICU 2.4 0049 */ 0050 typedef struct USet USet; 0051 #endif 0052 0053 /** 0054 * Bitmask values to be passed to uset_openPatternOptions() or 0055 * uset_applyPattern() taking an option parameter. 0056 * 0057 * Use at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE. 0058 * These case options are mutually exclusive. 0059 * 0060 * Undefined options bits are ignored, and reserved for future use. 0061 * 0062 * @stable ICU 2.4 0063 */ 0064 enum { 0065 /** 0066 * Ignore white space within patterns unless quoted or escaped. 0067 * @stable ICU 2.4 0068 */ 0069 USET_IGNORE_SPACE = 1, 0070 0071 /** 0072 * Enable case insensitive matching. 
E.g., "[ab]" with this flag 0073 * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will 0074 * match all except 'a', 'A', 'b', and 'B'. This performs a full 0075 * closure over case mappings, e.g. 'ſ' (U+017F long s) for 's'. 0076 * 0077 * The resulting set is a superset of the input for the code points but 0078 * not for the strings. 0079 * It performs a case mapping closure of the code points and adds 0080 * full case folding strings for the code points, and reduces strings of 0081 * the original set to their full case folding equivalents. 0082 * 0083 * This is designed for case-insensitive matches, for example 0084 * in regular expressions. The full code point case closure allows checking of 0085 * an input character directly against the closure set. 0086 * Strings are matched by comparing the case-folded form from the closure 0087 * set with an incremental case folding of the string in question. 0088 * 0089 * The closure set will also contain single code points if the original 0090 * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.). 0091 * This is not necessary (that is, redundant) for the above matching method 0092 * but results in the same closure sets regardless of whether the original 0093 * set contained the code point or a string. 0094 * 0095 * @stable ICU 2.4 0096 */ 0097 USET_CASE_INSENSITIVE = 2, 0098 0099 /** 0100 * Adds all case mappings for each element in the set. 0101 * This adds the full lower-, title-, and uppercase mappings as well as the full case folding 0102 * of each existing element in the set. 0103 * 0104 * Unlike the “case insensitive” options, this does not perform a closure. 0105 * For example, it does not add 'ſ' (U+017F long s) for 's', 0106 * 'K' (U+212A Kelvin sign) for 'k', or replace set strings by their case-folded versions. 0107 * 0108 * @stable ICU 3.2 0109 */ 0110 USET_ADD_CASE_MAPPINGS = 4, 0111 0112 #ifndef U_HIDE_DRAFT_API 0113 /** 0114 * Enable case insensitive matching. 
0115 * Same as USET_CASE_INSENSITIVE but using only Simple_Case_Folding (scf) mappings, 0116 * which map each code point to one code point, 0117 * not full Case_Folding (cf) mappings, which map some code points to multiple code points. 0118 * 0119 * This is designed for case-insensitive matches, for example in certain 0120 * regular expression implementations where only Simple_Case_Folding mappings are used, 0121 * such as in ECMAScript (JavaScript) regular expressions. 0122 * 0123 * @draft ICU 73 0124 */ 0125 USET_SIMPLE_CASE_INSENSITIVE = 6 0126 #endif // U_HIDE_DRAFT_API 0127 }; 0128 0129 /** 0130 * Argument values for whether span() and similar functions continue while 0131 * the current character is contained vs. not contained in the set. 0132 * 0133 * The functionality is straightforward for sets with only single code points, 0134 * without strings (which is the common case): 0135 * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same. 0136 * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED. 0137 * - span() and spanBack() partition any string the same way when 0138 * alternating between span(USET_SPAN_NOT_CONTAINED) and 0139 * span(either "contained" condition). 0140 * - Using a complemented (inverted) set and the opposite span conditions 0141 * yields the same results. 0142 * 0143 * When a set contains multi-code point strings, then these statements may not 0144 * be true, depending on the strings in the set (for example, whether they 0145 * overlap with each other) and the string that is processed. 0146 * For a set with strings: 0147 * - The complement of the set contains the opposite set of code points, 0148 * but the same set of strings. 0149 * Therefore, complementing both the set and the span conditions 0150 * may yield different results. 0151 * - When starting spans at different positions in a string 0152 * (span(s, ...) vs. 
span(s+1, ...)) the ends of the spans may be different 0153 * because a set string may start before the later position. 0154 * - span(USET_SPAN_SIMPLE) may be shorter than 0155 * span(USET_SPAN_CONTAINED) because it will not recursively try 0156 * all possible paths. 0157 * For example, with a set which contains the three strings "xy", "xya" and "ax", 0158 * span("xyax", USET_SPAN_CONTAINED) will return 4 but 0159 * span("xyax", USET_SPAN_SIMPLE) will return 3. 0160 * span(USET_SPAN_SIMPLE) will never be longer than 0161 * span(USET_SPAN_CONTAINED). 0162 * - With either "contained" condition, span() and spanBack() may partition 0163 * a string in different ways. 0164 * For example, with a set which contains the two strings "ab" and "ba", 0165 * and when processing the string "aba", 0166 * span() will yield contained/not-contained boundaries of { 0, 2, 3 } 0167 * while spanBack() will yield boundaries of { 0, 1, 3 }. 0168 * 0169 * Note: If it is important to get the same boundaries whether iterating forward 0170 * or backward through a string, then either only span() should be used and 0171 * the boundaries cached for backward operation, or an ICU BreakIterator 0172 * could be used. 0173 * 0174 * Note: Unpaired surrogates are treated like surrogate code points. 0175 * Similarly, set strings match only on code point boundaries, 0176 * never in the middle of a surrogate pair. 0177 * Illegal UTF-8 sequences are treated like U+FFFD. 0178 * When processing UTF-8 strings, malformed set strings 0179 * (strings with unpaired surrogates which cannot be converted to UTF-8) 0180 * are ignored. 0181 * 0182 * @stable ICU 3.8 0183 */ 0184 typedef enum USetSpanCondition { 0185 /** 0186 * Continues a span() while there is no set element at the current position. 0187 * Increments by one code point at a time. 0188 * Stops before the first set element (character or string). 0189 * (For code points only, this is like while contains(current)==false). 
0190 * 0191 * When span() returns, the substring between where it started and the position 0192 * it returned consists only of characters that are not in the set, 0193 * and none of its strings overlap with the span. 0194 * 0195 * @stable ICU 3.8 0196 */ 0197 USET_SPAN_NOT_CONTAINED = 0, 0198 /** 0199 * Spans the longest substring that is a concatenation of set elements (characters or strings). 0200 * (For characters only, this is like while contains(current)==true). 0201 * 0202 * When span() returns, the substring between where it started and the position 0203 * it returned consists only of set elements (characters or strings) that are in the set. 0204 * 0205 * If a set contains strings, then the span will be the longest substring for which there 0206 * exists at least one non-overlapping concatenation of set elements (characters or strings). 0207 * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>. 0208 * (Java/ICU/Perl regex stops at the first match of an OR.) 0209 * 0210 * @stable ICU 3.8 0211 */ 0212 USET_SPAN_CONTAINED = 1, 0213 /** 0214 * Continues a span() while there is a set element at the current position. 0215 * Increments by the longest matching element at each position. 0216 * (For characters only, this is like while contains(current)==true). 0217 * 0218 * When span() returns, the substring between where it started and the position 0219 * it returned consists only of set elements (characters or strings) that are in the set. 0220 * 0221 * If a set only contains single characters, then this is the same 0222 * as USET_SPAN_CONTAINED. 0223 * 0224 * If a set contains strings, then the span will be the longest substring 0225 * with a match at each position with the longest single set element (character or string). 0226 * 0227 * Use this span condition together with other longest-match algorithms, 0228 * such as ICU converters (ucnv_getUnicodeSet()). 
0229 * 0230 * @stable ICU 3.8 0231 */ 0232 USET_SPAN_SIMPLE = 2, 0233 #ifndef U_HIDE_DEPRECATED_API 0234 /** 0235 * One more than the last span condition. 0236 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 0237 */ 0238 USET_SPAN_CONDITION_COUNT 0239 #endif // U_HIDE_DEPRECATED_API 0240 } USetSpanCondition; 0241 0242 enum { 0243 /** 0244 * Capacity of USerializedSet::staticArray. 0245 * Enough for any single-code point set. 0246 * Also provides padding for nice sizeof(USerializedSet). 0247 * @stable ICU 2.4 0248 */ 0249 USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8 0250 }; 0251 0252 /** 0253 * A serialized form of a Unicode set. Limited manipulations are 0254 * possible directly on a serialized set. See below. 0255 * @stable ICU 2.4 0256 */ 0257 typedef struct USerializedSet { 0258 /** 0259 * The serialized Unicode Set. 0260 * @stable ICU 2.4 0261 */ 0262 const uint16_t *array; 0263 /** 0264 * The length of the array that contains BMP characters. 0265 * @stable ICU 2.4 0266 */ 0267 int32_t bmpLength; 0268 /** 0269 * The total length of the array. 0270 * @stable ICU 2.4 0271 */ 0272 int32_t length; 0273 /** 0274 * A small buffer for the array to reduce memory allocations. 0275 * @stable ICU 2.4 0276 */ 0277 uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]; 0278 } USerializedSet; 0279 0280 /********************************************************************* 0281 * USet API 0282 *********************************************************************/ 0283 0284 /** 0285 * Create an empty USet object. 0286 * Equivalent to uset_open(1, 0). 0287 * @return a newly created USet. The caller must call uset_close() on 0288 * it when done. 0289 * @stable ICU 4.2 0290 */ 0291 U_CAPI USet* U_EXPORT2 0292 uset_openEmpty(void); 0293 0294 /** 0295 * Creates a USet object that contains the range of characters 0296 * start..end, inclusive. If <code>start > end</code> 0297 * then an empty set is created (same as using uset_openEmpty()). 
0298 * @param start first character of the range, inclusive 0299 * @param end last character of the range, inclusive 0300 * @return a newly created USet. The caller must call uset_close() on 0301 * it when done. 0302 * @stable ICU 2.4 0303 */ 0304 U_CAPI USet* U_EXPORT2 0305 uset_open(UChar32 start, UChar32 end); 0306 0307 /** 0308 * Creates a set from the given pattern. See the UnicodeSet class 0309 * description for the syntax of the pattern language. 0310 * @param pattern a string specifying what characters are in the set 0311 * @param patternLength the length of the pattern, or -1 if null 0312 * terminated 0313 * @param ec the error code 0314 * @stable ICU 2.4 0315 */ 0316 U_CAPI USet* U_EXPORT2 0317 uset_openPattern(const UChar* pattern, int32_t patternLength, 0318 UErrorCode* ec); 0319 0320 /** 0321 * Creates a set from the given pattern. See the UnicodeSet class 0322 * description for the syntax of the pattern language. 0323 * @param pattern a string specifying what characters are in the set 0324 * @param patternLength the length of the pattern, or -1 if null 0325 * terminated 0326 * @param options bitmask for options to apply to the pattern. 0327 * Valid options are USET_IGNORE_SPACE and 0328 * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE. 0329 * These case options are mutually exclusive. 0330 * @param ec the error code 0331 * @stable ICU 2.4 0332 */ 0333 U_CAPI USet* U_EXPORT2 0334 uset_openPatternOptions(const UChar* pattern, int32_t patternLength, 0335 uint32_t options, 0336 UErrorCode* ec); 0337 0338 /** 0339 * Disposes of the storage used by a USet object. This function should 0340 * be called exactly once for objects returned by uset_open(). 
0341 * @param set the object to dispose of 0342 * @stable ICU 2.4 0343 */ 0344 U_CAPI void U_EXPORT2 0345 uset_close(USet* set); 0346 0347 #if U_SHOW_CPLUSPLUS_API 0348 0349 U_NAMESPACE_BEGIN 0350 0351 /** 0352 * \class LocalUSetPointer 0353 * "Smart pointer" class, closes a USet via uset_close(). 0354 * For most methods see the LocalPointerBase base class. 0355 * 0356 * @see LocalPointerBase 0357 * @see LocalPointer 0358 * @stable ICU 4.4 0359 */ 0360 U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close); 0361 0362 U_NAMESPACE_END 0363 0364 #endif 0365 0366 /** 0367 * Returns a copy of this object. 0368 * If this set is frozen, then the clone will be frozen as well. 0369 * Use uset_cloneAsThawed() for a mutable clone of a frozen set. 0370 * @param set the original set 0371 * @return the newly allocated copy of the set 0372 * @see uset_cloneAsThawed 0373 * @stable ICU 3.8 0374 */ 0375 U_CAPI USet * U_EXPORT2 0376 uset_clone(const USet *set); 0377 0378 /** 0379 * Determines whether the set has been frozen (made immutable) or not. 0380 * See the ICU4J Freezable interface for details. 0381 * @param set the set 0382 * @return true/false for whether the set has been frozen 0383 * @see uset_freeze 0384 * @see uset_cloneAsThawed 0385 * @stable ICU 3.8 0386 */ 0387 U_CAPI UBool U_EXPORT2 0388 uset_isFrozen(const USet *set); 0389 0390 /** 0391 * Freeze the set (make it immutable). 0392 * Once frozen, it cannot be unfrozen and is therefore thread-safe 0393 * until it is deleted. 0394 * See the ICU4J Freezable interface for details. 0395 * Freezing the set may also make some operations faster, for example 0396 * uset_contains() and uset_span(). 0397 * A frozen set will not be modified. (It remains frozen.) 
0398 * @param set the set; it is frozen in place 0399 * (this function is declared void, so no value is returned) 0400 * @see uset_isFrozen 0401 * @see uset_cloneAsThawed 0402 * @stable ICU 3.8 0403 */ 0404 U_CAPI void U_EXPORT2 0405 uset_freeze(USet *set); 0406 0407 /** 0408 * Clone the set and make the clone mutable. 0409 * See the ICU4J Freezable interface for details. 0410 * @param set the set 0411 * @return the mutable clone 0412 * @see uset_freeze 0413 * @see uset_isFrozen 0414 * @see uset_clone 0415 * @stable ICU 3.8 0416 */ 0417 U_CAPI USet * U_EXPORT2 0418 uset_cloneAsThawed(const USet *set); 0419 0420 /** 0421 * Causes the USet object to represent the range <code>start - end</code>. 0422 * If <code>start > end</code> then this USet is set to an empty range. 0423 * A frozen set will not be modified. 0424 * @param set the object to set to the given range 0425 * @param start first character in the set, inclusive 0426 * @param end last character in the set, inclusive 0427 * @stable ICU 3.2 0428 */ 0429 U_CAPI void U_EXPORT2 0430 uset_set(USet* set, 0431 UChar32 start, UChar32 end); 0432 0433 /** 0434 * Modifies the set to represent the set specified by the given 0435 * pattern. See the UnicodeSet class description for the syntax of 0436 * the pattern language. See also the User Guide chapter about UnicodeSet. 0437 * <em>Empties the set passed before applying the pattern.</em> 0438 * A frozen set will not be modified. 0439 * @param set The set to which the pattern is to be applied. 0440 * @param pattern A pointer to UChar string specifying what characters are in the set. 0441 * The character at pattern[0] must be a '['. 0442 * @param patternLength The length of the UChar string. -1 if NUL terminated. 0443 * @param options A bitmask for options to apply to the pattern. 0444 * Valid options are USET_IGNORE_SPACE and 0445 * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, 0446 * USET_SIMPLE_CASE_INSENSITIVE. 0447 * These case options are mutually exclusive. 
0448 * @param status Returns an error if the pattern cannot be parsed. 0449 * @return Upon successful parse, the value is 0450 * the index of the character after the closing ']' 0451 * of the parsed pattern. 0452 * If the status code indicates failure, then the return value 0453 * is the index of the error in the source. 0454 * 0455 * @stable ICU 2.8 0456 */ 0457 U_CAPI int32_t U_EXPORT2 0458 uset_applyPattern(USet *set, 0459 const UChar *pattern, int32_t patternLength, 0460 uint32_t options, 0461 UErrorCode *status); 0462 0463 /** 0464 * Modifies the set to contain those code points which have the given value 0465 * for the given binary or enumerated property, as returned by 0466 * u_getIntPropertyValue. Prior contents of this set are lost. 0467 * A frozen set will not be modified. 0468 * 0469 * @param set the object to contain the code points defined by the property 0470 * 0471 * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1 0472 * or UCHAR_INT_START..UCHAR_INT_LIMIT-1 0473 * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1. 0474 * 0475 * @param value a value in the range u_getIntPropertyMinValue(prop).. 0476 * u_getIntPropertyMaxValue(prop), with one exception. If prop is 0477 * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but 0478 * rather a mask value produced by U_GET_GC_MASK(). This allows grouped 0479 * categories such as [:L:] to be represented. 0480 * 0481 * @param ec error code input/output parameter 0482 * 0483 * @stable ICU 3.2 0484 */ 0485 U_CAPI void U_EXPORT2 0486 uset_applyIntPropertyValue(USet* set, 0487 UProperty prop, int32_t value, UErrorCode* ec); 0488 0489 /** 0490 * Modifies the set to contain those code points which have the 0491 * given value for the given property. Prior contents of this 0492 * set are lost. 0493 * A frozen set will not be modified. 
0494 * 0495 * @param set the object to contain the code points defined by the given 0496 * property and value alias 0497 * 0498 * @param prop a string specifying a property alias, either short or long. 0499 * The name is matched loosely. See PropertyAliases.txt for names and a 0500 * description of loose matching. If the value string is empty, then this 0501 * string is interpreted as either a General_Category value alias, a Script 0502 * value alias, a binary property alias, or a special ID. Special IDs are 0503 * matched loosely and correspond to the following sets: 0504 * 0505 * "ANY" = [\\u0000-\\U0010FFFF], 0506 * "ASCII" = [\\u0000-\\u007F], 0507 * "Assigned" = [:^Cn:]. 0508 * 0509 * @param propLength the length of the prop, or -1 if NUL-terminated 0510 * 0511 * @param value a string specifying a value alias, either short or long. 0512 * The name is matched loosely. See PropertyValueAliases.txt for names 0513 * and a description of loose matching. In addition to aliases listed, 0514 * numeric values and canonical combining classes may be expressed 0515 * numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string 0516 * may also be empty. 0517 * 0518 * @param valueLength the length of the value, or -1 if NUL-terminated 0519 * 0520 * @param ec error code input/output parameter 0521 * 0522 * @stable ICU 3.2 0523 */ 0524 U_CAPI void U_EXPORT2 0525 uset_applyPropertyAlias(USet* set, 0526 const UChar *prop, int32_t propLength, 0527 const UChar *value, int32_t valueLength, 0528 UErrorCode* ec); 0529 0530 /** 0531 * Return true if the given position, in the given pattern, appears 0532 * to be the start of a UnicodeSet pattern. 
0533 * 0534 * @param pattern a string specifying the pattern 0535 * @param patternLength the length of the pattern, or -1 if NUL-terminated 0536 * @param pos the given position 0537 * @stable ICU 3.2 0538 */ 0539 U_CAPI UBool U_EXPORT2 0540 uset_resemblesPattern(const UChar *pattern, int32_t patternLength, 0541 int32_t pos); 0542 0543 /** 0544 * Returns a string representation of this set. If the result of 0545 * calling this function is passed to a uset_openPattern(), it 0546 * will produce another set that is equal to this one. 0547 * @param set the set 0548 * @param result the string to receive the rules, may be NULL 0549 * @param resultCapacity the capacity of result, may be 0 if result is NULL 0550 * @param escapeUnprintable if true then convert unprintable 0551 * characters to their hex escape representations, \\uxxxx or 0552 * \\Uxxxxxxxx. Unprintable characters are those other than 0553 * U+000A, U+0020..U+007E. 0554 * @param ec error code. 0555 * @return length of string, possibly larger than resultCapacity 0556 * @stable ICU 2.4 0557 */ 0558 U_CAPI int32_t U_EXPORT2 0559 uset_toPattern(const USet* set, 0560 UChar* result, int32_t resultCapacity, 0561 UBool escapeUnprintable, 0562 UErrorCode* ec); 0563 0564 /** 0565 * Adds the given character to the given USet. After this call, 0566 * uset_contains(set, c) will return true. 0567 * A frozen set will not be modified. 0568 * @param set the object to which to add the character 0569 * @param c the character to add 0570 * @stable ICU 2.4 0571 */ 0572 U_CAPI void U_EXPORT2 0573 uset_add(USet* set, UChar32 c); 0574 0575 /** 0576 * Adds all of the elements in the specified set to this set if 0577 * they're not already present. This operation effectively 0578 * modifies this set so that its value is the <i>union</i> of the two 0579 * sets. The behavior of this operation is unspecified if the specified 0580 * collection is modified while the operation is in progress. 0581 * A frozen set will not be modified. 
0582 * 0583 * @param set the object to which to add the set 0584 * @param additionalSet the source set whose elements are to be added to this set. 0585 * @stable ICU 2.6 0586 */ 0587 U_CAPI void U_EXPORT2 0588 uset_addAll(USet* set, const USet *additionalSet); 0589 0590 /** 0591 * Adds the given range of characters to the given USet. After this call, 0592 * uset_containsRange(set, start, end) will return true. 0593 * A frozen set will not be modified. 0594 * @param set the object to which to add the range 0595 * @param start the first character of the range to add, inclusive 0596 * @param end the last character of the range to add, inclusive 0597 * @stable ICU 2.2 0598 */ 0599 U_CAPI void U_EXPORT2 0600 uset_addRange(USet* set, UChar32 start, UChar32 end); 0601 0602 /** 0603 * Adds the given string to the given USet. After this call, 0604 * uset_containsString(set, str, strLen) will return true. 0605 * A frozen set will not be modified. 0606 * @param set the object to which to add the string 0607 * @param str the string to add 0608 * @param strLen the length of the string or -1 if null terminated. 0609 * @stable ICU 2.4 0610 */ 0611 U_CAPI void U_EXPORT2 0612 uset_addString(USet* set, const UChar* str, int32_t strLen); 0613 0614 /** 0615 * Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"} 0616 * If this set already contains any particular character, it has no effect on that character. 0617 * A frozen set will not be modified. 0618 * @param set the object to which to add the characters 0619 * @param str the source string 0620 * @param strLen the length of the string or -1 if null terminated. 0621 * @stable ICU 3.4 0622 */ 0623 U_CAPI void U_EXPORT2 0624 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen); 0625 0626 /** 0627 * Removes the given character from the given USet. After this call, 0628 * uset_contains(set, c) will return false. 0629 * A frozen set will not be modified. 
0630 * @param set the object from which to remove the character 0631 * @param c the character to remove 0632 * @stable ICU 2.4 0633 */ 0634 U_CAPI void U_EXPORT2 0635 uset_remove(USet* set, UChar32 c); 0636 0637 /** 0638 * Removes the given range of characters from the given USet. After this call, 0639 * uset_containsRange(set, start, end) will return false. 0640 * A frozen set will not be modified. 0641 * @param set the object from which to remove the range 0642 * @param start the first character of the range to remove, inclusive 0643 * @param end the last character of the range to remove, inclusive 0644 * @stable ICU 2.2 0645 */ 0646 U_CAPI void U_EXPORT2 0647 uset_removeRange(USet* set, UChar32 start, UChar32 end); 0648 0649 /** 0650 * Removes the given string from the given USet. After this call, 0651 * uset_containsString(set, str, strLen) will return false. 0652 * A frozen set will not be modified. 0653 * @param set the object from which to remove the string 0654 * @param str the string to remove 0655 * @param strLen the length of the string or -1 if null terminated. 0656 * @stable ICU 2.4 0657 */ 0658 U_CAPI void U_EXPORT2 0659 uset_removeString(USet* set, const UChar* str, int32_t strLen); 0660 0661 /** 0662 * Removes EACH of the characters in this string. Note: "ch" == {"c", "h"} 0663 * A frozen set will not be modified. 0664 * 0665 * @param set the object to be modified 0666 * @param str the string 0667 * @param length the length of the string, or -1 if NUL-terminated 0668 * @stable ICU 69 0669 */ 0670 U_CAPI void U_EXPORT2 0671 uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length); 0672 0673 /** 0674 * Removes from this set all of its elements that are contained in the 0675 * specified set. This operation effectively modifies this 0676 * set so that its value is the <i>asymmetric set difference</i> of 0677 * the two sets. 0678 * A frozen set will not be modified. 
0679 * @param set the object from which the elements are to be removed 0680 * @param removeSet the object that defines which elements will be 0681 * removed from this set 0682 * @stable ICU 3.2 0683 */ 0684 U_CAPI void U_EXPORT2 0685 uset_removeAll(USet* set, const USet* removeSet); 0686 0687 /** 0688 * Retain only the elements in this set that are contained in the 0689 * specified range. If <code>start > end</code> then an empty range is 0690 * retained, leaving the set empty. This is equivalent to 0691 * a boolean logic AND, or a set INTERSECTION. 0692 * A frozen set will not be modified. 0693 * 0694 * @param set the object for which to retain only the specified range 0695 * @param start first character, inclusive, of range 0696 * @param end last character, inclusive, of range 0697 * @stable ICU 3.2 0698 */ 0699 U_CAPI void U_EXPORT2 0700 uset_retain(USet* set, UChar32 start, UChar32 end); 0701 0702 /** 0703 * Retains only the specified string from this set if it is present. 0704 * Upon return this set will be empty if it did not contain s, or 0705 * will only contain s if it did contain s. 0706 * A frozen set will not be modified. 0707 * 0708 * @param set the object to be modified 0709 * @param str the string 0710 * @param length the length of the string, or -1 if NUL-terminated 0711 * @stable ICU 69 0712 */ 0713 U_CAPI void U_EXPORT2 0714 uset_retainString(USet *set, const UChar *str, int32_t length); 0715 0716 /** 0717 * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"} 0718 * A frozen set will not be modified. 0719 * 0720 * @param set the object to be modified 0721 * @param str the string 0722 * @param length the length of the string, or -1 if NUL-terminated 0723 * @stable ICU 69 0724 */ 0725 U_CAPI void U_EXPORT2 0726 uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length); 0727 0728 /** 0729 * Retains only the elements in this set that are contained in the 0730 * specified set. 
In other words, removes from this set all of 0731 * its elements that are not contained in the specified set. This 0732 * operation effectively modifies this set so that its value is 0733 * the <i>intersection</i> of the two sets. 0734 * A frozen set will not be modified. 0735 * 0736 * @param set the object on which to perform the retain 0737 * @param retain set that defines which elements this set will retain 0738 * @stable ICU 3.2 0739 */ 0740 U_CAPI void U_EXPORT2 0741 uset_retainAll(USet* set, const USet* retain); 0742 0743 /** 0744 * Reallocate this object's internal structures to take up the least 0745 * possible space, without changing this object's value. 0746 * A frozen set will not be modified. 0747 * 0748 * @param set the object on which to perform the compact 0749 * @stable ICU 3.2 0750 */ 0751 U_CAPI void U_EXPORT2 0752 uset_compact(USet* set); 0753 0754 /** 0755 * This is equivalent to 0756 * <code>uset_complementRange(set, 0, 0x10FFFF)</code>. 0757 * 0758 * <strong>Note:</strong> This performs a symmetric difference with all code points 0759 * <em>and thus retains all multicharacter strings</em>. 0760 * In order to achieve a “code point complement” (all code points minus this set), 0761 * the easiest is to <code>uset_complement(set); uset_removeAllStrings(set);</code>. 0762 * 0763 * A frozen set will not be modified. 0764 * @param set the set 0765 * @stable ICU 2.4 0766 */ 0767 U_CAPI void U_EXPORT2 0768 uset_complement(USet* set); 0769 0770 /** 0771 * Complements the specified range in this set. Any character in 0772 * the range will be removed if it is in this set, or will be 0773 * added if it is not in this set. If <code>start > end</code> 0774 * then an empty range is complemented, leaving the set unchanged. 0775 * This is equivalent to a boolean logic XOR. 0776 * A frozen set will not be modified. 
0777 * 0778 * @param set the object to be modified 0779 * @param start first character, inclusive, of range 0780 * @param end last character, inclusive, of range 0781 * @stable ICU 69 0782 */ 0783 U_CAPI void U_EXPORT2 0784 uset_complementRange(USet *set, UChar32 start, UChar32 end); 0785 0786 /** 0787 * Complements the specified string in this set. 0788 * The string will be removed if it is in this set, or will be added if it is not in this set. 0789 * A frozen set will not be modified. 0790 * 0791 * @param set the object to be modified 0792 * @param str the string 0793 * @param length the length of the string, or -1 if NUL-terminated 0794 * @stable ICU 69 0795 */ 0796 U_CAPI void U_EXPORT2 0797 uset_complementString(USet *set, const UChar *str, int32_t length); 0798 0799 /** 0800 * Complements EACH of the characters in this string. Note: "ch" == {"c", "h"} 0801 * A frozen set will not be modified. 0802 * 0803 * @param set the object to be modified 0804 * @param str the string 0805 * @param length the length of the string, or -1 if NUL-terminated 0806 * @stable ICU 69 0807 */ 0808 U_CAPI void U_EXPORT2 0809 uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length); 0810 0811 /** 0812 * Complements in this set all elements contained in the specified 0813 * set. Any character in the other set will be removed if it is 0814 * in this set, or will be added if it is not in this set. 0815 * A frozen set will not be modified. 0816 * 0817 * @param set the set with which to complement 0818 * @param complement set that defines which elements will be xor'ed 0819 * from this set. 0820 * @stable ICU 3.2 0821 */ 0822 U_CAPI void U_EXPORT2 0823 uset_complementAll(USet* set, const USet* complement); 0824 0825 /** 0826 * Removes all of the elements from this set. This set will be 0827 * empty after this call returns. 0828 * A frozen set will not be modified. 
0829 * @param set the set 0830 * @stable ICU 2.4 0831 */ 0832 U_CAPI void U_EXPORT2 0833 uset_clear(USet* set); 0834 0835 /** 0836 * Close this set over the given attribute. For the attribute 0837 * USET_CASE_INSENSITIVE, the result is to modify this set so that: 0838 * 0839 * 1. For each character or string 'a' in this set, all strings or 0840 * characters 'b' such that foldCase(a) == foldCase(b) are added 0841 * to this set. 0842 * 0843 * 2. For each string 'e' in the resulting set, if e != 0844 * foldCase(e), 'e' will be removed. 0845 * 0846 * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}] 0847 * 0848 * (Here foldCase(x) refers to the operation u_strFoldCase, and a 0849 * == b denotes that the contents are the same, not pointer 0850 * comparison.) 0851 * 0852 * A frozen set will not be modified. 0853 * 0854 * @param set the set 0855 * 0856 * @param attributes bitmask for attributes to close over. 0857 * Valid options: 0858 * At most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE. 0859 * These case options are mutually exclusive. 0860 * Unrelated options bits are ignored. 0861 * @stable ICU 4.2 0862 */ 0863 U_CAPI void U_EXPORT2 0864 uset_closeOver(USet* set, int32_t attributes); 0865 0866 /** 0867 * Remove all strings from this set. 0868 * 0869 * @param set the set 0870 * @stable ICU 4.2 0871 */ 0872 U_CAPI void U_EXPORT2 0873 uset_removeAllStrings(USet* set); 0874 0875 /** 0876 * Returns true if the given USet contains no characters and no 0877 * strings. 0878 * @param set the set 0879 * @return true if set is empty 0880 * @stable ICU 2.4 0881 */ 0882 U_CAPI UBool U_EXPORT2 0883 uset_isEmpty(const USet* set); 0884 0885 /** 0886 * @param set the set 0887 * @return true if this set contains multi-character strings or the empty string. 
0888 * @stable ICU 70 0889 */ 0890 U_CAPI UBool U_EXPORT2 0891 uset_hasStrings(const USet *set); 0892 0893 /** 0894 * Returns true if the given USet contains the given character. 0895 * This function works faster with a frozen set. 0896 * @param set the set 0897 * @param c The codepoint to check for within the set 0898 * @return true if set contains c 0899 * @stable ICU 2.4 0900 */ 0901 U_CAPI UBool U_EXPORT2 0902 uset_contains(const USet* set, UChar32 c); 0903 0904 /** 0905 * Returns true if the given USet contains all characters c 0906 * where start <= c && c <= end. 0907 * @param set the set 0908 * @param start the first character of the range to test, inclusive 0909 * @param end the last character of the range to test, inclusive 0910 * @return true if set contains the range 0911 * @stable ICU 2.2 0912 */ 0913 U_CAPI UBool U_EXPORT2 0914 uset_containsRange(const USet* set, UChar32 start, UChar32 end); 0915 0916 /** 0917 * Returns true if the given USet contains the given string. 0918 * @param set the set 0919 * @param str the string 0920 * @param strLen the length of the string or -1 if null terminated. 0921 * @return true if set contains str 0922 * @stable ICU 2.4 0923 */ 0924 U_CAPI UBool U_EXPORT2 0925 uset_containsString(const USet* set, const UChar* str, int32_t strLen); 0926 0927 /** 0928 * Returns the index of the given character within this set, where 0929 * the set is ordered by ascending code point. If the character 0930 * is not in this set, return -1. The inverse of this method is 0931 * <code>charAt()</code>. 0932 * @param set the set 0933 * @param c the character to obtain the index for 0934 * @return an index from 0..size()-1, or -1 0935 * @stable ICU 3.2 0936 */ 0937 U_CAPI int32_t U_EXPORT2 0938 uset_indexOf(const USet* set, UChar32 c); 0939 0940 /** 0941 * Returns the character at the given index within this set, where 0942 * the set is ordered by ascending code point. If the index is 0943 * out of range for characters, returns (UChar32)-1. 
0944 * The inverse of this method is <code>indexOf()</code>. 0945 * 0946 * For iteration, this is slower than uset_getRangeCount()/uset_getItemCount() 0947 * with uset_getItem(), because for each call it skips linearly over <code>index</code> 0948 * characters in the ranges. 0949 * 0950 * @param set the set 0951 * @param charIndex an index from 0..size()-1 to obtain the char for 0952 * @return the character at the given index, or (UChar32)-1. 0953 * @stable ICU 3.2 0954 */ 0955 U_CAPI UChar32 U_EXPORT2 0956 uset_charAt(const USet* set, int32_t charIndex); 0957 0958 /** 0959 * Returns the number of characters and strings contained in this set. 0960 * The last (uset_getItemCount() - uset_getRangeCount()) items are strings. 0961 * 0962 * This is slower than uset_getRangeCount() and uset_getItemCount() because 0963 * it counts the code points of all ranges. 0964 * 0965 * @param set the set 0966 * @return a non-negative integer counting the characters and strings 0967 * contained in set 0968 * @stable ICU 2.4 0969 * @see uset_getRangeCount 0970 */ 0971 U_CAPI int32_t U_EXPORT2 0972 uset_size(const USet* set); 0973 0974 /** 0975 * @param set the set 0976 * @return the number of ranges in this set. 0977 * @stable ICU 70 0978 * @see uset_getItemCount 0979 * @see uset_getItem 0980 * @see uset_size 0981 */ 0982 U_CAPI int32_t U_EXPORT2 0983 uset_getRangeCount(const USet *set); 0984 0985 /** 0986 * Returns the number of items in this set. An item is either a range 0987 * of characters or a single multicharacter string. 0988 * @param set the set 0989 * @return a non-negative integer counting the character ranges 0990 * and/or strings contained in set 0991 * @stable ICU 2.4 0992 */ 0993 U_CAPI int32_t U_EXPORT2 0994 uset_getItemCount(const USet* set); 0995 0996 /** 0997 * Returns an item of this set. An item is either a range of 0998 * characters or a single multicharacter string (which can be the empty string). 
0999 * 1000 * If <code>itemIndex</code> is less than uset_getRangeCount(), then this function returns 0, 1001 * and the range is <code>*start</code>..<code>*end</code>. 1002 * 1003 * If <code>itemIndex</code> is at least uset_getRangeCount() and less than uset_getItemCount(), then 1004 * this function copies the string into <code>str[strCapacity]</code> and 1005 * returns the length of the string (0 for the empty string). 1006 * 1007 * If <code>itemIndex</code> is out of range, then this function returns -1. 1008 * 1009 * Note that 0 is returned for each range as well as for the empty string. 1010 * 1011 * @param set the set 1012 * @param itemIndex a non-negative integer in the range 0..uset_getItemCount(set)-1 1013 * @param start pointer to variable to receive first character in range, inclusive; 1014 * can be NULL for a string item 1015 * @param end pointer to variable to receive last character in range, inclusive; 1016 * can be NULL for a string item 1017 * @param str buffer to receive the string, may be NULL 1018 * @param strCapacity capacity of str, or 0 if str is NULL 1019 * @param ec error code; U_INDEX_OUTOFBOUNDS_ERROR if the itemIndex is out of range 1020 * @return the length of the string (0 or >= 2), or 0 if the item is a range, 1021 * or -1 if the itemIndex is out of range 1022 * @stable ICU 2.4 1023 */ 1024 U_CAPI int32_t U_EXPORT2 1025 uset_getItem(const USet* set, int32_t itemIndex, 1026 UChar32* start, UChar32* end, 1027 UChar* str, int32_t strCapacity, 1028 UErrorCode* ec); 1029 1030 /** 1031 * Returns true if set1 contains all the characters and strings 1032 * of set2. It answers the question, 'Is set1 a superset of set2?' 
1033 * @param set1 set to be checked for containment 1034 * @param set2 set to be checked for containment 1035 * @return true if the test condition is met 1036 * @stable ICU 3.2 1037 */ 1038 U_CAPI UBool U_EXPORT2 1039 uset_containsAll(const USet* set1, const USet* set2); 1040 1041 /** 1042 * Returns true if this set contains all the characters 1043 * of the given string. This does not check containment of grapheme 1044 * clusters, like uset_containsString. 1045 * @param set set of characters to be checked for containment 1046 * @param str string containing codepoints to be checked for containment 1047 * @param strLen the length of the string or -1 if null terminated. 1048 * @return true if the test condition is met 1049 * @stable ICU 3.4 1050 */ 1051 U_CAPI UBool U_EXPORT2 1052 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen); 1053 1054 /** 1055 * Returns true if set1 contains none of the characters and strings 1056 * of set2. It answers the question, 'Is set1 disjoint from set2?' 1057 * @param set1 set to be checked for containment 1058 * @param set2 set to be checked for containment 1059 * @return true if the test condition is met 1060 * @stable ICU 3.2 1061 */ 1062 U_CAPI UBool U_EXPORT2 1063 uset_containsNone(const USet* set1, const USet* set2); 1064 1065 /** 1066 * Returns true if set1 contains some of the characters and strings 1067 * of set2. It answers the question, 'Do set1 and set2 have an intersection?' 
1068 * @param set1 set to be checked for containment 1069 * @param set2 set to be checked for containment 1070 * @return true if the test condition is met 1071 * @stable ICU 3.2 1072 */ 1073 U_CAPI UBool U_EXPORT2 1074 uset_containsSome(const USet* set1, const USet* set2); 1075 1076 /** 1077 * Returns the length of the initial substring of the input string which 1078 * consists only of characters and strings that are contained in this set 1079 * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), 1080 * or only of characters and strings that are not contained 1081 * in this set (USET_SPAN_NOT_CONTAINED). 1082 * See USetSpanCondition for details. 1083 * Similar to the strspn() C library function. 1084 * Unpaired surrogates are treated according to contains() of their surrogate code points. 1085 * This function works faster with a frozen set and with a non-negative string length argument. 1086 * @param set the set 1087 * @param s start of the string 1088 * @param length of the string; can be -1 for NUL-terminated 1089 * @param spanCondition specifies the containment condition 1090 * @return the length of the initial substring according to the spanCondition; 1091 * 0 if the start of the string does not fit the spanCondition 1092 * @stable ICU 3.8 1093 * @see USetSpanCondition 1094 */ 1095 U_CAPI int32_t U_EXPORT2 1096 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition); 1097 1098 /** 1099 * Returns the start of the trailing substring of the input string which 1100 * consists only of characters and strings that are contained in this set 1101 * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), 1102 * or only of characters and strings that are not contained 1103 * in this set (USET_SPAN_NOT_CONTAINED). 1104 * See USetSpanCondition for details. 1105 * Unpaired surrogates are treated according to contains() of their surrogate code points. 1106 * This function works faster with a frozen set and with a non-negative string length argument. 
1107 * @param set the set 1108 * @param s start of the string 1109 * @param length of the string; can be -1 for NUL-terminated 1110 * @param spanCondition specifies the containment condition 1111 * @return the start of the trailing substring according to the spanCondition; 1112 * the string length if the end of the string does not fit the spanCondition 1113 * @stable ICU 3.8 1114 * @see USetSpanCondition 1115 */ 1116 U_CAPI int32_t U_EXPORT2 1117 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition); 1118 1119 /** 1120 * Returns the length of the initial substring of the input string which 1121 * consists only of characters and strings that are contained in this set 1122 * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), 1123 * or only of characters and strings that are not contained 1124 * in this set (USET_SPAN_NOT_CONTAINED). 1125 * See USetSpanCondition for details. 1126 * Similar to the strspn() C library function. 1127 * Malformed byte sequences are treated according to contains(0xfffd). 1128 * This function works faster with a frozen set and with a non-negative string length argument. 
1129 * @param set the set 1130 * @param s start of the string (UTF-8) 1131 * @param length of the string; can be -1 for NUL-terminated 1132 * @param spanCondition specifies the containment condition 1133 * @return the length of the initial substring according to the spanCondition; 1134 * 0 if the start of the string does not fit the spanCondition 1135 * @stable ICU 3.8 1136 * @see USetSpanCondition 1137 */ 1138 U_CAPI int32_t U_EXPORT2 1139 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition); 1140 1141 /** 1142 * Returns the start of the trailing substring of the input string which 1143 * consists only of characters and strings that are contained in this set 1144 * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), 1145 * or only of characters and strings that are not contained 1146 * in this set (USET_SPAN_NOT_CONTAINED). 1147 * See USetSpanCondition for details. 1148 * Malformed byte sequences are treated according to contains(0xfffd). 1149 * This function works faster with a frozen set and with a non-negative string length argument. 1150 * @param set the set 1151 * @param s start of the string (UTF-8) 1152 * @param length of the string; can be -1 for NUL-terminated 1153 * @param spanCondition specifies the containment condition 1154 * @return the start of the trailing substring according to the spanCondition; 1155 * the string length if the end of the string does not fit the spanCondition 1156 * @stable ICU 3.8 1157 * @see USetSpanCondition 1158 */ 1159 U_CAPI int32_t U_EXPORT2 1160 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition); 1161 1162 /** 1163 * Returns true if set1 contains all of the characters and strings 1164 * of set2, and vice versa. It answers the question, 'Is set1 equal to set2?' 
1165 * @param set1 set to be checked for containment 1166 * @param set2 set to be checked for containment 1167 * @return true if the test condition is met 1168 * @stable ICU 3.2 1169 */ 1170 U_CAPI UBool U_EXPORT2 1171 uset_equals(const USet* set1, const USet* set2); 1172 1173 /********************************************************************* 1174 * Serialized set API 1175 *********************************************************************/ 1176 1177 /** 1178 * Serializes this set into an array of 16-bit integers. Serialization 1179 * (currently) only records the characters in the set; multicharacter 1180 * strings are ignored. 1181 * 1182 * The array 1183 * has the following format (each line is one 16-bit integer): 1184 * 1185 * length = (n+2*m) | (m!=0?0x8000:0) 1186 * bmpLength = n; present if m!=0 1187 * bmp[0] 1188 * bmp[1] 1189 * ... 1190 * bmp[n-1] 1191 * supp-high[0] 1192 * supp-low[0] 1193 * supp-high[1] 1194 * supp-low[1] 1195 * ... 1196 * supp-high[m-1] 1197 * supp-low[m-1] 1198 * 1199 * The array starts with a header. After the header are n bmp 1200 * code points, then m supplementary code points. Either n or m 1201 * or both may be zero. n+2*m is always <= 0x7FFF. 1202 * 1203 * If there are no supplementary characters (if m==0) then the 1204 * header is one 16-bit integer, 'length', with value n. 1205 * 1206 * If there are supplementary characters (if m!=0) then the header 1207 * is two 16-bit integers. The first, 'length', has value 1208 * (n+2*m)|0x8000. The second, 'bmpLength', has value n. 1209 * 1210 * After the header the code points are stored in ascending order. 1211 * Supplementary code points are stored as most significant 16 1212 * bits followed by least significant 16 bits. 1213 * 1214 * @param set the set 1215 * @param dest pointer to buffer of destCapacity 16-bit integers. 1216 * May be NULL only if destCapacity is zero. 1217 * @param destCapacity size of dest, or zero. Must not be negative. 
1218 * @param pErrorCode pointer to the error code. Will be set to 1219 * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to 1220 * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity. 1221 * @return the total length of the serialized format, including 1222 * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other 1223 * than U_BUFFER_OVERFLOW_ERROR. 1224 * @stable ICU 2.4 1225 */ 1226 U_CAPI int32_t U_EXPORT2 1227 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode); 1228 1229 /** 1230 * Given a serialized array, fill in the given serialized set object. 1231 * @param fillSet pointer to result 1232 * @param src pointer to start of array 1233 * @param srcLength length of array 1234 * @return true if the given array is valid, otherwise false 1235 * @stable ICU 2.4 1236 */ 1237 U_CAPI UBool U_EXPORT2 1238 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength); 1239 1240 /** 1241 * Set the USerializedSet to contain the given character (and nothing 1242 * else). 1243 * @param fillSet pointer to result 1244 * @param c The codepoint to set 1245 * @stable ICU 2.4 1246 */ 1247 U_CAPI void U_EXPORT2 1248 uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c); 1249 1250 /** 1251 * Returns true if the given USerializedSet contains the given 1252 * character. 1253 * @param set the serialized set 1254 * @param c The codepoint to check for within the set 1255 * @return true if set contains c 1256 * @stable ICU 2.4 1257 */ 1258 U_CAPI UBool U_EXPORT2 1259 uset_serializedContains(const USerializedSet* set, UChar32 c); 1260 1261 /** 1262 * Returns the number of disjoint ranges of characters contained in 1263 * the given serialized set. Ignores any strings contained in the 1264 * set. 
1265 * @param set the serialized set 1266 * @return a non-negative integer counting the character ranges 1267 * contained in set 1268 * @stable ICU 2.4 1269 */ 1270 U_CAPI int32_t U_EXPORT2 1271 uset_getSerializedRangeCount(const USerializedSet* set); 1272 1273 /** 1274 * Returns a range of characters contained in the given serialized 1275 * set. 1276 * @param set the serialized set 1277 * @param rangeIndex a non-negative integer in the range 0.. 1278 * uset_getSerializedRangeCount(set)-1 1279 * @param pStart pointer to variable to receive first character 1280 * in range, inclusive 1281 * @param pEnd pointer to variable to receive last character in range, 1282 * inclusive 1283 * @return true if rangeIndex is valid, otherwise false 1284 * @stable ICU 2.4 1285 */ 1286 U_CAPI UBool U_EXPORT2 1287 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex, 1288 UChar32* pStart, UChar32* pEnd); 1289 1290 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |