Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:13:08

0001 // © 2017 and later: Unicode, Inc. and others.
0002 // License & terms of use: http://www.unicode.org/copyright.html
0003 
0004 // stringoptions.h
0005 // created: 2017jun08 Markus W. Scherer
0006 
0007 #ifndef __STRINGOPTIONS_H__
0008 #define __STRINGOPTIONS_H__
0009 
0010 #include "unicode/utypes.h"
0011 
/**
 * \file
 * \brief C API: Bit set option bit constants for various string and character processing functions.
 */
0016 
/**
 * Option value for case folding: Use default mappings defined in CaseFolding.txt.
 *
 * @stable ICU 2.0
 */
#define U_FOLD_CASE_DEFAULT 0
0023 
/**
 * Option value for case folding:
 *
 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
 * and dotless i appropriately for Turkic languages (tr, az).
 *
 * Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that
 * are to be included for default mappings and
 * excluded for the Turkic-specific mappings.
 *
 * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
 * are to be excluded for default mappings and
 * included for the Turkic-specific mappings.
 *
 * @stable ICU 2.0
 */
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
0041 
/**
 * Titlecase the string as a whole rather than each word.
 * (Titlecase only the character at index 0, possibly adjusted.)
 * Option bits value for titlecasing APIs that take an options bit set.
 *
 * It is an error to specify multiple titlecasing iterator options together,
 * including both an options bit and an explicit BreakIterator.
 *
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @stable ICU 60
 */
#define U_TITLECASE_WHOLE_STRING 0x20
0054 
/**
 * Titlecase sentences rather than words.
 * (Titlecase only the first character of each sentence, possibly adjusted.)
 * Option bits value for titlecasing APIs that take an options bit set.
 *
 * It is an error to specify multiple titlecasing iterator options together,
 * including both an options bit and an explicit BreakIterator.
 *
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @stable ICU 60
 */
#define U_TITLECASE_SENTENCES 0x40
0067 
/**
 * Do not lowercase non-initial parts of words when titlecasing.
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * By default, titlecasing will titlecase the character at each
 * (possibly adjusted) BreakIterator index and
 * lowercase all other characters up to the next iterator index.
 * With this option, the other characters will not be modified.
 *
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @see UnicodeString::toTitle
 * @see CaseMap::toTitle
 * @see ucasemap_setOptions
 * @see ucasemap_toTitle
 * @see ucasemap_utf8ToTitle
 * @stable ICU 3.8
 */
#define U_TITLECASE_NO_LOWERCASE 0x100
0086 
/**
 * Do not adjust the titlecasing BreakIterator indexes;
 * titlecase exactly the characters at breaks from the iterator.
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * By default, titlecasing will take each break iterator index,
 * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
 * and titlecase that one.
 *
 * Other characters are lowercased.
 *
 * It is an error to specify multiple titlecasing adjustment options together.
 *
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @see U_TITLECASE_NO_LOWERCASE
 * @see UnicodeString::toTitle
 * @see CaseMap::toTitle
 * @see ucasemap_setOptions
 * @see ucasemap_toTitle
 * @see ucasemap_utf8ToTitle
 * @stable ICU 3.8
 */
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
0110 
/**
 * Adjust each titlecasing BreakIterator index to the next cased character.
 * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * This used to be the default index adjustment in ICU.
 * Since ICU 60, the default index adjustment is to the next character that is
 * a letter, number, symbol, or private use code point.
 * (Uncased modifier letters are skipped.)
 * The difference in behavior is small for word titlecasing,
 * but the new adjustment is much better for whole-string and sentence titlecasing:
 * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
 *
 * It is an error to specify multiple titlecasing adjustment options together.
 *
 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
 * @stable ICU 60
 */
#define U_TITLECASE_ADJUST_TO_CASED 0x400
0130 
/**
 * Option for string transformation functions to not first reset the Edits object.
 * Used for example in some case-mapping and normalization functions.
 *
 * @see CaseMap
 * @see Edits
 * @see Normalizer2
 * @stable ICU 60
 */
#define U_EDITS_NO_RESET 0x2000
0141 
/**
 * Omit unchanged text when recording how source substrings
 * relate to changed and unchanged result substrings.
 * Used for example in some case-mapping and normalization functions.
 *
 * @see CaseMap
 * @see Edits
 * @see Normalizer2
 * @stable ICU 60
 */
#define U_OMIT_UNCHANGED_TEXT 0x4000
0153 
/**
 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc.:
 * Compare strings in code point order instead of code unit order.
 * @stable ICU 2.2
 */
#define U_COMPARE_CODE_POINT_ORDER  0x8000
0160 
/**
 * Option bit for unorm_compare:
 * Perform case-insensitive comparison.
 * @stable ICU 2.2
 */
#define U_COMPARE_IGNORE_CASE       0x10000
0167 
/**
 * Option bit for unorm_compare:
 * Both input strings are assumed to fulfill FCD conditions.
 * @stable ICU 2.2
 */
#define UNORM_INPUT_IS_FCD          0x20000
0174 
// Related definitions elsewhere.
// Options that are not meaningful in the same functions
// can share the same bits.
//
// Public:
// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
//
// Internal: (may change or be removed)
// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
// ustr_imp.h #define _STRNCMP_STYLE 0x1000
// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
0189 
0190 #endif  // __STRINGOPTIONS_H__