Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-17 09:56:13

0001 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
0002 /* Unicode character classification and properties.
0003    Copyright (C) 2002, 2005-2024 Free Software Foundation, Inc.
0004 
0005    This file is free software: you can redistribute it and/or modify
0006    it under the terms of the GNU Lesser General Public License as
0007    published by the Free Software Foundation; either version 2.1 of the
0008    License, or (at your option) any later version.
0009 
0010    This file is distributed in the hope that it will be useful,
0011    but WITHOUT ANY WARRANTY; without even the implied warranty of
0012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0013    GNU Lesser General Public License for more details.
0014 
0015    You should have received a copy of the GNU Lesser General Public License
0016    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
0017 
0018 #ifndef _UNICTYPE_H
0019 #define _UNICTYPE_H
0020 
0021 #include "unitypes.h"
0022 
0023 /* Get bool.  */
0024 #include <unistring/stdbool.h>
0025 
0026 /* Get size_t.  */
0027 #include <stddef.h>
0028 
0029 #if 1
0030 # include <unistring/woe32dll.h>
0031 #else
0032 # define LIBUNISTRING_DLL_VARIABLE
0033 #endif
0034 
0035 #ifdef __cplusplus
0036 extern "C" {
0037 #endif
0038 
0039 /* ========================================================================= */
0040 
0041 /* Field 1 of Unicode Character Database: Character name.
0042    See "uniname.h".  */
0043 
0044 /* ========================================================================= */
0045 
0046 /* Field 2 of Unicode Character Database: General category.  */
0047 
0048 /* Data type denoting a General category value.  This is not just a bitmask,
0049    but rather a bitmask and a pointer to the lookup table, so that programs
0050    that use only the predefined bitmasks (i.e. don't combine bitmasks with &
0051    and |) don't have a link-time dependency towards the big general table.  */
0052 typedef struct
0053 {
0054   uint32_t bitmask : 31;
0055   /*bool*/ unsigned int generic : 1;
0056   union
0057   {
0058     const void *table;                               /* when generic is 0 */
0059     bool (*lookup_fn) (ucs4_t uc, uint32_t bitmask); /* when generic is 1 */
0060   } lookup;
0061 }
0062 uc_general_category_t;
0063 
0064 /* Bits and bit masks denoting General category values.  UnicodeData-3.2.0.html
0065    says a 32-bit integer will always suffice to represent them.
0066    These bit masks can only be used with the uc_is_general_category_withtable
0067    function.  */
0068 enum
0069 {
0070   UC_CATEGORY_MASK_L  = 0x0000001f,
0071   UC_CATEGORY_MASK_LC = 0x00000007,
0072   UC_CATEGORY_MASK_Lu = 0x00000001,
0073   UC_CATEGORY_MASK_Ll = 0x00000002,
0074   UC_CATEGORY_MASK_Lt = 0x00000004,
0075   UC_CATEGORY_MASK_Lm = 0x00000008,
0076   UC_CATEGORY_MASK_Lo = 0x00000010,
0077   UC_CATEGORY_MASK_M  = 0x000000e0,
0078   UC_CATEGORY_MASK_Mn = 0x00000020,
0079   UC_CATEGORY_MASK_Mc = 0x00000040,
0080   UC_CATEGORY_MASK_Me = 0x00000080,
0081   UC_CATEGORY_MASK_N  = 0x00000700,
0082   UC_CATEGORY_MASK_Nd = 0x00000100,
0083   UC_CATEGORY_MASK_Nl = 0x00000200,
0084   UC_CATEGORY_MASK_No = 0x00000400,
0085   UC_CATEGORY_MASK_P  = 0x0003f800,
0086   UC_CATEGORY_MASK_Pc = 0x00000800,
0087   UC_CATEGORY_MASK_Pd = 0x00001000,
0088   UC_CATEGORY_MASK_Ps = 0x00002000,
0089   UC_CATEGORY_MASK_Pe = 0x00004000,
0090   UC_CATEGORY_MASK_Pi = 0x00008000,
0091   UC_CATEGORY_MASK_Pf = 0x00010000,
0092   UC_CATEGORY_MASK_Po = 0x00020000,
0093   UC_CATEGORY_MASK_S  = 0x003c0000,
0094   UC_CATEGORY_MASK_Sm = 0x00040000,
0095   UC_CATEGORY_MASK_Sc = 0x00080000,
0096   UC_CATEGORY_MASK_Sk = 0x00100000,
0097   UC_CATEGORY_MASK_So = 0x00200000,
0098   UC_CATEGORY_MASK_Z  = 0x01c00000,
0099   UC_CATEGORY_MASK_Zs = 0x00400000,
0100   UC_CATEGORY_MASK_Zl = 0x00800000,
0101   UC_CATEGORY_MASK_Zp = 0x01000000,
0102   UC_CATEGORY_MASK_C  = 0x3e000000,
0103   UC_CATEGORY_MASK_Cc = 0x02000000,
0104   UC_CATEGORY_MASK_Cf = 0x04000000,
0105   UC_CATEGORY_MASK_Cs = 0x08000000,
0106   UC_CATEGORY_MASK_Co = 0x10000000,
0107   UC_CATEGORY_MASK_Cn = 0x20000000
0108 };
0109 
0110 /* Predefined General category values.  */
0111 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_L;
0112 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_LC;
0113 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Lu;
0114 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Ll;
0115 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Lt;
0116 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Lm;
0117 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Lo;
0118 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_M;
0119 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Mn;
0120 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Mc;
0121 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Me;
0122 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_N;
0123 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Nd;
0124 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Nl;
0125 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_No;
0126 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_P;
0127 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Pc;
0128 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Pd;
0129 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Ps;
0130 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Pe;
0131 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Pi;
0132 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Pf;
0133 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Po;
0134 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_S;
0135 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Sm;
0136 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Sc;
0137 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Sk;
0138 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_So;
0139 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Z;
0140 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Zs;
0141 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Zl;
0142 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Zp;
0143 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_C;
0144 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Cc;
0145 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Cf;
0146 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Cs;
0147 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Co;
0148 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Cn;
0149 /* Non-public.  */
0150 extern const uc_general_category_t _UC_CATEGORY_NONE;
0151 
0152 /* Alias names for predefined General category values.  */
0153 #define UC_LETTER                    UC_CATEGORY_L
0154 #define UC_CASED_LETTER              UC_CATEGORY_LC
0155 #define UC_UPPERCASE_LETTER          UC_CATEGORY_Lu
0156 #define UC_LOWERCASE_LETTER          UC_CATEGORY_Ll
0157 #define UC_TITLECASE_LETTER          UC_CATEGORY_Lt
0158 #define UC_MODIFIER_LETTER           UC_CATEGORY_Lm
0159 #define UC_OTHER_LETTER              UC_CATEGORY_Lo
0160 #define UC_MARK                      UC_CATEGORY_M
0161 #define UC_NON_SPACING_MARK          UC_CATEGORY_Mn
0162 #define UC_COMBINING_SPACING_MARK    UC_CATEGORY_Mc
0163 #define UC_ENCLOSING_MARK            UC_CATEGORY_Me
0164 #define UC_NUMBER                    UC_CATEGORY_N
0165 #define UC_DECIMAL_DIGIT_NUMBER      UC_CATEGORY_Nd
0166 #define UC_LETTER_NUMBER             UC_CATEGORY_Nl
0167 #define UC_OTHER_NUMBER              UC_CATEGORY_No
0168 #define UC_PUNCTUATION               UC_CATEGORY_P
0169 #define UC_CONNECTOR_PUNCTUATION     UC_CATEGORY_Pc
0170 #define UC_DASH_PUNCTUATION          UC_CATEGORY_Pd
0171 #define UC_OPEN_PUNCTUATION          UC_CATEGORY_Ps /* a.k.a. UC_START_PUNCTUATION */
0172 #define UC_CLOSE_PUNCTUATION         UC_CATEGORY_Pe /* a.k.a. UC_END_PUNCTUATION */
0173 #define UC_INITIAL_QUOTE_PUNCTUATION UC_CATEGORY_Pi
0174 #define UC_FINAL_QUOTE_PUNCTUATION   UC_CATEGORY_Pf
0175 #define UC_OTHER_PUNCTUATION         UC_CATEGORY_Po
0176 #define UC_SYMBOL                    UC_CATEGORY_S
0177 #define UC_MATH_SYMBOL               UC_CATEGORY_Sm
0178 #define UC_CURRENCY_SYMBOL           UC_CATEGORY_Sc
0179 #define UC_MODIFIER_SYMBOL           UC_CATEGORY_Sk
0180 #define UC_OTHER_SYMBOL              UC_CATEGORY_So
0181 #define UC_SEPARATOR                 UC_CATEGORY_Z
0182 #define UC_SPACE_SEPARATOR           UC_CATEGORY_Zs
0183 #define UC_LINE_SEPARATOR            UC_CATEGORY_Zl
0184 #define UC_PARAGRAPH_SEPARATOR       UC_CATEGORY_Zp
0185 #define UC_OTHER                     UC_CATEGORY_C
0186 #define UC_CONTROL                   UC_CATEGORY_Cc
0187 #define UC_FORMAT                    UC_CATEGORY_Cf
0188 #define UC_SURROGATE                 UC_CATEGORY_Cs /* all of them are invalid characters */
0189 #define UC_PRIVATE_USE               UC_CATEGORY_Co
0190 #define UC_UNASSIGNED                UC_CATEGORY_Cn /* some of them are invalid characters */
0191 
0192 /* Return the union of two general categories.
0193    This corresponds to the unions of the two sets of characters.  */
0194 extern uc_general_category_t
0195        uc_general_category_or (uc_general_category_t category1,
0196                                uc_general_category_t category2);
0197 
0198 /* Return the intersection of two general categories as bit masks.
0199    This *does*not* correspond to the intersection of the two sets of
0200    characters.  */
0201 extern uc_general_category_t
0202        uc_general_category_and (uc_general_category_t category1,
0203                                 uc_general_category_t category2);
0204 
0205 /* Return the intersection of a general category with the complement of a
0206    second general category, as bit masks.
0207    This *does*not* correspond to the intersection with complement, when
0208    viewing the categories as sets of characters.  */
0209 extern uc_general_category_t
0210        uc_general_category_and_not (uc_general_category_t category1,
0211                                     uc_general_category_t category2);
0212 
0213 /* Return the name of a general category.  */
0214 extern const char *
0215        uc_general_category_name (uc_general_category_t category)
0216        _UC_ATTRIBUTE_PURE;
0217 
0218 /* Return the long name of a general category.  */
0219 extern const char *
0220        uc_general_category_long_name (uc_general_category_t category)
0221        _UC_ATTRIBUTE_PURE;
0222 
0223 /* Return the general category given by name, e.g. "Lu", or by long name,
0224    e.g. "Uppercase Letter".  */
0225 extern uc_general_category_t
0226        uc_general_category_byname (const char *category_name)
0227        _UC_ATTRIBUTE_PURE;
0228 
0229 /* Return the general category of a Unicode character.  */
0230 extern uc_general_category_t
0231        uc_general_category (ucs4_t uc)
0232        _UC_ATTRIBUTE_PURE;
0233 
0234 /* Test whether a Unicode character belongs to a given category.
0235    The CATEGORY argument can be the combination of several predefined
0236    general categories.  */
0237 extern bool
0238        uc_is_general_category (ucs4_t uc, uc_general_category_t category)
0239        _UC_ATTRIBUTE_PURE;
0240 /* Likewise.  This function uses a big table comprising all categories.  */
0241 extern bool
0242        uc_is_general_category_withtable (ucs4_t uc, uint32_t bitmask)
0243        _UC_ATTRIBUTE_CONST;
0244 
0245 /* ========================================================================= */
0246 
0247 /* Field 3 of Unicode Character Database: Canonical combining class.  */
0248 
0249 /* The possible results of uc_combining_class (0..255) are described in
0250    UCD.html.  The list here is not definitive; more values can be added
0251    in future versions.  */
0252 enum
0253 {
0254   UC_CCC_NR   =   0, /* Not Reordered */
0255   UC_CCC_OV   =   1, /* Overlay */
0256   UC_CCC_NK   =   7, /* Nukta */
0257   UC_CCC_KV   =   8, /* Kana Voicing */
0258   UC_CCC_VR   =   9, /* Virama */
0259   UC_CCC_ATBL = 200, /* Attached Below Left */
0260   UC_CCC_ATB  = 202, /* Attached Below */
0261   UC_CCC_ATA  = 214, /* Attached Above */
0262   UC_CCC_ATAR = 216, /* Attached Above Right */
0263   UC_CCC_BL   = 218, /* Below Left */
0264   UC_CCC_B    = 220, /* Below */
0265   UC_CCC_BR   = 222, /* Below Right */
0266   UC_CCC_L    = 224, /* Left */
0267   UC_CCC_R    = 226, /* Right */
0268   UC_CCC_AL   = 228, /* Above Left */
0269   UC_CCC_A    = 230, /* Above */
0270   UC_CCC_AR   = 232, /* Above Right */
0271   UC_CCC_DB   = 233, /* Double Below */
0272   UC_CCC_DA   = 234, /* Double Above */
0273   UC_CCC_IS   = 240  /* Iota Subscript */
0274 };
0275 
0276 /* Return the canonical combining class of a Unicode character.  */
0277 extern int
0278        uc_combining_class (ucs4_t uc)
0279        _UC_ATTRIBUTE_CONST;
0280 
0281 /* Return the name of a canonical combining class.  */
0282 extern const char *
0283        uc_combining_class_name (int ccc)
0284        _UC_ATTRIBUTE_CONST;
0285 
0286 /* Return the long name of a canonical combining class.  */
0287 extern const char *
0288        uc_combining_class_long_name (int ccc)
0289        _UC_ATTRIBUTE_CONST;
0290 
0291 /* Return the canonical combining class given by name, e.g. "BL", or by long
0292    name, e.g. "Below Left".  */
0293 extern int
0294        uc_combining_class_byname (const char *ccc_name)
0295        _UC_ATTRIBUTE_PURE;
0296 
0297 /* ========================================================================= */
0298 
0299 /* Field 4 of Unicode Character Database: Bidi class.
0300    Before Unicode 4.0, this field was called "Bidirectional category".  */
0301 
0302 enum
0303 {
0304   UC_BIDI_L,   /* Left-to-Right */
0305   UC_BIDI_LRE, /* Left-to-Right Embedding */
0306   UC_BIDI_LRO, /* Left-to-Right Override */
0307   UC_BIDI_R,   /* Right-to-Left */
0308   UC_BIDI_AL,  /* Right-to-Left Arabic */
0309   UC_BIDI_RLE, /* Right-to-Left Embedding */
0310   UC_BIDI_RLO, /* Right-to-Left Override */
0311   UC_BIDI_PDF, /* Pop Directional Format */
0312   UC_BIDI_EN,  /* European Number */
0313   UC_BIDI_ES,  /* European Number Separator */
0314   UC_BIDI_ET,  /* European Number Terminator */
0315   UC_BIDI_AN,  /* Arabic Number */
0316   UC_BIDI_CS,  /* Common Number Separator */
0317   UC_BIDI_NSM, /* Non-Spacing Mark */
0318   UC_BIDI_BN,  /* Boundary Neutral */
0319   UC_BIDI_B,   /* Paragraph Separator */
0320   UC_BIDI_S,   /* Segment Separator */
0321   UC_BIDI_WS,  /* Whitespace */
0322   UC_BIDI_ON,  /* Other Neutral */
0323   UC_BIDI_LRI, /* Left-to-Right Isolate */
0324   UC_BIDI_RLI, /* Right-to-Left Isolate */
0325   UC_BIDI_FSI, /* First Strong Isolate */
0326   UC_BIDI_PDI  /* Pop Directional Isolate */
0327 };
0328 
0329 /* Return the name of a bidi class.  */
0330 extern const char *
0331        uc_bidi_class_name (int bidi_class)
0332        _UC_ATTRIBUTE_CONST;
0333 /* Same; obsolete function name.  */
0334 extern const char *
0335        uc_bidi_category_name (int category)
0336        _UC_ATTRIBUTE_CONST;
0337 
0338 /* Return the long name of a bidi class.  */
0339 extern const char *
0340        uc_bidi_class_long_name (int bidi_class)
0341        _UC_ATTRIBUTE_CONST;
0342 
0343 /* Return the bidi class given by name, e.g. "LRE", or by long name, e.g.
0344    "Left-to-Right Embedding".  */
0345 extern int
0346        uc_bidi_class_byname (const char *bidi_class_name)
0347        _UC_ATTRIBUTE_PURE;
0348 /* Same; obsolete function name.  */
0349 extern int
0350        uc_bidi_category_byname (const char *category_name)
0351        _UC_ATTRIBUTE_PURE;
0352 
0353 /* Return the bidi class of a Unicode character.  */
0354 extern int
0355        uc_bidi_class (ucs4_t uc)
0356        _UC_ATTRIBUTE_CONST;
0357 /* Same; obsolete function name.  */
0358 extern int
0359        uc_bidi_category (ucs4_t uc)
0360        _UC_ATTRIBUTE_CONST;
0361 
0362 /* Test whether a Unicode character belongs to a given bidi class.  */
0363 extern bool
0364        uc_is_bidi_class (ucs4_t uc, int bidi_class)
0365        _UC_ATTRIBUTE_CONST;
0366 /* Same; obsolete function name.  */
0367 extern bool
0368        uc_is_bidi_category (ucs4_t uc, int category)
0369        _UC_ATTRIBUTE_CONST;
0370 
0371 /* ========================================================================= */
0372 
0373 /* Field 5 of Unicode Character Database: Character decomposition mapping.
0374    See "uninorm.h".  */
0375 
0376 /* ========================================================================= */
0377 
0378 /* Field 6 of Unicode Character Database: Decimal digit value.  */
0379 
0380 /* Return the decimal digit value of a Unicode character.  */
0381 extern int
0382        uc_decimal_value (ucs4_t uc)
0383        _UC_ATTRIBUTE_CONST;
0384 
0385 /* ========================================================================= */
0386 
0387 /* Field 7 of Unicode Character Database: Digit value.  */
0388 
0389 /* Return the digit value of a Unicode character.  */
0390 extern int
0391        uc_digit_value (ucs4_t uc)
0392        _UC_ATTRIBUTE_CONST;
0393 
0394 /* ========================================================================= */
0395 
0396 /* Field 8 of Unicode Character Database: Numeric value.  */
0397 
0398 /* Return the numeric value of a Unicode character, expressed as the fraction numerator/denominator.  */
0399 typedef struct
0400 {
0401   int numerator;
0402   int denominator;
0403 }
0404 uc_fraction_t;
0405 extern uc_fraction_t
0406        uc_numeric_value (ucs4_t uc)
0407        _UC_ATTRIBUTE_CONST;
0408 
0409 /* ========================================================================= */
0410 
0411 /* Field 9 of Unicode Character Database: Mirrored.  */
0412 
0413 /* Store the mirrored character of the Unicode character UC in *PUC.  NOTE(review): the bool result presumably reports whether a mirrored character exists -- confirm against the libunistring manual.  */
0414 extern bool
0415        uc_mirror_char (ucs4_t uc, ucs4_t *puc);
0416 
0417 /* ========================================================================= */
0418 
0419 /* Field 10 of Unicode Character Database: Unicode 1.0 Name.
0420    Not available in this library.  */
0421 
0422 /* ========================================================================= */
0423 
0424 /* Field 11 of Unicode Character Database: ISO 10646 comment.
0425    Not available in this library.  */
0426 
0427 /* ========================================================================= */
0428 
0429 /* Field 12, 13, 14 of Unicode Character Database: Uppercase mapping,
0430    lowercase mapping, titlecase mapping.  See "unicase.h".  */
0431 
0432 /* ========================================================================= */
0433 
0434 /* Field 2 of the file ArabicShaping.txt in the Unicode Character Database.  */
0435 
0436 /* Possible joining types.  */
0437 enum
0438 {
0439   UC_JOINING_TYPE_U, /* Non_Joining */
0440   UC_JOINING_TYPE_T, /* Transparent */
0441   UC_JOINING_TYPE_C, /* Join_Causing */
0442   UC_JOINING_TYPE_L, /* Left_Joining */
0443   UC_JOINING_TYPE_R, /* Right_Joining */
0444   UC_JOINING_TYPE_D  /* Dual_Joining */
0445 };
0446 
0447 /* Return the name of a joining type.  */
0448 extern const char *
0449        uc_joining_type_name (int joining_type)
0450        _UC_ATTRIBUTE_CONST;
0451 
0452 /* Return the long name of a joining type.  */
0453 extern const char *
0454        uc_joining_type_long_name (int joining_type)
0455        _UC_ATTRIBUTE_CONST;
0456 
0457 /* Return the joining type given by name, e.g. "D", or by long name, e.g.
0458    "Dual Joining".  */
0459 extern int
0460        uc_joining_type_byname (const char *joining_type_name)
0461        _UC_ATTRIBUTE_PURE;
0462 
0463 /* Return the joining type of a Unicode character.  */
0464 extern int
0465        uc_joining_type (ucs4_t uc)
0466        _UC_ATTRIBUTE_CONST;
0467 
0468 /* ========================================================================= */
0469 
0470 /* Field 3 of the file ArabicShaping.txt in the Unicode Character Database.  */
0471 
0472 /* Possible joining groups.
0473    This enumeration may be extended in the future.  */
0474 enum
0475 {
0476   UC_JOINING_GROUP_NONE,                     /* No_Joining_Group */
0477   UC_JOINING_GROUP_AIN,                      /* Ain */
0478   UC_JOINING_GROUP_ALAPH,                    /* Alaph */
0479   UC_JOINING_GROUP_ALEF,                     /* Alef */
0480   UC_JOINING_GROUP_BEH,                      /* Beh */
0481   UC_JOINING_GROUP_BETH,                     /* Beth */
0482   UC_JOINING_GROUP_BURUSHASKI_YEH_BARREE,    /* Burushaski_Yeh_Barree */
0483   UC_JOINING_GROUP_DAL,                      /* Dal */
0484   UC_JOINING_GROUP_DALATH_RISH,              /* Dalath_Rish */
0485   UC_JOINING_GROUP_E,                        /* E */
0486   UC_JOINING_GROUP_FARSI_YEH,                /* Farsi_Yeh */
0487   UC_JOINING_GROUP_FE,                       /* Fe */
0488   UC_JOINING_GROUP_FEH,                      /* Feh */
0489   UC_JOINING_GROUP_FINAL_SEMKATH,            /* Final_Semkath */
0490   UC_JOINING_GROUP_GAF,                      /* Gaf */
0491   UC_JOINING_GROUP_GAMAL,                    /* Gamal */
0492   UC_JOINING_GROUP_HAH,                      /* Hah */
0493   UC_JOINING_GROUP_HE,                       /* He */
0494   UC_JOINING_GROUP_HEH,                      /* Heh */
0495   UC_JOINING_GROUP_HEH_GOAL,                 /* Heh_Goal */
0496   UC_JOINING_GROUP_HETH,                     /* Heth */
0497   UC_JOINING_GROUP_KAF,                      /* Kaf */
0498   UC_JOINING_GROUP_KAPH,                     /* Kaph */
0499   UC_JOINING_GROUP_KHAPH,                    /* Khaph */
0500   UC_JOINING_GROUP_KNOTTED_HEH,              /* Knotted_Heh */
0501   UC_JOINING_GROUP_LAM,                      /* Lam */
0502   UC_JOINING_GROUP_LAMADH,                   /* Lamadh */
0503   UC_JOINING_GROUP_MEEM,                     /* Meem */
0504   UC_JOINING_GROUP_MIM,                      /* Mim */
0505   UC_JOINING_GROUP_NOON,                     /* Noon */
0506   UC_JOINING_GROUP_NUN,                      /* Nun */
0507   UC_JOINING_GROUP_NYA,                      /* Nya */
0508   UC_JOINING_GROUP_PE,                       /* Pe */
0509   UC_JOINING_GROUP_QAF,                      /* Qaf */
0510   UC_JOINING_GROUP_QAPH,                     /* Qaph */
0511   UC_JOINING_GROUP_REH,                      /* Reh */
0512   UC_JOINING_GROUP_REVERSED_PE,              /* Reversed_Pe */
0513   UC_JOINING_GROUP_SAD,                      /* Sad */
0514   UC_JOINING_GROUP_SADHE,                    /* Sadhe */
0515   UC_JOINING_GROUP_SEEN,                     /* Seen */
0516   UC_JOINING_GROUP_SEMKATH,                  /* Semkath */
0517   UC_JOINING_GROUP_SHIN,                     /* Shin */
0518   UC_JOINING_GROUP_SWASH_KAF,                /* Swash_Kaf */
0519   UC_JOINING_GROUP_SYRIAC_WAW,               /* Syriac_Waw */
0520   UC_JOINING_GROUP_TAH,                      /* Tah */
0521   UC_JOINING_GROUP_TAW,                      /* Taw */
0522   UC_JOINING_GROUP_TEH_MARBUTA,              /* Teh_Marbuta */
0523   UC_JOINING_GROUP_TEH_MARBUTA_GOAL,         /* Teh_Marbuta_Goal */
0524   UC_JOINING_GROUP_TETH,                     /* Teth */
0525   UC_JOINING_GROUP_WAW,                      /* Waw */
0526   UC_JOINING_GROUP_YEH,                      /* Yeh */
0527   UC_JOINING_GROUP_YEH_BARREE,               /* Yeh_Barree */
0528   UC_JOINING_GROUP_YEH_WITH_TAIL,            /* Yeh_With_Tail */
0529   UC_JOINING_GROUP_YUDH,                     /* Yudh */
0530   UC_JOINING_GROUP_YUDH_HE,                  /* Yudh_He */
0531   UC_JOINING_GROUP_ZAIN,                     /* Zain */
0532   UC_JOINING_GROUP_ZHAIN,                    /* Zhain */
0533   UC_JOINING_GROUP_ROHINGYA_YEH,             /* Rohingya_Yeh */
0534   UC_JOINING_GROUP_STRAIGHT_WAW,             /* Straight_Waw */
0535   UC_JOINING_GROUP_MANICHAEAN_ALEPH,         /* Manichaean_Aleph */
0536   UC_JOINING_GROUP_MANICHAEAN_BETH,          /* Manichaean_Beth */
0537   UC_JOINING_GROUP_MANICHAEAN_GIMEL,         /* Manichaean_Gimel */
0538   UC_JOINING_GROUP_MANICHAEAN_DALETH,        /* Manichaean_Daleth */
0539   UC_JOINING_GROUP_MANICHAEAN_WAW,           /* Manichaean_Waw */
0540   UC_JOINING_GROUP_MANICHAEAN_ZAYIN,         /* Manichaean_Zayin */
0541   UC_JOINING_GROUP_MANICHAEAN_HETH,          /* Manichaean_Heth */
0542   UC_JOINING_GROUP_MANICHAEAN_TETH,          /* Manichaean_Teth */
0543   UC_JOINING_GROUP_MANICHAEAN_YODH,          /* Manichaean_Yodh */
0544   UC_JOINING_GROUP_MANICHAEAN_KAPH,          /* Manichaean_Kaph */
0545   UC_JOINING_GROUP_MANICHAEAN_LAMEDH,        /* Manichaean_Lamedh */
0546   UC_JOINING_GROUP_MANICHAEAN_DHAMEDH,       /* Manichaean_Dhamedh */
0547   UC_JOINING_GROUP_MANICHAEAN_THAMEDH,       /* Manichaean_Thamedh */
0548   UC_JOINING_GROUP_MANICHAEAN_MEM,           /* Manichaean_Mem */
0549   UC_JOINING_GROUP_MANICHAEAN_NUN,           /* Manichaean_Nun */
0550   UC_JOINING_GROUP_MANICHAEAN_SAMEKH,        /* Manichaean_Samekh */
0551   UC_JOINING_GROUP_MANICHAEAN_AYIN,          /* Manichaean_Ayin */
0552   UC_JOINING_GROUP_MANICHAEAN_PE,            /* Manichaean_Pe */
0553   UC_JOINING_GROUP_MANICHAEAN_SADHE,         /* Manichaean_Sadhe */
0554   UC_JOINING_GROUP_MANICHAEAN_QOPH,          /* Manichaean_Qoph */
0555   UC_JOINING_GROUP_MANICHAEAN_RESH,          /* Manichaean_Resh */
0556   UC_JOINING_GROUP_MANICHAEAN_TAW,           /* Manichaean_Taw */
0557   UC_JOINING_GROUP_MANICHAEAN_ONE,           /* Manichaean_One */
0558   UC_JOINING_GROUP_MANICHAEAN_FIVE,          /* Manichaean_Five */
0559   UC_JOINING_GROUP_MANICHAEAN_TEN,           /* Manichaean_Ten */
0560   UC_JOINING_GROUP_MANICHAEAN_TWENTY,        /* Manichaean_Twenty */
0561   UC_JOINING_GROUP_MANICHAEAN_HUNDRED,       /* Manichaean_Hundred */
0562   UC_JOINING_GROUP_AFRICAN_FEH,              /* African_Feh */
0563   UC_JOINING_GROUP_AFRICAN_QAF,              /* African_Qaf */
0564   UC_JOINING_GROUP_AFRICAN_NOON,             /* African_Noon */
0565   UC_JOINING_GROUP_MALAYALAM_NGA,            /* Malayalam_Nga */
0566   UC_JOINING_GROUP_MALAYALAM_JA,             /* Malayalam_Ja */
0567   UC_JOINING_GROUP_MALAYALAM_NYA,            /* Malayalam_Nya */
0568   UC_JOINING_GROUP_MALAYALAM_TTA,            /* Malayalam_Tta */
0569   UC_JOINING_GROUP_MALAYALAM_NNA,            /* Malayalam_Nna */
0570   UC_JOINING_GROUP_MALAYALAM_NNNA,           /* Malayalam_Nnna */
0571   UC_JOINING_GROUP_MALAYALAM_BHA,            /* Malayalam_Bha */
0572   UC_JOINING_GROUP_MALAYALAM_RA,             /* Malayalam_Ra */
0573   UC_JOINING_GROUP_MALAYALAM_LLA,            /* Malayalam_Lla */
0574   UC_JOINING_GROUP_MALAYALAM_LLLA,           /* Malayalam_Llla */
0575   UC_JOINING_GROUP_MALAYALAM_SSA,            /* Malayalam_Ssa */
0576   UC_JOINING_GROUP_HANIFI_ROHINGYA_PA,       /* Hanifi_Rohingya_Pa */
0577   UC_JOINING_GROUP_HANIFI_ROHINGYA_KINNA_YA, /* Hanifi_Rohingya_Kinna_Ya */
0578   UC_JOINING_GROUP_THIN_YEH,                 /* Thin_Yeh */
0579   UC_JOINING_GROUP_VERTICAL_TAIL             /* Vertical_Tail */
0580 };
0581 
0582 /* Return the name of a joining group.  */
0583 extern const char *
0584        uc_joining_group_name (int joining_group)
0585        _UC_ATTRIBUTE_CONST;
0586 
0587 /* Return the joining group given by name, e.g. "Teh_Marbuta".  */
0588 extern int
0589        uc_joining_group_byname (const char *joining_group_name)
0590        _UC_ATTRIBUTE_PURE;
0591 
0592 /* Return the joining group of a Unicode character.  */
0593 extern int
0594        uc_joining_group (ucs4_t uc)
0595        _UC_ATTRIBUTE_CONST;
0596 
0597 /* ========================================================================= */
0598 
0599 /* Common API for properties.  */
0600 
0601 /* Data type denoting a property.  This is not just a number, but rather a
0602    pointer to the test functions, so that programs that use only few of the
0603    properties don't have a link-time dependency towards all the tables.  */
0604 typedef struct
0605 {
0606   bool (*test_fn) (ucs4_t uc);
0607 }
0608 uc_property_t;
0609 
0610 /* Predefined properties.  */
0611 /* General.  */
0612 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_WHITE_SPACE;
0613 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ALPHABETIC;
0614 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_ALPHABETIC;
0615 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_NOT_A_CHARACTER;
0616 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_DEFAULT_IGNORABLE_CODE_POINT;
0617 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT;
0618 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_DEPRECATED;
0619 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_LOGICAL_ORDER_EXCEPTION;
0620 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_VARIATION_SELECTOR;
0621 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PRIVATE_USE;
0622 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_UNASSIGNED_CODE_VALUE;
0623 /* Case.  */
0624 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_UPPERCASE;
0625 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_UPPERCASE;
0626 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_LOWERCASE;
0627 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_LOWERCASE;
0628 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_TITLECASE;
0629 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CASED;
0630 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CASE_IGNORABLE;
0631 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CHANGES_WHEN_LOWERCASED;
0632 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CHANGES_WHEN_UPPERCASED;
0633 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CHANGES_WHEN_TITLECASED;
0634 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CHANGES_WHEN_CASEFOLDED;
0635 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CHANGES_WHEN_CASEMAPPED;
0636 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_SOFT_DOTTED;
0637 /* Identifiers: ID/XID start and continue sets, and pattern syntax.  */
0638 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ID_START;
0639 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_ID_START;
0640 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ID_CONTINUE;
0641 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_ID_CONTINUE;
0642 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_XID_START;
0643 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_XID_CONTINUE;
0644 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ID_COMPAT_MATH_START;
0645 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ID_COMPAT_MATH_CONTINUE;
0646 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PATTERN_WHITE_SPACE;
0647 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PATTERN_SYNTAX;
0648 /* Shaping and rendering: join controls and grapheme-related properties.  */
0649 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_JOIN_CONTROL;
0650 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_GRAPHEME_BASE;
0651 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_GRAPHEME_EXTEND;
0652 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_GRAPHEME_EXTEND;
0653 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_GRAPHEME_LINK;
0654 /* Bidi (bidirectional text).  */
0655 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_CONTROL;
0656 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_LEFT_TO_RIGHT;
0657 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_HEBREW_RIGHT_TO_LEFT;
0658 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_ARABIC_RIGHT_TO_LEFT;
0659 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_EUROPEAN_DIGIT;
0660 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_EUR_NUM_SEPARATOR;
0661 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_EUR_NUM_TERMINATOR;
0662 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_ARABIC_DIGIT;
0663 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_COMMON_SEPARATOR;
0664 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_BLOCK_SEPARATOR;
0665 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_SEGMENT_SEPARATOR;
0666 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_WHITESPACE;
0667 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_NON_SPACING_MARK;
0668 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_BOUNDARY_NEUTRAL;
0669 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_PDF;
0670 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_EMBEDDING_OR_OVERRIDE;
0671 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_OTHER_NEUTRAL;
0672 /* Numeric: hexadecimal digits.  */
0673 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_HEX_DIGIT;
0674 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ASCII_HEX_DIGIT;
0675 /* CJK: ideographs, radicals, and IDS operators.  */
0676 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_IDEOGRAPHIC;
0677 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_UNIFIED_IDEOGRAPH;
0678 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_RADICAL;
0679 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_IDS_UNARY_OPERATOR;
0680 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_IDS_BINARY_OPERATOR;
0681 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_IDS_TRINARY_OPERATOR;
0682 /* Emoji.  */
0683 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_EMOJI;
0684 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_EMOJI_PRESENTATION;
0685 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_EMOJI_MODIFIER;
0686 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_EMOJI_MODIFIER_BASE;
0687 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_EMOJI_COMPONENT;
0688 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_EXTENDED_PICTOGRAPHIC;
0689 /* Miscellaneous.  */
0690 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ZERO_WIDTH;
0691 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_SPACE;
0692 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_NON_BREAK;
0693 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ISO_CONTROL;
0694 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_FORMAT_CONTROL;
0695 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PREPENDED_CONCATENATION_MARK;
0696 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_DASH;
0697 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_HYPHEN;
0698 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PUNCTUATION;
0699 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_LINE_SEPARATOR;
0700 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PARAGRAPH_SEPARATOR;
0701 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_QUOTATION_MARK;
0702 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_SENTENCE_TERMINAL;
0703 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_TERMINAL_PUNCTUATION;
0704 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CURRENCY_SYMBOL;
0705 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_MATH;
0706 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_MATH;
0707 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PAIRED_PUNCTUATION;
0708 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_LEFT_OF_PAIR;
0709 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_COMBINING;
0710 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_COMPOSITE;
0711 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_DECIMAL_DIGIT;
0712 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_NUMERIC;
0713 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_DIACRITIC;
0714 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_EXTENDER;
0715 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_IGNORABLE_CONTROL;
0716 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_REGIONAL_INDICATOR;
0717 
0718 /* Return the property given by name, e.g. "White space".
     NOTE(review): the handling of an unknown name is not visible in this
     header; presumably the result then fails uc_property_is_valid() --
     confirm against the implementation.  */
0719 extern uc_property_t
0720        uc_property_byname (const char *property_name);
0721 
0722 /* Test whether a property is valid, i.e. whether its test_fn member is
     non-NULL.  The argument must be an object (not a pointer), because the
     member is accessed with '.'; it is evaluated exactly once.  */
0723 #define uc_property_is_valid(property) ((property).test_fn != NULL)
0724 
0725 /* Test whether a Unicode character has a given property.
     uc_is_property takes the property as a run-time argument.  Each
     uc_is_property_* function after it takes only the character and tests
     one fixed property; the function names mirror the names of the
     UC_PROPERTY_* objects declared above.  */
0726 extern bool
0727        uc_is_property (ucs4_t uc, uc_property_t property);
0728 extern bool uc_is_property_white_space (ucs4_t uc)
0729        _UC_ATTRIBUTE_CONST;
0730 extern bool uc_is_property_alphabetic (ucs4_t uc)
0731        _UC_ATTRIBUTE_CONST;
0732 extern bool uc_is_property_other_alphabetic (ucs4_t uc)
0733        _UC_ATTRIBUTE_CONST;
0734 extern bool uc_is_property_not_a_character (ucs4_t uc)
0735        _UC_ATTRIBUTE_CONST;
0736 extern bool uc_is_property_default_ignorable_code_point (ucs4_t uc)
0737        _UC_ATTRIBUTE_CONST;
0738 extern bool uc_is_property_other_default_ignorable_code_point (ucs4_t uc)
0739        _UC_ATTRIBUTE_CONST;
0740 extern bool uc_is_property_deprecated (ucs4_t uc)
0741        _UC_ATTRIBUTE_CONST;
0742 extern bool uc_is_property_logical_order_exception (ucs4_t uc)
0743        _UC_ATTRIBUTE_CONST;
0744 extern bool uc_is_property_variation_selector (ucs4_t uc)
0745        _UC_ATTRIBUTE_CONST;
0746 extern bool uc_is_property_private_use (ucs4_t uc)
0747        _UC_ATTRIBUTE_CONST;
0748 extern bool uc_is_property_unassigned_code_value (ucs4_t uc)
0749        _UC_ATTRIBUTE_CONST;
0750 extern bool uc_is_property_uppercase (ucs4_t uc)
0751        _UC_ATTRIBUTE_CONST;
0752 extern bool uc_is_property_other_uppercase (ucs4_t uc)
0753        _UC_ATTRIBUTE_CONST;
0754 extern bool uc_is_property_lowercase (ucs4_t uc)
0755        _UC_ATTRIBUTE_CONST;
0756 extern bool uc_is_property_other_lowercase (ucs4_t uc)
0757        _UC_ATTRIBUTE_CONST;
0758 extern bool uc_is_property_titlecase (ucs4_t uc)
0759        _UC_ATTRIBUTE_CONST;
0760 extern bool uc_is_property_cased (ucs4_t uc)
0761        _UC_ATTRIBUTE_CONST;
0762 extern bool uc_is_property_case_ignorable (ucs4_t uc)
0763        _UC_ATTRIBUTE_CONST;
0764 extern bool uc_is_property_changes_when_lowercased (ucs4_t uc)
0765        _UC_ATTRIBUTE_CONST;
0766 extern bool uc_is_property_changes_when_uppercased (ucs4_t uc)
0767        _UC_ATTRIBUTE_CONST;
0768 extern bool uc_is_property_changes_when_titlecased (ucs4_t uc)
0769        _UC_ATTRIBUTE_CONST;
0770 extern bool uc_is_property_changes_when_casefolded (ucs4_t uc)
0771        _UC_ATTRIBUTE_CONST;
0772 extern bool uc_is_property_changes_when_casemapped (ucs4_t uc)
0773        _UC_ATTRIBUTE_CONST;
0774 extern bool uc_is_property_soft_dotted (ucs4_t uc)
0775        _UC_ATTRIBUTE_CONST;
0776 extern bool uc_is_property_id_start (ucs4_t uc)
0777        _UC_ATTRIBUTE_CONST;
0778 extern bool uc_is_property_other_id_start (ucs4_t uc)
0779        _UC_ATTRIBUTE_CONST;
0780 extern bool uc_is_property_id_continue (ucs4_t uc)
0781        _UC_ATTRIBUTE_CONST;
0782 extern bool uc_is_property_other_id_continue (ucs4_t uc)
0783        _UC_ATTRIBUTE_CONST;
0784 extern bool uc_is_property_xid_start (ucs4_t uc)
0785        _UC_ATTRIBUTE_CONST;
0786 extern bool uc_is_property_xid_continue (ucs4_t uc)
0787        _UC_ATTRIBUTE_CONST;
0788 extern bool uc_is_property_id_compat_math_start (ucs4_t uc)
0789        _UC_ATTRIBUTE_CONST;
0790 extern bool uc_is_property_id_compat_math_continue (ucs4_t uc)
0791        _UC_ATTRIBUTE_CONST;
0792 extern bool uc_is_property_pattern_white_space (ucs4_t uc)
0793        _UC_ATTRIBUTE_CONST;
0794 extern bool uc_is_property_pattern_syntax (ucs4_t uc)
0795        _UC_ATTRIBUTE_CONST;
0796 extern bool uc_is_property_join_control (ucs4_t uc)
0797        _UC_ATTRIBUTE_CONST;
0798 extern bool uc_is_property_grapheme_base (ucs4_t uc)
0799        _UC_ATTRIBUTE_CONST;
0800 extern bool uc_is_property_grapheme_extend (ucs4_t uc)
0801        _UC_ATTRIBUTE_CONST;
0802 extern bool uc_is_property_other_grapheme_extend (ucs4_t uc)
0803        _UC_ATTRIBUTE_CONST;
0804 extern bool uc_is_property_grapheme_link (ucs4_t uc)
0805        _UC_ATTRIBUTE_CONST;
0806 extern bool uc_is_property_bidi_control (ucs4_t uc)
0807        _UC_ATTRIBUTE_CONST;
0808 extern bool uc_is_property_bidi_left_to_right (ucs4_t uc)
0809        _UC_ATTRIBUTE_CONST;
0810 extern bool uc_is_property_bidi_hebrew_right_to_left (ucs4_t uc)
0811        _UC_ATTRIBUTE_CONST;
0812 extern bool uc_is_property_bidi_arabic_right_to_left (ucs4_t uc)
0813        _UC_ATTRIBUTE_CONST;
0814 extern bool uc_is_property_bidi_european_digit (ucs4_t uc)
0815        _UC_ATTRIBUTE_CONST;
0816 extern bool uc_is_property_bidi_eur_num_separator (ucs4_t uc)
0817        _UC_ATTRIBUTE_CONST;
0818 extern bool uc_is_property_bidi_eur_num_terminator (ucs4_t uc)
0819        _UC_ATTRIBUTE_CONST;
0820 extern bool uc_is_property_bidi_arabic_digit (ucs4_t uc)
0821        _UC_ATTRIBUTE_CONST;
0822 extern bool uc_is_property_bidi_common_separator (ucs4_t uc)
0823        _UC_ATTRIBUTE_CONST;
0824 extern bool uc_is_property_bidi_block_separator (ucs4_t uc)
0825        _UC_ATTRIBUTE_CONST;
0826 extern bool uc_is_property_bidi_segment_separator (ucs4_t uc)
0827        _UC_ATTRIBUTE_CONST;
0828 extern bool uc_is_property_bidi_whitespace (ucs4_t uc)
0829        _UC_ATTRIBUTE_CONST;
0830 extern bool uc_is_property_bidi_non_spacing_mark (ucs4_t uc)
0831        _UC_ATTRIBUTE_CONST;
0832 extern bool uc_is_property_bidi_boundary_neutral (ucs4_t uc)
0833        _UC_ATTRIBUTE_CONST;
0834 extern bool uc_is_property_bidi_pdf (ucs4_t uc)
0835        _UC_ATTRIBUTE_CONST;
0836 extern bool uc_is_property_bidi_embedding_or_override (ucs4_t uc)
0837        _UC_ATTRIBUTE_CONST;
0838 extern bool uc_is_property_bidi_other_neutral (ucs4_t uc)
0839        _UC_ATTRIBUTE_CONST;
0840 extern bool uc_is_property_hex_digit (ucs4_t uc)
0841        _UC_ATTRIBUTE_CONST;
0842 extern bool uc_is_property_ascii_hex_digit (ucs4_t uc)
0843        _UC_ATTRIBUTE_CONST;
0844 extern bool uc_is_property_ideographic (ucs4_t uc)
0845        _UC_ATTRIBUTE_CONST;
0846 extern bool uc_is_property_unified_ideograph (ucs4_t uc)
0847        _UC_ATTRIBUTE_CONST;
0848 extern bool uc_is_property_radical (ucs4_t uc)
0849        _UC_ATTRIBUTE_CONST;
0850 extern bool uc_is_property_ids_unary_operator (ucs4_t uc)
0851        _UC_ATTRIBUTE_CONST;
0852 extern bool uc_is_property_ids_binary_operator (ucs4_t uc)
0853        _UC_ATTRIBUTE_CONST;
0854 extern bool uc_is_property_ids_trinary_operator (ucs4_t uc)
0855        _UC_ATTRIBUTE_CONST;
0856 extern bool uc_is_property_emoji (ucs4_t uc)
0857        _UC_ATTRIBUTE_CONST;
0858 extern bool uc_is_property_emoji_presentation (ucs4_t uc)
0859        _UC_ATTRIBUTE_CONST;
0860 extern bool uc_is_property_emoji_modifier (ucs4_t uc)
0861        _UC_ATTRIBUTE_CONST;
0862 extern bool uc_is_property_emoji_modifier_base (ucs4_t uc)
0863        _UC_ATTRIBUTE_CONST;
0864 extern bool uc_is_property_emoji_component (ucs4_t uc)
0865        _UC_ATTRIBUTE_CONST;
0866 extern bool uc_is_property_extended_pictographic (ucs4_t uc)
0867        _UC_ATTRIBUTE_CONST;
0868 extern bool uc_is_property_zero_width (ucs4_t uc)
0869        _UC_ATTRIBUTE_CONST;
0870 extern bool uc_is_property_space (ucs4_t uc)
0871        _UC_ATTRIBUTE_CONST;
0872 extern bool uc_is_property_non_break (ucs4_t uc)
0873        _UC_ATTRIBUTE_CONST;
0874 extern bool uc_is_property_iso_control (ucs4_t uc)
0875        _UC_ATTRIBUTE_CONST;
0876 extern bool uc_is_property_format_control (ucs4_t uc)
0877        _UC_ATTRIBUTE_CONST;
0878 extern bool uc_is_property_prepended_concatenation_mark (ucs4_t uc)
0879        _UC_ATTRIBUTE_CONST;
0880 extern bool uc_is_property_dash (ucs4_t uc)
0881        _UC_ATTRIBUTE_CONST;
0882 extern bool uc_is_property_hyphen (ucs4_t uc)
0883        _UC_ATTRIBUTE_CONST;
0884 extern bool uc_is_property_punctuation (ucs4_t uc)
0885        _UC_ATTRIBUTE_CONST;
0886 extern bool uc_is_property_line_separator (ucs4_t uc)
0887        _UC_ATTRIBUTE_CONST;
0888 extern bool uc_is_property_paragraph_separator (ucs4_t uc)
0889        _UC_ATTRIBUTE_CONST;
0890 extern bool uc_is_property_quotation_mark (ucs4_t uc)
0891        _UC_ATTRIBUTE_CONST;
0892 extern bool uc_is_property_sentence_terminal (ucs4_t uc)
0893        _UC_ATTRIBUTE_CONST;
0894 extern bool uc_is_property_terminal_punctuation (ucs4_t uc)
0895        _UC_ATTRIBUTE_CONST;
0896 extern bool uc_is_property_currency_symbol (ucs4_t uc)
0897        _UC_ATTRIBUTE_CONST;
0898 extern bool uc_is_property_math (ucs4_t uc)
0899        _UC_ATTRIBUTE_CONST;
0900 extern bool uc_is_property_other_math (ucs4_t uc)
0901        _UC_ATTRIBUTE_CONST;
0902 extern bool uc_is_property_paired_punctuation (ucs4_t uc)
0903        _UC_ATTRIBUTE_CONST;
0904 extern bool uc_is_property_left_of_pair (ucs4_t uc)
0905        _UC_ATTRIBUTE_CONST;
0906 extern bool uc_is_property_combining (ucs4_t uc)
0907        _UC_ATTRIBUTE_CONST;
0908 extern bool uc_is_property_composite (ucs4_t uc)
0909        _UC_ATTRIBUTE_CONST;
0910 extern bool uc_is_property_decimal_digit (ucs4_t uc)
0911        _UC_ATTRIBUTE_CONST;
0912 extern bool uc_is_property_numeric (ucs4_t uc)
0913        _UC_ATTRIBUTE_CONST;
0914 extern bool uc_is_property_diacritic (ucs4_t uc)
0915        _UC_ATTRIBUTE_CONST;
0916 extern bool uc_is_property_extender (ucs4_t uc)
0917        _UC_ATTRIBUTE_CONST;
0918 extern bool uc_is_property_ignorable_control (ucs4_t uc)
0919        _UC_ATTRIBUTE_CONST;
0920 extern bool uc_is_property_regional_indicator (ucs4_t uc)
0921        _UC_ATTRIBUTE_CONST;
0922 
0923 /* ========================================================================= */
0924 
0925 /* Other attributes.  */
0926 
0927 /* ------------------------------------------------------------------------- */
0928 
0929 /* Indic_Conjunct_Break (InCB): from the file DerivedCoreProperties.txt
0930    in the Unicode Character Database.  */
0931 
0932 /* Possible values of the Indic_Conjunct_Break attribute.
0933    This enumeration may be extended in the future.
        No enumerator has an explicit value, so they are numbered
        consecutively, starting with UC_INDIC_CONJUNCT_BREAK_NONE = 0.  */
0934 enum
0935 {
0936   UC_INDIC_CONJUNCT_BREAK_NONE,              /* None */
0937   UC_INDIC_CONJUNCT_BREAK_CONSONANT,         /* Consonant */
0938   UC_INDIC_CONJUNCT_BREAK_LINKER,            /* Linker */
0939   UC_INDIC_CONJUNCT_BREAK_EXTEND             /* Extend */
0940 };
0941 
0942 /* Return the name of an Indic_Conjunct_Break value.
        NOTE(review): the argument is presumably one of the
        UC_INDIC_CONJUNCT_BREAK_* constants; the result for any other value
        is not specified in this header.  */
0943 extern const char *
0944        uc_indic_conjunct_break_name (int indic_conjunct_break)
0945        _UC_ATTRIBUTE_CONST;
0946 
0947 /* Return the Indic_Conjunct_Break value given by name, e.g. "Consonant".
        NOTE(review): the result for an unrecognized name is not specified in
        this header -- confirm against the implementation.  */
0948 extern int
0949        uc_indic_conjunct_break_byname (const char *indic_conjunct_break_name)
0950        _UC_ATTRIBUTE_PURE;
0951 
0952 /* Return the Indic_Conjunct_Break attribute of a Unicode character,
        presumably as one of the UC_INDIC_CONJUNCT_BREAK_* constants.  */
0953 extern int
0954        uc_indic_conjunct_break (ucs4_t uc)
0955        _UC_ATTRIBUTE_CONST;
0956 
0957 /* ========================================================================= */
0958 
0959 /* Subdivision of the Unicode characters into scripts.  */
0960 
0961 typedef struct                /* Element type of uc_script_t.intervals.  */
0962 {
0963   unsigned int code : 21;     /* 21-bit field, wide enough for 0..0x10FFFF.  */
0964   unsigned int start : 1;     /* NOTE(review): presumably set when 'code' starts an interval -- confirm.  */
0965   unsigned int end : 1;       /* NOTE(review): presumably set when 'code' ends an interval -- confirm.  */
0966 }
0967 uc_interval_t;
0968 typedef struct                      /* Description of one script.  */
0969 {
0970   unsigned int nintervals;          /* NOTE(review): presumably the number of elements in 'intervals' -- confirm.  */
0971   const uc_interval_t *intervals;   /* Table of uc_interval_t entries for this script.  */
0972   const char *name;                 /* Name of the script.  */
0973 }
0974 uc_script_t;
0975 
0976 /* Return the script of a Unicode character.
        NOTE(review): the result for a character that belongs to no script
        is not stated here; presumably NULL -- confirm.  */
0977 extern const uc_script_t *
0978        uc_script (ucs4_t uc)
0979        _UC_ATTRIBUTE_CONST;
0980 
0981 /* Return the script given by name, e.g. "HAN".
        NOTE(review): the result for an unknown name is not stated here;
        presumably NULL -- confirm.  */
0982 extern const uc_script_t *
0983        uc_script_byname (const char *script_name)
0984        _UC_ATTRIBUTE_PURE;
0985 
0986 /* Test whether a Unicode character belongs to a given script.  */
0987 extern bool
0988        uc_is_script (ucs4_t uc, const uc_script_t *script)
0989        _UC_ATTRIBUTE_PURE;
0990 
0991 /* Get the list of all scripts.  The result is delivered through the two
        pointer arguments: the array through 'scripts' and, presumably, its
        number of elements through 'count'.  */
0992 extern void
0993        uc_all_scripts (const uc_script_t **scripts, size_t *count);
0994 
0995 /* ========================================================================= */
0996 
0997 /* Subdivision of the Unicode character range into blocks.  */
0998 
0999 typedef struct               /* Description of one block of the Unicode character range.  */
1000 {
1001   ucs4_t start;              /* First code point of the block.  */
1002   ucs4_t end;                /* End of the block; NOTE(review): presumably inclusive -- confirm in uc_is_block.  */
1003   const char *name;          /* Name of the block.  */
1004 }
1005 uc_block_t;
1006 
1007 /* Return the block a character belongs to.
        NOTE(review): the result for a character outside every block is not
        stated here; presumably NULL -- confirm.  */
1008 extern const uc_block_t *
1009        uc_block (ucs4_t uc)
1010        _UC_ATTRIBUTE_CONST;
1011 
1012 /* Test whether a Unicode character belongs to a given block.  */
1013 extern bool
1014        uc_is_block (ucs4_t uc, const uc_block_t *block)
1015        _UC_ATTRIBUTE_PURE;
1016 
1017 /* Get the list of all blocks.  The result is delivered through the two
        pointer arguments: the array through 'blocks' and, presumably, its
        number of elements through 'count'.  */
1018 extern void
1019        uc_all_blocks (const uc_block_t **blocks, size_t *count);
1020 
1021 /* ========================================================================= */
1022 
1023 /* Properties taken from language standards.  */
1024 
1025 /* Test whether a Unicode character is considered whitespace in ISO C 99.  */
1026 extern bool
1027        uc_is_c_whitespace (ucs4_t uc)
1028        _UC_ATTRIBUTE_CONST;
1029 
1030 /* Test whether a Unicode character is considered whitespace in Java.  */
1031 extern bool
1032        uc_is_java_whitespace (ucs4_t uc)
1033        _UC_ATTRIBUTE_CONST;
1034 
1035 enum                      /* Categories of a character w.r.t. identifier syntax; cf. the two functions below.  */
1036 {
1037   UC_IDENTIFIER_START,    /* valid as first or subsequent character */
1038   UC_IDENTIFIER_VALID,    /* valid as subsequent character only */
1039   UC_IDENTIFIER_INVALID,  /* not valid */
1040   UC_IDENTIFIER_IGNORABLE /* ignorable (Java only) */
1041 };
1042 
1043 /* Return the categorization of a Unicode character w.r.t. the ISO C 99
1044    identifier syntax.  The result is presumably one of the UC_IDENTIFIER_*
        values other than UC_IDENTIFIER_IGNORABLE, which is marked Java only.  */
1045 extern int
1046        uc_c_ident_category (ucs4_t uc)
1047        _UC_ATTRIBUTE_CONST;
1048 
1049 /* Return the categorization of a Unicode character w.r.t. the Java
1050    identifier syntax.  The result is presumably one of the UC_IDENTIFIER_*
        values.  */
1051 extern int
1052        uc_java_ident_category (ucs4_t uc)
1053        _UC_ATTRIBUTE_CONST;
1054 
1055 /* ========================================================================= */
1056 
1057 /* Like ISO C <ctype.h> and <wctype.h>.  These functions are deprecated,
1058    because this set of functions was designed with ASCII in mind and cannot
1059    reflect the more diverse reality of the Unicode character set.  But they
1060    can be a quick-and-dirty porting aid when migrating from wchar_t APIs
1061    to Unicode strings.  */
1062 
1063 /* Test for any character for which 'uc_is_alpha' or 'uc_is_digit' is true.  */
1064 extern bool
1065        uc_is_alnum (ucs4_t uc)
1066        _UC_ATTRIBUTE_CONST;
1067 
1068 /* Test for any character for which 'uc_is_upper' or 'uc_is_lower' is true,
1069    or any character that is one of a locale-specific set of characters for
1070    which none of 'uc_is_cntrl', 'uc_is_digit', 'uc_is_punct', or 'uc_is_space'
1071    is true.  */
1072 extern bool
1073        uc_is_alpha (ucs4_t uc)
1074        _UC_ATTRIBUTE_CONST;
1075 
1076 /* Test for any control character.  */
1077 extern bool
1078        uc_is_cntrl (ucs4_t uc)
1079        _UC_ATTRIBUTE_CONST;
1080 
1081 /* Test for any character that corresponds to a decimal-digit character.  */
1082 extern bool
1083        uc_is_digit (ucs4_t uc)
1084        _UC_ATTRIBUTE_CONST;
1085 
1086 /* Test for any character for which 'uc_is_print' is true and 'uc_is_space'
1087    is false.  */
1088 extern bool
1089        uc_is_graph (ucs4_t uc)
1090        _UC_ATTRIBUTE_CONST;
1091 
1092 /* Test for any character that corresponds to a lowercase letter or is one
1093    of a locale-specific set of characters for which none of 'uc_is_cntrl',
1094    'uc_is_digit', 'uc_is_punct', or 'uc_is_space' is true.  */
1095 extern bool
1096        uc_is_lower (ucs4_t uc)
1097        _UC_ATTRIBUTE_CONST;
1098 
1099 /* Test for any printing character.  */
1100 extern bool
1101        uc_is_print (ucs4_t uc)
1102        _UC_ATTRIBUTE_CONST;
1103 
1104 /* Test for any printing character that is one of a locale-specific set of
1105    characters for which neither 'uc_is_space' nor 'uc_is_alnum' is true.  */
1106 extern bool
1107        uc_is_punct (ucs4_t uc)
1108        _UC_ATTRIBUTE_CONST;
1109 
1110 /* Test for any character that corresponds to a locale-specific set of
1111    characters for which none of 'uc_is_alnum', 'uc_is_graph', or 'uc_is_punct'
1112    is true.  */
1113 extern bool
1114        uc_is_space (ucs4_t uc)
1115        _UC_ATTRIBUTE_CONST;
1116 
1117 /* Test for any character that corresponds to an uppercase letter or is one
1118    of a locale-specific set of characters for which none of 'uc_is_cntrl',
1119    'uc_is_digit', 'uc_is_punct', or 'uc_is_space' is true.  */
1120 extern bool
1121        uc_is_upper (ucs4_t uc)
1122        _UC_ATTRIBUTE_CONST;
1123 
1124 /* Test for any character that corresponds to a hexadecimal-digit
1125    character.  */
1126 extern bool
1127        uc_is_xdigit (ucs4_t uc)
1128        _UC_ATTRIBUTE_CONST;
1129 
1130 /* GNU extension.  */
1131 /* Test for any character that corresponds to a standard blank character or
1132    a locale-specific set of characters for which 'uc_is_alnum' is false.  */
1133 extern bool
1134        uc_is_blank (ucs4_t uc)
1135        _UC_ATTRIBUTE_CONST;
1136 
1137 /* ========================================================================= */
1138 
1139 #ifdef __cplusplus
1140 }
1141 #endif
1142 
1143 #endif /* _UNICTYPE_H */