Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2024-05-18 08:30:28

0001 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
0002 /* Unicode character classification and properties.
0003    Copyright (C) 2002, 2005-2022 Free Software Foundation, Inc.
0004 
0005    This file is free software: you can redistribute it and/or modify
0006    it under the terms of the GNU Lesser General Public License as
0007    published by the Free Software Foundation; either version 2.1 of the
0008    License, or (at your option) any later version.
0009 
0010    This file is distributed in the hope that it will be useful,
0011    but WITHOUT ANY WARRANTY; without even the implied warranty of
0012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0013    GNU Lesser General Public License for more details.
0014 
0015    You should have received a copy of the GNU Lesser General Public License
0016    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
0017 
0018 #ifndef _UNICTYPE_H
0019 #define _UNICTYPE_H
0020 
0021 #include "unitypes.h"
0022 
0023 /* Get LIBUNISTRING_DLL_VARIABLE.  */
0024 #include <unistring/woe32dll.h>
0025 
0026 /* Get bool.  */
0027 #include <unistring/stdbool.h>
0028 
0029 /* Get size_t.  */
0030 #include <stddef.h>
0031 
0032 #ifdef __cplusplus
0033 extern "C" {
0034 #endif
0035 
0036 /* ========================================================================= */
0037 
0038 /* Field 1 of Unicode Character Database: Character name.
0039    See "uniname.h".  */
0040 
0041 /* ========================================================================= */
0042 
0043 /* Field 2 of Unicode Character Database: General category.  */
0044 
0045 /* Data type denoting a General category value.  This is not just a bitmask,
0046    but rather a bitmask and a pointer to the lookup table, so that programs
0047    that use only the predefined bitmasks (i.e. don't combine bitmasks with &
0048    and |) don't have a link-time dependency towards the big general table.  */
0049 typedef struct
0050 {
0051   uint32_t bitmask : 31; /* presumably an OR of UC_CATEGORY_MASK_* bits -- TODO confirm */
0052   /*bool*/ unsigned int generic : 1; /* selects which member of 'lookup' is in use */
0053   union
0054   {
0055     const void *table;                               /* when generic is 0 */
0056     bool (*lookup_fn) (ucs4_t uc, uint32_t bitmask); /* when generic is 1 */
0057   } lookup;
0058 }
0059 uc_general_category_t;
0060 
0061 /* Bits and bit masks denoting General category values.  UnicodeData-3.2.0.html
0062    says a 32-bit integer will always suffice to represent them.
0063    These bit masks can only be used with the uc_is_general_category_withtable
0064    function.  */
0065 enum
0066 {
0067   UC_CATEGORY_MASK_L  = 0x0000001f, /* Lu | Ll | Lt | Lm | Lo */
0068   UC_CATEGORY_MASK_LC = 0x00000007, /* Lu | Ll | Lt */
0069   UC_CATEGORY_MASK_Lu = 0x00000001,
0070   UC_CATEGORY_MASK_Ll = 0x00000002,
0071   UC_CATEGORY_MASK_Lt = 0x00000004,
0072   UC_CATEGORY_MASK_Lm = 0x00000008,
0073   UC_CATEGORY_MASK_Lo = 0x00000010,
0074   UC_CATEGORY_MASK_M  = 0x000000e0, /* Mn | Mc | Me */
0075   UC_CATEGORY_MASK_Mn = 0x00000020,
0076   UC_CATEGORY_MASK_Mc = 0x00000040,
0077   UC_CATEGORY_MASK_Me = 0x00000080,
0078   UC_CATEGORY_MASK_N  = 0x00000700, /* Nd | Nl | No */
0079   UC_CATEGORY_MASK_Nd = 0x00000100,
0080   UC_CATEGORY_MASK_Nl = 0x00000200,
0081   UC_CATEGORY_MASK_No = 0x00000400,
0082   UC_CATEGORY_MASK_P  = 0x0003f800, /* Pc | Pd | Ps | Pe | Pi | Pf | Po */
0083   UC_CATEGORY_MASK_Pc = 0x00000800,
0084   UC_CATEGORY_MASK_Pd = 0x00001000,
0085   UC_CATEGORY_MASK_Ps = 0x00002000,
0086   UC_CATEGORY_MASK_Pe = 0x00004000,
0087   UC_CATEGORY_MASK_Pi = 0x00008000,
0088   UC_CATEGORY_MASK_Pf = 0x00010000,
0089   UC_CATEGORY_MASK_Po = 0x00020000,
0090   UC_CATEGORY_MASK_S  = 0x003c0000, /* Sm | Sc | Sk | So */
0091   UC_CATEGORY_MASK_Sm = 0x00040000,
0092   UC_CATEGORY_MASK_Sc = 0x00080000,
0093   UC_CATEGORY_MASK_Sk = 0x00100000,
0094   UC_CATEGORY_MASK_So = 0x00200000,
0095   UC_CATEGORY_MASK_Z  = 0x01c00000, /* Zs | Zl | Zp */
0096   UC_CATEGORY_MASK_Zs = 0x00400000,
0097   UC_CATEGORY_MASK_Zl = 0x00800000,
0098   UC_CATEGORY_MASK_Zp = 0x01000000,
0099   UC_CATEGORY_MASK_C  = 0x3e000000, /* Cc | Cf | Cs | Co | Cn */
0100   UC_CATEGORY_MASK_Cc = 0x02000000,
0101   UC_CATEGORY_MASK_Cf = 0x04000000,
0102   UC_CATEGORY_MASK_Cs = 0x08000000,
0103   UC_CATEGORY_MASK_Co = 0x10000000,
0104   UC_CATEGORY_MASK_Cn = 0x20000000
0105 };
0106 
0107 /* Predefined General category values.  */
0108 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_L;
0109 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_LC;
0110 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Lu;
0111 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Ll;
0112 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Lt;
0113 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Lm;
0114 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Lo;
0115 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_M;
0116 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Mn;
0117 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Mc;
0118 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Me;
0119 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_N;
0120 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Nd;
0121 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Nl;
0122 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_No;
0123 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_P;
0124 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Pc;
0125 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Pd;
0126 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Ps;
0127 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Pe;
0128 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Pi;
0129 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Pf;
0130 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Po;
0131 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_S;
0132 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Sm;
0133 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Sc;
0134 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Sk;
0135 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_So;
0136 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Z;
0137 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Zs;
0138 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Zl;
0139 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Zp;
0140 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_C;
0141 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Cc;
0142 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Cf;
0143 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Cs;
0144 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Co;
0145 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Cn;
0146 /* Non-public.  */
0147 extern const uc_general_category_t _UC_CATEGORY_NONE;
0148 
0149 /* Alias names for predefined General category values.  */
0150 #define UC_LETTER                    UC_CATEGORY_L
0151 #define UC_CASED_LETTER              UC_CATEGORY_LC
0152 #define UC_UPPERCASE_LETTER          UC_CATEGORY_Lu
0153 #define UC_LOWERCASE_LETTER          UC_CATEGORY_Ll
0154 #define UC_TITLECASE_LETTER          UC_CATEGORY_Lt
0155 #define UC_MODIFIER_LETTER           UC_CATEGORY_Lm
0156 #define UC_OTHER_LETTER              UC_CATEGORY_Lo
0157 #define UC_MARK                      UC_CATEGORY_M
0158 #define UC_NON_SPACING_MARK          UC_CATEGORY_Mn
0159 #define UC_COMBINING_SPACING_MARK    UC_CATEGORY_Mc
0160 #define UC_ENCLOSING_MARK            UC_CATEGORY_Me
0161 #define UC_NUMBER                    UC_CATEGORY_N
0162 #define UC_DECIMAL_DIGIT_NUMBER      UC_CATEGORY_Nd
0163 #define UC_LETTER_NUMBER             UC_CATEGORY_Nl
0164 #define UC_OTHER_NUMBER              UC_CATEGORY_No
0165 #define UC_PUNCTUATION               UC_CATEGORY_P
0166 #define UC_CONNECTOR_PUNCTUATION     UC_CATEGORY_Pc
0167 #define UC_DASH_PUNCTUATION          UC_CATEGORY_Pd
0168 #define UC_OPEN_PUNCTUATION          UC_CATEGORY_Ps /* a.k.a. UC_START_PUNCTUATION */
0169 #define UC_CLOSE_PUNCTUATION         UC_CATEGORY_Pe /* a.k.a. UC_END_PUNCTUATION */
0170 #define UC_INITIAL_QUOTE_PUNCTUATION UC_CATEGORY_Pi
0171 #define UC_FINAL_QUOTE_PUNCTUATION   UC_CATEGORY_Pf
0172 #define UC_OTHER_PUNCTUATION         UC_CATEGORY_Po
0173 #define UC_SYMBOL                    UC_CATEGORY_S
0174 #define UC_MATH_SYMBOL               UC_CATEGORY_Sm
0175 #define UC_CURRENCY_SYMBOL           UC_CATEGORY_Sc
0176 #define UC_MODIFIER_SYMBOL           UC_CATEGORY_Sk
0177 #define UC_OTHER_SYMBOL              UC_CATEGORY_So
0178 #define UC_SEPARATOR                 UC_CATEGORY_Z
0179 #define UC_SPACE_SEPARATOR           UC_CATEGORY_Zs
0180 #define UC_LINE_SEPARATOR            UC_CATEGORY_Zl
0181 #define UC_PARAGRAPH_SEPARATOR       UC_CATEGORY_Zp
0182 #define UC_OTHER                     UC_CATEGORY_C
0183 #define UC_CONTROL                   UC_CATEGORY_Cc
0184 #define UC_FORMAT                    UC_CATEGORY_Cf
0185 #define UC_SURROGATE                 UC_CATEGORY_Cs /* all of them are invalid characters */
0186 #define UC_PRIVATE_USE               UC_CATEGORY_Co
0187 #define UC_UNASSIGNED                UC_CATEGORY_Cn /* some of them are invalid characters */
0188 
0189 /* Return the union of two general categories.
0190    This corresponds to the unions of the two sets of characters.  */
0191 extern uc_general_category_t
0192        uc_general_category_or (uc_general_category_t category1,
0193                                uc_general_category_t category2);
0194 
0195 /* Return the intersection of two general categories as bit masks.
0196    This *does*not* correspond to the intersection of the two sets of
0197    characters.  */
0198 extern uc_general_category_t
0199        uc_general_category_and (uc_general_category_t category1,
0200                                 uc_general_category_t category2);
0201 
0202 /* Return the intersection of a general category with the complement of a
0203    second general category, as bit masks.
0204    This *does*not* correspond to the intersection with complement, when
0205    viewing the categories as sets of characters.  */
0206 extern uc_general_category_t
0207        uc_general_category_and_not (uc_general_category_t category1,
0208                                     uc_general_category_t category2);
0209 
0210 /* Return the name of a general category.  */
0211 extern const char *
0212        uc_general_category_name (uc_general_category_t category)
0213        _UC_ATTRIBUTE_PURE;
0214 
0215 /* Return the long name of a general category.  */
0216 extern const char *
0217        uc_general_category_long_name (uc_general_category_t category)
0218        _UC_ATTRIBUTE_PURE;
0219 
0220 /* Return the general category given by name, e.g. "Lu", or by long name,
0221    e.g. "Uppercase Letter".  */
0222 extern uc_general_category_t
0223        uc_general_category_byname (const char *category_name)
0224        _UC_ATTRIBUTE_PURE;
0225 
0226 /* Return the general category of a Unicode character.  */
0227 extern uc_general_category_t
0228        uc_general_category (ucs4_t uc)
0229        _UC_ATTRIBUTE_PURE;
0230 
0231 /* Test whether a Unicode character belongs to a given category.
0232    The CATEGORY argument can be the combination of several predefined
0233    general categories.  */
0234 extern bool
0235        uc_is_general_category (ucs4_t uc, uc_general_category_t category)
0236        _UC_ATTRIBUTE_PURE;
0237 /* Likewise.  This function uses a big table comprising all categories.  */
0238 extern bool
0239        uc_is_general_category_withtable (ucs4_t uc, uint32_t bitmask)
0240        _UC_ATTRIBUTE_CONST;
0241 
0242 /* ========================================================================= */
0243 
0244 /* Field 3 of Unicode Character Database: Canonical combining class.  */
0245 
0246 /* The possible results of uc_combining_class (0..255) are described in
0247    UCD.html.  The list here is not definitive; more values can be added
0248    in future versions.  */
0249 enum
0250 {
0251   UC_CCC_NR   =   0, /* Not Reordered */
0252   UC_CCC_OV   =   1, /* Overlay */
0253   UC_CCC_NK   =   7, /* Nukta */
0254   UC_CCC_KV   =   8, /* Kana Voicing */
0255   UC_CCC_VR   =   9, /* Virama */
0256   UC_CCC_ATBL = 200, /* Attached Below Left */
0257   UC_CCC_ATB  = 202, /* Attached Below */
0258   UC_CCC_ATA  = 214, /* Attached Above */
0259   UC_CCC_ATAR = 216, /* Attached Above Right */
0260   UC_CCC_BL   = 218, /* Below Left */
0261   UC_CCC_B    = 220, /* Below */
0262   UC_CCC_BR   = 222, /* Below Right */
0263   UC_CCC_L    = 224, /* Left */
0264   UC_CCC_R    = 226, /* Right */
0265   UC_CCC_AL   = 228, /* Above Left */
0266   UC_CCC_A    = 230, /* Above */
0267   UC_CCC_AR   = 232, /* Above Right */
0268   UC_CCC_DB   = 233, /* Double Below */
0269   UC_CCC_DA   = 234, /* Double Above */
0270   UC_CCC_IS   = 240  /* Iota Subscript */
0271 };
0272 
0273 /* Return the canonical combining class of a Unicode character.  */
0274 extern int
0275        uc_combining_class (ucs4_t uc)
0276        _UC_ATTRIBUTE_CONST;
0277 
0278 /* Return the name of a canonical combining class.  */
0279 extern const char *
0280        uc_combining_class_name (int ccc)
0281        _UC_ATTRIBUTE_CONST;
0282 
0283 /* Return the long name of a canonical combining class.  */
0284 extern const char *
0285        uc_combining_class_long_name (int ccc)
0286        _UC_ATTRIBUTE_CONST;
0287 
0288 /* Return the canonical combining class given by name, e.g. "BL", or by long
0289    name, e.g. "Below Left".  */
0290 extern int
0291        uc_combining_class_byname (const char *ccc_name)
0292        _UC_ATTRIBUTE_PURE;
0293 
0294 /* ========================================================================= */
0295 
0296 /* Field 4 of Unicode Character Database: Bidi class.
0297    Before Unicode 4.0, this field was called "Bidirectional category".  */
0298 
0299 enum /* Bidi class values.  No explicit initializers, so they number 0 (UC_BIDI_L) through 22 (UC_BIDI_PDI) in the order listed.  */
0300 {
0301   UC_BIDI_L,   /* Left-to-Right */
0302   UC_BIDI_LRE, /* Left-to-Right Embedding */
0303   UC_BIDI_LRO, /* Left-to-Right Override */
0304   UC_BIDI_R,   /* Right-to-Left */
0305   UC_BIDI_AL,  /* Right-to-Left Arabic */
0306   UC_BIDI_RLE, /* Right-to-Left Embedding */
0307   UC_BIDI_RLO, /* Right-to-Left Override */
0308   UC_BIDI_PDF, /* Pop Directional Format */
0309   UC_BIDI_EN,  /* European Number */
0310   UC_BIDI_ES,  /* European Number Separator */
0311   UC_BIDI_ET,  /* European Number Terminator */
0312   UC_BIDI_AN,  /* Arabic Number */
0313   UC_BIDI_CS,  /* Common Number Separator */
0314   UC_BIDI_NSM, /* Non-Spacing Mark */
0315   UC_BIDI_BN,  /* Boundary Neutral */
0316   UC_BIDI_B,   /* Paragraph Separator */
0317   UC_BIDI_S,   /* Segment Separator */
0318   UC_BIDI_WS,  /* Whitespace */
0319   UC_BIDI_ON,  /* Other Neutral */
0320   UC_BIDI_LRI, /* Left-to-Right Isolate */
0321   UC_BIDI_RLI, /* Right-to-Left Isolate */
0322   UC_BIDI_FSI, /* First Strong Isolate */
0323   UC_BIDI_PDI  /* Pop Directional Isolate */
0324 };
0325 
0326 /* Return the name of a bidi class.  */
0327 extern const char *
0328        uc_bidi_class_name (int bidi_class)
0329        _UC_ATTRIBUTE_CONST;
0330 /* Same; obsolete function name.  */
0331 extern const char *
0332        uc_bidi_category_name (int category)
0333        _UC_ATTRIBUTE_CONST;
0334 
0335 /* Return the long name of a bidi class.  */
0336 extern const char *
0337        uc_bidi_class_long_name (int bidi_class)
0338        _UC_ATTRIBUTE_CONST;
0339 
0340 /* Return the bidi class given by name, e.g. "LRE", or by long name, e.g.
0341    "Left-to-Right Embedding".  */
0342 extern int
0343        uc_bidi_class_byname (const char *bidi_class_name)
0344        _UC_ATTRIBUTE_PURE;
0345 /* Same; obsolete function name.  */
0346 extern int
0347        uc_bidi_category_byname (const char *category_name)
0348        _UC_ATTRIBUTE_PURE;
0349 
0350 /* Return the bidi class of a Unicode character.  */
0351 extern int
0352        uc_bidi_class (ucs4_t uc)
0353        _UC_ATTRIBUTE_CONST;
0354 /* Same; obsolete function name.  */
0355 extern int
0356        uc_bidi_category (ucs4_t uc)
0357        _UC_ATTRIBUTE_CONST;
0358 
0359 /* Test whether a Unicode character belongs to a given bidi class.  */
0360 extern bool
0361        uc_is_bidi_class (ucs4_t uc, int bidi_class)
0362        _UC_ATTRIBUTE_CONST;
0363 /* Same; obsolete function name.  */
0364 extern bool
0365        uc_is_bidi_category (ucs4_t uc, int category)
0366        _UC_ATTRIBUTE_CONST;
0367 
0368 /* ========================================================================= */
0369 
0370 /* Field 5 of Unicode Character Database: Character decomposition mapping.
0371    See "uninorm.h".  */
0372 
0373 /* ========================================================================= */
0374 
0375 /* Field 6 of Unicode Character Database: Decimal digit value.  */
0376 
0377 /* Return the decimal digit value of a Unicode character.  */
0378 extern int
0379        uc_decimal_value (ucs4_t uc)
0380        _UC_ATTRIBUTE_CONST;
0381 
0382 /* ========================================================================= */
0383 
0384 /* Field 7 of Unicode Character Database: Digit value.  */
0385 
0386 /* Return the digit value of a Unicode character.  */
0387 extern int
0388        uc_digit_value (ucs4_t uc)
0389        _UC_ATTRIBUTE_CONST;
0390 
0391 /* ========================================================================= */
0392 
0393 /* Field 8 of Unicode Character Database: Numeric value.  */
0394 
0395 /* Return the numeric value of a Unicode character.  */
0396 typedef struct /* A fraction numerator/denominator; the return type of uc_numeric_value.  */
0397 {
0398   int numerator;
0399   int denominator;
0400 }
0401 uc_fraction_t;
0402 extern uc_fraction_t
0403        uc_numeric_value (ucs4_t uc)
0404        _UC_ATTRIBUTE_CONST;
0405 
0406 /* ========================================================================= */
0407 
0408 /* Field 9 of Unicode Character Database: Mirrored.  */
0409 
0410 /* Return the mirrored character of a Unicode character UC in *PUC.  */
0411 extern bool
0412        uc_mirror_char (ucs4_t uc, ucs4_t *puc);
0413 
0414 /* ========================================================================= */
0415 
0416 /* Field 10 of Unicode Character Database: Unicode 1.0 Name.
0417    Not available in this library.  */
0418 
0419 /* ========================================================================= */
0420 
0421 /* Field 11 of Unicode Character Database: ISO 10646 comment.
0422    Not available in this library.  */
0423 
0424 /* ========================================================================= */
0425 
0426 /* Field 12, 13, 14 of Unicode Character Database: Uppercase mapping,
0427    lowercase mapping, titlecase mapping.  See "unicase.h".  */
0428 
0429 /* ========================================================================= */
0430 
0431 /* Field 2 of the file ArabicShaping.txt in the Unicode Character Database.  */
0432 
0433 /* Possible joining types.  */
0434 enum /* No explicit initializers: values run 0 (UC_JOINING_TYPE_U) through 5 (UC_JOINING_TYPE_D).  */
0435 {
0436   UC_JOINING_TYPE_U, /* Non_Joining */
0437   UC_JOINING_TYPE_T, /* Transparent */
0438   UC_JOINING_TYPE_C, /* Join_Causing */
0439   UC_JOINING_TYPE_L, /* Left_Joining */
0440   UC_JOINING_TYPE_R, /* Right_Joining */
0441   UC_JOINING_TYPE_D  /* Dual_Joining */
0442 };
0443 
0444 /* Return the name of a joining type.  */
0445 extern const char *
0446        uc_joining_type_name (int joining_type)
0447        _UC_ATTRIBUTE_CONST;
0448 
0449 /* Return the long name of a joining type.  */
0450 extern const char *
0451        uc_joining_type_long_name (int joining_type)
0452        _UC_ATTRIBUTE_CONST;
0453 
0454 /* Return the joining type given by name, e.g. "D", or by long name, e.g.
0455    "Dual Joining".  */
0456 extern int
0457        uc_joining_type_byname (const char *joining_type_name)
0458        _UC_ATTRIBUTE_PURE;
0459 
0460 /* Return the joining type of a Unicode character.  */
0461 extern int
0462        uc_joining_type (ucs4_t uc)
0463        _UC_ATTRIBUTE_CONST;
0464 
0465 /* ========================================================================= */
0466 
0467 /* Field 3 of the file ArabicShaping.txt in the Unicode Character Database.  */
0468 
0469 /* Possible joining groups.
0470    This enumeration may be extended in the future.  */
0471 enum
0472 {
0473   UC_JOINING_GROUP_NONE,                     /* No_Joining_Group */
0474   UC_JOINING_GROUP_AIN,                      /* Ain */
0475   UC_JOINING_GROUP_ALAPH,                    /* Alaph */
0476   UC_JOINING_GROUP_ALEF,                     /* Alef */
0477   UC_JOINING_GROUP_BEH,                      /* Beh */
0478   UC_JOINING_GROUP_BETH,                     /* Beth */
0479   UC_JOINING_GROUP_BURUSHASKI_YEH_BARREE,    /* Burushaski_Yeh_Barree */
0480   UC_JOINING_GROUP_DAL,                      /* Dal */
0481   UC_JOINING_GROUP_DALATH_RISH,              /* Dalath_Rish */
0482   UC_JOINING_GROUP_E,                        /* E */
0483   UC_JOINING_GROUP_FARSI_YEH,                /* Farsi_Yeh */
0484   UC_JOINING_GROUP_FE,                       /* Fe */
0485   UC_JOINING_GROUP_FEH,                      /* Feh */
0486   UC_JOINING_GROUP_FINAL_SEMKATH,            /* Final_Semkath */
0487   UC_JOINING_GROUP_GAF,                      /* Gaf */
0488   UC_JOINING_GROUP_GAMAL,                    /* Gamal */
0489   UC_JOINING_GROUP_HAH,                      /* Hah */
0490   UC_JOINING_GROUP_HE,                       /* He */
0491   UC_JOINING_GROUP_HEH,                      /* Heh */
0492   UC_JOINING_GROUP_HEH_GOAL,                 /* Heh_Goal */
0493   UC_JOINING_GROUP_HETH,                     /* Heth */
0494   UC_JOINING_GROUP_KAF,                      /* Kaf */
0495   UC_JOINING_GROUP_KAPH,                     /* Kaph */
0496   UC_JOINING_GROUP_KHAPH,                    /* Khaph */
0497   UC_JOINING_GROUP_KNOTTED_HEH,              /* Knotted_Heh */
0498   UC_JOINING_GROUP_LAM,                      /* Lam */
0499   UC_JOINING_GROUP_LAMADH,                   /* Lamadh */
0500   UC_JOINING_GROUP_MEEM,                     /* Meem */
0501   UC_JOINING_GROUP_MIM,                      /* Mim */
0502   UC_JOINING_GROUP_NOON,                     /* Noon */
0503   UC_JOINING_GROUP_NUN,                      /* Nun */
0504   UC_JOINING_GROUP_NYA,                      /* Nya */
0505   UC_JOINING_GROUP_PE,                       /* Pe */
0506   UC_JOINING_GROUP_QAF,                      /* Qaf */
0507   UC_JOINING_GROUP_QAPH,                     /* Qaph */
0508   UC_JOINING_GROUP_REH,                      /* Reh */
0509   UC_JOINING_GROUP_REVERSED_PE,              /* Reversed_Pe */
0510   UC_JOINING_GROUP_SAD,                      /* Sad */
0511   UC_JOINING_GROUP_SADHE,                    /* Sadhe */
0512   UC_JOINING_GROUP_SEEN,                     /* Seen */
0513   UC_JOINING_GROUP_SEMKATH,                  /* Semkath */
0514   UC_JOINING_GROUP_SHIN,                     /* Shin */
0515   UC_JOINING_GROUP_SWASH_KAF,                /* Swash_Kaf */
0516   UC_JOINING_GROUP_SYRIAC_WAW,               /* Syriac_Waw */
0517   UC_JOINING_GROUP_TAH,                      /* Tah */
0518   UC_JOINING_GROUP_TAW,                      /* Taw */
0519   UC_JOINING_GROUP_TEH_MARBUTA,              /* Teh_Marbuta */
0520   UC_JOINING_GROUP_TEH_MARBUTA_GOAL,         /* Teh_Marbuta_Goal */
0521   UC_JOINING_GROUP_TETH,                     /* Teth */
0522   UC_JOINING_GROUP_WAW,                      /* Waw */
0523   UC_JOINING_GROUP_YEH,                      /* Yeh */
0524   UC_JOINING_GROUP_YEH_BARREE,               /* Yeh_Barree */
0525   UC_JOINING_GROUP_YEH_WITH_TAIL,            /* Yeh_With_Tail */
0526   UC_JOINING_GROUP_YUDH,                     /* Yudh */
0527   UC_JOINING_GROUP_YUDH_HE,                  /* Yudh_He */
0528   UC_JOINING_GROUP_ZAIN,                     /* Zain */
0529   UC_JOINING_GROUP_ZHAIN,                    /* Zhain */
0530   UC_JOINING_GROUP_ROHINGYA_YEH,             /* Rohingya_Yeh */
0531   UC_JOINING_GROUP_STRAIGHT_WAW,             /* Straight_Waw */
0532   UC_JOINING_GROUP_MANICHAEAN_ALEPH,         /* Manichaean_Aleph */
0533   UC_JOINING_GROUP_MANICHAEAN_BETH,          /* Manichaean_Beth */
0534   UC_JOINING_GROUP_MANICHAEAN_GIMEL,         /* Manichaean_Gimel */
0535   UC_JOINING_GROUP_MANICHAEAN_DALETH,        /* Manichaean_Daleth */
0536   UC_JOINING_GROUP_MANICHAEAN_WAW,           /* Manichaean_Waw */
0537   UC_JOINING_GROUP_MANICHAEAN_ZAYIN,         /* Manichaean_Zayin */
0538   UC_JOINING_GROUP_MANICHAEAN_HETH,          /* Manichaean_Heth */
0539   UC_JOINING_GROUP_MANICHAEAN_TETH,          /* Manichaean_Teth */
0540   UC_JOINING_GROUP_MANICHAEAN_YODH,          /* Manichaean_Yodh */
0541   UC_JOINING_GROUP_MANICHAEAN_KAPH,          /* Manichaean_Kaph */
0542   UC_JOINING_GROUP_MANICHAEAN_LAMEDH,        /* Manichaean_Lamedh */
0543   UC_JOINING_GROUP_MANICHAEAN_DHAMEDH,       /* Manichaean_Dhamedh */
0544   UC_JOINING_GROUP_MANICHAEAN_THAMEDH,       /* Manichaean_Thamedh */
0545   UC_JOINING_GROUP_MANICHAEAN_MEM,           /* Manichaean_Mem */
0546   UC_JOINING_GROUP_MANICHAEAN_NUN,           /* Manichaean_Nun */
0547   UC_JOINING_GROUP_MANICHAEAN_SAMEKH,        /* Manichaean_Samekh */
0548   UC_JOINING_GROUP_MANICHAEAN_AYIN,          /* Manichaean_Ayin */
0549   UC_JOINING_GROUP_MANICHAEAN_PE,            /* Manichaean_Pe */
0550   UC_JOINING_GROUP_MANICHAEAN_SADHE,         /* Manichaean_Sadhe */
0551   UC_JOINING_GROUP_MANICHAEAN_QOPH,          /* Manichaean_Qoph */
0552   UC_JOINING_GROUP_MANICHAEAN_RESH,          /* Manichaean_Resh */
0553   UC_JOINING_GROUP_MANICHAEAN_TAW,           /* Manichaean_Taw */
0554   UC_JOINING_GROUP_MANICHAEAN_ONE,           /* Manichaean_One */
0555   UC_JOINING_GROUP_MANICHAEAN_FIVE,          /* Manichaean_Five */
0556   UC_JOINING_GROUP_MANICHAEAN_TEN,           /* Manichaean_Ten */
0557   UC_JOINING_GROUP_MANICHAEAN_TWENTY,        /* Manichaean_Twenty */
0558   UC_JOINING_GROUP_MANICHAEAN_HUNDRED,       /* Manichaean_Hundred */
0559   UC_JOINING_GROUP_AFRICAN_FEH,              /* African_Feh */
0560   UC_JOINING_GROUP_AFRICAN_QAF,              /* African_Qaf */
0561   UC_JOINING_GROUP_AFRICAN_NOON,             /* African_Noon */
0562   UC_JOINING_GROUP_MALAYALAM_NGA,            /* Malayalam_Nga */
0563   UC_JOINING_GROUP_MALAYALAM_JA,             /* Malayalam_Ja */
0564   UC_JOINING_GROUP_MALAYALAM_NYA,            /* Malayalam_Nya */
0565   UC_JOINING_GROUP_MALAYALAM_TTA,            /* Malayalam_Tta */
0566   UC_JOINING_GROUP_MALAYALAM_NNA,            /* Malayalam_Nna */
0567   UC_JOINING_GROUP_MALAYALAM_NNNA,           /* Malayalam_Nnna */
0568   UC_JOINING_GROUP_MALAYALAM_BHA,            /* Malayalam_Bha */
0569   UC_JOINING_GROUP_MALAYALAM_RA,             /* Malayalam_Ra */
0570   UC_JOINING_GROUP_MALAYALAM_LLA,            /* Malayalam_Lla */
0571   UC_JOINING_GROUP_MALAYALAM_LLLA,           /* Malayalam_Llla */
0572   UC_JOINING_GROUP_MALAYALAM_SSA,            /* Malayalam_Ssa */
0573   UC_JOINING_GROUP_HANIFI_ROHINGYA_PA,       /* Hanifi_Rohingya_Pa */
0574   UC_JOINING_GROUP_HANIFI_ROHINGYA_KINNA_YA, /* Hanifi_Rohingya_Kinna_Ya */
0575   UC_JOINING_GROUP_THIN_YEH,                 /* Thin_Yeh */
0576   UC_JOINING_GROUP_VERTICAL_TAIL             /* Vertical_Tail */
0577 };
0578 
0579 /* Return the name of a joining group.  */
0580 extern const char *
0581        uc_joining_group_name (int joining_group)
0582        _UC_ATTRIBUTE_CONST;
0583 
0584 /* Return the joining group given by name, e.g. "Teh_Marbuta".  */
0585 extern int
0586        uc_joining_group_byname (const char *joining_group_name)
0587        _UC_ATTRIBUTE_PURE;
0588 
0589 /* Return the joining group of a Unicode character.  */
0590 extern int
0591        uc_joining_group (ucs4_t uc)
0592        _UC_ATTRIBUTE_CONST;
0593 
0594 /* ========================================================================= */
0595 
0596 /* Common API for properties.  */
0597 
0598 /* Data type denoting a property.  This is not just a number, but rather a
0599    pointer to the test functions, so that programs that use only few of the
0600    properties don't have a link-time dependency towards all the tables.  */
0601 typedef struct
0602 {
0603   bool (*test_fn) (ucs4_t uc); /* the property's test function; presumably returns true when UC has the property -- TODO confirm */
0604 }
0605 uc_property_t;
0606 
0607 /* Predefined properties.  */
0608 /* General.  */
0609 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_WHITE_SPACE;
0610 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ALPHABETIC;
0611 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_ALPHABETIC;
0612 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_NOT_A_CHARACTER;
0613 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_DEFAULT_IGNORABLE_CODE_POINT;
0614 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT;
0615 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_DEPRECATED;
0616 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_LOGICAL_ORDER_EXCEPTION;
0617 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_VARIATION_SELECTOR;
0618 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PRIVATE_USE;
0619 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_UNASSIGNED_CODE_VALUE;
0620 /* Case.  */
0621 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_UPPERCASE;
0622 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_UPPERCASE;
0623 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_LOWERCASE;
0624 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_LOWERCASE;
0625 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_TITLECASE;
0626 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CASED;
0627 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CASE_IGNORABLE;
0628 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CHANGES_WHEN_LOWERCASED;
0629 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CHANGES_WHEN_UPPERCASED;
0630 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CHANGES_WHEN_TITLECASED;
0631 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CHANGES_WHEN_CASEFOLDED;
0632 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CHANGES_WHEN_CASEMAPPED;
0633 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_SOFT_DOTTED;
0634 /* Identifiers.  */
0635 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ID_START;
0636 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_ID_START;
0637 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ID_CONTINUE;
0638 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_ID_CONTINUE;
0639 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_XID_START;
0640 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_XID_CONTINUE;
0641 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PATTERN_WHITE_SPACE;
0642 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PATTERN_SYNTAX;
0643 /* Shaping and rendering: JOIN_CONTROL and the GRAPHEME_* property objects, each a const uc_property_t.  */
0644 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_JOIN_CONTROL;
0645 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_GRAPHEME_BASE;
0646 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_GRAPHEME_EXTEND;
0647 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_GRAPHEME_EXTEND;
0648 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_GRAPHEME_LINK;
0649 /* Bidi: the BIDI_* property objects, each a const uc_property_t.  */
0650 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_CONTROL;
0651 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_LEFT_TO_RIGHT;
0652 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_HEBREW_RIGHT_TO_LEFT;
0653 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_ARABIC_RIGHT_TO_LEFT;
0654 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_EUROPEAN_DIGIT;
0655 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_EUR_NUM_SEPARATOR;
0656 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_EUR_NUM_TERMINATOR;
0657 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_ARABIC_DIGIT;
0658 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_COMMON_SEPARATOR;
0659 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_BLOCK_SEPARATOR;
0660 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_SEGMENT_SEPARATOR;
0661 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_WHITESPACE;
0662 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_NON_SPACING_MARK;
0663 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_BOUNDARY_NEUTRAL;
0664 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_PDF;
0665 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_EMBEDDING_OR_OVERRIDE;
0666 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_OTHER_NEUTRAL;
0667 /* Numeric: the HEX_DIGIT and ASCII_HEX_DIGIT property objects, each a const uc_property_t.  */
0668 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_HEX_DIGIT;
0669 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ASCII_HEX_DIGIT;
0670 /* CJK: the ideograph, RADICAL and IDS_*_OPERATOR property objects, each a const uc_property_t.  */
0671 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_IDEOGRAPHIC;
0672 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_UNIFIED_IDEOGRAPH;
0673 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_RADICAL;
0674 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_IDS_BINARY_OPERATOR;
0675 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_IDS_TRINARY_OPERATOR;
0676 /* Emoji: the EMOJI_* property objects and EXTENDED_PICTOGRAPHIC, each a const uc_property_t.  */
0677 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_EMOJI;
0678 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_EMOJI_PRESENTATION;
0679 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_EMOJI_MODIFIER;
0680 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_EMOJI_MODIFIER_BASE;
0681 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_EMOJI_COMPONENT;
0682 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_EXTENDED_PICTOGRAPHIC;
0683 /* Misc: the remaining predefined property objects of this list, each a const uc_property_t.  */
0684 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ZERO_WIDTH;
0685 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_SPACE;
0686 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_NON_BREAK;
0687 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ISO_CONTROL;
0688 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_FORMAT_CONTROL;
0689 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_DASH;
0690 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_HYPHEN;
0691 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PUNCTUATION;
0692 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_LINE_SEPARATOR;
0693 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PARAGRAPH_SEPARATOR;
0694 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_QUOTATION_MARK;
0695 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_SENTENCE_TERMINAL;
0696 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_TERMINAL_PUNCTUATION;
0697 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CURRENCY_SYMBOL;
0698 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_MATH;
0699 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_MATH;
0700 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PAIRED_PUNCTUATION;
0701 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_LEFT_OF_PAIR;
0702 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_COMBINING;
0703 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_COMPOSITE;
0704 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_DECIMAL_DIGIT;
0705 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_NUMERIC;
0706 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_DIACRITIC;
0707 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_EXTENDER;
0708 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_IGNORABLE_CONTROL;
0709 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_REGIONAL_INDICATOR;
0710 
0711 /* Return, by value, the property named property_name, e.g. "White space".  NOTE(review): the result for an unknown name is presumably one that fails uc_property_is_valid -- confirm.  */
0712 extern uc_property_t
0713        uc_property_byname (const char *property_name);
0714 
0715 /* Test whether a property is valid: true iff its test_fn member is not NULL.  The argument is evaluated exactly once.  */
0716 #define uc_property_is_valid(property) ((property).test_fn != NULL)
0717 
0718 /* Test whether a Unicode character has a given property.  uc_is_property receives the property as its second argument; every uc_is_property_NAME function below takes only the character.  */
0719 extern bool
0720        uc_is_property (ucs4_t uc, uc_property_t property);
0721 extern bool uc_is_property_white_space (ucs4_t uc)
0722        _UC_ATTRIBUTE_CONST;
0723 extern bool uc_is_property_alphabetic (ucs4_t uc)
0724        _UC_ATTRIBUTE_CONST;
0725 extern bool uc_is_property_other_alphabetic (ucs4_t uc)
0726        _UC_ATTRIBUTE_CONST;
0727 extern bool uc_is_property_not_a_character (ucs4_t uc)
0728        _UC_ATTRIBUTE_CONST;
0729 extern bool uc_is_property_default_ignorable_code_point (ucs4_t uc)
0730        _UC_ATTRIBUTE_CONST;
0731 extern bool uc_is_property_other_default_ignorable_code_point (ucs4_t uc)
0732        _UC_ATTRIBUTE_CONST;
0733 extern bool uc_is_property_deprecated (ucs4_t uc)
0734        _UC_ATTRIBUTE_CONST;
0735 extern bool uc_is_property_logical_order_exception (ucs4_t uc)
0736        _UC_ATTRIBUTE_CONST;
0737 extern bool uc_is_property_variation_selector (ucs4_t uc)
0738        _UC_ATTRIBUTE_CONST;
0739 extern bool uc_is_property_private_use (ucs4_t uc)
0740        _UC_ATTRIBUTE_CONST;
0741 extern bool uc_is_property_unassigned_code_value (ucs4_t uc)
0742        _UC_ATTRIBUTE_CONST;
0743 extern bool uc_is_property_uppercase (ucs4_t uc)
0744        _UC_ATTRIBUTE_CONST;
0745 extern bool uc_is_property_other_uppercase (ucs4_t uc)
0746        _UC_ATTRIBUTE_CONST;
0747 extern bool uc_is_property_lowercase (ucs4_t uc)
0748        _UC_ATTRIBUTE_CONST;
0749 extern bool uc_is_property_other_lowercase (ucs4_t uc)
0750        _UC_ATTRIBUTE_CONST;
0751 extern bool uc_is_property_titlecase (ucs4_t uc)
0752        _UC_ATTRIBUTE_CONST;
0753 extern bool uc_is_property_cased (ucs4_t uc)
0754        _UC_ATTRIBUTE_CONST;
0755 extern bool uc_is_property_case_ignorable (ucs4_t uc)
0756        _UC_ATTRIBUTE_CONST;
0757 extern bool uc_is_property_changes_when_lowercased (ucs4_t uc)
0758        _UC_ATTRIBUTE_CONST;
0759 extern bool uc_is_property_changes_when_uppercased (ucs4_t uc)
0760        _UC_ATTRIBUTE_CONST;
0761 extern bool uc_is_property_changes_when_titlecased (ucs4_t uc)
0762        _UC_ATTRIBUTE_CONST;
0763 extern bool uc_is_property_changes_when_casefolded (ucs4_t uc)
0764        _UC_ATTRIBUTE_CONST;
0765 extern bool uc_is_property_changes_when_casemapped (ucs4_t uc)
0766        _UC_ATTRIBUTE_CONST;
0767 extern bool uc_is_property_soft_dotted (ucs4_t uc)
0768        _UC_ATTRIBUTE_CONST;
0769 extern bool uc_is_property_id_start (ucs4_t uc)
0770        _UC_ATTRIBUTE_CONST;
0771 extern bool uc_is_property_other_id_start (ucs4_t uc)
0772        _UC_ATTRIBUTE_CONST;
0773 extern bool uc_is_property_id_continue (ucs4_t uc)
0774        _UC_ATTRIBUTE_CONST;
0775 extern bool uc_is_property_other_id_continue (ucs4_t uc)
0776        _UC_ATTRIBUTE_CONST;
0777 extern bool uc_is_property_xid_start (ucs4_t uc)
0778        _UC_ATTRIBUTE_CONST;
0779 extern bool uc_is_property_xid_continue (ucs4_t uc)
0780        _UC_ATTRIBUTE_CONST;
0781 extern bool uc_is_property_pattern_white_space (ucs4_t uc)
0782        _UC_ATTRIBUTE_CONST;
0783 extern bool uc_is_property_pattern_syntax (ucs4_t uc)
0784        _UC_ATTRIBUTE_CONST;
0785 extern bool uc_is_property_join_control (ucs4_t uc)
0786        _UC_ATTRIBUTE_CONST;
0787 extern bool uc_is_property_grapheme_base (ucs4_t uc)
0788        _UC_ATTRIBUTE_CONST;
0789 extern bool uc_is_property_grapheme_extend (ucs4_t uc)
0790        _UC_ATTRIBUTE_CONST;
0791 extern bool uc_is_property_other_grapheme_extend (ucs4_t uc)
0792        _UC_ATTRIBUTE_CONST;
0793 extern bool uc_is_property_grapheme_link (ucs4_t uc)
0794        _UC_ATTRIBUTE_CONST;
0795 extern bool uc_is_property_bidi_control (ucs4_t uc)
0796        _UC_ATTRIBUTE_CONST;
0797 extern bool uc_is_property_bidi_left_to_right (ucs4_t uc)
0798        _UC_ATTRIBUTE_CONST;
0799 extern bool uc_is_property_bidi_hebrew_right_to_left (ucs4_t uc)
0800        _UC_ATTRIBUTE_CONST;
0801 extern bool uc_is_property_bidi_arabic_right_to_left (ucs4_t uc)
0802        _UC_ATTRIBUTE_CONST;
0803 extern bool uc_is_property_bidi_european_digit (ucs4_t uc)
0804        _UC_ATTRIBUTE_CONST;
0805 extern bool uc_is_property_bidi_eur_num_separator (ucs4_t uc)
0806        _UC_ATTRIBUTE_CONST;
0807 extern bool uc_is_property_bidi_eur_num_terminator (ucs4_t uc)
0808        _UC_ATTRIBUTE_CONST;
0809 extern bool uc_is_property_bidi_arabic_digit (ucs4_t uc)
0810        _UC_ATTRIBUTE_CONST;
0811 extern bool uc_is_property_bidi_common_separator (ucs4_t uc)
0812        _UC_ATTRIBUTE_CONST;
0813 extern bool uc_is_property_bidi_block_separator (ucs4_t uc)
0814        _UC_ATTRIBUTE_CONST;
0815 extern bool uc_is_property_bidi_segment_separator (ucs4_t uc)
0816        _UC_ATTRIBUTE_CONST;
0817 extern bool uc_is_property_bidi_whitespace (ucs4_t uc)
0818        _UC_ATTRIBUTE_CONST;
0819 extern bool uc_is_property_bidi_non_spacing_mark (ucs4_t uc)
0820        _UC_ATTRIBUTE_CONST;
0821 extern bool uc_is_property_bidi_boundary_neutral (ucs4_t uc)
0822        _UC_ATTRIBUTE_CONST;
0823 extern bool uc_is_property_bidi_pdf (ucs4_t uc)
0824        _UC_ATTRIBUTE_CONST;
0825 extern bool uc_is_property_bidi_embedding_or_override (ucs4_t uc)
0826        _UC_ATTRIBUTE_CONST;
0827 extern bool uc_is_property_bidi_other_neutral (ucs4_t uc)
0828        _UC_ATTRIBUTE_CONST;
0829 extern bool uc_is_property_hex_digit (ucs4_t uc)
0830        _UC_ATTRIBUTE_CONST;
0831 extern bool uc_is_property_ascii_hex_digit (ucs4_t uc)
0832        _UC_ATTRIBUTE_CONST;
0833 extern bool uc_is_property_ideographic (ucs4_t uc)
0834        _UC_ATTRIBUTE_CONST;
0835 extern bool uc_is_property_unified_ideograph (ucs4_t uc)
0836        _UC_ATTRIBUTE_CONST;
0837 extern bool uc_is_property_radical (ucs4_t uc)
0838        _UC_ATTRIBUTE_CONST;
0839 extern bool uc_is_property_ids_binary_operator (ucs4_t uc)
0840        _UC_ATTRIBUTE_CONST;
0841 extern bool uc_is_property_ids_trinary_operator (ucs4_t uc)
0842        _UC_ATTRIBUTE_CONST;
0843 extern bool uc_is_property_emoji (ucs4_t uc)
0844        _UC_ATTRIBUTE_CONST;
0845 extern bool uc_is_property_emoji_presentation (ucs4_t uc)
0846        _UC_ATTRIBUTE_CONST;
0847 extern bool uc_is_property_emoji_modifier (ucs4_t uc)
0848        _UC_ATTRIBUTE_CONST;
0849 extern bool uc_is_property_emoji_modifier_base (ucs4_t uc)
0850        _UC_ATTRIBUTE_CONST;
0851 extern bool uc_is_property_emoji_component (ucs4_t uc)
0852        _UC_ATTRIBUTE_CONST;
0853 extern bool uc_is_property_extended_pictographic (ucs4_t uc)
0854        _UC_ATTRIBUTE_CONST;
0855 extern bool uc_is_property_zero_width (ucs4_t uc)
0856        _UC_ATTRIBUTE_CONST;
0857 extern bool uc_is_property_space (ucs4_t uc)
0858        _UC_ATTRIBUTE_CONST;
0859 extern bool uc_is_property_non_break (ucs4_t uc)
0860        _UC_ATTRIBUTE_CONST;
0861 extern bool uc_is_property_iso_control (ucs4_t uc)
0862        _UC_ATTRIBUTE_CONST;
0863 extern bool uc_is_property_format_control (ucs4_t uc)
0864        _UC_ATTRIBUTE_CONST;
0865 extern bool uc_is_property_dash (ucs4_t uc)
0866        _UC_ATTRIBUTE_CONST;
0867 extern bool uc_is_property_hyphen (ucs4_t uc)
0868        _UC_ATTRIBUTE_CONST;
0869 extern bool uc_is_property_punctuation (ucs4_t uc)
0870        _UC_ATTRIBUTE_CONST;
0871 extern bool uc_is_property_line_separator (ucs4_t uc)
0872        _UC_ATTRIBUTE_CONST;
0873 extern bool uc_is_property_paragraph_separator (ucs4_t uc)
0874        _UC_ATTRIBUTE_CONST;
0875 extern bool uc_is_property_quotation_mark (ucs4_t uc)
0876        _UC_ATTRIBUTE_CONST;
0877 extern bool uc_is_property_sentence_terminal (ucs4_t uc)
0878        _UC_ATTRIBUTE_CONST;
0879 extern bool uc_is_property_terminal_punctuation (ucs4_t uc)
0880        _UC_ATTRIBUTE_CONST;
0881 extern bool uc_is_property_currency_symbol (ucs4_t uc)
0882        _UC_ATTRIBUTE_CONST;
0883 extern bool uc_is_property_math (ucs4_t uc)
0884        _UC_ATTRIBUTE_CONST;
0885 extern bool uc_is_property_other_math (ucs4_t uc)
0886        _UC_ATTRIBUTE_CONST;
0887 extern bool uc_is_property_paired_punctuation (ucs4_t uc)
0888        _UC_ATTRIBUTE_CONST;
0889 extern bool uc_is_property_left_of_pair (ucs4_t uc)
0890        _UC_ATTRIBUTE_CONST;
0891 extern bool uc_is_property_combining (ucs4_t uc)
0892        _UC_ATTRIBUTE_CONST;
0893 extern bool uc_is_property_composite (ucs4_t uc)
0894        _UC_ATTRIBUTE_CONST;
0895 extern bool uc_is_property_decimal_digit (ucs4_t uc)
0896        _UC_ATTRIBUTE_CONST;
0897 extern bool uc_is_property_numeric (ucs4_t uc)
0898        _UC_ATTRIBUTE_CONST;
0899 extern bool uc_is_property_diacritic (ucs4_t uc)
0900        _UC_ATTRIBUTE_CONST;
0901 extern bool uc_is_property_extender (ucs4_t uc)
0902        _UC_ATTRIBUTE_CONST;
0903 extern bool uc_is_property_ignorable_control (ucs4_t uc)
0904        _UC_ATTRIBUTE_CONST;
0905 extern bool uc_is_property_regional_indicator (ucs4_t uc)
0906        _UC_ATTRIBUTE_CONST;
0907 
0908 /* ========================================================================= */
0909 
0910 /* Subdivision of the Unicode characters into scripts.  */
0911 
0912 typedef struct
0913 {
0914   unsigned int code : 21;  /* 21-bit field; NOTE(review): presumably a Unicode code point -- confirm */
0915   unsigned int start : 1;  /* 1-bit flag; NOTE(review): presumably set where an interval starts -- confirm */
0916   unsigned int end : 1;  /* 1-bit flag; NOTE(review): presumably set where an interval ends -- confirm */
0917 }
0918 uc_interval_t;  /* Element type of the 'intervals' array referenced by uc_script_t.  */
0919 typedef struct
0920 {
0921   unsigned int nintervals;  /* NOTE(review): presumably the number of entries in 'intervals' -- confirm */
0922   const uc_interval_t *intervals;  /* pointer to read-only uc_interval_t entries */
0923   const char *name;  /* NOTE(review): presumably the name accepted by uc_script_byname -- confirm */
0924 }
0925 uc_script_t;  /* Descriptor of one script; handed out by pointer by uc_script and uc_script_byname.  */
0926 
0927 /* Return the script of a Unicode character, as a pointer to a const uc_script_t.  NOTE(review): the result for a character without a script is presumably NULL -- confirm.  */
0928 extern const uc_script_t *
0929        uc_script (ucs4_t uc)
0930        _UC_ATTRIBUTE_CONST;
0931 
0932 /* Return the script given by name, e.g. "HAN", as a pointer to a const uc_script_t.  NOTE(review): presumably NULL for an unknown name -- confirm.  */
0933 extern const uc_script_t *
0934        uc_script_byname (const char *script_name)
0935        _UC_ATTRIBUTE_PURE;
0936 
0937 /* Test whether the Unicode character uc belongs to the script that 'script' points to.  */
0938 extern bool
0939        uc_is_script (ucs4_t uc, const uc_script_t *script)
0940        _UC_ATTRIBUTE_PURE;
0941 
0942 /* Get the list of all scripts.  Returns nothing; NOTE(review): presumably stores the array in *scripts and its length in *count -- confirm.  */
0943 extern void
0944        uc_all_scripts (const uc_script_t **scripts, size_t *count);
0945 
0946 /* ========================================================================= */
0947 
0948 /* Subdivision of the Unicode character range into blocks.  */
0949 
0950 typedef struct
0951 {
0952   ucs4_t start;  /* NOTE(review): presumably the first code point of the block -- confirm */
0953   ucs4_t end;  /* NOTE(review): presumably the last code point of the block (inclusive?) -- confirm */
0954   const char *name;  /* block name, as a pointer to read-only characters */
0955 }
0956 uc_block_t;  /* Descriptor of one block; handed out by pointer by uc_block.  */
0957 
0958 /* Return the block a character belongs to, as a pointer to a const uc_block_t.  NOTE(review): presumably NULL when no block contains uc -- confirm.  */
0959 extern const uc_block_t *
0960        uc_block (ucs4_t uc)
0961        _UC_ATTRIBUTE_CONST;
0962 
0963 /* Test whether the Unicode character uc belongs to the block that 'block' points to.  */
0964 extern bool
0965        uc_is_block (ucs4_t uc, const uc_block_t *block)
0966        _UC_ATTRIBUTE_PURE;
0967 
0968 /* Get the list of all blocks.  Returns nothing; NOTE(review): presumably stores the array in *blocks and its length in *count -- confirm.  */
0969 extern void
0970        uc_all_blocks (const uc_block_t **blocks, size_t *count);
0971 
0972 /* ========================================================================= */
0973 
0974 /* Properties taken from language standards.  */
0975 
0976 /* Test whether the Unicode character uc is considered whitespace in ISO C 99.  Takes no locale argument.  */
0977 extern bool
0978        uc_is_c_whitespace (ucs4_t uc)
0979        _UC_ATTRIBUTE_CONST;
0980 
0981 /* Test whether the Unicode character uc is considered whitespace in Java.  Takes no locale argument.  */
0982 extern bool
0983        uc_is_java_whitespace (ucs4_t uc)
0984        _UC_ATTRIBUTE_CONST;
0985 
0986 enum  /* Identifier categories, numbered 0..3 in declaration order.  NOTE(review): presumably the results of uc_c_ident_category and uc_java_ident_category -- confirm.  */
0987 {
0988   UC_IDENTIFIER_START,    /* valid as first or subsequent character */
0989   UC_IDENTIFIER_VALID,    /* valid as subsequent character only */
0990   UC_IDENTIFIER_INVALID,  /* not valid */
0991   UC_IDENTIFIER_IGNORABLE /* ignorable (Java only) */
0992 };
0993 
0994 /* Return the categorization of a Unicode character w.r.t. the ISO C 99
0995    identifier syntax, as an int (NOTE(review): presumably one of the UC_IDENTIFIER_* constants -- confirm).  */
0996 extern int
0997        uc_c_ident_category (ucs4_t uc)
0998        _UC_ATTRIBUTE_CONST;
0999 
1000 /* Return the categorization of a Unicode character w.r.t. the Java
1001    identifier syntax, as an int (NOTE(review): presumably one of the UC_IDENTIFIER_* constants -- confirm).  */
1002 extern int
1003        uc_java_ident_category (ucs4_t uc)
1004        _UC_ATTRIBUTE_CONST;
1005 
1006 /* ========================================================================= */
1007 
1008 /* Like ISO C <ctype.h> and <wctype.h>.  These functions are deprecated,
1009    because this set of functions was designed with ASCII in mind and cannot
1010    reflect the more diverse reality of the Unicode character set.  But they
1011    can be a quick-and-dirty porting aid when migrating from wchar_t APIs
1012    to Unicode strings.  */
1013 
1014 /* Test for any character for which 'uc_is_alpha' or 'uc_is_digit' is true.  */
1015 extern bool
1016        uc_is_alnum (ucs4_t uc)
1017        _UC_ATTRIBUTE_CONST;
1018 
1019 /* Test for any character for which 'uc_is_upper' or 'uc_is_lower' is true,
1020    or any character that is one of a locale-specific set of characters for
1021    which none of 'uc_is_cntrl', 'uc_is_digit', 'uc_is_punct', or 'uc_is_space'
1022    is true.  */
1023 extern bool
1024        uc_is_alpha (ucs4_t uc)
1025        _UC_ATTRIBUTE_CONST;
1026 
1027 /* Test for any control character.  */
1028 extern bool
1029        uc_is_cntrl (ucs4_t uc)
1030        _UC_ATTRIBUTE_CONST;
1031 
1032 /* Test for any character that corresponds to a decimal-digit character.  */
1033 extern bool
1034        uc_is_digit (ucs4_t uc)
1035        _UC_ATTRIBUTE_CONST;
1036 
1037 /* Test for any character for which 'uc_is_print' is true and 'uc_is_space'
1038    is false.  */
1039 extern bool
1040        uc_is_graph (ucs4_t uc)
1041        _UC_ATTRIBUTE_CONST;
1042 
1043 /* Test for any character that corresponds to a lowercase letter or is one
1044    of a locale-specific set of characters for which none of 'uc_is_cntrl',
1045    'uc_is_digit', 'uc_is_punct', or 'uc_is_space' is true.  */
1046 extern bool
1047        uc_is_lower (ucs4_t uc)
1048        _UC_ATTRIBUTE_CONST;
1049 
1050 /* Test for any printing character.  */
1051 extern bool
1052        uc_is_print (ucs4_t uc)
1053        _UC_ATTRIBUTE_CONST;
1054 
1055 /* Test for any printing character that is one of a locale-specific set of
1056    characters for which neither 'uc_is_space' nor 'uc_is_alnum' is true.  */
1057 extern bool
1058        uc_is_punct (ucs4_t uc)
1059        _UC_ATTRIBUTE_CONST;
1060 
1061 /* Test for any character that corresponds to a locale-specific set of
1062    characters for which none of 'uc_is_alnum', 'uc_is_graph', or 'uc_is_punct'
1063    is true.  */
1064 extern bool
1065        uc_is_space (ucs4_t uc)
1066        _UC_ATTRIBUTE_CONST;
1067 
1068 /* Test for any character that corresponds to an uppercase letter or is one
1069    of a locale-specific set of characters for which none of 'uc_is_cntrl',
1070    'uc_is_digit', 'uc_is_punct', or 'uc_is_space' is true.  */
1071 extern bool
1072        uc_is_upper (ucs4_t uc)
1073        _UC_ATTRIBUTE_CONST;
1074 
1075 /* Test for any character that corresponds to a hexadecimal-digit
1076    character.  */
1077 extern bool
1078        uc_is_xdigit (ucs4_t uc)
1079        _UC_ATTRIBUTE_CONST;
1080 
1081 /* GNU extension.  */
1082 /* Test for any character that corresponds to a standard blank character or
1083    a locale-specific set of characters for which 'uc_is_alnum' is false.  */
1084 extern bool
1085        uc_is_blank (ucs4_t uc)
1086        _UC_ATTRIBUTE_CONST;
1087 
1088 /* ========================================================================= */
1089 
1090 #ifdef __cplusplus
1091 }
1092 #endif
1093 
1094 #endif /* _UNICTYPE_H */