|
||||
File indexing completed on 2025-01-17 09:56:13
0001 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */ 0002 /* Grapheme cluster breaks in Unicode strings. 0003 Copyright (C) 2010-2024 Free Software Foundation, Inc. 0004 Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. 0005 0006 This file is free software. 0007 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". 0008 You can redistribute it and/or modify it under either 0009 - the terms of the GNU Lesser General Public License as published 0010 by the Free Software Foundation, either version 3, or (at your 0011 option) any later version, or 0012 - the terms of the GNU General Public License as published by the 0013 Free Software Foundation; either version 2, or (at your option) 0014 any later version, or 0015 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". 0016 0017 This file is distributed in the hope that it will be useful, 0018 but WITHOUT ANY WARRANTY; without even the implied warranty of 0019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0020 Lesser General Public License and the GNU General Public License 0021 for more details. 0022 0023 You should have received a copy of the GNU Lesser General Public 0024 License and of the GNU General Public License along with this 0025 program. If not, see <https://www.gnu.org/licenses/>. */ 0026 0027 #ifndef _UNIGBRK_H 0028 #define _UNIGBRK_H 0029 0030 /* Get bool. */ 0031 #include <unistring/stdbool.h> 0032 0033 /* Get size_t. */ 0034 #include <stddef.h> 0035 0036 #include "unitypes.h" 0037 0038 #ifdef __cplusplus 0039 extern "C" { 0040 #endif 0041 0042 /* ========================================================================= */ 0043 0044 /* Property defined in Unicode Standard Annex #29, section "Grapheme Cluster 0045 Boundaries" 0046 <https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries> */ 0047 0048 /* Possible values of the Grapheme_Cluster_Break property. 0049 This enumeration may be extended in the future. */ 0050 enum 0051 { 0052 GBP_OTHER = 0, 0053 GBP_CR = 1, 0054 GBP_LF = 2, 0055 GBP_CONTROL = 3, 0056 GBP_EXTEND = 4, 0057 GBP_PREPEND = 5, 0058 GBP_SPACINGMARK = 6, 0059 GBP_L = 7, 0060 GBP_V = 8, 0061 GBP_T = 9, 0062 GBP_LV = 10, 0063 GBP_LVT = 11, 0064 GBP_RI = 12, 0065 GBP_ZWJ = 13, 0066 GBP_EB = 14, /* obsolete */ 0067 GBP_EM = 15, /* obsolete */ 0068 GBP_GAZ = 16, /* obsolete */ 0069 GBP_EBG = 17 /* obsolete */ 0070 }; 0071 0072 /* Return the Grapheme_Cluster_Break property of a Unicode character. */ 0073 extern int 0074 uc_graphemeclusterbreak_property (ucs4_t uc) 0075 _UC_ATTRIBUTE_CONST; 0076 0077 /* ========================================================================= */ 0078 0079 /* Grapheme cluster breaks. */ 0080 0081 /* Returns true if there is a grapheme cluster boundary between Unicode code 0082 points A and B. A "grapheme cluster" is an approximation to a 0083 user-perceived character, which sometimes corresponds to multiple code 0084 points. For example, an English letter followed by an acute accent can be 0085 expressed as two consecutive Unicode code points, but it is perceived by the 0086 user as only a single character and therefore constitutes a single grapheme 0087 cluster. 0088 0089 Implements extended (not legacy) grapheme cluster rules, because UAX #29 0090 indicates that they are preferred. 0091 0092 Note: This function does not work right with syllables in Indic scripts or 0093 emojis, because it does not look at the characters before A and after B. 0094 0095 Use A == 0 or B == 0 to indicate start of text or end of text, 0096 respectively. */ 0097 extern bool 0098 uc_is_grapheme_break (ucs4_t a, ucs4_t b) 0099 _UC_ATTRIBUTE_CONST; 0100 0101 /* Returns the start of the next grapheme cluster following S, or NULL if the 0102 end of the string has been reached. 0103 Note: These functions do not work right with syllables in Indic scripts or 0104 emojis, because they do not consider the characters before S. */ 0105 extern const uint8_t * 0106 u8_grapheme_next (const uint8_t *s, const uint8_t *end) 0107 _UC_ATTRIBUTE_PURE; 0108 extern const uint16_t * 0109 u16_grapheme_next (const uint16_t *s, const uint16_t *end) 0110 _UC_ATTRIBUTE_PURE; 0111 extern const uint32_t * 0112 u32_grapheme_next (const uint32_t *s, const uint32_t *end) 0113 _UC_ATTRIBUTE_PURE; 0114 0115 /* Returns the start of the previous grapheme cluster before S, or NULL if the 0116 start of the string has been reached. 0117 Note: These functions do not work right with syllables in Indic scripts or 0118 emojis, because they do not consider the characters at or after S. */ 0119 extern const uint8_t * 0120 u8_grapheme_prev (const uint8_t *s, const uint8_t *start) 0121 _UC_ATTRIBUTE_PURE; 0122 extern const uint16_t * 0123 u16_grapheme_prev (const uint16_t *s, const uint16_t *start) 0124 _UC_ATTRIBUTE_PURE; 0125 extern const uint32_t * 0126 u32_grapheme_prev (const uint32_t *s, const uint32_t *start) 0127 _UC_ATTRIBUTE_PURE; 0128 0129 /* Determine the grapheme cluster boundaries in S, and store the result at 0130 p[0..n-1]. p[i] = 1 means that a new grapheme cluster begins at s[i]. p[i] 0131 = 0 means that s[i-1] and s[i] are part of the same grapheme cluster. p[0] 0132 will always be 1. 0133 */ 0134 extern void 0135 u8_grapheme_breaks (const uint8_t *s, size_t n, char *p); 0136 extern void 0137 u16_grapheme_breaks (const uint16_t *s, size_t n, char *p); 0138 extern void 0139 u32_grapheme_breaks (const uint32_t *s, size_t n, char *p); 0140 extern void 0141 ulc_grapheme_breaks (const char *s, size_t n, char *p); 0142 extern void 0143 uc_grapheme_breaks (const ucs4_t *s, size_t n, char *p); 0144 0145 /* ========================================================================= */ 0146 0147 #ifdef __cplusplus 0148 } 0149 #endif 0150 0151 0152 #endif /* _UNIGBRK_H */
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |