Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-17 09:56:13

0001 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
0002 /* Grapheme cluster breaks in Unicode strings.
0003    Copyright (C) 2010-2024 Free Software Foundation, Inc.
0004    Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
0005 
0006    This file is free software.
0007    It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
0008    You can redistribute it and/or modify it under either
0009      - the terms of the GNU Lesser General Public License as published
0010        by the Free Software Foundation, either version 3, or (at your
0011        option) any later version, or
0012      - the terms of the GNU General Public License as published by the
0013        Free Software Foundation; either version 2, or (at your option)
0014        any later version, or
0015      - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
0016 
0017    This file is distributed in the hope that it will be useful,
0018    but WITHOUT ANY WARRANTY; without even the implied warranty of
0019    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0020    Lesser General Public License and the GNU General Public License
0021    for more details.
0022 
0023    You should have received a copy of the GNU Lesser General Public
0024    License and of the GNU General Public License along with this
0025    program.  If not, see <https://www.gnu.org/licenses/>.  */
0026 
0027 #ifndef _UNIGBRK_H
0028 #define _UNIGBRK_H
0029 
0030 /* Get bool.  */
0031 #include <unistring/stdbool.h>
0032 
0033 /* Get size_t. */
0034 #include <stddef.h>
0035 
0036 #include "unitypes.h"
0037 
0038 #ifdef __cplusplus
0039 extern "C" {
0040 #endif
0041 
0042 /* ========================================================================= */
0043 
0044 /* Property defined in Unicode Standard Annex #29, section "Grapheme Cluster
0045    Boundaries"
0046    <https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries> */
0047 
0048 /* Possible values of the Grapheme_Cluster_Break property (GBP_* constants).
0049    The list may be extended in the future; do not treat it as exhaustive.  */
0050 enum
0051 {
0052   GBP_OTHER        = 0,
0053   GBP_CR           = 1,
0054   GBP_LF           = 2,
0055   GBP_CONTROL      = 3,
0056   GBP_EXTEND       = 4,
0057   GBP_PREPEND      = 5,
0058   GBP_SPACINGMARK  = 6,
0059   GBP_L            = 7,
0060   GBP_V            = 8,
0061   GBP_T            = 9,
0062   GBP_LV           = 10,
0063   GBP_LVT          = 11,
0064   GBP_RI           = 12,
0065   GBP_ZWJ          = 13,
0066   GBP_EB           = 14, /* obsolete */
0067   GBP_EM           = 15, /* obsolete */
0068   GBP_GAZ          = 16, /* obsolete */
0069   GBP_EBG          = 17  /* obsolete */
0070 };
0071 
0072 /* Return the Grapheme_Cluster_Break property of UC, as a GBP_* value. */
0073 extern int
0074        uc_graphemeclusterbreak_property (ucs4_t uc)
0075        _UC_ATTRIBUTE_CONST;
0076 
0077 /* ========================================================================= */
0078 
0079 /* Grapheme cluster breaks.  */
0080 
0081 /* Returns true if there is a grapheme cluster boundary between Unicode code
0082    points A and B.  A "grapheme cluster" is an approximation to a
0083    user-perceived character, which sometimes corresponds to multiple code
0084    points.  For example, an English letter followed by an acute accent can be
0085    expressed as two consecutive Unicode code points, but it is perceived by the
0086    user as only a single character and therefore constitutes a single grapheme
0087    cluster.
0088 
0089    Implements extended (not legacy) grapheme cluster rules, because UAX #29
0090    indicates that they are preferred.
0091 
0092    Note: This function is unreliable for syllables in Indic scripts and for
0093    emoji, because it does not look at the characters before A and after B.
0094 
0095    Pass A == 0 to indicate the start of text, or B == 0 to indicate the end
0096    of text.  */
0097 extern bool
0098        uc_is_grapheme_break (ucs4_t a, ucs4_t b)
0099        _UC_ATTRIBUTE_CONST;
0100 
0101 /* Returns the start of the next grapheme cluster following S, or NULL if the
0102    end of the string (delimited by END) has been reached.
0103    Note: These functions are unreliable for syllables in Indic scripts and for
0104    emoji, because they do not consider the characters before S. */
0105 extern const uint8_t *
0106        u8_grapheme_next (const uint8_t *s, const uint8_t *end)
0107        _UC_ATTRIBUTE_PURE;
0108 extern const uint16_t *
0109        u16_grapheme_next (const uint16_t *s, const uint16_t *end)
0110        _UC_ATTRIBUTE_PURE;
0111 extern const uint32_t *
0112        u32_grapheme_next (const uint32_t *s, const uint32_t *end)
0113        _UC_ATTRIBUTE_PURE;
0114 
0115 /* Returns the start of the previous grapheme cluster before S, or NULL if the
0116    start of the string (delimited by START) has been reached.
0117    Note: These functions are unreliable for syllables in Indic scripts and for
0118    emoji, because they do not consider the characters at or after S. */
0119 extern const uint8_t *
0120        u8_grapheme_prev (const uint8_t *s, const uint8_t *start)
0121        _UC_ATTRIBUTE_PURE;
0122 extern const uint16_t *
0123        u16_grapheme_prev (const uint16_t *s, const uint16_t *start)
0124        _UC_ATTRIBUTE_PURE;
0125 extern const uint32_t *
0126        u32_grapheme_prev (const uint32_t *s, const uint32_t *start)
0127        _UC_ATTRIBUTE_PURE;
0128 
0129 /* Determine the grapheme cluster boundaries in S, which has N units, and
0130    store the result at p[0..n-1].  p[i] = 1 means that a new grapheme cluster
0131    begins at s[i].  p[i] = 0 means that s[i-1] and s[i] are part of the same
0132    grapheme cluster.  p[0] will always be 1 (when n > 0).
0133  */
0134 extern void
0135        u8_grapheme_breaks (const uint8_t *s, size_t n, char *p);
0136 extern void
0137        u16_grapheme_breaks (const uint16_t *s, size_t n, char *p);
0138 extern void
0139        u32_grapheme_breaks (const uint32_t *s, size_t n, char *p);
0140 extern void
0141        ulc_grapheme_breaks (const char *s, size_t n, char *p);
0142 extern void
0143        uc_grapheme_breaks (const ucs4_t *s, size_t n, char *p);
0144 
0145 /* ========================================================================= */
0146 
0147 #ifdef __cplusplus
0148 }
0149 #endif
0150 
0151 
0152 #endif /* _UNIGBRK_H */