Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2024-05-18 08:30:28

0001 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
0002 /* Grapheme cluster breaks in Unicode strings.
0003    Copyright (C) 2010-2022 Free Software Foundation, Inc.
0004    Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
0005 
0006    This file is free software.
0007    It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
0008    You can redistribute it and/or modify it under either
0009      - the terms of the GNU Lesser General Public License as published
0010        by the Free Software Foundation, either version 3, or (at your
0011        option) any later version, or
0012      - the terms of the GNU General Public License as published by the
0013        Free Software Foundation; either version 2, or (at your option)
0014        any later version, or
0015      - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
0016 
0017    This file is distributed in the hope that it will be useful,
0018    but WITHOUT ANY WARRANTY; without even the implied warranty of
0019    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0020    Lesser General Public License and the GNU General Public License
0021    for more details.
0022 
0023    You should have received a copy of the GNU Lesser General Public
0024    License and of the GNU General Public License along with this
0025    program.  If not, see <https://www.gnu.org/licenses/>.  */
0026 
0027 #ifndef _UNIGBRK_H
0028 #define _UNIGBRK_H
0029 
0030 /* Get bool.  */
0031 #include <unistring/stdbool.h>
0032 
0033 /* Get size_t. */
0034 #include <stddef.h>
0035 
0036 #include "unitypes.h"
0037 
0038 #ifdef __cplusplus
0039 extern "C" {
0040 #endif
0041 
0042 /* ========================================================================= */
0043 
0044 /* Property defined in Unicode Standard Annex #29, section "Grapheme Cluster
0045    Boundaries"
0046    <https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries> */
0047 
0048 /* Possible values of the Grapheme_Cluster_Break property.
0049    This enumeration may be extended in the future.
        The enumeration is anonymous, so the GBP_* names are plain integer
        constants.  They are numbered consecutively from 0 (GBP_OTHER) to
        17 (GBP_EBG).  Because values may be added later, code that switches
        on these constants should not assume that 17 is the largest value it
        will ever see.
        GBP_EB, GBP_EM, GBP_GAZ and GBP_EBG are marked obsolete below but are
        still defined.
        NOTE(review): the names presumably mirror the Grapheme_Cluster_Break
        value names of UAX #29 (CR, LF, Control, Extend, ...) -- confirm
        against the Unicode data files before relying on a particular
        meaning.  */
0050 enum
0051 {
0052   GBP_OTHER        = 0,
0053   GBP_CR           = 1,
0054   GBP_LF           = 2,
0055   GBP_CONTROL      = 3,
0056   GBP_EXTEND       = 4,
0057   GBP_PREPEND      = 5,
0058   GBP_SPACINGMARK  = 6,
0059   GBP_L            = 7,
0060   GBP_V            = 8,
0061   GBP_T            = 9,
0062   GBP_LV           = 10,
0063   GBP_LVT          = 11,
0064   GBP_RI           = 12,
0065   GBP_ZWJ          = 13,
0066   GBP_EB           = 14, /* obsolete */
0067   GBP_EM           = 15, /* obsolete */
0068   GBP_GAZ          = 16, /* obsolete */
0069   GBP_EBG          = 17  /* obsolete */
0070 };
0071 
0072 /* Return the Grapheme_Cluster_Break property of a Unicode character.
        UC is the code point to classify.  The result is returned as an int;
        the possible values are the GBP_* constants declared in this header.
        NOTE(review): _UC_ATTRIBUTE_CONST is defined outside this file;
        presumably it tells the compiler that the result depends only on UC
        and that the call has no side effects -- confirm in unitypes.h.  */
0073 extern int
0074        uc_graphemeclusterbreak_property (ucs4_t uc)
0075        _UC_ATTRIBUTE_CONST;
0076 
0077 /* ========================================================================= */
0078 
0079 /* Grapheme cluster breaks.  */
0080 
0081 /* Returns true if there is a grapheme cluster boundary between Unicode code
0082    points A and B.  A "grapheme cluster" is an approximation to a
0083    user-perceived character, which sometimes corresponds to multiple code
0084    points.  For example, an English letter followed by an acute accent can be
0085    expressed as two consecutive Unicode code points, but it is perceived by the
0086    user as only a single character and therefore constitutes a single grapheme
0087    cluster.
0088 
0089    Implements extended (not legacy) grapheme cluster rules, because UAX #29
0090    indicates that they are preferred.
0091 
0092    Use A == 0 or B == 0 to indicate start of text or end of text,
0093    respectively.

        A is the code point before the candidate boundary and B the code
        point after it.
        NOTE(review): since the value 0 is reserved as the start/end-of-text
        marker, a genuine U+0000 in the text cannot be told apart from a text
        edge through this interface -- confirm how callers are expected to
        handle embedded NUL code points.
        NOTE(review): only A and B are visible to this function, so a boundary
        decision that needs more than two code points of context presumably
        cannot be made here; the *_grapheme_breaks functions declared in this
        header take the whole string -- confirm against the implementation.  */
0094 extern bool
0095        uc_is_grapheme_break (ucs4_t a, ucs4_t b)
0096        _UC_ATTRIBUTE_CONST;
0097 
0098 /* Returns the start of the next grapheme cluster following S, or NULL if the
0099    end of the string has been reached.
        The three variants differ only in the unit type of the string:
        uint8_t, uint16_t and uint32_t units respectively.  The returned
        pointer has the same const-qualified unit type as S, so the string is
        not modified through it.
        NOTE(review): END presumably points just past the last unit of the
        string.  The libunistring manual describes the result as END when no
        break is found before it, and NULL only when S == END -- confirm
        against the implementation.
        NOTE(review): _UC_ATTRIBUTE_PURE is defined outside this file;
        presumably it marks the functions as free of side effects -- confirm
        in unitypes.h.  */
0100 extern const uint8_t *
0101        u8_grapheme_next (const uint8_t *s, const uint8_t *end)
0102        _UC_ATTRIBUTE_PURE;
0103 extern const uint16_t *
0104        u16_grapheme_next (const uint16_t *s, const uint16_t *end)
0105        _UC_ATTRIBUTE_PURE;
0106 extern const uint32_t *
0107        u32_grapheme_next (const uint32_t *s, const uint32_t *end)
0108        _UC_ATTRIBUTE_PURE;
0109 
0110 /* Returns the start of the previous grapheme cluster before S, or NULL if the
0111    start of the string has been reached.
        The three variants differ only in the unit type of the string:
        uint8_t, uint16_t and uint32_t units respectively.  The returned
        pointer has the same const-qualified unit type as S, so the string is
        not modified through it.
        NOTE(review): START presumably points at the first unit of the
        string.  The libunistring manual describes the result as START when no
        break is found before it, and NULL only when S == START -- confirm
        against the implementation.
        NOTE(review): _UC_ATTRIBUTE_PURE is defined outside this file;
        presumably it marks the functions as free of side effects -- confirm
        in unitypes.h.  */
0112 extern const uint8_t *
0113        u8_grapheme_prev (const uint8_t *s, const uint8_t *start)
0114        _UC_ATTRIBUTE_PURE;
0115 extern const uint16_t *
0116        u16_grapheme_prev (const uint16_t *s, const uint16_t *start)
0117        _UC_ATTRIBUTE_PURE;
0118 extern const uint32_t *
0119        u32_grapheme_prev (const uint32_t *s, const uint32_t *start)
0120        _UC_ATTRIBUTE_PURE;
0121 
0122 /* Determine the grapheme cluster boundaries in S, and store the result at
0123    p[0..n-1].  p[i] = 1 means that a new grapheme cluster begins at s[i].  p[i]
0124    = 0 means that s[i-1] and s[i] are part of the same grapheme cluster.  p[0]
0125    will always be 1.

        S is the input string, N its length, and P the caller-provided output
        array, which must have room for N chars because p[0..n-1] is written.
        Nothing is returned; the result is delivered only through P.
        The variants differ in the element type of S: uint8_t, uint16_t and
        uint32_t units for the u8_, u16_ and u32_ functions, char for ulc_,
        and ucs4_t for uc_.
        NOTE(review): N is presumably counted in elements of S (units, not
        characters), since P is indexed in parallel with S -- confirm.
        NOTE(review): ulc_grapheme_breaks presumably expects S in the locale
        encoding -- confirm against the libunistring manual.
        NOTE(review): the behaviour for n == 0 is not stated here (p[0] would
        not exist) -- confirm before calling with an empty string.
0126  */
0127 extern void
0128        u8_grapheme_breaks (const uint8_t *s, size_t n, char *p);
0129 extern void
0130        u16_grapheme_breaks (const uint16_t *s, size_t n, char *p);
0131 extern void
0132        u32_grapheme_breaks (const uint32_t *s, size_t n, char *p);
0133 extern void
0134        ulc_grapheme_breaks (const char *s, size_t n, char *p);
0135 extern void
0136        uc_grapheme_breaks (const ucs4_t *s, size_t n, char *p);
0137 
0138 /* ========================================================================= */
0139 
0140 #ifdef __cplusplus
0141 }
0142 #endif
0143 
0144 
0145 #endif /* _UNIGBRK_H */