|
|
|||
File indexing completed on 2025-12-10 10:23:54
0001 //======================================================================== 0002 // 0003 // UTF.h 0004 // 0005 // This file is licensed under the GPLv2 or later 0006 // 0007 // Copyright (C) 2012, 2017, 2021 Adrian Johnson <ajohnson@redneon.com> 0008 // Copyright (C) 2016 Jason Crain <jason@aquaticape.us> 0009 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich 0010 // Copyright (C) 2018 Nelson Benítez León <nbenitezl@gmail.com> 0011 // Copyright (C) 2019-2022 Albert Astals Cid <aacid@kde.org> 0012 // Copyright (C) 2021 Georgiy Sgibnev <georgiy@sgibnev.com>. Work sponsored by lab50.net. 0013 // 0014 //======================================================================== 0015 0016 #ifndef UTF_H 0017 #define UTF_H 0018 0019 #include <cstdint> 0020 #include <climits> 0021 #include <memory> 0022 0023 #include "goo/GooString.h" 0024 #include "CharTypes.h" 0025 #include "poppler_private_export.h" 0026 0027 // Convert a UTF-16 string to a UCS-4 0028 // utf16 - utf16 bytes 0029 // utf16_len - number of UTF-16 characters 0030 // ucs4_out - if not NULL, allocates and returns UCS-4 string. Free with gfree. 0031 // returns number of UCS-4 characters 0032 int UTF16toUCS4(const Unicode *utf16, int utf16Len, Unicode **ucs4_out); 0033 0034 // Convert a PDF Text String to UCS-4 0035 // s - PDF text string 0036 // ucs4 - if the number of UCS-4 characters is > 0, allocates and 0037 // returns UCS-4 string. Free with gfree. 0038 // returns number of UCS-4 characters 0039 int POPPLER_PRIVATE_EXPORT TextStringToUCS4(const std::string &textStr, Unicode **ucs4); 0040 0041 // check if UCS-4 character is valid 0042 bool UnicodeIsValid(Unicode ucs4); 0043 0044 // is a unicode whitespace character 0045 bool UnicodeIsWhitespace(Unicode ucs4); 0046 0047 // Count number of UCS-4 characters required to convert a UTF-8 string to 0048 // UCS-4 (excluding terminating NULL). 0049 int POPPLER_PRIVATE_EXPORT utf8CountUCS4(const char *utf8); 0050 0051 // Convert a UTF-8 string to a UCS-4 0052 // utf8 - utf8 bytes 0053 // ucs4_out - if not NULL, allocates and returns UCS-4 string. Free with gfree. 0054 // returns number of UCS-4 characters 0055 int POPPLER_PRIVATE_EXPORT utf8ToUCS4(const char *utf8, Unicode **ucs4_out); 0056 0057 // Count number of UTF-16 code units required to convert a UTF-8 string 0058 // (excluding terminating NULL). Each invalid byte is counted as a 0059 // code point since the UTF-8 conversion functions will replace it with 0060 // REPLACEMENT_CHAR. 0061 int POPPLER_PRIVATE_EXPORT utf8CountUtf16CodeUnits(const char *utf8); 0062 0063 // Convert UTF-8 to UTF-16 0064 // utf8- UTF-8 string to convert. If not null terminated, set maxUtf8 to num 0065 // bytes to convert 0066 // utf16 - output buffer to write UTF-16 to. Output will always be null terminated. 0067 // maxUtf16 - maximum size of output buffer including space for null. 0068 // maxUtf8 - maximum number of UTF-8 bytes to convert. Conversion stops when 0069 // either this count is reached or a null is encountered. 0070 // Returns number of UTF-16 code units written (excluding NULL). 0071 int POPPLER_PRIVATE_EXPORT utf8ToUtf16(const char *utf8, uint16_t *utf16, int maxUtf16 = INT_MAX, int maxUtf8 = INT_MAX); 0072 0073 // Allocate utf16 string and convert utf8 into it. 0074 uint16_t POPPLER_PRIVATE_EXPORT *utf8ToUtf16(const char *utf8, int *len = nullptr); 0075 0076 // Converts a UTF-8 string to a big endian UTF-16 string with BOM. 0077 // The caller owns the returned pointer. 0078 // utf8 - UTF-8 string to convert. An empty string is acceptable. 0079 // Returns a big endian UTF-16 string with BOM or an empty string without BOM. 0080 std::unique_ptr<GooString> POPPLER_PRIVATE_EXPORT utf8ToUtf16WithBom(const std::string &utf8); 0081 0082 // Count number of UTF-8 bytes required to convert a UTF-16 string to 0083 // UTF-8 (excluding terminating NULL). 0084 int POPPLER_PRIVATE_EXPORT utf16CountUtf8Bytes(const uint16_t *utf16); 0085 0086 // Convert UTF-16 to UTF-8 0087 // utf16- UTF-16 string to convert. If not null terminated, set maxUtf16 to num 0088 // code units to convert 0089 // utf8 - output buffer to write UTF-8 to. Output will always be null terminated. 0090 // maxUtf8 - maximum size of output buffer including space for null. 0091 // maxUtf16 - maximum number of UTF-16 code units to convert. Conversion stops when 0092 // either this count is reached or a null is encountered. 0093 // Returns number of UTF-8 bytes written (excluding NULL). 0094 int POPPLER_PRIVATE_EXPORT utf16ToUtf8(const uint16_t *utf16, char *utf8, int maxUtf8 = INT_MAX, int maxUtf16 = INT_MAX); 0095 0096 // Allocate utf8 string and convert utf16 into it. 0097 char POPPLER_PRIVATE_EXPORT *utf16ToUtf8(const uint16_t *utf16, int *len = nullptr); 0098 0099 // Convert a UCS-4 string to pure ASCII (7bit) 0100 // in - UCS-4 string bytes 0101 // len - number of UCS-4 characters 0102 // ucs4_out - if not NULL, allocates and returns UCS-4 string. Free with gfree. 0103 // out_len - number of UCS-4 characters in ucs4_out. 0104 // in_idx - if not NULL, the int array returned by the out fourth parameter of 0105 // unicodeNormalizeNFKC() function. Optional, needed for @indices out parameter. 0106 // indices - if not NULL, @indices is assigned the location of a newly-allocated array 0107 // of length @out_len + 1, for each character in the ascii string giving the index 0108 // of the corresponding character in the text of the line (thanks to this info 0109 // being passed in @in_idx parameter). 0110 void POPPLER_PRIVATE_EXPORT unicodeToAscii7(const Unicode *in, int len, Unicode **ucs4_out, int *out_len, const int *in_idx, int **indices); 0111 0112 #endif
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|