|
|
|||
File indexing completed on 2026-05-03 08:13:50
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// WARNING, this entire header is generated by
// utils/generate_indic_conjunct_break_table.py
// DO NOT MODIFY!

// UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
//
// See Terms of Use <https://www.unicode.org/copyright.html>
// for definitions of Unicode Inc.'s Data Files and Software.
//
// NOTICE TO USER: Carefully read the following legal agreement.
// BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
// DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
// YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
// TERMS AND CONDITIONS OF THIS AGREEMENT.
// IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
// THE DATA FILES OR SOFTWARE.
//
// COPYRIGHT AND PERMISSION NOTICE
//
// Copyright (c) 1991-2022 Unicode, Inc. All rights reserved.
// Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of the Unicode data files and any associated documentation
// (the "Data Files") or Unicode software and any associated documentation
// (the "Software") to deal in the Data Files or Software
// without restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, and/or sell copies of
// the Data Files or Software, and to permit persons to whom the Data Files
// or Software are furnished to do so, provided that either
// (a) this copyright and permission notice appear with all copies
// of the Data Files or Software, or
// (b) this copyright and permission notice appear in associated
// Documentation.
//
// THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT OF THIRD PARTY RIGHTS.
// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
// NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
// DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
// DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THE DATA FILES OR SOFTWARE.
//
// Except as contained in this notice, the name of a copyright holder
// shall not be used in advertising or otherwise to promote the sale,
// use or other dealings in these Data Files or Software without prior
// written authorization of the copyright holder.
0060 0061 #ifndef _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H 0062 #define _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H 0063 0064 #include <__algorithm/ranges_upper_bound.h> 0065 #include <__config> 0066 #include <__cstddef/ptrdiff_t.h> 0067 #include <__iterator/access.h> 0068 #include <cstdint> 0069 0070 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 0071 # pragma GCC system_header 0072 #endif 0073 0074 _LIBCPP_BEGIN_NAMESPACE_STD 0075 0076 #if _LIBCPP_STD_VER >= 20 0077 0078 namespace __indic_conjunct_break { 0079 0080 enum class __property : uint8_t { 0081 // Values generated from the data files. 0082 __Consonant, 0083 __Extend, 0084 __Linker, 0085 0086 // The code unit has none of above properties. 0087 __none 0088 }; 0089 0090 /// The entries of the indic conjunct break property table. 0091 /// 0092 /// The data is generated from 0093 /// - https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt 0094 /// 0095 /// The data has 3 values 0096 /// - bits [0, 1] The property. One of the values generated from the datafiles 0097 /// of \ref __property 0098 /// - bits [2, 10] The size of the range. 0099 /// - bits [11, 31] The lower bound code point of the range. The upper bound of 0100 /// the range is lower bound + size. 0101 /// 0102 /// The 9 bits for the size allow a maximum range of 512 elements. Some ranges 0103 /// in the Unicode tables are larger. They are stored in multiple consecutive 0104 /// ranges in the data table. An alternative would be to store the sizes in a 0105 /// separate 16-bit value. The original MSVC STL code had such an approach, but 0106 /// this approach uses less space for the data and is about 4% faster in the 0107 /// following benchmark. 
0108 /// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp 0109 // clang-format off 0110 _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[201] = { 0111 0x00180139, 0112 0x001a807d, 0113 0x00241811, 0114 0x002c88b1, 0115 0x002df801, 0116 0x002e0805, 0117 0x002e2005, 0118 0x002e3801, 0119 0x00308029, 0120 0x00325851, 0121 0x00338001, 0122 0x0036b019, 0123 0x0036f815, 0124 0x00373805, 0125 0x0037500d, 0126 0x00388801, 0127 0x00398069, 0128 0x003f5821, 0129 0x003fe801, 0130 0x0040b00d, 0131 0x0040d821, 0132 0x00412809, 0133 0x00414811, 0134 0x0042c809, 0135 0x0044c01d, 0136 0x0046505d, 0137 0x00471871, 0138 0x0048a890, 0139 0x0049e001, 0140 0x004a6802, 0141 0x004a880d, 0142 0x004ac01c, 0143 0x004bc01c, 0144 0x004ca84c, 0145 0x004d5018, 0146 0x004d9000, 0147 0x004db00c, 0148 0x004de001, 0149 0x004e6802, 0150 0x004ee004, 0151 0x004ef800, 0152 0x004f8004, 0153 0x004ff001, 0154 0x0051e001, 0155 0x0054a84c, 0156 0x00555018, 0157 0x00559004, 0158 0x0055a810, 0159 0x0055e001, 0160 0x00566802, 0161 0x0057c800, 0162 0x0058a84c, 0163 0x00595018, 0164 0x00599004, 0165 0x0059a810, 0166 0x0059e001, 0167 0x005a6802, 0168 0x005ae004, 0169 0x005af800, 0170 0x005b8800, 0171 0x0060a84c, 0172 0x0061503c, 0173 0x0061e001, 0174 0x00626802, 0175 0x0062a805, 0176 0x0062c008, 0177 0x0065e001, 0178 0x0068a894, 0179 0x0069d805, 0180 0x006a6802, 0181 0x0071c009, 0182 0x0072400d, 0183 0x0075c009, 0184 0x0076400d, 0185 0x0078c005, 0186 0x0079a801, 0187 0x0079b801, 0188 0x0079c801, 0189 0x007b8805, 0190 0x007ba001, 0191 0x007bd00d, 0192 0x007c0001, 0193 0x007c1009, 0194 0x007c3005, 0195 0x007e3001, 0196 0x0081b801, 0197 0x0081c805, 0198 0x00846801, 0199 0x009ae809, 0200 0x00b8a001, 0201 0x00be9001, 0202 0x00bee801, 0203 0x00c54801, 0204 0x00c9c809, 0205 0x00d0b805, 0206 0x00d30001, 0207 0x00d3a81d, 0208 0x00d3f801, 0209 0x00d58035, 0210 0x00d5f83d, 0211 0x00d9a001, 0212 0x00db5821, 0213 0x00dd5801, 0214 0x00df3001, 0215 0x00e1b801, 0216 0x00e68009, 0217 0x00e6a031, 0218 
0x00e71019, 0219 0x00e76801, 0220 0x00e7a001, 0221 0x00e7c005, 0222 0x00ee00fd, 0223 0x01006801, 0224 0x01068031, 0225 0x01070801, 0226 0x0107282d, 0227 0x01677809, 0228 0x016bf801, 0229 0x016f007d, 0230 0x01815015, 0231 0x0184c805, 0232 0x05337801, 0233 0x0533a025, 0234 0x0534f005, 0235 0x05378005, 0236 0x05416001, 0237 0x05470045, 0238 0x05495809, 0239 0x054d9801, 0240 0x05558001, 0241 0x05559009, 0242 0x0555b805, 0243 0x0555f005, 0244 0x05560801, 0245 0x0557b001, 0246 0x055f6801, 0247 0x07d8f001, 0248 0x07f1003d, 0249 0x080fe801, 0250 0x08170001, 0251 0x081bb011, 0252 0x08506801, 0253 0x08507801, 0254 0x0851c009, 0255 0x0851f801, 0256 0x08572805, 0257 0x0869200d, 0258 0x08755805, 0259 0x0877e809, 0260 0x087a3029, 0261 0x087c100d, 0262 0x08838001, 0263 0x0883f801, 0264 0x0885d001, 0265 0x08880009, 0266 0x08899805, 0267 0x088b9801, 0268 0x088e5001, 0269 0x0891b001, 0270 0x08974805, 0271 0x0899d805, 0272 0x089b3019, 0273 0x089b8011, 0274 0x08a23001, 0275 0x08a2f001, 0276 0x08a61801, 0277 0x08ae0001, 0278 0x08b5b801, 0279 0x08b95801, 0280 0x08c1d001, 0281 0x08c9f001, 0282 0x08ca1801, 0283 0x08d1a001, 0284 0x08d23801, 0285 0x08d4c801, 0286 0x08ea1001, 0287 0x08ea2005, 0288 0x08ecb801, 0289 0x08fa1001, 0290 0x0b578011, 0291 0x0b598019, 0292 0x0de4f001, 0293 0x0e8b2801, 0294 0x0e8b3809, 0295 0x0e8b7011, 0296 0x0e8bd81d, 0297 0x0e8c2819, 0298 0x0e8d500d, 0299 0x0e921009, 0300 0x0f000019, 0301 0x0f004041, 0302 0x0f00d819, 0303 0x0f011805, 0304 0x0f013011, 0305 0x0f047801, 0306 0x0f098019, 0307 0x0f157001, 0308 0x0f17600d, 0309 0x0f27600d, 0310 0x0f468019, 0311 0x0f4a2019}; 0312 // clang-format on 0313 0314 /// Returns the indic conjuct break property of a code point. 0315 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __property __get_property(const char32_t __code_point) noexcept { 0316 // The algorithm searches for the upper bound of the range and, when found, 0317 // steps back one entry. This algorithm is used since the code point can be 0318 // anywhere in the range. 
After a lower bound is found the next step is to 0319 // compare whether the code unit is indeed in the range. 0320 // 0321 // Since the entry contains a code unit, size, and property the code point 0322 // being sought needs to be adjusted. Just shifting the code point to the 0323 // proper position doesn't work; suppose an entry has property 0, size 1, 0324 // and lower bound 3. This results in the entry 0x1810. 0325 // When searching for code point 3 it will search for 0x1800, find 0x1810 0326 // and moves to the previous entry. Thus the lower bound value will never 0327 // be found. 0328 // The simple solution is to set the bits belonging to the property and 0329 // size. Then the upper bound for code point 3 will return the entry after 0330 // 0x1810. After moving to the previous entry the algorithm arrives at the 0331 // correct entry. 0332 ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 11) | 0x7ffu) - __entries; 0333 if (__i == 0) 0334 return __property::__none; 0335 0336 --__i; 0337 uint32_t __upper_bound = (__entries[__i] >> 11) + ((__entries[__i] >> 2) & 0b1'1111'1111); 0338 if (__code_point <= __upper_bound) 0339 return static_cast<__property>(__entries[__i] & 0b11); 0340 0341 return __property::__none; 0342 } 0343 0344 } // namespace __indic_conjunct_break 0345 0346 #endif // _LIBCPP_STD_VER >= 20 0347 0348 _LIBCPP_END_NAMESPACE_STD 0349 0350 #endif // _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|