Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-03 08:13:29

0001 // -*- C++ -*-
0002 //===----------------------------------------------------------------------===//
0003 //
0004 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0005 // See https://llvm.org/LICENSE.txt for license information.
0006 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0007 //
0008 //===----------------------------------------------------------------------===//
0009 
0010 // WARNING, this entire header is generated by
0011 // utils/generate_indic_conjunct_break_table.py
0012 // DO NOT MODIFY!
0013 
0014 // UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
0015 //
0016 // See Terms of Use <https://www.unicode.org/copyright.html>
0017 // for definitions of Unicode Inc.'s Data Files and Software.
0018 //
0019 // NOTICE TO USER: Carefully read the following legal agreement.
0020 // BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
0021 // DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
0022 // YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
0023 // TERMS AND CONDITIONS OF THIS AGREEMENT.
0024 // IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
0025 // THE DATA FILES OR SOFTWARE.
0026 //
0027 // COPYRIGHT AND PERMISSION NOTICE
0028 //
0029 // Copyright (c) 1991-2022 Unicode, Inc. All rights reserved.
0030 // Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
0031 //
0032 // Permission is hereby granted, free of charge, to any person obtaining
0033 // a copy of the Unicode data files and any associated documentation
0034 // (the "Data Files") or Unicode software and any associated documentation
0035 // (the "Software") to deal in the Data Files or Software
0036 // without restriction, including without limitation the rights to use,
0037 // copy, modify, merge, publish, distribute, and/or sell copies of
0038 // the Data Files or Software, and to permit persons to whom the Data Files
0039 // or Software are furnished to do so, provided that either
0040 // (a) this copyright and permission notice appear with all copies
0041 // of the Data Files or Software, or
0042 // (b) this copyright and permission notice appear in associated
0043 // Documentation.
0044 //
0045 // THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
0046 // ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
0047 // WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0048 // NONINFRINGEMENT OF THIRD PARTY RIGHTS.
0049 // IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
0050 // NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
0051 // DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
0052 // DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
0053 // TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
0054 // PERFORMANCE OF THE DATA FILES OR SOFTWARE.
0055 //
0056 // Except as contained in this notice, the name of a copyright holder
0057 // shall not be used in advertising or otherwise to promote the sale,
0058 // use or other dealings in these Data Files or Software without prior
0059 // written authorization of the copyright holder.
0060 
0061 #ifndef _LIBCPP___CXX03___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H
0062 #define _LIBCPP___CXX03___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H
0063 
0064 #include <__cxx03/__algorithm/ranges_upper_bound.h>
0065 #include <__cxx03/__config>
0066 #include <__cxx03/__iterator/access.h>
0067 #include <__cxx03/cstddef>
0068 #include <__cxx03/cstdint>
0069 
0070 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
0071 #  pragma GCC system_header
0072 #endif
0073 
0074 _LIBCPP_BEGIN_NAMESPACE_STD
0075 
0076 #if _LIBCPP_STD_VER >= 20
0077 
0078 namespace __indic_conjunct_break {
0079 
0080 enum class __property : uint8_t {
0081   // Values generated from the data files (stored in bits [0, 1] of a table entry).
0082   __Consonant,
0083   __Extend,
0084   __Linker,
0085 
0086   // The code point has none of the above properties.
0087   __none
0088 };
0089 
0090 /// The entries of the Indic conjunct break property table.
0091 ///
0092 /// The data is generated from
0093 /// -  https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
0094 ///
0095 /// The data has 3 values
0096 /// - bits [0, 1] The property. One of the values generated from the data files
0097 ///   of \ref __property
0098 /// - bits [2, 10] The size of the range.
0099 /// - bits [11, 31] The lower bound code point of the range. The upper bound of
0100 ///   the range is lower bound + size (inclusive).
0101 ///
0102 /// The 9 bits for the size allow a maximum range of 512 elements. Some ranges
0103 /// in the Unicode tables are larger. They are stored in multiple consecutive
0104 /// ranges in the data table. An alternative would be to store the sizes in a
0105 /// separate 16-bit value. The original MSVC STL code had such an approach, but
0106 /// this approach uses less space for the data and is about 4% faster in the
0107 /// following benchmark.
0108 /// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp
0109 // clang-format off
0110 _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[201] = {
0111     0x00180139,
0112     0x001a807d,
0113     0x00241811,
0114     0x002c88b1,
0115     0x002df801,
0116     0x002e0805,
0117     0x002e2005,
0118     0x002e3801,
0119     0x00308029,
0120     0x00325851,
0121     0x00338001,
0122     0x0036b019,
0123     0x0036f815,
0124     0x00373805,
0125     0x0037500d,
0126     0x00388801,
0127     0x00398069,
0128     0x003f5821,
0129     0x003fe801,
0130     0x0040b00d,
0131     0x0040d821,
0132     0x00412809,
0133     0x00414811,
0134     0x0042c809,
0135     0x0044c01d,
0136     0x0046505d,
0137     0x00471871,
0138     0x0048a890,
0139     0x0049e001,
0140     0x004a6802,
0141     0x004a880d,
0142     0x004ac01c,
0143     0x004bc01c,
0144     0x004ca84c,
0145     0x004d5018,
0146     0x004d9000,
0147     0x004db00c,
0148     0x004de001,
0149     0x004e6802,
0150     0x004ee004,
0151     0x004ef800,
0152     0x004f8004,
0153     0x004ff001,
0154     0x0051e001,
0155     0x0054a84c,
0156     0x00555018,
0157     0x00559004,
0158     0x0055a810,
0159     0x0055e001,
0160     0x00566802,
0161     0x0057c800,
0162     0x0058a84c,
0163     0x00595018,
0164     0x00599004,
0165     0x0059a810,
0166     0x0059e001,
0167     0x005a6802,
0168     0x005ae004,
0169     0x005af800,
0170     0x005b8800,
0171     0x0060a84c,
0172     0x0061503c,
0173     0x0061e001,
0174     0x00626802,
0175     0x0062a805,
0176     0x0062c008,
0177     0x0065e001,
0178     0x0068a894,
0179     0x0069d805,
0180     0x006a6802,
0181     0x0071c009,
0182     0x0072400d,
0183     0x0075c009,
0184     0x0076400d,
0185     0x0078c005,
0186     0x0079a801,
0187     0x0079b801,
0188     0x0079c801,
0189     0x007b8805,
0190     0x007ba001,
0191     0x007bd00d,
0192     0x007c0001,
0193     0x007c1009,
0194     0x007c3005,
0195     0x007e3001,
0196     0x0081b801,
0197     0x0081c805,
0198     0x00846801,
0199     0x009ae809,
0200     0x00b8a001,
0201     0x00be9001,
0202     0x00bee801,
0203     0x00c54801,
0204     0x00c9c809,
0205     0x00d0b805,
0206     0x00d30001,
0207     0x00d3a81d,
0208     0x00d3f801,
0209     0x00d58035,
0210     0x00d5f83d,
0211     0x00d9a001,
0212     0x00db5821,
0213     0x00dd5801,
0214     0x00df3001,
0215     0x00e1b801,
0216     0x00e68009,
0217     0x00e6a031,
0218     0x00e71019,
0219     0x00e76801,
0220     0x00e7a001,
0221     0x00e7c005,
0222     0x00ee00fd,
0223     0x01006801,
0224     0x01068031,
0225     0x01070801,
0226     0x0107282d,
0227     0x01677809,
0228     0x016bf801,
0229     0x016f007d,
0230     0x01815015,
0231     0x0184c805,
0232     0x05337801,
0233     0x0533a025,
0234     0x0534f005,
0235     0x05378005,
0236     0x05416001,
0237     0x05470045,
0238     0x05495809,
0239     0x054d9801,
0240     0x05558001,
0241     0x05559009,
0242     0x0555b805,
0243     0x0555f005,
0244     0x05560801,
0245     0x0557b001,
0246     0x055f6801,
0247     0x07d8f001,
0248     0x07f1003d,
0249     0x080fe801,
0250     0x08170001,
0251     0x081bb011,
0252     0x08506801,
0253     0x08507801,
0254     0x0851c009,
0255     0x0851f801,
0256     0x08572805,
0257     0x0869200d,
0258     0x08755805,
0259     0x0877e809,
0260     0x087a3029,
0261     0x087c100d,
0262     0x08838001,
0263     0x0883f801,
0264     0x0885d001,
0265     0x08880009,
0266     0x08899805,
0267     0x088b9801,
0268     0x088e5001,
0269     0x0891b001,
0270     0x08974805,
0271     0x0899d805,
0272     0x089b3019,
0273     0x089b8011,
0274     0x08a23001,
0275     0x08a2f001,
0276     0x08a61801,
0277     0x08ae0001,
0278     0x08b5b801,
0279     0x08b95801,
0280     0x08c1d001,
0281     0x08c9f001,
0282     0x08ca1801,
0283     0x08d1a001,
0284     0x08d23801,
0285     0x08d4c801,
0286     0x08ea1001,
0287     0x08ea2005,
0288     0x08ecb801,
0289     0x08fa1001,
0290     0x0b578011,
0291     0x0b598019,
0292     0x0de4f001,
0293     0x0e8b2801,
0294     0x0e8b3809,
0295     0x0e8b7011,
0296     0x0e8bd81d,
0297     0x0e8c2819,
0298     0x0e8d500d,
0299     0x0e921009,
0300     0x0f000019,
0301     0x0f004041,
0302     0x0f00d819,
0303     0x0f011805,
0304     0x0f013011,
0305     0x0f047801,
0306     0x0f098019,
0307     0x0f157001,
0308     0x0f17600d,
0309     0x0f27600d,
0310     0x0f468019,
0311     0x0f4a2019};
0312 // clang-format on
0313 
0314 /// Returns the Indic conjunct break property of a code point.
0315 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __property __get_property(const char32_t __code_point) noexcept {
0316   // The algorithm searches for the upper bound of the range and, when found,
0317   // steps back one entry. This algorithm is used since the code point can be
0318   // anywhere in the range. After a lower bound is found the next step is to
0319   // compare whether the code point is indeed in the range.
0320   //
0321   // Since the entry contains a code point, size, and property the code point
0322   // being sought needs to be adjusted. Just shifting the code point to the
0323   // proper position doesn't work; suppose an entry has property 0, size 1,
0324   // and lower bound 3. This results in the entry 0x1804.
0325   // When searching for code point 3 it will search for 0x1800, find 0x1804
0326   // and move to the previous entry. Thus the lower bound value will never
0327   // be found.
0328   // The simple solution is to set the bits belonging to the property and
0329   // size. Then the upper bound for code point 3 will return the entry after
0330   // 0x1804. After moving to the previous entry the algorithm arrives at the
0331   // correct entry.
0332   ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 11) | 0x7ffu) - __entries;
0333   if (__i == 0) // The code point lies before the first range in the table.
0334     return __property::__none;
0335   // Step back to the candidate entry and compute its inclusive upper bound: lower bound + size.
0336   --__i;
0337   uint32_t __upper_bound = (__entries[__i] >> 11) + ((__entries[__i] >> 2) & 0b1'1111'1111);
0338   if (__code_point <= __upper_bound)
0339     return static_cast<__property>(__entries[__i] & 0b11);
0340   // The code point lies past the end of the candidate range.
0341   return __property::__none;
0342 }
0343 
0344 } // namespace __indic_conjunct_break
0345 
0346 #endif //_LIBCPP_STD_VER >= 20
0347 
0348 _LIBCPP_END_NAMESPACE_STD
0349 
0350 #endif // _LIBCPP___CXX03___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H