Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-31 10:02:39

0001 /*=============================================================================
0002     Copyright (c) 2001-2014 Joel de Guzman
0003     Copyright (c) 2023 Nikita Kniazev
0004 
0005     Distributed under the Boost Software License, Version 1.0. (See accompanying
0006     file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0007 ==============================================================================*/
0008 #if !defined(BOOST_SPIRIT_X3_UC_TYPES_NOVEMBER_23_2008_0840PM)
0009 #define BOOST_SPIRIT_X3_UC_TYPES_NOVEMBER_23_2008_0840PM
0010 
0011 #include <boost/config.hpp>
0012 #include <type_traits>
0013 #include <string>
0014 
0015 namespace boost { namespace spirit { namespace x3
0016 {
0017     typedef char32_t ucs4_char;
0018     typedef char utf8_char;
0019     typedef std::basic_string<ucs4_char> ucs4_string;
0020     typedef std::basic_string<utf8_char> utf8_string;
0021 
namespace detail {
    // Appends the UTF-8 encoding of the code point `x` to `out`.
    //
    // A value that is not a Unicode scalar value (greater than U+10FFFF, or
    // inside the surrogate range U+D800..U+DFFF) is encoded as U+FFFD
    // instead, so the bytes appended are always well-formed UTF-8.
    inline void utf8_put_encode(utf8_string& out, ucs4_char x)
    {
        // https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf D90
        bool const in_surrogate_range = 0xD7FFul < x && x < 0xE000ul;
        if (BOOST_UNLIKELY(x > 0x10FFFFul || in_surrogate_range))
            x = 0xFFFDul;

        // Table 3-6. UTF-8 Bit Distribution
        // One-byte form: the value is stored verbatim.
        if (x < 0x80ul) {
            out.push_back(static_cast<unsigned char>(x));
            return;
        }

        // Multi-byte forms: choose the lead-byte marker and how many
        // continuation bytes follow from the magnitude of the code point.
        unsigned long lead_marker;
        unsigned continuation_count;
        if (x < 0x800ul) {
            lead_marker = 0xC0ul;
            continuation_count = 1u;
        }
        else if (x < 0x10000ul) {
            lead_marker = 0xE0ul;
            continuation_count = 2u;
        }
        else {
            lead_marker = 0xF0ul;
            continuation_count = 3u;
        }

        // The lead byte carries the bits above all continuation payloads.
        unsigned shift = 6u * continuation_count;
        out.push_back(static_cast<unsigned char>(lead_marker + (x >> shift)));

        // Each continuation byte carries the next lower 6 bits, most
        // significant group first.
        while (shift != 0u) {
            shift -= 6u;
            out.push_back(static_cast<unsigned char>(0x80ul + ((x >> shift) & 0x3Ful)));
        }
    }
}
0050 
0051     template <typename Char>
0052     inline utf8_string to_utf8(Char value)
0053     {
0054         utf8_string result;
0055         typedef typename std::make_unsigned<Char>::type UChar;
0056         detail::utf8_put_encode(result, static_cast<UChar>(value));
0057         return result;
0058     }
0059 
0060     template <typename Char>
0061     inline utf8_string to_utf8(Char const* str)
0062     {
0063         utf8_string result;
0064         typedef typename std::make_unsigned<Char>::type UChar;
0065         while (*str)
0066             detail::utf8_put_encode(result, static_cast<UChar>(*str++));
0067         return result;
0068     }
0069 
0070     template <typename Char, typename Traits, typename Allocator>
0071     inline utf8_string
0072     to_utf8(std::basic_string<Char, Traits, Allocator> const& str)
0073     {
0074         utf8_string result;
0075         typedef typename std::make_unsigned<Char>::type UChar;
0076         for (Char ch : str)
0077             detail::utf8_put_encode(result, static_cast<UChar>(ch));
0078         return result;
0079     }
0080 
0081     // Assume wchar_t content is UTF-16 on MSVC, or mingw/wineg++ with -fshort-wchar
0082 #if defined(_MSC_VER) || defined(__SIZEOF_WCHAR_T__) && __SIZEOF_WCHAR_T__ == 2
0083     inline utf8_string to_utf8(wchar_t value)
0084     {
0085         utf8_string result;
0086         detail::utf8_put_encode(result, static_cast<std::make_unsigned<wchar_t>::type>(value));
0087         return result;
0088     }
0089 
namespace detail {
    // Decodes one code point from the UTF-16 sequence at `s`.
    //
    // `s` is an in/out cursor into a zero-terminated sequence. On return it
    // points at the LAST code unit that was consumed, so the caller advances
    // it by one to reach the next code point:
    //   - a non-surrogate unit is returned as is, `s` is unchanged;
    //   - a well-formed surrogate pair is combined into a supplementary code
    //     point and `s` is advanced onto the low surrogate;
    //   - an unpaired surrogate yields U+FFFD and `s` is left on that unit,
    //     so the following unit is neither skipped nor stepped over.
    //
    // Returns 0 when `*s` is the terminating zero.
    inline ucs4_char decode_utf16(wchar_t const*& s)
    {
        typedef std::make_unsigned<wchar_t>::type uwchar_t;

        uwchar_t const x(*s);
        if (x < 0xD800ul || x > 0xDFFFul)
            return x;

        // expected high-surrogate
        if (BOOST_UNLIKELY((x >> 10) != 0b110110ul))
            return 0xFFFDul;

        // Peek at the next unit without consuming it. Reading s[1] is safe:
        // *s is a non-zero unit, so at least the terminator follows it.
        uwchar_t const y(s[1]);
        // expected low-surrogate
        if (BOOST_UNLIKELY((y >> 10) != 0b110111ul))
            return 0xFFFDul;

        // Only a valid pair consumes the second unit. Advancing before the
        // check would make the caller skip a unit that is not part of the
        // pair, and run past the end if that unit is the terminator.
        ++s;
        return ((x & 0x3FFul) << 10) + (y & 0x3FFul) + 0x10000ul;
    }
}
0111 
0112     inline utf8_string to_utf8(wchar_t const* str)
0113     {
0114         utf8_string result;
0115         for (ucs4_char c; (c = detail::decode_utf16(str)) != ucs4_char(); ++str)
0116             detail::utf8_put_encode(result, c);
0117         return result;
0118     }
0119 
0120     template <typename Traits, typename Allocator>
0121     inline utf8_string
0122     to_utf8(std::basic_string<wchar_t, Traits, Allocator> const& str)
0123     {
0124         return to_utf8(str.c_str());
0125     }
0126 #endif
0127 }}}
0128 
0129 #endif