Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-19 09:47:55

0001 /*=============================================================================
0002     Copyright (c) 2001-2011 Joel de Guzman
0003     Copyright (c) 2023 Nikita Kniazev
0004 
0005     Distributed under the Boost Software License, Version 1.0. (See accompanying
0006     file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0007 ==============================================================================*/
0008 #if !defined(BOOST_SPIRIT_UC_TYPES_NOVEMBER_23_2008_0840PM)
0009 #define BOOST_SPIRIT_UC_TYPES_NOVEMBER_23_2008_0840PM
0010 
0011 #if defined(_MSC_VER)
0012 #pragma once
0013 #endif
0014 
0015 #include <boost/config.hpp>
0016 #include <boost/cstdint.hpp>
0017 #include <boost/type_traits/make_unsigned.hpp>
0018 #include <string>
0019 
0020 namespace boost { namespace spirit
0021 {
0022     typedef ::boost::uint32_t ucs4_char;
0023     typedef char utf8_char;
0024     typedef std::basic_string<ucs4_char> ucs4_string;
0025     typedef std::basic_string<utf8_char> utf8_string;
0026 
0027 namespace detail {
0028     inline void utf8_put_encode(utf8_string& out, ucs4_char x)
0029     {
0030         // https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf D90
0031         if (BOOST_UNLIKELY(x > 0x10FFFFul || (0xD7FFul < x && x < 0xE000ul)))
0032             x = 0xFFFDul;
0033 
0034         // Table 3-6. UTF-8 Bit Distribution
0035         if (x < 0x80ul) {
0036             out.push_back(static_cast<unsigned char>(x));
0037         }
0038         else if (x < 0x800ul) {
0039             out.push_back(static_cast<unsigned char>(0xC0ul + (x >> 6)));
0040             out.push_back(static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
0041         }
0042         else if (x < 0x10000ul) {
0043             out.push_back(static_cast<unsigned char>(0xE0ul + (x >> 12)));
0044             out.push_back(static_cast<unsigned char>(0x80ul + ((x >> 6) & 0x3Ful)));
0045             out.push_back(static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
0046         }
0047         else {
0048             out.push_back(static_cast<unsigned char>(0xF0ul + (x >> 18)));
0049             out.push_back(static_cast<unsigned char>(0x80ul + ((x >> 12) & 0x3Ful)));
0050             out.push_back(static_cast<unsigned char>(0x80ul + ((x >> 6) & 0x3Ful)));
0051             out.push_back(static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
0052         }
0053     }
0054 }
0055 
0056     template <typename Char>
0057     inline utf8_string to_utf8(Char value)
0058     {
0059         utf8_string result;
0060         typedef typename make_unsigned<Char>::type UChar;
0061         detail::utf8_put_encode(result, static_cast<UChar>(value));
0062         return result;
0063     }
0064 
0065     template <typename Char>
0066     inline utf8_string to_utf8(Char const* str)
0067     {
0068         utf8_string result;
0069         typedef typename make_unsigned<Char>::type UChar;
0070         while (*str)
0071             detail::utf8_put_encode(result, static_cast<UChar>(*str++));
0072         return result;
0073     }
0074 
0075     template <typename Char, typename Traits, typename Allocator>
0076     inline utf8_string
0077     to_utf8(std::basic_string<Char, Traits, Allocator> const& str)
0078     {
0079         utf8_string result;
0080         typedef typename make_unsigned<Char>::type UChar;
0081         for (Char const* ptr = str.data(),
0082                        * end = ptr + str.size(); ptr < end; ++ptr)
0083             detail::utf8_put_encode(result, static_cast<UChar>(*ptr));
0084         return result;
0085     }
0086 
0087     // Assume wchar_t content is UTF-16 on MSVC, or mingw/wineg++ with -fshort-wchar
0088 #if defined(_MSC_VER) || defined(__SIZEOF_WCHAR_T__) && __SIZEOF_WCHAR_T__ == 2
0089     inline utf8_string to_utf8(wchar_t value)
0090     {
0091         utf8_string result;
0092         detail::utf8_put_encode(result, static_cast<make_unsigned<wchar_t>::type>(value));
0093         return result;
0094     }
0095 
0096 namespace detail {
0097     inline ucs4_char decode_utf16(wchar_t const*& s)
0098     {
0099         typedef make_unsigned<wchar_t>::type uwchar_t;
0100 
0101         uwchar_t x(*s);
0102         if (x < 0xD800ul || x > 0xDFFFul)
0103             return x;
0104 
0105         // expected high-surrogate
0106         if (BOOST_UNLIKELY((x >> 10) != 0x36ul))
0107             return 0xFFFDul;
0108 
0109         uwchar_t y(*++s);
0110         // expected low-surrogate
0111         if (BOOST_UNLIKELY((y >> 10) != 0x37ul))
0112             return 0xFFFDul;
0113 
0114         return ((x & 0x3FFul) << 10) + (y & 0x3FFul) + 0x10000ul;
0115     }
0116 }
0117 
0118     inline utf8_string to_utf8(wchar_t const* str)
0119     {
0120         utf8_string result;
0121         for (ucs4_char c; (c = detail::decode_utf16(str)) != ucs4_char(); ++str)
0122             detail::utf8_put_encode(result, c);
0123         return result;
0124     }
0125 
0126     template <typename Traits, typename Allocator>
0127     inline utf8_string
0128     to_utf8(std::basic_string<wchar_t, Traits, Allocator> const& str)
0129     {
0130         return to_utf8(str.c_str());
0131     }
0132 #endif
0133 }}
0134 
0135 #endif