File indexing completed on 2025-01-19 09:47:55
0001
0002
0003
0004
0005
0006
0007
0008 #if !defined(BOOST_SPIRIT_UC_TYPES_NOVEMBER_23_2008_0840PM)
0009 #define BOOST_SPIRIT_UC_TYPES_NOVEMBER_23_2008_0840PM
0010
0011 #if defined(_MSC_VER)
0012 #pragma once
0013 #endif
0014
0015 #include <boost/config.hpp>
0016 #include <boost/cstdint.hpp>
0017 #include <boost/type_traits/make_unsigned.hpp>
0018 #include <string>
0019
0020 namespace boost { namespace spirit
0021 {
0022 typedef ::boost::uint32_t ucs4_char;
0023 typedef char utf8_char;
0024 typedef std::basic_string<ucs4_char> ucs4_string;
0025 typedef std::basic_string<utf8_char> utf8_string;
0026
0027 namespace detail {
0028 inline void utf8_put_encode(utf8_string& out, ucs4_char x)
0029 {
0030
0031 if (BOOST_UNLIKELY(x > 0x10FFFFul || (0xD7FFul < x && x < 0xE000ul)))
0032 x = 0xFFFDul;
0033
0034
0035 if (x < 0x80ul) {
0036 out.push_back(static_cast<unsigned char>(x));
0037 }
0038 else if (x < 0x800ul) {
0039 out.push_back(static_cast<unsigned char>(0xC0ul + (x >> 6)));
0040 out.push_back(static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
0041 }
0042 else if (x < 0x10000ul) {
0043 out.push_back(static_cast<unsigned char>(0xE0ul + (x >> 12)));
0044 out.push_back(static_cast<unsigned char>(0x80ul + ((x >> 6) & 0x3Ful)));
0045 out.push_back(static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
0046 }
0047 else {
0048 out.push_back(static_cast<unsigned char>(0xF0ul + (x >> 18)));
0049 out.push_back(static_cast<unsigned char>(0x80ul + ((x >> 12) & 0x3Ful)));
0050 out.push_back(static_cast<unsigned char>(0x80ul + ((x >> 6) & 0x3Ful)));
0051 out.push_back(static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
0052 }
0053 }
0054 }
0055
0056 template <typename Char>
0057 inline utf8_string to_utf8(Char value)
0058 {
0059 utf8_string result;
0060 typedef typename make_unsigned<Char>::type UChar;
0061 detail::utf8_put_encode(result, static_cast<UChar>(value));
0062 return result;
0063 }
0064
0065 template <typename Char>
0066 inline utf8_string to_utf8(Char const* str)
0067 {
0068 utf8_string result;
0069 typedef typename make_unsigned<Char>::type UChar;
0070 while (*str)
0071 detail::utf8_put_encode(result, static_cast<UChar>(*str++));
0072 return result;
0073 }
0074
0075 template <typename Char, typename Traits, typename Allocator>
0076 inline utf8_string
0077 to_utf8(std::basic_string<Char, Traits, Allocator> const& str)
0078 {
0079 utf8_string result;
0080 typedef typename make_unsigned<Char>::type UChar;
0081 for (Char const* ptr = str.data(),
0082 * end = ptr + str.size(); ptr < end; ++ptr)
0083 detail::utf8_put_encode(result, static_cast<UChar>(*ptr));
0084 return result;
0085 }
0086
0087
0088 #if defined(_MSC_VER) || defined(__SIZEOF_WCHAR_T__) && __SIZEOF_WCHAR_T__ == 2
0089 inline utf8_string to_utf8(wchar_t value)
0090 {
0091 utf8_string result;
0092 detail::utf8_put_encode(result, static_cast<make_unsigned<wchar_t>::type>(value));
0093 return result;
0094 }
0095
0096 namespace detail {
0097 inline ucs4_char decode_utf16(wchar_t const*& s)
0098 {
0099 typedef make_unsigned<wchar_t>::type uwchar_t;
0100
0101 uwchar_t x(*s);
0102 if (x < 0xD800ul || x > 0xDFFFul)
0103 return x;
0104
0105
0106 if (BOOST_UNLIKELY((x >> 10) != 0x36ul))
0107 return 0xFFFDul;
0108
0109 uwchar_t y(*++s);
0110
0111 if (BOOST_UNLIKELY((y >> 10) != 0x37ul))
0112 return 0xFFFDul;
0113
0114 return ((x & 0x3FFul) << 10) + (y & 0x3FFul) + 0x10000ul;
0115 }
0116 }
0117
0118 inline utf8_string to_utf8(wchar_t const* str)
0119 {
0120 utf8_string result;
0121 for (ucs4_char c; (c = detail::decode_utf16(str)) != ucs4_char(); ++str)
0122 detail::utf8_put_encode(result, c);
0123 return result;
0124 }
0125
0126 template <typename Traits, typename Allocator>
0127 inline utf8_string
0128 to_utf8(std::basic_string<wchar_t, Traits, Allocator> const& str)
0129 {
0130 return to_utf8(str.c_str());
0131 }
0132 #endif
0133 }}
0134
0135 #endif