File indexing completed on 2025-01-31 10:02:39
0001
0002
0003
0004
0005
0006
0007
0008 #if !defined(BOOST_SPIRIT_X3_UC_TYPES_NOVEMBER_23_2008_0840PM)
0009 #define BOOST_SPIRIT_X3_UC_TYPES_NOVEMBER_23_2008_0840PM
0010
0011 #include <boost/config.hpp>
0012 #include <type_traits>
0013 #include <string>
0014
0015 namespace boost { namespace spirit { namespace x3
0016 {
// Character types used by the UTF-8 conversion helpers in this header:
// ucs4_char holds one code point, utf8_char holds one UTF-8 code unit.
using ucs4_char = char32_t;
using utf8_char = char;

// String types built from those character types.
using ucs4_string = std::basic_string<ucs4_char>;
using utf8_string = std::basic_string<utf8_char>;
0021
0022 namespace detail {
0023 inline void utf8_put_encode(utf8_string& out, ucs4_char x)
0024 {
0025
0026 if (BOOST_UNLIKELY(x > 0x10FFFFul || (0xD7FFul < x && x < 0xE000ul)))
0027 x = 0xFFFDul;
0028
0029
0030 if (x < 0x80ul) {
0031 out.push_back(static_cast<unsigned char>(x));
0032 }
0033 else if (x < 0x800ul) {
0034 out.push_back(static_cast<unsigned char>(0xC0ul + (x >> 6)));
0035 out.push_back(static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
0036 }
0037 else if (x < 0x10000ul) {
0038 out.push_back(static_cast<unsigned char>(0xE0ul + (x >> 12)));
0039 out.push_back(static_cast<unsigned char>(0x80ul + ((x >> 6) & 0x3Ful)));
0040 out.push_back(static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
0041 }
0042 else {
0043 out.push_back(static_cast<unsigned char>(0xF0ul + (x >> 18)));
0044 out.push_back(static_cast<unsigned char>(0x80ul + ((x >> 12) & 0x3Ful)));
0045 out.push_back(static_cast<unsigned char>(0x80ul + ((x >> 6) & 0x3Ful)));
0046 out.push_back(static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
0047 }
0048 }
0049 }
0050
0051 template <typename Char>
0052 inline utf8_string to_utf8(Char value)
0053 {
0054 utf8_string result;
0055 typedef typename std::make_unsigned<Char>::type UChar;
0056 detail::utf8_put_encode(result, static_cast<UChar>(value));
0057 return result;
0058 }
0059
0060 template <typename Char>
0061 inline utf8_string to_utf8(Char const* str)
0062 {
0063 utf8_string result;
0064 typedef typename std::make_unsigned<Char>::type UChar;
0065 while (*str)
0066 detail::utf8_put_encode(result, static_cast<UChar>(*str++));
0067 return result;
0068 }
0069
0070 template <typename Char, typename Traits, typename Allocator>
0071 inline utf8_string
0072 to_utf8(std::basic_string<Char, Traits, Allocator> const& str)
0073 {
0074 utf8_string result;
0075 typedef typename std::make_unsigned<Char>::type UChar;
0076 for (Char ch : str)
0077 detail::utf8_put_encode(result, static_cast<UChar>(ch));
0078 return result;
0079 }
0080
0081
0082 #if defined(_MSC_VER) || defined(__SIZEOF_WCHAR_T__) && __SIZEOF_WCHAR_T__ == 2
0083 inline utf8_string to_utf8(wchar_t value)
0084 {
0085 utf8_string result;
0086 detail::utf8_put_encode(result, static_cast<std::make_unsigned<wchar_t>::type>(value));
0087 return result;
0088 }
0089
0090 namespace detail {
0091 inline ucs4_char decode_utf16(wchar_t const*& s)
0092 {
0093 typedef std::make_unsigned<wchar_t>::type uwchar_t;
0094
0095 uwchar_t x(*s);
0096 if (x < 0xD800ul || x > 0xDFFFul)
0097 return x;
0098
0099
0100 if (BOOST_UNLIKELY((x >> 10) != 0b110110ul))
0101 return 0xFFFDul;
0102
0103 uwchar_t y(*++s);
0104
0105 if (BOOST_UNLIKELY((y >> 10) != 0b110111ul))
0106 return 0xFFFDul;
0107
0108 return ((x & 0x3FFul) << 10) + (y & 0x3FFul) + 0x10000ul;
0109 }
0110 }
0111
0112 inline utf8_string to_utf8(wchar_t const* str)
0113 {
0114 utf8_string result;
0115 for (ucs4_char c; (c = detail::decode_utf16(str)) != ucs4_char(); ++str)
0116 detail::utf8_put_encode(result, c);
0117 return result;
0118 }
0119
0120 template <typename Traits, typename Allocator>
0121 inline utf8_string
0122 to_utf8(std::basic_string<wchar_t, Traits, Allocator> const& str)
0123 {
0124 return to_utf8(str.c_str());
0125 }
0126 #endif
0127 }}}
0128
0129 #endif