File indexing completed on 2025-01-18 09:50:18
0001 #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP
0002 #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP
0003
0004 #include <boost/assert.hpp>
0005 #include <boost/range/iterator_range_core.hpp>
0006
0007 #include <utility>
0008
0009 namespace boost { namespace property_tree {
0010 namespace json_parser { namespace detail
0011 {
0012
// Character classification for JSON input whose external (source) code unit
// type is wchar_t.
//
// Every member inspects only the single code unit it is given and compares
// it against wide-character literals. No member reads or writes object
// state, so all of them are const and can be used through a const encoding.
struct external_wide_encoding
{
    typedef wchar_t external_char;

    // Line break and whitespace (space, tab, line feed, carriage return).
    bool is_nl(wchar_t c) const { return c == L'\n'; }
    bool is_ws(wchar_t c) const {
        return c == L' ' || c == L'\t' || c == L'\n' || c == L'\r';
    }

    // Characters that can appear in a number.
    bool is_minus(wchar_t c) const { return c == L'-'; }
    bool is_plusminus(wchar_t c) const { return c == L'+' || c == L'-'; }
    bool is_dot(wchar_t c) const { return c == L'.'; }
    bool is_eE(wchar_t c) const { return c == L'e' || c == L'E'; }
    bool is_0(wchar_t c) const { return c == L'0'; }
    // Any decimal digit '0'..'9'.
    bool is_digit(wchar_t c) const { return c >= L'0' && c <= L'9'; }
    // A non-zero decimal digit '1'..'9'.
    bool is_digit0(wchar_t c) const { return c >= L'1' && c <= L'9'; }

    // Quote, backslash and slash.
    bool is_quote(wchar_t c) const { return c == L'"'; }
    bool is_backslash(wchar_t c) const { return c == L'\\'; }
    bool is_slash(wchar_t c) const { return c == L'/'; }

    // Structural punctuation.
    bool is_comma(wchar_t c) const { return c == L','; }
    bool is_open_bracket(wchar_t c) const { return c == L'['; }
    bool is_close_bracket(wchar_t c) const { return c == L']'; }
    bool is_colon(wchar_t c) const { return c == L':'; }
    bool is_open_brace(wchar_t c) const { return c == L'{'; }
    bool is_close_brace(wchar_t c) const { return c == L'}'; }

    // Individual lower-case letters.
    bool is_a(wchar_t c) const { return c == L'a'; }
    bool is_b(wchar_t c) const { return c == L'b'; }
    bool is_e(wchar_t c) const { return c == L'e'; }
    bool is_f(wchar_t c) const { return c == L'f'; }
    bool is_l(wchar_t c) const { return c == L'l'; }
    bool is_n(wchar_t c) const { return c == L'n'; }
    bool is_r(wchar_t c) const { return c == L'r'; }
    bool is_s(wchar_t c) const { return c == L's'; }
    bool is_t(wchar_t c) const { return c == L't'; }
    bool is_u(wchar_t c) const { return c == L'u'; }

    // Returns the value (0..15) of the hexadecimal digit c, accepting both
    // upper- and lower-case letters, or -1 when c is not a hexadecimal digit.
    //
    // Declared const like every other member: it touches no state, and a
    // non-const declaration would make it uncallable on a const encoding.
    int decode_hexdigit(wchar_t c) const {
        if (c >= L'0' && c <= L'9') return c - L'0';
        if (c >= L'A' && c <= L'F') return c - L'A' + 10;
        if (c >= L'a' && c <= L'f') return c - L'a' + 10;
        return -1;
    }
};
0059
// Empty tag template. is_utf16<true> and is_utf16<false> are distinct types,
// so a value of either can be passed to pick an overload at compile time.
template <bool Utf16>
struct is_utf16
{
};
0061
0062 class wide_wide_encoding : public external_wide_encoding
0063 {
0064 typedef is_utf16<sizeof(wchar_t) == 2> test_utf16;
0065 public:
0066 typedef wchar_t internal_char;
0067
0068 template <typename Iterator>
0069 boost::iterator_range<Iterator>
0070 to_internal(Iterator first, Iterator last) const {
0071 return boost::make_iterator_range(first, last);
0072 }
0073
0074 wchar_t to_internal_trivial(wchar_t c) const {
0075 BOOST_ASSERT(!is_surrogate_high(c) && !is_surrogate_low(c));
0076 return c;
0077 }
0078
0079 template <typename Iterator, typename Sentinel,
0080 typename EncodingErrorFn>
0081 void skip_codepoint(Iterator& cur, Sentinel end,
0082 EncodingErrorFn error_fn) const {
0083 transcode_codepoint(cur, end, DoNothing(), error_fn);
0084 }
0085
0086 template <typename Iterator, typename Sentinel, typename TranscodedFn,
0087 typename EncodingErrorFn>
0088 void transcode_codepoint(Iterator& cur, Sentinel end,
0089 TranscodedFn transcoded_fn, EncodingErrorFn error_fn) const {
0090 return transcode_codepoint(cur, end, transcoded_fn, error_fn,
0091 test_utf16());
0092 }
0093
0094 template <typename TranscodedFn>
0095 void feed_codepoint(unsigned codepoint,
0096 TranscodedFn transcoded_fn) const {
0097 feed_codepoint(codepoint, transcoded_fn, test_utf16());
0098 }
0099
0100 template <typename Iterator, typename Sentinel>
0101 void skip_introduction(Iterator& cur, Sentinel end) const {
0102
0103 if (cur != end && *cur == 0xfeff) {
0104 ++cur;
0105 }
0106 }
0107
0108 private:
0109 struct DoNothing {
0110 void operator ()(wchar_t) const {}
0111 };
0112
0113 template <typename Iterator, typename Sentinel, typename TranscodedFn,
0114 typename EncodingErrorFn>
0115 void transcode_codepoint(Iterator& cur, Sentinel,
0116 TranscodedFn transcoded_fn,
0117 EncodingErrorFn error_fn,
0118 is_utf16<false>) const {
0119 wchar_t c = *cur;
0120 if (c < 0x20) {
0121 error_fn();
0122 }
0123 transcoded_fn(c);
0124 ++cur;
0125 }
0126 template <typename Iterator, typename Sentinel, typename TranscodedFn,
0127 typename EncodingErrorFn>
0128 void transcode_codepoint(Iterator& cur, Sentinel end,
0129 TranscodedFn transcoded_fn,
0130 EncodingErrorFn error_fn,
0131 is_utf16<true>) const {
0132 wchar_t c = *cur;
0133 if (c < 0x20) {
0134 error_fn();
0135 }
0136 if (is_surrogate_low(c)) {
0137 error_fn();
0138 }
0139 transcoded_fn(c);
0140 ++cur;
0141 if (is_surrogate_high(c)) {
0142 if (cur == end) {
0143 error_fn();
0144 }
0145 c = *cur;
0146 if (!is_surrogate_low(c)) {
0147 error_fn();
0148 }
0149 transcoded_fn(c);
0150 ++cur;
0151 }
0152 }
0153
0154 template <typename TranscodedFn>
0155 void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn,
0156 is_utf16<false>) const {
0157 transcoded_fn(static_cast<wchar_t>(codepoint));
0158 }
0159 template <typename TranscodedFn>
0160 void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn,
0161 is_utf16<true>) const {
0162 if (codepoint < 0x10000) {
0163 transcoded_fn(static_cast<wchar_t>(codepoint));
0164 } else {
0165 codepoint -= 0x10000;
0166 transcoded_fn(static_cast<wchar_t>((codepoint >> 10) | 0xd800));
0167 transcoded_fn(static_cast<wchar_t>(
0168 (codepoint & 0x3ff) | 0xdc00));
0169 }
0170 }
0171
0172 static bool is_surrogate_high(unsigned codepoint) {
0173 return (codepoint & 0xfc00) == 0xd800;
0174 }
0175 static bool is_surrogate_low(unsigned codepoint) {
0176 return (codepoint & 0xfc00) == 0xdc00;
0177 }
0178 };
0179
0180 }}}}
0181
0182 #endif