File indexing completed on 2025-01-18 09:50:18
0001 #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
0002 #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
0003
0004 #include <boost/property_tree/json_parser/error.hpp>
0005
0006 #include <boost/core/ref.hpp>
0007 #include <boost/bind/bind.hpp>
0008
0009 #include <iterator>
0010 #include <sstream>
0011 #include <string>
0012
0013 namespace boost { namespace property_tree {
0014 namespace json_parser { namespace detail
0015 {
0016
0017 template <typename Encoding, typename Iterator, typename Sentinel>
0018 class source
0019 {
0020 public:
0021 typedef typename std::iterator_traits<Iterator>::value_type
0022 code_unit;
0023 typedef bool (Encoding::*encoding_predicate)(code_unit c) const;
0024
0025 explicit source(Encoding& encoding) : encoding(encoding) {}
0026
0027 template <typename Range>
0028 void set_input(const std::string& filename, const Range& r)
0029 {
0030 this->filename = filename;
0031 cur = r.begin();
0032 end = r.end();
0033
0034
0035
0036
0037 encoding.skip_introduction(cur, end);
0038 line = 1;
0039 offset = 0;
0040 }
0041
0042 bool done() const { return cur == end; }
0043
0044 void parse_error(const char* msg) {
0045 BOOST_PROPERTY_TREE_THROW(
0046 json_parser_error(msg, filename, line));
0047 }
0048
0049 void next() {
0050 if (encoding.is_nl(*cur)) {
0051 ++line;
0052 offset = 0;
0053 } else {
0054 ++offset;
0055 }
0056 ++cur;
0057 }
0058
0059 template <typename Action>
0060 bool have(encoding_predicate p, Action& a) {
0061 bool found = cur != end && (encoding.*p)(*cur);
0062 if (found) {
0063 a(*cur);
0064 next();
0065 }
0066 return found;
0067 }
0068
0069 bool have(encoding_predicate p) {
0070 DoNothing n;
0071 return have(p, n);
0072 }
0073
0074 template <typename Action>
0075 void expect(encoding_predicate p, const char* msg, Action& a) {
0076 if (!have(p, a)) {
0077 parse_error(msg);
0078 }
0079 }
0080
0081 void expect(encoding_predicate p, const char* msg) {
0082 DoNothing n;
0083 expect(p, msg, n);
0084 }
0085
0086 code_unit need_cur(const char* msg) {
0087 if (cur == end) {
0088 parse_error(msg);
0089 }
0090 return *cur;
0091 }
0092
0093 Iterator& raw_cur() { return cur; }
0094 Sentinel raw_end() { return end; }
0095
0096 private:
0097 struct DoNothing {
0098 void operator ()(code_unit) const {}
0099 };
0100
0101 Encoding& encoding;
0102 Iterator cur;
0103 Sentinel end;
0104 std::string filename;
0105 int line;
0106 int offset;
0107 };
0108
/// Adapter that reports a complete number to the callbacks in one piece.
///
/// This is the primary template; a partial specialisation exists for
/// std::input_iterator_tag. Here the adapter remembers where the number
/// started and, on finish(), converts the range between that position and
/// the live cursor via Encoding::to_internal() and forwards the result to
/// Callbacks::on_number().
template <typename Callbacks, typename Encoding, typename Iterator,
          typename = typename std::iterator_traits<Iterator>
              ::iterator_category>
class number_callback_adapter
{
    // Copying is disabled: declared here, intentionally never defined.
    number_callback_adapter(const number_callback_adapter&);

    Callbacks& sink;
    Encoding& codec;
    Iterator number_start;  // Snapshot of the cursor at construction.
    Iterator& live_cursor;  // The parser's own cursor, advanced externally.

public:
    /// @param callbacks  Receiver of the on_number() notification.
    /// @param encoding   Used to convert the consumed range.
    /// @param cur        The parser's cursor; copied as the start of the
    ///                   number and also kept by reference.
    number_callback_adapter(Callbacks& callbacks, Encoding& encoding,
                            Iterator& cur)
        : sink(callbacks), codec(encoding), number_start(cur),
          live_cursor(cur)
    {}

    /// Per-code-unit hook; nothing to do because the whole range is
    /// converted at once in finish().
    void operator ()(typename Encoding::external_char) {}

    /// Emits everything between the recorded start and the current cursor
    /// position as a single number.
    void finish() const {
        sink.on_number(codec.to_internal(number_start, live_cursor));
    }
};
0134
0135 template <typename Callbacks, typename Encoding, typename Iterator>
0136 class number_callback_adapter<Callbacks, Encoding, Iterator,
0137 std::input_iterator_tag>
0138 {
0139 public:
0140 number_callback_adapter(Callbacks& callbacks, Encoding& encoding,
0141 Iterator&)
0142 : callbacks(callbacks), encoding(encoding), first(true)
0143 {}
0144
0145 void operator ()(typename Encoding::external_char c) {
0146 if (first) {
0147 callbacks.on_begin_number();
0148 first = false;
0149 }
0150 callbacks.on_digit(encoding.to_internal_trivial(c));
0151 }
0152
0153 void finish() const {
0154 callbacks.on_end_number();
0155 }
0156 private:
0157 number_callback_adapter(const number_callback_adapter&);
0158
0159 Callbacks& callbacks;
0160 Encoding& encoding;
0161 bool first;
0162 };
0163
/// Adapter that reports string contents to the callbacks in runs.
///
/// This is the primary template; a partial specialisation exists for
/// std::input_iterator_tag. A "run" is the stretch of input between
/// start_run() (or construction) and finish_run(); it is converted with
/// Encoding::to_internal() and handed to Callbacks::on_code_units().
template <typename Callbacks, typename Encoding, typename Iterator,
          typename = typename std::iterator_traits<Iterator>
              ::iterator_category>
class string_callback_adapter
{
    // Copying is disabled: declared here, intentionally never defined.
    string_callback_adapter(const string_callback_adapter&);

    Callbacks& sink;
    Encoding& codec;
    Iterator& live_cursor;  // The parser's own cursor.
    Iterator run_start;     // Where the current run began.

public:
    /// @param callbacks  Receiver of on_code_units().
    /// @param encoding   Used to convert runs and to skip codepoints.
    /// @param cur        The parser's cursor; kept by reference, and its
    ///                   current value becomes the start of the first run.
    string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
                            Iterator& cur)
        : sink(callbacks), codec(encoding), live_cursor(cur),
          run_start(cur)
    {}

    /// Marks the current cursor position as the beginning of a new run.
    void start_run() {
        run_start = live_cursor;
    }

    /// Emits the input from the start of the run up to the cursor.
    void finish_run() {
        sink.on_code_units(codec.to_internal(run_start, live_cursor));
    }

    /// Moves the cursor past one codepoint by delegating to
    /// Encoding::skip_codepoint(), which receives @p error_fn.
    template <typename Sentinel, typename EncodingErrorFn>
    void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
        codec.skip_codepoint(live_cursor, end, error_fn);
    }
};
0197
0198 template <typename Callbacks, typename Encoding, typename Iterator>
0199 class string_callback_adapter<Callbacks, Encoding, Iterator,
0200 std::input_iterator_tag>
0201 {
0202 public:
0203 string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
0204 Iterator& cur)
0205 : callbacks(callbacks), encoding(encoding), cur(cur)
0206 {}
0207
0208 void start_run() {}
0209
0210 void finish_run() {}
0211
0212 template <typename Sentinel, typename EncodingErrorFn>
0213 void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
0214 encoding.transcode_codepoint(cur, end,
0215 boost::bind(&Callbacks::on_code_unit,
0216 boost::ref(callbacks), boost::placeholders::_1),
0217 error_fn);
0218 }
0219
0220 private:
0221 string_callback_adapter(const string_callback_adapter&);
0222
0223 Callbacks& callbacks;
0224 Encoding& encoding;
0225 Iterator& cur;
0226 };
0227
0228 template <typename Callbacks, typename Encoding, typename Iterator,
0229 typename Sentinel>
0230 class parser
0231 {
0232 typedef detail::number_callback_adapter<Callbacks, Encoding, Iterator>
0233 number_adapter;
0234 typedef detail::string_callback_adapter<Callbacks, Encoding, Iterator>
0235 string_adapter;
0236 typedef detail::source<Encoding, Iterator, Sentinel> source;
0237 typedef typename source::code_unit code_unit;
0238
0239 public:
0240 parser(Callbacks& callbacks, Encoding& encoding)
0241 : callbacks(callbacks), encoding(encoding), src(encoding)
0242 {}
0243
0244 template <typename Range>
0245 void set_input(const std::string& filename, const Range& r) {
0246 src.set_input(filename, r);
0247 }
0248
0249 void finish() {
0250 skip_ws();
0251 if (!src.done()) {
0252 parse_error("garbage after data");
0253 }
0254 }
0255
0256 void parse_value() {
0257 if (parse_object()) return;
0258 if (parse_array()) return;
0259 if (parse_string()) return;
0260 if (parse_boolean()) return;
0261 if (parse_null()) return;
0262 if (parse_number()) return;
0263 parse_error("expected value");
0264 }
0265
0266 bool parse_null() {
0267 skip_ws();
0268 if (!have(&Encoding::is_n)) {
0269 return false;
0270 }
0271 expect(&Encoding::is_u, "expected 'null'");
0272 expect(&Encoding::is_l, "expected 'null'");
0273 expect(&Encoding::is_l, "expected 'null'");
0274 callbacks.on_null();
0275 return true;
0276 }
0277
0278 bool parse_boolean() {
0279 skip_ws();
0280 if (have(&Encoding::is_t)) {
0281 expect(&Encoding::is_r, "expected 'true'");
0282 expect(&Encoding::is_u, "expected 'true'");
0283 expect(&Encoding::is_e, "expected 'true'");
0284 callbacks.on_boolean(true);
0285 return true;
0286 }
0287 if (have(&Encoding::is_f)) {
0288 expect(&Encoding::is_a, "expected 'false'");
0289 expect(&Encoding::is_l, "expected 'false'");
0290 expect(&Encoding::is_s, "expected 'false'");
0291 expect(&Encoding::is_e, "expected 'false'");
0292 callbacks.on_boolean(false);
0293 return true;
0294 }
0295 return false;
0296 }
0297
0298 bool parse_number() {
0299 skip_ws();
0300
0301 number_adapter adapter(callbacks, encoding, src.raw_cur());
0302 bool started = false;
0303 if (have(&Encoding::is_minus, adapter)) {
0304 started = true;
0305 }
0306 if (!have(&Encoding::is_0, adapter) && !parse_int_part(adapter)) {
0307 if (started) {
0308 parse_error("expected digits after -");
0309 }
0310 return false;
0311 }
0312 parse_frac_part(adapter);
0313 parse_exp_part(adapter);
0314 adapter.finish();
0315 return true;
0316 }
0317
0318 bool parse_string() {
0319 skip_ws();
0320
0321 if (!have(&Encoding::is_quote)) {
0322 return false;
0323 }
0324
0325 callbacks.on_begin_string();
0326 string_adapter adapter(callbacks, encoding, src.raw_cur());
0327 while (!encoding.is_quote(need_cur("unterminated string"))) {
0328 if (encoding.is_backslash(*src.raw_cur())) {
0329 adapter.finish_run();
0330 next();
0331 parse_escape();
0332 adapter.start_run();
0333 } else {
0334 adapter.process_codepoint(src.raw_end(),
0335 boost::bind(&parser::parse_error,
0336 this, "invalid code sequence"));
0337 }
0338 }
0339 adapter.finish_run();
0340 callbacks.on_end_string();
0341 next();
0342 return true;
0343 }
0344
0345 bool parse_array() {
0346 skip_ws();
0347
0348 if (!have(&Encoding::is_open_bracket)) {
0349 return false;
0350 }
0351
0352 callbacks.on_begin_array();
0353 skip_ws();
0354 if (have(&Encoding::is_close_bracket)) {
0355 callbacks.on_end_array();
0356 return true;
0357 }
0358 do {
0359 parse_value();
0360 skip_ws();
0361 } while (have(&Encoding::is_comma));
0362 expect(&Encoding::is_close_bracket, "expected ']' or ','");
0363 callbacks.on_end_array();
0364 return true;
0365 }
0366
0367 bool parse_object() {
0368 skip_ws();
0369
0370 if (!have(&Encoding::is_open_brace)) {
0371 return false;
0372 }
0373
0374 callbacks.on_begin_object();
0375 skip_ws();
0376 if (have(&Encoding::is_close_brace)) {
0377 callbacks.on_end_object();
0378 return true;
0379 }
0380 do {
0381 if (!parse_string()) {
0382 parse_error("expected key string");
0383 }
0384 skip_ws();
0385 expect(&Encoding::is_colon, "expected ':'");
0386 parse_value();
0387 skip_ws();
0388 } while (have(&Encoding::is_comma));
0389 expect(&Encoding::is_close_brace, "expected '}' or ','");
0390 callbacks.on_end_object();
0391 return true;
0392 }
0393
0394 private:
0395 typedef typename source::encoding_predicate encoding_predicate;
0396
0397 void parse_error(const char* msg) { src.parse_error(msg); }
0398 void next() { src.next(); }
0399 template <typename Action>
0400 bool have(encoding_predicate p, Action& a) { return src.have(p, a); }
0401 bool have(encoding_predicate p) { return src.have(p); }
0402 template <typename Action>
0403 void expect(encoding_predicate p, const char* msg, Action& a) {
0404 src.expect(p, msg, a);
0405 }
0406 void expect(encoding_predicate p, const char* msg) {
0407 src.expect(p, msg);
0408 }
0409 code_unit need_cur(const char* msg) { return src.need_cur(msg); }
0410
0411 void skip_ws() {
0412 while (have(&Encoding::is_ws)) {
0413 }
0414 }
0415
0416 bool parse_int_part(number_adapter& action) {
0417 if (!have(&Encoding::is_digit0, action)) {
0418 return false;
0419 }
0420 parse_digits(action);
0421 return true;
0422 }
0423
0424 void parse_frac_part(number_adapter& action) {
0425 if (!have(&Encoding::is_dot, action)) {
0426 return;
0427 }
0428 expect(&Encoding::is_digit, "need at least one digit after '.'",
0429 action);
0430 parse_digits(action);
0431 }
0432
0433 void parse_exp_part(number_adapter& action) {
0434 if (!have(&Encoding::is_eE, action)) {
0435 return;
0436 }
0437 have(&Encoding::is_plusminus, action);
0438 expect(&Encoding::is_digit, "need at least one digit in exponent",
0439 action);
0440 parse_digits(action);
0441 }
0442
0443 void parse_digits(number_adapter& action) {
0444 while (have(&Encoding::is_digit, action)) {
0445 }
0446 }
0447
0448 void parse_escape() {
0449 if (have(&Encoding::is_quote)) {
0450 feed(0x22);
0451 } else if (have(&Encoding::is_backslash)) {
0452 feed(0x5c);
0453 } else if (have(&Encoding::is_slash)) {
0454 feed(0x2f);
0455 } else if (have(&Encoding::is_b)) {
0456 feed(0x08);
0457 } else if (have(&Encoding::is_f)) {
0458 feed(0x0c);
0459 } else if (have(&Encoding::is_n)) {
0460 feed(0x0a);
0461 } else if (have(&Encoding::is_r)) {
0462 feed(0x0d);
0463 } else if (have(&Encoding::is_t)) {
0464 feed(0x09);
0465 } else if (have(&Encoding::is_u)) {
0466 parse_codepoint_ref();
0467 } else {
0468 parse_error("invalid escape sequence");
0469 }
0470 }
0471
0472 unsigned parse_hex_quad() {
0473 unsigned codepoint = 0;
0474 for (int i = 0; i < 4; ++i) {
0475 int value = encoding.decode_hexdigit(
0476 need_cur("invalid escape sequence"));
0477 if (value < 0) {
0478 parse_error("invalid escape sequence");
0479 }
0480 codepoint *= 16;
0481 codepoint += value;
0482 next();
0483 }
0484 return codepoint;
0485 }
0486
0487 static bool is_surrogate_high(unsigned codepoint) {
0488 return (codepoint & 0xfc00) == 0xd800;
0489 }
0490 static bool is_surrogate_low(unsigned codepoint) {
0491 return (codepoint & 0xfc00) == 0xdc00;
0492 }
0493 static unsigned combine_surrogates(unsigned high, unsigned low) {
0494 return 0x010000 + (((high & 0x3ff) << 10) | (low & 0x3ff));
0495 }
0496
0497 void parse_codepoint_ref() {
0498 unsigned codepoint = parse_hex_quad();
0499 if (is_surrogate_low(codepoint)) {
0500 parse_error("invalid codepoint, stray low surrogate");
0501 }
0502 if (is_surrogate_high(codepoint)) {
0503 expect(&Encoding::is_backslash,
0504 "invalid codepoint, stray high surrogate");
0505 expect(&Encoding::is_u,
0506 "expected codepoint reference after high surrogate");
0507 int low = parse_hex_quad();
0508 if (!is_surrogate_low(low)) {
0509 parse_error("expected low surrogate after high surrogate");
0510 }
0511 codepoint = combine_surrogates(codepoint, low);
0512 }
0513 feed(codepoint);
0514 }
0515
0516 void feed(unsigned codepoint) {
0517 encoding.feed_codepoint(codepoint,
0518 boost::bind(&Callbacks::on_code_unit,
0519 boost::ref(callbacks), boost::placeholders::_1));
0520 }
0521
0522 Callbacks& callbacks;
0523 Encoding& encoding;
0524 source src;
0525 };
0526
0527 }}}}
0528
0529 #endif