File indexing completed on 2025-07-14 08:45:41
0001 #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
0002 #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
0003
0004 #include <boost/property_tree/json_parser/error.hpp>
0005
0006 #include <boost/core/ref.hpp>
0007 #include <boost/bind/bind.hpp>
0008 #include <boost/bind/placeholders.hpp>
0009
0010 #include <iterator>
0011 #include <sstream>
0012 #include <string>
0013
0014 namespace boost { namespace property_tree {
0015 namespace json_parser { namespace detail
0016 {
0017
0018 template <typename Encoding, typename Iterator, typename Sentinel>
0019 class source
0020 {
0021 public:
0022 typedef typename std::iterator_traits<Iterator>::value_type
0023 code_unit;
0024 typedef bool (Encoding::*encoding_predicate)(code_unit c) const;
0025
0026 explicit source(Encoding& encoding) : encoding(encoding) {}
0027
0028 template <typename Range>
0029 void set_input(const std::string& filename, const Range& r)
0030 {
0031 this->filename = filename;
0032 cur = r.begin();
0033 end = r.end();
0034
0035
0036
0037
0038 encoding.skip_introduction(cur, end);
0039 line = 1;
0040 offset = 0;
0041 }
0042
0043 bool done() const { return cur == end; }
0044
0045 void parse_error(const char* msg) {
0046 BOOST_PROPERTY_TREE_THROW(
0047 json_parser_error(msg, filename, line));
0048 }
0049
0050 void next() {
0051 if (encoding.is_nl(*cur)) {
0052 ++line;
0053 offset = 0;
0054 } else {
0055 ++offset;
0056 }
0057 ++cur;
0058 }
0059
0060 template <typename Action>
0061 bool have(encoding_predicate p, Action& a) {
0062 bool found = cur != end && (encoding.*p)(*cur);
0063 if (found) {
0064 a(*cur);
0065 next();
0066 }
0067 return found;
0068 }
0069
0070 bool have(encoding_predicate p) {
0071 DoNothing n;
0072 return have(p, n);
0073 }
0074
0075 template <typename Action>
0076 void expect(encoding_predicate p, const char* msg, Action& a) {
0077 if (!have(p, a)) {
0078 parse_error(msg);
0079 }
0080 }
0081
0082 void expect(encoding_predicate p, const char* msg) {
0083 DoNothing n;
0084 expect(p, msg, n);
0085 }
0086
0087 code_unit need_cur(const char* msg) {
0088 if (cur == end) {
0089 parse_error(msg);
0090 }
0091 return *cur;
0092 }
0093
0094 Iterator& raw_cur() { return cur; }
0095 Sentinel raw_end() { return end; }
0096
0097 private:
0098 struct DoNothing {
0099 void operator ()(code_unit) const {}
0100 };
0101
0102 Encoding& encoding;
0103 Iterator cur;
0104 Sentinel end;
0105 std::string filename;
0106 int line;
0107 int offset;
0108 };
0109
// Number adapter used for every iterator category except
// std::input_iterator_tag (which has its own specialization). It copies the
// iterator at construction and, on finish(), hands the range from that copy
// to the caller's live iterator over to Callbacks::on_number in one piece.
template <typename Callbacks, typename Encoding, typename Iterator,
          typename = typename std::iterator_traits<Iterator>
              ::iterator_category>
class number_callback_adapter
{
    Callbacks& sink_;
    Encoding& codec_;
    Iterator begin_;   // position when the adapter was created
    Iterator& live_;   // the caller's iterator, observed by reference

    // Declared but never defined: the adapter is not copyable.
    number_callback_adapter(const number_callback_adapter&);

public:
    number_callback_adapter(Callbacks& callbacks, Encoding& encoding,
                            Iterator& cur)
        : sink_(callbacks), codec_(encoding), begin_(cur), live_(cur)
    {}

    // Individual code units are ignored; the whole range is converted later.
    void operator ()(typename Encoding::external_char) {}

    // Converts [begin_, live_) with Encoding::to_internal and reports it.
    void finish() const {
        sink_.on_number(codec_.to_internal(begin_, live_));
    }
};
0135
0136 template <typename Callbacks, typename Encoding, typename Iterator>
0137 class number_callback_adapter<Callbacks, Encoding, Iterator,
0138 std::input_iterator_tag>
0139 {
0140 public:
0141 number_callback_adapter(Callbacks& callbacks, Encoding& encoding,
0142 Iterator&)
0143 : callbacks(callbacks), encoding(encoding), first(true)
0144 {}
0145
0146 void operator ()(typename Encoding::external_char c) {
0147 if (first) {
0148 callbacks.on_begin_number();
0149 first = false;
0150 }
0151 callbacks.on_digit(encoding.to_internal_trivial(c));
0152 }
0153
0154 void finish() const {
0155 callbacks.on_end_number();
0156 }
0157 private:
0158 number_callback_adapter(const number_callback_adapter&);
0159
0160 Callbacks& callbacks;
0161 Encoding& encoding;
0162 bool first;
0163 };
0164
// String adapter used for every iterator category except
// std::input_iterator_tag (which has its own specialization). It remembers
// where the current run of code units began and reports the run as a whole
// through Callbacks::on_code_units.
template <typename Callbacks, typename Encoding, typename Iterator,
          typename = typename std::iterator_traits<Iterator>
              ::iterator_category>
class string_callback_adapter
{
    Callbacks& sink_;
    Encoding& codec_;
    Iterator& live_;       // the caller's iterator, observed by reference
    Iterator run_start_;   // where the current run began

    // Declared but never defined: the adapter is not copyable.
    string_callback_adapter(const string_callback_adapter&);

public:
    string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
                            Iterator& cur)
        : sink_(callbacks), codec_(encoding), live_(cur),
          run_start_(cur)
    {}

    // Marks the current position as the beginning of a new run.
    void start_run() {
        run_start_ = live_;
    }

    // Reports [run_start_, live_) after conversion by Encoding::to_internal.
    void finish_run() {
        sink_.on_code_units(codec_.to_internal(run_start_, live_));
    }

    // Delegates to Encoding::skip_codepoint, which receives the live
    // iterator by reference together with end and error_fn.
    template <typename Sentinel, typename EncodingErrorFn>
    void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
        codec_.skip_codepoint(live_, end, error_fn);
    }
};
0198
0199 template <typename Callbacks, typename Encoding, typename Iterator>
0200 class string_callback_adapter<Callbacks, Encoding, Iterator,
0201 std::input_iterator_tag>
0202 {
0203 public:
0204 string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
0205 Iterator& cur)
0206 : callbacks(callbacks), encoding(encoding), cur(cur)
0207 {}
0208
0209 void start_run() {}
0210
0211 void finish_run() {}
0212
0213 template <typename Sentinel, typename EncodingErrorFn>
0214 void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
0215 encoding.transcode_codepoint(cur, end,
0216 boost::bind(&Callbacks::on_code_unit,
0217 boost::ref(callbacks), boost::placeholders::_1),
0218 error_fn);
0219 }
0220
0221 private:
0222 string_callback_adapter(const string_callback_adapter&);
0223
0224 Callbacks& callbacks;
0225 Encoding& encoding;
0226 Iterator& cur;
0227 };
0228
0229 template <typename Callbacks, typename Encoding, typename Iterator,
0230 typename Sentinel>
0231 class parser
0232 {
0233 typedef detail::number_callback_adapter<Callbacks, Encoding, Iterator>
0234 number_adapter;
0235 typedef detail::string_callback_adapter<Callbacks, Encoding, Iterator>
0236 string_adapter;
0237 typedef detail::source<Encoding, Iterator, Sentinel> source;
0238 typedef typename source::code_unit code_unit;
0239
0240 public:
0241 parser(Callbacks& callbacks, Encoding& encoding)
0242 : callbacks(callbacks), encoding(encoding), src(encoding)
0243 {}
0244
0245 template <typename Range>
0246 void set_input(const std::string& filename, const Range& r) {
0247 src.set_input(filename, r);
0248 }
0249
0250 void finish() {
0251 skip_ws();
0252 if (!src.done()) {
0253 parse_error("garbage after data");
0254 }
0255 }
0256
0257 void parse_value() {
0258 if (parse_object()) return;
0259 if (parse_array()) return;
0260 if (parse_string()) return;
0261 if (parse_boolean()) return;
0262 if (parse_null()) return;
0263 if (parse_number()) return;
0264 parse_error("expected value");
0265 }
0266
0267 bool parse_null() {
0268 skip_ws();
0269 if (!have(&Encoding::is_n)) {
0270 return false;
0271 }
0272 expect(&Encoding::is_u, "expected 'null'");
0273 expect(&Encoding::is_l, "expected 'null'");
0274 expect(&Encoding::is_l, "expected 'null'");
0275 callbacks.on_null();
0276 return true;
0277 }
0278
0279 bool parse_boolean() {
0280 skip_ws();
0281 if (have(&Encoding::is_t)) {
0282 expect(&Encoding::is_r, "expected 'true'");
0283 expect(&Encoding::is_u, "expected 'true'");
0284 expect(&Encoding::is_e, "expected 'true'");
0285 callbacks.on_boolean(true);
0286 return true;
0287 }
0288 if (have(&Encoding::is_f)) {
0289 expect(&Encoding::is_a, "expected 'false'");
0290 expect(&Encoding::is_l, "expected 'false'");
0291 expect(&Encoding::is_s, "expected 'false'");
0292 expect(&Encoding::is_e, "expected 'false'");
0293 callbacks.on_boolean(false);
0294 return true;
0295 }
0296 return false;
0297 }
0298
0299 bool parse_number() {
0300 skip_ws();
0301
0302 number_adapter adapter(callbacks, encoding, src.raw_cur());
0303 bool started = false;
0304 if (have(&Encoding::is_minus, adapter)) {
0305 started = true;
0306 }
0307 if (!have(&Encoding::is_0, adapter) && !parse_int_part(adapter)) {
0308 if (started) {
0309 parse_error("expected digits after -");
0310 }
0311 return false;
0312 }
0313 parse_frac_part(adapter);
0314 parse_exp_part(adapter);
0315 adapter.finish();
0316 return true;
0317 }
0318
0319 bool parse_string() {
0320 skip_ws();
0321
0322 if (!have(&Encoding::is_quote)) {
0323 return false;
0324 }
0325
0326 callbacks.on_begin_string();
0327 string_adapter adapter(callbacks, encoding, src.raw_cur());
0328 while (!encoding.is_quote(need_cur("unterminated string"))) {
0329 if (encoding.is_backslash(*src.raw_cur())) {
0330 adapter.finish_run();
0331 next();
0332 parse_escape();
0333 adapter.start_run();
0334 } else {
0335 adapter.process_codepoint(src.raw_end(),
0336 boost::bind(&parser::parse_error,
0337 this, "invalid code sequence"));
0338 }
0339 }
0340 adapter.finish_run();
0341 callbacks.on_end_string();
0342 next();
0343 return true;
0344 }
0345
0346 bool parse_array() {
0347 skip_ws();
0348
0349 if (!have(&Encoding::is_open_bracket)) {
0350 return false;
0351 }
0352
0353 callbacks.on_begin_array();
0354 skip_ws();
0355 if (have(&Encoding::is_close_bracket)) {
0356 callbacks.on_end_array();
0357 return true;
0358 }
0359 do {
0360 parse_value();
0361 skip_ws();
0362 } while (have(&Encoding::is_comma));
0363 expect(&Encoding::is_close_bracket, "expected ']' or ','");
0364 callbacks.on_end_array();
0365 return true;
0366 }
0367
0368 bool parse_object() {
0369 skip_ws();
0370
0371 if (!have(&Encoding::is_open_brace)) {
0372 return false;
0373 }
0374
0375 callbacks.on_begin_object();
0376 skip_ws();
0377 if (have(&Encoding::is_close_brace)) {
0378 callbacks.on_end_object();
0379 return true;
0380 }
0381 do {
0382 if (!parse_string()) {
0383 parse_error("expected key string");
0384 }
0385 skip_ws();
0386 expect(&Encoding::is_colon, "expected ':'");
0387 parse_value();
0388 skip_ws();
0389 } while (have(&Encoding::is_comma));
0390 expect(&Encoding::is_close_brace, "expected '}' or ','");
0391 callbacks.on_end_object();
0392 return true;
0393 }
0394
0395 private:
0396 typedef typename source::encoding_predicate encoding_predicate;
0397
0398 void parse_error(const char* msg) { src.parse_error(msg); }
0399 void next() { src.next(); }
0400 template <typename Action>
0401 bool have(encoding_predicate p, Action& a) { return src.have(p, a); }
0402 bool have(encoding_predicate p) { return src.have(p); }
0403 template <typename Action>
0404 void expect(encoding_predicate p, const char* msg, Action& a) {
0405 src.expect(p, msg, a);
0406 }
0407 void expect(encoding_predicate p, const char* msg) {
0408 src.expect(p, msg);
0409 }
0410 code_unit need_cur(const char* msg) { return src.need_cur(msg); }
0411
0412 void skip_ws() {
0413 while (have(&Encoding::is_ws)) {
0414 }
0415 }
0416
0417 bool parse_int_part(number_adapter& action) {
0418 if (!have(&Encoding::is_digit0, action)) {
0419 return false;
0420 }
0421 parse_digits(action);
0422 return true;
0423 }
0424
0425 void parse_frac_part(number_adapter& action) {
0426 if (!have(&Encoding::is_dot, action)) {
0427 return;
0428 }
0429 expect(&Encoding::is_digit, "need at least one digit after '.'",
0430 action);
0431 parse_digits(action);
0432 }
0433
0434 void parse_exp_part(number_adapter& action) {
0435 if (!have(&Encoding::is_eE, action)) {
0436 return;
0437 }
0438 have(&Encoding::is_plusminus, action);
0439 expect(&Encoding::is_digit, "need at least one digit in exponent",
0440 action);
0441 parse_digits(action);
0442 }
0443
0444 void parse_digits(number_adapter& action) {
0445 while (have(&Encoding::is_digit, action)) {
0446 }
0447 }
0448
0449 void parse_escape() {
0450 if (have(&Encoding::is_quote)) {
0451 feed(0x22);
0452 } else if (have(&Encoding::is_backslash)) {
0453 feed(0x5c);
0454 } else if (have(&Encoding::is_slash)) {
0455 feed(0x2f);
0456 } else if (have(&Encoding::is_b)) {
0457 feed(0x08);
0458 } else if (have(&Encoding::is_f)) {
0459 feed(0x0c);
0460 } else if (have(&Encoding::is_n)) {
0461 feed(0x0a);
0462 } else if (have(&Encoding::is_r)) {
0463 feed(0x0d);
0464 } else if (have(&Encoding::is_t)) {
0465 feed(0x09);
0466 } else if (have(&Encoding::is_u)) {
0467 parse_codepoint_ref();
0468 } else {
0469 parse_error("invalid escape sequence");
0470 }
0471 }
0472
0473 unsigned parse_hex_quad() {
0474 unsigned codepoint = 0;
0475 for (int i = 0; i < 4; ++i) {
0476 int value = encoding.decode_hexdigit(
0477 need_cur("invalid escape sequence"));
0478 if (value < 0) {
0479 parse_error("invalid escape sequence");
0480 }
0481 codepoint *= 16;
0482 codepoint += value;
0483 next();
0484 }
0485 return codepoint;
0486 }
0487
0488 static bool is_surrogate_high(unsigned codepoint) {
0489 return (codepoint & 0xfc00) == 0xd800;
0490 }
0491 static bool is_surrogate_low(unsigned codepoint) {
0492 return (codepoint & 0xfc00) == 0xdc00;
0493 }
0494 static unsigned combine_surrogates(unsigned high, unsigned low) {
0495 return 0x010000 + (((high & 0x3ff) << 10) | (low & 0x3ff));
0496 }
0497
0498 void parse_codepoint_ref() {
0499 unsigned codepoint = parse_hex_quad();
0500 if (is_surrogate_low(codepoint)) {
0501 parse_error("invalid codepoint, stray low surrogate");
0502 }
0503 if (is_surrogate_high(codepoint)) {
0504 expect(&Encoding::is_backslash,
0505 "invalid codepoint, stray high surrogate");
0506 expect(&Encoding::is_u,
0507 "expected codepoint reference after high surrogate");
0508 int low = parse_hex_quad();
0509 if (!is_surrogate_low(low)) {
0510 parse_error("expected low surrogate after high surrogate");
0511 }
0512 codepoint = combine_surrogates(codepoint, low);
0513 }
0514 feed(codepoint);
0515 }
0516
0517 void feed(unsigned codepoint) {
0518 encoding.feed_codepoint(codepoint,
0519 boost::bind(&Callbacks::on_code_unit,
0520 boost::ref(callbacks), boost::placeholders::_1));
0521 }
0522
0523 Callbacks& callbacks;
0524 Encoding& encoding;
0525 source src;
0526 };
0527
0528 }}}}
0529
0530 #endif