Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-07-14 08:45:41

0001 #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
0002 #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
0003 
0004 #include <boost/property_tree/json_parser/error.hpp>
0005 
0006 #include <boost/core/ref.hpp>
0007 #include <boost/bind/bind.hpp>
0008 #include <boost/bind/placeholders.hpp>
0009 
0010 #include <iterator>
0011 #include <sstream>
0012 #include <string>
0013 
0014 namespace boost { namespace property_tree {
0015     namespace json_parser { namespace detail
0016 {
0017 
0018     template <typename Encoding, typename Iterator, typename Sentinel>
0019     class source
0020     {
0021     public:
0022         typedef typename std::iterator_traits<Iterator>::value_type
0023             code_unit;
0024         typedef bool (Encoding::*encoding_predicate)(code_unit c) const;
0025 
0026         explicit source(Encoding& encoding) : encoding(encoding) {}
0027 
0028         template <typename Range>
0029         void set_input(const std::string& filename, const Range& r)
0030         {
0031             this->filename = filename;
0032             cur = r.begin();
0033             end = r.end();
0034             // Note that there is no backtracking, so if e.g. a UTF-8 file
0035             // starts with something that initially looks like a BOM but isn't,
0036             // there's trouble.
0037             // However, no valid JSON file can start with a UTF-8 EF byte.
0038             encoding.skip_introduction(cur, end);
0039             line = 1;
0040             offset = 0;
0041         }
0042 
0043         bool done() const { return cur == end; }
0044 
0045         void parse_error(const char* msg) {
0046             BOOST_PROPERTY_TREE_THROW(
0047                 json_parser_error(msg, filename, line));
0048         }
0049 
0050         void next() {
0051             if (encoding.is_nl(*cur)) {
0052                 ++line;
0053                 offset = 0;
0054             } else {
0055                 ++offset;
0056             }
0057             ++cur;
0058         }
0059 
0060         template <typename Action>
0061         bool have(encoding_predicate p, Action& a) {
0062             bool found = cur != end && (encoding.*p)(*cur);
0063             if (found) {
0064                 a(*cur);
0065                 next();
0066             }
0067             return found;
0068         }
0069 
0070         bool have(encoding_predicate p) {
0071             DoNothing n;
0072             return have(p, n);
0073         }
0074 
0075         template <typename Action>
0076         void expect(encoding_predicate p, const char* msg, Action& a) {
0077             if (!have(p, a)) {
0078                 parse_error(msg);
0079             }
0080         }
0081 
0082         void expect(encoding_predicate p, const char* msg) {
0083             DoNothing n;
0084             expect(p, msg, n);
0085         }
0086 
0087         code_unit need_cur(const char* msg) {
0088             if (cur == end) {
0089                 parse_error(msg);
0090             }
0091             return *cur;
0092         }
0093 
0094         Iterator& raw_cur() { return cur; }
0095         Sentinel raw_end() { return end; }
0096 
0097     private:
0098         struct DoNothing {
0099             void operator ()(code_unit) const {}
0100         };
0101 
0102         Encoding& encoding;
0103         Iterator cur;
0104         Sentinel end;
0105         std::string filename;
0106         int line;
0107         int offset;
0108     };
0109 
/// Collects the code units of a number and reports them to the callbacks.
///
/// Primary template: chosen for every iterator category except
/// std::input_iterator_tag (see the partial specialization below). It
/// remembers where the cursor stood at construction and, on finish(),
/// converts the range [start, current cursor) in one go.
template <typename Callbacks, typename Encoding, typename Iterator,
    typename = typename std::iterator_traits<Iterator>
        ::iterator_category>
class number_callback_adapter
{
public:
    /// @param cur  Live cursor; kept by reference so finish() sees how far
    ///             the caller has advanced it.
    number_callback_adapter(Callbacks& callbacks, Encoding& encoding,
                            Iterator& cur)
        : callbacks_(callbacks), encoding_(encoding), start_(cur),
          cursor_(cur)
    {}

    /// Individual code units are ignored; the whole range is used instead.
    void operator ()(typename Encoding::external_char) {}

    /// Hands encoding.to_internal(start, cursor) to callbacks.on_number().
    void finish() const {
        callbacks_.on_number(encoding_.to_internal(start_, cursor_));
    }

private:
    // Declared but not defined: the adapter is not copyable.
    number_callback_adapter(const number_callback_adapter&);

    Callbacks& callbacks_;
    Encoding& encoding_;
    Iterator start_;
    Iterator& cursor_;
};

/// Specialization for single-pass input iterators: the range cannot be
/// revisited, so every code unit is forwarded as soon as it is matched.
template <typename Callbacks, typename Encoding, typename Iterator>
class number_callback_adapter<Callbacks, Encoding, Iterator,
                              std::input_iterator_tag>
{
public:
    /// The cursor argument is accepted only for interface parity with the
    /// primary template and is not used.
    number_callback_adapter(Callbacks& callbacks, Encoding& encoding,
                            Iterator&)
        : callbacks_(callbacks), encoding_(encoding), at_start_(true)
    {}

    /// Forwards @p c via on_digit(); the very first call is preceded by
    /// on_begin_number().
    void operator ()(typename Encoding::external_char c) {
        if (at_start_) {
            callbacks_.on_begin_number();
            at_start_ = false;
        }
        callbacks_.on_digit(encoding_.to_internal_trivial(c));
    }

    /// Signals the end of the number with on_end_number().
    void finish() const {
        callbacks_.on_end_number();
    }

private:
    // Declared but not defined: the adapter is not copyable.
    number_callback_adapter(const number_callback_adapter&);

    Callbacks& callbacks_;
    Encoding& encoding_;
    bool at_start_;  // true until the first code unit has been forwarded
};
0164 
/// Reports the contents of a string literal to the callbacks in "runs".
///
/// Primary template: used for every iterator category for which no more
/// specific specialization exists. A run is the stretch of input between
/// start_run() (or construction) and finish_run(); it is converted and
/// delivered as a whole rather than unit by unit.
template <typename Callbacks, typename Encoding, typename Iterator,
    typename = typename std::iterator_traits<Iterator>
        ::iterator_category>
class string_callback_adapter
{
public:
    /// @param cur  Live cursor, kept by reference; the first run starts at
    ///             its position at construction time.
    string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
                            Iterator& cur)
        : callbacks_(callbacks), encoding_(encoding), cursor_(cur),
          run_start_(cur)
    {}

    /// Marks the current cursor position as the beginning of a new run.
    void start_run() {
        run_start_ = cursor_;
    }

    /// Delivers [run start, cursor) through encoding.to_internal() to
    /// callbacks.on_code_units().
    void finish_run() {
        callbacks_.on_code_units(
            encoding_.to_internal(run_start_, cursor_));
    }

    /// Moves the cursor past one codepoint via encoding.skip_codepoint();
    /// nothing is emitted here because the run is delivered later.
    template <typename Sentinel, typename EncodingErrorFn>
    void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
        encoding_.skip_codepoint(cursor_, end, error_fn);
    }

private:
    // Declared but not defined: the adapter is not copyable.
    string_callback_adapter(const string_callback_adapter&);

    Callbacks& callbacks_;
    Encoding& encoding_;
    Iterator& cursor_;
    Iterator run_start_;
};
0198 
0199     template <typename Callbacks, typename Encoding, typename Iterator>
0200     class string_callback_adapter<Callbacks, Encoding, Iterator,
0201                                   std::input_iterator_tag>
0202     {
0203     public:
0204         string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
0205                                 Iterator& cur)
0206             : callbacks(callbacks), encoding(encoding), cur(cur)
0207         {}
0208 
0209         void start_run() {}
0210 
0211         void finish_run() {}
0212 
0213         template <typename Sentinel, typename EncodingErrorFn>
0214         void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
0215             encoding.transcode_codepoint(cur, end,
0216                 boost::bind(&Callbacks::on_code_unit,
0217                             boost::ref(callbacks), boost::placeholders::_1),
0218                 error_fn);
0219         }
0220 
0221     private:
0222         string_callback_adapter(const string_callback_adapter&);
0223 
0224         Callbacks& callbacks;
0225         Encoding& encoding;
0226         Iterator& cur;
0227     };
0228 
0229     template <typename Callbacks, typename Encoding, typename Iterator,
0230               typename Sentinel>
0231     class parser
0232     {
0233         typedef detail::number_callback_adapter<Callbacks, Encoding, Iterator>
0234             number_adapter;
0235         typedef detail::string_callback_adapter<Callbacks, Encoding, Iterator>
0236             string_adapter;
0237         typedef detail::source<Encoding, Iterator, Sentinel> source;
0238         typedef typename source::code_unit code_unit;
0239 
0240     public:
0241         parser(Callbacks& callbacks, Encoding& encoding)
0242             : callbacks(callbacks), encoding(encoding), src(encoding)
0243         {}
0244 
0245         template <typename Range>
0246         void set_input(const std::string& filename, const Range& r) {
0247             src.set_input(filename, r);
0248         }
0249 
0250         void finish() {
0251             skip_ws();
0252             if (!src.done()) {
0253                 parse_error("garbage after data");
0254             }
0255         }
0256 
0257         void parse_value() {
0258             if (parse_object()) return;
0259             if (parse_array()) return;
0260             if (parse_string()) return;
0261             if (parse_boolean()) return;
0262             if (parse_null()) return;
0263             if (parse_number()) return;
0264             parse_error("expected value");
0265         }
0266 
0267         bool parse_null() {
0268             skip_ws();
0269             if (!have(&Encoding::is_n)) {
0270                 return false;
0271             }
0272             expect(&Encoding::is_u, "expected 'null'");
0273             expect(&Encoding::is_l, "expected 'null'");
0274             expect(&Encoding::is_l, "expected 'null'");
0275             callbacks.on_null();
0276             return true;
0277         }
0278 
0279         bool parse_boolean() {
0280             skip_ws();
0281             if (have(&Encoding::is_t)) {
0282                 expect(&Encoding::is_r, "expected 'true'");
0283                 expect(&Encoding::is_u, "expected 'true'");
0284                 expect(&Encoding::is_e, "expected 'true'");
0285                 callbacks.on_boolean(true);
0286                 return true;
0287             }
0288             if (have(&Encoding::is_f)) {
0289                 expect(&Encoding::is_a, "expected 'false'");
0290                 expect(&Encoding::is_l, "expected 'false'");
0291                 expect(&Encoding::is_s, "expected 'false'");
0292                 expect(&Encoding::is_e, "expected 'false'");
0293                 callbacks.on_boolean(false);
0294                 return true;
0295             }
0296             return false;
0297         }
0298 
0299         bool parse_number() {
0300             skip_ws();
0301 
0302             number_adapter adapter(callbacks, encoding, src.raw_cur());
0303             bool started = false;
0304             if (have(&Encoding::is_minus, adapter)) {
0305                 started = true;
0306             }
0307             if (!have(&Encoding::is_0, adapter) && !parse_int_part(adapter)) {
0308                 if (started) {
0309                     parse_error("expected digits after -");
0310                 }
0311                 return false;
0312             }
0313             parse_frac_part(adapter);
0314             parse_exp_part(adapter);
0315             adapter.finish();
0316             return true;
0317         }
0318 
0319         bool parse_string() {
0320             skip_ws();
0321 
0322             if (!have(&Encoding::is_quote)) {
0323                 return false;
0324             }
0325 
0326             callbacks.on_begin_string();
0327             string_adapter adapter(callbacks, encoding, src.raw_cur());
0328             while (!encoding.is_quote(need_cur("unterminated string"))) {
0329                 if (encoding.is_backslash(*src.raw_cur())) {
0330                     adapter.finish_run();
0331                     next();
0332                     parse_escape();
0333                     adapter.start_run();
0334                 } else {
0335                     adapter.process_codepoint(src.raw_end(),
0336                         boost::bind(&parser::parse_error,
0337                                     this, "invalid code sequence"));
0338                 }
0339             }
0340             adapter.finish_run();
0341             callbacks.on_end_string();
0342             next();
0343             return true;
0344         }
0345 
0346         bool parse_array() {
0347             skip_ws();
0348 
0349             if (!have(&Encoding::is_open_bracket)) {
0350                 return false;
0351             }
0352 
0353             callbacks.on_begin_array();
0354             skip_ws();
0355             if (have(&Encoding::is_close_bracket)) {
0356                 callbacks.on_end_array();
0357                 return true;
0358             }
0359             do {
0360                 parse_value();
0361                 skip_ws();
0362             } while (have(&Encoding::is_comma));
0363             expect(&Encoding::is_close_bracket, "expected ']' or ','");
0364             callbacks.on_end_array();
0365             return true;
0366         }
0367 
0368         bool parse_object() {
0369             skip_ws();
0370 
0371             if (!have(&Encoding::is_open_brace)) {
0372                 return false;
0373             }
0374 
0375             callbacks.on_begin_object();
0376             skip_ws();
0377             if (have(&Encoding::is_close_brace)) {
0378                 callbacks.on_end_object();
0379                 return true;
0380             }
0381             do {
0382                 if (!parse_string()) {
0383                     parse_error("expected key string");
0384                 }
0385                 skip_ws();
0386                 expect(&Encoding::is_colon, "expected ':'");
0387                 parse_value();
0388                 skip_ws();
0389             } while (have(&Encoding::is_comma));
0390             expect(&Encoding::is_close_brace, "expected '}' or ','");
0391             callbacks.on_end_object();
0392             return true;
0393         }
0394 
0395     private:
0396         typedef typename source::encoding_predicate encoding_predicate;
0397 
0398         void parse_error(const char* msg) { src.parse_error(msg); }
0399         void next() { src.next(); }
0400         template <typename Action>
0401         bool have(encoding_predicate p, Action& a) { return src.have(p, a); }
0402         bool have(encoding_predicate p) { return src.have(p); }
0403         template <typename Action>
0404         void expect(encoding_predicate p, const char* msg, Action& a) {
0405             src.expect(p, msg, a);
0406         }
0407         void expect(encoding_predicate p, const char* msg) {
0408             src.expect(p, msg);
0409         }
0410         code_unit need_cur(const char* msg) { return src.need_cur(msg); }
0411 
0412         void skip_ws() {
0413             while (have(&Encoding::is_ws)) {
0414             }
0415         }
0416 
0417         bool parse_int_part(number_adapter& action) {
0418             if (!have(&Encoding::is_digit0, action)) {
0419                 return false;
0420             }
0421             parse_digits(action);
0422             return true;
0423         }
0424 
0425         void parse_frac_part(number_adapter& action) {
0426             if (!have(&Encoding::is_dot, action)) {
0427                 return;
0428             }
0429             expect(&Encoding::is_digit, "need at least one digit after '.'",
0430                    action);
0431             parse_digits(action);
0432         }
0433 
0434         void parse_exp_part(number_adapter& action) {
0435             if (!have(&Encoding::is_eE, action)) {
0436                 return;
0437             }
0438             have(&Encoding::is_plusminus, action);
0439             expect(&Encoding::is_digit, "need at least one digit in exponent",
0440                    action);
0441             parse_digits(action);
0442         }
0443 
0444         void parse_digits(number_adapter& action) {
0445             while (have(&Encoding::is_digit, action)) {
0446             }
0447         }
0448 
0449         void parse_escape() {
0450             if (have(&Encoding::is_quote)) {
0451                 feed(0x22);
0452             } else if (have(&Encoding::is_backslash)) {
0453                 feed(0x5c);
0454             } else if (have(&Encoding::is_slash)) {
0455                 feed(0x2f);
0456             } else if (have(&Encoding::is_b)) {
0457                 feed(0x08); // backspace
0458             } else if (have(&Encoding::is_f)) {
0459                 feed(0x0c); // formfeed
0460             } else if (have(&Encoding::is_n)) {
0461                 feed(0x0a); // line feed
0462             } else if (have(&Encoding::is_r)) {
0463                 feed(0x0d); // carriage return
0464             } else if (have(&Encoding::is_t)) {
0465                 feed(0x09); // horizontal tab
0466             } else if (have(&Encoding::is_u)) {
0467                 parse_codepoint_ref();
0468             } else {
0469                 parse_error("invalid escape sequence");
0470             }
0471         }
0472 
0473         unsigned parse_hex_quad() {
0474             unsigned codepoint = 0;
0475             for (int i = 0; i < 4; ++i) {
0476                 int value = encoding.decode_hexdigit(
0477                     need_cur("invalid escape sequence"));
0478                 if (value < 0) {
0479                     parse_error("invalid escape sequence");
0480                 }
0481                 codepoint *= 16;
0482                 codepoint += value;
0483                 next();
0484             }
0485             return codepoint;
0486         }
0487 
0488         static bool is_surrogate_high(unsigned codepoint) {
0489             return (codepoint & 0xfc00) == 0xd800;
0490         }
0491         static bool is_surrogate_low(unsigned codepoint) {
0492             return (codepoint & 0xfc00) == 0xdc00;
0493         }
0494         static unsigned combine_surrogates(unsigned high, unsigned low) {
0495             return 0x010000 + (((high & 0x3ff) << 10) | (low & 0x3ff));
0496         }
0497 
0498         void parse_codepoint_ref() {
0499             unsigned codepoint = parse_hex_quad();
0500             if (is_surrogate_low(codepoint)) {
0501                 parse_error("invalid codepoint, stray low surrogate");
0502             }
0503             if (is_surrogate_high(codepoint)) {
0504                 expect(&Encoding::is_backslash,
0505                     "invalid codepoint, stray high surrogate");
0506                 expect(&Encoding::is_u,
0507                     "expected codepoint reference after high surrogate");
0508                 int low = parse_hex_quad();
0509                 if (!is_surrogate_low(low)) {
0510                     parse_error("expected low surrogate after high surrogate");
0511                 }
0512                 codepoint = combine_surrogates(codepoint, low);
0513             }
0514             feed(codepoint);
0515         }
0516 
0517         void feed(unsigned codepoint) {
0518             encoding.feed_codepoint(codepoint,
0519                                     boost::bind(&Callbacks::on_code_unit,
0520                                                 boost::ref(callbacks), boost::placeholders::_1));
0521         }
0522 
0523         Callbacks& callbacks;
0524         Encoding& encoding;
0525         source src;
0526     };
0527 
0528 }}}}
0529 
0530 #endif