Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:50:18

0001 #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
0002 #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
0003 
0004 #include <boost/property_tree/json_parser/error.hpp>
0005 
0006 #include <boost/core/ref.hpp>
0007 #include <boost/bind/bind.hpp>
0008 
0009 #include <iterator>
0010 #include <sstream>
0011 #include <string>
0012 
0013 namespace boost { namespace property_tree {
0014     namespace json_parser { namespace detail
0015 {
0016 
0017     template <typename Encoding, typename Iterator, typename Sentinel>
0018     class source
0019     {
0020     public:
0021         typedef typename std::iterator_traits<Iterator>::value_type
0022             code_unit;
0023         typedef bool (Encoding::*encoding_predicate)(code_unit c) const;
0024 
0025         explicit source(Encoding& encoding) : encoding(encoding) {}
0026 
0027         template <typename Range>
0028         void set_input(const std::string& filename, const Range& r)
0029         {
0030             this->filename = filename;
0031             cur = r.begin();
0032             end = r.end();
0033             // Note that there is no backtracking, so if e.g. a UTF-8 file
0034             // starts with something that initially looks like a BOM but isn't,
0035             // there's trouble.
0036             // However, no valid JSON file can start with a UTF-8 EF byte.
0037             encoding.skip_introduction(cur, end);
0038             line = 1;
0039             offset = 0;
0040         }
0041 
0042         bool done() const { return cur == end; }
0043 
0044         void parse_error(const char* msg) {
0045             BOOST_PROPERTY_TREE_THROW(
0046                 json_parser_error(msg, filename, line));
0047         }
0048 
0049         void next() {
0050             if (encoding.is_nl(*cur)) {
0051                 ++line;
0052                 offset = 0;
0053             } else {
0054                 ++offset;
0055             }
0056             ++cur;
0057         }
0058 
0059         template <typename Action>
0060         bool have(encoding_predicate p, Action& a) {
0061             bool found = cur != end && (encoding.*p)(*cur);
0062             if (found) {
0063                 a(*cur);
0064                 next();
0065             }
0066             return found;
0067         }
0068 
0069         bool have(encoding_predicate p) {
0070             DoNothing n;
0071             return have(p, n);
0072         }
0073 
0074         template <typename Action>
0075         void expect(encoding_predicate p, const char* msg, Action& a) {
0076             if (!have(p, a)) {
0077                 parse_error(msg);
0078             }
0079         }
0080 
0081         void expect(encoding_predicate p, const char* msg) {
0082             DoNothing n;
0083             expect(p, msg, n);
0084         }
0085 
0086         code_unit need_cur(const char* msg) {
0087             if (cur == end) {
0088                 parse_error(msg);
0089             }
0090             return *cur;
0091         }
0092 
0093         Iterator& raw_cur() { return cur; }
0094         Sentinel raw_end() { return end; }
0095 
0096     private:
0097         struct DoNothing {
0098             void operator ()(code_unit) const {}
0099         };
0100 
0101         Encoding& encoding;
0102         Iterator cur;
0103         Sentinel end;
0104         std::string filename;
0105         int line;
0106         int offset;
0107     };
0108 
0109     template <typename Callbacks, typename Encoding, typename Iterator,
0110         typename = typename std::iterator_traits<Iterator>
0111             ::iterator_category>
0112     class number_callback_adapter
0113     {
0114     public:
0115         number_callback_adapter(Callbacks& callbacks, Encoding& encoding,
0116                                 Iterator& cur)
0117             : callbacks(callbacks), encoding(encoding), first(cur), cur(cur)
0118         {}
0119 
0120         void operator ()(typename Encoding::external_char) {}
0121 
0122         void finish() const {
0123             callbacks.on_number(encoding.to_internal(first, cur));
0124         }
0125 
0126     private:
0127         number_callback_adapter(const number_callback_adapter&);
0128 
0129         Callbacks& callbacks;
0130         Encoding& encoding;
0131         Iterator first;
0132         Iterator& cur;
0133     };
0134 
0135     template <typename Callbacks, typename Encoding, typename Iterator>
0136     class number_callback_adapter<Callbacks, Encoding, Iterator,
0137                                   std::input_iterator_tag>
0138     {
0139     public:
0140         number_callback_adapter(Callbacks& callbacks, Encoding& encoding,
0141                                 Iterator&)
0142             : callbacks(callbacks), encoding(encoding), first(true)
0143         {}
0144 
0145         void operator ()(typename Encoding::external_char c) {
0146             if (first) {
0147                 callbacks.on_begin_number();
0148                 first = false;
0149             }
0150             callbacks.on_digit(encoding.to_internal_trivial(c));
0151         }
0152 
0153         void finish() const {
0154             callbacks.on_end_number();
0155         }
0156     private:
0157         number_callback_adapter(const number_callback_adapter&);
0158 
0159         Callbacks& callbacks;
0160         Encoding& encoding;
0161         bool first;
0162     };
0163 
0164     template <typename Callbacks, typename Encoding, typename Iterator,
0165         typename = typename std::iterator_traits<Iterator>
0166             ::iterator_category>
0167     class string_callback_adapter
0168     {
0169     public:
0170         string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
0171                                 Iterator& cur)
0172             : callbacks(callbacks), encoding(encoding), cur(cur),
0173               run_begin(cur)
0174         {}
0175 
0176         void start_run() {
0177             run_begin = cur;
0178         }
0179 
0180         void finish_run() {
0181             callbacks.on_code_units(encoding.to_internal(run_begin, cur));
0182         }
0183 
0184         template <typename Sentinel, typename EncodingErrorFn>
0185         void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
0186             encoding.skip_codepoint(cur, end, error_fn);
0187         }
0188 
0189     private:
0190         string_callback_adapter(const string_callback_adapter&);
0191 
0192         Callbacks& callbacks;
0193         Encoding& encoding;
0194         Iterator& cur;
0195         Iterator run_begin;
0196     };
0197 
0198     template <typename Callbacks, typename Encoding, typename Iterator>
0199     class string_callback_adapter<Callbacks, Encoding, Iterator,
0200                                   std::input_iterator_tag>
0201     {
0202     public:
0203         string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
0204                                 Iterator& cur)
0205             : callbacks(callbacks), encoding(encoding), cur(cur)
0206         {}
0207 
0208         void start_run() {}
0209 
0210         void finish_run() {}
0211 
0212         template <typename Sentinel, typename EncodingErrorFn>
0213         void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
0214             encoding.transcode_codepoint(cur, end,
0215                 boost::bind(&Callbacks::on_code_unit,
0216                             boost::ref(callbacks), boost::placeholders::_1),
0217                 error_fn);
0218         }
0219 
0220     private:
0221         string_callback_adapter(const string_callback_adapter&);
0222 
0223         Callbacks& callbacks;
0224         Encoding& encoding;
0225         Iterator& cur;
0226     };
0227 
0228     template <typename Callbacks, typename Encoding, typename Iterator,
0229               typename Sentinel>
0230     class parser
0231     {
0232         typedef detail::number_callback_adapter<Callbacks, Encoding, Iterator>
0233             number_adapter;
0234         typedef detail::string_callback_adapter<Callbacks, Encoding, Iterator>
0235             string_adapter;
0236         typedef detail::source<Encoding, Iterator, Sentinel> source;
0237         typedef typename source::code_unit code_unit;
0238 
0239     public:
0240         parser(Callbacks& callbacks, Encoding& encoding)
0241             : callbacks(callbacks), encoding(encoding), src(encoding)
0242         {}
0243 
0244         template <typename Range>
0245         void set_input(const std::string& filename, const Range& r) {
0246             src.set_input(filename, r);
0247         }
0248 
0249         void finish() {
0250             skip_ws();
0251             if (!src.done()) {
0252                 parse_error("garbage after data");
0253             }
0254         }
0255 
0256         void parse_value() {
0257             if (parse_object()) return;
0258             if (parse_array()) return;
0259             if (parse_string()) return;
0260             if (parse_boolean()) return;
0261             if (parse_null()) return;
0262             if (parse_number()) return;
0263             parse_error("expected value");
0264         }
0265 
0266         bool parse_null() {
0267             skip_ws();
0268             if (!have(&Encoding::is_n)) {
0269                 return false;
0270             }
0271             expect(&Encoding::is_u, "expected 'null'");
0272             expect(&Encoding::is_l, "expected 'null'");
0273             expect(&Encoding::is_l, "expected 'null'");
0274             callbacks.on_null();
0275             return true;
0276         }
0277 
0278         bool parse_boolean() {
0279             skip_ws();
0280             if (have(&Encoding::is_t)) {
0281                 expect(&Encoding::is_r, "expected 'true'");
0282                 expect(&Encoding::is_u, "expected 'true'");
0283                 expect(&Encoding::is_e, "expected 'true'");
0284                 callbacks.on_boolean(true);
0285                 return true;
0286             }
0287             if (have(&Encoding::is_f)) {
0288                 expect(&Encoding::is_a, "expected 'false'");
0289                 expect(&Encoding::is_l, "expected 'false'");
0290                 expect(&Encoding::is_s, "expected 'false'");
0291                 expect(&Encoding::is_e, "expected 'false'");
0292                 callbacks.on_boolean(false);
0293                 return true;
0294             }
0295             return false;
0296         }
0297 
0298         bool parse_number() {
0299             skip_ws();
0300 
0301             number_adapter adapter(callbacks, encoding, src.raw_cur());
0302             bool started = false;
0303             if (have(&Encoding::is_minus, adapter)) {
0304                 started = true;
0305             }
0306             if (!have(&Encoding::is_0, adapter) && !parse_int_part(adapter)) {
0307                 if (started) {
0308                     parse_error("expected digits after -");
0309                 }
0310                 return false;
0311             }
0312             parse_frac_part(adapter);
0313             parse_exp_part(adapter);
0314             adapter.finish();
0315             return true;
0316         }
0317 
0318         bool parse_string() {
0319             skip_ws();
0320 
0321             if (!have(&Encoding::is_quote)) {
0322                 return false;
0323             }
0324 
0325             callbacks.on_begin_string();
0326             string_adapter adapter(callbacks, encoding, src.raw_cur());
0327             while (!encoding.is_quote(need_cur("unterminated string"))) {
0328                 if (encoding.is_backslash(*src.raw_cur())) {
0329                     adapter.finish_run();
0330                     next();
0331                     parse_escape();
0332                     adapter.start_run();
0333                 } else {
0334                     adapter.process_codepoint(src.raw_end(),
0335                         boost::bind(&parser::parse_error,
0336                                     this, "invalid code sequence"));
0337                 }
0338             }
0339             adapter.finish_run();
0340             callbacks.on_end_string();
0341             next();
0342             return true;
0343         }
0344 
0345         bool parse_array() {
0346             skip_ws();
0347 
0348             if (!have(&Encoding::is_open_bracket)) {
0349                 return false;
0350             }
0351 
0352             callbacks.on_begin_array();
0353             skip_ws();
0354             if (have(&Encoding::is_close_bracket)) {
0355                 callbacks.on_end_array();
0356                 return true;
0357             }
0358             do {
0359                 parse_value();
0360                 skip_ws();
0361             } while (have(&Encoding::is_comma));
0362             expect(&Encoding::is_close_bracket, "expected ']' or ','");
0363             callbacks.on_end_array();
0364             return true;
0365         }
0366 
0367         bool parse_object() {
0368             skip_ws();
0369 
0370             if (!have(&Encoding::is_open_brace)) {
0371                 return false;
0372             }
0373 
0374             callbacks.on_begin_object();
0375             skip_ws();
0376             if (have(&Encoding::is_close_brace)) {
0377                 callbacks.on_end_object();
0378                 return true;
0379             }
0380             do {
0381                 if (!parse_string()) {
0382                     parse_error("expected key string");
0383                 }
0384                 skip_ws();
0385                 expect(&Encoding::is_colon, "expected ':'");
0386                 parse_value();
0387                 skip_ws();
0388             } while (have(&Encoding::is_comma));
0389             expect(&Encoding::is_close_brace, "expected '}' or ','");
0390             callbacks.on_end_object();
0391             return true;
0392         }
0393 
0394     private:
0395         typedef typename source::encoding_predicate encoding_predicate;
0396 
0397         void parse_error(const char* msg) { src.parse_error(msg); }
0398         void next() { src.next(); }
0399         template <typename Action>
0400         bool have(encoding_predicate p, Action& a) { return src.have(p, a); }
0401         bool have(encoding_predicate p) { return src.have(p); }
0402         template <typename Action>
0403         void expect(encoding_predicate p, const char* msg, Action& a) {
0404             src.expect(p, msg, a);
0405         }
0406         void expect(encoding_predicate p, const char* msg) {
0407             src.expect(p, msg);
0408         }
0409         code_unit need_cur(const char* msg) { return src.need_cur(msg); }
0410 
0411         void skip_ws() {
0412             while (have(&Encoding::is_ws)) {
0413             }
0414         }
0415 
0416         bool parse_int_part(number_adapter& action) {
0417             if (!have(&Encoding::is_digit0, action)) {
0418                 return false;
0419             }
0420             parse_digits(action);
0421             return true;
0422         }
0423 
0424         void parse_frac_part(number_adapter& action) {
0425             if (!have(&Encoding::is_dot, action)) {
0426                 return;
0427             }
0428             expect(&Encoding::is_digit, "need at least one digit after '.'",
0429                    action);
0430             parse_digits(action);
0431         }
0432 
0433         void parse_exp_part(number_adapter& action) {
0434             if (!have(&Encoding::is_eE, action)) {
0435                 return;
0436             }
0437             have(&Encoding::is_plusminus, action);
0438             expect(&Encoding::is_digit, "need at least one digit in exponent",
0439                    action);
0440             parse_digits(action);
0441         }
0442 
0443         void parse_digits(number_adapter& action) {
0444             while (have(&Encoding::is_digit, action)) {
0445             }
0446         }
0447 
0448         void parse_escape() {
0449             if (have(&Encoding::is_quote)) {
0450                 feed(0x22);
0451             } else if (have(&Encoding::is_backslash)) {
0452                 feed(0x5c);
0453             } else if (have(&Encoding::is_slash)) {
0454                 feed(0x2f);
0455             } else if (have(&Encoding::is_b)) {
0456                 feed(0x08); // backspace
0457             } else if (have(&Encoding::is_f)) {
0458                 feed(0x0c); // formfeed
0459             } else if (have(&Encoding::is_n)) {
0460                 feed(0x0a); // line feed
0461             } else if (have(&Encoding::is_r)) {
0462                 feed(0x0d); // carriage return
0463             } else if (have(&Encoding::is_t)) {
0464                 feed(0x09); // horizontal tab
0465             } else if (have(&Encoding::is_u)) {
0466                 parse_codepoint_ref();
0467             } else {
0468                 parse_error("invalid escape sequence");
0469             }
0470         }
0471 
0472         unsigned parse_hex_quad() {
0473             unsigned codepoint = 0;
0474             for (int i = 0; i < 4; ++i) {
0475                 int value = encoding.decode_hexdigit(
0476                     need_cur("invalid escape sequence"));
0477                 if (value < 0) {
0478                     parse_error("invalid escape sequence");
0479                 }
0480                 codepoint *= 16;
0481                 codepoint += value;
0482                 next();
0483             }
0484             return codepoint;
0485         }
0486 
0487         static bool is_surrogate_high(unsigned codepoint) {
0488             return (codepoint & 0xfc00) == 0xd800;
0489         }
0490         static bool is_surrogate_low(unsigned codepoint) {
0491             return (codepoint & 0xfc00) == 0xdc00;
0492         }
0493         static unsigned combine_surrogates(unsigned high, unsigned low) {
0494             return 0x010000 + (((high & 0x3ff) << 10) | (low & 0x3ff));
0495         }
0496 
0497         void parse_codepoint_ref() {
0498             unsigned codepoint = parse_hex_quad();
0499             if (is_surrogate_low(codepoint)) {
0500                 parse_error("invalid codepoint, stray low surrogate");
0501             }
0502             if (is_surrogate_high(codepoint)) {
0503                 expect(&Encoding::is_backslash,
0504                     "invalid codepoint, stray high surrogate");
0505                 expect(&Encoding::is_u,
0506                     "expected codepoint reference after high surrogate");
0507                 int low = parse_hex_quad();
0508                 if (!is_surrogate_low(low)) {
0509                     parse_error("expected low surrogate after high surrogate");
0510                 }
0511                 codepoint = combine_surrogates(codepoint, low);
0512             }
0513             feed(codepoint);
0514         }
0515 
0516         void feed(unsigned codepoint) {
0517             encoding.feed_codepoint(codepoint,
0518                                     boost::bind(&Callbacks::on_code_unit,
0519                                                 boost::ref(callbacks), boost::placeholders::_1));
0520         }
0521 
0522         Callbacks& callbacks;
0523         Encoding& encoding;
0524         source src;
0525     };
0526 
0527 }}}}
0528 
0529 #endif