Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /include/nlohmann/detail/input/parser.hpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 //     __ _____ _____ _____
0002 //  __|  |   __|     |   | |  JSON for Modern C++
0003 // |  |  |__   |  |  | | | |  version 3.11.3
0004 // |_____|_____|_____|_|___|  https://github.com/nlohmann/json
0005 //
0006 // SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me>
0007 // SPDX-License-Identifier: MIT
0008 
0009 #pragma once
0010 
0011 #include <cmath> // isfinite
0012 #include <cstdint> // uint8_t
0013 #include <functional> // function
0014 #include <string> // string
0015 #include <utility> // move
0016 #include <vector> // vector
0017 
0018 #include <nlohmann/detail/exceptions.hpp>
0019 #include <nlohmann/detail/input/input_adapters.hpp>
0020 #include <nlohmann/detail/input/json_sax.hpp>
0021 #include <nlohmann/detail/input/lexer.hpp>
0022 #include <nlohmann/detail/macro_scope.hpp>
0023 #include <nlohmann/detail/meta/is_sax.hpp>
0024 #include <nlohmann/detail/string_concat.hpp>
0025 #include <nlohmann/detail/value_t.hpp>
0026 
0027 NLOHMANN_JSON_NAMESPACE_BEGIN
0028 namespace detail
0029 {
0030 ////////////
0031 // parser //
0032 ////////////
0033 
/// events distinguished by the parser; one of these is handed to a
/// parser_callback_t together with the current depth and the parsed value
enum class parse_event_t : std::uint8_t
{
    object_start, ///< `{` was read: processing of a JSON object has started
    object_end,   ///< `}` was read: processing of a JSON object has finished
    array_start,  ///< `[` was read: processing of a JSON array has started
    array_end,    ///< `]` was read: processing of a JSON array has finished
    key,          ///< a key of a value in an object was read
    value         ///< reading of a JSON value has finished
};
0049 
0050 template<typename BasicJsonType>
0051 using parser_callback_t =
0052     std::function<bool(int /*depth*/, parse_event_t /*event*/, BasicJsonType& /*parsed*/)>;
0053 
0054 /*!
0055 @brief syntax analysis
0056 
0057 This class implements a recursive descent parser.
0058 */
0059 template<typename BasicJsonType, typename InputAdapterType>
0060 class parser
0061 {
0062     using number_integer_t = typename BasicJsonType::number_integer_t;
0063     using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
0064     using number_float_t = typename BasicJsonType::number_float_t;
0065     using string_t = typename BasicJsonType::string_t;
0066     using lexer_t = lexer<BasicJsonType, InputAdapterType>;
0067     using token_type = typename lexer_t::token_type;
0068 
0069   public:
0070     /// a parser reading from an input adapter
0071     explicit parser(InputAdapterType&& adapter,
0072                     const parser_callback_t<BasicJsonType> cb = nullptr,
0073                     const bool allow_exceptions_ = true,
0074                     const bool skip_comments = false)
0075         : callback(cb)
0076         , m_lexer(std::move(adapter), skip_comments)
0077         , allow_exceptions(allow_exceptions_)
0078     {
0079         // read first token
0080         get_token();
0081     }
0082 
0083     /*!
0084     @brief public parser interface
0085 
0086     @param[in] strict      whether to expect the last token to be EOF
0087     @param[in,out] result  parsed JSON value
0088 
0089     @throw parse_error.101 in case of an unexpected token
0090     @throw parse_error.102 if to_unicode fails or surrogate error
0091     @throw parse_error.103 if to_unicode fails
0092     */
0093     void parse(const bool strict, BasicJsonType& result)
0094     {
0095         if (callback)
0096         {
0097             json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
0098             sax_parse_internal(&sdp);
0099 
0100             // in strict mode, input must be completely read
0101             if (strict && (get_token() != token_type::end_of_input))
0102             {
0103                 sdp.parse_error(m_lexer.get_position(),
0104                                 m_lexer.get_token_string(),
0105                                 parse_error::create(101, m_lexer.get_position(),
0106                                                     exception_message(token_type::end_of_input, "value"), nullptr));
0107             }
0108 
0109             // in case of an error, return discarded value
0110             if (sdp.is_errored())
0111             {
0112                 result = value_t::discarded;
0113                 return;
0114             }
0115 
0116             // set top-level value to null if it was discarded by the callback
0117             // function
0118             if (result.is_discarded())
0119             {
0120                 result = nullptr;
0121             }
0122         }
0123         else
0124         {
0125             json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
0126             sax_parse_internal(&sdp);
0127 
0128             // in strict mode, input must be completely read
0129             if (strict && (get_token() != token_type::end_of_input))
0130             {
0131                 sdp.parse_error(m_lexer.get_position(),
0132                                 m_lexer.get_token_string(),
0133                                 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), nullptr));
0134             }
0135 
0136             // in case of an error, return discarded value
0137             if (sdp.is_errored())
0138             {
0139                 result = value_t::discarded;
0140                 return;
0141             }
0142         }
0143 
0144         result.assert_invariant();
0145     }
0146 
0147     /*!
0148     @brief public accept interface
0149 
0150     @param[in] strict  whether to expect the last token to be EOF
0151     @return whether the input is a proper JSON text
0152     */
0153     bool accept(const bool strict = true)
0154     {
0155         json_sax_acceptor<BasicJsonType> sax_acceptor;
0156         return sax_parse(&sax_acceptor, strict);
0157     }
0158 
0159     template<typename SAX>
0160     JSON_HEDLEY_NON_NULL(2)
0161     bool sax_parse(SAX* sax, const bool strict = true)
0162     {
0163         (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
0164         const bool result = sax_parse_internal(sax);
0165 
0166         // strict mode: next byte must be EOF
0167         if (result && strict && (get_token() != token_type::end_of_input))
0168         {
0169             return sax->parse_error(m_lexer.get_position(),
0170                                     m_lexer.get_token_string(),
0171                                     parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), nullptr));
0172         }
0173 
0174         return result;
0175     }
0176 
0177   private:
0178     template<typename SAX>
0179     JSON_HEDLEY_NON_NULL(2)
0180     bool sax_parse_internal(SAX* sax)
0181     {
0182         // stack to remember the hierarchy of structured values we are parsing
0183         // true = array; false = object
0184         std::vector<bool> states;
0185         // value to avoid a goto (see comment where set to true)
0186         bool skip_to_state_evaluation = false;
0187 
0188         while (true)
0189         {
0190             if (!skip_to_state_evaluation)
0191             {
0192                 // invariant: get_token() was called before each iteration
0193                 switch (last_token)
0194                 {
0195                     case token_type::begin_object:
0196                     {
0197                         if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1))))
0198                         {
0199                             return false;
0200                         }
0201 
0202                         // closing } -> we are done
0203                         if (get_token() == token_type::end_object)
0204                         {
0205                             if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
0206                             {
0207                                 return false;
0208                             }
0209                             break;
0210                         }
0211 
0212                         // parse key
0213                         if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string))
0214                         {
0215                             return sax->parse_error(m_lexer.get_position(),
0216                                                     m_lexer.get_token_string(),
0217                                                     parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr));
0218                         }
0219                         if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
0220                         {
0221                             return false;
0222                         }
0223 
0224                         // parse separator (:)
0225                         if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
0226                         {
0227                             return sax->parse_error(m_lexer.get_position(),
0228                                                     m_lexer.get_token_string(),
0229                                                     parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr));
0230                         }
0231 
0232                         // remember we are now inside an object
0233                         states.push_back(false);
0234 
0235                         // parse values
0236                         get_token();
0237                         continue;
0238                     }
0239 
0240                     case token_type::begin_array:
0241                     {
0242                         if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1))))
0243                         {
0244                             return false;
0245                         }
0246 
0247                         // closing ] -> we are done
0248                         if (get_token() == token_type::end_array)
0249                         {
0250                             if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
0251                             {
0252                                 return false;
0253                             }
0254                             break;
0255                         }
0256 
0257                         // remember we are now inside an array
0258                         states.push_back(true);
0259 
0260                         // parse values (no need to call get_token)
0261                         continue;
0262                     }
0263 
0264                     case token_type::value_float:
0265                     {
0266                         const auto res = m_lexer.get_number_float();
0267 
0268                         if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res)))
0269                         {
0270                             return sax->parse_error(m_lexer.get_position(),
0271                                                     m_lexer.get_token_string(),
0272                                                     out_of_range::create(406, concat("number overflow parsing '", m_lexer.get_token_string(), '\''), nullptr));
0273                         }
0274 
0275                         if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string())))
0276                         {
0277                             return false;
0278                         }
0279 
0280                         break;
0281                     }
0282 
0283                     case token_type::literal_false:
0284                     {
0285                         if (JSON_HEDLEY_UNLIKELY(!sax->boolean(false)))
0286                         {
0287                             return false;
0288                         }
0289                         break;
0290                     }
0291 
0292                     case token_type::literal_null:
0293                     {
0294                         if (JSON_HEDLEY_UNLIKELY(!sax->null()))
0295                         {
0296                             return false;
0297                         }
0298                         break;
0299                     }
0300 
0301                     case token_type::literal_true:
0302                     {
0303                         if (JSON_HEDLEY_UNLIKELY(!sax->boolean(true)))
0304                         {
0305                             return false;
0306                         }
0307                         break;
0308                     }
0309 
0310                     case token_type::value_integer:
0311                     {
0312                         if (JSON_HEDLEY_UNLIKELY(!sax->number_integer(m_lexer.get_number_integer())))
0313                         {
0314                             return false;
0315                         }
0316                         break;
0317                     }
0318 
0319                     case token_type::value_string:
0320                     {
0321                         if (JSON_HEDLEY_UNLIKELY(!sax->string(m_lexer.get_string())))
0322                         {
0323                             return false;
0324                         }
0325                         break;
0326                     }
0327 
0328                     case token_type::value_unsigned:
0329                     {
0330                         if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(m_lexer.get_number_unsigned())))
0331                         {
0332                             return false;
0333                         }
0334                         break;
0335                     }
0336 
0337                     case token_type::parse_error:
0338                     {
0339                         // using "uninitialized" to avoid "expected" message
0340                         return sax->parse_error(m_lexer.get_position(),
0341                                                 m_lexer.get_token_string(),
0342                                                 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized, "value"), nullptr));
0343                     }
0344                     case token_type::end_of_input:
0345                     {
0346                         if (JSON_HEDLEY_UNLIKELY(m_lexer.get_position().chars_read_total == 1))
0347                         {
0348                             return sax->parse_error(m_lexer.get_position(),
0349                                                     m_lexer.get_token_string(),
0350                                                     parse_error::create(101, m_lexer.get_position(),
0351                                                             "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr));
0352                         }
0353 
0354                         return sax->parse_error(m_lexer.get_position(),
0355                                                 m_lexer.get_token_string(),
0356                                                 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), nullptr));
0357                     }
0358                     case token_type::uninitialized:
0359                     case token_type::end_array:
0360                     case token_type::end_object:
0361                     case token_type::name_separator:
0362                     case token_type::value_separator:
0363                     case token_type::literal_or_value:
0364                     default: // the last token was unexpected
0365                     {
0366                         return sax->parse_error(m_lexer.get_position(),
0367                                                 m_lexer.get_token_string(),
0368                                                 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), nullptr));
0369                     }
0370                 }
0371             }
0372             else
0373             {
0374                 skip_to_state_evaluation = false;
0375             }
0376 
0377             // we reached this line after we successfully parsed a value
0378             if (states.empty())
0379             {
0380                 // empty stack: we reached the end of the hierarchy: done
0381                 return true;
0382             }
0383 
0384             if (states.back())  // array
0385             {
0386                 // comma -> next value
0387                 if (get_token() == token_type::value_separator)
0388                 {
0389                     // parse a new value
0390                     get_token();
0391                     continue;
0392                 }
0393 
0394                 // closing ]
0395                 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array))
0396                 {
0397                     if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
0398                     {
0399                         return false;
0400                     }
0401 
0402                     // We are done with this array. Before we can parse a
0403                     // new value, we need to evaluate the new state first.
0404                     // By setting skip_to_state_evaluation to false, we
0405                     // are effectively jumping to the beginning of this if.
0406                     JSON_ASSERT(!states.empty());
0407                     states.pop_back();
0408                     skip_to_state_evaluation = true;
0409                     continue;
0410                 }
0411 
0412                 return sax->parse_error(m_lexer.get_position(),
0413                                         m_lexer.get_token_string(),
0414                                         parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array, "array"), nullptr));
0415             }
0416 
0417             // states.back() is false -> object
0418 
0419             // comma -> next value
0420             if (get_token() == token_type::value_separator)
0421             {
0422                 // parse key
0423                 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string))
0424                 {
0425                     return sax->parse_error(m_lexer.get_position(),
0426                                             m_lexer.get_token_string(),
0427                                             parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr));
0428                 }
0429 
0430                 if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
0431                 {
0432                     return false;
0433                 }
0434 
0435                 // parse separator (:)
0436                 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
0437                 {
0438                     return sax->parse_error(m_lexer.get_position(),
0439                                             m_lexer.get_token_string(),
0440                                             parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr));
0441                 }
0442 
0443                 // parse values
0444                 get_token();
0445                 continue;
0446             }
0447 
0448             // closing }
0449             if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object))
0450             {
0451                 if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
0452                 {
0453                     return false;
0454                 }
0455 
0456                 // We are done with this object. Before we can parse a
0457                 // new value, we need to evaluate the new state first.
0458                 // By setting skip_to_state_evaluation to false, we
0459                 // are effectively jumping to the beginning of this if.
0460                 JSON_ASSERT(!states.empty());
0461                 states.pop_back();
0462                 skip_to_state_evaluation = true;
0463                 continue;
0464             }
0465 
0466             return sax->parse_error(m_lexer.get_position(),
0467                                     m_lexer.get_token_string(),
0468                                     parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object, "object"), nullptr));
0469         }
0470     }
0471 
0472     /// get next token from lexer
0473     token_type get_token()
0474     {
0475         return last_token = m_lexer.scan();
0476     }
0477 
0478     std::string exception_message(const token_type expected, const std::string& context)
0479     {
0480         std::string error_msg = "syntax error ";
0481 
0482         if (!context.empty())
0483         {
0484             error_msg += concat("while parsing ", context, ' ');
0485         }
0486 
0487         error_msg += "- ";
0488 
0489         if (last_token == token_type::parse_error)
0490         {
0491             error_msg += concat(m_lexer.get_error_message(), "; last read: '",
0492                                 m_lexer.get_token_string(), '\'');
0493         }
0494         else
0495         {
0496             error_msg += concat("unexpected ", lexer_t::token_type_name(last_token));
0497         }
0498 
0499         if (expected != token_type::uninitialized)
0500         {
0501             error_msg += concat("; expected ", lexer_t::token_type_name(expected));
0502         }
0503 
0504         return error_msg;
0505     }
0506 
0507   private:
0508     /// callback function
0509     const parser_callback_t<BasicJsonType> callback = nullptr;
0510     /// the type of the last read token
0511     token_type last_token = token_type::uninitialized;
0512     /// the lexer
0513     lexer_t m_lexer;
0514     /// whether to throw exceptions in case of errors
0515     const bool allow_exceptions = true;
0516 };
0517 
0518 }  // namespace detail
0519 NLOHMANN_JSON_NAMESPACE_END