detail/input/lexer.hpp

0001 //     __ _____ _____ _____
0002 //  __|  |   __|     |   | |  JSON for Modern C++
0003 // |  |  |__   |  |  | | | |  version 3.12.0
0004 // |_____|_____|_____|_|___|  https://github.com/nlohmann/json
0005 //
0006 // SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann <https://nlohmann.me>
0007 // SPDX-License-Identifier: MIT
0008
0009 #pragma once
0010
0011 #include <array> // array
0012 #include <clocale> // localeconv
0013 #include <cstddef> // size_t
0014 #include <cstdio> // snprintf
0015 #include <cstdlib> // strtof, strtod, strtold, strtoll, strtoull
0016 #include <initializer_list> // initializer_list
0017 #include <string> // char_traits, string
0018 #include <utility> // move
0019 #include <vector> // vector
0020
0021 #include <nlohmann/detail/input/input_adapters.hpp>
0022 #include <nlohmann/detail/input/position_t.hpp>
0023 #include <nlohmann/detail/macro_scope.hpp>
0024 #include <nlohmann/detail/meta/type_traits.hpp>
0025
0026 NLOHMANN_JSON_NAMESPACE_BEGIN
0027 namespace detail
0028 {
0029
0030 ///////////
0031 // lexer //
0032 ///////////
0033
0034 template<typename BasicJsonType>
0035 class lexer_base
0036 {
0037   public:
0038     /// token types for the parser
0039     enum class token_type
0040     {
0041         uninitialized,    ///< indicating the scanner is uninitialized
0042         literal_true,     ///< the `true` literal
0043         literal_false,    ///< the `false` literal
0044         literal_null,     ///< the `null` literal
0045         value_string,     ///< a string -- use get_string() for actual value
0046         value_unsigned,   ///< an unsigned integer -- use get_number_unsigned() for actual value
0047         value_integer,    ///< a signed integer -- use get_number_integer() for actual value
0048         value_float,      ///< an floating point number -- use get_number_float() for actual value
0049         begin_array,      ///< the character for array begin `[`
0050         begin_object,     ///< the character for object begin `{`
0051         end_array,        ///< the character for array end `]`
0052         end_object,       ///< the character for object end `}`
0053         name_separator,   ///< the name separator `:`
0054         value_separator,  ///< the value separator `,`
0055         parse_error,      ///< indicating a parse error
0056         end_of_input,     ///< indicating the end of the input buffer
0057         literal_or_value  ///< a literal or the begin of a value (only for diagnostics)
0058     };
0059
0060     /// return name of values of type token_type (only used for errors)
0061     JSON_HEDLEY_RETURNS_NON_NULL
0062     JSON_HEDLEY_CONST
0063     static const char* token_type_name(const token_type t) noexcept
0064     {
0065         switch (t)
0066         {
0067             case token_type::uninitialized:
0068                 return "<uninitialized>";
0069             case token_type::literal_true:
0070                 return "true literal";
0071             case token_type::literal_false:
0072                 return "false literal";
0073             case token_type::literal_null:
0074                 return "null literal";
0075             case token_type::value_string:
0076                 return "string literal";
0077             case token_type::value_unsigned:
0078             case token_type::value_integer:
0079             case token_type::value_float:
0080                 return "number literal";
0081             case token_type::begin_array:
0082                 return "'['";
0083             case token_type::begin_object:
0084                 return "'{'";
0085             case token_type::end_array:
0086                 return "']'";
0087             case token_type::end_object:
0088                 return "'}'";
0089             case token_type::name_separator:
0090                 return "':'";
0091             case token_type::value_separator:
0092                 return "','";
0093             case token_type::parse_error:
0094                 return "<parse error>";
0095             case token_type::end_of_input:
0096                 return "end of input";
0097             case token_type::literal_or_value:
0098                 return "'[', '{', or a literal";
0099             // LCOV_EXCL_START
0100             default: // catch non-enum values
0101                 return "unknown token";
0102                 // LCOV_EXCL_STOP
0103         }
0104     }
0105 };
0106 /*!
0107 @brief lexical analysis
0108
0109 This class organizes the lexical analysis during JSON deserialization.
0110 */
0111 template<typename BasicJsonType, typename InputAdapterType>
0112 class lexer : public lexer_base<BasicJsonType>
0113 {
0114     using number_integer_t = typename BasicJsonType::number_integer_t;
0115     using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
0116     using number_float_t = typename BasicJsonType::number_float_t;
0117     using string_t = typename BasicJsonType::string_t;
0118     using char_type = typename InputAdapterType::char_type;
0119     using char_int_type = typename char_traits<char_type>::int_type;
0120
0121   public:
0122     using token_type = typename lexer_base<BasicJsonType>::token_type;
0123
0124     explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false) noexcept
0125         : ia(std::move(adapter))
0126         , ignore_comments(ignore_comments_)
0127         , decimal_point_char(static_cast<char_int_type>(get_decimal_point()))
0128     {}
0129
0130     // delete because of pointer members
0131     lexer(const lexer&) = delete;
0132     lexer(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor)
0133     lexer& operator=(lexer&) = delete;
0134     lexer& operator=(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor)
0135     ~lexer() = default;
0136
0137   private:
0138     /////////////////////
0139     // locales
0140     /////////////////////
0141
0142     /// return the locale-dependent decimal point
0143     JSON_HEDLEY_PURE
0144     static char get_decimal_point() noexcept
0145     {
0146         const auto* loc = localeconv();
0147         JSON_ASSERT(loc != nullptr);
0148         return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point);
0149     }
0150
0151     /////////////////////
0152     // scan functions
0153     /////////////////////
0154
0155     /*!
0156     @brief get codepoint from 4 hex characters following `\u`
0157
0158     For input "\u c1 c2 c3 c4" the codepoint is:
0159       (c1 * 0x1000) + (c2 * 0x0100) + (c3 * 0x0010) + c4
0160     = (c1 << 12) + (c2 << 8) + (c3 << 4) + (c4 << 0)
0161
0162     Furthermore, the possible characters '0'..'9', 'A'..'F', and 'a'..'f'
0163     must be converted to the integers 0x0..0x9, 0xA..0xF, 0xA..0xF, resp. The
0164     conversion is done by subtracting the offset (0x30, 0x37, and 0x57)
0165     between the ASCII value of the character and the desired integer value.
0166
0167     @return codepoint (0x0000..0xFFFF) or -1 in case of an error (e.g. EOF or
0168             non-hex character)
0169     */
0170     int get_codepoint()
0171     {
0172         // this function only makes sense after reading `\u`
0173         JSON_ASSERT(current == 'u');
0174         int codepoint = 0;
0175
0176         const auto factors = { 12u, 8u, 4u, 0u };
0177         for (const auto factor : factors)
0178         {
0179             get();
0180
0181             if (current >= '0' && current <= '9')
0182             {
0183                 codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x30u) << factor);
0184             }
0185             else if (current >= 'A' && current <= 'F')
0186             {
0187                 codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x37u) << factor);
0188             }
0189             else if (current >= 'a' && current <= 'f')
0190             {
0191                 codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x57u) << factor);
0192             }
0193             else
0194             {
0195                 return -1;
0196             }
0197         }
0198
0199         JSON_ASSERT(0x0000 <= codepoint && codepoint <= 0xFFFF);
0200         return codepoint;
0201     }
0202
0203     /*!
0204     @brief check if the next byte(s) are inside a given range
0205
0206     Adds the current byte and, for each passed range, reads a new byte and
0207     checks if it is inside the range. If a violation was detected, set up an
0208     error message and return false. Otherwise, return true.
0209
0210     @param[in] ranges  list of integers; interpreted as list of pairs of
0211                        inclusive lower and upper bound, respectively
0212
0213     @pre The passed list @a ranges must have 2, 4, or 6 elements; that is,
0214          1, 2, or 3 pairs. This precondition is enforced by an assertion.
0215
0216     @return true if and only if no range violation was detected
0217     */
0218     bool next_byte_in_range(std::initializer_list<char_int_type> ranges)
0219     {
0220         JSON_ASSERT(ranges.size() == 2 || ranges.size() == 4 || ranges.size() == 6);
0221         add(current);
0222
0223         for (auto range = ranges.begin(); range != ranges.end(); ++range)
0224         {
0225             get();
0226             if (JSON_HEDLEY_LIKELY(*range <= current && current <= *(++range))) // NOLINT(bugprone-inc-dec-in-conditions)
0227             {
0228                 add(current);
0229             }
0230             else
0231             {
0232                 error_message = "invalid string: ill-formed UTF-8 byte";
0233                 return false;
0234             }
0235         }
0236
0237         return true;
0238     }
0239
0240     /*!
0241     @brief scan a string literal
0242
0243     This function scans a string according to Sect. 7 of RFC 8259. While
0244     scanning, escape sequences are decoded and all bytes are copied into
0245     buffer token_buffer. When the function returns successfully,
0246     token_buffer is *not* null-terminated (as it may contain \0 bytes), and
0247     token_buffer.size() is the number of bytes in the string.
0248
0249     @return token_type::value_string if string could be successfully scanned,
0250             token_type::parse_error otherwise
0251
0252     @note In case of errors, variable error_message contains a textual
0253           description.
0254     */
0255     token_type scan_string()
0256     {
0257         // reset token_buffer (ignore opening quote)
0258         reset();
0259
0260         // we entered the function by reading an open quote
0261         JSON_ASSERT(current == '\"');
0262
0263         while (true)
0264         {
0265             // get next character
0266             switch (get())
0267             {
0268                 // end of file while parsing string
0269                 case char_traits<char_type>::eof():
0270                 {
0271                     error_message = "invalid string: missing closing quote";
0272                     return token_type::parse_error;
0273                 }
0274
0275                 // closing quote
0276                 case '\"':
0277                 {
0278                     return token_type::value_string;
0279                 }
0280
0281                 // escapes
0282                 case '\\':
0283                 {
0284                     switch (get())
0285                     {
0286                         // quotation mark
0287                         case '\"':
0288                             add('\"');
0289                             break;
0290                         // reverse solidus
0291                         case '\\':
0292                             add('\\');
0293                             break;
0294                         // solidus
0295                         case '/':
0296                             add('/');
0297                             break;
0298                         // backspace
0299                         case 'b':
0300                             add('\b');
0301                             break;
0302                         // form feed
0303                         case 'f':
0304                             add('\f');
0305                             break;
0306                         // line feed
0307                         case 'n':
0308                             add('\n');
0309                             break;
0310                         // carriage return
0311                         case 'r':
0312                             add('\r');
0313                             break;
0314                         // tab
0315                         case 't':
0316                             add('\t');
0317                             break;
0318
0319                         // unicode escapes
0320                         case 'u':
0321                         {
0322                             const int codepoint1 = get_codepoint();
0323                             int codepoint = codepoint1; // start with codepoint1
0324
0325                             if (JSON_HEDLEY_UNLIKELY(codepoint1 == -1))
0326                             {
0327                                 error_message = "invalid string: '\\u' must be followed by 4 hex digits";
0328                                 return token_type::parse_error;
0329                             }
0330
0331                             // check if code point is a high surrogate
0332                             if (0xD800 <= codepoint1 && codepoint1 <= 0xDBFF)
0333                             {
0334                                 // expect next \uxxxx entry
0335                                 if (JSON_HEDLEY_LIKELY(get() == '\\' && get() == 'u'))
0336                                 {
0337                                     const int codepoint2 = get_codepoint();
0338
0339                                     if (JSON_HEDLEY_UNLIKELY(codepoint2 == -1))
0340                                     {
0341                                         error_message = "invalid string: '\\u' must be followed by 4 hex digits";
0342                                         return token_type::parse_error;
0343                                     }
0344
0345                                     // check if codepoint2 is a low surrogate
0346                                     if (JSON_HEDLEY_LIKELY(0xDC00 <= codepoint2 && codepoint2 <= 0xDFFF))
0347                                     {
0348                                         // overwrite codepoint
0349                                         codepoint = static_cast<int>(
0350                                                         // high surrogate occupies the most significant 22 bits
0351                                                         (static_cast<unsigned int>(codepoint1) << 10u)
0352                                                         // low surrogate occupies the least significant 15 bits
0353                                                         + static_cast<unsigned int>(codepoint2)
0354                                                         // there is still the 0xD800, 0xDC00 and 0x10000 noise
0355                                                         // in the result, so we have to subtract with:
0356                                                         // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00
0357                                                         - 0x35FDC00u);
0358                                     }
0359                                     else
0360                                     {
0361                                         error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
0362                                         return token_type::parse_error;
0363                                     }
0364                                 }
0365                                 else
0366                                 {
0367                                     error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
0368                                     return token_type::parse_error;
0369                                 }
0370                             }
0371                             else
0372                             {
0373                                 if (JSON_HEDLEY_UNLIKELY(0xDC00 <= codepoint1 && codepoint1 <= 0xDFFF))
0374                                 {
0375                                     error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF";
0376                                     return token_type::parse_error;
0377                                 }
0378                             }
0379
0380                             // result of the above calculation yields a proper codepoint
0381                             JSON_ASSERT(0x00 <= codepoint && codepoint <= 0x10FFFF);
0382
0383                             // translate codepoint into bytes
0384                             if (codepoint < 0x80)
0385                             {
0386                                 // 1-byte characters: 0xxxxxxx (ASCII)
0387                                 add(static_cast<char_int_type>(codepoint));
0388                             }
0389                             else if (codepoint <= 0x7FF)
0390                             {
0391                                 // 2-byte characters: 110xxxxx 10xxxxxx
0392                                 add(static_cast<char_int_type>(0xC0u | (static_cast<unsigned int>(codepoint) >> 6u)));
0393                                 add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
0394                             }
0395                             else if (codepoint <= 0xFFFF)
0396                             {
0397                                 // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx
0398                                 add(static_cast<char_int_type>(0xE0u | (static_cast<unsigned int>(codepoint) >> 12u)));
0399                                 add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
0400                                 add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
0401                             }
0402                             else
0403                             {
0404                                 // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
0405                                 add(static_cast<char_int_type>(0xF0u | (static_cast<unsigned int>(codepoint) >> 18u)));
0406                                 add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 12u) & 0x3Fu)));
0407                                 add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
0408                                 add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
0409                             }
0410
0411                             break;
0412                         }
0413
0414                         // other characters after escape
0415                         default:
0416                             error_message = "invalid string: forbidden character after backslash";
0417                             return token_type::parse_error;
0418                     }
0419
0420                     break;
0421                 }
0422
0423                 // invalid control characters
0424                 case 0x00:
0425                 {
0426                     error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000";
0427                     return token_type::parse_error;
0428                 }
0429
0430                 case 0x01:
0431                 {
0432                     error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001";
0433                     return token_type::parse_error;
0434                 }
0435
0436                 case 0x02:
0437                 {
0438                     error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002";
0439                     return token_type::parse_error;
0440                 }
0441
0442                 case 0x03:
0443                 {
0444                     error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003";
0445                     return token_type::parse_error;
0446                 }
0447
0448                 case 0x04:
0449                 {
0450                     error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004";
0451                     return token_type::parse_error;
0452                 }
0453
0454                 case 0x05:
0455                 {
0456                     error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005";
0457                     return token_type::parse_error;
0458                 }
0459
0460                 case 0x06:
0461                 {
0462                     error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006";
0463                     return token_type::parse_error;
0464                 }
0465
0466                 case 0x07:
0467                 {
0468                     error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007";
0469                     return token_type::parse_error;
0470                 }
0471
0472                 case 0x08:
0473                 {
0474                     error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b";
0475                     return token_type::parse_error;
0476                 }
0477
0478                 case 0x09:
0479                 {
0480                     error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t";
0481                     return token_type::parse_error;
0482                 }
0483
0484                 case 0x0A:
0485                 {
0486                     error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n";
0487                     return token_type::parse_error;
0488                 }
0489
0490                 case 0x0B:
0491                 {
0492                     error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B";
0493                     return token_type::parse_error;
0494                 }
0495
0496                 case 0x0C:
0497                 {
0498                     error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f";
0499                     return token_type::parse_error;
0500                 }
0501
0502                 case 0x0D:
0503                 {
0504                     error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r";
0505                     return token_type::parse_error;
0506                 }
0507
0508                 case 0x0E:
0509                 {
0510                     error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E";
0511                     return token_type::parse_error;
0512                 }
0513
0514                 case 0x0F:
0515                 {
0516                     error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F";
0517                     return token_type::parse_error;
0518                 }
0519
0520                 case 0x10:
0521                 {
0522                     error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010";
0523                     return token_type::parse_error;
0524                 }
0525
0526                 case 0x11:
0527                 {
0528                     error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011";
0529                     return token_type::parse_error;
0530                 }
0531
0532                 case 0x12:
0533                 {
0534                     error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012";
0535                     return token_type::parse_error;
0536                 }
0537
0538                 case 0x13:
0539                 {
0540                     error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013";
0541                     return token_type::parse_error;
0542                 }
0543
0544                 case 0x14:
0545                 {
0546                     error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014";
0547                     return token_type::parse_error;
0548                 }
0549
0550                 case 0x15:
0551                 {
0552                     error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015";
0553                     return token_type::parse_error;
0554                 }
0555
0556                 case 0x16:
0557                 {
0558                     error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016";
0559                     return token_type::parse_error;
0560                 }
0561
0562                 case 0x17:
0563                 {
0564                     error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017";
0565                     return token_type::parse_error;
0566                 }
0567
0568                 case 0x18:
0569                 {
0570                     error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018";
0571                     return token_type::parse_error;
0572                 }
0573
0574                 case 0x19:
0575                 {
0576                     error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019";
0577                     return token_type::parse_error;
0578                 }
0579
0580                 case 0x1A:
0581                 {
0582                     error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A";
0583                     return token_type::parse_error;
0584                 }
0585
0586                 case 0x1B:
0587                 {
0588                     error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B";
0589                     return token_type::parse_error;
0590                 }
0591
0592                 case 0x1C:
0593                 {
0594                     error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C";
0595                     return token_type::parse_error;
0596                 }
0597
0598                 case 0x1D:
0599                 {
0600                     error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D";
0601                     return token_type::parse_error;
0602                 }
0603
0604                 case 0x1E:
0605                 {
0606                     error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E";
0607                     return token_type::parse_error;
0608                 }
0609
0610                 case 0x1F:
0611                 {
0612                     error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F";
0613                     return token_type::parse_error;
0614                 }
0615
0616                 // U+0020..U+007F (except U+0022 (quote) and U+005C (backslash))
0617                 case 0x20:
0618                 case 0x21:
0619                 case 0x23:
0620                 case 0x24:
0621                 case 0x25:
0622                 case 0x26:
0623                 case 0x27:
0624                 case 0x28:
0625                 case 0x29:
0626                 case 0x2A:
0627                 case 0x2B:
0628                 case 0x2C:
0629                 case 0x2D:
0630                 case 0x2E:
0631                 case 0x2F:
0632                 case 0x30:
0633                 case 0x31:
0634                 case 0x32:
0635                 case 0x33:
0636                 case 0x34:
0637                 case 0x35:
0638                 case 0x36:
0639                 case 0x37:
0640                 case 0x38:
0641                 case 0x39:
0642                 case 0x3A:
0643                 case 0x3B:
0644                 case 0x3C:
0645                 case 0x3D:
0646                 case 0x3E:
0647                 case 0x3F:
0648                 case 0x40:
0649                 case 0x41:
0650                 case 0x42:
0651                 case 0x43:
0652                 case 0x44:
0653                 case 0x45:
0654                 case 0x46:
0655                 case 0x47:
0656                 case 0x48:
0657                 case 0x49:
0658                 case 0x4A:
0659                 case 0x4B:
0660                 case 0x4C:
0661                 case 0x4D:
0662                 case 0x4E:
0663                 case 0x4F:
0664                 case 0x50:
0665                 case 0x51:
0666                 case 0x52:
0667                 case 0x53:
0668                 case 0x54:
0669                 case 0x55:
0670                 case 0x56:
0671                 case 0x57:
0672                 case 0x58:
0673                 case 0x59:
0674                 case 0x5A:
0675                 case 0x5B:
0676                 case 0x5D:
0677                 case 0x5E:
0678                 case 0x5F:
0679                 case 0x60:
0680                 case 0x61:
0681                 case 0x62:
0682                 case 0x63:
0683                 case 0x64:
0684                 case 0x65:
0685                 case 0x66:
0686                 case 0x67:
0687                 case 0x68:
0688                 case 0x69:
0689                 case 0x6A:
0690                 case 0x6B:
0691                 case 0x6C:
0692                 case 0x6D:
0693                 case 0x6E:
0694                 case 0x6F:
0695                 case 0x70:
0696                 case 0x71:
0697                 case 0x72:
0698                 case 0x73:
0699                 case 0x74:
0700                 case 0x75:
0701                 case 0x76:
0702                 case 0x77:
0703                 case 0x78:
0704                 case 0x79:
0705                 case 0x7A:
0706                 case 0x7B:
0707                 case 0x7C:
0708                 case 0x7D:
0709                 case 0x7E:
0710                 case 0x7F:
0711                 {
0712                     add(current);
0713                     break;
0714                 }
0715
0716                 // U+0080..U+07FF: bytes C2..DF 80..BF
0717                 case 0xC2:
0718                 case 0xC3:
0719                 case 0xC4:
0720                 case 0xC5:
0721                 case 0xC6:
0722                 case 0xC7:
0723                 case 0xC8:
0724                 case 0xC9:
0725                 case 0xCA:
0726                 case 0xCB:
0727                 case 0xCC:
0728                 case 0xCD:
0729                 case 0xCE:
0730                 case 0xCF:
0731                 case 0xD0:
0732                 case 0xD1:
0733                 case 0xD2:
0734                 case 0xD3:
0735                 case 0xD4:
0736                 case 0xD5:
0737                 case 0xD6:
0738                 case 0xD7:
0739                 case 0xD8:
0740                 case 0xD9:
0741                 case 0xDA:
0742                 case 0xDB:
0743                 case 0xDC:
0744                 case 0xDD:
0745                 case 0xDE:
0746                 case 0xDF:
0747                 {
0748                     if (JSON_HEDLEY_UNLIKELY(!next_byte_in_range({0x80, 0xBF})))
0749                     {
0750                         return token_type::parse_error;
0751                     }
0752                     break;
0753                 }
0754
0755                 // U+0800..U+0FFF: bytes E0 A0..BF 80..BF
0756                 case 0xE0:
0757                 {
0758                     if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF}))))
0759                     {
0760                         return token_type::parse_error;
0761                     }
0762                     break;
0763                 }
0764
0765                 // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF
0766                 // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF
0767                 case 0xE1:
0768                 case 0xE2:
0769                 case 0xE3:
0770                 case 0xE4:
0771                 case 0xE5:
0772                 case 0xE6:
0773                 case 0xE7:
0774                 case 0xE8:
0775                 case 0xE9:
0776                 case 0xEA:
0777                 case 0xEB:
0778                 case 0xEC:
0779                 case 0xEE:
0780                 case 0xEF:
0781                 {
0782                     if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF}))))
0783                     {
0784                         return token_type::parse_error;
0785                     }
0786                     break;
0787                 }
0788
0789                 // U+D000..U+D7FF: bytes ED 80..9F 80..BF
0790                 case 0xED:
0791                 {
0792                     if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x9F, 0x80, 0xBF}))))
0793                     {
0794                         return token_type::parse_error;
0795                     }
0796                     break;
0797                 }
0798
0799                 // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
0800                 case 0xF0:
0801                 {
0802                     if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
0803                     {
0804                         return token_type::parse_error;
0805                     }
0806                     break;
0807                 }
0808
0809                 // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
0810                 case 0xF1:
0811                 case 0xF2:
0812                 case 0xF3:
0813                 {
0814                     if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
0815                     {
0816                         return token_type::parse_error;
0817                     }
0818                     break;
0819                 }
0820
0821                 // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
0822                 case 0xF4:
0823                 {
0824                     if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}))))
0825                     {
0826                         return token_type::parse_error;
0827                     }
0828                     break;
0829                 }
0830
0831                 // remaining bytes (80..C1 and F5..FF) are ill-formed
0832                 default:
0833                 {
0834                     error_message = "invalid string: ill-formed UTF-8 byte";
0835                     return token_type::parse_error;
0836                 }
0837             }
0838         }
0839     }
0840
0841     /*!
0842      * @brief scan a comment
0843      * @return whether comment could be scanned successfully
0844      */
0845     bool scan_comment()
0846     {
0847         switch (get())
0848         {
0849             // single-line comments skip input until a newline or EOF is read
0850             case '/':
0851             {
0852                 while (true)
0853                 {
0854                     switch (get())
0855                     {
0856                         case '\n':
0857                         case '\r':
0858                         case char_traits<char_type>::eof():
0859                         case '\0':
0860                             return true;
0861
0862                         default:
0863                             break;
0864                     }
0865                 }
0866             }
0867
0868             // multi-line comments skip input until */ is read
0869             case '*':
0870             {
0871                 while (true)
0872                 {
0873                     switch (get())
0874                     {
0875                         case char_traits<char_type>::eof():
0876                         case '\0':
0877                         {
0878                             error_message = "invalid comment; missing closing '*/'";
0879                             return false;
0880                         }
0881
0882                         case '*':
0883                         {
0884                             switch (get())
0885                             {
0886                                 case '/':
0887                                     return true;
0888
0889                                 default:
0890                                 {
0891                                     unget();
0892                                     continue;
0893                                 }
0894                             }
0895                         }
0896
0897                         default:
0898                             continue;
0899                     }
0900                 }
0901             }
0902
0903             // unexpected character after reading '/'
0904             default:
0905             {
0906                 error_message = "invalid comment; expecting '/' or '*' after '/'";
0907                 return false;
0908             }
0909         }
0910     }
0911
0912     JSON_HEDLEY_NON_NULL(2)
0913     static void strtof(float& f, const char* str, char** endptr) noexcept
0914     {
0915         f = std::strtof(str, endptr);
0916     }
0917
0918     JSON_HEDLEY_NON_NULL(2)
0919     static void strtof(double& f, const char* str, char** endptr) noexcept
0920     {
0921         f = std::strtod(str, endptr);
0922     }
0923
0924     JSON_HEDLEY_NON_NULL(2)
0925     static void strtof(long double& f, const char* str, char** endptr) noexcept
0926     {
0927         f = std::strtold(str, endptr);
0928     }
0929
/*!
@brief scan a number literal

This function scans a string according to Sect. 6 of RFC 8259.

The function is realized with a deterministic finite state machine derived
from the grammar described in RFC 8259. Starting in state "init", the
input is read and used to determine the next state. Only state "done"
accepts the number. State "error" is a trap state to model errors. In the
table below, "anything" means any character but the ones listed before.

state    | 0        | 1-9      | e E      | +       | -       | .        | anything
---------|----------|----------|----------|---------|---------|----------|-----------
init     | zero     | any1     | [error]  | [error] | minus   | [error]  | [error]
minus    | zero     | any1     | [error]  | [error] | [error] | [error]  | [error]
zero     | done     | done     | exponent | done    | done    | decimal1 | done
any1     | any1     | any1     | exponent | done    | done    | decimal1 | done
decimal1 | decimal2 | decimal2 | [error]  | [error] | [error] | [error]  | [error]
decimal2 | decimal2 | decimal2 | exponent | done    | done    | done     | done
exponent | any2     | any2     | [error]  | sign    | sign    | [error]  | [error]
sign     | any2     | any2     | [error]  | [error] | [error] | [error]  | [error]
any2     | any2     | any2     | done     | done    | done    | done     | done

The state machine is realized with one label per state (prefixed with
"scan_number_") and `goto` statements between them. The state machine
contains cycles, but any cycle can be left when EOF is read. Therefore,
the function is guaranteed to terminate.

During scanning, the read bytes are stored in token_buffer. This string is
then converted to a signed integer, an unsigned integer, or a
floating-point number.

@return token_type::value_unsigned, token_type::value_integer, or
        token_type::value_float if number could be successfully scanned,
        token_type::parse_error otherwise

@note The scanner is independent of the current locale. Internally, the
      locale's decimal point is used instead of `.` to work with the
      locale-dependent converters.
*/
token_type scan_number()  // lgtm [cpp/use-of-goto] `goto` is used in this function to implement the number-parsing state machine described above. By design, any finite input will eventually reach the "done" state or return token_type::parse_error. In each intermediate state, 1 byte of the input is appended to the token_buffer vector, and only the already initialized variables token_buffer, number_type, and error_message are manipulated.
{
    // reset token_buffer to store the number's bytes
    reset();

    // the type of the parsed number; initially set to unsigned; will be
    // changed if minus sign, decimal point or exponent is read
    token_type number_type = token_type::value_unsigned;

    // state (init): we just found out we need to scan a number; the first
    // character was already read by the caller and is held in `current`
    switch (current)
    {
        case '-':
        {
            add(current);
            goto scan_number_minus;
        }

        case '0':
        {
            add(current);
            goto scan_number_zero;
        }

        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            add(current);
            goto scan_number_any1;
        }

        // all other characters are rejected outside scan_number()
        // NOTE(review): if JSON_ASSERT compiles to nothing, control would
        // continue with the "minus" state below -- confirm this is acceptable
        default:            // LCOV_EXCL_LINE
            JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
    }

scan_number_minus:
    // state: we just parsed a leading minus sign; the number can no longer
    // be reported as unsigned
    number_type = token_type::value_integer;
    switch (get())
    {
        case '0':
        {
            add(current);
            goto scan_number_zero;
        }

        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            add(current);
            goto scan_number_any1;
        }

        default:
        {
            error_message = "invalid number; expected digit after '-'";
            return token_type::parse_error;
        }
    }

scan_number_zero:
    // state: we just parsed a zero (maybe with a leading minus sign); no
    // further integer digits are accepted here, so a following digit ends
    // the number
    switch (get())
    {
        case '.':
        {
            // store decimal_point_char rather than '.', and remember where
            // it was written so that get_string() can restore the '.'
            add(decimal_point_char);
            decimal_point_position = token_buffer.size() - 1;
            goto scan_number_decimal1;
        }

        case 'e':
        case 'E':
        {
            add(current);
            goto scan_number_exponent;
        }

        default:
            goto scan_number_done;
    }

scan_number_any1:
    // state: we just parsed a number 0-9 (maybe with a leading minus sign)
    switch (get())
    {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            add(current);
            goto scan_number_any1;
        }

        case '.':
        {
            // same decimal point handling as in state "zero"
            add(decimal_point_char);
            decimal_point_position = token_buffer.size() - 1;
            goto scan_number_decimal1;
        }

        case 'e':
        case 'E':
        {
            add(current);
            goto scan_number_exponent;
        }

        default:
            goto scan_number_done;
    }

scan_number_decimal1:
    // state: we just parsed a decimal point; at least one digit must follow
    number_type = token_type::value_float;
    switch (get())
    {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            add(current);
            goto scan_number_decimal2;
        }

        default:
        {
            error_message = "invalid number; expected digit after '.'";
            return token_type::parse_error;
        }
    }

scan_number_decimal2:
    // we just parsed at least one number after a decimal point
    switch (get())
    {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            add(current);
            goto scan_number_decimal2;
        }

        case 'e':
        case 'E':
        {
            add(current);
            goto scan_number_exponent;
        }

        default:
            goto scan_number_done;
    }

scan_number_exponent:
    // we just parsed an exponent; an optional sign or a digit must follow
    number_type = token_type::value_float;
    switch (get())
    {
        case '+':
        case '-':
        {
            add(current);
            goto scan_number_sign;
        }

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            add(current);
            goto scan_number_any2;
        }

        default:
        {
            error_message =
                "invalid number; expected '+', '-', or digit after exponent";
            return token_type::parse_error;
        }
    }

scan_number_sign:
    // we just parsed an exponent sign; at least one digit must follow
    switch (get())
    {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            add(current);
            goto scan_number_any2;
        }

        default:
        {
            error_message = "invalid number; expected digit after exponent sign";
            return token_type::parse_error;
        }
    }

scan_number_any2:
    // we just parsed a number after the exponent or exponent sign
    switch (get())
    {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            add(current);
            goto scan_number_any2;
        }

        default:
            goto scan_number_done;
    }

scan_number_done:
    // unget the character after the number (we only read it to know that
    // we are done scanning a number)
    unget();

    char* endptr = nullptr; // NOLINT(misc-const-correctness,cppcoreguidelines-pro-type-vararg,hicpp-vararg)
    // clear errno so that the ERANGE checks below only reflect the
    // conversion performed in this call
    errno = 0;

    // try to parse integers first and fall back to floats
    if (number_type == token_type::value_unsigned)
    {
        const auto x = std::strtoull(token_buffer.data(), &endptr, 10);

        // we checked the number format before
        JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());

        if (errno != ERANGE)
        {
            // accept the value only if the cast to number_unsigned_t did
            // not change it
            value_unsigned = static_cast<number_unsigned_t>(x);
            if (value_unsigned == x)
            {
                return token_type::value_unsigned;
            }
        }
    }
    else if (number_type == token_type::value_integer)
    {
        const auto x = std::strtoll(token_buffer.data(), &endptr, 10);

        // we checked the number format before
        JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());

        if (errno != ERANGE)
        {
            // accept the value only if the cast to number_integer_t did
            // not change it
            value_integer = static_cast<number_integer_t>(x);
            if (value_integer == x)
            {
                return token_type::value_integer;
            }
        }
    }

    // this code is reached if we parse a floating-point number or if an
    // integer conversion above failed (out of range, or not representable
    // in the target integer type)
    strtof(value_float, token_buffer.data(), &endptr);

    // we checked the number format before
    JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());

    return token_type::value_float;
}
1296
1297     /*!
1298     @param[in] literal_text  the literal text to expect
1299     @param[in] length        the length of the passed literal text
1300     @param[in] return_type   the token type to return on success
1301     */
1302     JSON_HEDLEY_NON_NULL(2)
1303     token_type scan_literal(const char_type* literal_text, const std::size_t length,
1304                             token_type return_type)
1305     {
1306         JSON_ASSERT(char_traits<char_type>::to_char_type(current) == literal_text[0]);
1307         for (std::size_t i = 1; i < length; ++i)
1308         {
1309             if (JSON_HEDLEY_UNLIKELY(char_traits<char_type>::to_char_type(get()) != literal_text[i]))
1310             {
1311                 error_message = "invalid literal";
1312                 return token_type::parse_error;
1313             }
1314         }
1315         return return_type;
1316     }
1317
1318     /////////////////////
1319     // input management
1320     /////////////////////
1321
1322     /// reset token_buffer; current character is beginning of token
1323     void reset() noexcept
1324     {
1325         token_buffer.clear();
1326         token_string.clear();
1327         decimal_point_position = std::string::npos;
1328         token_string.push_back(char_traits<char_type>::to_char_type(current));
1329     }
1330
1331     /*
1332     @brief get next character from the input
1333
1334     This function provides the interface to the used input adapter. It does
1335     not throw in case the input reached EOF, but returns a
1336     `char_traits<char>::eof()` in that case.  Stores the scanned characters
1337     for use in error messages.
1338
1339     @return character read from the input
1340     */
1341     char_int_type get()
1342     {
1343         ++position.chars_read_total;
1344         ++position.chars_read_current_line;
1345
1346         if (next_unget)
1347         {
1348             // just reset the next_unget variable and work with current
1349             next_unget = false;
1350         }
1351         else
1352         {
1353             current = ia.get_character();
1354         }
1355
1356         if (JSON_HEDLEY_LIKELY(current != char_traits<char_type>::eof()))
1357         {
1358             token_string.push_back(char_traits<char_type>::to_char_type(current));
1359         }
1360
1361         if (current == '\n')
1362         {
1363             ++position.lines_read;
1364             position.chars_read_current_line = 0;
1365         }
1366
1367         return current;
1368     }
1369
1370     /*!
1371     @brief unget current character (read it again on next get)
1372
1373     We implement unget by setting variable next_unget to true. The input is not
1374     changed - we just simulate ungetting by modifying chars_read_total,
1375     chars_read_current_line, and token_string. The next call to get() will
1376     behave as if the unget character is read again.
1377     */
1378     void unget()
1379     {
1380         next_unget = true;
1381
1382         --position.chars_read_total;
1383
1384         // in case we "unget" a newline, we have to also decrement the lines_read
1385         if (position.chars_read_current_line == 0)
1386         {
1387             if (position.lines_read > 0)
1388             {
1389                 --position.lines_read;
1390             }
1391         }
1392         else
1393         {
1394             --position.chars_read_current_line;
1395         }
1396
1397         if (JSON_HEDLEY_LIKELY(current != char_traits<char_type>::eof()))
1398         {
1399             JSON_ASSERT(!token_string.empty());
1400             token_string.pop_back();
1401         }
1402     }
1403
1404     /// add a character to token_buffer
1405     void add(char_int_type c)
1406     {
1407         token_buffer.push_back(static_cast<typename string_t::value_type>(c));
1408     }
1409
1410   public:
1411     /////////////////////
1412     // value getters
1413     /////////////////////
1414
/// return integer value
/// (value_integer, as stored by scan_number() when it reports
/// token_type::value_integer)
constexpr number_integer_t get_number_integer() const noexcept
{
    return value_integer;
}

/// return unsigned integer value
/// (value_unsigned, as stored by scan_number() when it reports
/// token_type::value_unsigned)
constexpr number_unsigned_t get_number_unsigned() const noexcept
{
    return value_unsigned;
}

/// return floating-point value
/// (value_float, as stored by scan_number() when it reports
/// token_type::value_float)
constexpr number_float_t get_number_float() const noexcept
{
    return value_float;
}
1432
1433     /// return current string value (implicitly resets the token; useful only once)
1434     string_t& get_string()
1435     {
1436         // translate decimal points from locale back to '.' (#4084)
1437         if (decimal_point_char != '.' && decimal_point_position != std::string::npos)
1438         {
1439             token_buffer[decimal_point_position] = '.';
1440         }
1441         return token_buffer;
1442     }
1443
1444     /////////////////////
1445     // diagnostics
1446     /////////////////////
1447
/// return position of last read token
/// (a copy of the counters maintained by get() and unget(): total
/// characters read, characters read in the current line, and lines read)
constexpr position_t get_position() const noexcept
{
    return position;
}
1453
1454     /// return the last read token (for errors only).  Will never contain EOF
1455     /// (an arbitrary value that is not a valid char value, often -1), because
1456     /// 255 may legitimately occur.  May contain NUL, which should be escaped.
1457     std::string get_token_string() const
1458     {
1459         // escape control characters
1460         std::string result;
1461         for (const auto c : token_string)
1462         {
1463             if (static_cast<unsigned char>(c) <= '\x1F')
1464             {
1465                 // escape control characters
1466                 std::array<char, 9> cs{{}};
1467                 static_cast<void>((std::snprintf)(cs.data(), cs.size(), "<U+%.4X>", static_cast<unsigned char>(c))); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
1468                 result += cs.data();
1469             }
1470             else
1471             {
1472                 // add character as is
1473                 result.push_back(static_cast<std::string::value_type>(c));
1474             }
1475         }
1476
1477         return result;
1478     }
1479
/// return syntax error message
/// (the text most recently stored in error_message by one of the scan
/// functions; the member is initialized to an empty string)
JSON_HEDLEY_RETURNS_NON_NULL
constexpr const char* get_error_message() const noexcept
{
    return error_message;
}
1486
1487     /////////////////////
1488     // actual scanner
1489     /////////////////////
1490
1491     /*!
1492     @brief skip the UTF-8 byte order mark
1493     @return true iff there is no BOM or the correct BOM has been skipped
1494     */
1495     bool skip_bom()
1496     {
1497         if (get() == 0xEF)
1498         {
1499             // check if we completely parse the BOM
1500             return get() == 0xBB && get() == 0xBF;
1501         }
1502
1503         // the first character is not the beginning of the BOM; unget it to
1504         // process is later
1505         unget();
1506         return true;
1507     }
1508
1509     void skip_whitespace()
1510     {
1511         do
1512         {
1513             get();
1514         }
1515         while (current == ' ' || current == '\t' || current == '\n' || current == '\r');
1516     }
1517
1518     token_type scan()
1519     {
1520         // initially, skip the BOM
1521         if (position.chars_read_total == 0 && !skip_bom())
1522         {
1523             error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given";
1524             return token_type::parse_error;
1525         }
1526
1527         // read next character and ignore whitespace
1528         skip_whitespace();
1529
1530         // ignore comments
1531         while (ignore_comments && current == '/')
1532         {
1533             if (!scan_comment())
1534             {
1535                 return token_type::parse_error;
1536             }
1537
1538             // skip following whitespace
1539             skip_whitespace();
1540         }
1541
1542         switch (current)
1543         {
1544             // structural characters
1545             case '[':
1546                 return token_type::begin_array;
1547             case ']':
1548                 return token_type::end_array;
1549             case '{':
1550                 return token_type::begin_object;
1551             case '}':
1552                 return token_type::end_object;
1553             case ':':
1554                 return token_type::name_separator;
1555             case ',':
1556                 return token_type::value_separator;
1557
1558             // literals
1559             case 't':
1560             {
1561                 std::array<char_type, 4> true_literal = {{static_cast<char_type>('t'), static_cast<char_type>('r'), static_cast<char_type>('u'), static_cast<char_type>('e')}};
1562                 return scan_literal(true_literal.data(), true_literal.size(), token_type::literal_true);
1563             }
1564             case 'f':
1565             {
1566                 std::array<char_type, 5> false_literal = {{static_cast<char_type>('f'), static_cast<char_type>('a'), static_cast<char_type>('l'), static_cast<char_type>('s'), static_cast<char_type>('e')}};
1567                 return scan_literal(false_literal.data(), false_literal.size(), token_type::literal_false);
1568             }
1569             case 'n':
1570             {
1571                 std::array<char_type, 4> null_literal = {{static_cast<char_type>('n'), static_cast<char_type>('u'), static_cast<char_type>('l'), static_cast<char_type>('l')}};
1572                 return scan_literal(null_literal.data(), null_literal.size(), token_type::literal_null);
1573             }
1574
1575             // string
1576             case '\"':
1577                 return scan_string();
1578
1579             // number
1580             case '-':
1581             case '0':
1582             case '1':
1583             case '2':
1584             case '3':
1585             case '4':
1586             case '5':
1587             case '6':
1588             case '7':
1589             case '8':
1590             case '9':
1591                 return scan_number();
1592
1593             // end of input (the null byte is needed when parsing from
1594             // string literals)
1595             case '\0':
1596             case char_traits<char_type>::eof():
1597                 return token_type::end_of_input;
1598
1599             // error
1600             default:
1601                 error_message = "invalid literal";
1602                 return token_type::parse_error;
1603         }
1604     }
1605
1606   private:
/// input adapter; get() pulls characters from it via get_character()
InputAdapterType ia;

/// whether comments should be ignored (true) or signaled as errors (false);
/// scan() only calls scan_comment() when this is set
const bool ignore_comments = false;

/// the current character, i.e. the one most recently returned by get()
/// (EOF until the first read)
char_int_type current = char_traits<char_type>::eof();

/// whether the next get() call should just return current; set by unget()
/// and cleared again by get()
bool next_unget = false;

/// the read position (total characters, characters in the current line,
/// lines read); advanced by get() and rolled back by unget()
position_t position {};

/// raw input token string (for error messages); cleared by reset() and
/// extended by get() with every character other than EOF
std::vector<char_type> token_string {};

/// buffer for variable-length tokens (numbers, strings); filled via add()
string_t token_buffer {};

/// a description of occurred lexer errors
const char* error_message = "";

// number values; scan_number() stores its result in the member matching
// the token type it returns
number_integer_t value_integer = 0;
number_unsigned_t value_unsigned = 0;
number_float_t value_float = 0;

/// the decimal point character written to token_buffer by scan_number()
/// NOTE(review): defaults to '.'; presumably set to the locale's decimal
/// point by a constructor outside this excerpt -- confirm
const char_int_type decimal_point_char = '.';
/// the position of the decimal point in token_buffer (std::string::npos if
/// none was recorded since the last reset())
std::size_t decimal_point_position = std::string::npos;
1640 };
1641
1642 }  // namespace detail
1643 NLOHMANN_JSON_NAMESPACE_END