File indexing completed on 2026-05-07 08:43:50
0001
0002
0003
0004
0005
0006
0007
0008
0009 #pragma once
0010
0011 #include <array> // array
0012 #include <clocale> // localeconv
0013 #include <cstddef> // size_t
0014 #include <cstdio> // snprintf
0015 #include <cstdlib> // strtof, strtod, strtold, strtoll, strtoull
0016 #include <initializer_list> // initializer_list
0017 #include <string> // char_traits, string
0018 #include <utility> // move
0019 #include <vector> // vector
0020
0021 #include <nlohmann/detail/input/input_adapters.hpp>
0022 #include <nlohmann/detail/input/position_t.hpp>
0023 #include <nlohmann/detail/macro_scope.hpp>
0024 #include <nlohmann/detail/meta/type_traits.hpp>
0025
0026 NLOHMANN_JSON_NAMESPACE_BEGIN
0027 namespace detail
0028 {
0029
0030
0031
0032
0033
0034 template<typename BasicJsonType>
0035 class lexer_base
0036 {
0037 public:
0038
0039 enum class token_type
0040 {
0041 uninitialized,
0042 literal_true,
0043 literal_false,
0044 literal_null,
0045 value_string,
0046 value_unsigned,
0047 value_integer,
0048 value_float,
0049 begin_array,
0050 begin_object,
0051 end_array,
0052 end_object,
0053 name_separator,
0054 value_separator,
0055 parse_error,
0056 end_of_input,
0057 literal_or_value
0058 };
0059
0060
0061 JSON_HEDLEY_RETURNS_NON_NULL
0062 JSON_HEDLEY_CONST
0063 static const char* token_type_name(const token_type t) noexcept
0064 {
0065 switch (t)
0066 {
0067 case token_type::uninitialized:
0068 return "<uninitialized>";
0069 case token_type::literal_true:
0070 return "true literal";
0071 case token_type::literal_false:
0072 return "false literal";
0073 case token_type::literal_null:
0074 return "null literal";
0075 case token_type::value_string:
0076 return "string literal";
0077 case token_type::value_unsigned:
0078 case token_type::value_integer:
0079 case token_type::value_float:
0080 return "number literal";
0081 case token_type::begin_array:
0082 return "'['";
0083 case token_type::begin_object:
0084 return "'{'";
0085 case token_type::end_array:
0086 return "']'";
0087 case token_type::end_object:
0088 return "'}'";
0089 case token_type::name_separator:
0090 return "':'";
0091 case token_type::value_separator:
0092 return "','";
0093 case token_type::parse_error:
0094 return "<parse error>";
0095 case token_type::end_of_input:
0096 return "end of input";
0097 case token_type::literal_or_value:
0098 return "'[', '{', or a literal";
0099
0100 default:
0101 return "unknown token";
0102
0103 }
0104 }
0105 };
0106
0107
0108
0109
0110
0111 template<typename BasicJsonType, typename InputAdapterType>
0112 class lexer : public lexer_base<BasicJsonType>
0113 {
// number and string types are taken from the basic_json specialization
0114 using number_integer_t = typename BasicJsonType::number_integer_t;
0115 using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
0116 using number_float_t = typename BasicJsonType::number_float_t;
0117 using string_t = typename BasicJsonType::string_t;
// character type delivered by the input adapter, and the matching int_type
// that get() returns and that is compared against char_traits<char_type>::eof()
0118 using char_type = typename InputAdapterType::char_type;
0119 using char_int_type = typename char_traits<char_type>::int_type;
0120
0121 public:
0122 using token_type = typename lexer_base<BasicJsonType>::token_type;
0123
0124 explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false) noexcept
0125 : ia(std::move(adapter))
0126 , ignore_comments(ignore_comments_)
0127 , decimal_point_char(static_cast<char_int_type>(get_decimal_point()))
0128 {}
0129
0130
0131 lexer(const lexer&) = delete;
0132 lexer(lexer&&) = default;
0133 lexer& operator=(lexer&) = delete;
0134 lexer& operator=(lexer&&) = default;
0135 ~lexer() = default;
0136
0137 private:
0138
0139
0140
0141
0142
0143 JSON_HEDLEY_PURE
0144 static char get_decimal_point() noexcept
0145 {
0146 const auto* loc = localeconv();
0147 JSON_ASSERT(loc != nullptr);
0148 return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point);
0149 }
0150
0151
0152
0153
0154
0155
0156
0157
0158
0159
0160
0161
0162
0163
0164
0165
0166
0167
0168
0169
0170 int get_codepoint()
0171 {
0172
0173 JSON_ASSERT(current == 'u');
0174 int codepoint = 0;
0175
0176 const auto factors = { 12u, 8u, 4u, 0u };
0177 for (const auto factor : factors)
0178 {
0179 get();
0180
0181 if (current >= '0' && current <= '9')
0182 {
0183 codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x30u) << factor);
0184 }
0185 else if (current >= 'A' && current <= 'F')
0186 {
0187 codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x37u) << factor);
0188 }
0189 else if (current >= 'a' && current <= 'f')
0190 {
0191 codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x57u) << factor);
0192 }
0193 else
0194 {
0195 return -1;
0196 }
0197 }
0198
0199 JSON_ASSERT(0x0000 <= codepoint && codepoint <= 0xFFFF);
0200 return codepoint;
0201 }
0202
0203
0204
0205
0206
0207
0208
0209
0210
0211
0212
0213
0214
0215
0216
0217
0218 bool next_byte_in_range(std::initializer_list<char_int_type> ranges)
0219 {
0220 JSON_ASSERT(ranges.size() == 2 || ranges.size() == 4 || ranges.size() == 6);
0221 add(current);
0222
0223 for (auto range = ranges.begin(); range != ranges.end(); ++range)
0224 {
0225 get();
0226 if (JSON_HEDLEY_LIKELY(*range <= current && current <= *(++range)))
0227 {
0228 add(current);
0229 }
0230 else
0231 {
0232 error_message = "invalid string: ill-formed UTF-8 byte";
0233 return false;
0234 }
0235 }
0236
0237 return true;
0238 }
0239
0240
0241
0242
0243
0244
0245
0246
0247
0248
0249
0250
0251
0252
0253
0254
0255 token_type scan_string()
0256 {
0257
0258 reset();
0259
0260
0261 JSON_ASSERT(current == '\"');
0262
0263 while (true)
0264 {
0265
0266 switch (get())
0267 {
0268
0269 case char_traits<char_type>::eof():
0270 {
0271 error_message = "invalid string: missing closing quote";
0272 return token_type::parse_error;
0273 }
0274
0275
0276 case '\"':
0277 {
0278 return token_type::value_string;
0279 }
0280
0281
0282 case '\\':
0283 {
0284 switch (get())
0285 {
0286
0287 case '\"':
0288 add('\"');
0289 break;
0290
0291 case '\\':
0292 add('\\');
0293 break;
0294
0295 case '/':
0296 add('/');
0297 break;
0298
0299 case 'b':
0300 add('\b');
0301 break;
0302
0303 case 'f':
0304 add('\f');
0305 break;
0306
0307 case 'n':
0308 add('\n');
0309 break;
0310
0311 case 'r':
0312 add('\r');
0313 break;
0314
0315 case 't':
0316 add('\t');
0317 break;
0318
0319
0320 case 'u':
0321 {
0322 const int codepoint1 = get_codepoint();
0323 int codepoint = codepoint1;
0324
0325 if (JSON_HEDLEY_UNLIKELY(codepoint1 == -1))
0326 {
0327 error_message = "invalid string: '\\u' must be followed by 4 hex digits";
0328 return token_type::parse_error;
0329 }
0330
0331
0332 if (0xD800 <= codepoint1 && codepoint1 <= 0xDBFF)
0333 {
0334
0335 if (JSON_HEDLEY_LIKELY(get() == '\\' && get() == 'u'))
0336 {
0337 const int codepoint2 = get_codepoint();
0338
0339 if (JSON_HEDLEY_UNLIKELY(codepoint2 == -1))
0340 {
0341 error_message = "invalid string: '\\u' must be followed by 4 hex digits";
0342 return token_type::parse_error;
0343 }
0344
0345
0346 if (JSON_HEDLEY_LIKELY(0xDC00 <= codepoint2 && codepoint2 <= 0xDFFF))
0347 {
0348
0349 codepoint = static_cast<int>(
0350
0351 (static_cast<unsigned int>(codepoint1) << 10u)
0352
0353 + static_cast<unsigned int>(codepoint2)
0354
0355
0356
0357 - 0x35FDC00u);
0358 }
0359 else
0360 {
0361 error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
0362 return token_type::parse_error;
0363 }
0364 }
0365 else
0366 {
0367 error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
0368 return token_type::parse_error;
0369 }
0370 }
0371 else
0372 {
0373 if (JSON_HEDLEY_UNLIKELY(0xDC00 <= codepoint1 && codepoint1 <= 0xDFFF))
0374 {
0375 error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF";
0376 return token_type::parse_error;
0377 }
0378 }
0379
0380
0381 JSON_ASSERT(0x00 <= codepoint && codepoint <= 0x10FFFF);
0382
0383
0384 if (codepoint < 0x80)
0385 {
0386
0387 add(static_cast<char_int_type>(codepoint));
0388 }
0389 else if (codepoint <= 0x7FF)
0390 {
0391
0392 add(static_cast<char_int_type>(0xC0u | (static_cast<unsigned int>(codepoint) >> 6u)));
0393 add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
0394 }
0395 else if (codepoint <= 0xFFFF)
0396 {
0397
0398 add(static_cast<char_int_type>(0xE0u | (static_cast<unsigned int>(codepoint) >> 12u)));
0399 add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
0400 add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
0401 }
0402 else
0403 {
0404
0405 add(static_cast<char_int_type>(0xF0u | (static_cast<unsigned int>(codepoint) >> 18u)));
0406 add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 12u) & 0x3Fu)));
0407 add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
0408 add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
0409 }
0410
0411 break;
0412 }
0413
0414
0415 default:
0416 error_message = "invalid string: forbidden character after backslash";
0417 return token_type::parse_error;
0418 }
0419
0420 break;
0421 }
0422
0423
0424 case 0x00:
0425 {
0426 error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000";
0427 return token_type::parse_error;
0428 }
0429
0430 case 0x01:
0431 {
0432 error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001";
0433 return token_type::parse_error;
0434 }
0435
0436 case 0x02:
0437 {
0438 error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002";
0439 return token_type::parse_error;
0440 }
0441
0442 case 0x03:
0443 {
0444 error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003";
0445 return token_type::parse_error;
0446 }
0447
0448 case 0x04:
0449 {
0450 error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004";
0451 return token_type::parse_error;
0452 }
0453
0454 case 0x05:
0455 {
0456 error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005";
0457 return token_type::parse_error;
0458 }
0459
0460 case 0x06:
0461 {
0462 error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006";
0463 return token_type::parse_error;
0464 }
0465
0466 case 0x07:
0467 {
0468 error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007";
0469 return token_type::parse_error;
0470 }
0471
0472 case 0x08:
0473 {
0474 error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b";
0475 return token_type::parse_error;
0476 }
0477
0478 case 0x09:
0479 {
0480 error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t";
0481 return token_type::parse_error;
0482 }
0483
0484 case 0x0A:
0485 {
0486 error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n";
0487 return token_type::parse_error;
0488 }
0489
0490 case 0x0B:
0491 {
0492 error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B";
0493 return token_type::parse_error;
0494 }
0495
0496 case 0x0C:
0497 {
0498 error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f";
0499 return token_type::parse_error;
0500 }
0501
0502 case 0x0D:
0503 {
0504 error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r";
0505 return token_type::parse_error;
0506 }
0507
0508 case 0x0E:
0509 {
0510 error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E";
0511 return token_type::parse_error;
0512 }
0513
0514 case 0x0F:
0515 {
0516 error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F";
0517 return token_type::parse_error;
0518 }
0519
0520 case 0x10:
0521 {
0522 error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010";
0523 return token_type::parse_error;
0524 }
0525
0526 case 0x11:
0527 {
0528 error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011";
0529 return token_type::parse_error;
0530 }
0531
0532 case 0x12:
0533 {
0534 error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012";
0535 return token_type::parse_error;
0536 }
0537
0538 case 0x13:
0539 {
0540 error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013";
0541 return token_type::parse_error;
0542 }
0543
0544 case 0x14:
0545 {
0546 error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014";
0547 return token_type::parse_error;
0548 }
0549
0550 case 0x15:
0551 {
0552 error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015";
0553 return token_type::parse_error;
0554 }
0555
0556 case 0x16:
0557 {
0558 error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016";
0559 return token_type::parse_error;
0560 }
0561
0562 case 0x17:
0563 {
0564 error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017";
0565 return token_type::parse_error;
0566 }
0567
0568 case 0x18:
0569 {
0570 error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018";
0571 return token_type::parse_error;
0572 }
0573
0574 case 0x19:
0575 {
0576 error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019";
0577 return token_type::parse_error;
0578 }
0579
0580 case 0x1A:
0581 {
0582 error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A";
0583 return token_type::parse_error;
0584 }
0585
0586 case 0x1B:
0587 {
0588 error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B";
0589 return token_type::parse_error;
0590 }
0591
0592 case 0x1C:
0593 {
0594 error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C";
0595 return token_type::parse_error;
0596 }
0597
0598 case 0x1D:
0599 {
0600 error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D";
0601 return token_type::parse_error;
0602 }
0603
0604 case 0x1E:
0605 {
0606 error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E";
0607 return token_type::parse_error;
0608 }
0609
0610 case 0x1F:
0611 {
0612 error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F";
0613 return token_type::parse_error;
0614 }
0615
0616
0617 case 0x20:
0618 case 0x21:
0619 case 0x23:
0620 case 0x24:
0621 case 0x25:
0622 case 0x26:
0623 case 0x27:
0624 case 0x28:
0625 case 0x29:
0626 case 0x2A:
0627 case 0x2B:
0628 case 0x2C:
0629 case 0x2D:
0630 case 0x2E:
0631 case 0x2F:
0632 case 0x30:
0633 case 0x31:
0634 case 0x32:
0635 case 0x33:
0636 case 0x34:
0637 case 0x35:
0638 case 0x36:
0639 case 0x37:
0640 case 0x38:
0641 case 0x39:
0642 case 0x3A:
0643 case 0x3B:
0644 case 0x3C:
0645 case 0x3D:
0646 case 0x3E:
0647 case 0x3F:
0648 case 0x40:
0649 case 0x41:
0650 case 0x42:
0651 case 0x43:
0652 case 0x44:
0653 case 0x45:
0654 case 0x46:
0655 case 0x47:
0656 case 0x48:
0657 case 0x49:
0658 case 0x4A:
0659 case 0x4B:
0660 case 0x4C:
0661 case 0x4D:
0662 case 0x4E:
0663 case 0x4F:
0664 case 0x50:
0665 case 0x51:
0666 case 0x52:
0667 case 0x53:
0668 case 0x54:
0669 case 0x55:
0670 case 0x56:
0671 case 0x57:
0672 case 0x58:
0673 case 0x59:
0674 case 0x5A:
0675 case 0x5B:
0676 case 0x5D:
0677 case 0x5E:
0678 case 0x5F:
0679 case 0x60:
0680 case 0x61:
0681 case 0x62:
0682 case 0x63:
0683 case 0x64:
0684 case 0x65:
0685 case 0x66:
0686 case 0x67:
0687 case 0x68:
0688 case 0x69:
0689 case 0x6A:
0690 case 0x6B:
0691 case 0x6C:
0692 case 0x6D:
0693 case 0x6E:
0694 case 0x6F:
0695 case 0x70:
0696 case 0x71:
0697 case 0x72:
0698 case 0x73:
0699 case 0x74:
0700 case 0x75:
0701 case 0x76:
0702 case 0x77:
0703 case 0x78:
0704 case 0x79:
0705 case 0x7A:
0706 case 0x7B:
0707 case 0x7C:
0708 case 0x7D:
0709 case 0x7E:
0710 case 0x7F:
0711 {
0712 add(current);
0713 break;
0714 }
0715
0716
0717 case 0xC2:
0718 case 0xC3:
0719 case 0xC4:
0720 case 0xC5:
0721 case 0xC6:
0722 case 0xC7:
0723 case 0xC8:
0724 case 0xC9:
0725 case 0xCA:
0726 case 0xCB:
0727 case 0xCC:
0728 case 0xCD:
0729 case 0xCE:
0730 case 0xCF:
0731 case 0xD0:
0732 case 0xD1:
0733 case 0xD2:
0734 case 0xD3:
0735 case 0xD4:
0736 case 0xD5:
0737 case 0xD6:
0738 case 0xD7:
0739 case 0xD8:
0740 case 0xD9:
0741 case 0xDA:
0742 case 0xDB:
0743 case 0xDC:
0744 case 0xDD:
0745 case 0xDE:
0746 case 0xDF:
0747 {
0748 if (JSON_HEDLEY_UNLIKELY(!next_byte_in_range({0x80, 0xBF})))
0749 {
0750 return token_type::parse_error;
0751 }
0752 break;
0753 }
0754
0755
0756 case 0xE0:
0757 {
0758 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF}))))
0759 {
0760 return token_type::parse_error;
0761 }
0762 break;
0763 }
0764
0765
0766
0767 case 0xE1:
0768 case 0xE2:
0769 case 0xE3:
0770 case 0xE4:
0771 case 0xE5:
0772 case 0xE6:
0773 case 0xE7:
0774 case 0xE8:
0775 case 0xE9:
0776 case 0xEA:
0777 case 0xEB:
0778 case 0xEC:
0779 case 0xEE:
0780 case 0xEF:
0781 {
0782 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF}))))
0783 {
0784 return token_type::parse_error;
0785 }
0786 break;
0787 }
0788
0789
0790 case 0xED:
0791 {
0792 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x9F, 0x80, 0xBF}))))
0793 {
0794 return token_type::parse_error;
0795 }
0796 break;
0797 }
0798
0799
0800 case 0xF0:
0801 {
0802 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
0803 {
0804 return token_type::parse_error;
0805 }
0806 break;
0807 }
0808
0809
0810 case 0xF1:
0811 case 0xF2:
0812 case 0xF3:
0813 {
0814 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
0815 {
0816 return token_type::parse_error;
0817 }
0818 break;
0819 }
0820
0821
0822 case 0xF4:
0823 {
0824 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}))))
0825 {
0826 return token_type::parse_error;
0827 }
0828 break;
0829 }
0830
0831
0832 default:
0833 {
0834 error_message = "invalid string: ill-formed UTF-8 byte";
0835 return token_type::parse_error;
0836 }
0837 }
0838 }
0839 }
0840
0841
0842
0843
0844
0845 bool scan_comment()
0846 {
0847 switch (get())
0848 {
0849
0850 case '/':
0851 {
0852 while (true)
0853 {
0854 switch (get())
0855 {
0856 case '\n':
0857 case '\r':
0858 case char_traits<char_type>::eof():
0859 case '\0':
0860 return true;
0861
0862 default:
0863 break;
0864 }
0865 }
0866 }
0867
0868
0869 case '*':
0870 {
0871 while (true)
0872 {
0873 switch (get())
0874 {
0875 case char_traits<char_type>::eof():
0876 case '\0':
0877 {
0878 error_message = "invalid comment; missing closing '*/'";
0879 return false;
0880 }
0881
0882 case '*':
0883 {
0884 switch (get())
0885 {
0886 case '/':
0887 return true;
0888
0889 default:
0890 {
0891 unget();
0892 continue;
0893 }
0894 }
0895 }
0896
0897 default:
0898 continue;
0899 }
0900 }
0901 }
0902
0903
0904 default:
0905 {
0906 error_message = "invalid comment; expecting '/' or '*' after '/'";
0907 return false;
0908 }
0909 }
0910 }
0911
// Overload set that picks the C conversion function matching the width of the
// floating-point target, so scan_number() can call strtof(value_float, ...)
// for any of float, double and long double.
// f: receives the parsed value; str: text to parse (declared non-null);
// endptr: forwarded unchanged to the underlying C function.

// float target: std::strtof
0912 JSON_HEDLEY_NON_NULL(2)
0913 static void strtof(float& f, const char* str, char** endptr) noexcept
0914 {
0915 f = std::strtof(str, endptr);
0916 }
0917
// double target: std::strtod
0918 JSON_HEDLEY_NON_NULL(2)
0919 static void strtof(double& f, const char* str, char** endptr) noexcept
0920 {
0921 f = std::strtod(str, endptr);
0922 }
0923
// long double target: std::strtold
0924 JSON_HEDLEY_NON_NULL(2)
0925 static void strtof(long double& f, const char* str, char** endptr) noexcept
0926 {
0927 f = std::strtold(str, endptr);
0928 }
0929
0930
0931
0932
0933
0934
0935
0936
0937
0938
0939
0940
0941
0942
0943
0944
0945
0946
0947
0948
0949
0950
0951
0952
0953
0954
0955
0956
0957
0958
0959
0960
0961
0962
0963
0964
0965
0966
0967
0968
0969
0970 token_type scan_number()
0971 {
0972
0973 reset();
0974
0975
0976
0977 token_type number_type = token_type::value_unsigned;
0978
0979
0980 switch (current)
0981 {
0982 case '-':
0983 {
0984 add(current);
0985 goto scan_number_minus;
0986 }
0987
0988 case '0':
0989 {
0990 add(current);
0991 goto scan_number_zero;
0992 }
0993
0994 case '1':
0995 case '2':
0996 case '3':
0997 case '4':
0998 case '5':
0999 case '6':
1000 case '7':
1001 case '8':
1002 case '9':
1003 {
1004 add(current);
1005 goto scan_number_any1;
1006 }
1007
1008
1009 default:
1010 JSON_ASSERT(false);
1011 }
1012
1013 scan_number_minus:
1014
1015 number_type = token_type::value_integer;
1016 switch (get())
1017 {
1018 case '0':
1019 {
1020 add(current);
1021 goto scan_number_zero;
1022 }
1023
1024 case '1':
1025 case '2':
1026 case '3':
1027 case '4':
1028 case '5':
1029 case '6':
1030 case '7':
1031 case '8':
1032 case '9':
1033 {
1034 add(current);
1035 goto scan_number_any1;
1036 }
1037
1038 default:
1039 {
1040 error_message = "invalid number; expected digit after '-'";
1041 return token_type::parse_error;
1042 }
1043 }
1044
1045 scan_number_zero:
1046
1047 switch (get())
1048 {
1049 case '.':
1050 {
1051 add(decimal_point_char);
1052 decimal_point_position = token_buffer.size() - 1;
1053 goto scan_number_decimal1;
1054 }
1055
1056 case 'e':
1057 case 'E':
1058 {
1059 add(current);
1060 goto scan_number_exponent;
1061 }
1062
1063 default:
1064 goto scan_number_done;
1065 }
1066
1067 scan_number_any1:
1068
1069 switch (get())
1070 {
1071 case '0':
1072 case '1':
1073 case '2':
1074 case '3':
1075 case '4':
1076 case '5':
1077 case '6':
1078 case '7':
1079 case '8':
1080 case '9':
1081 {
1082 add(current);
1083 goto scan_number_any1;
1084 }
1085
1086 case '.':
1087 {
1088 add(decimal_point_char);
1089 decimal_point_position = token_buffer.size() - 1;
1090 goto scan_number_decimal1;
1091 }
1092
1093 case 'e':
1094 case 'E':
1095 {
1096 add(current);
1097 goto scan_number_exponent;
1098 }
1099
1100 default:
1101 goto scan_number_done;
1102 }
1103
1104 scan_number_decimal1:
1105
1106 number_type = token_type::value_float;
1107 switch (get())
1108 {
1109 case '0':
1110 case '1':
1111 case '2':
1112 case '3':
1113 case '4':
1114 case '5':
1115 case '6':
1116 case '7':
1117 case '8':
1118 case '9':
1119 {
1120 add(current);
1121 goto scan_number_decimal2;
1122 }
1123
1124 default:
1125 {
1126 error_message = "invalid number; expected digit after '.'";
1127 return token_type::parse_error;
1128 }
1129 }
1130
1131 scan_number_decimal2:
1132
1133 switch (get())
1134 {
1135 case '0':
1136 case '1':
1137 case '2':
1138 case '3':
1139 case '4':
1140 case '5':
1141 case '6':
1142 case '7':
1143 case '8':
1144 case '9':
1145 {
1146 add(current);
1147 goto scan_number_decimal2;
1148 }
1149
1150 case 'e':
1151 case 'E':
1152 {
1153 add(current);
1154 goto scan_number_exponent;
1155 }
1156
1157 default:
1158 goto scan_number_done;
1159 }
1160
1161 scan_number_exponent:
1162
1163 number_type = token_type::value_float;
1164 switch (get())
1165 {
1166 case '+':
1167 case '-':
1168 {
1169 add(current);
1170 goto scan_number_sign;
1171 }
1172
1173 case '0':
1174 case '1':
1175 case '2':
1176 case '3':
1177 case '4':
1178 case '5':
1179 case '6':
1180 case '7':
1181 case '8':
1182 case '9':
1183 {
1184 add(current);
1185 goto scan_number_any2;
1186 }
1187
1188 default:
1189 {
1190 error_message =
1191 "invalid number; expected '+', '-', or digit after exponent";
1192 return token_type::parse_error;
1193 }
1194 }
1195
1196 scan_number_sign:
1197
1198 switch (get())
1199 {
1200 case '0':
1201 case '1':
1202 case '2':
1203 case '3':
1204 case '4':
1205 case '5':
1206 case '6':
1207 case '7':
1208 case '8':
1209 case '9':
1210 {
1211 add(current);
1212 goto scan_number_any2;
1213 }
1214
1215 default:
1216 {
1217 error_message = "invalid number; expected digit after exponent sign";
1218 return token_type::parse_error;
1219 }
1220 }
1221
1222 scan_number_any2:
1223
1224 switch (get())
1225 {
1226 case '0':
1227 case '1':
1228 case '2':
1229 case '3':
1230 case '4':
1231 case '5':
1232 case '6':
1233 case '7':
1234 case '8':
1235 case '9':
1236 {
1237 add(current);
1238 goto scan_number_any2;
1239 }
1240
1241 default:
1242 goto scan_number_done;
1243 }
1244
1245 scan_number_done:
1246
1247
1248 unget();
1249
1250 char* endptr = nullptr;
1251 errno = 0;
1252
1253
1254 if (number_type == token_type::value_unsigned)
1255 {
1256 const auto x = std::strtoull(token_buffer.data(), &endptr, 10);
1257
1258
1259 JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1260
1261 if (errno != ERANGE)
1262 {
1263 value_unsigned = static_cast<number_unsigned_t>(x);
1264 if (value_unsigned == x)
1265 {
1266 return token_type::value_unsigned;
1267 }
1268 }
1269 }
1270 else if (number_type == token_type::value_integer)
1271 {
1272 const auto x = std::strtoll(token_buffer.data(), &endptr, 10);
1273
1274
1275 JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1276
1277 if (errno != ERANGE)
1278 {
1279 value_integer = static_cast<number_integer_t>(x);
1280 if (value_integer == x)
1281 {
1282 return token_type::value_integer;
1283 }
1284 }
1285 }
1286
1287
1288
1289 strtof(value_float, token_buffer.data(), &endptr);
1290
1291
1292 JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1293
1294 return token_type::value_float;
1295 }
1296
1297
1298
1299
1300
1301
1302 JSON_HEDLEY_NON_NULL(2)
1303 token_type scan_literal(const char_type* literal_text, const std::size_t length,
1304 token_type return_type)
1305 {
1306 JSON_ASSERT(char_traits<char_type>::to_char_type(current) == literal_text[0]);
1307 for (std::size_t i = 1; i < length; ++i)
1308 {
1309 if (JSON_HEDLEY_UNLIKELY(char_traits<char_type>::to_char_type(get()) != literal_text[i]))
1310 {
1311 error_message = "invalid literal";
1312 return token_type::parse_error;
1313 }
1314 }
1315 return return_type;
1316 }
1317
1318
1319
1320
1321
1322
1323 void reset() noexcept
1324 {
1325 token_buffer.clear();
1326 token_string.clear();
1327 decimal_point_position = std::string::npos;
1328 token_string.push_back(char_traits<char_type>::to_char_type(current));
1329 }
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341 char_int_type get()
1342 {
1343 ++position.chars_read_total;
1344 ++position.chars_read_current_line;
1345
1346 if (next_unget)
1347 {
1348
1349 next_unget = false;
1350 }
1351 else
1352 {
1353 current = ia.get_character();
1354 }
1355
1356 if (JSON_HEDLEY_LIKELY(current != char_traits<char_type>::eof()))
1357 {
1358 token_string.push_back(char_traits<char_type>::to_char_type(current));
1359 }
1360
1361 if (current == '\n')
1362 {
1363 ++position.lines_read;
1364 position.chars_read_current_line = 0;
1365 }
1366
1367 return current;
1368 }
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378 void unget()
1379 {
1380 next_unget = true;
1381
1382 --position.chars_read_total;
1383
1384
1385 if (position.chars_read_current_line == 0)
1386 {
1387 if (position.lines_read > 0)
1388 {
1389 --position.lines_read;
1390 }
1391 }
1392 else
1393 {
1394 --position.chars_read_current_line;
1395 }
1396
1397 if (JSON_HEDLEY_LIKELY(current != char_traits<char_type>::eof()))
1398 {
1399 JSON_ASSERT(!token_string.empty());
1400 token_string.pop_back();
1401 }
1402 }
1403
1404
// Appends c, converted to the string type's character type, to token_buffer.
1405 void add(char_int_type c)
1406 {
1407 token_buffer.push_back(static_cast<typename string_t::value_type>(c));
1408 }
1409
1410 public:
1411
1412
1413
1414
1415
// Returns value_integer, which scan_number() sets when it returns
// token_type::value_integer.
1416 constexpr number_integer_t get_number_integer() const noexcept
1417 {
1418 return value_integer;
1419 }
1420
1421
// Returns value_unsigned, which scan_number() sets when it returns
// token_type::value_unsigned.
1422 constexpr number_unsigned_t get_number_unsigned() const noexcept
1423 {
1424 return value_unsigned;
1425 }
1426
1427
// Returns value_float, which scan_number() sets when it returns
// token_type::value_float.
1428 constexpr number_float_t get_number_float() const noexcept
1429 {
1430 return value_float;
1431 }
1432
1433
1434 string_t& get_string()
1435 {
1436
1437 if (decimal_point_char != '.' && decimal_point_position != std::string::npos)
1438 {
1439 token_buffer[decimal_point_position] = '.';
1440 }
1441 return token_buffer;
1442 }
1443
1444
1445
1446
1447
1448
// Returns a copy of the position counters maintained by get() and unget().
1449 constexpr position_t get_position() const noexcept
1450 {
1451 return position;
1452 }
1453
1454
1455
1456
1457 std::string get_token_string() const
1458 {
1459
1460 std::string result;
1461 for (const auto c : token_string)
1462 {
1463 if (static_cast<unsigned char>(c) <= '\x1F')
1464 {
1465
1466 std::array<char, 9> cs{{}};
1467 static_cast<void>((std::snprintf)(cs.data(), cs.size(), "<U+%.4X>", static_cast<unsigned char>(c)));
1468 result += cs.data();
1469 }
1470 else
1471 {
1472
1473 result.push_back(static_cast<std::string::value_type>(c));
1474 }
1475 }
1476
1477 return result;
1478 }
1479
1480
// Returns the text stored by the scan function that last reported a parse
// error; it is the empty string as long as no error has been recorded.
1481 JSON_HEDLEY_RETURNS_NON_NULL
1482 constexpr const char* get_error_message() const noexcept
1483 {
1484 return error_message;
1485 }
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495 bool skip_bom()
1496 {
1497 if (get() == 0xEF)
1498 {
1499
1500 return get() == 0xBB && get() == 0xBF;
1501 }
1502
1503
1504
1505 unget();
1506 return true;
1507 }
1508
1509 void skip_whitespace()
1510 {
1511 do
1512 {
1513 get();
1514 }
1515 while (current == ' ' || current == '\t' || current == '\n' || current == '\r');
1516 }
1517
1518 token_type scan()
1519 {
1520
1521 if (position.chars_read_total == 0 && !skip_bom())
1522 {
1523 error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given";
1524 return token_type::parse_error;
1525 }
1526
1527
1528 skip_whitespace();
1529
1530
1531 while (ignore_comments && current == '/')
1532 {
1533 if (!scan_comment())
1534 {
1535 return token_type::parse_error;
1536 }
1537
1538
1539 skip_whitespace();
1540 }
1541
1542 switch (current)
1543 {
1544
1545 case '[':
1546 return token_type::begin_array;
1547 case ']':
1548 return token_type::end_array;
1549 case '{':
1550 return token_type::begin_object;
1551 case '}':
1552 return token_type::end_object;
1553 case ':':
1554 return token_type::name_separator;
1555 case ',':
1556 return token_type::value_separator;
1557
1558
1559 case 't':
1560 {
1561 std::array<char_type, 4> true_literal = {{static_cast<char_type>('t'), static_cast<char_type>('r'), static_cast<char_type>('u'), static_cast<char_type>('e')}};
1562 return scan_literal(true_literal.data(), true_literal.size(), token_type::literal_true);
1563 }
1564 case 'f':
1565 {
1566 std::array<char_type, 5> false_literal = {{static_cast<char_type>('f'), static_cast<char_type>('a'), static_cast<char_type>('l'), static_cast<char_type>('s'), static_cast<char_type>('e')}};
1567 return scan_literal(false_literal.data(), false_literal.size(), token_type::literal_false);
1568 }
1569 case 'n':
1570 {
1571 std::array<char_type, 4> null_literal = {{static_cast<char_type>('n'), static_cast<char_type>('u'), static_cast<char_type>('l'), static_cast<char_type>('l')}};
1572 return scan_literal(null_literal.data(), null_literal.size(), token_type::literal_null);
1573 }
1574
1575
1576 case '\"':
1577 return scan_string();
1578
1579
1580 case '-':
1581 case '0':
1582 case '1':
1583 case '2':
1584 case '3':
1585 case '4':
1586 case '5':
1587 case '6':
1588 case '7':
1589 case '8':
1590 case '9':
1591 return scan_number();
1592
1593
1594
1595 case '\0':
1596 case char_traits<char_type>::eof():
1597 return token_type::end_of_input;
1598
1599
1600 default:
1601 error_message = "invalid literal";
1602 return token_type::parse_error;
1603 }
1604 }
1605
1606 private:
1607
// input source; characters are pulled from it with get_character() in get()
1608 InputAdapterType ia;
1609
1610
// when true, scan() skips comments between tokens
1611 const bool ignore_comments = false;
1612
1613
// the character most recently delivered by get(); EOF until the first read
1614 char_int_type current = char_traits<char_type>::eof();
1615
1616
// set by unget(): the next get() re-delivers `current` instead of reading
1617 bool next_unget = false;
1618
1619
// counters (total characters, characters in the current line, lines) kept up
// to date by get() and unget()
1620 position_t position {};
1621
1622
// raw characters of the current token; rendered by get_token_string()
1623 std::vector<char_type> token_string {};
1624
1625
// decoded string content or number text of the current token
1626 string_t token_buffer {};
1627
1628
// description of the last error; always points to a string literal
1629 const char* error_message = "";
1630
1631
// results of the last successful scan_number(), one per number token type
1632 number_integer_t value_integer = 0;
1633 number_unsigned_t value_unsigned = 0;
1634 number_float_t value_float = 0;
1635
1636
// decimal point of the C locale, captured in the constructor; scan_number()
// stores it in token_buffer in place of '.'
1637 const char_int_type decimal_point_char = '.';
1638
// index of the decimal point in token_buffer, npos if the token has none
1639 std::size_t decimal_point_position = std::string::npos;
1640 };
1641
1642 }
1643 NLOHMANN_JSON_NAMESPACE_END