File indexing completed on 2025-09-17 09:03:22
0001
0002
0003
0004
0005
0006
0007
0008
0009 #pragma once
0010
0011 #include <array> // array
0012 #include <clocale> // localeconv
0013 #include <cstddef> // size_t
0014 #include <cstdio> // snprintf
0015 #include <cstdlib> // strtof, strtod, strtold, strtoll, strtoull
0016 #include <initializer_list> // initializer_list
0017 #include <string> // char_traits, string
0018 #include <utility> // move
0019 #include <vector> // vector
0020
0021 #include <nlohmann/detail/input/input_adapters.hpp>
0022 #include <nlohmann/detail/input/position_t.hpp>
0023 #include <nlohmann/detail/macro_scope.hpp>
0024 #include <nlohmann/detail/meta/type_traits.hpp>
0025
0026 NLOHMANN_JSON_NAMESPACE_BEGIN
0027 namespace detail
0028 {
0029
0030
0031
0032
0033
0034 template<typename BasicJsonType>
0035 class lexer_base
0036 {
0037 public:
0038
0039 enum class token_type
0040 {
0041 uninitialized,
0042 literal_true,
0043 literal_false,
0044 literal_null,
0045 value_string,
0046 value_unsigned,
0047 value_integer,
0048 value_float,
0049 begin_array,
0050 begin_object,
0051 end_array,
0052 end_object,
0053 name_separator,
0054 value_separator,
0055 parse_error,
0056 end_of_input,
0057 literal_or_value
0058 };
0059
0060
0061 JSON_HEDLEY_RETURNS_NON_NULL
0062 JSON_HEDLEY_CONST
0063 static const char* token_type_name(const token_type t) noexcept
0064 {
0065 switch (t)
0066 {
0067 case token_type::uninitialized:
0068 return "<uninitialized>";
0069 case token_type::literal_true:
0070 return "true literal";
0071 case token_type::literal_false:
0072 return "false literal";
0073 case token_type::literal_null:
0074 return "null literal";
0075 case token_type::value_string:
0076 return "string literal";
0077 case token_type::value_unsigned:
0078 case token_type::value_integer:
0079 case token_type::value_float:
0080 return "number literal";
0081 case token_type::begin_array:
0082 return "'['";
0083 case token_type::begin_object:
0084 return "'{'";
0085 case token_type::end_array:
0086 return "']'";
0087 case token_type::end_object:
0088 return "'}'";
0089 case token_type::name_separator:
0090 return "':'";
0091 case token_type::value_separator:
0092 return "','";
0093 case token_type::parse_error:
0094 return "<parse error>";
0095 case token_type::end_of_input:
0096 return "end of input";
0097 case token_type::literal_or_value:
0098 return "'[', '{', or a literal";
0099
0100 default:
0101 return "unknown token";
0102
0103 }
0104 }
0105 };
0106
0107
0108
0109
0110
0111 template<typename BasicJsonType, typename InputAdapterType>
0112 class lexer : public lexer_base<BasicJsonType>
0113 {
0114 using number_integer_t = typename BasicJsonType::number_integer_t; // type of value_integer (filled from std::strtoll in scan_number)
0115 using number_unsigned_t = typename BasicJsonType::number_unsigned_t; // type of value_unsigned (filled from std::strtoull in scan_number)
0116 using number_float_t = typename BasicJsonType::number_float_t; // type of value_float (filled through the strtof overloads)
0117 using string_t = typename BasicJsonType::string_t; // type of token_buffer
0118 using char_type = typename InputAdapterType::char_type; // character type stored in token_string
0119 using char_int_type = typename char_traits<char_type>::int_type; // type of `current`; also compared against char_traits<char_type>::eof()
0120 // make the token enumeration of the base class available under the same name
0121 public:
0122 using token_type = typename lexer_base<BasicJsonType>::token_type;
0123
0124 /// @brief store @a adapter (moved), remember whether scan() skips comments, and cache the C locale's decimal point
0125 explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false) noexcept
0126     : ia(std::move(adapter))
0127     , ignore_comments(ignore_comments_)
0128     , decimal_point_char(static_cast<char_int_type>(get_decimal_point()))
0129 {}
0130 // not copyable; copy assignment uses the canonical const-reference parameter, matching the deleted copy constructor
0131 lexer(const lexer&) = delete;
0132 lexer(lexer&&) = default;
0133 lexer& operator=(const lexer&) = delete;
0134 lexer& operator=(lexer&&) = default; // NOTE(review): the const data members make this defaulted operator deleted
0135 ~lexer() = default;
0136
0137 private:
0138 /// @brief decimal point character of the current C locale
0139 /// @return first character of localeconv()->decimal_point, or '.' when that pointer is null
0140 JSON_HEDLEY_PURE
0141 static char get_decimal_point() noexcept
0142 {
0143     const auto* const locale_info = localeconv();
0144     JSON_ASSERT(locale_info != nullptr);
0145     const char* const separator = locale_info->decimal_point;
0146     return (separator != nullptr) ? separator[0] : '.';
0147 }
0150
0151
0152
0153
0154
0155
0156
0157
0158
0159
0160
0161
0162
0163
0164
0165
0166
0167
0168
0169
0170 /// @brief read the four hex digits after "\u" (requires current == 'u'); returns the code point, or -1 on a non-hex character
0171 int get_codepoint()
0172 {
0173     JSON_ASSERT(current == 'u');
0174     unsigned int accumulated = 0u;
0175     // most significant nibble first: the digits are shifted by 12, 8, 4 and 0 bits
0176     for (int nibble = 3; nibble >= 0; --nibble)
0177     {
0178         get();
0179         unsigned int digit_base = 0u; // subtracted from the character code to obtain the nibble value
0180         if (current >= '0' && current <= '9')
0181         {
0182             digit_base = 0x30u;
0183         }
0184         else if (current >= 'A' && current <= 'F')
0185         {
0186             digit_base = 0x37u;
0187         }
0188         else if (current >= 'a' && current <= 'f')
0189         {
0190             digit_base = 0x57u;
0191         }
0192         else
0193         {
0194             return -1;
0195         }
0196         accumulated += (static_cast<unsigned int>(current) - digit_base) << (4u * static_cast<unsigned int>(nibble));
0197     }
0198     const int codepoint = static_cast<int>(accumulated);
0199     JSON_ASSERT(0x0000 <= codepoint && codepoint <= 0xFFFF);
0200     return codepoint;
0201 }
0202
0203
0204
0205
0206
0207
0208
0209
0210
0211
0212
0213
0214
0215
0216
0217
0218 /// @brief append `current` to token_buffer, then read and append one byte per (lower, upper) pair in @a ranges
0219 /// @return true if every byte read lies within its pair; otherwise false with error_message set
0220 bool next_byte_in_range(std::initializer_list<char_int_type> ranges)
0221 {
0222     JSON_ASSERT(ranges.size() == 2 || ranges.size() == 4 || ranges.size() == 6);
0223     add(current);
0224     // ranges holds consecutive (lower, upper) bounds, so walk it two elements at a time
0225     for (auto bounds = ranges.begin(); bounds != ranges.end(); bounds += 2)
0226     {
0227         get();
0228         const bool inside = (bounds[0] <= current) && (current <= bounds[1]);
0229         if (JSON_HEDLEY_UNLIKELY(!inside))
0230         {
0231             error_message = "invalid string: ill-formed UTF-8 byte";
0232             return false;
0233         }
0234         add(current);
0235     }
0236     return true;
0237 }
0239
0240
0241
0242
0243
0244
0245
0246
0247
0248
0249
0250
0251
0252
0253
0254
0255 token_type scan_string()
0256 {
0257 // scan a string literal: decode escapes, validate UTF-8 and collect the result in token_buffer (reset first)
0258 reset();
0259 // returns value_string at the closing quote; parse_error with error_message set on any invalid input
0260 // precondition: the opening quote has just been read
0261 JSON_ASSERT(current == '\"');
0262
0263 while (true)
0264 {
0265 // read the next character and dispatch on it
0266 switch (get())
0267 {
0268 // end of input before the closing quote
0269 case char_traits<char_type>::eof():
0270 {
0271 error_message = "invalid string: missing closing quote";
0272 return token_type::parse_error;
0273 }
0274
0275 // closing quote: the string is complete
0276 case '\"':
0277 {
0278 return token_type::value_string;
0279 }
0280
0281 // escape sequence: dispatch on the character after the backslash
0282 case '\\':
0283 {
0284 switch (get())
0285 {
0286 // quotation mark
0287 case '\"':
0288 add('\"');
0289 break;
0290 // reverse solidus
0291 case '\\':
0292 add('\\');
0293 break;
0294 // solidus
0295 case '/':
0296 add('/');
0297 break;
0298 // backspace
0299 case 'b':
0300 add('\b');
0301 break;
0302 // form feed
0303 case 'f':
0304 add('\f');
0305 break;
0306 // line feed
0307 case 'n':
0308 add('\n');
0309 break;
0310 // carriage return
0311 case 'r':
0312 add('\r');
0313 break;
0314 // tab
0315 case 't':
0316 add('\t');
0317 break;
0318
0319 // unicode escape (four hex digits), possibly the first half of a surrogate pair
0320 case 'u':
0321 {
0322 const int codepoint1 = get_codepoint();
0323 int codepoint = codepoint1; // start with codepoint1
0324
0325 if (JSON_HEDLEY_UNLIKELY(codepoint1 == -1))
0326 {
0327 error_message = "invalid string: '\\u' must be followed by 4 hex digits";
0328 return token_type::parse_error;
0329 }
0330
0331 // check whether codepoint1 is a high surrogate
0332 if (0xD800 <= codepoint1 && codepoint1 <= 0xDBFF)
0333 {
0334 // a second unicode escape must follow immediately
0335 if (JSON_HEDLEY_LIKELY(get() == '\\' && get() == 'u'))
0336 {
0337 const int codepoint2 = get_codepoint();
0338
0339 if (JSON_HEDLEY_UNLIKELY(codepoint2 == -1))
0340 {
0341 error_message = "invalid string: '\\u' must be followed by 4 hex digits";
0342 return token_type::parse_error;
0343 }
0344
0345 // check whether codepoint2 is a low surrogate
0346 if (JSON_HEDLEY_LIKELY(0xDC00 <= codepoint2 && codepoint2 <= 0xDFFF))
0347 {
0348 // combine both surrogates into a single code point
0349 codepoint = static_cast<int>(
0350 // the high surrogate provides the upper bits
0351 (static_cast<unsigned int>(codepoint1) << 10u)
0352 // the low surrogate provides the lower bits
0353 + static_cast<unsigned int>(codepoint2)
0354 // remove the surrogate offsets and add the 0x10000 base:
0355 // (0xD800 << 10) + 0xDC00 - 0x10000 = 0x35FDC00
0356
0357 - 0x35FDC00u);
0358 }
0359 else
0360 {
0361 error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
0362 return token_type::parse_error;
0363 }
0364 }
0365 else
0366 {
0367 error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
0368 return token_type::parse_error;
0369 }
0370 }
0371 else
0372 {
0373 if (JSON_HEDLEY_UNLIKELY(0xDC00 <= codepoint1 && codepoint1 <= 0xDFFF))
0374 {
0375 error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF";
0376 return token_type::parse_error;
0377 }
0378 }
0379
0380 // the result of the above must be a valid Unicode code point
0381 JSON_ASSERT(0x00 <= codepoint && codepoint <= 0x10FFFF);
0382
0383 // append the code point to token_buffer encoded as UTF-8
0384 if (codepoint < 0x80)
0385 {
0386 // 1-byte sequence: 0xxxxxxx
0387 add(static_cast<char_int_type>(codepoint));
0388 }
0389 else if (codepoint <= 0x7FF)
0390 {
0391 // 2-byte sequence: 110xxxxx 10xxxxxx
0392 add(static_cast<char_int_type>(0xC0u | (static_cast<unsigned int>(codepoint) >> 6u)));
0393 add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
0394 }
0395 else if (codepoint <= 0xFFFF)
0396 {
0397 // 3-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
0398 add(static_cast<char_int_type>(0xE0u | (static_cast<unsigned int>(codepoint) >> 12u)));
0399 add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
0400 add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
0401 }
0402 else
0403 {
0404 // 4-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
0405 add(static_cast<char_int_type>(0xF0u | (static_cast<unsigned int>(codepoint) >> 18u)));
0406 add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 12u) & 0x3Fu)));
0407 add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
0408 add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
0409 }
0410
0411 break;
0412 }
0413
0414 // any other character after a backslash is rejected
0415 default:
0416 error_message = "invalid string: forbidden character after backslash";
0417 return token_type::parse_error;
0418 }
0419
0420 break;
0421 }
0422
0423 // unescaped control characters U+0000..U+001F are rejected, each with its own message
0424 case 0x00:
0425 {
0426 error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000";
0427 return token_type::parse_error;
0428 }
0429
0430 case 0x01:
0431 {
0432 error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001";
0433 return token_type::parse_error;
0434 }
0435
0436 case 0x02:
0437 {
0438 error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002";
0439 return token_type::parse_error;
0440 }
0441
0442 case 0x03:
0443 {
0444 error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003";
0445 return token_type::parse_error;
0446 }
0447
0448 case 0x04:
0449 {
0450 error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004";
0451 return token_type::parse_error;
0452 }
0453
0454 case 0x05:
0455 {
0456 error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005";
0457 return token_type::parse_error;
0458 }
0459
0460 case 0x06:
0461 {
0462 error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006";
0463 return token_type::parse_error;
0464 }
0465
0466 case 0x07:
0467 {
0468 error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007";
0469 return token_type::parse_error;
0470 }
0471
0472 case 0x08:
0473 {
0474 error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b";
0475 return token_type::parse_error;
0476 }
0477
0478 case 0x09:
0479 {
0480 error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t";
0481 return token_type::parse_error;
0482 }
0483
0484 case 0x0A:
0485 {
0486 error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n";
0487 return token_type::parse_error;
0488 }
0489
0490 case 0x0B:
0491 {
0492 error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B";
0493 return token_type::parse_error;
0494 }
0495
0496 case 0x0C:
0497 {
0498 error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f";
0499 return token_type::parse_error;
0500 }
0501
0502 case 0x0D:
0503 {
0504 error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r";
0505 return token_type::parse_error;
0506 }
0507
0508 case 0x0E:
0509 {
0510 error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E";
0511 return token_type::parse_error;
0512 }
0513
0514 case 0x0F:
0515 {
0516 error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F";
0517 return token_type::parse_error;
0518 }
0519
0520 case 0x10:
0521 {
0522 error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010";
0523 return token_type::parse_error;
0524 }
0525
0526 case 0x11:
0527 {
0528 error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011";
0529 return token_type::parse_error;
0530 }
0531
0532 case 0x12:
0533 {
0534 error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012";
0535 return token_type::parse_error;
0536 }
0537
0538 case 0x13:
0539 {
0540 error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013";
0541 return token_type::parse_error;
0542 }
0543
0544 case 0x14:
0545 {
0546 error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014";
0547 return token_type::parse_error;
0548 }
0549
0550 case 0x15:
0551 {
0552 error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015";
0553 return token_type::parse_error;
0554 }
0555
0556 case 0x16:
0557 {
0558 error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016";
0559 return token_type::parse_error;
0560 }
0561
0562 case 0x17:
0563 {
0564 error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017";
0565 return token_type::parse_error;
0566 }
0567
0568 case 0x18:
0569 {
0570 error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018";
0571 return token_type::parse_error;
0572 }
0573
0574 case 0x19:
0575 {
0576 error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019";
0577 return token_type::parse_error;
0578 }
0579
0580 case 0x1A:
0581 {
0582 error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A";
0583 return token_type::parse_error;
0584 }
0585
0586 case 0x1B:
0587 {
0588 error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B";
0589 return token_type::parse_error;
0590 }
0591
0592 case 0x1C:
0593 {
0594 error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C";
0595 return token_type::parse_error;
0596 }
0597
0598 case 0x1D:
0599 {
0600 error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D";
0601 return token_type::parse_error;
0602 }
0603
0604 case 0x1E:
0605 {
0606 error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E";
0607 return token_type::parse_error;
0608 }
0609
0610 case 0x1F:
0611 {
0612 error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F";
0613 return token_type::parse_error;
0614 }
0615
0616 // 0x20..0x7F are copied unchanged; 0x22 (quote) and 0x5C (backslash) are handled above
0617 case 0x20:
0618 case 0x21:
0619 case 0x23:
0620 case 0x24:
0621 case 0x25:
0622 case 0x26:
0623 case 0x27:
0624 case 0x28:
0625 case 0x29:
0626 case 0x2A:
0627 case 0x2B:
0628 case 0x2C:
0629 case 0x2D:
0630 case 0x2E:
0631 case 0x2F:
0632 case 0x30:
0633 case 0x31:
0634 case 0x32:
0635 case 0x33:
0636 case 0x34:
0637 case 0x35:
0638 case 0x36:
0639 case 0x37:
0640 case 0x38:
0641 case 0x39:
0642 case 0x3A:
0643 case 0x3B:
0644 case 0x3C:
0645 case 0x3D:
0646 case 0x3E:
0647 case 0x3F:
0648 case 0x40:
0649 case 0x41:
0650 case 0x42:
0651 case 0x43:
0652 case 0x44:
0653 case 0x45:
0654 case 0x46:
0655 case 0x47:
0656 case 0x48:
0657 case 0x49:
0658 case 0x4A:
0659 case 0x4B:
0660 case 0x4C:
0661 case 0x4D:
0662 case 0x4E:
0663 case 0x4F:
0664 case 0x50:
0665 case 0x51:
0666 case 0x52:
0667 case 0x53:
0668 case 0x54:
0669 case 0x55:
0670 case 0x56:
0671 case 0x57:
0672 case 0x58:
0673 case 0x59:
0674 case 0x5A:
0675 case 0x5B:
0676 case 0x5D:
0677 case 0x5E:
0678 case 0x5F:
0679 case 0x60:
0680 case 0x61:
0681 case 0x62:
0682 case 0x63:
0683 case 0x64:
0684 case 0x65:
0685 case 0x66:
0686 case 0x67:
0687 case 0x68:
0688 case 0x69:
0689 case 0x6A:
0690 case 0x6B:
0691 case 0x6C:
0692 case 0x6D:
0693 case 0x6E:
0694 case 0x6F:
0695 case 0x70:
0696 case 0x71:
0697 case 0x72:
0698 case 0x73:
0699 case 0x74:
0700 case 0x75:
0701 case 0x76:
0702 case 0x77:
0703 case 0x78:
0704 case 0x79:
0705 case 0x7A:
0706 case 0x7B:
0707 case 0x7C:
0708 case 0x7D:
0709 case 0x7E:
0710 case 0x7F:
0711 {
0712 add(current);
0713 break;
0714 }
0715
0716 // lead bytes 0xC2..0xDF: one further byte in 0x80..0xBF must follow
0717 case 0xC2:
0718 case 0xC3:
0719 case 0xC4:
0720 case 0xC5:
0721 case 0xC6:
0722 case 0xC7:
0723 case 0xC8:
0724 case 0xC9:
0725 case 0xCA:
0726 case 0xCB:
0727 case 0xCC:
0728 case 0xCD:
0729 case 0xCE:
0730 case 0xCF:
0731 case 0xD0:
0732 case 0xD1:
0733 case 0xD2:
0734 case 0xD3:
0735 case 0xD4:
0736 case 0xD5:
0737 case 0xD6:
0738 case 0xD7:
0739 case 0xD8:
0740 case 0xD9:
0741 case 0xDA:
0742 case 0xDB:
0743 case 0xDC:
0744 case 0xDD:
0745 case 0xDE:
0746 case 0xDF:
0747 {
0748 if (JSON_HEDLEY_UNLIKELY(!next_byte_in_range({0x80, 0xBF})))
0749 {
0750 return token_type::parse_error;
0751 }
0752 break;
0753 }
0754
0755 // lead byte 0xE0: second byte restricted to 0xA0..0xBF, third byte 0x80..0xBF
0756 case 0xE0:
0757 {
0758 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF}))))
0759 {
0760 return token_type::parse_error;
0761 }
0762 break;
0763 }
0764
0765 // lead bytes 0xE1..0xEC, 0xEE and 0xEF: two further bytes, each in 0x80..0xBF
0766
0767 case 0xE1:
0768 case 0xE2:
0769 case 0xE3:
0770 case 0xE4:
0771 case 0xE5:
0772 case 0xE6:
0773 case 0xE7:
0774 case 0xE8:
0775 case 0xE9:
0776 case 0xEA:
0777 case 0xEB:
0778 case 0xEC:
0779 case 0xEE:
0780 case 0xEF:
0781 {
0782 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF}))))
0783 {
0784 return token_type::parse_error;
0785 }
0786 break;
0787 }
0788
0789 // lead byte 0xED: second byte restricted to 0x80..0x9F (0xED 0xA0.. would encode U+D800 and above, the surrogates)
0790 case 0xED:
0791 {
0792 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x9F, 0x80, 0xBF}))))
0793 {
0794 return token_type::parse_error;
0795 }
0796 break;
0797 }
0798
0799 // lead byte 0xF0: second byte restricted to 0x90..0xBF, then two bytes in 0x80..0xBF
0800 case 0xF0:
0801 {
0802 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
0803 {
0804 return token_type::parse_error;
0805 }
0806 break;
0807 }
0808
0809 // lead bytes 0xF1..0xF3: three further bytes, each in 0x80..0xBF
0810 case 0xF1:
0811 case 0xF2:
0812 case 0xF3:
0813 {
0814 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
0815 {
0816 return token_type::parse_error;
0817 }
0818 break;
0819 }
0820
0821 // lead byte 0xF4: second byte restricted to 0x80..0x8F, then two bytes in 0x80..0xBF
0822 case 0xF4:
0823 {
0824 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}))))
0825 {
0826 return token_type::parse_error;
0827 }
0828 break;
0829 }
0830
0831 // every remaining value (e.g. 0x80..0xC1 or 0xF5..0xFF) cannot start a sequence accepted above
0832 default:
0833 {
0834 error_message = "invalid string: ill-formed UTF-8 byte";
0835 return token_type::parse_error;
0836 }
0837 }
0838 }
0839 }
0840
0841
0842
0843
0844
0845 /// @brief skip a comment whose leading '/' has already been read
0846 /// @return true if a complete single-line or block comment was consumed;
0847 ///         false with error_message set otherwise
0848 bool scan_comment()
0849 {
0850     const auto opener = get();
0851
0852     if (opener == '/')
0853     {
0854         // single-line comment: runs until a line break, a NUL character or the end of the input
0855         while (true)
0856         {
0857             const auto ch = get();
0858             if (ch == '\n' || ch == '\r' || ch == char_traits<char_type>::eof() || ch == '\0')
0859             {
0860                 return true;
0861             }
0862         }
0863     }
0864
0865     if (opener == '*')
0866     {
0867         // block comment: runs until the first '*' that is directly followed by '/'
0868         while (true)
0869         {
0870             const auto ch = get();
0871             if (ch == char_traits<char_type>::eof() || ch == '\0')
0872             {
0873                 error_message = "invalid comment; missing closing '*/'";
0874                 return false;
0875             }
0876             if (ch != '*')
0877             {
0878                 continue;
0879             }
0880             if (get() == '/')
0881             {
0882                 return true;
0883             }
0884             // the character after '*' may itself be '*' or the end of input, so examine it again
0885             unget();
0886         }
0887     }
0888
0889     error_message = "invalid comment; expecting '/' or '*' after '/'";
0890     return false;
0891 }
0911
0912 JSON_HEDLEY_NON_NULL(2) // overload for float: parse str with std::strtof and store the result in f
0913 static void strtof(float& f, const char* str, char** endptr) noexcept
0914 {
0915 f = std::strtof(str, endptr);
0916 }
0917 // overload for double: same contract, parsed with std::strtod
0918 JSON_HEDLEY_NON_NULL(2)
0919 static void strtof(double& f, const char* str, char** endptr) noexcept
0920 {
0921 f = std::strtod(str, endptr);
0922 }
0923 // overload for long double: same contract, parsed with std::strtold
0924 JSON_HEDLEY_NON_NULL(2)
0925 static void strtof(long double& f, const char* str, char** endptr) noexcept
0926 {
0927 f = std::strtold(str, endptr);
0928 }
0929
0930
0931
0932
0933
0934
0935
0936
0937
0938
0939
0940
0941
0942
0943
0944
0945
0946
0947
0948
0949
0950
0951
0952
0953
0954
0955
0956
0957
0958
0959
0960
0961
0962
0963
0964
0965
0966
0967
0968
0969
0970 token_type scan_number()
0971 {
0972 // scan a number literal with a goto-based state machine, then convert the collected text; reset the buffers first
0973 reset();
0974
0975 // the type is changed below: to value_integer after a minus sign,
0976 // and to value_float after a decimal point or an exponent
0977 token_type number_type = token_type::value_unsigned;
0978
0979 // state (init): dispatch on the first character, which is already in `current`
0980 switch (current)
0981 {
0982 case '-':
0983 {
0984 add(current);
0985 goto scan_number_minus;
0986 }
0987
0988 case '0':
0989 {
0990 add(current);
0991 goto scan_number_zero;
0992 }
0993
0994 case '1':
0995 case '2':
0996 case '3':
0997 case '4':
0998 case '5':
0999 case '6':
1000 case '7':
1001 case '8':
1002 case '9':
1003 {
1004 add(current);
1005 goto scan_number_any1;
1006 }
1007
1008 // scan() only dispatches to scan_number() for '-' and the digits
1009 default:
1010 JSON_ASSERT(false);
1011 }
1012
1013 scan_number_minus:
1014 // state: a minus sign was just read
1015 number_type = token_type::value_integer;
1016 switch (get())
1017 {
1018 case '0':
1019 {
1020 add(current);
1021 goto scan_number_zero;
1022 }
1023
1024 case '1':
1025 case '2':
1026 case '3':
1027 case '4':
1028 case '5':
1029 case '6':
1030 case '7':
1031 case '8':
1032 case '9':
1033 {
1034 add(current);
1035 goto scan_number_any1;
1036 }
1037
1038 default:
1039 {
1040 error_message = "invalid number; expected digit after '-'";
1041 return token_type::parse_error;
1042 }
1043 }
1044
1045 scan_number_zero:
1046 // state: a leading zero was just read; only '.', an exponent, or the end of the number may follow
1047 switch (get())
1048 {
1049 case '.':
1050 {
1051 add(decimal_point_char); // the locale's decimal point is stored instead of '.'; the buffer is parsed by strtof() below
1052 goto scan_number_decimal1;
1053 }
1054
1055 case 'e':
1056 case 'E':
1057 {
1058 add(current);
1059 goto scan_number_exponent;
1060 }
1061
1062 default:
1063 goto scan_number_done;
1064 }
1065
1066 scan_number_any1:
1067 // state: reading the digits of the integer part
1068 switch (get())
1069 {
1070 case '0':
1071 case '1':
1072 case '2':
1073 case '3':
1074 case '4':
1075 case '5':
1076 case '6':
1077 case '7':
1078 case '8':
1079 case '9':
1080 {
1081 add(current);
1082 goto scan_number_any1;
1083 }
1084
1085 case '.':
1086 {
1087 add(decimal_point_char);
1088 goto scan_number_decimal1;
1089 }
1090
1091 case 'e':
1092 case 'E':
1093 {
1094 add(current);
1095 goto scan_number_exponent;
1096 }
1097
1098 default:
1099 goto scan_number_done;
1100 }
1101
1102 scan_number_decimal1:
1103 // state: the decimal point was just read; at least one digit must follow
1104 number_type = token_type::value_float;
1105 switch (get())
1106 {
1107 case '0':
1108 case '1':
1109 case '2':
1110 case '3':
1111 case '4':
1112 case '5':
1113 case '6':
1114 case '7':
1115 case '8':
1116 case '9':
1117 {
1118 add(current);
1119 goto scan_number_decimal2;
1120 }
1121
1122 default:
1123 {
1124 error_message = "invalid number; expected digit after '.'";
1125 return token_type::parse_error;
1126 }
1127 }
1128
1129 scan_number_decimal2:
1130 // state: reading the digits of the fractional part
1131 switch (get())
1132 {
1133 case '0':
1134 case '1':
1135 case '2':
1136 case '3':
1137 case '4':
1138 case '5':
1139 case '6':
1140 case '7':
1141 case '8':
1142 case '9':
1143 {
1144 add(current);
1145 goto scan_number_decimal2;
1146 }
1147
1148 case 'e':
1149 case 'E':
1150 {
1151 add(current);
1152 goto scan_number_exponent;
1153 }
1154
1155 default:
1156 goto scan_number_done;
1157 }
1158
1159 scan_number_exponent:
1160 // state: 'e' or 'E' was just read; a sign or a digit must follow
1161 number_type = token_type::value_float;
1162 switch (get())
1163 {
1164 case '+':
1165 case '-':
1166 {
1167 add(current);
1168 goto scan_number_sign;
1169 }
1170
1171 case '0':
1172 case '1':
1173 case '2':
1174 case '3':
1175 case '4':
1176 case '5':
1177 case '6':
1178 case '7':
1179 case '8':
1180 case '9':
1181 {
1182 add(current);
1183 goto scan_number_any2;
1184 }
1185
1186 default:
1187 {
1188 error_message =
1189 "invalid number; expected '+', '-', or digit after exponent";
1190 return token_type::parse_error;
1191 }
1192 }
1193
1194 scan_number_sign:
1195 // state: the exponent sign was just read; a digit must follow
1196 switch (get())
1197 {
1198 case '0':
1199 case '1':
1200 case '2':
1201 case '3':
1202 case '4':
1203 case '5':
1204 case '6':
1205 case '7':
1206 case '8':
1207 case '9':
1208 {
1209 add(current);
1210 goto scan_number_any2;
1211 }
1212
1213 default:
1214 {
1215 error_message = "invalid number; expected digit after exponent sign";
1216 return token_type::parse_error;
1217 }
1218 }
1219
1220 scan_number_any2:
1221 // state: reading the digits of the exponent
1222 switch (get())
1223 {
1224 case '0':
1225 case '1':
1226 case '2':
1227 case '3':
1228 case '4':
1229 case '5':
1230 case '6':
1231 case '7':
1232 case '8':
1233 case '9':
1234 {
1235 add(current);
1236 goto scan_number_any2;
1237 }
1238
1239 default:
1240 goto scan_number_done;
1241 }
1242
1243 scan_number_done:
1244 // the character that ended the number is not part of it:
1245 // hand it back so the next get() delivers it again
1246 unget();
1247
1248 char* endptr = nullptr;
1249 errno = 0; // cleared so that a nonzero value after the conversion stems from the conversion
1250
1251 // try the integer conversions first; fall through to the float conversion if they do not succeed
1252 if (number_type == token_type::value_unsigned)
1253 {
1254 const auto x = std::strtoull(token_buffer.data(), &endptr, 10);
1255
1256 // the conversion must have consumed the whole buffer
1257 JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1258
1259 if (errno == 0)
1260 {
1261 value_unsigned = static_cast<number_unsigned_t>(x);
1262 if (value_unsigned == x) // accept only if the cast to number_unsigned_t kept the value
1263 {
1264 return token_type::value_unsigned;
1265 }
1266 }
1267 }
1268 else if (number_type == token_type::value_integer)
1269 {
1270 const auto x = std::strtoll(token_buffer.data(), &endptr, 10);
1271
1272 // the conversion must have consumed the whole buffer
1273 JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1274
1275 if (errno == 0)
1276 {
1277 value_integer = static_cast<number_integer_t>(x);
1278 if (value_integer == x) // accept only if the cast to number_integer_t kept the value
1279 {
1280 return token_type::value_integer;
1281 }
1282 }
1283 }
1284
1285 // reached for floating-point literals and for integers rejected above:
1286 // convert with the strtof overload selected by number_float_t
1287 strtof(value_float, token_buffer.data(), &endptr);
1288
1289 // the conversion must have consumed the whole buffer
1290 JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1291
1292 return token_type::value_float;
1293 }
1294
1295
1296
1297
1298
1299
1300 JSON_HEDLEY_NON_NULL(2)
1301 token_type scan_literal(const char_type* literal_text, const std::size_t length,
1302 token_type return_type)
1303 {
1304 JSON_ASSERT(char_traits<char_type>::to_char_type(current) == literal_text[0]);
1305 for (std::size_t i = 1; i < length; ++i)
1306 {
1307 if (JSON_HEDLEY_UNLIKELY(char_traits<char_type>::to_char_type(get()) != literal_text[i]))
1308 {
1309 error_message = "invalid literal";
1310 return token_type::parse_error;
1311 }
1312 }
1313 return return_type;
1314 }
1315
1316
1317
1318
1319
1320
0321 /// @brief start a new token: empty token_buffer and make token_string hold only the current character
0322 void reset() noexcept
0323 {
0324     token_string.assign(std::size_t{1}, char_traits<char_type>::to_char_type(current));
0325     token_buffer.clear();
0326 }
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338 char_int_type get()
1339 {
1340 ++position.chars_read_total;
1341 ++position.chars_read_current_line;
1342
1343 if (next_unget)
1344 {
1345
1346 next_unget = false;
1347 }
1348 else
1349 {
1350 current = ia.get_character();
1351 }
1352
1353 if (JSON_HEDLEY_LIKELY(current != char_traits<char_type>::eof()))
1354 {
1355 token_string.push_back(char_traits<char_type>::to_char_type(current));
1356 }
1357
1358 if (current == '\n')
1359 {
1360 ++position.lines_read;
1361 position.chars_read_current_line = 0;
1362 }
1363
1364 return current;
1365 }
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375 void unget()
1376 {
1377 next_unget = true;
1378
1379 --position.chars_read_total;
1380
1381
1382 if (position.chars_read_current_line == 0)
1383 {
1384 if (position.lines_read > 0)
1385 {
1386 --position.lines_read;
1387 }
1388 }
1389 else
1390 {
1391 --position.chars_read_current_line;
1392 }
1393
1394 if (JSON_HEDLEY_LIKELY(current != char_traits<char_type>::eof()))
1395 {
1396 JSON_ASSERT(!token_string.empty());
1397 token_string.pop_back();
1398 }
1399 }
1400
1401
0402 void add(char_int_type c) // append c, converted to string_t::value_type, to token_buffer
0403 {
0404 token_buffer.push_back(static_cast<typename string_t::value_type>(c));
0405 }
1406
0407 public:
0408 /////////////////////
0409 // value getters
0410 /////////////////////
0411
0412 /// return value_integer (scan_number() assigns it before returning token_type::value_integer)
0413 constexpr number_integer_t get_number_integer() const noexcept
0414 {
0415 return value_integer;
0416 }
0417
0418 /// return value_unsigned (scan_number() assigns it before returning token_type::value_unsigned)
0419 constexpr number_unsigned_t get_number_unsigned() const noexcept
0420 {
0421 return value_unsigned;
0422 }
0423
0424 /// return value_float (scan_number() assigns it before returning token_type::value_float)
0425 constexpr number_float_t get_number_float() const noexcept
0426 {
0427 return value_float;
0428 }
0429
0430 /// return token_buffer by mutable reference (filled through add() by scan_string() and scan_number())
0431 string_t& get_string()
0432 {
0433 return token_buffer;
0434 }
0435
0436 /////////////////////
0437 // diagnostics
0438 /////////////////////
0439
0440 /// return a copy of the position counters maintained by get() and unget()
0441 constexpr position_t get_position() const noexcept
0442 {
0443 return position;
0444 }
1445
1446
1447
1448
1449 std::string get_token_string() const
1450 {
1451
1452 std::string result;
1453 for (const auto c : token_string)
1454 {
1455 if (static_cast<unsigned char>(c) <= '\x1F')
1456 {
1457
1458 std::array<char, 9> cs{{}};
1459 static_cast<void>((std::snprintf)(cs.data(), cs.size(), "<U+%.4X>", static_cast<unsigned char>(c)));
1460 result += cs.data();
1461 }
1462 else
1463 {
1464
1465 result.push_back(static_cast<std::string::value_type>(c));
1466 }
1467 }
1468
1469 return result;
1470 }
1471
1472
0473 JSON_HEDLEY_RETURNS_NON_NULL // in the code visible here, error_message is only ever assigned string literals
0474 constexpr const char* get_error_message() const noexcept // return the message stored by the most recent failing scan step
0475 {
0476 return error_message;
0477 }
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487 bool skip_bom()
1488 {
1489 if (get() == 0xEF)
1490 {
1491
1492 return get() == 0xBB && get() == 0xBF;
1493 }
1494
1495
1496
1497 unget();
1498 return true;
1499 }
1500
1501 void skip_whitespace()
1502 {
1503 do
1504 {
1505 get();
1506 }
1507 while (current == ' ' || current == '\t' || current == '\n' || current == '\r');
1508 }
1509
1510 token_type scan()
1511 {
1512
1513 if (position.chars_read_total == 0 && !skip_bom())
1514 {
1515 error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given";
1516 return token_type::parse_error;
1517 }
1518
1519
1520 skip_whitespace();
1521
1522
1523 while (ignore_comments && current == '/')
1524 {
1525 if (!scan_comment())
1526 {
1527 return token_type::parse_error;
1528 }
1529
1530
1531 skip_whitespace();
1532 }
1533
1534 switch (current)
1535 {
1536
1537 case '[':
1538 return token_type::begin_array;
1539 case ']':
1540 return token_type::end_array;
1541 case '{':
1542 return token_type::begin_object;
1543 case '}':
1544 return token_type::end_object;
1545 case ':':
1546 return token_type::name_separator;
1547 case ',':
1548 return token_type::value_separator;
1549
1550
1551 case 't':
1552 {
1553 std::array<char_type, 4> true_literal = {{static_cast<char_type>('t'), static_cast<char_type>('r'), static_cast<char_type>('u'), static_cast<char_type>('e')}};
1554 return scan_literal(true_literal.data(), true_literal.size(), token_type::literal_true);
1555 }
1556 case 'f':
1557 {
1558 std::array<char_type, 5> false_literal = {{static_cast<char_type>('f'), static_cast<char_type>('a'), static_cast<char_type>('l'), static_cast<char_type>('s'), static_cast<char_type>('e')}};
1559 return scan_literal(false_literal.data(), false_literal.size(), token_type::literal_false);
1560 }
1561 case 'n':
1562 {
1563 std::array<char_type, 4> null_literal = {{static_cast<char_type>('n'), static_cast<char_type>('u'), static_cast<char_type>('l'), static_cast<char_type>('l')}};
1564 return scan_literal(null_literal.data(), null_literal.size(), token_type::literal_null);
1565 }
1566
1567
1568 case '\"':
1569 return scan_string();
1570
1571
1572 case '-':
1573 case '0':
1574 case '1':
1575 case '2':
1576 case '3':
1577 case '4':
1578 case '5':
1579 case '6':
1580 case '7':
1581 case '8':
1582 case '9':
1583 return scan_number();
1584
1585
1586
1587 case '\0':
1588 case char_traits<char_type>::eof():
1589 return token_type::end_of_input;
1590
1591
1592 default:
1593 error_message = "invalid literal";
1594 return token_type::parse_error;
1595 }
1596 }
1597
0598 private:
0599 /// input adapter; get() reads characters from it with ia.get_character()
0600 InputAdapterType ia;
0601
0602 /// whether scan() skips comments (set by the constructor)
0603 const bool ignore_comments = false;
0604
0605 /// the character most recently delivered by get(); EOF until the first read
0606 char_int_type current = char_traits<char_type>::eof();
0607
0608 /// set by unget(): the next get() delivers `current` again instead of reading from the adapter
0609 bool next_unget = false;
0610
0611 /// position counters maintained by get() and unget()
0612 position_t position {};
0613
0614 /// characters read since the last reset() (EOF is never stored); source of get_token_string()
0615 std::vector<char_type> token_string {};
0616
0617 /// buffer filled through add(): decoded string contents, or the number text that is converted
0618 string_t token_buffer {};
0619
0620 /// description of the most recent error
0621 const char* error_message = "";
0622
0623 /// results of scan_number()
0624 number_integer_t value_integer = 0;
0625 number_unsigned_t value_unsigned = 0;
0626 number_float_t value_float = 0;
0627
0628 /// decimal point of the C locale, captured by the constructor; scan_number() stores it in place of '.'
0629 const char_int_type decimal_point_char = '.';
1630 };
1631
1632 }
1633 NLOHMANN_JSON_NAMESPACE_END