File indexing completed on 2025-10-25 08:49:00
0001 
0002 
0003 
0004 
0005 
0006 
0007 
0008 
0009 #pragma once
0010 
0011 #include <array> // array
0012 #include <clocale> // localeconv
0013 #include <cstddef> // size_t
0014 #include <cstdio> // snprintf
0015 #include <cstdlib> // strtof, strtod, strtold, strtoll, strtoull
0016 #include <initializer_list> // initializer_list
0017 #include <string> // char_traits, string
0018 #include <utility> // move
0019 #include <vector> // vector
0020 
0021 #include <nlohmann/detail/input/input_adapters.hpp>
0022 #include <nlohmann/detail/input/position_t.hpp>
0023 #include <nlohmann/detail/macro_scope.hpp>
0024 #include <nlohmann/detail/meta/type_traits.hpp>
0025 
0026 NLOHMANN_JSON_NAMESPACE_BEGIN
0027 namespace detail
0028 {
0029 
0030 
0031 /// Base class of the lexer (class `lexer` in this file derives from it). It only
0032 /// defines the set of token types and a helper that maps a token type to a
0033 /// printable name; the template parameter BasicJsonType is not referenced inside it.
0034 template<typename BasicJsonType>
0035 class lexer_base
0036 {
0037   public:
0038     /// kinds of tokens distinguished by the lexer
0039     enum class token_type
0040     {
0041         uninitialized,    ///< placeholder value; printed as "<uninitialized>"
0042         literal_true,     ///< the `true` literal
0043         literal_false,    ///< the `false` literal
0044         literal_null,     ///< the `null` literal
0045         value_string,     ///< a string literal
0046         value_unsigned,   ///< a number literal (unsigned integer, going by the name)
0047         value_integer,    ///< a number literal (signed integer, going by the name)
0048         value_float,      ///< a number literal (floating-point, going by the name)
0049         begin_array,      ///< the character '['
0050         begin_object,     ///< the character '{'
0051         end_array,        ///< the character ']'
0052         end_object,       ///< the character '}'
0053         name_separator,   ///< the character ':'
0054         value_separator,  ///< the character ','
0055         parse_error,      ///< signals a parse error (the scan functions return it after setting an error message)
0056         end_of_input,     ///< end of the input
0057         literal_or_value  ///< printed as "'[', '{', or a literal"; presumably only used in diagnostics - confirm against callers
0058     };
0059 
0060     /// Return a human-readable name for token type @a t as a string literal (never null; unknown values yield "unknown token").
0061     JSON_HEDLEY_RETURNS_NON_NULL
0062     JSON_HEDLEY_CONST
0063     static const char* token_type_name(const token_type t) noexcept
0064     {
0065         switch (t)
0066         {
0067             case token_type::uninitialized:
0068                 return "<uninitialized>";
0069             case token_type::literal_true:
0070                 return "true literal";
0071             case token_type::literal_false:
0072                 return "false literal";
0073             case token_type::literal_null:
0074                 return "null literal";
0075             case token_type::value_string:
0076                 return "string literal";
0077             case token_type::value_unsigned: // all three number kinds share one name
0078             case token_type::value_integer:
0079             case token_type::value_float:
0080                 return "number literal";
0081             case token_type::begin_array:
0082                 return "'['";
0083             case token_type::begin_object:
0084                 return "'{'";
0085             case token_type::end_array:
0086                 return "']'";
0087             case token_type::end_object:
0088                 return "'}'";
0089             case token_type::name_separator:
0090                 return "':'";
0091             case token_type::value_separator:
0092                 return "','";
0093             case token_type::parse_error:
0094                 return "<parse error>";
0095             case token_type::end_of_input:
0096                 return "end of input";
0097             case token_type::literal_or_value:
0098                 return "'[', '{', or a literal";
0099             // defensive fallback: every enumerator is handled above, so this is only reached for out-of-range values
0100             default: 
0101                 return "unknown token";
0102                 
0103         }
0104     }
0105 };
0106 
0107 
0108 
0109 
0110 
0111 template<typename BasicJsonType, typename InputAdapterType>
0112 class lexer : public lexer_base<BasicJsonType>
0113 {
0114     using number_integer_t = typename BasicJsonType::number_integer_t; // the number and string types are taken from the BasicJsonType template argument
0115     using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
0116     using number_float_t = typename BasicJsonType::number_float_t;
0117     using string_t = typename BasicJsonType::string_t;
0118     using char_type = typename InputAdapterType::char_type; // character type declared by the input adapter
0119     using char_int_type = typename char_traits<char_type>::int_type; // int_type that char_traits associates with char_type
0120 
0121   public:
0122     using token_type = typename lexer_base<BasicJsonType>::token_type; // make the base class's token_type enum available under the short name
0123     /// Construct a lexer from an input adapter; the adapter is taken by rvalue reference and moved into `ia`. Declared noexcept.
0124     explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false) noexcept
0125         : ia(std::move(adapter))
0126         , ignore_comments(ignore_comments_) // flag is only stored here; defaults to false
0127         , decimal_point_char(static_cast<char_int_type>(get_decimal_point())) // locale decimal point, queried once at construction
0128     {}
0129 
0130     // Copying is disabled (both copy operations are deleted); move construction and move assignment use the compiler-generated versions.
0131     lexer(const lexer&) = delete;
0132     lexer(lexer&&) = default; 
0133     lexer& operator=(const lexer&) = delete; // conventional const& signature, matching the deleted copy constructor
0134     lexer& operator=(lexer&&) = default; 
0135     ~lexer() = default;
0136 
0137   private:
0138     
0139     
0140     
0141     /// Return the decimal point character reported by localeconv() for the current C locale.
0142     /// Falls back to '.' when the locale's decimal_point pointer is null; only the first character of the string is used.
0143     JSON_HEDLEY_PURE
0144     static char get_decimal_point() noexcept
0145     {
0146         const auto* loc = localeconv();
0147         JSON_ASSERT(loc != nullptr);
0148         return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point);
0149     }
0150 
0151     
0152     
0153     
0154 
0155     
0156 
0157 
0158 
0159 
0160 
0161 
0162     /// @brief read the four hexadecimal digits of a "\uXXXX" escape and combine them into one value
0163     ///
0164     /// Must be entered with `current == 'u'` (asserted). Calls get() up to four times;
0165     /// each character read is interpreted as a hexadecimal digit (0-9, A-F, a-f) and
0166     /// shifted into place, most significant nibble first (shift amounts 12, 8, 4, 0).
0167     ///
0168     /// @return the value in the range 0x0000..0xFFFF, or -1 as soon as a character
0169     ///         that is not a hexadecimal digit has been read (remaining digits are not read)
0170     int get_codepoint()
0171     {
0172         // only meaningful directly after the 'u' of a "\u" escape has been read
0173         JSON_ASSERT(current == 'u');
0174         int codepoint = 0;
0175 
0176         const auto factors = { 12u, 8u, 4u, 0u }; // bit shifts for the four nibbles, most significant first
0177         for (const auto factor : factors)
0178         {
0179             get();
0180 
0181             if (current >= '0' && current <= '9')
0182             {
0183                 codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x30u) << factor); // 0x30 is '0' in ASCII
0184             }
0185             else if (current >= 'A' && current <= 'F')
0186             {
0187                 codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x37u) << factor); // 0x37 is 'A' - 10 in ASCII
0188             }
0189             else if (current >= 'a' && current <= 'f')
0190             {
0191                 codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x57u) << factor); // 0x57 is 'a' - 10 in ASCII
0192             }
0193             else
0194             {
0195                 return -1; // not a hexadecimal digit
0196             }
0197         }
0198 
0199         JSON_ASSERT(0x0000 <= codepoint && codepoint <= 0xFFFF);
0200         return codepoint;
0201     }
0202 
0203     
0204 
0205 
0206 
0207 
0208 
0209 
0210     /// @brief check the bytes that follow the lead byte of a multi-byte UTF-8 sequence
0211     ///
0212     /// First passes `current` (the lead byte already read by the caller) to add().
0213     /// Then, for every [low, high] pair in @a ranges, reads one byte with get() and
0214     /// requires low <= current <= high; accepted bytes are passed to add() as well.
0215     ///
0216     /// @param ranges  inclusive bounds as a flat list low1, high1, low2, high2, ...; must hold 2, 4 or 6 values (asserted)
0217     /// @return true if every byte read was inside its range; false otherwise, with error_message set
0218     bool next_byte_in_range(std::initializer_list<char_int_type> ranges)
0219     {
0220         JSON_ASSERT(ranges.size() == 2 || ranges.size() == 4 || ranges.size() == 6);
0221         add(current);
0222 
0223         for (auto range = ranges.begin(); range != ranges.end(); ++range)
0224         {
0225             get();
0226             if (JSON_HEDLEY_LIKELY(*range <= current && current <= *(++range))) // the inner ++range moves from the lower to the upper bound; together with the loop's ++range each iteration consumes one pair
0227             {
0228                 add(current);
0229             }
0230             else
0231             {
0232                 error_message = "invalid string: ill-formed UTF-8 byte";
0233                 return false;
0234             }
0235         }
0236 
0237         return true;
0238     }
0239 
0240     
0241 
0242 
0243 
0244 
0245     /// @brief scan a string literal
0246     ///
0247     /// Expects `current` to be the opening quote (asserted). Reads characters with
0248     /// get() until the closing quote and hands the decoded content to add() (defined
0249     /// elsewhere in this class): escape sequences are translated, "\uXXXX" escapes
0250     /// (including surrogate pairs) are re-encoded as UTF-8, raw bytes are checked
0251     /// against the well-formed UTF-8 byte ranges, and unescaped control characters
0252     /// (U+0000..U+001F) are rejected.
0253     ///
0254     /// @return token_type::value_string on success; token_type::parse_error otherwise, with error_message set
0255     token_type scan_string()
0256     {
0257         // reset() is defined elsewhere in this class; presumably it clears the token buffer - confirm
0258         reset();
0259 
0260         // this function must only be entered on the opening quote
0261         JSON_ASSERT(current == '\"');
0262 
0263         while (true)
0264         {
0265             // read the next character and dispatch on it
0266             switch (get())
0267             {
0268                 // the input ended before the closing quote was seen
0269                 case char_traits<char_type>::eof():
0270                 {
0271                     error_message = "invalid string: missing closing quote";
0272                     return token_type::parse_error;
0273                 }
0274 
0275                 // closing quote: the string is complete
0276                 case '\"':
0277                 {
0278                     return token_type::value_string;
0279                 }
0280 
0281                 // backslash: the next character selects the escape sequence
0282                 case '\\':
0283                 {
0284                     switch (get())
0285                     {
0286                         // \" -> quotation mark
0287                         case '\"':
0288                             add('\"');
0289                             break;
0290                         // \\ -> backslash
0291                         case '\\':
0292                             add('\\');
0293                             break;
0294                         // \/ -> slash
0295                         case '/':
0296                             add('/');
0297                             break;
0298                         // \b -> backspace
0299                         case 'b':
0300                             add('\b');
0301                             break;
0302                         // \f -> form feed
0303                         case 'f':
0304                             add('\f');
0305                             break;
0306                         // \n -> line feed
0307                         case 'n':
0308                             add('\n');
0309                             break;
0310                         // \r -> carriage return
0311                         case 'r':
0312                             add('\r');
0313                             break;
0314                         // \t -> horizontal tab
0315                         case 't':
0316                             add('\t');
0317                             break;
0318 
0319                         // \uXXXX -> code point given by four hex digits, possibly the first half of a surrogate pair
0320                         case 'u':
0321                         {
0322                             const int codepoint1 = get_codepoint();
0323                             int codepoint = codepoint1; // value that is finally encoded; replaced below when a surrogate pair is combined
0324 
0325                             if (JSON_HEDLEY_UNLIKELY(codepoint1 == -1))
0326                             {
0327                                 error_message = "invalid string: '\\u' must be followed by 4 hex digits";
0328                                 return token_type::parse_error;
0329                             }
0330 
0331                             // high surrogate (U+D800..U+DBFF): a low surrogate escape must follow
0332                             if (0xD800 <= codepoint1 && codepoint1 <= 0xDBFF)
0333                             {
0334                                 // the next two characters must be "\u" (the second get() only runs if the first saw a backslash)
0335                                 if (JSON_HEDLEY_LIKELY(get() == '\\' && get() == 'u'))
0336                                 {
0337                                     const int codepoint2 = get_codepoint();
0338 
0339                                     if (JSON_HEDLEY_UNLIKELY(codepoint2 == -1))
0340                                     {
0341                                         error_message = "invalid string: '\\u' must be followed by 4 hex digits";
0342                                         return token_type::parse_error;
0343                                     }
0344 
0345                                     // the second escape must be a low surrogate (U+DC00..U+DFFF)
0346                                     if (JSON_HEDLEY_LIKELY(0xDC00 <= codepoint2 && codepoint2 <= 0xDFFF))
0347                                     {
0348                                         // combine both halves: 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)
0349                                         codepoint = static_cast<int>(
0350                                                         // high surrogate contributes the upper bits
0351                                                         (static_cast<unsigned int>(codepoint1) << 10u)
0352                                                         // low surrogate contributes the lower 10 bits
0353                                                         + static_cast<unsigned int>(codepoint2)
0354                                                         // all constant terms folded into one subtraction:
0355                                                         // 0x35FDC00 == (0xD800 << 10) + 0xDC00 - 0x10000
0356                                                         
0357                                                         - 0x35FDC00u);
0358                                     }
0359                                     else
0360                                     {
0361                                         error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
0362                                         return token_type::parse_error;
0363                                     }
0364                                 }
0365                                 else
0366                                 {
0367                                     error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
0368                                     return token_type::parse_error;
0369                                 }
0370                             }
0371                             else
0372                             {
0373                                 if (JSON_HEDLEY_UNLIKELY(0xDC00 <= codepoint1 && codepoint1 <= 0xDFFF)) // a low surrogate without a preceding high surrogate
0374                                 {
0375                                     error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF";
0376                                     return token_type::parse_error;
0377                                 }
0378                             }
0379 
0380                             // at this point the value is a code point in the Unicode range
0381                             JSON_ASSERT(0x00 <= codepoint && codepoint <= 0x10FFFF);
0382 
0383                             // encode the code point as UTF-8 and pass the bytes to add()
0384                             if (codepoint < 0x80)
0385                             {
0386                                 // 1 byte: 0xxxxxxx
0387                                 add(static_cast<char_int_type>(codepoint));
0388                             }
0389                             else if (codepoint <= 0x7FF)
0390                             {
0391                                 // 2 bytes: 110xxxxx 10xxxxxx
0392                                 add(static_cast<char_int_type>(0xC0u | (static_cast<unsigned int>(codepoint) >> 6u)));
0393                                 add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
0394                             }
0395                             else if (codepoint <= 0xFFFF)
0396                             {
0397                                 // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
0398                                 add(static_cast<char_int_type>(0xE0u | (static_cast<unsigned int>(codepoint) >> 12u)));
0399                                 add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
0400                                 add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
0401                             }
0402                             else
0403                             {
0404                                 // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
0405                                 add(static_cast<char_int_type>(0xF0u | (static_cast<unsigned int>(codepoint) >> 18u)));
0406                                 add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 12u) & 0x3Fu)));
0407                                 add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
0408                                 add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
0409                             }
0410 
0411                             break;
0412                         }
0413 
0414                         // any other character after a backslash is not a valid escape
0415                         default:
0416                             error_message = "invalid string: forbidden character after backslash";
0417                             return token_type::parse_error;
0418                     }
0419 
0420                     break;
0421                 }
0422 
0423                 // unescaped control characters U+0000..U+001F are rejected; each one has its own error message
0424                 case 0x00:
0425                 {
0426                     error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000";
0427                     return token_type::parse_error;
0428                 }
0429 
0430                 case 0x01:
0431                 {
0432                     error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001";
0433                     return token_type::parse_error;
0434                 }
0435 
0436                 case 0x02:
0437                 {
0438                     error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002";
0439                     return token_type::parse_error;
0440                 }
0441 
0442                 case 0x03:
0443                 {
0444                     error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003";
0445                     return token_type::parse_error;
0446                 }
0447 
0448                 case 0x04:
0449                 {
0450                     error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004";
0451                     return token_type::parse_error;
0452                 }
0453 
0454                 case 0x05:
0455                 {
0456                     error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005";
0457                     return token_type::parse_error;
0458                 }
0459 
0460                 case 0x06:
0461                 {
0462                     error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006";
0463                     return token_type::parse_error;
0464                 }
0465 
0466                 case 0x07:
0467                 {
0468                     error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007";
0469                     return token_type::parse_error;
0470                 }
0471 
0472                 case 0x08:
0473                 {
0474                     error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b";
0475                     return token_type::parse_error;
0476                 }
0477 
0478                 case 0x09:
0479                 {
0480                     error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t";
0481                     return token_type::parse_error;
0482                 }
0483 
0484                 case 0x0A:
0485                 {
0486                     error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n";
0487                     return token_type::parse_error;
0488                 }
0489 
0490                 case 0x0B:
0491                 {
0492                     error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B";
0493                     return token_type::parse_error;
0494                 }
0495 
0496                 case 0x0C:
0497                 {
0498                     error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f";
0499                     return token_type::parse_error;
0500                 }
0501 
0502                 case 0x0D:
0503                 {
0504                     error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r";
0505                     return token_type::parse_error;
0506                 }
0507 
0508                 case 0x0E:
0509                 {
0510                     error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E";
0511                     return token_type::parse_error;
0512                 }
0513 
0514                 case 0x0F:
0515                 {
0516                     error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F";
0517                     return token_type::parse_error;
0518                 }
0519 
0520                 case 0x10:
0521                 {
0522                     error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010";
0523                     return token_type::parse_error;
0524                 }
0525 
0526                 case 0x11:
0527                 {
0528                     error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011";
0529                     return token_type::parse_error;
0530                 }
0531 
0532                 case 0x12:
0533                 {
0534                     error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012";
0535                     return token_type::parse_error;
0536                 }
0537 
0538                 case 0x13:
0539                 {
0540                     error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013";
0541                     return token_type::parse_error;
0542                 }
0543 
0544                 case 0x14:
0545                 {
0546                     error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014";
0547                     return token_type::parse_error;
0548                 }
0549 
0550                 case 0x15:
0551                 {
0552                     error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015";
0553                     return token_type::parse_error;
0554                 }
0555 
0556                 case 0x16:
0557                 {
0558                     error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016";
0559                     return token_type::parse_error;
0560                 }
0561 
0562                 case 0x17:
0563                 {
0564                     error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017";
0565                     return token_type::parse_error;
0566                 }
0567 
0568                 case 0x18:
0569                 {
0570                     error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018";
0571                     return token_type::parse_error;
0572                 }
0573 
0574                 case 0x19:
0575                 {
0576                     error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019";
0577                     return token_type::parse_error;
0578                 }
0579 
0580                 case 0x1A:
0581                 {
0582                     error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A";
0583                     return token_type::parse_error;
0584                 }
0585 
0586                 case 0x1B:
0587                 {
0588                     error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B";
0589                     return token_type::parse_error;
0590                 }
0591 
0592                 case 0x1C:
0593                 {
0594                     error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C";
0595                     return token_type::parse_error;
0596                 }
0597 
0598                 case 0x1D:
0599                 {
0600                     error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D";
0601                     return token_type::parse_error;
0602                 }
0603 
0604                 case 0x1E:
0605                 {
0606                     error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E";
0607                     return token_type::parse_error;
0608                 }
0609 
0610                 case 0x1F:
0611                 {
0612                     error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F";
0613                     return token_type::parse_error;
0614                 }
0615 
0616                 // single bytes 0x20..0x7F except 0x22 (quote) and 0x5C (backslash), which are handled above: passed on unchanged
0617                 case 0x20:
0618                 case 0x21:
0619                 case 0x23:
0620                 case 0x24:
0621                 case 0x25:
0622                 case 0x26:
0623                 case 0x27:
0624                 case 0x28:
0625                 case 0x29:
0626                 case 0x2A:
0627                 case 0x2B:
0628                 case 0x2C:
0629                 case 0x2D:
0630                 case 0x2E:
0631                 case 0x2F:
0632                 case 0x30:
0633                 case 0x31:
0634                 case 0x32:
0635                 case 0x33:
0636                 case 0x34:
0637                 case 0x35:
0638                 case 0x36:
0639                 case 0x37:
0640                 case 0x38:
0641                 case 0x39:
0642                 case 0x3A:
0643                 case 0x3B:
0644                 case 0x3C:
0645                 case 0x3D:
0646                 case 0x3E:
0647                 case 0x3F:
0648                 case 0x40:
0649                 case 0x41:
0650                 case 0x42:
0651                 case 0x43:
0652                 case 0x44:
0653                 case 0x45:
0654                 case 0x46:
0655                 case 0x47:
0656                 case 0x48:
0657                 case 0x49:
0658                 case 0x4A:
0659                 case 0x4B:
0660                 case 0x4C:
0661                 case 0x4D:
0662                 case 0x4E:
0663                 case 0x4F:
0664                 case 0x50:
0665                 case 0x51:
0666                 case 0x52:
0667                 case 0x53:
0668                 case 0x54:
0669                 case 0x55:
0670                 case 0x56:
0671                 case 0x57:
0672                 case 0x58:
0673                 case 0x59:
0674                 case 0x5A:
0675                 case 0x5B:
0676                 case 0x5D:
0677                 case 0x5E:
0678                 case 0x5F:
0679                 case 0x60:
0680                 case 0x61:
0681                 case 0x62:
0682                 case 0x63:
0683                 case 0x64:
0684                 case 0x65:
0685                 case 0x66:
0686                 case 0x67:
0687                 case 0x68:
0688                 case 0x69:
0689                 case 0x6A:
0690                 case 0x6B:
0691                 case 0x6C:
0692                 case 0x6D:
0693                 case 0x6E:
0694                 case 0x6F:
0695                 case 0x70:
0696                 case 0x71:
0697                 case 0x72:
0698                 case 0x73:
0699                 case 0x74:
0700                 case 0x75:
0701                 case 0x76:
0702                 case 0x77:
0703                 case 0x78:
0704                 case 0x79:
0705                 case 0x7A:
0706                 case 0x7B:
0707                 case 0x7C:
0708                 case 0x7D:
0709                 case 0x7E:
0710                 case 0x7F:
0711                 {
0712                     add(current);
0713                     break;
0714                 }
0715 
0716                 // lead bytes 0xC2..0xDF: two-byte sequence, one continuation byte in 0x80..0xBF must follow
0717                 case 0xC2:
0718                 case 0xC3:
0719                 case 0xC4:
0720                 case 0xC5:
0721                 case 0xC6:
0722                 case 0xC7:
0723                 case 0xC8:
0724                 case 0xC9:
0725                 case 0xCA:
0726                 case 0xCB:
0727                 case 0xCC:
0728                 case 0xCD:
0729                 case 0xCE:
0730                 case 0xCF:
0731                 case 0xD0:
0732                 case 0xD1:
0733                 case 0xD2:
0734                 case 0xD3:
0735                 case 0xD4:
0736                 case 0xD5:
0737                 case 0xD6:
0738                 case 0xD7:
0739                 case 0xD8:
0740                 case 0xD9:
0741                 case 0xDA:
0742                 case 0xDB:
0743                 case 0xDC:
0744                 case 0xDD:
0745                 case 0xDE:
0746                 case 0xDF:
0747                 {
0748                     if (JSON_HEDLEY_UNLIKELY(!next_byte_in_range({0x80, 0xBF})))
0749                     {
0750                         return token_type::parse_error;
0751                     }
0752                     break;
0753                 }
0754 
0755                 // lead byte 0xE0: three-byte sequence whose second byte is restricted to 0xA0..0xBF
0756                 case 0xE0:
0757                 {
0758                     if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF}))))
0759                     {
0760                         return token_type::parse_error;
0761                     }
0762                     break;
0763                 }
0764 
0765                 // lead bytes 0xE1..0xEC, 0xEE and 0xEF: three-byte sequence,
0766                 // both following bytes must be in 0x80..0xBF (0xED is handled separately below)
0767                 case 0xE1:
0768                 case 0xE2:
0769                 case 0xE3:
0770                 case 0xE4:
0771                 case 0xE5:
0772                 case 0xE6:
0773                 case 0xE7:
0774                 case 0xE8:
0775                 case 0xE9:
0776                 case 0xEA:
0777                 case 0xEB:
0778                 case 0xEC:
0779                 case 0xEE:
0780                 case 0xEF:
0781                 {
0782                     if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF}))))
0783                     {
0784                         return token_type::parse_error;
0785                     }
0786                     break;
0787                 }
0788 
0789                 // lead byte 0xED: second byte restricted to 0x80..0x9F, which keeps UTF-8 encoded surrogates (U+D800..U+DFFF) out
0790                 case 0xED:
0791                 {
0792                     if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x9F, 0x80, 0xBF}))))
0793                     {
0794                         return token_type::parse_error;
0795                     }
0796                     break;
0797                 }
0798 
0799                 // lead byte 0xF0: four-byte sequence whose second byte is restricted to 0x90..0xBF
0800                 case 0xF0:
0801                 {
0802                     if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
0803                     {
0804                         return token_type::parse_error;
0805                     }
0806                     break;
0807                 }
0808 
0809                 // lead bytes 0xF1..0xF3: four-byte sequence, all three following bytes must be in 0x80..0xBF
0810                 case 0xF1:
0811                 case 0xF2:
0812                 case 0xF3:
0813                 {
0814                     if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
0815                     {
0816                         return token_type::parse_error;
0817                     }
0818                     break;
0819                 }
0820 
0821                 // lead byte 0xF4: second byte restricted to 0x80..0x8F, so nothing above U+10FFFF is accepted
0822                 case 0xF4:
0823                 {
0824                     if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}))))
0825                     {
0826                         return token_type::parse_error;
0827                     }
0828                     break;
0829                 }
0830 
0831                 // every value without a case above (e.g. 0x80..0xC1 and 0xF5..0xFF) cannot start a well-formed sequence
0832                 default:
0833                 {
0834                     error_message = "invalid string: ill-formed UTF-8 byte";
0835                     return token_type::parse_error;
0836                 }
0837             }
0838         }
0839     }
0840 
0841     /// @brief scan a comment; the error text below implies the caller has already read the leading '/'
0842     ///
0843     /// A "//" comment runs up to and including the next '\n', '\r', NUL or end of input;
0844     /// a "/* ... */" comment runs up to the closing "*/". @return true if a comment was skipped; false otherwise, with error_message set
0845     bool scan_comment()
0846     {
0847         switch (get())
0848         {
0849             // single-line comment: skip everything until the line or the input ends
0850             case '/':
0851             {
0852                 while (true)
0853                 {
0854                     switch (get())
0855                     {
0856                         case '\n':
0857                         case '\r':
0858                         case char_traits<char_type>::eof():
0859                         case '\0':
0860                             return true;
0861 
0862                         default:
0863                             break; // leaves only the inner switch; the while loop keeps reading
0864                     }
0865                 }
0866             }
0867 
0868             // multi-line comment: skip everything until "*/" is found
0869             case '*':
0870             {
0871                 while (true)
0872                 {
0873                     switch (get())
0874                     {
0875                         case char_traits<char_type>::eof(): // input (or a NUL) reached before the comment was closed
0876                         case '\0':
0877                         {
0878                             error_message = "invalid comment; missing closing '*/'";
0879                             return false;
0880                         }
0881 
0882                         case '*':
0883                         {
0884                             switch (get())
0885                             {
0886                                 case '/':
0887                                     return true;
0888 
0889                                 default:
0890                                 {
0891                                     unget(); // presumably pushes the character back so the loop examines it again (e.g. the second '*' of "**/") - confirm against unget()
0892                                     continue;
0893                                 }
0894                             }
0895                         }
0896 
0897                         default:
0898                             continue;
0899                     }
0900                 }
0901             }
0902 
0903             // anything else after the '/' does not start a comment
0904             default:
0905             {
0906                 error_message = "invalid comment; expecting '/' or '*' after '/'";
0907                 return false;
0908             }
0909         }
0910     }
0911 
0912     JSON_HEDLEY_NON_NULL(2)
0913     static void strtof(float& f, const char* str, char** endptr) noexcept  // float overload
0914     {
0915         f = std::strtof(str, endptr);  // str and endptr are forwarded unchanged; the result is stored in f
0916     }
0917 
0918     JSON_HEDLEY_NON_NULL(2)
0919     static void strtof(double& f, const char* str, char** endptr) noexcept  // double overload
0920     {
0921         f = std::strtod(str, endptr);  // str and endptr are forwarded unchanged; the result is stored in f
0922     }
0923 
0924     JSON_HEDLEY_NON_NULL(2)
0925     static void strtof(long double& f, const char* str, char** endptr) noexcept  // long double overload
0926     {
0927         f = std::strtold(str, endptr);  // str and endptr are forwarded unchanged; the result is stored in f
0928     }
0929 
0930     
0931 
0932 
0933 
0934 
0935 
0936 
0937 
0938 
0939 
0940     /// @brief Scan a number literal whose first character is stored in current.
0941     ///
0942     /// The characters of the literal are collected in token_buffer while a state
0943     /// machine with one goto label per state validates its shape:
0944     ///
0945     ///   scan_number_minus     a leading '-' was read; a digit must follow
0946     ///   scan_number_zero      a leading '0' was read; only '.', 'e', or 'E' continue the literal
0947     ///   scan_number_any1      inside the digits of the integer part
0948     ///   scan_number_decimal1  the decimal point was read; a digit must follow
0949     ///   scan_number_decimal2  inside the digits of the fractional part
0950     ///   scan_number_exponent  'e' or 'E' was read; a sign or a digit must follow
0951     ///   scan_number_sign      an exponent sign was read; a digit must follow
0952     ///   scan_number_any2      inside the digits of the exponent
0953     ///   scan_number_done      the literal is complete
0954     ///
0955     /// In scan_number_done, the character that ended the literal is handed back
0956     /// with unget() and the collected text is converted. Literals without '-',
0957     /// '.', or exponent are parsed with std::strtoull, literals with a '-' but
0958     /// without '.' or exponent with std::strtoll. If errno is set by that call, or
0959     /// the result changes when cast to number_unsigned_t / number_integer_t, the
0960     /// text is parsed with strtof() instead; so are all literals containing a '.'
0961     /// or an exponent.
0962     ///
0963     /// @pre current is '-' or a decimal digit (checked with JSON_ASSERT)
0964     ///
0965     /// @return token_type::value_unsigned, token_type::value_integer, or
0966     ///         token_type::value_float when the literal is well-formed; the value
0967     ///         is then stored in value_unsigned, value_integer, or value_float,
0968     ///         respectively. token_type::parse_error when the literal is
0969     ///         malformed; error_message then names the expectation that was not met.
0970     token_type scan_number()  // implemented as a goto-driven state machine
0971     {
0972         // drop what was collected for the previous token (see reset())
0973         reset();
0974 
0975         // assume an unsigned integer; the type is changed below as soon as a
0976         // minus sign, a decimal point, or an exponent is read
0977         token_type number_type = token_type::value_unsigned;
0978 
0979         // initial state: dispatch on the first character of the literal
0980         switch (current)
0981         {
0982             case '-':
0983             {
0984                 add(current);
0985                 goto scan_number_minus;
0986             }
0987 
0988             case '0':
0989             {
0990                 add(current);
0991                 goto scan_number_zero;
0992             }
0993 
0994             case '1':
0995             case '2':
0996             case '3':
0997             case '4':
0998             case '5':
0999             case '6':
1000             case '7':
1001             case '8':
1002             case '9':
1003             {
1004                 add(current);
1005                 goto scan_number_any1;
1006             }
1007 
1008             // any other first character violates the precondition
1009             default:            
1010                 JSON_ASSERT(false); 
1011         }
1012 
1013 scan_number_minus:
1014         // state: a leading minus sign was read; a digit must follow
1015         number_type = token_type::value_integer;
1016         switch (get())
1017         {
1018             case '0':
1019             {
1020                 add(current);
1021                 goto scan_number_zero;
1022             }
1023 
1024             case '1':
1025             case '2':
1026             case '3':
1027             case '4':
1028             case '5':
1029             case '6':
1030             case '7':
1031             case '8':
1032             case '9':
1033             {
1034                 add(current);
1035                 goto scan_number_any1;
1036             }
1037 
1038             default:
1039             {
1040                 error_message = "invalid number; expected digit after '-'";
1041                 return token_type::parse_error;
1042             }
1043         }
1044 
1045 scan_number_zero:
1046         // state: a leading zero was read; no further integer digits are accepted
1047         switch (get())
1048         {
1049             case '.':
1050             {
1051                 add(decimal_point_char);  // decimal_point_char is stored, not the '.' that was read
1052                 goto scan_number_decimal1;
1053             }
1054 
1055             case 'e':
1056             case 'E':
1057             {
1058                 add(current);
1059                 goto scan_number_exponent;
1060             }
1061 
1062             default:
1063                 goto scan_number_done;
1064         }
1065 
1066 scan_number_any1:
1067         // state: one or more digits of the integer part were read
1068         switch (get())
1069         {
1070             case '0':
1071             case '1':
1072             case '2':
1073             case '3':
1074             case '4':
1075             case '5':
1076             case '6':
1077             case '7':
1078             case '8':
1079             case '9':
1080             {
1081                 add(current);
1082                 goto scan_number_any1;
1083             }
1084 
1085             case '.':
1086             {
1087                 add(decimal_point_char);  // decimal_point_char is stored, not the '.' that was read
1088                 goto scan_number_decimal1;
1089             }
1090 
1091             case 'e':
1092             case 'E':
1093             {
1094                 add(current);
1095                 goto scan_number_exponent;
1096             }
1097 
1098             default:
1099                 goto scan_number_done;
1100         }
1101 
1102 scan_number_decimal1:
1103         // state: the decimal point was read; at least one digit must follow
1104         number_type = token_type::value_float;
1105         switch (get())
1106         {
1107             case '0':
1108             case '1':
1109             case '2':
1110             case '3':
1111             case '4':
1112             case '5':
1113             case '6':
1114             case '7':
1115             case '8':
1116             case '9':
1117             {
1118                 add(current);
1119                 goto scan_number_decimal2;
1120             }
1121 
1122             default:
1123             {
1124                 error_message = "invalid number; expected digit after '.'";
1125                 return token_type::parse_error;
1126             }
1127         }
1128 
1129 scan_number_decimal2:
1130         // state: one or more digits of the fractional part were read
1131         switch (get())
1132         {
1133             case '0':
1134             case '1':
1135             case '2':
1136             case '3':
1137             case '4':
1138             case '5':
1139             case '6':
1140             case '7':
1141             case '8':
1142             case '9':
1143             {
1144                 add(current);
1145                 goto scan_number_decimal2;
1146             }
1147 
1148             case 'e':
1149             case 'E':
1150             {
1151                 add(current);
1152                 goto scan_number_exponent;
1153             }
1154 
1155             default:
1156                 goto scan_number_done;
1157         }
1158 
1159 scan_number_exponent:
1160         // state: 'e' or 'E' was read; a sign or a digit must follow
1161         number_type = token_type::value_float;
1162         switch (get())
1163         {
1164             case '+':
1165             case '-':
1166             {
1167                 add(current);
1168                 goto scan_number_sign;
1169             }
1170 
1171             case '0':
1172             case '1':
1173             case '2':
1174             case '3':
1175             case '4':
1176             case '5':
1177             case '6':
1178             case '7':
1179             case '8':
1180             case '9':
1181             {
1182                 add(current);
1183                 goto scan_number_any2;
1184             }
1185 
1186             default:
1187             {
1188                 error_message =
1189                     "invalid number; expected '+', '-', or digit after exponent";
1190                 return token_type::parse_error;
1191             }
1192         }
1193 
1194 scan_number_sign:
1195         // state: the sign of the exponent was read; a digit must follow
1196         switch (get())
1197         {
1198             case '0':
1199             case '1':
1200             case '2':
1201             case '3':
1202             case '4':
1203             case '5':
1204             case '6':
1205             case '7':
1206             case '8':
1207             case '9':
1208             {
1209                 add(current);
1210                 goto scan_number_any2;
1211             }
1212 
1213             default:
1214             {
1215                 error_message = "invalid number; expected digit after exponent sign";
1216                 return token_type::parse_error;
1217             }
1218         }
1219 
1220 scan_number_any2:
1221         // state: one or more digits of the exponent were read
1222         switch (get())
1223         {
1224             case '0':
1225             case '1':
1226             case '2':
1227             case '3':
1228             case '4':
1229             case '5':
1230             case '6':
1231             case '7':
1232             case '8':
1233             case '9':
1234             {
1235                 add(current);
1236                 goto scan_number_any2;
1237             }
1238 
1239             default:
1240                 goto scan_number_done;
1241         }
1242 
1243 scan_number_done:
1244         // the character read last is not part of the literal: hand it back so
1245         // that the next get() returns it again
1246         unget();
1247 
1248         char* endptr = nullptr; 
1249         errno = 0;  // cleared so that an error reported by the conversion below can be detected
1250 
1251         // literal without '-', '.', or exponent: try an unsigned integer first
1252         if (number_type == token_type::value_unsigned)
1253         {
1254             const auto x = std::strtoull(token_buffer.data(), &endptr, 10);
1255 
1256             // the conversion is expected to consume the whole buffer
1257             JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1258 
1259             if (errno == 0)
1260             {
1261                 value_unsigned = static_cast<number_unsigned_t>(x);
1262                 if (value_unsigned == x)
1263                 {
1264                     return token_type::value_unsigned;
1265                 }
1266             }
1267         }
1268         else if (number_type == token_type::value_integer)
1269         {
1270             const auto x = std::strtoll(token_buffer.data(), &endptr, 10);
1271 
1272             // the conversion is expected to consume the whole buffer
1273             JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1274 
1275             if (errno == 0)
1276             {
1277                 value_integer = static_cast<number_integer_t>(x);
1278                 if (value_integer == x)
1279                 {
1280                     return token_type::value_integer;
1281                 }
1282             }
1283         }
1284 
1285         // reached for literals with '.' or exponent, and for integers whose
1286         // conversion set errno or whose value did not survive the cast above
1287         strtof(value_float, token_buffer.data(), &endptr);
1288 
1289         // the conversion is expected to consume the whole buffer
1290         JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1291 
1292         return token_type::value_float;
1293     }
1294 
1295     /// @brief Match the remaining characters of a literal against the input.
1296     /// @param[in] literal_text  expected characters; the first one must equal current
1297     /// @param[in] length        number of characters in literal_text
1298     /// @param[in] return_type   token type to return when all characters match
1299     /// @return return_type on a full match; otherwise token_type::parse_error (error_message is set)
1300     JSON_HEDLEY_NON_NULL(2)
1301     token_type scan_literal(const char_type* literal_text, const std::size_t length, token_type return_type)
1302     {
1303         JSON_ASSERT(char_traits<char_type>::to_char_type(current) == literal_text[0]);
1304         for (std::size_t pos = 1; pos < length; ++pos)
1305         {
1306             const auto actual = char_traits<char_type>::to_char_type(get());
1307             if (JSON_HEDLEY_UNLIKELY(actual != literal_text[pos]))
1308             {
1309                 error_message = "invalid literal";
1310                 return token_type::parse_error;
1311             }
1312         }
1313         return return_type;
1314     }
1315 
1316     
1317     
1318     
1319 
1320     /// start a new token: empty token_buffer and token_string, then record current as the first raw character
1321     void reset() noexcept
1322     {
1323         token_buffer.clear();
1324         token_string.clear();
1325         token_string.push_back(char_traits<char_type>::to_char_type(current));
1326     }
1327 
1328     /// @brief Read the next character and make it the current one.
1329     ///
1330     /// Normally the character is requested from the input adapter. If unget()
1331     /// was called since the previous read, current is reused instead and the
1332     /// next_unget flag is cleared. In both cases the position counters are
1333     /// advanced and, unless current is the EOF marker, the character is appended
1334     /// to token_string. Reading '\n' starts a new line in the position counters.
1335     ///
1336     /// @return the character now stored in current
1337     char_int_type get()
1338     {
1339         ++position.chars_read_total;
1340         ++position.chars_read_current_line;
1341 
1342         if (!next_unget)
1343         {
1344             current = ia.get_character();
1345         }
1346         else
1347         {
1348             // replay: current still holds the character that was put back
1349             next_unget = false;
1350         }
1351 
1352         const bool at_eof = (current == char_traits<char_type>::eof());
1353         if (JSON_HEDLEY_LIKELY(!at_eof))
1354         {
1355             token_string.push_back(char_traits<char_type>::to_char_type(current));
1356         }
1357 
1358         if (current == '\n')
1359         {
1360             position.chars_read_current_line = 0;
1361             ++position.lines_read;
1362         }
1363 
1364         return current;
1365     }
1366 
1367     /// @brief Put the current character back so that the next get() returns it again.
1368     ///
1369     /// The input adapter is not touched: the function only sets next_unget, so
1370     /// that get() reuses current, and reverts the bookkeeping done by get(). The
1371     /// position counters are decremented and, unless current is the EOF marker,
1372     /// the last character is removed from token_string.
1373     ///
1374     /// A column counter of zero is taken to mean that a line break was read
1375     /// last; in that case the line counter is decremented (but not below zero)
1376     /// and the column counter stays at zero.
1377     void unget()
1378     {
1379         next_unget = true;
1380 
1381         --position.chars_read_total;
1382 
1383         // undo the column/line bookkeeping of the matching get()
1384         if (position.chars_read_current_line != 0)
1385         {
1386             --position.chars_read_current_line;
1387         }
1388         else if (position.lines_read > 0)
1389         {
1390             // column zero: a line break was read last, so step back one line
1391             --position.lines_read;
1392         }
1393 
1394         if (JSON_HEDLEY_LIKELY(current != char_traits<char_type>::eof()))
1395         {
1396             JSON_ASSERT(!token_string.empty());
1397             token_string.pop_back();
1398         }
1399     }
1400 
1401     /// append character c to token_buffer, cast to the character type of string_t
1402     void add(char_int_type c)
1403     {
1404         token_buffer.push_back(static_cast<typename string_t::value_type>(c));
1405     }
1406 
1407   public:
1408     
1409     
1410     
1411 
1412     /// return the signed integer value stored by scan_number() (value_integer)
1413     constexpr number_integer_t get_number_integer() const noexcept
1414     {
1415         return value_integer;
1416     }
1417 
1418     /// return the unsigned integer value stored by scan_number() (value_unsigned)
1419     constexpr number_unsigned_t get_number_unsigned() const noexcept
1420     {
1421         return value_unsigned;
1422     }
1423 
1424     /// return the floating-point value stored by scan_number() (value_float)
1425     constexpr number_float_t get_number_float() const noexcept
1426     {
1427         return value_float;
1428     }
1429 
1430     /// return a mutable reference to token_buffer (no copy is made)
1431     string_t& get_string()
1432     {
1433         return token_buffer;
1434     }
1435 
1436     
1437     
1438     
1439 
1440     /// return a copy of the position counters maintained by get() and unget()
1441     constexpr position_t get_position() const noexcept
1442     {
1443         return position;
1444     }
1445 
1446     /// @brief Return the raw characters of the current token in printable form.
1447     ///
1448     /// Characters up to and including 0x1F are rendered as <U+XXXX> (four
1449     /// hexadecimal digits); every other character is copied unchanged.
1450     std::string get_token_string() const
1451     {
1452         std::string printable;
1453         for (const auto raw : token_string)
1454         {
1455             const auto byte = static_cast<unsigned char>(raw);
1456             if (byte > '\x1F')
1457             {
1458                 // not a control character: copy it as it is
1459                 printable.push_back(static_cast<std::string::value_type>(raw));
1460                 continue;
1461             }
1462 
1463             // control character: 8 characters of text plus the terminating null
1464             std::array<char, 9> escaped{{}};
1465             static_cast<void>((std::snprintf)(escaped.data(), escaped.size(), "<U+%.4X>", byte));
1466             printable += escaped.data();
1467         }
1468 
1469         return printable;
1470     }
1471 
1472     /// return the text of the most recently recorded error (error_message)
1473     JSON_HEDLEY_RETURNS_NON_NULL
1474     constexpr const char* get_error_message() const noexcept
1475     {
1476         return error_message;
1477     }
1478 
1479     
1480     
1481     
1482 
1483     /// @brief Skip a UTF-8 byte order mark (0xEF 0xBB 0xBF), if there is one.
1484     ///
1485     /// @return true if the character read is not 0xEF (it is then put back with
1486     ///         unget()) or if the complete mark was consumed; false if 0xEF is
1487     ///         not followed by 0xBB and 0xBF
1488     bool skip_bom()
1489     {
1490         if (get() != 0xEF)
1491         {
1492             // not the start of a byte order mark: hand the character back
1493             unget();
1494             return true;
1495         }
1496 
1497         // 0xEF was consumed; the remaining two bytes of the mark must follow
1498         return get() == 0xBB && get() == 0xBF;
1499     }
1500 
1501     void skip_whitespace()
1502     {
1503         auto c = get();  // at least one character is always read
1504         while (c == ' ' || c == '\t' || c == '\n' || c == '\r')
1505         {
1506             c = get();  // still whitespace: read on; the first other character stays in current
1507         }
1508     }
1509 
1510     token_type scan()
1511     {
1512         const bool at_start = (position.chars_read_total == 0);  // nothing has been read yet
1513         if (at_start && !skip_bom())
1514         {
1515             error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given";
1516             return token_type::parse_error;
1517         }
1518 
1519         // advance to the first character that is not whitespace
1520         skip_whitespace();
1521 
1522         // if comments are ignored, skip each one and the whitespace after it
1523         if (ignore_comments)
1524         {
1525             while (current == '/')
1526             {
1527                 if (!scan_comment())
1528                 {
1529                     return token_type::parse_error;
1530                 }
1531                 skip_whitespace();
1532             }
1533         }
1534 
1535         switch (current)
1536         {
1537             // structural characters are complete tokens on their own
1538             case '[':
1539                 return token_type::begin_array;
1540             case ']':
1541                 return token_type::end_array;
1542             case '{':
1543                 return token_type::begin_object;
1544             case '}':
1545                 return token_type::end_object;
1546             case ':':
1547                 return token_type::name_separator;
1548             case ',':
1549                 return token_type::value_separator;
1550 
1551             // literals: the first character selects the text to match
1552             case 't':
1553             {
1554                 const std::array<char_type, 4> expected = {{static_cast<char_type>('t'), static_cast<char_type>('r'), static_cast<char_type>('u'), static_cast<char_type>('e')}};
1555                 return scan_literal(expected.data(), expected.size(), token_type::literal_true);
1556             }
1557             case 'f':
1558             {
1559                 const std::array<char_type, 5> expected = {{static_cast<char_type>('f'), static_cast<char_type>('a'), static_cast<char_type>('l'), static_cast<char_type>('s'), static_cast<char_type>('e')}};
1560                 return scan_literal(expected.data(), expected.size(), token_type::literal_false);
1561             }
1562             case 'n':
1563             {
1564                 const std::array<char_type, 4> expected = {{static_cast<char_type>('n'), static_cast<char_type>('u'), static_cast<char_type>('l'), static_cast<char_type>('l')}};
1565                 return scan_literal(expected.data(), expected.size(), token_type::literal_null);
1566             }
1567 
1568             // a double quote starts a string
1569             case '\"':
1570                 return scan_string();
1571 
1572             // a minus sign or a digit starts a number
1573             case '-':
1574             case '0':
1575             case '1':
1576             case '2':
1577             case '3':
1578             case '4':
1579             case '5':
1580             case '6':
1581             case '7':
1582             case '8':
1583             case '9':
1584                 return scan_number();
1585 
1586             // a null byte and the EOF marker both end the input
1587             case '\0':
1588             case char_traits<char_type>::eof():
1589                 return token_type::end_of_input;
1590 
1591             // no token starts with any other character
1592             default:
1593                 error_message = "invalid literal";
1594                 return token_type::parse_error;
1595         }
1596     }
1597 
1598   private:
1599     /// source of the input; get() reads from it one character at a time via get_character()
1600     InputAdapterType ia;
1601 
1602     /// whether scan() skips comments; if false, a '/' is reported as "invalid literal"
1603     const bool ignore_comments = false;
1604 
1605     /// the character most recently produced by get()
1606     char_int_type current = char_traits<char_type>::eof();
1607 
1608     /// set by unget(); makes the next get() reuse current instead of reading a new character
1609     bool next_unget = false;
1610 
1611     /// counters for the characters and lines read so far (updated by get() and unget())
1612     position_t position {};
1613 
1614     /// raw characters read for the current token (rendered by get_token_string())
1615     std::vector<char_type> token_string {};
1616 
1617     /// characters collected through add(); parsed by scan_number() and exposed by get_string()
1618     string_t token_buffer {};
1619 
1620     /// description of the most recent error; empty until a scan function records one
1621     const char* error_message = "";
1622 
1623     /// results of the most recent number conversion (see scan_number())
1624     number_integer_t value_integer = 0;
1625     number_unsigned_t value_unsigned = 0;
1626     number_float_t value_float = 0;
1627 
1628     /// the character scan_number() stores in token_buffer in place of a '.'
1629     const char_int_type decimal_point_char = '.';
1630 };
1631 
1632 }  
1633 NLOHMANN_JSON_NAMESPACE_END