File indexing completed on 2025-02-28 10:10:19
0001
0002
0003
0004
0005
0006
0007
0008
0009 #ifndef GOOGLE_PROTOBUF_JSON_INTERNAL_LEXER_H__
0010 #define GOOGLE_PROTOBUF_JSON_INTERNAL_LEXER_H__
0011
0012 #include <array>
0013 #include <cfloat>
0014 #include <cmath>
0015 #include <cstdint>
0016 #include <iostream>
0017 #include <limits>
0018 #include <ostream>
0019 #include <string>
0020 #include <utility>
0021
0022 #include "absl/status/status.h"
0023 #include "absl/status/statusor.h"
0024 #include "absl/strings/match.h"
0025 #include "absl/strings/str_format.h"
0026 #include "absl/strings/string_view.h"
0027 #include "google/protobuf/descriptor.h"
0028 #include "google/protobuf/io/zero_copy_stream.h"
0029 #include "google/protobuf/json/internal/message_path.h"
0030 #include "google/protobuf/json/internal/zero_copy_buffered_stream.h"
0031 #include "google/protobuf/stubs/status_macros.h"
0032
0033
0034
0035 #include "google/protobuf/port_def.inc"
0036
0037 namespace google {
0038 namespace protobuf {
0039 namespace json_internal {
0040
0041
// Caller-supplied knobs for JSON parsing. This is a plain aggregate; the
// non-static members keep their declaration order so positional aggregate
// initialization continues to work.
struct ParseOptions {
  // Recursion budget used when the caller does not choose one.
  static constexpr size_t kDefaultDepth = 100;

  // NOTE(review): consumed outside this header; by name, unknown JSON fields
  // are skipped rather than rejected when set -- confirm at the use sites.
  bool ignore_unknown_fields = false;

  // NOTE(review): consumed outside this header; by name, enum value names are
  // matched without regard to case when set -- confirm at the use sites.
  bool case_insensitive_enum_parsing = false;

  // Remaining nesting budget. JsonLexer decrements it each time it enters an
  // array or object and reports an error once it reaches zero.
  int recursion_depth = static_cast<int>(kDefaultDepth);

  // Relaxes the grammar accepted by JsonLexer: trailing commas in arrays and
  // objects are tolerated and object keys may be unquoted bare words.
  bool allow_legacy_syntax = false;
};
0060
0061
0062 struct JsonLocation {
0063
0064
0065 struct SourceLocation {
0066 static SourceLocation current() { return {}; }
0067 };
0068
0069
0070 size_t offset = 0;
0071 size_t line = 0;
0072 size_t col = 0;
0073 const MessagePath* path = nullptr;
0074
0075
0076 absl::Status Invalid(absl::string_view message,
0077 SourceLocation sl = SourceLocation::current()) const;
0078 };
0079
0080 template <typename T>
0081 struct LocationWith {
0082 T value;
0083 JsonLocation loc;
0084 };
0085
0086 class JsonLexer {
0087 public:
0088
0089 enum Kind {
0090 kObj,
0091 kArr,
0092 kStr,
0093 kNum,
0094 kTrue,
0095 kFalse,
0096 kNull,
0097 };
0098
0099 using SourceLocation = JsonLocation::SourceLocation;
0100
0101 JsonLexer(io::ZeroCopyInputStream* stream, const ParseOptions& options,
0102 MessagePath* path = nullptr, JsonLocation start = {})
0103 : stream_(stream), options_(options), json_loc_(start), path_(path) {
0104 json_loc_.path = path_;
0105 }
0106
0107 const ParseOptions& options() const { return options_; }
0108
0109 const MessagePath& path() const { return *path_; }
0110 MessagePath& path() { return *path_; }
0111
0112
0113 absl::Status Invalid(absl::string_view message,
0114 SourceLocation sl = SourceLocation::current()) {
0115 return json_loc_.Invalid(message, sl);
0116 }
0117
0118
0119
0120 absl::Status Expect(absl::string_view literal,
0121 SourceLocation sl = SourceLocation::current()) {
0122 RETURN_IF_ERROR(SkipToToken());
0123 auto buffering = stream_.BufferAtLeast(literal.size());
0124 RETURN_IF_ERROR(buffering.status());
0125
0126 if (!absl::StartsWith(stream_.Unread(), literal)) {
0127 return Invalid(
0128 absl::StrFormat("unexpected character: '%c'; expected '%s'",
0129 stream_.PeekChar(), literal),
0130 sl);
0131 }
0132
0133 return Advance(literal.size());
0134 }
0135
0136
0137
0138 bool Peek(absl::string_view literal) {
0139
0140
0141 (void)SkipToToken();
0142 auto ignored = stream_.BufferAtLeast(literal.size());
0143 if (!absl::StartsWith(stream_.Unread(), literal)) {
0144 return false;
0145 }
0146
0147
0148 (void)Advance(literal.size());
0149 return true;
0150 }
0151
0152
0153
0154 bool Peek(Kind needle) {
0155 auto kind = PeekKind();
0156 return kind.ok() && *kind == needle;
0157 }
0158
0159
0160
0161
0162
0163
0164 absl::Status SkipToToken();
0165
0166
0167
0168 absl::StatusOr<Kind> PeekKind();
0169
0170
0171 absl::StatusOr<LocationWith<double>> ParseNumber();
0172
0173
0174 absl::StatusOr<LocationWith<MaybeOwnedString>> ParseRawNumber();
0175
0176
0177
0178 absl::StatusOr<LocationWith<MaybeOwnedString>> ParseUtf8();
0179
0180
0181
0182
0183 template <typename F>
0184 absl::Status VisitArray(F f);
0185
0186
0187
0188
0189 template <typename F>
0190 absl::Status VisitObject(F f);
0191
0192
0193 absl::Status SkipValue();
0194
0195
0196
0197 bool AtEof() {
0198
0199
0200 (void)SkipToToken();
0201 return stream_.AtEof();
0202 }
0203
0204 absl::StatusOr<LocationWith<MaybeOwnedString>> Take(size_t len) {
0205 JsonLocation loc = json_loc_;
0206 auto taken = stream_.Take(len);
0207 RETURN_IF_ERROR(taken.status());
0208 return LocationWith<MaybeOwnedString>{*std::move(taken), loc};
0209 }
0210
0211 template <typename Pred>
0212 absl::StatusOr<LocationWith<MaybeOwnedString>> TakeWhile(Pred p) {
0213 JsonLocation loc = json_loc_;
0214 auto taken = stream_.TakeWhile(std::move(p));
0215 RETURN_IF_ERROR(taken.status());
0216 return LocationWith<MaybeOwnedString>{*std::move(taken), loc};
0217 }
0218
0219 LocationWith<Mark> BeginMark() { return {stream_.BeginMark(), json_loc_}; }
0220
0221 private:
0222 friend BufferingGuard;
0223 friend Mark;
0224 friend MaybeOwnedString;
0225
0226 absl::Status Push() {
0227 if (options_.recursion_depth == 0) {
0228 return Invalid("JSON content was too deeply nested");
0229 }
0230 --options_.recursion_depth;
0231 return absl::OkStatus();
0232 }
0233
0234 void Pop() { ++options_.recursion_depth; }
0235
0236
0237 absl::StatusOr<uint16_t> ParseU16HexCodepoint();
0238
0239
0240
0241
0242 absl::StatusOr<size_t> ParseUnicodeEscape(char out_utf8[4]);
0243
0244
0245
0246 absl::StatusOr<LocationWith<MaybeOwnedString>> ParseBareWord();
0247
0248 absl::Status Advance(size_t bytes) {
0249 RETURN_IF_ERROR(stream_.Advance(bytes));
0250 json_loc_.offset += static_cast<int>(bytes);
0251 json_loc_.col += static_cast<int>(bytes);
0252 return absl::OkStatus();
0253 }
0254
0255 ZeroCopyBufferedStream stream_;
0256
0257 ParseOptions options_;
0258 JsonLocation json_loc_;
0259 MessagePath* path_;
0260 };
0261
0262 template <typename F>
0263 absl::Status JsonLexer::VisitArray(F f) {
0264 RETURN_IF_ERROR(Expect("["));
0265 RETURN_IF_ERROR(Push());
0266
0267 if (Peek("]")) {
0268 Pop();
0269 return absl::OkStatus();
0270 }
0271
0272 bool has_comma = true;
0273 do {
0274 if (!has_comma) {
0275 return Invalid("expected ','");
0276 }
0277 RETURN_IF_ERROR(f());
0278 has_comma = Peek(",");
0279 } while (!Peek("]"));
0280
0281 if (!options_.allow_legacy_syntax && has_comma) {
0282 return Invalid("expected ']'");
0283 }
0284
0285 Pop();
0286 return absl::OkStatus();
0287 }
0288
0289
0290
0291
0292 template <typename F>
0293 absl::Status JsonLexer::VisitObject(F f) {
0294 RETURN_IF_ERROR(Expect("{"));
0295 RETURN_IF_ERROR(Push());
0296
0297 if (Peek("}")) {
0298 Pop();
0299 return absl::OkStatus();
0300 }
0301
0302 bool has_comma = true;
0303 do {
0304 if (!has_comma) {
0305 return Invalid("expected ','");
0306 }
0307 RETURN_IF_ERROR(SkipToToken());
0308
0309 absl::StatusOr<LocationWith<MaybeOwnedString>> key;
0310 if (stream_.PeekChar() == '"' || stream_.PeekChar() == '\'') {
0311 key = ParseUtf8();
0312 } else if (options_.allow_legacy_syntax) {
0313 key = ParseBareWord();
0314 } else {
0315 return Invalid("expected '\"'");
0316 }
0317
0318 RETURN_IF_ERROR(key.status());
0319 RETURN_IF_ERROR(Expect(":"));
0320 RETURN_IF_ERROR(f(*key));
0321 has_comma = Peek(",");
0322 } while (!Peek("}"));
0323 Pop();
0324
0325 if (!options_.allow_legacy_syntax && has_comma) {
0326 return Invalid("expected '}'");
0327 }
0328
0329 return absl::OkStatus();
0330 }
0331 }
0332 }
0333 }
0334
0335 #include "google/protobuf/port_undef.inc"
0336 #endif