EIC code displayed by LXR

0001 // Protocol Buffers - Google's data interchange format
0002 // Copyright 2008 Google Inc.  All rights reserved.
0003 //
0004 // Use of this source code is governed by a BSD-style
0005 // license that can be found in the LICENSE file or at
0006 // https://developers.google.com/open-source/licenses/bsd
0007 
0008 #ifndef GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
0009 #define GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
0010 
0011 #include <cstdint>
0012 #include <cstring>
0013 #include <string>
0014 #include <type_traits>
0015 #include <utility>
0016 
0017 #include "absl/base/config.h"
0018 #include "absl/log/absl_check.h"
0019 #include "absl/log/absl_log.h"
0020 #include "absl/strings/cord.h"
0021 #include "absl/strings/internal/resize_uninitialized.h"
0022 #include "absl/strings/string_view.h"
0023 #include "absl/types/optional.h"
0024 #include "google/protobuf/arena.h"
0025 #include "google/protobuf/arenastring.h"
0026 #include "google/protobuf/endian.h"
0027 #include "google/protobuf/inlined_string_field.h"
0028 #include "google/protobuf/io/coded_stream.h"
0029 #include "google/protobuf/io/zero_copy_stream.h"
0030 #include "google/protobuf/metadata_lite.h"
0031 #include "google/protobuf/port.h"
0032 #include "google/protobuf/repeated_field.h"
0033 #include "google/protobuf/wire_format_lite.h"
0034 
0035 
0036 // Must be included last.
0037 #include "google/protobuf/port_def.inc"
0038 
0039 
0040 namespace google {
0041 namespace protobuf {
0042 
0043 class UnknownFieldSet;
0044 class DescriptorPool;
0045 class MessageFactory;
0046 
0047 namespace internal {
0048 
0049 // Template code below needs to know about the existence of these functions.
0050 PROTOBUF_EXPORT void WriteVarint(uint32_t num, uint64_t val, std::string* s);
0051 PROTOBUF_EXPORT void WriteLengthDelimited(uint32_t num, absl::string_view val,
0052                                           std::string* s);
0053 // Inline because it is just forwarding to s->WriteVarint
0054 inline void WriteVarint(uint32_t num, uint64_t val, UnknownFieldSet* s);
0055 inline void WriteLengthDelimited(uint32_t num, absl::string_view val,
0056                                  UnknownFieldSet* s);
0057 
0058 
0059 // The basic abstraction the parser is designed for is a slight modification
0060 // of the ZeroCopyInputStream (ZCIS) abstraction. A ZCIS presents a serialized
0061 // stream as a series of buffers that concatenate to the full stream.
0062 // Pictorially a ZCIS presents a stream in chunks like so
0063 // [---------------------------------------------------------------]
0064 // [---------------------] chunk 1
0065 //                      [----------------------------] chunk 2
0066 //                                          chunk 3 [--------------]
0067 //
0068 // Where the '-' represent the bytes which are vertically lined up with the
0069 // bytes of the stream. The proto parser requires its input to be presented
0070 // similarly, with the extra
0071 // property that each chunk has kSlopBytes past its end that overlaps with the
0072 // first kSlopBytes of the next chunk, or, if there is no next chunk, it's at
0073 // least still valid to read those bytes. Again, pictorially, we now have
0074 //
0075 // [---------------------------------------------------------------]
0076 // [-------------------....] chunk 1
0077 //                    [------------------------....] chunk 2
0078 //                                    chunk 3 [------------------..**]
0079 //                                                      chunk 4 [--****]
0080 // Here '-' means the bytes of the stream or chunk and '.' means bytes past the
0081 // chunk that match up with the start of the next chunk. Above, each chunk has
0082 // 4 '.' after the chunk. In the case these 'overflow' bytes represent bytes
0083 // past the stream, indicated by '*' above, their values are unspecified. It is
0084 // still legal to read them (i.e. it should not segfault). Reading past the
0085 // end should be detected by the user and indicated as an error.
0086 //
0087 // The reason for this admittedly unconventional invariant is to ruthlessly
0088 // optimize the protobuf parser. Having an overlap helps in two important ways.
0089 // Firstly, it alleviates having to perform bounds checks if a piece of code
0090 // is guaranteed to not read more than kSlopBytes. Secondly, and more
0091 // importantly, the protobuf wireformat is such that reading a key/value pair
0092 // always takes less than 16 bytes. This removes the need to switch to the next
0093 // buffer in the middle of reading primitive values. Hence there is no need to
0094 // store and load the current position.
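// For example (an illustrative sketch only), the invariant lets a fixed64
// value be consumed without any bounds check, because reading 8 bytes from a
// pointer that satisfies ptr <= buffer_end_ stays within the kSlopBytes
// overlap:
// ```
// uint64_t value;
// std::memcpy(&value, ptr, sizeof(value));  // safe: 8 <= kSlopBytes
// ptr += sizeof(value);
// ```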
0095 
0096 class PROTOBUF_EXPORT EpsCopyInputStream {
0097  public:
0098   enum { kMaxCordBytesToCopy = 512 };
0099   explicit EpsCopyInputStream(bool enable_aliasing)
0100       : aliasing_(enable_aliasing ? kOnPatch : kNoAliasing) {}
0101 
0102   void BackUp(const char* ptr) {
0103     ABSL_DCHECK(ptr <= buffer_end_ + kSlopBytes);
0104     int count;
0105     if (next_chunk_ == patch_buffer_) {
0106       count = static_cast<int>(buffer_end_ + kSlopBytes - ptr);
0107     } else {
0108       count = size_ + static_cast<int>(buffer_end_ - ptr);
0109     }
0110     if (count > 0) StreamBackUp(count);
0111   }
0112 
0113   // In sanitizer mode we use memory poisoning to guarantee that:
0114   //  - We do not read an uninitialized token.
0115   // We would also like to verify that each token was consumed, but
0116   // unfortunately __asan_address_is_poisoned is allowed to have false negatives.
0117   class LimitToken {
0118    public:
0119     LimitToken() { PROTOBUF_POISON_MEMORY_REGION(&token_, sizeof(token_)); }
0120 
0121     explicit LimitToken(int token) : token_(token) {
0122       PROTOBUF_UNPOISON_MEMORY_REGION(&token_, sizeof(token_));
0123     }
0124 
0125     LimitToken(const LimitToken&) = delete;
0126     LimitToken& operator=(const LimitToken&) = delete;
0127 
0128     LimitToken(LimitToken&& other) { *this = std::move(other); }
0129 
0130     LimitToken& operator=(LimitToken&& other) {
0131       PROTOBUF_UNPOISON_MEMORY_REGION(&token_, sizeof(token_));
0132       token_ = other.token_;
0133       PROTOBUF_POISON_MEMORY_REGION(&other.token_, sizeof(token_));
0134       return *this;
0135     }
0136 
0137     ~LimitToken() { PROTOBUF_UNPOISON_MEMORY_REGION(&token_, sizeof(token_)); }
0138 
0139     int token() && {
0140       int t = token_;
0141       PROTOBUF_POISON_MEMORY_REGION(&token_, sizeof(token_));
0142       return t;
0143     }
0144 
0145    private:
0146     int token_;
0147   };
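  // Illustrative note: token() is rvalue qualified, so the stored value can
  // only be extracted from a token that is being given up, e.g.
  // `int delta = std::move(tok).token();`, which also re-poisons the token's
  // storage in sanitizer builds.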
0148 
0149   // If the return value is negative it's an error
0150   PROTOBUF_NODISCARD LimitToken PushLimit(const char* ptr, int limit) {
0151     ABSL_DCHECK(limit >= 0 && limit <= INT_MAX - kSlopBytes);
0152     // This add is safe due to the invariant above, because
0153     // ptr - buffer_end_ <= kSlopBytes.
0154     limit += static_cast<int>(ptr - buffer_end_);
0155     limit_end_ = buffer_end_ + (std::min)(0, limit);
0156     auto old_limit = limit_;
0157     limit_ = limit;
0158     return LimitToken(old_limit - limit);
0159   }
0160 
0161   PROTOBUF_NODISCARD bool PopLimit(LimitToken delta) {
0162     // We must update the limit first before the early return. Otherwise, we can
0163     // end up with an invalid limit and it can lead to integer overflows.
0164     limit_ = limit_ + std::move(delta).token();
0165     if (PROTOBUF_PREDICT_FALSE(!EndedAtLimit())) return false;
0166     // TODO We could remove this line and hoist the code to
0167     // DoneFallback. Study the perf/bin-size effects.
0168     limit_end_ = buffer_end_ + (std::min)(0, limit_);
0169     return true;
0170   }
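  // A typical (illustrative) pairing for a length-delimited field, mirroring
  // the ReadSizeAndPushLimitAndDepth helper in ParseContext:
  // ```
  // int size = ReadSize(&ptr);
  // if (!ptr) return nullptr;
  // LimitToken old = PushLimit(ptr, size);
  // ptr = <parse the nested payload>;
  // if (!PopLimit(std::move(old))) return nullptr;  // did not end at the limit
  // ```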
0171 
0172   PROTOBUF_NODISCARD const char* Skip(const char* ptr, int size) {
0173     if (size <= buffer_end_ + kSlopBytes - ptr) {
0174       return ptr + size;
0175     }
0176     return SkipFallback(ptr, size);
0177   }
0178   PROTOBUF_NODISCARD const char* ReadString(const char* ptr, int size,
0179                                             std::string* s) {
0180     if (size <= buffer_end_ + kSlopBytes - ptr) {
0181       // Fundamentally we just want to assign to the string.
0182       // However micro-benchmarks regress on string reading cases. So we copy
0183       // the same logic from the old CodedInputStream ReadString. Note: as of
0184       // Apr 2021, this is still a significant win over `assign()`.
0185       absl::strings_internal::STLStringResizeUninitialized(s, size);
0186       char* z = &(*s)[0];
0187       memcpy(z, ptr, size);
0188       return ptr + size;
0189     }
0190     return ReadStringFallback(ptr, size, s);
0191   }
0192   PROTOBUF_NODISCARD const char* AppendString(const char* ptr, int size,
0193                                               std::string* s) {
0194     if (size <= buffer_end_ + kSlopBytes - ptr) {
0195       s->append(ptr, size);
0196       return ptr + size;
0197     }
0198     return AppendStringFallback(ptr, size, s);
0199   }
0200   // Implemented in arenastring.cc
0201   PROTOBUF_NODISCARD const char* ReadArenaString(const char* ptr,
0202                                                  ArenaStringPtr* s,
0203                                                  Arena* arena);
0204 
0205   PROTOBUF_NODISCARD const char* ReadCord(const char* ptr, int size,
0206                                           ::absl::Cord* cord) {
0207     if (size <= std::min<int>(static_cast<int>(buffer_end_ + kSlopBytes - ptr),
0208                               kMaxCordBytesToCopy)) {
0209       *cord = absl::string_view(ptr, size);
0210       return ptr + size;
0211     }
0212     return ReadCordFallback(ptr, size, cord);
0213   }
0214 
0215 
0216   template <typename Tag, typename T>
0217   PROTOBUF_NODISCARD const char* ReadRepeatedFixed(const char* ptr,
0218                                                    Tag expected_tag,
0219                                                    RepeatedField<T>* out);
0220 
0221   template <typename T>
0222   PROTOBUF_NODISCARD const char* ReadPackedFixed(const char* ptr, int size,
0223                                                  RepeatedField<T>* out);
0224   template <typename Add>
0225   PROTOBUF_NODISCARD const char* ReadPackedVarint(const char* ptr, Add add) {
0226     return ReadPackedVarint(ptr, add, [](int) {});
0227   }
0228   template <typename Add, typename SizeCb>
0229   PROTOBUF_NODISCARD const char* ReadPackedVarint(const char* ptr, Add add,
0230                                                   SizeCb size_callback);
0231 
0232   uint32_t LastTag() const { return last_tag_minus_1_ + 1; }
0233   bool ConsumeEndGroup(uint32_t start_tag) {
0234     bool res = last_tag_minus_1_ == start_tag;
0235     last_tag_minus_1_ = 0;
0236     return res;
0237   }
0238   bool EndedAtLimit() const { return last_tag_minus_1_ == 0; }
0239   bool EndedAtEndOfStream() const { return last_tag_minus_1_ == 1; }
0240   void SetLastTag(uint32_t tag) { last_tag_minus_1_ = tag - 1; }
0241   void SetEndOfStream() { last_tag_minus_1_ = 1; }
0242   bool IsExceedingLimit(const char* ptr) {
0243     return ptr > limit_end_ &&
0244            (next_chunk_ == nullptr || ptr - buffer_end_ > limit_);
0245   }
0246   bool AliasingEnabled() const { return aliasing_ != kNoAliasing; }
0247   int BytesUntilLimit(const char* ptr) const {
0248     return limit_ + static_cast<int>(buffer_end_ - ptr);
0249   }
0250   // Maximum number of sequential bytes that can be read starting from `ptr`.
0251   int MaximumReadSize(const char* ptr) const {
0252     return static_cast<int>(limit_end_ - ptr) + kSlopBytes;
0253   }
0254   // Returns true if more data is available; if false is returned, one has to
0255   // call Done for further checks.
0256   bool DataAvailable(const char* ptr) { return ptr < limit_end_; }
0257 
0258  protected:
0259   // Returns true if limit (either an explicit limit or end of stream) is
0260   // reached. It aligns *ptr across buffer seams.
0261   // If limit is exceeded, it returns true and ptr is set to null.
0262   bool DoneWithCheck(const char** ptr, int d) {
0263     ABSL_DCHECK(*ptr);
0264     if (PROTOBUF_PREDICT_TRUE(*ptr < limit_end_)) return false;
0265     int overrun = static_cast<int>(*ptr - buffer_end_);
0266     ABSL_DCHECK_LE(overrun, kSlopBytes);  // Guaranteed by parse loop.
0267     if (overrun ==
0268         limit_) {  //  No need to flip buffers if we ended on a limit.
0269       // If we actually overrun the buffer and next_chunk_ is null, it means
0270       // the stream ended and we passed the stream end.
0271       if (overrun > 0 && next_chunk_ == nullptr) *ptr = nullptr;
0272       return true;
0273     }
0274     auto res = DoneFallback(overrun, d);
0275     *ptr = res.first;
0276     return res.second;
0277   }
0278 
0279   const char* InitFrom(absl::string_view flat) {
0280     overall_limit_ = 0;
0281     if (flat.size() > kSlopBytes) {
0282       limit_ = kSlopBytes;
0283       limit_end_ = buffer_end_ = flat.data() + flat.size() - kSlopBytes;
0284       next_chunk_ = patch_buffer_;
0285       if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
0286       return flat.data();
0287     } else {
0288       if (!flat.empty()) {
0289         std::memcpy(patch_buffer_, flat.data(), flat.size());
0290       }
0291       limit_ = 0;
0292       limit_end_ = buffer_end_ = patch_buffer_ + flat.size();
0293       next_chunk_ = nullptr;
0294       if (aliasing_ == kOnPatch) {
0295         aliasing_ = reinterpret_cast<std::uintptr_t>(flat.data()) -
0296                     reinterpret_cast<std::uintptr_t>(patch_buffer_);
0297       }
0298       return patch_buffer_;
0299     }
0300   }
0301 
0302   const char* InitFrom(io::ZeroCopyInputStream* zcis);
0303 
0304   const char* InitFrom(io::ZeroCopyInputStream* zcis, int limit) {
0305     if (limit == -1) return InitFrom(zcis);
0306     overall_limit_ = limit;
0307     auto res = InitFrom(zcis);
0308     limit_ = limit - static_cast<int>(buffer_end_ - res);
0309     limit_end_ = buffer_end_ + (std::min)(0, limit_);
0310     return res;
0311   }
0312 
0313  private:
0314   enum { kSlopBytes = 16, kPatchBufferSize = 32 };
0315   static_assert(kPatchBufferSize >= kSlopBytes * 2,
0316                 "Patch buffer needs to be at least large enough to hold all "
0317                 "the slop bytes from the previous buffer, plus the first "
0318                 "kSlopBytes from the next buffer.");
0319 
0320   const char* limit_end_;  // buffer_end_ + min(limit_, 0)
0321   const char* buffer_end_;
0322   const char* next_chunk_;
0323   int size_;
0324   int limit_;  // relative to buffer_end_;
0325   io::ZeroCopyInputStream* zcis_ = nullptr;
0326   char patch_buffer_[kPatchBufferSize] = {};
0327   enum { kNoAliasing = 0, kOnPatch = 1, kNoDelta = 2 };
0328   std::uintptr_t aliasing_ = kNoAliasing;
0329   // This variable is used to communicate how the parse ended, in order to
0330   // completely verify the parsed data. A wire-format parse can end because of
0331   // one of the following conditions:
0332   // 1) A parse can end on a pushed limit.
0333   // 2) A parse can end on End Of Stream (EOS).
0334   // 3) A parse can end on 0 tag (only valid for toplevel message).
0335   // 4) A parse can end on an end-group tag.
0336   // This variable defaults to 0, which indicates case 1. If the
0337   // parse terminated due to EOS (case 2), it's set to 1. In case the parse
0338   // ended due to a terminating tag (cases 3 and 4) it's set to (tag - 1).
0339   // This var doesn't really belong in EpsCopyInputStream and should be part of
0340   // the ParseContext, but case 2 is most easily and optimally implemented in
0341   // DoneFallback.
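  // For illustration: the end-group tag of field 1 is (1 << 3) | 4 == 12, so
  // SetLastTag(12) stores 11 here, which is exactly the start-group tag
  // (1 << 3) | 3 == 11 that ConsumeEndGroup() is later called with.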
0342   uint32_t last_tag_minus_1_ = 0;
0343   int overall_limit_ = INT_MAX;  // Overall limit independent of pushed limits.
0344   // Pretty random large number that seems like a safe allocation on most
0345   // systems. TODO do we need to set this as a build flag?
0346   enum { kSafeStringSize = 50000000 };
0347 
0348   // Advances to the next buffer chunk and returns a pointer to the same logical
0349   // place in the stream as set by overrun. Overrun indicates the position in the
0350   // slop region where the parse was left (0 <= overrun <= kSlopBytes). Returns
0351   // true if at the limit, at which point the returned pointer may be null if
0352   // there was an error. The invariant of this function is that it's guaranteed
0353   // that kSlopBytes bytes can be accessed from the returned ptr. This function
0354   // might advance more than one buffer in the underlying ZeroCopyInputStream.
0355   std::pair<const char*, bool> DoneFallback(int overrun, int depth);
0356   // Advances to the next buffer; at most one call to Next() on the underlying
0357   // ZeroCopyInputStream is made. This function DOES NOT match the returned
0358   // pointer to where in the slop region the parse ends, hence no overrun
0359   // parameter. This is useful for string operations where you always copy
0360   // to the end of the buffer (including the slop region).
0361   const char* Next();
0362   // overrun is the location in the slop region where the stream currently is
0363   // (0 <= overrun <= kSlopBytes). It is used to avoid flipping to the next
0364   // buffer of the ZeroCopyInputStream in the case the parse will end in the
0365   // last kSlopBytes of the current buffer. depth is the current depth of nested
0366   // groups (or negative if the use case does not need careful tracking).
0367   inline const char* NextBuffer(int overrun, int depth);
0368   const char* SkipFallback(const char* ptr, int size);
0369   const char* AppendStringFallback(const char* ptr, int size, std::string* str);
0370   const char* ReadStringFallback(const char* ptr, int size, std::string* str);
0371   const char* ReadCordFallback(const char* ptr, int size, absl::Cord* cord);
0372   static bool ParseEndsInSlopRegion(const char* begin, int overrun, int depth);
0373   bool StreamNext(const void** data) {
0374     bool res = zcis_->Next(data, &size_);
0375     if (res) overall_limit_ -= size_;
0376     return res;
0377   }
0378   void StreamBackUp(int count) {
0379     zcis_->BackUp(count);
0380     overall_limit_ += count;
0381   }
0382 
0383   template <typename A>
0384   const char* AppendSize(const char* ptr, int size, const A& append) {
0385     int chunk_size = static_cast<int>(buffer_end_ + kSlopBytes - ptr);
0386     do {
0387       ABSL_DCHECK(size > chunk_size);
0388       if (next_chunk_ == nullptr) return nullptr;
0389       append(ptr, chunk_size);
0390       ptr += chunk_size;
0391       size -= chunk_size;
0392       // TODO Next calls NextBuffer which generates buffers with overlap and
0393       // thus incurs the cost of copying the slop regions. This is not necessary
0394       // for reading strings; we could just fetch the next buffers directly.
0395       if (limit_ <= kSlopBytes) return nullptr;
0396       ptr = Next();
0397       if (ptr == nullptr) return nullptr;  // passed the limit
0398       ptr += kSlopBytes;
0399       chunk_size = static_cast<int>(buffer_end_ + kSlopBytes - ptr);
0400     } while (size > chunk_size);
0401     append(ptr, size);
0402     return ptr + size;
0403   }
0404 
0405   // AppendUntilEnd appends data until a limit (either a PushLimit or end of
0406   // stream). Normal payloads come from length-delimited fields which have an
0407   // explicit size. Reading until a limit only happens when the string takes
0408   // the place of a protobuf, i.e. RawMessage, lazy fields and implicit weak
0409   // messages. We keep these methods private and friend them.
0410   template <typename A>
0411   const char* AppendUntilEnd(const char* ptr, const A& append) {
0412     if (ptr - buffer_end_ > limit_) return nullptr;
0413     while (limit_ > kSlopBytes) {
0414       size_t chunk_size = buffer_end_ + kSlopBytes - ptr;
0415       append(ptr, chunk_size);
0416       ptr = Next();
0417       if (ptr == nullptr) return limit_end_;
0418       ptr += kSlopBytes;
0419     }
0420     auto end = buffer_end_ + limit_;
0421     ABSL_DCHECK(end >= ptr);
0422     append(ptr, end - ptr);
0423     return end;
0424   }
0425 
0426   PROTOBUF_NODISCARD const char* AppendString(const char* ptr,
0427                                               std::string* str) {
0428     return AppendUntilEnd(
0429         ptr, [str](const char* p, ptrdiff_t s) { str->append(p, s); });
0430   }
0431   friend class ImplicitWeakMessage;
0432 
0433   // Needs access to kSlopBytes.
0434   friend PROTOBUF_EXPORT std::pair<const char*, int32_t> ReadSizeFallback(
0435       const char* p, uint32_t res);
0436 };
0437 
0438 using LazyEagerVerifyFnType = const char* (*)(const char* ptr,
0439                                               ParseContext* ctx);
0440 using LazyEagerVerifyFnRef = std::remove_pointer<LazyEagerVerifyFnType>::type&;
0441 
0442 // ParseContext holds all data that is global to the entire parse. Most
0443 // importantly it contains the input stream, but it also tracks the recursion
0444 // depth and stores the end-group tag, in case a parser ended on an end-group,
0445 // to verify matching start/end group tags.
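// A minimal sketch of how a caller might drive a parse with this context
// (illustrative only; the depth value is hypothetical, and msg/input are
// assumed to be a MessageLite* and an absl::string_view):
// ```
// const char* ptr = nullptr;
// internal::ParseContext ctx(/*depth=*/100, /*aliasing=*/false, &ptr, input);
// ptr = msg->_InternalParse(ptr, &ctx);
// // On success ptr is non-null and the context reports how the parse ended,
// // e.g. via EndedAtLimit() or EndedAtEndOfStream().
// ```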
0446 class PROTOBUF_EXPORT ParseContext : public EpsCopyInputStream {
0447  public:
0448   struct Data {
0449     const DescriptorPool* pool = nullptr;
0450     MessageFactory* factory = nullptr;
0451   };
0452 
0453   template <typename... T>
0454   ParseContext(int depth, bool aliasing, const char** start, T&&... args)
0455       : EpsCopyInputStream(aliasing), depth_(depth) {
0456     *start = InitFrom(std::forward<T>(args)...);
0457   }
0458 
0459   struct Spawn {};
0460   static constexpr Spawn kSpawn = {};
0461 
0462   // Creates a new context from a given "ctx", inheriting a few attributes, to
0463   // emulate continued parsing. For example, recursion depth or descriptor pools
0464   // must be passed down to a new "spawned" context to maintain the same parse
0465   // context. Note that the spawned context always disables aliasing (different
0466   // input).
0467   template <typename... T>
0468   ParseContext(Spawn, const ParseContext& ctx, const char** start, T&&... args)
0469       : EpsCopyInputStream(false),
0470         depth_(ctx.depth_),
0471         data_(ctx.data_)
0472   {
0473     *start = InitFrom(std::forward<T>(args)...);
0474   }
0475 
0476   // Move constructor and assignment operator are not supported because "ptr"
0477   // for parsing may have pointed to an inlined buffer (patch_buffer_) which can
0478   // be invalid afterwards.
0479   ParseContext(ParseContext&&) = delete;
0480   ParseContext& operator=(ParseContext&&) = delete;
0481   ParseContext& operator=(const ParseContext&) = delete;
0482 
0483   void TrackCorrectEnding() { group_depth_ = 0; }
0484 
0485   // Done should only be called when the parsing pointer is pointing to the
0486   // beginning of field data - that is, at a tag - or when it is NULL.
0487   bool Done(const char** ptr) { return DoneWithCheck(ptr, group_depth_); }
0488 
0489   int depth() const { return depth_; }
0490 
0491   Data& data() { return data_; }
0492   const Data& data() const { return data_; }
0493 
0494   const char* ParseMessage(MessageLite* msg, const char* ptr);
0495 
0496   // Read the length prefix, push the new limit, call the func(ptr), and then
0497   // pop the limit. Useful for situations that don't have an actual message.
0498   template <typename Func>
0499   PROTOBUF_NODISCARD const char* ParseLengthDelimitedInlined(const char*,
0500                                                              const Func& func);
0501 
0502   // Push the recursion depth, call the func(ptr), and then pop depth. Useful
0503   // for situations that don't have an actual message.
0504   template <typename Func>
0505   PROTOBUF_NODISCARD const char* ParseGroupInlined(const char* ptr,
0506                                                    uint32_t start_tag,
0507                                                    const Func& func);
0508 
0509   // Use a template to avoid the strong dependency on TcParser. All callers
0510   // will have the dependency.
0511   template <typename Parser = TcParser>
0512   PROTOBUF_ALWAYS_INLINE const char* ParseMessage(
0513       MessageLite* msg, const TcParseTableBase* tc_table, const char* ptr) {
0514     return ParseLengthDelimitedInlined(ptr, [&](const char* ptr) {
0515       return Parser::ParseLoop(msg, ptr, this, tc_table);
0516     });
0517   }
0518   template <typename Parser = TcParser>
0519   PROTOBUF_ALWAYS_INLINE const char* ParseGroup(
0520       MessageLite* msg, const TcParseTableBase* tc_table, const char* ptr,
0521       uint32_t start_tag) {
0522     return ParseGroupInlined(ptr, start_tag, [&](const char* ptr) {
0523       return Parser::ParseLoop(msg, ptr, this, tc_table);
0524     });
0525   }
0526 
0527   PROTOBUF_NODISCARD PROTOBUF_NDEBUG_INLINE const char* ParseGroup(
0528       MessageLite* msg, const char* ptr, uint32_t tag) {
0529     if (--depth_ < 0) return nullptr;
0530     group_depth_++;
0531     auto old_depth = depth_;
0532     auto old_group_depth = group_depth_;
0533     ptr = msg->_InternalParse(ptr, this);
0534     if (ptr != nullptr) {
0535       ABSL_DCHECK_EQ(old_depth, depth_);
0536       ABSL_DCHECK_EQ(old_group_depth, group_depth_);
0537     }
0538     group_depth_--;
0539     depth_++;
0540     if (PROTOBUF_PREDICT_FALSE(!ConsumeEndGroup(tag))) return nullptr;
0541     return ptr;
0542   }
0543 
0544  private:
0545   // Out-of-line routine to save space in ParseContext::ParseMessage<T>
0546   //   LimitToken old;
0547   //   ptr = ReadSizeAndPushLimitAndDepth(ptr, &old)
0548   // is equivalent to:
0549   //   int size = ReadSize(&ptr);
0550   //   if (!ptr) return nullptr;
0551   //   LimitToken old = PushLimit(ptr, size);
0552   //   if (--depth_ < 0) return nullptr;
0553   PROTOBUF_NODISCARD const char* ReadSizeAndPushLimitAndDepth(
0554       const char* ptr, LimitToken* old_limit);
0555 
0556   // As above, but fully inlined for the cases where we care about performance
0557   // more than size. eg TcParser.
0558   PROTOBUF_NODISCARD PROTOBUF_ALWAYS_INLINE const char*
0559   ReadSizeAndPushLimitAndDepthInlined(const char* ptr, LimitToken* old_limit);
0560 
0561   // The context keeps an internal stack to keep track of the recursive
0562   // part of the parse state.
0563   // Current depth of the active parser, depth counts down.
0564   // This is used to limit recursion depth (to prevent overflow on malicious
0565   // data), but is also used to index in stack_ to store the current state.
0566   int depth_;
0567   // Unfortunately necessary for the fringe case of ending on 0 or end-group tag
0568   // in the last kSlopBytes of a ZeroCopyInputStream chunk.
0569   int group_depth_ = INT_MIN;
0570   Data data_;
0571 };
0572 
0573 template <int>
0574 struct EndianHelper;
0575 
0576 template <>
0577 struct EndianHelper<1> {
0578   static uint8_t Load(const void* p) { return *static_cast<const uint8_t*>(p); }
0579 };
0580 
0581 template <>
0582 struct EndianHelper<2> {
0583   static uint16_t Load(const void* p) {
0584     uint16_t tmp;
0585     std::memcpy(&tmp, p, 2);
0586     return little_endian::ToHost(tmp);
0587   }
0588 };
0589 
0590 template <>
0591 struct EndianHelper<4> {
0592   static uint32_t Load(const void* p) {
0593     uint32_t tmp;
0594     std::memcpy(&tmp, p, 4);
0595     return little_endian::ToHost(tmp);
0596   }
0597 };
0598 
0599 template <>
0600 struct EndianHelper<8> {
0601   static uint64_t Load(const void* p) {
0602     uint64_t tmp;
0603     std::memcpy(&tmp, p, 8);
0604     return little_endian::ToHost(tmp);
0605   }
0606 };
0607 
0608 template <typename T>
0609 T UnalignedLoad(const char* p) {
0610   auto tmp = EndianHelper<sizeof(T)>::Load(p);
0611   T res;
0612   memcpy(&res, &tmp, sizeof(T));
0613   return res;
0614 }
0615 template <typename T, typename Void,
0616           typename = std::enable_if_t<std::is_same<Void, void>::value>>
0617 T UnalignedLoad(const Void* p) {
0618   return UnalignedLoad<T>(reinterpret_cast<const char*>(p));
0619 }
0620 
0621 PROTOBUF_EXPORT
0622 std::pair<const char*, uint32_t> VarintParseSlow32(const char* p, uint32_t res);
0623 PROTOBUF_EXPORT
0624 std::pair<const char*, uint64_t> VarintParseSlow64(const char* p, uint32_t res);
0625 
0626 inline const char* VarintParseSlow(const char* p, uint32_t res, uint32_t* out) {
0627   auto tmp = VarintParseSlow32(p, res);
0628   *out = tmp.second;
0629   return tmp.first;
0630 }
0631 
0632 inline const char* VarintParseSlow(const char* p, uint32_t res, uint64_t* out) {
0633   auto tmp = VarintParseSlow64(p, res);
0634   *out = tmp.second;
0635   return tmp.first;
0636 }
0637 
0638 #ifdef __aarch64__
0639 // Generally speaking, the ARM-optimized Varint decode algorithm is to extract
0640 // and concatenate all potentially valid data bits, compute the actual length
0641 // of the Varint, and mask off the data bits which are not actually part of the
0642 // result.  More detail on the two main parts is shown below.
0643 //
0644 // 1) Extract and concatenate all potentially valid data bits.
0645 //    Two ARM-specific features help significantly:
0646 //    a) Efficient and non-destructive bit extraction (UBFX)
0647 //    b) A single instruction can perform both a shift and an OR with the
0648 //       shifted second operand in one cycle.  E.g., the following two lines do
0649 //       the same thing
0650 //       ```result = operand_1 | (operand2 << 7);```
0651 //       ```ORR %[result], %[operand_1], %[operand_2], LSL #7```
0652 //    The figure below shows the implementation for handling four chunks.
0653 //
0654 // Bits   32    31-24    23   22-16    15    14-8      7     6-0
0655 //      +----+---------+----+---------+----+---------+----+---------+
0656 //      |CB 3| Chunk 3 |CB 2| Chunk 2 |CB 1| Chunk 1 |CB 0| Chunk 0 |
0657 //      +----+---------+----+---------+----+---------+----+---------+
0658 //                |              |              |              |
0659 //               UBFX           UBFX           UBFX           UBFX    -- cycle 1
0660 //                |              |              |              |
0661 //                V              V              V              V
0662 //               Combined LSL #7 and ORR     Combined LSL #7 and ORR  -- cycle 2
0663 //                                 |             |
0664 //                                 V             V
0665 //                            Combined LSL #14 and ORR                -- cycle 3
0666 //                                       |
0667 //                                       V
0668 //                                Parsed bits 0-27
0669 //
0670 //
0671 // 2) Calculate the index of the cleared continuation bit in order to determine
0672 //    where the encoded Varint ends and the size of the decoded value.  The
0673 //    easiest way to do this is to mask off all data bits, leaving just the
0674 //    continuation bits.  We actually need to do the masking on an inverted
0675 //    copy of the data, which leaves a 1 in all continuation bits which were
0676 //    originally clear.  The number of trailing zeroes in this value indicates
0677 //    the size of the Varint.
0678 //
0679 //  AND  0x80    0x80    0x80    0x80    0x80    0x80    0x80    0x80
0680 //
0681 // Bits   63      55      47      39      31      23      15       7
0682 //      +----+--+----+--+----+--+----+--+----+--+----+--+----+--+----+--+
0683 // ~    |CB 7|  |CB 6|  |CB 5|  |CB 4|  |CB 3|  |CB 2|  |CB 1|  |CB 0|  |
0684 //      +----+--+----+--+----+--+----+--+----+--+----+--+----+--+----+--+
0685 //         |       |       |       |       |       |       |       |
0686 //         V       V       V       V       V       V       V       V
0687 // Bits   63      55      47      39      31      23      15       7
0688 //      +----+--+----+--+----+--+----+--+----+--+----+--+----+--+----+--+
0689 //      |~CB 7|0|~CB 6|0|~CB 5|0|~CB 4|0|~CB 3|0|~CB 2|0|~CB 1|0|~CB 0|0|
0690 //      +----+--+----+--+----+--+----+--+----+--+----+--+----+--+----+--+
0691 //                                      |
0692 //                                     CTZ
0693 //                                      V
0694 //                     Index of first cleared continuation bit
0695 //
0696 //
0697 // While this is implemented in C++, significant care has been taken to ensure
0698 // the compiler emits the best instruction sequence.  In some cases we use the
0699 // following two functions to manipulate the compiler's scheduling decisions.
0700 //
0701 // Controls compiler scheduling by telling it that the first value is modified
0702 // by the second value at the callsite.  This is useful if non-critical path
0703 // instructions are too aggressively scheduled, resulting in a slowdown of the
0704 // actual critical path due to opportunity costs.  An example usage is shown
0705 // where a false dependence of num_bits on result is added to prevent checking
0706 // for a very unlikely error until all critical path instructions have been
0707 // fetched.
0708 //
0709 // ```
0710 // num_bits = <multiple operations to calculate new num_bits value>
0711 // result = <multiple operations to calculate result>
0712 // num_bits = ValueBarrier(num_bits, result);
0713 // if (num_bits == 63) {
0714 //   ABSL_LOG(FATAL) << "Invalid num_bits value";
0715 // }
0716 // ```
0717 // Falsely indicate that the specific value is modified at this location.  This
0718 // prevents code which depends on this value from being scheduled earlier.
0719 template <typename V1Type>
0720 PROTOBUF_ALWAYS_INLINE inline V1Type ValueBarrier(V1Type value1) {
0721   asm("" : "+r"(value1));
0722   return value1;
0723 }
0724 
0725 template <typename V1Type, typename V2Type>
0726 PROTOBUF_ALWAYS_INLINE inline V1Type ValueBarrier(V1Type value1,
0727                                                   V2Type value2) {
0728   asm("" : "+r"(value1) : "r"(value2));
0729   return value1;
0730 }
0731 
0732 // Performs a 7 bit UBFX (Unsigned Bit Extract) starting at the indicated bit.
0733 static PROTOBUF_ALWAYS_INLINE inline uint64_t Ubfx7(uint64_t data,
0734                                                     uint64_t start) {
0735   return ValueBarrier((data >> start) & 0x7f);
0736 }
0737 
0738 PROTOBUF_ALWAYS_INLINE inline uint64_t ExtractAndMergeTwoChunks(
0739     uint64_t data, uint64_t first_byte) {
0740   ABSL_DCHECK_LE(first_byte, 6U);
0741   uint64_t first = Ubfx7(data, first_byte * 8);
0742   uint64_t second = Ubfx7(data, (first_byte + 1) * 8);
0743   return ValueBarrier(first | (second << 7));
0744 }
0745 
0746 struct SlowPathEncodedInfo {
0747   const char* p;
0748   uint64_t last8;
0749   uint64_t valid_bits;
0750   uint64_t valid_chunk_bits;
0751   uint64_t masked_cont_bits;
0752 };
0753 
0754 // Performs multiple actions which are identical between 32 and 64 bit Varints
0755 // in order to compute the length of the encoded Varint and compute the new
0756 // value of p.
0757 PROTOBUF_ALWAYS_INLINE inline SlowPathEncodedInfo ComputeLengthAndUpdateP(
0758     const char* p) {
0759   SlowPathEncodedInfo result;
0760   // Load the last 8 bytes of the encoded Varint (bytes p[2]..p[9]).
0761   std::memcpy(&result.last8, p + 2, sizeof(result.last8));
0762   uint64_t mask = ValueBarrier(0x8080808080808080);
0763   // Only set continuation bits remain
0764   result.masked_cont_bits = ValueBarrier(mask & ~result.last8);
0765   // The first cleared continuation bit is the most significant 1 in the
0766   // reversed value.  Result is undefined for an input of 0 and we handle that
0767   // case below.
0768   result.valid_bits = absl::countr_zero(result.masked_cont_bits);
0769   // Calculates the number of chunks in the encoded Varint.  This value is low
0770   // by three as neither the cleared continuation chunk nor the first two chunks
0771   // are counted.
0772   uint64_t set_continuation_bits = result.valid_bits >> 3;
0773   // Update p to point past the encoded Varint.
0774   result.p = p + set_continuation_bits + 3;
0775   // Calculate number of valid data bits in the decoded value so invalid bits
0776   // can be masked off.  Value is too low by 14 but we account for that when
0777   // calculating the mask.
0778   result.valid_chunk_bits = result.valid_bits - set_continuation_bits;
0779   return result;
0780 }
0781 
0782 inline PROTOBUF_ALWAYS_INLINE std::pair<const char*, uint64_t>
0783 VarintParseSlowArm64(const char* p, uint64_t first8) {
0784   constexpr uint64_t kResultMaskUnshifted = 0xffffffffffffc000ULL;
0785   constexpr uint64_t kFirstResultBitChunk2 = 2 * 7;
0786   constexpr uint64_t kFirstResultBitChunk4 = 4 * 7;
0787   constexpr uint64_t kFirstResultBitChunk6 = 6 * 7;
0788   constexpr uint64_t kFirstResultBitChunk8 = 8 * 7;
0789 
0790   SlowPathEncodedInfo info = ComputeLengthAndUpdateP(p);
0791   // Extract data bits from the low six chunks.  This includes chunks zero and
0792   // one which we already know are valid.
0793   uint64_t merged_01 = ExtractAndMergeTwoChunks(first8, /*first_chunk=*/0);
0794   uint64_t merged_23 = ExtractAndMergeTwoChunks(first8, /*first_chunk=*/2);
0795   uint64_t merged_45 = ExtractAndMergeTwoChunks(first8, /*first_chunk=*/4);
0796   // Low 42 bits of decoded value.
0797   uint64_t result = merged_01 | (merged_23 << kFirstResultBitChunk2) |
0798                     (merged_45 << kFirstResultBitChunk4);
0799   // This immediate ends in 14 zeroes since valid_chunk_bits is too low by 14.
0800   uint64_t result_mask = kResultMaskUnshifted << info.valid_chunk_bits;
0801   // masked_cont_bits is zero iff the Varint is invalid.
0802   if (PROTOBUF_PREDICT_FALSE(info.masked_cont_bits == 0)) {
0803     return {nullptr, 0};
0804   }
0805   // Test for early exit if Varint does not exceed 6 chunks.  Branching on one
0806   // bit is faster on ARM than via a compare and branch.
0807   if (PROTOBUF_PREDICT_FALSE((info.valid_bits & 0x20) != 0)) {
0808     // Extract data bits from high four chunks.
0809     uint64_t merged_67 = ExtractAndMergeTwoChunks(first8, /*first_chunk=*/6);
0810     // Last two chunks come from last two bytes of info.last8.
0811     uint64_t merged_89 =
0812         ExtractAndMergeTwoChunks(info.last8, /*first_chunk=*/6);
0813     result |= merged_67 << kFirstResultBitChunk6;
0814     result |= merged_89 << kFirstResultBitChunk8;
0815     // Handle an invalid Varint with all 10 continuation bits set.
0816   }
0817   // Mask off invalid data bits.
0818   result &= ~result_mask;
0819   return {info.p, result};
0820 }
0821 
0822 // See comments in VarintParseSlowArm64 for a description of the algorithm.
0823 // Differences in the 32 bit version are noted below.
0824 inline PROTOBUF_ALWAYS_INLINE std::pair<const char*, uint32_t>
0825 VarintParseSlowArm32(const char* p, uint64_t first8) {
0826   constexpr uint64_t kResultMaskUnshifted = 0xffffffffffffc000ULL;
0827   constexpr uint64_t kFirstResultBitChunk1 = 1 * 7;
0828   constexpr uint64_t kFirstResultBitChunk3 = 3 * 7;
0829 
0830   // This also skips the slop bytes.
0831   SlowPathEncodedInfo info = ComputeLengthAndUpdateP(p);
0832   // Extract data bits from chunks 1-4.  Chunk zero is merged in below.
0833   uint64_t merged_12 = ExtractAndMergeTwoChunks(first8, /*first_chunk=*/1);
0834   uint64_t merged_34 = ExtractAndMergeTwoChunks(first8, /*first_chunk=*/3);
0835   first8 = ValueBarrier(first8, p);
0836   uint64_t result = Ubfx7(first8, /*start=*/0);
0837   result = ValueBarrier(result | merged_12 << kFirstResultBitChunk1);
0838   result = ValueBarrier(result | merged_34 << kFirstResultBitChunk3);
0839   uint64_t result_mask = kResultMaskUnshifted << info.valid_chunk_bits;
0840   result &= ~result_mask;
0841   // It is extremely unlikely that a Varint is invalid so checking that
0842   // condition isn't on the critical path. Here we make sure that we don't do so
0843   // until result has been computed.
0844   info.masked_cont_bits = ValueBarrier(info.masked_cont_bits, result);
0845   if (PROTOBUF_PREDICT_FALSE(info.masked_cont_bits == 0)) {
0846     return {nullptr, 0};
0847   }
0848   return {info.p, result};
0849 }
0850 
0851 static const char* VarintParseSlowArm(const char* p, uint32_t* out,
0852                                       uint64_t first8) {
0853   auto tmp = VarintParseSlowArm32(p, first8);
0854   *out = tmp.second;
0855   return tmp.first;
0856 }
0857 
0858 static const char* VarintParseSlowArm(const char* p, uint64_t* out,
0859                                       uint64_t first8) {
0860   auto tmp = VarintParseSlowArm64(p, first8);
0861   *out = tmp.second;
0862   return tmp.first;
0863 }
0864 #endif
0865 
0866 // The caller must ensure that p points to at least 10 valid bytes.
0867 template <typename T>
0868 PROTOBUF_NODISCARD const char* VarintParse(const char* p, T* out) {
0869 #if defined(__aarch64__) && defined(ABSL_IS_LITTLE_ENDIAN)
0870   // This optimization is not supported in big endian mode
0871   uint64_t first8;
0872   std::memcpy(&first8, p, sizeof(first8));
0873   if (PROTOBUF_PREDICT_TRUE((first8 & 0x80) == 0)) {
0874     *out = static_cast<uint8_t>(first8);
0875     return p + 1;
0876   }
0877   if (PROTOBUF_PREDICT_TRUE((first8 & 0x8000) == 0)) {
0878     uint64_t chunk1;
0879     uint64_t chunk2;
0880     // Extracting the two chunks this way gives a speedup for this path.
0881     chunk1 = Ubfx7(first8, 0);
0882     chunk2 = Ubfx7(first8, 8);
0883     *out = chunk1 | (chunk2 << 7);
0884     return p + 2;
0885   }
0886   return VarintParseSlowArm(p, out, first8);
0887 #else   // __aarch64__
0888   auto ptr = reinterpret_cast<const uint8_t*>(p);
0889   uint32_t res = ptr[0];
0890   if ((res & 0x80) == 0) {
0891     *out = res;
0892     return p + 1;
0893   }
0894   return VarintParseSlow(p, res, out);
0895 #endif  // __aarch64__
0896 }
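// For illustration, a varint stores 7 data bits per byte, least significant
// group first, with the top bit of each byte acting as a continuation bit.
// E.g. the bytes 0xAC 0x02 decode to (0xAC & 0x7F) | (0x02 << 7) == 300, and
// VarintParse consumes exactly those two bytes.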
0897 
0898 // Used for tags, could read up to 5 bytes which must be available.
0899 // Caller must ensure it's safe to call.
0900 
0901 PROTOBUF_EXPORT
0902 std::pair<const char*, uint32_t> ReadTagFallback(const char* p, uint32_t res);
0903 
0904 // Same as VarintParse but accepts at most 5 bytes.
0905 inline const char* ReadTag(const char* p, uint32_t* out,
0906                            uint32_t /*max_tag*/ = 0) {
0907   uint32_t res = static_cast<uint8_t>(p[0]);
0908   if (res < 128) {
0909     *out = res;
0910     return p + 1;
0911   }
0912   uint32_t second = static_cast<uint8_t>(p[1]);
0913   res += (second - 1) << 7;
0914   if (second < 128) {
0915     *out = res;
0916     return p + 2;
0917   }
0918   auto tmp = ReadTagFallback(p, res);
0919   *out = tmp.second;
0920   return tmp.first;
0921 }
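// For illustration, a tag is the varint encoding of
// (field_number << 3) | wire_type. E.g. field number 1 with wire type 2
// (length-delimited) is the single byte 0x0A, so ReadTag stores 10 in *out and
// advances the pointer by one byte.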
0922 
0923 // As above, but optimized to consume very few registers while still being
0924 // fast. ReadTagInlined is useful for callers that don't mind the extra code but
0925 // would like to avoid an extern function call causing spills to the stack.
0926 //
0927 // Two support routines for ReadTagInlined come first...
0928 template <class T>
0929 PROTOBUF_NODISCARD PROTOBUF_ALWAYS_INLINE constexpr T RotateLeft(
0930     T x, int s) noexcept {
0931   return static_cast<T>(x << (s & (std::numeric_limits<T>::digits - 1))) |
0932          static_cast<T>(x >> ((-s) & (std::numeric_limits<T>::digits - 1)));
0933 }
0934 
0935 PROTOBUF_NODISCARD inline PROTOBUF_ALWAYS_INLINE uint64_t
0936 RotRight7AndReplaceLowByte(uint64_t res, const char& byte) {
0937   // TODO: remove the inline assembly
0938 #if defined(__x86_64__) && defined(__GNUC__)
0939   // This will only use one register for `res`.
0940   // `byte` comes as a reference to allow the compiler to generate code like:
0941   //
0942   //   rorq    $7, %rcx
0943   //   movb    1(%rax), %cl
0944   //
0945   // which avoids loading the incoming bytes into a separate register first.
0946   asm("ror $7,%0\n\t"
0947       "movb %1,%b0"
0948       : "+r"(res)
0949       : "m"(byte));
0950 #else
0951   res = RotateLeft(res, -7);
0952   res = res & ~0xFF;
0953   res |= 0xFF & byte;
0954 #endif
0955   return res;
0956 }
0957 
0958 inline PROTOBUF_ALWAYS_INLINE const char* ReadTagInlined(const char* ptr,
0959                                                          uint32_t* out) {
0960   uint64_t res = 0xFF & ptr[0];
0961   if (PROTOBUF_PREDICT_FALSE(res >= 128)) {
0962     res = RotRight7AndReplaceLowByte(res, ptr[1]);
0963     if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
0964       res = RotRight7AndReplaceLowByte(res, ptr[2]);
0965       if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
0966         res = RotRight7AndReplaceLowByte(res, ptr[3]);
0967         if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
0968           // Note: this wouldn't work if res were 32-bit,
0969           // because then replacing the low byte would overwrite
0970           // the bottom 4 bits of the result.
0971           res = RotRight7AndReplaceLowByte(res, ptr[4]);
0972           if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
0973             // The proto format does not permit longer than 5-byte encodings for
0974             // tags.
0975             *out = 0;
0976             return nullptr;
0977           }
0978           *out = static_cast<uint32_t>(RotateLeft(res, 28));
0979 #if defined(__GNUC__)
0980           // Note: this asm statement prevents the compiler from
0981           // trying to share the "return ptr + constant" among all
0982           // branches.
0983           asm("" : "+r"(ptr));
0984 #endif
0985           return ptr + 5;
0986         }
0987         *out = static_cast<uint32_t>(RotateLeft(res, 21));
0988         return ptr + 4;
0989       }
0990       *out = static_cast<uint32_t>(RotateLeft(res, 14));
0991       return ptr + 3;
0992     }
0993     *out = static_cast<uint32_t>(RotateLeft(res, 7));
0994     return ptr + 2;
0995   }
0996   *out = static_cast<uint32_t>(res);
0997   return ptr + 1;
0998 }
0999 
1000 // Decodes 2 consecutive bytes of a varint and returns the value, shifted left
1001 // by 1. It simultaneously updates *ptr to *ptr + 1 or *ptr + 2 depending on
1002 // whether the first byte's continuation bit is set.
1003 // If bit 15 of the return value is set (equivalent to the continuation bits of
1004 // both bytes being set) the varint continues; otherwise the parse is done. On x86
1005 // movsx eax, dil
1006 // and edi, eax
1007 // add eax, edi
1008 // adc [rsi], 1
1009 inline uint32_t DecodeTwoBytes(const char** ptr) {
1010   uint32_t value = UnalignedLoad<uint16_t>(*ptr);
1011   // Sign extend the low byte continuation bit
1012   uint32_t x = static_cast<int8_t>(value);
1013   value &= x;  // Mask out the high byte iff no continuation
1014   // This add is an amazing operation: it cancels the low byte continuation bit
1015   // from x, transferring it to the carry. Simultaneously it also shifts the 7
1016   // LSB left by one, tightly against the high byte varint bits. Hence value now
1017   // contains the unpacked value shifted left by 1.
1018   value += x;
1019   // Use the carry to update the ptr appropriately.
1020   *ptr += value < x ? 2 : 1;
1021   return value;
1022 }
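// An illustrative walk-through of DecodeTwoBytes on the bytes 0x96 0x01 (the
// varint encoding of 150): value = 0x0196, x = sign-extended 0x96 =
// 0xFFFFFF96, value &= x keeps 0x0196, and value += x wraps around to 0x12C,
// i.e. 150 << 1. The wrap-around (value < x) advances *ptr by 2, and bit 15 of
// the result is clear, so the varint is complete.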
1023 
1024 // More efficient varint parsing for big varints
1025 inline const char* ParseBigVarint(const char* p, uint64_t* out) {
1026   auto pnew = p;
1027   auto tmp = DecodeTwoBytes(&pnew);
1028   uint64_t res = tmp >> 1;
1029   if (PROTOBUF_PREDICT_TRUE(static_cast<std::int16_t>(tmp) >= 0)) {
1030     *out = res;
1031     return pnew;
1032   }
1033   for (std::uint32_t i = 1; i < 5; i++) {
1034     pnew = p + 2 * i;
1035     tmp = DecodeTwoBytes(&pnew);
1036     res += (static_cast<std::uint64_t>(tmp) - 2) << (14 * i - 1);
1037     if (PROTOBUF_PREDICT_TRUE(static_cast<std::int16_t>(tmp) >= 0)) {
1038       *out = res;
1039       return pnew;
1040     }
1041   }
1042   return nullptr;
1043 }
1044 
1045 PROTOBUF_EXPORT
1046 std::pair<const char*, int32_t> ReadSizeFallback(const char* p, uint32_t first);
1047 // Used for sizes; could read up to 5 bytes, which must be available.
1048 // Additionally it makes sure the unsigned value fits in an int32_t, otherwise
1049 // returns nullptr. Caller must ensure it's safe to call.
1050 inline uint32_t ReadSize(const char** pp) {
1051   auto p = *pp;
1052   uint32_t res = static_cast<uint8_t>(p[0]);
1053   if (res < 128) {
1054     *pp = p + 1;
1055     return res;
1056   }
1057   auto x = ReadSizeFallback(p, res);
1058   *pp = x.first;
1059   return x.second;
1060 }
1061 
1062 // Some convenience functions to simplify the generated parse loop code.
1063 // Returning the value and updating the buffer pointer allows for nicer
1064 // function composition. We rely on the compiler to inline this.
1065 // Also, in debug compiles, having locally scoped variables tends to generate
1066 // stack frames that scale as O(num fields).
1067 inline uint64_t ReadVarint64(const char** p) {
1068   uint64_t tmp;
1069   *p = VarintParse(*p, &tmp);
1070   return tmp;
1071 }
1072 
1073 inline uint32_t ReadVarint32(const char** p) {
1074   uint32_t tmp;
1075   *p = VarintParse(*p, &tmp);
1076   return tmp;
1077 }
1078 
1079 inline int64_t ReadVarintZigZag64(const char** p) {
1080   uint64_t tmp;
1081   *p = VarintParse(*p, &tmp);
1082   return WireFormatLite::ZigZagDecode64(tmp);
1083 }
1084 
1085 inline int32_t ReadVarintZigZag32(const char** p) {
1086   uint64_t tmp;
1087   *p = VarintParse(*p, &tmp);
1088   return WireFormatLite::ZigZagDecode32(static_cast<uint32_t>(tmp));
1089 }
1090 
1091 template <typename Func>
1092 PROTOBUF_NODISCARD inline PROTOBUF_ALWAYS_INLINE const char*
1093 ParseContext::ParseLengthDelimitedInlined(const char* ptr, const Func& func) {
1094   LimitToken old;
1095   ptr = ReadSizeAndPushLimitAndDepthInlined(ptr, &old);
1096   if (ptr == nullptr) return ptr;
1097   auto old_depth = depth_;
1098   PROTOBUF_ALWAYS_INLINE_CALL ptr = func(ptr);
1099   if (ptr != nullptr) ABSL_DCHECK_EQ(old_depth, depth_);
1100   depth_++;
1101   if (!PopLimit(std::move(old))) return nullptr;
1102   return ptr;
1103 }
1104 
1105 template <typename Func>
1106 PROTOBUF_NODISCARD inline PROTOBUF_ALWAYS_INLINE const char*
1107 ParseContext::ParseGroupInlined(const char* ptr, uint32_t start_tag,
1108                                 const Func& func) {
1109   if (--depth_ < 0) return nullptr;
1110   group_depth_++;
1111   auto old_depth = depth_;
1112   auto old_group_depth = group_depth_;
1113   PROTOBUF_ALWAYS_INLINE_CALL ptr = func(ptr);
1114   if (ptr != nullptr) {
1115     ABSL_DCHECK_EQ(old_depth, depth_);
1116     ABSL_DCHECK_EQ(old_group_depth, group_depth_);
1117   }
1118   group_depth_--;
1119   depth_++;
1120   if (PROTOBUF_PREDICT_FALSE(!ConsumeEndGroup(start_tag))) return nullptr;
1121   return ptr;
1122 }
1123 
1124 inline const char* ParseContext::ReadSizeAndPushLimitAndDepthInlined(
1125     const char* ptr, LimitToken* old_limit) {
1126   int size = ReadSize(&ptr);
1127   if (PROTOBUF_PREDICT_FALSE(!ptr) || depth_ <= 0) {
1128     return nullptr;
1129   }
1130   *old_limit = PushLimit(ptr, size);
1131   --depth_;
1132   return ptr;
1133 }
1134 
1135 template <typename Tag, typename T>
1136 const char* EpsCopyInputStream::ReadRepeatedFixed(const char* ptr,
1137                                                   Tag expected_tag,
1138                                                   RepeatedField<T>* out) {
1139   do {
1140     out->Add(UnalignedLoad<T>(ptr));
1141     ptr += sizeof(T);
1142     if (PROTOBUF_PREDICT_FALSE(ptr >= limit_end_)) return ptr;
1143   } while (UnalignedLoad<Tag>(ptr) == expected_tag && (ptr += sizeof(Tag)));
1144   return ptr;
1145 }
1146 
1147 // Uncomment any of the lines in the macro below to debug which parse function is failing.
1148 
1149 #define GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, ret) \
1150   if (!(predicate)) {                                  \
1151     /*  ::raise(SIGINT);  */                           \
1152     /*  ABSL_LOG(ERROR) << "Parse failure";  */        \
1153     return ret;                                        \
1154   }
1155 
1156 #define GOOGLE_PROTOBUF_PARSER_ASSERT(predicate) \
1157   GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, nullptr)
1158 
1159 template <typename T>
1160 const char* EpsCopyInputStream::ReadPackedFixed(const char* ptr, int size,
1161                                                 RepeatedField<T>* out) {
1162   GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
1163   int nbytes = static_cast<int>(buffer_end_ + kSlopBytes - ptr);
1164   while (size > nbytes) {
1165     int num = nbytes / sizeof(T);
1166     int old_entries = out->size();
1167     out->Reserve(old_entries + num);
1168     int block_size = num * sizeof(T);
1169     auto dst = out->AddNAlreadyReserved(num);
1170 #ifdef ABSL_IS_LITTLE_ENDIAN
1171     std::memcpy(dst, ptr, block_size);
1172 #else
1173     for (int i = 0; i < num; i++)
1174       dst[i] = UnalignedLoad<T>(ptr + i * sizeof(T));
1175 #endif
1176     size -= block_size;
1177     if (limit_ <= kSlopBytes) return nullptr;
1178     ptr = Next();
1179     if (ptr == nullptr) return nullptr;
1180     ptr += kSlopBytes - (nbytes - block_size);
1181     nbytes = static_cast<int>(buffer_end_ + kSlopBytes - ptr);
1182   }
1183   int num = size / sizeof(T);
1184   int block_size = num * sizeof(T);
1185   if (num == 0) return size == block_size ? ptr : nullptr;
1186   int old_entries = out->size();
1187   out->Reserve(old_entries + num);
1188   auto dst = out->AddNAlreadyReserved(num);
1189 #ifdef ABSL_IS_LITTLE_ENDIAN
1190   ABSL_CHECK(dst != nullptr) << out << "," << num;
1191   std::memcpy(dst, ptr, block_size);
1192 #else
1193   for (int i = 0; i < num; i++) dst[i] = UnalignedLoad<T>(ptr + i * sizeof(T));
1194 #endif
1195   ptr += block_size;
1196   if (size != block_size) return nullptr;
1197   return ptr;
1198 }
1199 
1200 template <typename Add>
1201 const char* ReadPackedVarintArray(const char* ptr, const char* end, Add add) {
1202   while (ptr < end) {
1203     uint64_t varint;
1204     ptr = VarintParse(ptr, &varint);
1205     if (ptr == nullptr) return nullptr;
1206     add(varint);
1207   }
1208   return ptr;
1209 }
1210 
1211 template <typename Add, typename SizeCb>
1212 const char* EpsCopyInputStream::ReadPackedVarint(const char* ptr, Add add,
1213                                                  SizeCb size_callback) {
1214   int size = ReadSize(&ptr);
1215   size_callback(size);
1216 
1217   GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
1218   int chunk_size = static_cast<int>(buffer_end_ - ptr);
1219   while (size > chunk_size) {
1220     ptr = ReadPackedVarintArray(ptr, buffer_end_, add);
1221     if (ptr == nullptr) return nullptr;
1222     int overrun = static_cast<int>(ptr - buffer_end_);
1223     ABSL_DCHECK(overrun >= 0 && overrun <= kSlopBytes);
1224     if (size - chunk_size <= kSlopBytes) {
1225       // The current buffer contains all the information needed; we don't need
1226       // to flip buffers. However, we must parse from a buffer with enough space
1227       // so we are not prone to a buffer overflow.
1228       char buf[kSlopBytes + 10] = {};
1229       std::memcpy(buf, buffer_end_, kSlopBytes);
1230       ABSL_CHECK_LE(size - chunk_size, kSlopBytes);
1231       auto end = buf + (size - chunk_size);
1232       auto res = ReadPackedVarintArray(buf + overrun, end, add);
1233       if (res == nullptr || res != end) return nullptr;
1234       return buffer_end_ + (res - buf);
1235     }
1236     size -= overrun + chunk_size;
1237     ABSL_DCHECK_GT(size, 0);
1238     // We must flip buffers
1239     if (limit_ <= kSlopBytes) return nullptr;
1240     ptr = Next();
1241     if (ptr == nullptr) return nullptr;
1242     ptr += overrun;
1243     chunk_size = static_cast<int>(buffer_end_ - ptr);
1244   }
1245   auto end = ptr + size;
1246   ptr = ReadPackedVarintArray(ptr, end, add);
1247   return end == ptr ? ptr : nullptr;
1248 }
1249 
1250 // Helper for verification of utf8
1251 PROTOBUF_EXPORT
1252 bool VerifyUTF8(absl::string_view s, const char* field_name);
1253 
1254 inline bool VerifyUTF8(const std::string* s, const char* field_name) {
1255   return VerifyUTF8(*s, field_name);
1256 }
1257 
1258 // All the string parsers with or without UTF checking and for all CTypes.
1259 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* InlineGreedyStringParser(
1260     std::string* s, const char* ptr, ParseContext* ctx);
1261 
1262 PROTOBUF_NODISCARD inline const char* InlineCordParser(::absl::Cord* cord,
1263                                                        const char* ptr,
1264                                                        ParseContext* ctx) {
1265   int size = ReadSize(&ptr);
1266   if (!ptr) return nullptr;
1267   return ctx->ReadCord(ptr, size, cord);
1268 }
1269 
1270 
1271 template <typename T>
1272 PROTOBUF_NODISCARD const char* FieldParser(uint64_t tag, T& field_parser,
1273                                            const char* ptr, ParseContext* ctx) {
1274   uint32_t number = tag >> 3;
1275   GOOGLE_PROTOBUF_PARSER_ASSERT(number != 0);
1276   using WireType = internal::WireFormatLite::WireType;
1277   switch (tag & 7) {
1278     case WireType::WIRETYPE_VARINT: {
1279       uint64_t value;
1280       ptr = VarintParse(ptr, &value);
1281       GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
1282       field_parser.AddVarint(number, value);
1283       break;
1284     }
1285     case WireType::WIRETYPE_FIXED64: {
1286       uint64_t value = UnalignedLoad<uint64_t>(ptr);
1287       ptr += 8;
1288       field_parser.AddFixed64(number, value);
1289       break;
1290     }
1291     case WireType::WIRETYPE_LENGTH_DELIMITED: {
1292       ptr = field_parser.ParseLengthDelimited(number, ptr, ctx);
1293       GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
1294       break;
1295     }
1296     case WireType::WIRETYPE_START_GROUP: {
1297       ptr = field_parser.ParseGroup(number, ptr, ctx);
1298       GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
1299       break;
1300     }
1301     case WireType::WIRETYPE_END_GROUP: {
1302       ABSL_LOG(FATAL) << "Can't happen";
1303       break;
1304     }
1305     case WireType::WIRETYPE_FIXED32: {
1306       uint32_t value = UnalignedLoad<uint32_t>(ptr);
1307       ptr += 4;
1308       field_parser.AddFixed32(number, value);
1309       break;
1310     }
1311     default:
1312       return nullptr;
1313   }
1314   return ptr;
1315 }
1316 
1317 template <typename T>
1318 PROTOBUF_NODISCARD const char* WireFormatParser(T& field_parser,
1319                                                 const char* ptr,
1320                                                 ParseContext* ctx) {
1321   while (!ctx->Done(&ptr)) {
1322     uint32_t tag;
1323     ptr = ReadTag(ptr, &tag);
1324     GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
1325     if (tag == 0 || (tag & 7) == 4) {
1326       ctx->SetLastTag(tag);
1327       return ptr;
1328     }
1329     ptr = FieldParser(tag, field_parser, ptr, ctx);
1330     GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
1331   }
1332   return ptr;
1333 }
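// A hypothetical sketch of the duck-typed interface FieldParser expects from
// its field_parser argument; the names and counting behavior are illustrative
// and not part of this header. Nested payloads are simply funneled into a
// scratch unknown-field string via UnknownFieldParse.
// ```
// struct CountingSink {
//   int count = 0;
//   std::string scratch;  // receives length-delimited and group payloads
//   void AddVarint(uint32_t, uint64_t) { ++count; }
//   void AddFixed64(uint32_t, uint64_t) { ++count; }
//   void AddFixed32(uint32_t, uint32_t) { ++count; }
//   const char* ParseLengthDelimited(uint32_t num, const char* ptr,
//                                    ParseContext* ctx) {
//     ++count;
//     return UnknownFieldParse(num * 8 + 2, &scratch, ptr, ctx);
//   }
//   const char* ParseGroup(uint32_t num, const char* ptr, ParseContext* ctx) {
//     ++count;
//     return UnknownFieldParse(num * 8 + 3, &scratch, ptr, ctx);
//   }
// };
// ```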
1334 
1335 // The packed parsers parse repeated numeric primitives directly into the
1336 // corresponding field.
1337 
1338 // These are packed varints
1339 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedInt32Parser(
1340     void* object, const char* ptr, ParseContext* ctx);
1341 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedUInt32Parser(
1342     void* object, const char* ptr, ParseContext* ctx);
1343 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedInt64Parser(
1344     void* object, const char* ptr, ParseContext* ctx);
1345 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedUInt64Parser(
1346     void* object, const char* ptr, ParseContext* ctx);
1347 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedSInt32Parser(
1348     void* object, const char* ptr, ParseContext* ctx);
1349 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedSInt64Parser(
1350     void* object, const char* ptr, ParseContext* ctx);
1351 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedEnumParser(
1352     void* object, const char* ptr, ParseContext* ctx);
1353 
1354 template <typename T>
1355 PROTOBUF_NODISCARD const char* PackedEnumParser(void* object, const char* ptr,
1356                                                 ParseContext* ctx,
1357                                                 bool (*is_valid)(int),
1358                                                 InternalMetadata* metadata,
1359                                                 int field_num) {
1360   return ctx->ReadPackedVarint(
1361       ptr, [object, is_valid, metadata, field_num](int32_t val) {
1362         if (is_valid(val)) {
1363           static_cast<RepeatedField<int>*>(object)->Add(val);
1364         } else {
1365           WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
1366         }
1367       });
1368 }
1369 
1370 template <typename T>
1371 PROTOBUF_NODISCARD const char* PackedEnumParserArg(
1372     void* object, const char* ptr, ParseContext* ctx,
1373     bool (*is_valid)(const void*, int), const void* data,
1374     InternalMetadata* metadata, int field_num) {
1375   return ctx->ReadPackedVarint(
1376       ptr, [object, is_valid, data, metadata, field_num](int32_t val) {
1377         if (is_valid(data, val)) {
1378           static_cast<RepeatedField<int>*>(object)->Add(val);
1379         } else {
1380           WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
1381         }
1382       });
1383 }
1384 
1385 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedBoolParser(
1386     void* object, const char* ptr, ParseContext* ctx);
1387 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedFixed32Parser(
1388     void* object, const char* ptr, ParseContext* ctx);
1389 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedSFixed32Parser(
1390     void* object, const char* ptr, ParseContext* ctx);
1391 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedFixed64Parser(
1392     void* object, const char* ptr, ParseContext* ctx);
1393 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedSFixed64Parser(
1394     void* object, const char* ptr, ParseContext* ctx);
1395 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedFloatParser(
1396     void* object, const char* ptr, ParseContext* ctx);
1397 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedDoubleParser(
1398     void* object, const char* ptr, ParseContext* ctx);
1399 
1400 // This is the only recursive parser.
1401 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* UnknownGroupLiteParse(
1402     std::string* unknown, const char* ptr, ParseContext* ctx);
1403 // This is a helper for UnknownGroupLiteParse but is actually also
1404 // useful in the generated code. It uses overloading on std::string* vs
1405 // UnknownFieldSet* to make the generated code isomorphic between full and lite.
1406 PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* UnknownFieldParse(
1407     uint32_t tag, std::string* unknown, const char* ptr, ParseContext* ctx);
1408 
1409 }  // namespace internal
1410 }  // namespace protobuf
1411 }  // namespace google
1412 
1413 #include "google/protobuf/port_undef.inc"
1414 
1415 #endif  // GOOGLE_PROTOBUF_PARSE_CONTEXT_H__