Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-16 10:17:53

0001 // Protocol Buffers - Google's data interchange format
0002 // Copyright 2008 Google Inc.  All rights reserved.
0003 //
0004 // Use of this source code is governed by a BSD-style
0005 // license that can be found in the LICENSE file or at
0006 // https://developers.google.com/open-source/licenses/bsd
0007 
0008 #ifndef GOOGLE_PROTOBUF_JSON_INTERNAL_ZERO_COPY_BUFFERED_STREAM_H__
0009 #define GOOGLE_PROTOBUF_JSON_INTERNAL_ZERO_COPY_BUFFERED_STREAM_H__
0010 
0011 #include <algorithm>
0012 #include <cstdint>
0013 #include <iostream>
0014 #include <string>
0015 #include <utility>
0016 #include <vector>
0017 
0018 #include "absl/log/absl_check.h"
0019 #include "absl/log/absl_log.h"
0020 #include "absl/status/status.h"
0021 #include "absl/status/statusor.h"
0022 #include "absl/strings/str_format.h"
0023 #include "absl/strings/string_view.h"
0024 #include "google/protobuf/io/zero_copy_stream.h"
0025 #include "google/protobuf/stubs/status_macros.h"
0026 
0027 // Must be included last.
0028 #include "google/protobuf/port_def.inc"
0029 
0030 // Utilities for parsing contiguous buffers out of ZeroCopyInputStreams.
0031 
0032 namespace google {
0033 namespace protobuf {
0034 namespace json_internal {
0035 // Forward decl. for use by helper types below.
0036 class ZeroCopyBufferedStream;
0037 
0038 // An RAII type that represents holding a reference into the backing buffer
0039 // of a ZeroCopyBufferedStream. This allows for automatic management of the
0040 // backing buffer.
0041 class BufferingGuard {
0042  public:
0043   explicit BufferingGuard(ZeroCopyBufferedStream* owner = nullptr);
0044   ~BufferingGuard();
0045 
0046   BufferingGuard(const BufferingGuard& other) : BufferingGuard(other.owner_) {}
0047   BufferingGuard& operator=(const BufferingGuard& other) {
0048     this->~BufferingGuard();
0049     new (this) BufferingGuard(other);
0050     return *this;
0051   }
0052 
0053  private:
0054   friend class Mark;
0055   ZeroCopyBufferedStream* owner_ = nullptr;
0056 };
0057 
0058 // A string that may own its contents, or live inside of a buffer owned by
0059 // a ZeroCopyBufferedStream.
0060 //
0061 // Note that this type holds onto a reference to the owning
0062 // ZeroCopyBufferedStream; this allows it to be durable against strings being
0063 // moved around for buffering puroses.
0064 class MaybeOwnedString {
0065  public:
0066   explicit MaybeOwnedString(std::string value) : data_(std::move(value)) {}
0067   MaybeOwnedString(ZeroCopyBufferedStream* stream, size_t start, size_t len,
0068                    BufferingGuard token)
0069       : data_(StreamOwned{stream, start, len}), token_(token) {}
0070 
0071   // Returns the string as a view, regardless of whether it is owned or not.
0072   absl::string_view AsView() const {
0073     if (auto* unowned = absl::get_if<StreamOwned>(&data_)) {
0074       return unowned->AsView();
0075     }
0076 
0077     return absl::get<std::string>(data_);
0078   }
0079 
0080   operator absl::string_view() const { return AsView(); }  // NOLINT
0081 
0082   // Returns a reference to an owned string; if the wrapped string is not
0083   // owned, this function will perform a copy and make it owned.
0084   std::string& ToString() {
0085     if (auto* unowned = absl::get_if<StreamOwned>(&data_)) {
0086       data_ = std::string(unowned->AsView());
0087       token_ = BufferingGuard{};
0088     }
0089 
0090     return absl::get<std::string>(data_);
0091   }
0092 
0093   template <typename String>
0094   friend bool operator==(const MaybeOwnedString& lhs, const String& rhs) {
0095     return lhs.AsView() == rhs;
0096   }
0097   template <typename String>
0098   friend bool operator!=(const MaybeOwnedString& lhs, const String& rhs) {
0099     return !(lhs == rhs);
0100   }
0101 
0102  private:
0103   struct StreamOwned {
0104     ZeroCopyBufferedStream* stream;
0105     size_t start, len;
0106     absl::string_view AsView() const;
0107   };
0108   absl::variant<std::string, StreamOwned> data_;
0109   BufferingGuard token_;
0110 };
0111 
0112 // A mark in a stream. See ZeroCopyBufferedStream::Mark().
0113 class Mark {
0114  public:
0115   // Returns a maybe-owned string up to the unread bytes boundary, except for
0116   // the last `clip` bytes.
0117   MaybeOwnedString UpToUnread(size_t clip = 0) const;
0118 
0119   // Discards this mark and its hold on the buffer.
0120   void Discard() && { guard_ = BufferingGuard(); }
0121 
0122  private:
0123   friend ZeroCopyBufferedStream;
0124   Mark(size_t offset, BufferingGuard guard) : offset_(offset), guard_(guard) {}
0125 
0126   size_t offset_;
0127   BufferingGuard guard_;
0128 };
0129 
0130 // A wrapper over a ZeroCopyInputStream that allows doing as-needed buffer for
0131 // obtaining contiguous chunks larger than those the underlying stream might
0132 // provide, while minimizing the amount of actual copying.
0133 class ZeroCopyBufferedStream {
0134  public:
0135   explicit ZeroCopyBufferedStream(io::ZeroCopyInputStream* stream)
0136       : stream_(stream) {}
0137 
0138   // Returns whether the stream is currently at eof.
0139   //
0140   // This function will buffer at least one character to verify whether it
0141   // actually *is* at EOF.
0142   bool AtEof() {
0143     (void)BufferAtLeast(1);
0144     return eof_;
0145   }
0146 
0147   // Takes exactly n characters from a string.
0148   absl::StatusOr<MaybeOwnedString> Take(size_t len) {
0149     auto buffering = BufferAtLeast(len);
0150     RETURN_IF_ERROR(buffering.status());
0151 
0152     size_t start = cursor_;
0153     RETURN_IF_ERROR(Advance(len));
0154     return MaybeOwnedString(this, start, len, *buffering);
0155   }
0156 
0157   // Takes characters to form a string, according to the given predicate. Stops
0158   // early if an EOF is hit.
0159   //
0160   // The predicate must have type `(int, char) -> bool`; the first argument
0161   // is the index of the character.
0162   template <typename Pred>
0163   absl::StatusOr<MaybeOwnedString> TakeWhile(Pred p);
0164 
0165   // Places a mark in the stream, ensuring that all characters consumed after
0166   // the mark are buffered. This can be used to parse some characters and then
0167   // recover everything that follows as a contiguous string_view so that it may
0168   // be processed a second time.
0169   //
0170   // The returned value is an RAII type that ensure the buffer sticks around
0171   // long enough.
0172   Mark BeginMark() { return Mark(cursor_, BufferingGuard(this)); }
0173 
0174   // Peeks the next character in the stream.
0175   //
0176   // This function will not enable buffering on its own, and will read past the
0177   // end of the buffer if at EOF; BufferAtLeast() should be called before
0178   // calling this function.
0179   char PeekChar() {
0180     ABSL_DCHECK(!Unread().empty());
0181     return Unread()[0];
0182   }
0183 
0184   // Advances the cursor by the given number of bytes.
0185   absl::Status Advance(size_t bytes);
0186 
0187   // Returns a view of the current buffer, which may be either the owned
0188   // `buf_` or the stream-owned `last_chunk_`.
0189   //
0190   // The returned view is unstable: calling any function may invalidate it,
0191   // because there will not be a `BufferingGuard` to guard it.
0192   absl::string_view RawBuffer(size_t start,
0193                               size_t len = absl::string_view::npos) const;
0194 
0195   // Returns a view of RawBuffer, unread bytes; this will not be the entirety
0196   // of the underlying stream.
0197   absl::string_view Unread() const { return RawBuffer(cursor_); }
0198 
0199   bool IsBuffering() const { return using_buf_; }
0200 
0201   // Buffers at least `bytes` bytes ahead of the current cursor position,
0202   // possibly enabling buffering.
0203   //
0204   // Returns an error if that many bytes could not be RawBuffer.
0205   absl::StatusOr<BufferingGuard> BufferAtLeast(size_t bytes);
0206 
0207  private:
0208   friend BufferingGuard;
0209   friend Mark;
0210   friend MaybeOwnedString;
0211 
0212   // Increments the buffering refcount; this will also update `buffer_start_` if
0213   // necessary.
0214   void UpRefBuffer() {
0215     if (outstanding_buffer_borrows_++ == 0) {
0216       buffer_start_ = cursor_;
0217     }
0218   }
0219 
0220   // Decrements the buffering refcount; calling this function if the refcount is
0221   // zero is undefined behavior.
0222   //
0223   // This function should not be called directly; it is called automatically
0224   // by the destructor of `BufferingGuard`.
0225   void DownRefBuffer();
0226 
0227   // Obtains a new chunk from the underlying stream; returns whether there is
0228   // still more data to read.
0229   bool ReadChunk();
0230 
0231   // The streamer implements a buffering stream on top of the given stream, by
0232   // the following mechanism:
0233   // - `cursor_` is an offset into either `last_chunk_` or `buf_`, which can
0234   //   be obtained via RawBuffer() and Unread():
0235   //   - If `using_buf_` is true, it is an offset into `buf_`.
0236   //   - Otherwise it is an offset into `last_chunk_`.
0237   // - If `outstanding_buffer_borrows_ > 0`, someone needs the buffer to stick
0238   //   around. MaybeUnownedString::StreamOwned is implemented such that it does
0239   //   not hold onto `last_chunk_` directly, so we can freely copy it into
0240   //   `buf_` as needed arises.
0241   //   - Note that we can copy only part if we update `buffer_start_`; see
0242   //     RawBuffer().
0243   // - If we would read more data and `outstanding_buffer_borrows_ > 0`, instead
0244   //   of trashing `last_chunk_`, we copy it into `buf_` and append to `buf_`
0245   //   each time we read.
0246   // - If `outstanding_buffer_borrows_ == 0`, we can trash `buf_` and go back to
0247   //   using `last_chunk_` directly. See `DownRefBuffer()`.
0248   io::ZeroCopyInputStream* stream_;
0249   absl::string_view last_chunk_;
0250   std::vector<char> buf_;
0251   bool using_buf_ = false;
0252   size_t cursor_ = 0;
0253   // Invariant: this always refers to the earliest point at which we requested
0254   // buffering, since the last time outstanding_buffer_borrows_ was zero.
0255   size_t buffer_start_ = 0;
0256   bool eof_ = false;
0257   int outstanding_buffer_borrows_ = 0;
0258 };
0259 
0260 // These functions all rely on the definition of ZeroCopyBufferedStream, so must
0261 // come after it.
0262 inline BufferingGuard::BufferingGuard(ZeroCopyBufferedStream* owner)
0263     : owner_(owner) {
0264   if (owner_ != nullptr) {
0265     owner_->UpRefBuffer();
0266   }
0267 }
0268 
0269 inline BufferingGuard::~BufferingGuard() {
0270   if (owner_ != nullptr) {
0271     owner_->DownRefBuffer();
0272     owner_ = nullptr;
0273   }
0274 }
0275 
0276 inline absl::string_view MaybeOwnedString::StreamOwned::AsView() const {
0277   return stream->RawBuffer(start, len);
0278 }
0279 
0280 inline MaybeOwnedString Mark::UpToUnread(size_t clip) const {
0281   return MaybeOwnedString(guard_.owner_, offset_,
0282                           guard_.owner_->cursor_ - offset_ - clip, guard_);
0283 }
0284 
0285 template <typename Pred>
0286 absl::StatusOr<MaybeOwnedString> ZeroCopyBufferedStream::TakeWhile(Pred p) {
0287   size_t start = cursor_;
0288   BufferingGuard guard(this);
0289   while (true) {
0290     if (!BufferAtLeast(1).ok()) {
0291       // We treat EOF as ending the take, rather than being an error.
0292       break;
0293     }
0294     if (!p(cursor_ - start, PeekChar())) {
0295       break;
0296     }
0297     RETURN_IF_ERROR(Advance(1));
0298   }
0299 
0300   return MaybeOwnedString(this, start, cursor_ - start, guard);
0301 }
0302 
0303 inline absl::string_view ZeroCopyBufferedStream::RawBuffer(size_t start,
0304                                                            size_t len) const {
0305   absl::string_view view = last_chunk_;
0306   if (using_buf_) {
0307     ABSL_DCHECK_LE(buffer_start_, start);
0308     start -= buffer_start_;
0309     view = absl::string_view(buf_.data(), buf_.size());
0310   }
0311 #if 0
0312     // This print statement is especially useful for trouble-shooting low-level
0313     // bugs in the buffering logic.
0314     ABSL_LOG(INFO) << absl::StreamFormat("%s(\"%s\")[%d:%d]/%d:%d @ %p",
0315                                     using_buf_ ? "buf_" : "last_chunk_",
0316                                     view, start, static_cast<int>(len),
0317                                     buffer_start_, cursor_, this);
0318 #endif
0319   ABSL_DCHECK_LE(start, view.size());
0320   if (len == absl::string_view::npos) {
0321     return view.substr(start);
0322   }
0323 
0324   ABSL_DCHECK_LE(start + len, view.size());
0325   return view.substr(start, len);
0326 }
0327 }  // namespace json_internal
0328 }  // namespace protobuf
0329 }  // namespace google
0330 
0331 #include "google/protobuf/port_undef.inc"
0332 #endif  // GOOGLE_PROTOBUF_JSON_INTERNAL_ZERO_COPY_BUFFERED_STREAM_H__