Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:13:19

0001 // Protocol Buffers - Google's data interchange format
0002 // Copyright 2023 Google LLC.  All rights reserved.
0003 //
0004 // Use of this source code is governed by a BSD-style
0005 // license that can be found in the LICENSE file or at
0006 // https://developers.google.com/open-source/licenses/bsd
0007 
0008 #ifndef UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
0009 #define UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
0010 
#include <limits.h>
#include <string.h>

#include "upb/mem/arena.h"

// Must be last.
#include "upb/port/def.inc"
0017 
0018 #ifdef __cplusplus
0019 extern "C" {
0020 #endif
0021 
// Upper bound, in bytes, on the wire-format size of one protobuf field.
// Bounds are checked only once per field: after
// upb_EpsCopyInputStream_IsDone() has been called, the decoder may read this
// many bytes with no further bounds check.  To uphold that invariant the
// stream copies data into a patch buffer whenever necessary.
#define kUpb_EpsCopyInputStream_SlopBytes 16
0029 
// Reserved values of upb_EpsCopyInputStream::aliasing.  Any other value stored
// in that field is an address delta between the input and the patch buffer.
enum {
  // Aliasing was not requested when the stream was initialized.
  kUpb_EpsCopyInputStream_NoAliasing = 0,
  // NOTE(review): not referenced anywhere in this header; meaning to be
  // confirmed against the rest of the project.
  kUpb_EpsCopyInputStream_OnPatch = 1,
  // Aliasing is enabled and parse pointers need no adjustment (delta of 0).
  kUpb_EpsCopyInputStream_NoDelta = 2,
};
0035 
0036 typedef struct {
0037   const char* end;        // Can read up to SlopBytes bytes beyond this.
0038   const char* limit_ptr;  // For bounds checks, = end + UPB_MIN(limit, 0)
0039   uintptr_t aliasing;
0040   int limit;   // Submessage limit relative to end
0041   bool error;  // To distinguish between EOF and error.
0042   char patch[kUpb_EpsCopyInputStream_SlopBytes * 2];
0043 } upb_EpsCopyInputStream;
0044 
0045 // Returns true if the stream is in the error state. A stream enters the error
0046 // state when the user reads past a limit (caught in IsDone()) or the
0047 // ZeroCopyInputStream returns an error.
0048 UPB_INLINE bool upb_EpsCopyInputStream_IsError(upb_EpsCopyInputStream* e) {
0049   return e->error;
0050 }
0051 
0052 typedef const char* upb_EpsCopyInputStream_BufferFlipCallback(
0053     upb_EpsCopyInputStream* e, const char* old_end, const char* new_start);
0054 
0055 typedef const char* upb_EpsCopyInputStream_IsDoneFallbackFunc(
0056     upb_EpsCopyInputStream* e, const char* ptr, int overrun);
0057 
0058 // Initializes a upb_EpsCopyInputStream using the contents of the buffer
0059 // [*ptr, size].  Updates `*ptr` as necessary to guarantee that at least
0060 // kUpb_EpsCopyInputStream_SlopBytes are available to read.
0061 UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e,
0062                                             const char** ptr, size_t size,
0063                                             bool enable_aliasing) {
0064   if (size <= kUpb_EpsCopyInputStream_SlopBytes) {
0065     memset(&e->patch, 0, 32);
0066     if (size) memcpy(&e->patch, *ptr, size);
0067     e->aliasing = enable_aliasing ? (uintptr_t)*ptr - (uintptr_t)e->patch
0068                                   : kUpb_EpsCopyInputStream_NoAliasing;
0069     *ptr = e->patch;
0070     e->end = *ptr + size;
0071     e->limit = 0;
0072   } else {
0073     e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes;
0074     e->limit = kUpb_EpsCopyInputStream_SlopBytes;
0075     e->aliasing = enable_aliasing ? kUpb_EpsCopyInputStream_NoDelta
0076                                   : kUpb_EpsCopyInputStream_NoAliasing;
0077   }
0078   e->limit_ptr = e->end;
0079   e->error = false;
0080 }
0081 
// Result of upb_EpsCopyInputStream_IsDoneStatus().
typedef enum {
  // The stream position is exactly at a limit.
  kUpb_IsDoneStatus_Done = 0,

  // The stream position has not reached a limit.
  kUpb_IsDoneStatus_NotDone = 1,

  // The stream position is not at a limit, but the stream must be flipped to
  // a new buffer before any more data can be read.
  kUpb_IsDoneStatus_NeedFallback = 2,
} upb_IsDoneStatus;
0093 
0094 // Returns the status of the current stream position.  This is a low-level
0095 // function, it is simpler to call upb_EpsCopyInputStream_IsDone() if possible.
0096 UPB_INLINE upb_IsDoneStatus upb_EpsCopyInputStream_IsDoneStatus(
0097     upb_EpsCopyInputStream* e, const char* ptr, int* overrun) {
0098   *overrun = ptr - e->end;
0099   if (UPB_LIKELY(ptr < e->limit_ptr)) {
0100     return kUpb_IsDoneStatus_NotDone;
0101   } else if (UPB_LIKELY(*overrun == e->limit)) {
0102     return kUpb_IsDoneStatus_Done;
0103   } else {
0104     return kUpb_IsDoneStatus_NeedFallback;
0105   }
0106 }
0107 
0108 // Returns true if the stream has hit a limit, either the current delimited
0109 // limit or the overall end-of-stream. As a side effect, this function may flip
0110 // the pointer to a new buffer if there are less than
0111 // kUpb_EpsCopyInputStream_SlopBytes of data to be read in the current buffer.
0112 //
0113 // Postcondition: if the function returns false, there are at least
0114 // kUpb_EpsCopyInputStream_SlopBytes of data available to read at *ptr.
0115 UPB_INLINE bool upb_EpsCopyInputStream_IsDoneWithCallback(
0116     upb_EpsCopyInputStream* e, const char** ptr,
0117     upb_EpsCopyInputStream_IsDoneFallbackFunc* func) {
0118   int overrun;
0119   switch (upb_EpsCopyInputStream_IsDoneStatus(e, *ptr, &overrun)) {
0120     case kUpb_IsDoneStatus_Done:
0121       return true;
0122     case kUpb_IsDoneStatus_NotDone:
0123       return false;
0124     case kUpb_IsDoneStatus_NeedFallback:
0125       *ptr = func(e, *ptr, overrun);
0126       return *ptr == NULL;
0127   }
0128   UPB_UNREACHABLE();
0129 }
0130 
0131 const char* _upb_EpsCopyInputStream_IsDoneFallbackNoCallback(
0132     upb_EpsCopyInputStream* e, const char* ptr, int overrun);
0133 
0134 // A simpler version of IsDoneWithCallback() that does not support a buffer flip
0135 // callback. Useful in cases where we do not need to insert custom logic at
0136 // every buffer flip.
0137 //
0138 // If this returns true, the user must call upb_EpsCopyInputStream_IsError()
0139 // to distinguish between EOF and error.
0140 UPB_INLINE bool upb_EpsCopyInputStream_IsDone(upb_EpsCopyInputStream* e,
0141                                               const char** ptr) {
0142   return upb_EpsCopyInputStream_IsDoneWithCallback(
0143       e, ptr, _upb_EpsCopyInputStream_IsDoneFallbackNoCallback);
0144 }
0145 
0146 // Returns the total number of bytes that are safe to read from the current
0147 // buffer without reading uninitialized or unallocated memory.
0148 //
0149 // Note that this check does not respect any semantic limits on the stream,
0150 // either limits from PushLimit() or the overall stream end, so some of these
0151 // bytes may have unpredictable, nonsense values in them. The guarantee is only
0152 // that the bytes are valid to read from the perspective of the C language
0153 // (ie. you can read without triggering UBSAN or ASAN).
0154 UPB_INLINE size_t upb_EpsCopyInputStream_BytesAvailable(
0155     upb_EpsCopyInputStream* e, const char* ptr) {
0156   return (e->end - ptr) + kUpb_EpsCopyInputStream_SlopBytes;
0157 }
0158 
0159 // Returns true if the given delimited field size is valid (it does not extend
0160 // beyond any previously-pushed limits).  `ptr` should point to the beginning
0161 // of the field data, after the delimited size.
0162 //
0163 // Note that this does *not* guarantee that all of the data for this field is in
0164 // the current buffer.
0165 UPB_INLINE bool upb_EpsCopyInputStream_CheckSize(
0166     const upb_EpsCopyInputStream* e, const char* ptr, int size) {
0167   UPB_ASSERT(size >= 0);
0168   return ptr - e->end + size <= e->limit;
0169 }
0170 
0171 UPB_INLINE bool _upb_EpsCopyInputStream_CheckSizeAvailable(
0172     upb_EpsCopyInputStream* e, const char* ptr, int size, bool submessage) {
0173   // This is one extra branch compared to the more normal:
0174   //   return (size_t)(end - ptr) < size;
0175   // However it is one less computation if we are just about to use "ptr + len":
0176   //   https://godbolt.org/z/35YGPz
0177   // In microbenchmarks this shows a small improvement.
0178   uintptr_t uptr = (uintptr_t)ptr;
0179   uintptr_t uend = (uintptr_t)e->limit_ptr;
0180   uintptr_t res = uptr + (size_t)size;
0181   if (!submessage) uend += kUpb_EpsCopyInputStream_SlopBytes;
0182   // NOTE: this check depends on having a linear address space.  This is not
0183   // technically guaranteed by uintptr_t.
0184   bool ret = res >= uptr && res <= uend;
0185   if (size < 0) UPB_ASSERT(!ret);
0186   return ret;
0187 }
0188 
0189 // Returns true if the given delimited field size is valid (it does not extend
0190 // beyond any previously-pushed limited) *and* all of the data for this field is
0191 // available to be read in the current buffer.
0192 //
0193 // If the size is negative, this function will always return false. This
0194 // property can be useful in some cases.
0195 UPB_INLINE bool upb_EpsCopyInputStream_CheckDataSizeAvailable(
0196     upb_EpsCopyInputStream* e, const char* ptr, int size) {
0197   return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, false);
0198 }
0199 
0200 // Returns true if the given sub-message size is valid (it does not extend
0201 // beyond any previously-pushed limited) *and* all of the data for this
0202 // sub-message is available to be parsed in the current buffer.
0203 //
0204 // This implies that all fields from the sub-message can be parsed from the
0205 // current buffer while maintaining the invariant that we always have at least
0206 // kUpb_EpsCopyInputStream_SlopBytes of data available past the beginning of
0207 // any individual field start.
0208 //
0209 // If the size is negative, this function will always return false. This
0210 // property can be useful in some cases.
0211 UPB_INLINE bool upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(
0212     upb_EpsCopyInputStream* e, const char* ptr, int size) {
0213   return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, true);
0214 }
0215 
0216 // Returns true if aliasing_enabled=true was passed to
0217 // upb_EpsCopyInputStream_Init() when this stream was initialized.
0218 UPB_INLINE bool upb_EpsCopyInputStream_AliasingEnabled(
0219     upb_EpsCopyInputStream* e) {
0220   return e->aliasing != kUpb_EpsCopyInputStream_NoAliasing;
0221 }
0222 
0223 // Returns true if aliasing_enabled=true was passed to
0224 // upb_EpsCopyInputStream_Init() when this stream was initialized *and* we can
0225 // alias into the region [ptr, size] in an input buffer.
0226 UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable(
0227     upb_EpsCopyInputStream* e, const char* ptr, size_t size) {
0228   // When EpsCopyInputStream supports streaming, this will need to become a
0229   // runtime check.
0230   return upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size) &&
0231          e->aliasing >= kUpb_EpsCopyInputStream_NoDelta;
0232 }
0233 
0234 // Returns a pointer into an input buffer that corresponds to the parsing
0235 // pointer `ptr`.  The returned pointer may be the same as `ptr`, but also may
0236 // be different if we are currently parsing out of the patch buffer.
0237 //
0238 // REQUIRES: Aliasing must be available for the given pointer. If the input is a
0239 // flat buffer and aliasing is enabled, then aliasing will always be available.
0240 UPB_INLINE const char* upb_EpsCopyInputStream_GetAliasedPtr(
0241     upb_EpsCopyInputStream* e, const char* ptr) {
0242   UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, ptr, 0));
0243   uintptr_t delta =
0244       e->aliasing == kUpb_EpsCopyInputStream_NoDelta ? 0 : e->aliasing;
0245   return (const char*)((uintptr_t)ptr + delta);
0246 }
0247 
0248 // Reads string data from the input, aliasing into the input buffer instead of
0249 // copying. The parsing pointer is passed in `*ptr`, and will be updated if
0250 // necessary to point to the actual input buffer. Returns the new parsing
0251 // pointer, which will be advanced past the string data.
0252 //
0253 // REQUIRES: Aliasing must be available for this data region (test with
0254 // upb_EpsCopyInputStream_AliasingAvailable().
0255 UPB_INLINE const char* upb_EpsCopyInputStream_ReadStringAliased(
0256     upb_EpsCopyInputStream* e, const char** ptr, size_t size) {
0257   UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size));
0258   const char* ret = *ptr + size;
0259   *ptr = upb_EpsCopyInputStream_GetAliasedPtr(e, *ptr);
0260   UPB_ASSUME(ret != NULL);
0261   return ret;
0262 }
0263 
0264 // Skips `size` bytes of data from the input and returns a pointer past the end.
0265 // Returns NULL on end of stream or error.
0266 UPB_INLINE const char* upb_EpsCopyInputStream_Skip(upb_EpsCopyInputStream* e,
0267                                                    const char* ptr, int size) {
0268   if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL;
0269   return ptr + size;
0270 }
0271 
0272 // Copies `size` bytes of data from the input `ptr` into the buffer `to`, and
0273 // returns a pointer past the end. Returns NULL on end of stream or error.
0274 UPB_INLINE const char* upb_EpsCopyInputStream_Copy(upb_EpsCopyInputStream* e,
0275                                                    const char* ptr, void* to,
0276                                                    int size) {
0277   if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL;
0278   memcpy(to, ptr, size);
0279   return ptr + size;
0280 }
0281 
0282 // Reads string data from the stream and advances the pointer accordingly.
0283 // If aliasing was enabled when the stream was initialized, then the returned
0284 // pointer will point into the input buffer if possible, otherwise new data
0285 // will be allocated from arena and copied into. We may be forced to copy even
0286 // if aliasing was enabled if the input data spans input buffers.
0287 //
0288 // Returns NULL if memory allocation failed, or we reached a premature EOF.
0289 UPB_INLINE const char* upb_EpsCopyInputStream_ReadString(
0290     upb_EpsCopyInputStream* e, const char** ptr, size_t size,
0291     upb_Arena* arena) {
0292   if (upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size)) {
0293     return upb_EpsCopyInputStream_ReadStringAliased(e, ptr, size);
0294   } else {
0295     // We need to allocate and copy.
0296     if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, *ptr, size)) {
0297       return NULL;
0298     }
0299     UPB_ASSERT(arena);
0300     char* data = (char*)upb_Arena_Malloc(arena, size);
0301     if (!data) return NULL;
0302     const char* ret = upb_EpsCopyInputStream_Copy(e, *ptr, data, size);
0303     *ptr = data;
0304     return ret;
0305   }
0306 }
0307 
0308 UPB_INLINE void _upb_EpsCopyInputStream_CheckLimit(upb_EpsCopyInputStream* e) {
0309   UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
0310 }
0311 
0312 // Pushes a limit onto the stack of limits for the current stream.  The limit
0313 // will extend for `size` bytes beyond the position in `ptr`.  Future calls to
0314 // upb_EpsCopyInputStream_IsDone() will return `true` when the stream position
0315 // reaches this limit.
0316 //
0317 // Returns a delta that the caller must store and supply to PopLimit() below.
0318 UPB_INLINE int upb_EpsCopyInputStream_PushLimit(upb_EpsCopyInputStream* e,
0319                                                 const char* ptr, int size) {
0320   int limit = size + (int)(ptr - e->end);
0321   int delta = e->limit - limit;
0322   _upb_EpsCopyInputStream_CheckLimit(e);
0323   UPB_ASSERT(limit <= e->limit);
0324   e->limit = limit;
0325   e->limit_ptr = e->end + UPB_MIN(0, limit);
0326   _upb_EpsCopyInputStream_CheckLimit(e);
0327   return delta;
0328 }
0329 
0330 // Pops the last limit that was pushed on this stream.  This may only be called
0331 // once IsDone() returns true.  The user must pass the delta that was returned
0332 // from PushLimit().
0333 UPB_INLINE void upb_EpsCopyInputStream_PopLimit(upb_EpsCopyInputStream* e,
0334                                                 const char* ptr,
0335                                                 int saved_delta) {
0336   UPB_ASSERT(ptr - e->end == e->limit);
0337   _upb_EpsCopyInputStream_CheckLimit(e);
0338   e->limit += saved_delta;
0339   e->limit_ptr = e->end + UPB_MIN(0, e->limit);
0340   _upb_EpsCopyInputStream_CheckLimit(e);
0341 }
0342 
0343 UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline(
0344     upb_EpsCopyInputStream* e, const char* ptr, int overrun,
0345     upb_EpsCopyInputStream_BufferFlipCallback* callback) {
0346   if (overrun < e->limit) {
0347     // Need to copy remaining data into patch buffer.
0348     UPB_ASSERT(overrun < kUpb_EpsCopyInputStream_SlopBytes);
0349     const char* old_end = ptr;
0350     const char* new_start = &e->patch[0] + overrun;
0351     memset(e->patch + kUpb_EpsCopyInputStream_SlopBytes, 0,
0352            kUpb_EpsCopyInputStream_SlopBytes);
0353     memcpy(e->patch, e->end, kUpb_EpsCopyInputStream_SlopBytes);
0354     ptr = new_start;
0355     e->end = &e->patch[kUpb_EpsCopyInputStream_SlopBytes];
0356     e->limit -= kUpb_EpsCopyInputStream_SlopBytes;
0357     e->limit_ptr = e->end + e->limit;
0358     UPB_ASSERT(ptr < e->limit_ptr);
0359     if (e->aliasing != kUpb_EpsCopyInputStream_NoAliasing) {
0360       e->aliasing = (uintptr_t)old_end - (uintptr_t)new_start;
0361     }
0362     return callback(e, old_end, new_start);
0363   } else {
0364     UPB_ASSERT(overrun > e->limit);
0365     e->error = true;
0366     return callback(e, NULL, NULL);
0367   }
0368 }
0369 
0370 typedef const char* upb_EpsCopyInputStream_ParseDelimitedFunc(
0371     upb_EpsCopyInputStream* e, const char* ptr, void* ctx);
0372 
0373 // Tries to perform a fast-path handling of the given delimited message data.
0374 // If the sub-message beginning at `*ptr` and extending for `len` is short and
0375 // fits within this buffer, calls `func` with `ctx` as a parameter, where the
0376 // pushing and popping of limits is handled automatically and with lower cost
0377 // than the normal PushLimit()/PopLimit() sequence.
0378 UPB_FORCEINLINE bool upb_EpsCopyInputStream_TryParseDelimitedFast(
0379     upb_EpsCopyInputStream* e, const char** ptr, int len,
0380     upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) {
0381   if (!upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(e, *ptr, len)) {
0382     return false;
0383   }
0384 
0385   // Fast case: Sub-message is <128 bytes and fits in the current buffer.
0386   // This means we can preserve limit/limit_ptr verbatim.
0387   const char* saved_limit_ptr = e->limit_ptr;
0388   int saved_limit = e->limit;
0389   e->limit_ptr = *ptr + len;
0390   e->limit = e->limit_ptr - e->end;
0391   UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
0392   *ptr = func(e, *ptr, ctx);
0393   e->limit_ptr = saved_limit_ptr;
0394   e->limit = saved_limit;
0395   UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
0396   return true;
0397 }
0398 
0399 #ifdef __cplusplus
0400 } /* extern "C" */
0401 #endif
0402 
0403 #include "upb/port/undef.inc"
0404 
0405 #endif  // UPB_WIRE_EPS_COPY_INPUT_STREAM_H_