Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-06-24 08:36:39

0001 // © 2016 and later: Unicode, Inc. and others.
0002 // License & terms of use: http://www.unicode.org/copyright.html
0003 // Copyright (C) 2009-2013, International Business Machines
0004 // Corporation and others. All Rights Reserved.
0005 //
0006 // Copyright 2001 and onwards Google Inc.
0007 // Author: Sanjay Ghemawat
0008 
0009 // This code is a contribution of Google code, and the style used here is
0010 // a compromise between the original Google code and the ICU coding guidelines.
0011 // For example, data types are ICU-ified (size_t,int->int32_t),
0012 // and API comments doxygen-ified, but function names and behavior are
0013 // as in the original, if possible.
0014 // Assertion-style error handling, not available in ICU, was changed to
0015 // parameter "pinning" similar to UnicodeString.
0016 //
0017 // In addition, this is only a partial port of the original Google code,
0018 // limited to what was needed so far. The (nearly) complete original code
0019 // is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
0020 // (see ICU ticket 6765, r25517).
0021 
0022 #ifndef __STRINGPIECE_H__
0023 #define __STRINGPIECE_H__
0024 
0025 /**
0026  * \file 
0027  * \brief C++ API: StringPiece: Read-only byte string wrapper class.
0028  */
0029 
0030 #include "unicode/utypes.h"
0031 
0032 #if U_SHOW_CPLUSPLUS_API
0033 
0034 #include <cstddef>
0035 #include <string_view>
0036 #include <type_traits>
0037 
0038 #include "unicode/uobject.h"
0039 #include "unicode/std_string.h"
0040 
0041 // Arghh!  I wish C++ literals were "string".
0042 
0043 U_NAMESPACE_BEGIN
0044 
0045 /**
0046  * A string-like object that points to a sized piece of memory; it stores only a pointer and a length.
0047  *
0048  * We provide non-explicit singleton constructors so users can pass
0049  * in a "const char*" or a "string" wherever a "StringPiece" is
0050  * expected.
0051  *
0052  * Functions or methods may use StringPiece parameters to accept either a
0053  * "const char*" or a "string" value that will be implicitly converted to a
0054  * StringPiece.
0055  *
0056  * Systematic usage of StringPiece is encouraged as it will reduce unnecessary
0057  * conversions from "const char*" to "string" and back again.
0058  *
0059  * @stable ICU 4.2
0060  */
0061 class U_COMMON_API StringPiece : public UMemory {
0062  private:
0063   const char*   ptr_;     // start of the piece; nullptr after default construction or clear()
0064   int32_t       length_;  // number of chars in the piece
0065 
0066  public:
0067   /**
0068    * Default constructor, creates an empty StringPiece (data() == nullptr, length() == 0).
0069    * @stable ICU 4.2
0070    */
0071   StringPiece() : ptr_(nullptr), length_(0) { }
0072 
0073   /**
0074    * Constructs from a NUL-terminated const char * pointer.
0075    * @param str a NUL-terminated const char * pointer
0076    * @stable ICU 4.2
0077    */
0078   StringPiece(const char* str);
0079 #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
0080   /**
0081    * Constructs from a NUL-terminated const char8_t * pointer; delegates to the const char * constructor.
0082    * @param str a NUL-terminated const char8_t * pointer
0083    * @stable ICU 67
0084    */
0085   StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {}
0086 #endif
0087   /**
0088    * Constructs an empty StringPiece.
0089    * Needed for type disambiguation from multiple other overloads.
0090    * @param p nullptr
0091    * @stable ICU 67
0092    */
0093   StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {}
0094 
0095   /**
0096    * Constructs from a std::string, referring to str.data(); str.size() is narrowed to int32_t by static_cast.
0097    * @stable ICU 4.2
0098    */
0099   StringPiece(const std::string& str)
0100     : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
0101 #if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN)
0102   /**
0103    * Constructs from a std::u8string, referring to str.data(); str.size() is narrowed to int32_t by static_cast.
0104    * @stable ICU 67
0105    */
0106   StringPiece(const std::u8string& str)
0107     : ptr_(reinterpret_cast<const char*>(str.data())),
0108       length_(static_cast<int32_t>(str.size())) { }
0109 #endif
0110 
0111   /**
0112    * Constructs from some other implementation of a string piece class, from any
0113    * C++ record type that has these two methods:
0114    *
0115    * \code{.cpp}
0116    *
0117    *   struct OtherStringPieceClass {
0118    *     const char* data();  // or const char8_t*
0119    *     size_t size();
0120    *   };
0121    *
0122    * \endcode
0123    *
0124    * The other string piece class will typically be std::string_view from C++17
0125    * or absl::string_view from Abseil.
0126    *
0127    * Starting with C++20, data() may also return a const char8_t* pointer,
0128    * as from std::u8string_view.
0129    *
0130    * @param str the other string piece
0131    * @stable ICU 65
0132    */
0133   template <typename T,
0134             typename = std::enable_if_t<
0135                 (std::is_same_v<decltype(T().data()), const char*>  // T() requires T to be default-constructible
0136 #if defined(__cpp_char8_t)
0137                     || std::is_same_v<decltype(T().data()), const char8_t*>
0138 #endif
0139                 ) &&
0140                 std::is_same_v<decltype(T().size()), size_t>>>  // size() must return exactly size_t
0141   StringPiece(T str)
0142       : ptr_(reinterpret_cast<const char*>(str.data())),
0143         length_(static_cast<int32_t>(str.size())) {}  // size is narrowed without a range check
0144 
0145   /**
0146    * Constructs from a const char * pointer and a specified length.
0147    * @param offset a const char * pointer (need not be terminated)
0148    * @param len the length of the string; must be non-negative (stored as given; not checked here)
0149    * @stable ICU 4.2
0150    */
0151   StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
0152 #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
0153   /**
0154    * Constructs from a const char8_t * pointer and a specified length; delegates to the const char * overload.
0155    * @param str a const char8_t * pointer (need not be terminated)
0156    * @param len the length of the string; must be non-negative
0157    * @stable ICU 67
0158    */
0159   StringPiece(const char8_t* str, int32_t len) :
0160       StringPiece(reinterpret_cast<const char*>(str), len) {}
0161 #endif
0162 
0163   /**
0164    * Substring of another StringPiece.
0165    * @param x the other StringPiece
0166    * @param pos start position in x; must be non-negative and <= x.length().
0167    * @stable ICU 4.2
0168    */
0169   StringPiece(const StringPiece& x, int32_t pos);
0170   /**
0171    * Substring of another StringPiece.
0172    * @param x the other StringPiece
0173    * @param pos start position in x; must be non-negative and <= x.length().
0174    * @param len length of the substring;
0175    *            must be non-negative and will be pinned to at most x.length() - pos.
0176    * @stable ICU 4.2
0177    */
0178   StringPiece(const StringPiece& x, int32_t pos, int32_t len);
0179 
0180 #ifndef U_HIDE_INTERNAL_API
0181   /**
0182    * Converts to a std::string_view built from data() and size().
0183    * @internal
0184    */
0185   inline operator std::string_view() const {
0186     return {data(), static_cast<std::string_view::size_type>(size())};
0187   }
0188 #endif  // U_HIDE_INTERNAL_API
0189 
0190   /**
0191    * Returns the string pointer. May be nullptr if it is empty.
0192    *
0193    * data() may return a pointer to a buffer with embedded NULs, and the
0194    * returned buffer may or may not be NUL-terminated.  Therefore it is
0195    * typically a mistake to pass data() to a routine that expects a NUL
0196    * terminated string.
0197    * @return the string pointer
0198    * @stable ICU 4.2
0199    */
0200   const char* data() const { return ptr_; }
0201   /**
0202    * Returns the string length. Same as length().
0203    * @return the string length
0204    * @stable ICU 4.2
0205    */
0206   int32_t size() const { return length_; }
0207   /**
0208    * Returns the string length. Same as size().
0209    * @return the string length
0210    * @stable ICU 4.2
0211    */
0212   int32_t length() const { return length_; }
0213   /**
0214    * Returns whether the string is empty.
0215    * @return true if the string is empty
0216    * @stable ICU 4.2
0217    */
0218   UBool empty() const { return length_ == 0; }
0219 
0220   /**
0221    * Sets to an empty string; data() becomes nullptr and length() becomes 0.
0222    * @stable ICU 4.2
0223    */
0224   void clear() { ptr_ = nullptr; length_ = 0; }
0225 
0226   /**
0227    * Reset the stringpiece to refer to new data.
0228    * @param xdata pointer to the new string data.  Need not be NUL-terminated.
0229    * @param len the length of the new data (stored as given; not checked here)
0230    * @stable ICU 4.8
0231    */
0232   void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; }
0233 
0234   /**
0235    * Reset the stringpiece to refer to new data.
0236    * @param str a pointer to a NUL-terminated string.
0237    * @stable ICU 4.8
0238    */
0239   void set(const char* str);
0240 
0241 #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
0242   /**
0243    * Resets the stringpiece to refer to new data; forwards to the const char * overload.
0244    * @param xdata pointer to the new string data. Need not be NUL-terminated.
0245    * @param len the length of the new data
0246    * @stable ICU 67
0247    */
0248   inline void set(const char8_t* xdata, int32_t len) {
0249       set(reinterpret_cast<const char*>(xdata), len);
0250   }
0251 
0252   /**
0253    * Resets the stringpiece to refer to new data; forwards to the const char * overload.
0254    * @param str a pointer to a NUL-terminated string.
0255    * @stable ICU 67
0256    */
0257   inline void set(const char8_t* str) {
0258       set(reinterpret_cast<const char*>(str));
0259   }
0260 #endif
0261 
0262   /**
0263    * Removes the first n string units. A negative n is ignored; n > length() is pinned to length().
0264    * @param n prefix length, must be non-negative and <=length()
0265    * @stable ICU 4.2
0266    */
0267   void remove_prefix(int32_t n) {
0268     if (n >= 0) {
0269       if (n > length_) {
0270         n = length_;
0271       }
0272       ptr_ += n;
0273       length_ -= n;
0274     }
0275   }
0276 
0277   /**
0278    * Removes the last n string units. A negative n is ignored; n > length() sets the length to 0.
0279    * @param n suffix length, must be non-negative and <=length()
0280    * @stable ICU 4.2
0281    */
0282   void remove_suffix(int32_t n) {
0283     if (n >= 0) {
0284       if (n <= length_) {
0285         length_ -= n;
0286       } else {
0287         length_ = 0;  // pointer is left unchanged; only the length is reset
0288       }
0289     }
0290   }
0291 
0292   /**
0293    * Searches the StringPiece for the given search string (needle).
0294    * @param needle The string for which to search.
0295    * @param offset Where to start searching within this string (haystack).
0296    * @return The offset of needle in haystack, or -1 if not found.
0297    * @stable ICU 67
0298    */
0299   int32_t find(StringPiece needle, int32_t offset);  // note: not const-qualified, so not callable on a const StringPiece
0300 
0301   /**
0302    * Compares this StringPiece with the other StringPiece, with semantics
0303    * similar to std::string::compare().
0304    * @param other The string to compare to.
0305    * @return below zero if this < other; above zero if this > other; 0 if this == other.
0306    * @stable ICU 67
0307    */
0308   int32_t compare(StringPiece other);  // note: not const-qualified, so not callable on a const StringPiece
0309 
0310   /**
0311    * Maximum integer, used as a default value for substring methods.
0312    * @stable ICU 4.2
0313    */
0314   static const int32_t npos; // = 0x7fffffff;
0315 
0316   /**
0317    * Returns a substring of this StringPiece; delegates to the (x, pos, len) substring constructor.
0318    * @param pos start position; must be non-negative and <= length().
0319    * @param len length of the substring;
0320    *            must be non-negative and will be pinned to at most length() - pos.
0321    * @return the substring StringPiece
0322    * @stable ICU 4.2
0323    */
0324   StringPiece substr(int32_t pos, int32_t len = npos) const {
0325     return StringPiece(*this, pos, len);
0326   }
0327 };
0328 
0329 /**
0330  * Global operator == for StringPiece (declaration only; the definition is not in this header).
0331  * @param x The first StringPiece to compare.
0332  * @param y The second StringPiece to compare.
0333  * @return true if the string data is equal
0334  * @stable ICU 4.8
0335  */
0336 U_EXPORT UBool U_EXPORT2 
0337 operator==(const StringPiece& x, const StringPiece& y);
0338 
0339 /**
0340  * Global operator != for StringPiece; implemented inline as the negation of operator ==.
0341  * @param x The first StringPiece to compare.
0342  * @param y The second StringPiece to compare.
0343  * @return true if the string data is not equal
0344  * @stable ICU 4.8
0345  */
0346 inline bool operator!=(const StringPiece& x, const StringPiece& y) {
0347   return !(x == y);
0348 }
0349 
0350 U_NAMESPACE_END
0351 
0352 #endif /* U_SHOW_CPLUSPLUS_API */
0353 
0354 #endif  // __STRINGPIECE_H__