Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-15 09:41:54

0001 //
0002 // Copyright 2017 The Abseil Authors.
0003 //
0004 // Licensed under the Apache License, Version 2.0 (the "License");
0005 // you may not use this file except in compliance with the License.
0006 // You may obtain a copy of the License at
0007 //
0008 //      https://www.apache.org/licenses/LICENSE-2.0
0009 //
0010 // Unless required by applicable law or agreed to in writing, software
0011 // distributed under the License is distributed on an "AS IS" BASIS,
0012 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013 // See the License for the specific language governing permissions and
0014 // limitations under the License.
0015 //
0016 // -----------------------------------------------------------------------------
0017 // File: str_split.h
0018 // -----------------------------------------------------------------------------
0019 //
0020 // This file contains functions for splitting strings. It defines the main
0021 // `StrSplit()` function, several delimiters for determining the boundaries on
0022 // which to split the string, and predicates for filtering delimited results.
0023 // `StrSplit()` adapts the returned collection to the type specified by the
0024 // caller.
0025 //
0026 // Example:
0027 //
0028 //   // Splits the given string on commas. Returns the results in a
0029 //   // vector of strings.
0030 //   std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
0031 //   // Can also use ","
0032 //   // v[0] == "a", v[1] == "b", v[2] == "c"
0033 //
0034 // See StrSplit() below for more information.
0035 #ifndef ABSL_STRINGS_STR_SPLIT_H_
0036 #define ABSL_STRINGS_STR_SPLIT_H_
0037 
0038 #include <algorithm>
0039 #include <cstddef>
0040 #include <map>
0041 #include <set>
0042 #include <string>
0043 #include <utility>
0044 #include <vector>
0045 
0046 #include "absl/base/internal/raw_logging.h"
0047 #include "absl/base/macros.h"
0048 #include "absl/strings/internal/str_split_internal.h"
0049 #include "absl/strings/string_view.h"
0050 #include "absl/strings/strip.h"
0051 
0052 namespace absl {
0053 ABSL_NAMESPACE_BEGIN
0054 
0055 //------------------------------------------------------------------------------
0056 // Delimiters
0057 //------------------------------------------------------------------------------
0058 //
0059 // `StrSplit()` uses delimiters to define the boundaries between elements in the
0060 // provided input. Several `Delimiter` types are defined below. If a string
0061 // (`const char*`, `std::string`, or `absl::string_view`) is passed in place of
0062 // an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it
0063 // were passed a `ByString` delimiter.
0064 //
0065 // A `Delimiter` is an object with a `Find()` function that knows how to find
0066 // the first occurrence of itself in a given `absl::string_view`.
0067 //
0068 // The following `Delimiter` types are available for use within `StrSplit()`:
0069 //
0070 //   - `ByString` (default for string arguments)
0071 //   - `ByChar` (default for a char argument)
0072 //   - `ByAnyChar`
0073 //   - `ByLength`
0074 //   - `MaxSplits`
0075 //
0076 // A Delimiter's `Find()` member function will be passed an input `text` that is
0077 // to be split and a position (`pos`) to begin searching for the next delimiter
0078 // in `text`. The returned absl::string_view should refer to the next occurrence
0079 // (after `pos`) of the represented delimiter; this returned absl::string_view
0080 // represents the next location where the input `text` should be broken.
0081 //
0082 // The returned absl::string_view may be zero-length if the Delimiter does not
0083 // represent a part of the string (e.g., a fixed-length delimiter). If no
0084 // delimiter is found in the input `text`, a zero-length absl::string_view
0085 // referring to `text.end()` should be returned (e.g.,
0086 // `text.substr(text.size())`). It is important that the returned
0087 // absl::string_view always be within the bounds of the input `text` given as an
0088 // argument--it must not refer to a string that is physically located outside of
0089 // the given string.
0090 //
0091 // The following example is a simple Delimiter object that is created with a
0092 // single char and will look for that char in the text passed to the `Find()`
0093 // function:
0094 //
0095 //   struct SimpleDelimiter {
0096 //     const char c_;
0097 //     explicit SimpleDelimiter(char c) : c_(c) {}
0098 //     absl::string_view Find(absl::string_view text, size_t pos) {
0099 //       auto found = text.find(c_, pos);
0100 //       if (found == absl::string_view::npos)
0101 //         return text.substr(text.size());
0102 //
0103 //       return text.substr(found, 1);
0104 //     }
0105 //   };
0106 
0107 // ByString
0108 //
0109 // A sub-string delimiter. If `StrSplit()` is passed a string in place of a
0110 // `Delimiter` object, the string will be implicitly converted into a
0111 // `ByString` delimiter.
0112 //
0113 // Example:
0114 //
0115 //   // Because a string literal is converted to an `absl::ByString`,
0116 //   // the following two splits are equivalent.
0117 //
0118 //   std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", ");
0119 //
0120 //   using absl::ByString;
0121 //   std::vector<std::string> v2 = absl::StrSplit("a, b, c",
0122 //                                                ByString(", "));
0123 //   // v[0] == "a", v[1] == "b", v[2] == "c"
0124 class ByString {
0125  public:
0126   explicit ByString(absl::string_view sp);
0127   absl::string_view Find(absl::string_view text, size_t pos) const;
0128 
0129  private:
0130   const std::string delimiter_;
0131 };
0132 
0133 // ByAsciiWhitespace
0134 //
0135 // A sub-string delimiter that splits by ASCII whitespace
0136 // (space, tab, vertical tab, formfeed, linefeed, or carriage return).
0137 // Note: you probably want to use absl::SkipEmpty() as well!
0138 //
0139 // This class is equivalent to ByAnyChar with ASCII whitespace chars.
0140 //
0141 // Example:
0142 //
0143 //   std::vector<std::string> v = absl::StrSplit(
0144 //       "a b\tc\n  d  \n", absl::ByAsciiWhitespace(), absl::SkipEmpty());
0145 //   // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
0146 class ByAsciiWhitespace {
0147  public:
0148   absl::string_view Find(absl::string_view text, size_t pos) const;
0149 };
0150 
0151 // ByChar
0152 //
0153 // A single character delimiter. `ByChar` is functionally equivalent to a
0154 // 1-char string within a `ByString` delimiter, but slightly more efficient.
0155 //
0156 // Example:
0157 //
0158 //   // Because a char literal is converted to an absl::ByChar,
0159 //   // the following two splits are equivalent.
0160 //   std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
0161 //   using absl::ByChar;
0162 //   std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(','));
0163 //   // v[0] == "a", v[1] == "b", v[2] == "c"
0164 //
0165 // `ByChar` is also the default delimiter if a single character is given
0166 // as the delimiter to `StrSplit()`. For example, the following calls are
0167 // equivalent:
0168 //
0169 //   std::vector<std::string> v = absl::StrSplit("a-b", '-');
0170 //
0171 //   using absl::ByChar;
0172 //   std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-'));
0173 //
0174 class ByChar {
0175  public:
0176   explicit ByChar(char c) : c_(c) {}
0177   absl::string_view Find(absl::string_view text, size_t pos) const;
0178 
0179  private:
0180   char c_;
0181 };
0182 
0183 // ByAnyChar
0184 //
0185 // A delimiter that will match any of the given byte-sized characters within
0186 // its provided string.
0187 //
0188 // Note: this delimiter works with single-byte string data, but does not work
0189 // with variable-width encodings, such as UTF-8.
0190 //
0191 // Example:
0192 //
0193 //   using absl::ByAnyChar;
0194 //   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
0195 //   // v[0] == "a", v[1] == "b", v[2] == "c"
0196 //
0197 // If `ByAnyChar` is given the empty string, it behaves exactly like
0198 // `ByString` and matches each individual character in the input string.
0199 //
0200 class ByAnyChar {
0201  public:
0202   explicit ByAnyChar(absl::string_view sp);
0203   absl::string_view Find(absl::string_view text, size_t pos) const;
0204 
0205  private:
0206   const std::string delimiters_;
0207 };
0208 
0209 // ByLength
0210 //
0211 // A delimiter for splitting into equal-length strings. The length argument to
0212 // the constructor must be greater than 0.
0213 //
0214 // Note: this delimiter works with single-byte string data, but does not work
0215 // with variable-width encodings, such as UTF-8.
0216 //
0217 // Example:
0218 //
0219 //   using absl::ByLength;
0220 //   std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3));
0221 //
0222 //   // v[0] == "123", v[1] == "456", v[2] == "789"
0223 //
0224 // Note that the string does not have to be a multiple of the fixed split
0225 // length. In such a case, the last substring will be shorter.
0226 //
0227 //   using absl::ByLength;
0228 //   std::vector<std::string> v = absl::StrSplit("12345", ByLength(2));
0229 //
0230 //   // v[0] == "12", v[1] == "34", v[2] == "5"
0231 class ByLength {
0232  public:
0233   explicit ByLength(ptrdiff_t length);
0234   absl::string_view Find(absl::string_view text, size_t pos) const;
0235 
0236  private:
0237   const ptrdiff_t length_;
0238 };
0239 
0240 namespace strings_internal {
0241 
0242 // A traits-like metafunction for selecting the default Delimiter object type
0243 // for a particular Delimiter type. The base case simply exposes type Delimiter
0244 // itself as the delimiter's Type. However, there are specializations for
0245 // string-like objects that map them to the ByString delimiter object.
0246 // This allows functions like absl::StrSplit() and absl::MaxSplits() to accept
0247 // string-like objects (e.g., ',') as delimiter arguments but they will be
0248 // treated as if a ByString delimiter was given.
0249 template <typename Delimiter>
0250 struct SelectDelimiter {
0251   using type = Delimiter;
0252 };
0253 
0254 template <>
0255 struct SelectDelimiter<char> {
0256   using type = ByChar;
0257 };
0258 template <>
0259 struct SelectDelimiter<char*> {
0260   using type = ByString;
0261 };
0262 template <>
0263 struct SelectDelimiter<const char*> {
0264   using type = ByString;
0265 };
0266 template <>
0267 struct SelectDelimiter<absl::string_view> {
0268   using type = ByString;
0269 };
0270 template <>
0271 struct SelectDelimiter<std::string> {
0272   using type = ByString;
0273 };
0274 
0275 // Wraps another delimiter and sets a max number of matches for that delimiter.
0276 template <typename Delimiter>
0277 class MaxSplitsImpl {
0278  public:
0279   MaxSplitsImpl(Delimiter delimiter, int limit)
0280       : delimiter_(delimiter), limit_(limit), count_(0) {}
0281   absl::string_view Find(absl::string_view text, size_t pos) {
0282     if (count_++ == limit_) {
0283       return absl::string_view(text.data() + text.size(),
0284                                0);  // No more matches.
0285     }
0286     return delimiter_.Find(text, pos);
0287   }
0288 
0289  private:
0290   Delimiter delimiter_;
0291   const int limit_;
0292   int count_;
0293 };
0294 
0295 }  // namespace strings_internal
0296 
0297 // MaxSplits()
0298 //
0299 // A delimiter that limits the number of matches which can occur to the passed
0300 // `limit`. The last element in the returned collection will contain all
0301 // remaining unsplit pieces, which may contain instances of the delimiter.
0302 // The collection will contain at most `limit` + 1 elements.
0303 // Example:
0304 //
0305 //   using absl::MaxSplits;
0306 //   std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1));
0307 //
0308 //   // v[0] == "a", v[1] == "b,c"
0309 template <typename Delimiter>
0310 inline strings_internal::MaxSplitsImpl<
0311     typename strings_internal::SelectDelimiter<Delimiter>::type>
0312 MaxSplits(Delimiter delimiter, int limit) {
0313   typedef
0314       typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType;
0315   return strings_internal::MaxSplitsImpl<DelimiterType>(
0316       DelimiterType(delimiter), limit);
0317 }
0318 
0319 //------------------------------------------------------------------------------
0320 // Predicates
0321 //------------------------------------------------------------------------------
0322 //
0323 // Predicates filter the results of a `StrSplit()` by determining whether or not
0324 // a resultant element is included in the result set. A predicate may be passed
0325 // as an optional third argument to the `StrSplit()` function.
0326 //
0327 // Predicates are unary functions (or functors) that take a single
0328 // `absl::string_view` argument and return a bool indicating whether the
0329 // argument should be included (`true`) or excluded (`false`).
0330 //
0331 // Predicates are useful when filtering out empty substrings. By default, empty
0332 // substrings may be returned by `StrSplit()`, which is similar to the way split
0333 // functions work in other programming languages.
0334 
0335 // AllowEmpty()
0336 //
0337 // Always returns `true`, indicating that all strings--including empty
0338 // strings--should be included in the split output. This predicate is not
0339 // strictly needed because this is the default behavior of `StrSplit()`;
0340 // however, it might be useful at some call sites to make the intent explicit.
0341 //
0342 // Example:
0343 //
0344 //  std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty());
0345 //
0346 //  // v[0] == " a ", v[1] == " ", v[2] == "", v[3] == "b", v[4] == ""
0347 struct AllowEmpty {
0348   bool operator()(absl::string_view) const { return true; }
0349 };
0350 
0351 // SkipEmpty()
0352 //
0353 // Returns `false` if the given `absl::string_view` is empty, indicating that
0354 // `StrSplit()` should omit the empty string.
0355 //
0356 // Example:
0357 //
0358 //   std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty());
0359 //
0360 //   // v[0] == "a", v[1] == "b"
0361 //
0362 // Note: `SkipEmpty()` does not consider a string containing only whitespace
0363 // to be empty. To skip such whitespace as well, use the `SkipWhitespace()`
0364 // predicate.
0365 struct SkipEmpty {
0366   bool operator()(absl::string_view sp) const { return !sp.empty(); }
0367 };
0368 
0369 // SkipWhitespace()
0370 //
0371 // Returns `false` if the given `absl::string_view` is empty *or* contains only
0372 // whitespace, indicating that `StrSplit()` should omit the string.
0373 //
0374 // Example:
0375 //
0376 //   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
0377 //                                               ',', SkipWhitespace());
0378 //   // v[0] == " a ", v[1] == "b"
0379 //
0380 //   // SkipEmpty() would return whitespace elements
0381 //   std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty());
0382 //   // v[0] == " a ", v[1] == " ", v[2] == "b"
0383 struct SkipWhitespace {
0384   bool operator()(absl::string_view sp) const {
0385     sp = absl::StripAsciiWhitespace(sp);
0386     return !sp.empty();
0387   }
0388 };
0389 
// EnableSplitIfString
//
// SFINAE helper: names the type `int` when `T` is exactly `std::string` or
// `const std::string`, and is ill-formed for every other `T` (including
// reference types, since the comparison is made on `T` itself).
template <typename T>
using EnableSplitIfString = typename std::enable_if<
    std::is_same<typename std::remove_const<T>::type, std::string>::value,
    int>::type;
0395 
0396 //------------------------------------------------------------------------------
0397 //                                  StrSplit()
0398 //------------------------------------------------------------------------------
0399 
0400 // StrSplit()
0401 //
0402 // Splits a given string based on the provided `Delimiter` object, returning the
0403 // elements within the type specified by the caller. Optionally, you may pass a
0404 // `Predicate` to `StrSplit()` indicating whether to include or exclude the
0405 // resulting element within the final result set. (See the overviews for
0406 // Delimiters and Predicates above.)
0407 //
0408 // Example:
0409 //
0410 //   std::vector<std::string> v = absl::StrSplit("a,b,c,d", ',');
0411 //   // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
0412 //
0413 // You can also provide an explicit `Delimiter` object:
0414 //
0415 // Example:
0416 //
0417 //   using absl::ByAnyChar;
0418 //   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
0419 //   // v[0] == "a", v[1] == "b", v[2] == "c"
0420 //
0421 // See above for more information on delimiters.
0422 //
0423 // By default, empty strings are included in the result set. You can optionally
0424 // include a third `Predicate` argument to apply a test for whether the
0425 // resultant element should be included in the result set:
0426 //
0427 // Example:
0428 //
0429 //   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
0430 //                                               ',', SkipWhitespace());
0431 //   // v[0] == " a ", v[1] == "b"
0432 //
0433 // See above for more information on predicates.
0434 //
0435 //------------------------------------------------------------------------------
0436 // StrSplit() Return Types
0437 //------------------------------------------------------------------------------
0438 //
0439 // The `StrSplit()` function adapts the returned collection to the collection
0440 // specified by the caller (e.g. `std::vector` above). The returned collections
0441 // may contain `std::string`, `absl::string_view` (in which case the original
0442 // string being split must ensure that it outlives the collection), or any
0443 // object that can be explicitly created from an `absl::string_view`. This
0444 // behavior works for:
0445 //
0446 // 1) All standard STL containers including `std::vector`, `std::list`,
0447 //    `std::deque`, `std::set`, `std::multiset`, `std::map`, and `std::multimap`
0448 // 2) `std::pair` (which is not actually a container). See below.
0449 //
0450 // Example:
0451 //
0452 //   // The results are returned as `absl::string_view` objects. Note that we
0453 //   // have to ensure that the input string outlives any results.
0454 //   std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
0455 //
0456 //   // Stores results in a std::set<std::string>, which also performs
0457 //   // de-duplication and orders the elements in ascending order.
0458 //   std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ',');
0459 //   // a == {"a", "b", "c"}
0460 //
0461 //   // `StrSplit()` can be used within a range-based for loop, in which case
0462 //   // each element will be of type `absl::string_view`.
0463 //   std::vector<std::string> v;
0464 //   for (const auto sv : absl::StrSplit("a,b,c", ',')) {
0465 //     if (sv != "b") v.emplace_back(sv);
0466 //   }
0467 //   // v[0] == "a", v[1] == "c"
0468 //
0469 //   // Stores results in a map. The map implementation assumes that the input
0470 //   // is provided as a series of key/value pairs. For example, the 0th element
0471 //   // resulting from the split will be stored as a key to the 1st element. If
0472 //   // an odd number of elements are resolved, the last element is paired with
0473 //   // a default-constructed value (e.g., empty string).
0474 //   std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ',');
0475 //   // m["a"] == "b", m["c"] == ""     // last component value equals ""
0476 //
0477 // Splitting to `std::pair` is an interesting case because it can hold only two
0478 // elements and is not a collection type. When splitting to a `std::pair` the
0479 // first two split strings become the `std::pair` `.first` and `.second`
0480 // members, respectively. The remaining split substrings are discarded. If there
0481 // are fewer than two split substrings, the empty string is used for the
0482 // corresponding `std::pair` member.
0483 //
0484 // Example:
0485 //
0486 //   // Stores first two split strings as the members in a std::pair.
0487 //   std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
0488 //   // p.first == "a", p.second == "b"       // "c" is omitted.
0489 //
0490 // The `StrSplit()` function can be used multiple times to perform more
0491 // complicated splitting logic, such as intelligently parsing key-value pairs.
0492 //
0493 // Example:
0494 //
0495 //   // The input string "a=b=c,d=e,f=,g" becomes
0496 //   // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" }
0497 //   std::map<std::string, std::string> m;
0498 //   for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
0499 //     m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
0500 //   }
0501 //   EXPECT_EQ("b=c", m.find("a")->second);
0502 //   EXPECT_EQ("e", m.find("d")->second);
0503 //   EXPECT_EQ("", m.find("f")->second);
0504 //   EXPECT_EQ("", m.find("g")->second);
0505 //
0506 // WARNING: Due to a legacy bug that is maintained for backward compatibility,
0507 // splitting the following empty string_views produces different results:
0508 //
0509 //   absl::StrSplit(absl::string_view(""), '-');  // {""}
0510 //   absl::StrSplit(absl::string_view(), '-');    // {}, but should be {""}
0511 //
0512 // Try not to depend on this distinction because the bug may one day be fixed.
0513 template <typename Delimiter>
0514 strings_internal::Splitter<
0515     typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty,
0516     absl::string_view>
0517 StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) {
0518   using DelimiterType =
0519       typename strings_internal::SelectDelimiter<Delimiter>::type;
0520   return strings_internal::Splitter<DelimiterType, AllowEmpty,
0521                                     absl::string_view>(
0522       text.value(), DelimiterType(d), AllowEmpty());
0523 }
0524 
0525 template <typename Delimiter, typename StringType,
0526           EnableSplitIfString<StringType> = 0>
0527 strings_internal::Splitter<
0528     typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty,
0529     std::string>
0530 StrSplit(StringType&& text, Delimiter d) {
0531   using DelimiterType =
0532       typename strings_internal::SelectDelimiter<Delimiter>::type;
0533   return strings_internal::Splitter<DelimiterType, AllowEmpty, std::string>(
0534       std::move(text), DelimiterType(d), AllowEmpty());
0535 }
0536 
0537 template <typename Delimiter, typename Predicate>
0538 strings_internal::Splitter<
0539     typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate,
0540     absl::string_view>
0541 StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d,
0542          Predicate p) {
0543   using DelimiterType =
0544       typename strings_internal::SelectDelimiter<Delimiter>::type;
0545   return strings_internal::Splitter<DelimiterType, Predicate,
0546                                     absl::string_view>(
0547       text.value(), DelimiterType(std::move(d)), std::move(p));
0548 }
0549 
0550 template <typename Delimiter, typename Predicate, typename StringType,
0551           EnableSplitIfString<StringType> = 0>
0552 strings_internal::Splitter<
0553     typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate,
0554     std::string>
0555 StrSplit(StringType&& text, Delimiter d, Predicate p) {
0556   using DelimiterType =
0557       typename strings_internal::SelectDelimiter<Delimiter>::type;
0558   return strings_internal::Splitter<DelimiterType, Predicate, std::string>(
0559       std::move(text), DelimiterType(d), std::move(p));
0560 }
0561 
0562 ABSL_NAMESPACE_END
0563 }  // namespace absl
0564 
0565 #endif  // ABSL_STRINGS_STR_SPLIT_H_