|
|
|||
File indexing completed on 2025-12-15 09:41:54
0001 // 0002 // Copyright 2017 The Abseil Authors. 0003 // 0004 // Licensed under the Apache License, Version 2.0 (the "License"); 0005 // you may not use this file except in compliance with the License. 0006 // You may obtain a copy of the License at 0007 // 0008 // https://www.apache.org/licenses/LICENSE-2.0 0009 // 0010 // Unless required by applicable law or agreed to in writing, software 0011 // distributed under the License is distributed on an "AS IS" BASIS, 0012 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 0013 // See the License for the specific language governing permissions and 0014 // limitations under the License. 0015 // 0016 // ----------------------------------------------------------------------------- 0017 // File: str_split.h 0018 // ----------------------------------------------------------------------------- 0019 // 0020 // This file contains functions for splitting strings. It defines the main 0021 // `StrSplit()` function, several delimiters for determining the boundaries on 0022 // which to split the string, and predicates for filtering delimited results. 0023 // `StrSplit()` adapts the returned collection to the type specified by the 0024 // caller. 0025 // 0026 // Example: 0027 // 0028 // // Splits the given string on commas. Returns the results in a 0029 // // vector of strings. 0030 // std::vector<std::string> v = absl::StrSplit("a,b,c", ','); 0031 // // Can also use "," 0032 // // v[0] == "a", v[1] == "b", v[2] == "c" 0033 // 0034 // See StrSplit() below for more information. 
0035 #ifndef ABSL_STRINGS_STR_SPLIT_H_ 0036 #define ABSL_STRINGS_STR_SPLIT_H_ 0037 0038 #include <algorithm> 0039 #include <cstddef> 0040 #include <map> 0041 #include <set> 0042 #include <string> 0043 #include <utility> 0044 #include <vector> 0045 0046 #include "absl/base/internal/raw_logging.h" 0047 #include "absl/base/macros.h" 0048 #include "absl/strings/internal/str_split_internal.h" 0049 #include "absl/strings/string_view.h" 0050 #include "absl/strings/strip.h" 0051 0052 namespace absl { 0053 ABSL_NAMESPACE_BEGIN 0054 0055 //------------------------------------------------------------------------------ 0056 // Delimiters 0057 //------------------------------------------------------------------------------ 0058 // 0059 // `StrSplit()` uses delimiters to define the boundaries between elements in the 0060 // provided input. Several `Delimiter` types are defined below. If a string 0061 // (`const char*`, `std::string`, or `absl::string_view`) is passed in place of 0062 // an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it 0063 // were passed a `ByString` delimiter. 0064 // 0065 // A `Delimiter` is an object with a `Find()` function that knows how to find 0066 // the first occurrence of itself in a given `absl::string_view`. 0067 // 0068 // The following `Delimiter` types are available for use within `StrSplit()`: 0069 // 0070 // - `ByString` (default for string arguments) 0071 // - `ByChar` (default for a char argument) 0072 // - `ByAnyChar` 0073 // - `ByLength` 0074 // - `MaxSplits` 0075 // 0076 // A Delimiter's `Find()` member function will be passed an input `text` that is 0077 // to be split and a position (`pos`) to begin searching for the next delimiter 0078 // in `text`. The returned absl::string_view should refer to the next occurrence 0079 // (after `pos`) of the represented delimiter; this returned absl::string_view 0080 // represents the next location where the input `text` should be broken. 
0081 // 0082 // The returned absl::string_view may be zero-length if the Delimiter does not 0083 // represent a part of the string (e.g., a fixed-length delimiter). If no 0084 // delimiter is found in the input `text`, a zero-length absl::string_view 0085 // referring to `text.end()` should be returned (e.g., 0086 // `text.substr(text.size())`). It is important that the returned 0087 // absl::string_view always be within the bounds of the input `text` given as an 0088 // argument--it must not refer to a string that is physically located outside of 0089 // the given string. 0090 // 0091 // The following example is a simple Delimiter object that is created with a 0092 // single char and will look for that char in the text passed to the `Find()` 0093 // function: 0094 // 0095 // struct SimpleDelimiter { 0096 // const char c_; 0097 // explicit SimpleDelimiter(char c) : c_(c) {} 0098 // absl::string_view Find(absl::string_view text, size_t pos) { 0099 // auto found = text.find(c_, pos); 0100 // if (found == absl::string_view::npos) 0101 // return text.substr(text.size()); 0102 // 0103 // return text.substr(found, 1); 0104 // } 0105 // }; 0106 0107 // ByString 0108 // 0109 // A sub-string delimiter. If `StrSplit()` is passed a string in place of a 0110 // `Delimiter` object, the string will be implicitly converted into a 0111 // `ByString` delimiter. 0112 // 0113 // Example: 0114 // 0115 // // Because a string literal is converted to an `absl::ByString`, 0116 // // the following two splits are equivalent. 
0117 // 0118 // std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", "); 0119 // 0120 // using absl::ByString; 0121 // std::vector<std::string> v2 = absl::StrSplit("a, b, c", 0122 // ByString(", ")); 0123 // // v[0] == "a", v[1] == "b", v[2] == "c" 0124 class ByString { 0125 public: 0126 explicit ByString(absl::string_view sp); 0127 absl::string_view Find(absl::string_view text, size_t pos) const; 0128 0129 private: 0130 const std::string delimiter_; 0131 }; 0132 0133 // ByAsciiWhitespace 0134 // 0135 // A sub-string delimiter that splits by ASCII whitespace 0136 // (space, tab, vertical tab, formfeed, linefeed, or carriage return). 0137 // Note: you probably want to use absl::SkipEmpty() as well! 0138 // 0139 // This class is equivalent to ByAnyChar with ASCII whitespace chars. 0140 // 0141 // Example: 0142 // 0143 // std::vector<std::string> v = absl::StrSplit( 0144 // "a b\tc\n d \n", absl::ByAsciiWhitespace(), absl::SkipEmpty()); 0145 // // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d" 0146 class ByAsciiWhitespace { 0147 public: 0148 absl::string_view Find(absl::string_view text, size_t pos) const; 0149 }; 0150 0151 // ByChar 0152 // 0153 // A single character delimiter. `ByChar` is functionally equivalent to a 0154 // 1-char string within a `ByString` delimiter, but slightly more efficient. 0155 // 0156 // Example: 0157 // 0158 // // Because a char literal is converted to a absl::ByChar, 0159 // // the following two splits are equivalent. 0160 // std::vector<std::string> v1 = absl::StrSplit("a,b,c", ','); 0161 // using absl::ByChar; 0162 // std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(',')); 0163 // // v[0] == "a", v[1] == "b", v[2] == "c" 0164 // 0165 // `ByChar` is also the default delimiter if a single character is given 0166 // as the delimiter to `StrSplit()`. 
For example, the following calls are 0167 // equivalent: 0168 // 0169 // std::vector<std::string> v = absl::StrSplit("a-b", '-'); 0170 // 0171 // using absl::ByChar; 0172 // std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-')); 0173 // 0174 class ByChar { 0175 public: 0176 explicit ByChar(char c) : c_(c) {} 0177 absl::string_view Find(absl::string_view text, size_t pos) const; 0178 0179 private: 0180 char c_; 0181 }; 0182 0183 // ByAnyChar 0184 // 0185 // A delimiter that will match any of the given byte-sized characters within 0186 // its provided string. 0187 // 0188 // Note: this delimiter works with single-byte string data, but does not work 0189 // with variable-width encodings, such as UTF-8. 0190 // 0191 // Example: 0192 // 0193 // using absl::ByAnyChar; 0194 // std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",=")); 0195 // // v[0] == "a", v[1] == "b", v[2] == "c" 0196 // 0197 // If `ByAnyChar` is given the empty string, it behaves exactly like 0198 // `ByString` and matches each individual character in the input string. 0199 // 0200 class ByAnyChar { 0201 public: 0202 explicit ByAnyChar(absl::string_view sp); 0203 absl::string_view Find(absl::string_view text, size_t pos) const; 0204 0205 private: 0206 const std::string delimiters_; 0207 }; 0208 0209 // ByLength 0210 // 0211 // A delimiter for splitting into equal-length strings. The length argument to 0212 // the constructor must be greater than 0. 0213 // 0214 // Note: this delimiter works with single-byte string data, but does not work 0215 // with variable-width encodings, such as UTF-8. 0216 // 0217 // Example: 0218 // 0219 // using absl::ByLength; 0220 // std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3)); 0221 0222 // // v[0] == "123", v[1] == "456", v[2] == "789" 0223 // 0224 // Note that the string does not have to be a multiple of the fixed split 0225 // length. In such a case, the last substring will be shorter. 
0226 // 0227 // using absl::ByLength; 0228 // std::vector<std::string> v = absl::StrSplit("12345", ByLength(2)); 0229 // 0230 // // v[0] == "12", v[1] == "34", v[2] == "5" 0231 class ByLength { 0232 public: 0233 explicit ByLength(ptrdiff_t length); 0234 absl::string_view Find(absl::string_view text, size_t pos) const; 0235 0236 private: 0237 const ptrdiff_t length_; 0238 }; 0239 0240 namespace strings_internal { 0241 0242 // A traits-like metafunction for selecting the default Delimiter object type 0243 // for a particular Delimiter type. The base case simply exposes type Delimiter 0244 // itself as the delimiter's Type. However, there are specializations for 0245 // string-like objects that map them to the ByString delimiter object. 0246 // This allows functions like absl::StrSplit() and absl::MaxSplits() to accept 0247 // string-like objects (e.g., ',') as delimiter arguments but they will be 0248 // treated as if a ByString delimiter was given. 0249 template <typename Delimiter> 0250 struct SelectDelimiter { 0251 using type = Delimiter; 0252 }; 0253 0254 template <> 0255 struct SelectDelimiter<char> { 0256 using type = ByChar; 0257 }; 0258 template <> 0259 struct SelectDelimiter<char*> { 0260 using type = ByString; 0261 }; 0262 template <> 0263 struct SelectDelimiter<const char*> { 0264 using type = ByString; 0265 }; 0266 template <> 0267 struct SelectDelimiter<absl::string_view> { 0268 using type = ByString; 0269 }; 0270 template <> 0271 struct SelectDelimiter<std::string> { 0272 using type = ByString; 0273 }; 0274 0275 // Wraps another delimiter and sets a max number of matches for that delimiter. 0276 template <typename Delimiter> 0277 class MaxSplitsImpl { 0278 public: 0279 MaxSplitsImpl(Delimiter delimiter, int limit) 0280 : delimiter_(delimiter), limit_(limit), count_(0) {} 0281 absl::string_view Find(absl::string_view text, size_t pos) { 0282 if (count_++ == limit_) { 0283 return absl::string_view(text.data() + text.size(), 0284 0); // No more matches. 
0285 } 0286 return delimiter_.Find(text, pos); 0287 } 0288 0289 private: 0290 Delimiter delimiter_; 0291 const int limit_; 0292 int count_; 0293 }; 0294 0295 } // namespace strings_internal 0296 0297 // MaxSplits() 0298 // 0299 // A delimiter that limits the number of matches which can occur to the passed 0300 // `limit`. The last element in the returned collection will contain all 0301 // remaining unsplit pieces, which may contain instances of the delimiter. 0302 // The collection will contain at most `limit` + 1 elements. 0303 // Example: 0304 // 0305 // using absl::MaxSplits; 0306 // std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1)); 0307 // 0308 // // v[0] == "a", v[1] == "b,c" 0309 template <typename Delimiter> 0310 inline strings_internal::MaxSplitsImpl< 0311 typename strings_internal::SelectDelimiter<Delimiter>::type> 0312 MaxSplits(Delimiter delimiter, int limit) { 0313 typedef 0314 typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType; 0315 return strings_internal::MaxSplitsImpl<DelimiterType>( 0316 DelimiterType(delimiter), limit); 0317 } 0318 0319 //------------------------------------------------------------------------------ 0320 // Predicates 0321 //------------------------------------------------------------------------------ 0322 // 0323 // Predicates filter the results of a `StrSplit()` by determining whether or not 0324 // a resultant element is included in the result set. A predicate may be passed 0325 // as an optional third argument to the `StrSplit()` function. 0326 // 0327 // Predicates are unary functions (or functors) that take a single 0328 // `absl::string_view` argument and return a bool indicating whether the 0329 // argument should be included (`true`) or excluded (`false`). 0330 // 0331 // Predicates are useful when filtering out empty substrings. 
By default, empty 0332 // substrings may be returned by `StrSplit()`, which is similar to the way split 0333 // functions work in other programming languages. 0334 0335 // AllowEmpty() 0336 // 0337 // Always returns `true`, indicating that all strings--including empty 0338 // strings--should be included in the split output. This predicate is not 0339 // strictly needed because this is the default behavior of `StrSplit()`; 0340 // however, it might be useful at some call sites to make the intent explicit. 0341 // 0342 // Example: 0343 // 0344 // std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty()); 0345 // 0346 // // v[0] == " a ", v[1] == " ", v[2] == "", v[3] = "b", v[4] == "" 0347 struct AllowEmpty { 0348 bool operator()(absl::string_view) const { return true; } 0349 }; 0350 0351 // SkipEmpty() 0352 // 0353 // Returns `false` if the given `absl::string_view` is empty, indicating that 0354 // `StrSplit()` should omit the empty string. 0355 // 0356 // Example: 0357 // 0358 // std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty()); 0359 // 0360 // // v[0] == "a", v[1] == "b" 0361 // 0362 // Note: `SkipEmpty()` does not consider a string containing only whitespace 0363 // to be empty. To skip such whitespace as well, use the `SkipWhitespace()` 0364 // predicate. 0365 struct SkipEmpty { 0366 bool operator()(absl::string_view sp) const { return !sp.empty(); } 0367 }; 0368 0369 // SkipWhitespace() 0370 // 0371 // Returns `false` if the given `absl::string_view` is empty *or* contains only 0372 // whitespace, indicating that `StrSplit()` should omit the string. 
0373 // 0374 // Example: 0375 // 0376 // std::vector<std::string> v = absl::StrSplit(" a , ,,b,", 0377 // ',', SkipWhitespace()); 0378 // // v[0] == " a ", v[1] == "b" 0379 // 0380 // // SkipEmpty() would return whitespace elements 0381 // std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty()); 0382 // // v[0] == " a ", v[1] == " ", v[2] == "b" 0383 struct SkipWhitespace { 0384 bool operator()(absl::string_view sp) const { 0385 sp = absl::StripAsciiWhitespace(sp); 0386 return !sp.empty(); 0387 } 0388 }; 0389 0390 template <typename T> 0391 using EnableSplitIfString = 0392 typename std::enable_if<std::is_same<T, std::string>::value || 0393 std::is_same<T, const std::string>::value, 0394 int>::type; 0395 0396 //------------------------------------------------------------------------------ 0397 // StrSplit() 0398 //------------------------------------------------------------------------------ 0399 0400 // StrSplit() 0401 // 0402 // Splits a given string based on the provided `Delimiter` object, returning the 0403 // elements within the type specified by the caller. Optionally, you may pass a 0404 // `Predicate` to `StrSplit()` indicating whether to include or exclude the 0405 // resulting element within the final result set. (See the overviews for 0406 // Delimiters and Predicates above.) 0407 // 0408 // Example: 0409 // 0410 // std::vector<std::string> v = absl::StrSplit("a,b,c,d", ','); 0411 // // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d" 0412 // 0413 // You can also provide an explicit `Delimiter` object: 0414 // 0415 // Example: 0416 // 0417 // using absl::ByAnyChar; 0418 // std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",=")); 0419 // // v[0] == "a", v[1] == "b", v[2] == "c" 0420 // 0421 // See above for more information on delimiters. 0422 // 0423 // By default, empty strings are included in the result set. 
// You can optionally
// include a third `Predicate` argument to apply a test for whether the
// resultant element should be included in the result set:
//
// Example:
//
//   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
//                                               ',', SkipWhitespace());
//   // v[0] == " a ", v[1] == "b"
//
// See above for more information on predicates.
//
//------------------------------------------------------------------------------
// StrSplit() Return Types
//------------------------------------------------------------------------------
//
// The `StrSplit()` function adapts the returned collection to the collection
// specified by the caller (e.g. `std::vector` above). The returned collections
// may contain `std::string`, `absl::string_view` (in which case the original
// string being split must ensure that it outlives the collection), or any
// object that can be explicitly created from an `absl::string_view`. This
// behavior works for:
//
// 1) All standard STL containers including `std::vector`, `std::list`,
//    `std::deque`, `std::set`, `std::multiset`, `std::map`, and
//    `std::multimap`
// 2) `std::pair` (which is not actually a container). See below.
//
// Example:
//
//   // The results are returned as `absl::string_view` objects. Note that we
//   // have to ensure that the input string outlives any results.
//   std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
//
//   // Stores results in a std::set<std::string>, which also performs
//   // de-duplication and orders the elements in ascending order.
//   std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ',');
//   // a[0] == "a", a[1] == "b", a[2] == "c"
//
//   // `StrSplit()` can be used within a range-based for loop, in which case
//   // each element will be of type `absl::string_view`.
//   std::vector<std::string> v;
//   for (const auto sv : absl::StrSplit("a,b,c", ',')) {
//     if (sv != "b") v.emplace_back(sv);
//   }
//   // v[0] == "a", v[1] == "c"
//
//   // Stores results in a map. The map implementation assumes that the input
//   // is provided as a series of key/value pairs. For example, the 0th element
//   // resulting from the split will be stored as a key to the 1st element. If
//   // an odd number of elements are resolved, the last element is paired with
//   // a default-constructed value (e.g., empty string).
//   std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ',');
//   // m["a"] == "b", m["c"] == ""     // last component value equals ""
//
// Splitting to `std::pair` is an interesting case because it can hold only two
// elements and is not a collection type. When splitting to a `std::pair` the
// first two split strings become the `std::pair` `.first` and `.second`
// members, respectively. The remaining split substrings are discarded. If there
// are fewer than two split substrings, the empty string is used for the
// corresponding `std::pair` member.
//
// Example:
//
//   // Stores first two split strings as the members in a std::pair.
//   std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
//   // p.first == "a", p.second == "b"       // "c" is omitted.
//
// The `StrSplit()` function can be used multiple times to perform more
// complicated splitting logic, such as intelligently parsing key-value pairs.
0492 // 0493 // Example: 0494 // 0495 // // The input string "a=b=c,d=e,f=,g" becomes 0496 // // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" } 0497 // std::map<std::string, std::string> m; 0498 // for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) { 0499 // m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1))); 0500 // } 0501 // EXPECT_EQ("b=c", m.find("a")->second); 0502 // EXPECT_EQ("e", m.find("d")->second); 0503 // EXPECT_EQ("", m.find("f")->second); 0504 // EXPECT_EQ("", m.find("g")->second); 0505 // 0506 // WARNING: Due to a legacy bug that is maintained for backward compatibility, 0507 // splitting the following empty string_views produces different results: 0508 // 0509 // absl::StrSplit(absl::string_view(""), '-'); // {""} 0510 // absl::StrSplit(absl::string_view(), '-'); // {}, but should be {""} 0511 // 0512 // Try not to depend on this distinction because the bug may one day be fixed. 0513 template <typename Delimiter> 0514 strings_internal::Splitter< 0515 typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty, 0516 absl::string_view> 0517 StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) { 0518 using DelimiterType = 0519 typename strings_internal::SelectDelimiter<Delimiter>::type; 0520 return strings_internal::Splitter<DelimiterType, AllowEmpty, 0521 absl::string_view>( 0522 text.value(), DelimiterType(d), AllowEmpty()); 0523 } 0524 0525 template <typename Delimiter, typename StringType, 0526 EnableSplitIfString<StringType> = 0> 0527 strings_internal::Splitter< 0528 typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty, 0529 std::string> 0530 StrSplit(StringType&& text, Delimiter d) { 0531 using DelimiterType = 0532 typename strings_internal::SelectDelimiter<Delimiter>::type; 0533 return strings_internal::Splitter<DelimiterType, AllowEmpty, std::string>( 0534 std::move(text), DelimiterType(d), AllowEmpty()); 0535 } 0536 0537 template <typename Delimiter, typename Predicate> 
0538 strings_internal::Splitter< 0539 typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate, 0540 absl::string_view> 0541 StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d, 0542 Predicate p) { 0543 using DelimiterType = 0544 typename strings_internal::SelectDelimiter<Delimiter>::type; 0545 return strings_internal::Splitter<DelimiterType, Predicate, 0546 absl::string_view>( 0547 text.value(), DelimiterType(std::move(d)), std::move(p)); 0548 } 0549 0550 template <typename Delimiter, typename Predicate, typename StringType, 0551 EnableSplitIfString<StringType> = 0> 0552 strings_internal::Splitter< 0553 typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate, 0554 std::string> 0555 StrSplit(StringType&& text, Delimiter d, Predicate p) { 0556 using DelimiterType = 0557 typename strings_internal::SelectDelimiter<Delimiter>::type; 0558 return strings_internal::Splitter<DelimiterType, Predicate, std::string>( 0559 std::move(text), DelimiterType(d), std::move(p)); 0560 } 0561 0562 ABSL_NAMESPACE_END 0563 } // namespace absl 0564 0565 #endif // ABSL_STRINGS_STR_SPLIT_H_
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|