|
||||
File indexing completed on 2024-11-15 09:01:13
0001 // Copyright 2021 The Abseil Authors 0002 // 0003 // Licensed under the Apache License, Version 2.0 (the "License"); 0004 // you may not use this file except in compliance with the License. 0005 // You may obtain a copy of the License at 0006 // 0007 // https://www.apache.org/licenses/LICENSE-2.0 0008 // 0009 // Unless required by applicable law or agreed to in writing, software 0010 // distributed under the License is distributed on an "AS IS" BASIS, 0011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 0012 // See the License for the specific language governing permissions and 0013 // limitations under the License. 0014 0015 #ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_ 0016 #define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_ 0017 0018 #include <cassert> 0019 #include <cstdint> 0020 #include <iosfwd> 0021 0022 #include "absl/base/config.h" 0023 #include "absl/base/internal/raw_logging.h" 0024 #include "absl/base/optimization.h" 0025 #include "absl/strings/internal/cord_data_edge.h" 0026 #include "absl/strings/internal/cord_internal.h" 0027 #include "absl/strings/internal/cord_rep_flat.h" 0028 #include "absl/strings/string_view.h" 0029 #include "absl/types/span.h" 0030 0031 namespace absl { 0032 ABSL_NAMESPACE_BEGIN 0033 namespace cord_internal { 0034 0035 // `SetCordBtreeExhaustiveValidation()` can be set to force exhaustive 0036 // validation in debug assertions, and code that calls `IsValid()` 0037 // explicitly. By default, assertions should be relatively cheap and 0038 // AssertValid() can easily lead to O(n^2) complexity as recursive / full tree 0039 // validation is O(n). 0040 void SetCordBtreeExhaustiveValidation(bool do_exaustive_validation); 0041 bool IsCordBtreeExhaustiveValidationEnabled(); 0042 0043 class CordRepBtreeNavigator; 0044 0045 // CordRepBtree is as the name implies a btree implementation of a Cordrep tree. 0046 // Data is stored at the leaf level only, non leaf nodes contain down pointers 0047 // only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs of FLAT 0048 // or EXTERNAL nodes. The implementation allows for data to be added to either 0049 // end of the tree only, it does not provide any 'insert' logic. This has the 0050 // benefit that we can expect good fill ratios: all nodes except the outer 0051 // 'legs' will have 100% fill ratios for trees built using Append/Prepend 0052 // methods. Merged trees will typically have a fill ratio well above 50% as in a 0053 // similar fashion, one side of the merged tree will typically have a 100% fill 0054 // ratio, and the 'open' end will average 50%. All operations are O(log(n)) or 0055 // better, and the tree never needs balancing. 0056 // 0057 // All methods accepting a CordRep* or CordRepBtree* adopt a reference on that 0058 // input unless explicitly stated otherwise. All functions returning a CordRep* 0059 // or CordRepBtree* instance transfer a reference back to the caller. 0060 // Simplified, callers both 'donate' and 'consume' a reference count on each 0061 // call, simplifying the API. An example of building a tree: 0062 // 0063 // CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello")); 0064 // tree = CordRepBtree::Append(tree, MakeFlat("world")); 0065 // 0066 // In the above example, all inputs are consumed, making each call affecting 0067 // `tree` reference count neutral. The returned `tree` value can be different 0068 // from the input if the input is shared with other threads, or if the tree 0069 // grows in height, but callers typically never have to concern themselves with 0070 // that and trust that all methods DTRT at all times. 0071 class CordRepBtree : public CordRep { 0072 public: 0073 // EdgeType identifies `front` and `back` enum values. 0074 // Various implementations in CordRepBtree such as `Add` and `Edge` are 0075 // generic and templated on operating on either of the boundary edges. 0076 // For more information on the possible edges contained in a CordRepBtree 0077 // instance see the documentation for `edges_`. 0078 enum class EdgeType { kFront, kBack }; 0079 0080 // Convenience constants into `EdgeType` 0081 static constexpr EdgeType kFront = EdgeType::kFront; 0082 static constexpr EdgeType kBack = EdgeType::kBack; 0083 0084 // Maximum number of edges: based on experiments and performance data, we can 0085 // pick suitable values resulting in optimum cacheline aligned values. The 0086 // preferred values are based on 64-bit systems where we aim to align this 0087 // class onto 64 bytes, i.e.: 6 = 64 bytes, 14 = 128 bytes, etc. 0088 // TODO(b/192061034): experiment with alternative sizes. 0089 static constexpr size_t kMaxCapacity = 6; 0090 0091 // Reasonable maximum height of the btree. We can expect a fill ratio of at 0092 // least 50%: trees are always expanded at the front or back. Concatenating 0093 // trees will then typically fold at the top most node, where the lower nodes 0094 // are at least at capacity on one side of joined inputs. At a lower fill 0095 // rate of 4 edges per node, we have capacity for ~16 million leaf nodes. 0096 // We will fail / abort if an application ever exceeds this height, which 0097 // should be extremely rare (near impossible) and be an indication of an 0098 // application error: we do not assume it reasonable for any application to 0099 // operate correctly with such monster trees. 0100 // Another compelling reason for the number `12` is that any contextual stack 0101 // required for navigation or insertion requires 12 words and 12 bytes, which 0102 // fits inside 2 cache lines with some room to spare, and is reasonable as a 0103 // local stack variable compared to Cord's current near 400 bytes stack use. 0104 // The maximum `height` value of a node is then `kMaxDepth - 1` as node height 0105 // values start with a value of 0 for leaf nodes. 0106 static constexpr size_t kMaxDepth = 12; 0107 // See comments on height() for why this is an int and not a size_t. 0108 static constexpr int kMaxHeight = static_cast<int>(kMaxDepth - 1); 0109 0110 // `Action` defines the action for unwinding changes done at the btree's leaf 0111 // level that need to be propagated up to the parent node(s). Each operation 0112 // on a node has an effect / action defined as follows: 0113 // - kSelf 0114 // The operation (add / update, etc) was performed directly on the node as 0115 // the node is private to the current thread (i.e.: not shared directly or 0116 // indirectly through a refcount > 1). Changes can be propagated directly to 0117 // all parent nodes as all parent nodes are also then private to the current 0118 // thread. 0119 // - kCopied 0120 // The operation (add / update, etc) was performed on a copy of the original 0121 // node, as the node is (potentially) directly or indirectly shared with 0122 // other threads. Changes need to be propagated into the parent nodes where 0123 // the old down pointer must be unreffed and replaced with this new copy. 0124 // Such changes to parent nodes may themselves require a copy if the parent 0125 // node is also shared. A kCopied action can propagate all the way to the 0126 // top node where we then must unref the `tree` input provided by the 0127 // caller, and return the new copy. 0128 // - kPopped 0129 // The operation (typically add) could not be satisfied due to insufficient 0130 // capacity in the targeted node, and a new 'leg' was created that needs to 0131 // be added into the parent node. For example, adding a FLAT inside a leaf 0132 // node that is at capacity will create a new leaf node containing that 0133 // FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can 0134 // cascade up the tree if parent nodes are also at capacity. A 'Popped' 0135 // action propagating all the way to the top of the tree will result in 0136 // the tree becoming one level higher than the current tree through a final 0137 // `CordRepBtree::New(tree, popped)` call, resulting in a new top node 0138 // referencing the old tree and the new (fully popped upwards) 'leg'. 0139 enum Action { kSelf, kCopied, kPopped }; 0140 0141 // Result of an operation on a node. See the `Action` enum for details. 0142 struct OpResult { 0143 CordRepBtree* tree; 0144 Action action; 0145 }; 0146 0147 // Return value of the CopyPrefix and CopySuffix methods which can 0148 // return a node or data edge at any height inside the tree. 0149 // A height of 0 defines the lowest (leaf) node, a height of -1 identifies 0150 // `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof. 0151 struct CopyResult { 0152 CordRep* edge; 0153 int height; 0154 }; 0155 0156 // Logical position inside a node: 0157 // - index: index of the edge. 0158 // - n: size or offset value depending on context. 0159 struct Position { 0160 size_t index; 0161 size_t n; 0162 }; 0163 0164 // Creates a btree from the given input. Adopts a ref of `rep`. 0165 // If the input `rep` is itself a btree, i.e., `IsBtree()`, then this 0166 // function immediately returns `rep->btree()`. If the input is a valid data 0167 // edge (see IsDataEdge()), then a new leaf node is returned containing `rep` 0168 // as the sole data edge. Else, the input is assumed to be a (legacy) concat 0169 // tree, and the input is consumed and transformed into a btree(). 0170 static CordRepBtree* Create(CordRep* rep); 0171 0172 // Destroys the provided tree. Should only be called by cord internal API's, 0173 // typically after a ref_count.Decrement() on the last reference count. 0174 static void Destroy(CordRepBtree* tree); 0175 0176 // Destruction 0177 static void Delete(CordRepBtree* tree) { delete tree; } 0178 0179 // Use CordRep::Unref() as we overload for absl::Span<CordRep* const>. 0180 using CordRep::Unref; 0181 0182 // Unrefs all edges in `edges` which are assumed to be 'likely one'. 0183 static void Unref(absl::Span<CordRep* const> edges); 0184 0185 // Appends / Prepends an existing CordRep instance to this tree. 0186 // The below methods accept three types of input: 0187 // 1) `rep` is a data node (See `IsDataNode` for valid data edges). 0188 // `rep` is appended or prepended to this tree 'as is'. 0189 // 2) `rep` is a BTREE. 0190 // `rep` is merged into `tree` respecting the Append/Prepend order. 0191 // 3) `rep` is some other (legacy) type. 0192 // `rep` is converted in place and added to `tree` 0193 // Requires `tree` and `rep` to be not null. 0194 static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep); 0195 static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep); 0196 0197 // Append/Prepend the data in `data` to this tree. 0198 // The `extra` parameter defines how much extra capacity should be allocated 0199 // for any additional FLAT being allocated. This is an optimization hint from 0200 // the caller. For example, a caller may need to add 2 string_views of data 0201 // "abc" and "defghi" which are not consecutive. The caller can in this case 0202 // invoke `AddData(tree, "abc", 6)`, and any newly added flat is allocated 0203 // where possible with at least 6 bytes of extra capacity beyond `length`. 0204 // This helps avoiding data getting fragmented over multiple flats. 0205 // There is no limit on the size of `data`. If `data` can not be stored inside 0206 // a single flat, then the function will iteratively add flats until all data 0207 // has been consumed and appended or prepended to the tree. 0208 static CordRepBtree* Append(CordRepBtree* tree, string_view data, 0209 size_t extra = 0); 0210 static CordRepBtree* Prepend(CordRepBtree* tree, string_view data, 0211 size_t extra = 0); 0212 0213 // Returns a new tree, containing `n` bytes of data from this instance 0214 // starting at offset `offset`. Where possible, the returned tree shares 0215 // (re-uses) data edges and nodes with this instance to minimize the 0216 // combined memory footprint of both trees. 0217 // Requires `offset + n <= length`. Returns `nullptr` if `n` is zero. 0218 CordRep* SubTree(size_t offset, size_t n); 0219 0220 // Removes `n` trailing bytes from `tree`, and returns the resulting tree 0221 // or data edge. Returns `tree` if n is zero, and nullptr if n == length. 0222 // This function is logically identical to: 0223 // result = tree->SubTree(0, tree->length - n); 0224 // Unref(tree); 0225 // return result; 0226 // However, the actual implementation will as much as possible perform 'in 0227 // place' modifications on the tree on all nodes and edges that are mutable. 0228 // For example, in a fully privately owned tree with the last edge being a 0229 // flat of length 12, RemoveSuffix(1) will simply set the length of that data 0230 // edge to 11, and reduce the length of all nodes on the edge path by 1. 0231 static CordRep* RemoveSuffix(CordRepBtree* tree, size_t n); 0232 0233 // Returns the character at the given offset. 0234 char GetCharacter(size_t offset) const; 0235 0236 // Returns true if this node holds a single data edge, and if so, sets 0237 // `fragment` to reference the contained data. `fragment` is an optional 0238 // output parameter and allowed to be null. 0239 bool IsFlat(absl::string_view* fragment) const; 0240 0241 // Returns true if the data of `n` bytes starting at offset `offset` 0242 // is contained in a single data edge, and if so, sets fragment to reference 0243 // the contained data. `fragment` is an optional output parameter and allowed 0244 // to be null. 0245 bool IsFlat(size_t offset, size_t n, absl::string_view* fragment) const; 0246 0247 // Returns a span (mutable range of bytes) of up to `size` bytes into the 0248 // last FLAT data edge inside this tree under the following conditions: 0249 // - none of the nodes down into the FLAT node are shared. 0250 // - the last data edge in this tree is a non-shared FLAT. 0251 // - the referenced FLAT has additional capacity available. 0252 // If all these conditions are met, a non-empty span is returned, and the 0253 // length of the flat node and involved tree nodes have been increased by 0254 // `span.length()`. The caller is responsible for immediately assigning values 0255 // to all uninitialized data reference by the returned span. 0256 // Requires `this->refcount.IsOne()`: this function forces the caller to do 0257 // this fast path check on the top level node, as this is the most commonly 0258 // shared node of a cord tree. 0259 Span<char> GetAppendBuffer(size_t size); 0260 0261 // Extracts the right-most data edge from this tree iff: 0262 // - the tree and all internal edges to the right-most node are not shared. 0263 // - the right-most node is a FLAT node and not shared. 0264 // - the right-most node has at least the desired extra capacity. 0265 // 0266 // Returns {tree, nullptr} if any of the above conditions are not met. 0267 // This method effectively removes data from the tree. The intent of this 0268 // method is to allow applications appending small string data to use 0269 // pre-existing capacity, and add the modified rep back to the tree. 0270 // 0271 // Simplified such code would look similar to this: 0272 // void MyTreeBuilder::Append(string_view data) { 0273 // ExtractResult result = CordRepBtree::ExtractAppendBuffer(tree_, 1); 0274 // if (CordRep* rep = result.extracted) { 0275 // size_t available = rep->Capacity() - rep->length; 0276 // size_t n = std::min(data.size(), n); 0277 // memcpy(rep->Data(), data.data(), n); 0278 // rep->length += n; 0279 // data.remove_prefix(n); 0280 // if (!result.tree->IsBtree()) { 0281 // tree_ = CordRepBtree::Create(result.tree); 0282 // } 0283 // tree_ = CordRepBtree::Append(tree_, rep); 0284 // } 0285 // ... 0286 // // Remaining edge in `result.tree`. 0287 // } 0288 static ExtractResult ExtractAppendBuffer(CordRepBtree* tree, 0289 size_t extra_capacity = 1); 0290 0291 // Returns the `height` of the tree. The height of a tree is limited to 0292 // kMaxHeight. `height` is implemented as an `int` as in some places we 0293 // use negative (-1) values for 'data edges'. 0294 int height() const { return static_cast<int>(storage[0]); } 0295 0296 // Properties: begin, back, end, front/back boundary indexes. 0297 size_t begin() const { return static_cast<size_t>(storage[1]); } 0298 size_t back() const { return static_cast<size_t>(storage[2]) - 1; } 0299 size_t end() const { return static_cast<size_t>(storage[2]); } 0300 size_t index(EdgeType edge) const { 0301 return edge == kFront ? begin() : back(); 0302 } 0303 0304 // Properties: size and capacity. 0305 // `capacity` contains the current capacity of this instance, where 0306 // `kMaxCapacity` contains the maximum capacity of a btree node. 0307 // For now, `capacity` and `kMaxCapacity` return the same value, but this may 0308 // change in the future if we see benefit in dynamically sizing 'small' nodes 0309 // to 'large' nodes for large data trees. 0310 size_t size() const { return end() - begin(); } 0311 size_t capacity() const { return kMaxCapacity; } 0312 0313 // Edge access 0314 inline CordRep* Edge(size_t index) const; 0315 inline CordRep* Edge(EdgeType edge_type) const; 0316 inline absl::Span<CordRep* const> Edges() const; 0317 inline absl::Span<CordRep* const> Edges(size_t begin, size_t end) const; 0318 0319 // Returns reference to the data edge at `index`. 0320 // Requires this instance to be a leaf node, and `index` to be valid index. 0321 inline absl::string_view Data(size_t index) const; 0322 0323 // Diagnostics: returns true if `tree` is valid and internally consistent. 0324 // If `shallow` is false, then the provided top level node and all child nodes 0325 // below it are recursively checked. If `shallow` is true, only the provided 0326 // node in `tree` and the cumulative length, type and height of the direct 0327 // child nodes of `tree` are checked. The value of `shallow` is ignored if the 0328 // internal `cord_btree_exhaustive_validation` diagnostics variable is true, 0329 // in which case the performed validations works as if `shallow` were false. 0330 // This function is intended for debugging and testing purposes only. 0331 static bool IsValid(const CordRepBtree* tree, bool shallow = false); 0332 0333 // Diagnostics: asserts that the provided tree is valid. 0334 // `AssertValid()` performs a shallow validation by default. `shallow` can be 0335 // set to false in which case an exhaustive validation is performed. This 0336 // function is implemented in terms of calling `IsValid()` and asserting the 0337 // return value to be true. See `IsValid()` for more information. 0338 // This function is intended for debugging and testing purposes only. 0339 static CordRepBtree* AssertValid(CordRepBtree* tree, bool shallow = true); 0340 static const CordRepBtree* AssertValid(const CordRepBtree* tree, 0341 bool shallow = true); 0342 0343 // Diagnostics: dump the contents of this tree to `stream`. 0344 // This function is intended for debugging and testing purposes only. 0345 static void Dump(const CordRep* rep, std::ostream& stream); 0346 static void Dump(const CordRep* rep, absl::string_view label, 0347 std::ostream& stream); 0348 static void Dump(const CordRep* rep, absl::string_view label, 0349 bool include_contents, std::ostream& stream); 0350 0351 // Adds the edge `edge` to this node if possible. `owned` indicates if the 0352 // current node is potentially shared or not with other threads. Returns: 0353 // - {kSelf, <this>} 0354 // The edge was directly added to this node. 0355 // - {kCopied, <node>} 0356 // The edge was added to a copy of this node. 0357 // - {kPopped, New(edge, height())} 0358 // A new leg with the edge was created as this node has no extra capacity. 0359 template <EdgeType edge_type> 0360 inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta); 0361 0362 // Replaces the front or back edge with the provided new edge. Returns: 0363 // - {kSelf, <this>} 0364 // The edge was directly set in this node. The old edge is unreffed. 0365 // - {kCopied, <node>} 0366 // A copy of this node was created with the new edge value. 0367 // In both cases, the function adopts a reference on `edge`. 0368 template <EdgeType edge_type> 0369 OpResult SetEdge(bool owned, CordRep* edge, size_t delta); 0370 0371 // Creates a new empty node at the specified height. 0372 static CordRepBtree* New(int height = 0); 0373 0374 // Creates a new node containing `rep`, with the height being computed 0375 // automatically based on the type of `rep`. 0376 static CordRepBtree* New(CordRep* rep); 0377 0378 // Creates a new node containing both `front` and `back` at height 0379 // `front.height() + 1`. Requires `back.height() == front.height()`. 0380 static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back); 0381 0382 // Creates a fully balanced tree from the provided tree by rebuilding a new 0383 // tree from all data edges in the input. This function is automatically 0384 // invoked internally when the tree exceeds the maximum height. 0385 static CordRepBtree* Rebuild(CordRepBtree* tree); 0386 0387 private: 0388 CordRepBtree() = default; 0389 ~CordRepBtree() = default; 0390 0391 // Initializes the main properties `tag`, `begin`, `end`, `height`. 0392 inline void InitInstance(int height, size_t begin = 0, size_t end = 0); 0393 0394 // Direct property access begin / end 0395 void set_begin(size_t begin) { storage[1] = static_cast<uint8_t>(begin); } 0396 void set_end(size_t end) { storage[2] = static_cast<uint8_t>(end); } 0397 0398 // Decreases the value of `begin` by `n`, and returns the new value. Notice 0399 // how this returns the new value unlike atomic::fetch_add which returns the 0400 // old value. This is because this is used to prepend edges at 'begin - 1'. 0401 size_t sub_fetch_begin(size_t n) { 0402 storage[1] -= static_cast<uint8_t>(n); 0403 return storage[1]; 0404 } 0405 0406 // Increases the value of `end` by `n`, and returns the previous value. This 0407 // function is typically used to append edges at 'end'. 0408 size_t fetch_add_end(size_t n) { 0409 const uint8_t current = storage[2]; 0410 storage[2] = static_cast<uint8_t>(current + n); 0411 return current; 0412 } 0413 0414 // Returns the index of the last edge starting on, or before `offset`, with 0415 // `n` containing the relative offset of `offset` inside that edge. 0416 // Requires `offset` < length. 0417 Position IndexOf(size_t offset) const; 0418 0419 // Returns the index of the last edge starting before `offset`, with `n` 0420 // containing the relative offset of `offset` inside that edge. 0421 // This function is useful to find the edges for some span of bytes ending at 0422 // `offset` (i.e., `n` bytes). For example: 0423 // 0424 // Position pos = IndexBefore(n) 0425 // edges = Edges(begin(), pos.index) // All full edges (may be empty) 0426 // last = Sub(Edge(pos.index), 0, pos.n) // Last partial edge (may be empty) 0427 // 0428 // Requires 0 < `offset` <= length. 0429 Position IndexBefore(size_t offset) const; 0430 0431 // Returns the index of the edge ending at (or on) length `length`, and the 0432 // number of bytes inside that edge up to `length`. For example, if we have a 0433 // Node with 2 edges, one of 10 and one of 20 long, then IndexOfLength(27) 0434 // will return {1, 17}, and IndexOfLength(10) will return {0, 10}. 0435 Position IndexOfLength(size_t n) const; 0436 0437 // Identical to the above function except starting from the position `front`. 0438 // This function is equivalent to `IndexBefore(front.n + offset)`, with 0439 // the difference that this function is optimized to start at `front.index`. 0440 Position IndexBefore(Position front, size_t offset) const; 0441 0442 // Returns the index of the edge directly beyond the edge containing offset 0443 // `offset`, with `n` containing the distance of that edge from `offset`. 0444 // This function is useful for iteratively finding suffix nodes and remaining 0445 // partial bytes in left-most suffix nodes as for example in CopySuffix. 0446 // Requires `offset` < length. 0447 Position IndexBeyond(size_t offset) const; 0448 0449 // Creates a new leaf node containing as much data as possible from `data`. 0450 // The data is added either forwards or reversed depending on `edge_type`. 0451 // Callers must check the length of the returned node to determine if all data 0452 // was copied or not. 0453 // See the `Append/Prepend` function for the meaning and purpose of `extra`. 0454 template <EdgeType edge_type> 0455 static CordRepBtree* NewLeaf(absl::string_view data, size_t extra); 0456 0457 // Creates a raw copy of this Btree node with the specified length, copying 0458 // all properties, but without adding any references to existing edges. 0459 CordRepBtree* CopyRaw(size_t new_length) const; 0460 0461 // Creates a full copy of this Btree node, adding a reference on all edges. 0462 CordRepBtree* Copy() const; 0463 0464 // Creates a partial copy of this Btree node, copying all edges up to `end`, 0465 // adding a reference on each copied edge, and sets the length of the newly 0466 // created copy to `new_length`. 0467 CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const; 0468 0469 // Returns a tree containing the edges [tree->begin(), end) and length 0470 // of `new_length`. This method consumes a reference on the provided 0471 // tree, and logically performs the following operation: 0472 // result = tree->CopyBeginTo(end, new_length); 0473 // CordRep::Unref(tree); 0474 // return result; 0475 static CordRepBtree* ConsumeBeginTo(CordRepBtree* tree, size_t end, 0476 size_t new_length); 0477 0478 // Creates a partial copy of this Btree node, copying all edges starting at 0479 // `begin`, adding a reference on each copied edge, and sets the length of 0480 // the newly created copy to `new_length`. 0481 CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const; 0482 0483 // Extracts and returns the front edge from the provided tree. 0484 // This method consumes a reference on the provided tree, and logically 0485 // performs the following operation: 0486 // edge = CordRep::Ref(tree->Edge(kFront)); 0487 // CordRep::Unref(tree); 0488 // return edge; 0489 static CordRep* ExtractFront(CordRepBtree* tree); 0490 0491 // Returns a tree containing the result of appending `right` to `left`. 0492 static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right); 0493 0494 // Fallback functions for `Create()`, `Append()` and `Prepend()` which 0495 // deal with legacy / non conforming input, i.e.: CONCAT trees. 0496 static CordRepBtree* CreateSlow(CordRep* rep); 0497 static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep); 0498 static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep); 0499 0500 // Recursively rebuilds `tree` into `stack`. If 'consume` is set to true, the 0501 // function will consume a reference on `tree`. `stack` is a null terminated 0502 // array containing the new tree's state, with the current leaf node at 0503 // stack[0], and parent nodes above that, or null for 'top of tree'. 0504 static void Rebuild(CordRepBtree** stack, CordRepBtree* tree, bool consume); 0505 0506 // Aligns existing edges to start at index 0, to allow for a new edge to be 0507 // added to the back of the current edges. 0508 inline void AlignBegin(); 0509 0510 // Aligns existing edges to end at `capacity`, to allow for a new edge to be 0511 // added in front of the current edges. 0512 inline void AlignEnd(); 0513 0514 // Adds the provided edge to this node. 0515 // Requires this node to have capacity for the edge. Realigns / moves 0516 // existing edges as needed to prepend or append the new edge. 0517 template <EdgeType edge_type> 0518 inline void Add(CordRep* rep); 0519 0520 // Adds the provided edges to this node. 0521 // Requires this node to have capacity for the edges. Realigns / moves 0522 // existing edges as needed to prepend or append the new edges. 0523 template <EdgeType edge_type> 0524 inline void Add(absl::Span<CordRep* const>); 0525 0526 // Adds data from `data` to this node until either all data has been consumed, 0527 // or there is no more capacity for additional flat nodes inside this node. 0528 // Requires the current node to be a leaf node, data to be non empty, and the 0529 // current node to have capacity for at least one more data edge. 0530 // Returns any remaining data from `data` that was not added, which is 0531 // depending on the edge type (front / back) either the remaining prefix of 0532 // suffix of the input. 0533 // See the `Append/Prepend` function for the meaning and purpose of `extra`. 0534 template <EdgeType edge_type> 0535 absl::string_view AddData(absl::string_view data, size_t extra); 0536 0537 // Replace the front or back edge with the provided value. 0538 // Adopts a reference on `edge` and unrefs the old edge. 0539 template <EdgeType edge_type> 0540 inline void SetEdge(CordRep* edge); 0541 0542 // Returns a partial copy of the current tree containing the first `n` bytes 0543 // of data. `CopyResult` contains both the resulting edge and its height. The 0544 // resulting tree may be less high than the current tree, or even be a single 0545 // matching data edge if `allow_folding` is set to true. 0546 // For example, if `n == 1`, then the result will be the single data edge, and 0547 // height will be set to -1 (one below the owning leaf node). If n == 0, this 0548 // function returns null. Requires `n <= length` 0549 CopyResult CopyPrefix(size_t n, bool allow_folding = true); 0550 0551 // Returns a partial copy of the current tree containing all data starting 0552 // after `offset`. `CopyResult` contains both the resulting edge and its 0553 // height. The resulting tree may be less high than the current tree, or even 0554 // be a single matching data edge. For example, if `n == length - 1`, then the 0555 // result will be a single data edge, and height will be set to -1 (one below 0556 // the owning leaf node). 0557 // Requires `offset < length` 0558 CopyResult CopySuffix(size_t offset); 0559 0560 // Returns a OpResult value of {this, kSelf} or {Copy(), kCopied} 0561 // depending on the value of `owned`. 0562 inline OpResult ToOpResult(bool owned); 0563 0564 // Adds `rep` to the specified tree, returning the modified tree. 0565 template <EdgeType edge_type> 0566 static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep); 0567 0568 // Adds `data` to the specified tree, returning the modified tree. 0569 // See the `Append/Prepend` function for the meaning and purpose of `extra`. 0570 template <EdgeType edge_type> 0571 static CordRepBtree* AddData(CordRepBtree* tree, absl::string_view data, 0572 size_t extra = 0); 0573 0574 // Merges `src` into `dst` with `src` being added either before (kFront) or 0575 // after (kBack) `dst`. Requires the height of `dst` to be greater than or 0576 // equal to the height of `src`. 0577 template <EdgeType edge_type> 0578 static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src); 0579 0580 // Fallback version of GetAppendBuffer for large trees: GetAppendBuffer() 0581 // implements an inlined version for trees of limited height (3 levels), 0582 // GetAppendBufferSlow implements the logic for large trees. 0583 Span<char> GetAppendBufferSlow(size_t size); 0584 0585 // `edges_` contains all edges starting from this instance. 0586 // These are explicitly `child` edges only, a cord btree (or any cord tree in 0587 // that respect) does not store `parent` pointers anywhere: multiple trees / 0588 // parents can reference the same shared child edge. The type of these edges 0589 // depends on the height of the node. `Leaf nodes` (height == 0) contain `data 0590 // edges` (external or flat nodes, or sub-strings thereof). All other nodes 0591 // (height > 0) contain pointers to BTREE nodes with a height of `height - 1`. 0592 CordRep* edges_[kMaxCapacity]; 0593 0594 friend class CordRepBtreeTestPeer; 0595 friend class CordRepBtreeNavigator; 0596 }; 0597 0598 inline CordRepBtree* CordRep::btree() { 0599 assert(IsBtree()); 0600 return static_cast<CordRepBtree*>(this); 0601 } 0602 0603 inline const CordRepBtree* CordRep::btree() const { 0604 assert(IsBtree()); 0605 return static_cast<const CordRepBtree*>(this); 0606 } 0607 0608 inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) { 0609 tag = BTREE; 0610 storage[0] = static_cast<uint8_t>(height); 0611 storage[1] = static_cast<uint8_t>(begin); 0612 storage[2] = static_cast<uint8_t>(end); 0613 } 0614 0615 inline CordRep* CordRepBtree::Edge(size_t index) const { 0616 assert(index >= begin()); 0617 assert(index < end()); 0618 return edges_[index]; 0619 } 0620 0621 inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const { 0622 return edges_[edge_type == kFront ? begin() : back()]; 0623 } 0624 0625 inline absl::Span<CordRep* const> CordRepBtree::Edges() const { 0626 return {edges_ + begin(), size()}; 0627 } 0628 0629 inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin, 0630 size_t end) const { 0631 assert(begin <= end); 0632 assert(begin >= this->begin()); 0633 assert(end <= this->end()); 0634 return {edges_ + begin, static_cast<size_t>(end - begin)}; 0635 } 0636 0637 inline absl::string_view CordRepBtree::Data(size_t index) const { 0638 assert(height() == 0); 0639 return EdgeData(Edge(index)); 0640 } 0641 0642 inline CordRepBtree* CordRepBtree::New(int height) { 0643 CordRepBtree* tree = new CordRepBtree; 0644 tree->length = 0; 0645 tree->InitInstance(height); 0646 return tree; 0647 } 0648 0649 inline CordRepBtree* CordRepBtree::New(CordRep* rep) { 0650 CordRepBtree* tree = new CordRepBtree; 0651 int height = rep->IsBtree() ? rep->btree()->height() + 1 : 0; 0652 tree->length = rep->length; 0653 tree->InitInstance(height, /*begin=*/0, /*end=*/1); 0654 tree->edges_[0] = rep; 0655 return tree; 0656 } 0657 0658 inline CordRepBtree* CordRepBtree::New(CordRepBtree* front, 0659 CordRepBtree* back) { 0660 assert(front->height() == back->height()); 0661 CordRepBtree* tree = new CordRepBtree; 0662 tree->length = front->length + back->length; 0663 tree->InitInstance(front->height() + 1, /*begin=*/0, /*end=*/2); 0664 tree->edges_[0] = front; 0665 tree->edges_[1] = back; 0666 return tree; 0667 } 0668 0669 inline void CordRepBtree::Unref(absl::Span<CordRep* const> edges) { 0670 for (CordRep* edge : edges) { 0671 if (ABSL_PREDICT_FALSE(!edge->refcount.Decrement())) { 0672 CordRep::Destroy(edge); 0673 } 0674 } 0675 } 0676 0677 inline CordRepBtree* CordRepBtree::CopyRaw(size_t new_length) const { 0678 CordRepBtree* tree = new CordRepBtree; 0679 0680 // `length` and `refcount` are the first members of `CordRepBtree`. 0681 // We initialize `length` using the given length, have `refcount` be set to 0682 // ref = 1 through its default constructor, and copy all data beyond 0683 // 'refcount' which starts with `tag` using a single memcpy: all contents 0684 // except `refcount` is trivially copyable, and the compiler does not 0685 // efficiently coalesce member-wise copy of these members. 0686 // See https://gcc.godbolt.org/z/qY8zsca6z 0687 // # LINT.IfChange(copy_raw) 0688 tree->length = new_length; 0689 uint8_t* dst = &tree->tag; 0690 const uint8_t* src = &tag; 0691 const ptrdiff_t offset = src - reinterpret_cast<const uint8_t*>(this); 0692 memcpy(dst, src, sizeof(CordRepBtree) - static_cast<size_t>(offset)); 0693 return tree; 0694 // # LINT.ThenChange() 0695 } 0696 0697 inline CordRepBtree* CordRepBtree::Copy() const { 0698 CordRepBtree* tree = CopyRaw(length); 0699 for (CordRep* rep : Edges()) CordRep::Ref(rep); 0700 return tree; 0701 } 0702 0703 inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin, 0704 size_t new_length) const { 0705 assert(begin >= this->begin()); 0706 assert(begin <= this->end()); 0707 CordRepBtree* tree = CopyRaw(new_length); 0708 tree->set_begin(begin); 0709 for (CordRep* edge : tree->Edges()) CordRep::Ref(edge); 0710 return tree; 0711 } 0712 0713 inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end, 0714 size_t new_length) const { 0715 assert(end <= capacity()); 0716 assert(end >= this->begin()); 0717 CordRepBtree* tree = CopyRaw(new_length); 0718 tree->set_end(end); 0719 for (CordRep* edge : tree->Edges()) CordRep::Ref(edge); 0720 return tree; 0721 } 0722 0723 inline void CordRepBtree::AlignBegin() { 0724 // The below code itself does not need to be fast as typically we have 0725 // mono-directional append/prepend calls, and `begin` / `end` are typically 0726 // adjusted no more than once. But we want to avoid potential register clobber 0727 // effects, making the compiler emit register save/store/spills, and minimize 0728 // the size of code. 0729 const size_t delta = begin(); 0730 if (ABSL_PREDICT_FALSE(delta != 0)) { 0731 const size_t new_end = end() - delta; 0732 set_begin(0); 0733 set_end(new_end); 0734 // TODO(mvels): we can write this using 2 loads / 2 stores depending on 0735 // total size for the kMaxCapacity = 6 case. I.e., we can branch (switch) on 0736 // size, and then do overlapping load/store of up to 4 pointers (inlined as 0737 // XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which is a) 0738 // compact and b) not clobbering any registers. 0739 ABSL_ASSUME(new_end <= kMaxCapacity); 0740 #ifdef __clang__ 0741 #pragma unroll 1 0742 #endif 0743 for (size_t i = 0; i < new_end; ++i) { 0744 edges_[i] = edges_[i + delta]; 0745 } 0746 } 0747 } 0748 0749 inline void CordRepBtree::AlignEnd() { 0750 // See comments in `AlignBegin` for motivation on the hand-rolled for loops. 0751 const size_t delta = capacity() - end(); 0752 if (delta != 0) { 0753 const size_t new_begin = begin() + delta; 0754 const size_t new_end = end() + delta; 0755 set_begin(new_begin); 0756 set_end(new_end); 0757 ABSL_ASSUME(new_end <= kMaxCapacity); 0758 #ifdef __clang__ 0759 #pragma unroll 1 0760 #endif 0761 for (size_t i = new_end - 1; i >= new_begin; --i) { 0762 edges_[i] = edges_[i - delta]; 0763 } 0764 } 0765 } 0766 0767 template <> 0768 inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) { 0769 AlignBegin(); 0770 edges_[fetch_add_end(1)] = rep; 0771 } 0772 0773 template <> 0774 inline void CordRepBtree::Add<CordRepBtree::kBack>( 0775 absl::Span<CordRep* const> edges) { 0776 AlignBegin(); 0777 size_t new_end = end(); 0778 for (CordRep* edge : edges) edges_[new_end++] = edge; 0779 set_end(new_end); 0780 } 0781 0782 template <> 0783 inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) { 0784 AlignEnd(); 0785 edges_[sub_fetch_begin(1)] = rep; 0786 } 0787 0788 template <> 0789 inline void CordRepBtree::Add<CordRepBtree::kFront>( 0790 absl::Span<CordRep* const> edges) { 0791 AlignEnd(); 0792 size_t new_begin = begin() - edges.size(); 0793 set_begin(new_begin); 0794 for (CordRep* edge : edges) edges_[new_begin++] = edge; 0795 } 0796 0797 template <CordRepBtree::EdgeType edge_type> 0798 inline void CordRepBtree::SetEdge(CordRep* edge) { 0799 const int idx = edge_type == kFront ? begin() : back(); 0800 CordRep::Unref(edges_[idx]); 0801 edges_[idx] = edge; 0802 } 0803 0804 inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) { 0805 return owned ? OpResult{this, kSelf} : OpResult{Copy(), kCopied}; 0806 } 0807 0808 inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const { 0809 assert(offset < length); 0810 size_t index = begin(); 0811 while (offset >= edges_[index]->length) offset -= edges_[index++]->length; 0812 return {index, offset}; 0813 } 0814 0815 inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const { 0816 assert(offset > 0); 0817 assert(offset <= length); 0818 size_t index = begin(); 0819 while (offset > edges_[index]->length) offset -= edges_[index++]->length; 0820 return {index, offset}; 0821 } 0822 0823 inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front, 0824 size_t offset) const { 0825 size_t index = front.index; 0826 offset = offset + front.n; 0827 while (offset > edges_[index]->length) offset -= edges_[index++]->length; 0828 return {index, offset}; 0829 } 0830 0831 inline CordRepBtree::Position CordRepBtree::IndexOfLength(size_t n) const { 0832 assert(n <= length); 0833 size_t index = back(); 0834 size_t strip = length - n; 0835 while (strip >= edges_[index]->length) strip -= edges_[index--]->length; 0836 return {index, edges_[index]->length - strip}; 0837 } 0838 0839 inline CordRepBtree::Position CordRepBtree::IndexBeyond( 0840 const size_t offset) const { 0841 // We need to find the edge which `starting offset` is beyond (>=)`offset`. 0842 // For this we can't use the `offset -= length` logic of IndexOf. Instead, we 0843 // track the offset of the `current edge` in `off`, which we increase as we 0844 // iterate over the edges until we find the matching edge. 0845 size_t off = 0; 0846 size_t index = begin(); 0847 while (offset > off) off += edges_[index++]->length; 0848 return {index, off - offset}; 0849 } 0850 0851 inline CordRepBtree* CordRepBtree::Create(CordRep* rep) { 0852 if (IsDataEdge(rep)) return New(rep); 0853 return CreateSlow(rep); 0854 } 0855 0856 inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) { 0857 assert(refcount.IsOne()); 0858 CordRepBtree* tree = this; 0859 const int height = this->height(); 0860 CordRepBtree* n1 = tree; 0861 CordRepBtree* n2 = tree; 0862 CordRepBtree* n3 = tree; 0863 switch (height) { 0864 case 3: 0865 tree = tree->Edge(kBack)->btree(); 0866 if (!tree->refcount.IsOne()) return {}; 0867 n2 = tree; 0868 ABSL_FALLTHROUGH_INTENDED; 0869 case 2: 0870 tree = tree->Edge(kBack)->btree(); 0871 if (!tree->refcount.IsOne()) return {}; 0872 n1 = tree; 0873 ABSL_FALLTHROUGH_INTENDED; 0874 case 1: 0875 tree = tree->Edge(kBack)->btree(); 0876 if (!tree->refcount.IsOne()) return {}; 0877 ABSL_FALLTHROUGH_INTENDED; 0878 case 0: 0879 CordRep* edge = tree->Edge(kBack); 0880 if (!edge->refcount.IsOne()) return {}; 0881 if (edge->tag < FLAT) return {}; 0882 size_t avail = edge->flat()->Capacity() - edge->length; 0883 if (avail == 0) return {}; 0884 size_t delta = (std::min)(size, avail); 0885 Span<char> span = {edge->flat()->Data() + edge->length, delta}; 0886 edge->length += delta; 0887 switch (height) { 0888 case 3: 0889 n3->length += delta; 0890 ABSL_FALLTHROUGH_INTENDED; 0891 case 2: 0892 n2->length += delta; 0893 ABSL_FALLTHROUGH_INTENDED; 0894 case 1: 0895 n1->length += delta; 0896 ABSL_FALLTHROUGH_INTENDED; 0897 case 0: 0898 tree->length += delta; 0899 return span; 0900 } 0901 break; 0902 } 0903 return GetAppendBufferSlow(size); 0904 } 0905 0906 extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>( 0907 CordRepBtree* tree, CordRep* rep); 0908 0909 extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>( 0910 CordRepBtree* tree, CordRep* rep); 0911 0912 inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) { 0913 if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) { 0914 return CordRepBtree::AddCordRep<kBack>(tree, rep); 0915 } 0916 return AppendSlow(tree, rep); 0917 } 0918 0919 inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) { 0920 if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) { 0921 return CordRepBtree::AddCordRep<kFront>(tree, rep); 0922 } 0923 return PrependSlow(tree, rep); 0924 } 0925 0926 #ifdef NDEBUG 0927 0928 inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, 0929 bool /* shallow */) { 0930 return tree; 0931 } 0932 0933 inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree, 0934 bool /* shallow */) { 0935 return tree; 0936 } 0937 0938 #endif 0939 0940 } // namespace cord_internal 0941 ABSL_NAMESPACE_END 0942 } // namespace absl 0943 0944 #endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |