Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2024-11-15 09:01:13

0001 // Copyright 2021 The Abseil Authors
0002 //
0003 // Licensed under the Apache License, Version 2.0 (the "License");
0004 // you may not use this file except in compliance with the License.
0005 // You may obtain a copy of the License at
0006 //
0007 //     https://www.apache.org/licenses/LICENSE-2.0
0008 //
0009 // Unless required by applicable law or agreed to in writing, software
0010 // distributed under the License is distributed on an "AS IS" BASIS,
0011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0012 // See the License for the specific language governing permissions and
0013 // limitations under the License.
0014 
0015 #ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
0016 #define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
0017 
0018 #include <cassert>
0019 #include <cstdint>
0020 #include <iosfwd>
0021 
0022 #include "absl/base/config.h"
0023 #include "absl/base/internal/raw_logging.h"
0024 #include "absl/base/optimization.h"
0025 #include "absl/strings/internal/cord_data_edge.h"
0026 #include "absl/strings/internal/cord_internal.h"
0027 #include "absl/strings/internal/cord_rep_flat.h"
0028 #include "absl/strings/string_view.h"
0029 #include "absl/types/span.h"
0030 
0031 namespace absl {
0032 ABSL_NAMESPACE_BEGIN
0033 namespace cord_internal {
0034 
0035 // `SetCordBtreeExhaustiveValidation()` can be set to force exhaustive
0036 // validation in debug assertions, and code that calls `IsValid()`
0037 // explicitly. By default, assertions should be relatively cheap and
0038 // AssertValid() can easily lead to O(n^2) complexity as recursive / full tree
0039 // validation is O(n).
0040 void SetCordBtreeExhaustiveValidation(bool do_exaustive_validation);
0041 bool IsCordBtreeExhaustiveValidationEnabled();
0042 
0043 class CordRepBtreeNavigator;
0044 
0045 // CordRepBtree is as the name implies a btree implementation of a Cordrep tree.
0046 // Data is stored at the leaf level only, non leaf nodes contain down pointers
0047 // only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs of FLAT
0048 // or EXTERNAL nodes. The implementation allows for data to be added to either
0049 // end of the tree only, it does not provide any 'insert' logic. This has the
0050 // benefit that we can expect good fill ratios: all nodes except the outer
0051 // 'legs' will have 100% fill ratios for trees built using Append/Prepend
0052 // methods. Merged trees will typically have a fill ratio well above 50% as in a
0053 // similar fashion, one side of the merged tree will typically have a 100% fill
0054 // ratio, and the 'open' end will average 50%. All operations are O(log(n)) or
0055 // better, and the tree never needs balancing.
0056 //
0057 // All methods accepting a CordRep* or CordRepBtree* adopt a reference on that
0058 // input unless explicitly stated otherwise. All functions returning a CordRep*
0059 // or CordRepBtree* instance transfer a reference back to the caller.
0060 // Simplified, callers both 'donate' and 'consume' a reference count on each
0061 // call, simplifying the API. An example of building a tree:
0062 //
0063 //   CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello"));
0064 //   tree = CordRepBtree::Append(tree, MakeFlat("world"));
0065 //
0066 // In the above example, all inputs are consumed, making each call affecting
0067 // `tree` reference count neutral. The returned `tree` value can be different
0068 // from the input if the input is shared with other threads, or if the tree
0069 // grows in height, but callers typically never have to concern themselves with
0070 // that and trust that all methods DTRT at all times.
0071 class CordRepBtree : public CordRep {
0072  public:
0073   // EdgeType identifies `front` and `back` enum values.
0074   // Various implementations in CordRepBtree such as `Add` and `Edge` are
0075   // generic and templated on operating on either of the boundary edges.
0076   // For more information on the possible edges contained in a CordRepBtree
0077   // instance see the documentation for `edges_`.
0078   enum class EdgeType { kFront, kBack };
0079 
0080   // Convenience constants into `EdgeType`
0081   static constexpr EdgeType kFront = EdgeType::kFront;
0082   static constexpr EdgeType kBack = EdgeType::kBack;
0083 
0084   // Maximum number of edges: based on experiments and performance data, we can
0085   // pick suitable values resulting in optimum cacheline aligned values. The
0086   // preferred values are based on 64-bit systems where we aim to align this
0087   // class onto 64 bytes, i.e.:  6 = 64 bytes, 14 = 128 bytes, etc.
0088   // TODO(b/192061034): experiment with alternative sizes.
0089   static constexpr size_t kMaxCapacity = 6;
0090 
0091   // Reasonable maximum height of the btree. We can expect a fill ratio of at
0092   // least 50%: trees are always expanded at the front or back. Concatenating
0093   // trees will then typically fold at the top most node, where the lower nodes
0094   // are at least at capacity on one side of joined inputs. At a lower fill
0095   // rate of 4 edges per node, we have capacity for ~16 million leaf nodes.
0096   // We will fail / abort if an application ever exceeds this height, which
0097   // should be extremely rare (near impossible) and be an indication of an
0098   // application error: we do not assume it reasonable for any application to
0099   // operate correctly with such monster trees.
0100   // Another compelling reason for the number `12` is that any contextual stack
0101   // required for navigation or insertion requires 12 words and 12 bytes, which
0102   // fits inside 2 cache lines with some room to spare, and is reasonable as a
0103   // local stack variable compared to Cord's current near 400 bytes stack use.
0104   // The maximum `height` value of a node is then `kMaxDepth - 1` as node height
0105   // values start with a value of 0 for leaf nodes.
0106   static constexpr size_t kMaxDepth = 12;
0107   // See comments on height() for why this is an int and not a size_t.
0108   static constexpr int kMaxHeight = static_cast<int>(kMaxDepth - 1);
0109 
0110   // `Action` defines the action for unwinding changes done at the btree's leaf
0111   // level that need to be propagated up to the parent node(s). Each operation
0112   // on a node has an effect / action defined as follows:
0113   // - kSelf
0114   //   The operation (add / update, etc) was performed directly on the node as
0115   //   the node is private to the current thread (i.e.: not shared directly or
0116   //   indirectly through a refcount > 1). Changes can be propagated directly to
0117   //   all parent nodes as all parent nodes are also then private to the current
0118   //   thread.
0119   // - kCopied
0120   //   The operation (add / update, etc) was performed on a copy of the original
0121   //   node, as the node is (potentially) directly or indirectly shared with
0122   //   other threads. Changes need to be propagated into the parent nodes where
0123   //   the old down pointer must be unreffed and replaced with this new copy.
0124   //   Such changes to parent nodes may themselves require a copy if the parent
0125   //   node is also shared. A kCopied action can propagate all the way to the
0126   //   top node where we then must unref the `tree` input provided by the
0127   //   caller, and return the new copy.
0128   // - kPopped
0129   //   The operation (typically add) could not be satisfied due to insufficient
0130   //   capacity in the targeted node, and a new 'leg' was created that needs to
0131   //   be added into the parent node. For example, adding a FLAT inside a leaf
0132   //   node that is at capacity will create a new leaf node containing that
0133   //   FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can
0134   //   cascade up the tree if parent nodes are also at capacity. A 'Popped'
0135   //   action propagating all the way to the top of the tree will result in
0136   //   the tree becoming one level higher than the current tree through a final
0137   //   `CordRepBtree::New(tree, popped)` call, resulting in a new top node
0138   //   referencing the old tree and the new (fully popped upwards) 'leg'.
0139   enum Action { kSelf, kCopied, kPopped };
0140 
0141   // Result of an operation on a node. See the `Action` enum for details.
0142   struct OpResult {
0143     CordRepBtree* tree;
0144     Action action;
0145   };
0146 
0147   // Return value of the CopyPrefix and CopySuffix methods which can
0148   // return a node or data edge at any height inside the tree.
0149   // A height of 0 defines the lowest (leaf) node, a height of -1 identifies
0150   // `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof.
0151   struct CopyResult {
0152     CordRep* edge;
0153     int height;
0154   };
0155 
0156   // Logical position inside a node:
0157   // - index: index of the edge.
0158   // - n: size or offset value depending on context.
0159   struct Position {
0160     size_t index;
0161     size_t n;
0162   };
0163 
0164   // Creates a btree from the given input. Adopts a ref of `rep`.
0165   // If the input `rep` is itself a btree, i.e., `IsBtree()`, then this
0166   // function immediately returns `rep->btree()`. If the input is a valid data
0167   // edge (see IsDataEdge()), then a new leaf node is returned containing `rep`
0168   // as the sole data edge. Else, the input is assumed to be a (legacy) concat
0169   // tree, and the input is consumed and transformed into a btree().
0170   static CordRepBtree* Create(CordRep* rep);
0171 
0172   // Destroys the provided tree. Should only be called by cord internal API's,
0173   // typically after a ref_count.Decrement() on the last reference count.
0174   static void Destroy(CordRepBtree* tree);
0175 
0176   // Destruction
0177   static void Delete(CordRepBtree* tree) { delete tree; }
0178 
0179   // Use CordRep::Unref() as we overload for absl::Span<CordRep* const>.
0180   using CordRep::Unref;
0181 
0182   // Unrefs all edges in `edges` which are assumed to be 'likely one'.
0183   static void Unref(absl::Span<CordRep* const> edges);
0184 
0185   // Appends / Prepends an existing CordRep instance to this tree.
0186   // The below methods accept three types of input:
0187   // 1) `rep` is a data node (See `IsDataNode` for valid data edges).
0188   // `rep` is appended or prepended to this tree 'as is'.
0189   // 2) `rep` is a BTREE.
0190   // `rep` is merged into `tree` respecting the Append/Prepend order.
0191   // 3) `rep` is some other (legacy) type.
0192   // `rep` is converted in place and added to `tree`
0193   // Requires `tree` and `rep` to be not null.
0194   static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep);
0195   static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep);
0196 
0197   // Append/Prepend the data in `data` to this tree.
0198   // The `extra` parameter defines how much extra capacity should be allocated
0199   // for any additional FLAT being allocated. This is an optimization hint from
0200   // the caller. For example, a caller may need to add 2 string_views of data
0201   // "abc" and "defghi" which are not consecutive. The caller can in this case
0202   // invoke `AddData(tree, "abc", 6)`, and any newly added flat is allocated
0203   // where possible with at least 6 bytes of extra capacity beyond `length`.
0204   // This helps avoiding data getting fragmented over multiple flats.
0205   // There is no limit on the size of `data`. If `data` can not be stored inside
0206   // a single flat, then the function will iteratively add flats until all data
0207   // has been consumed and appended or prepended to the tree.
0208   static CordRepBtree* Append(CordRepBtree* tree, string_view data,
0209                               size_t extra = 0);
0210   static CordRepBtree* Prepend(CordRepBtree* tree, string_view data,
0211                                size_t extra = 0);
0212 
0213   // Returns a new tree, containing `n` bytes of data from this instance
0214   // starting at offset `offset`. Where possible, the returned tree shares
0215   // (re-uses) data edges and nodes with this instance to minimize the
0216   // combined memory footprint of both trees.
0217   // Requires `offset + n <= length`. Returns `nullptr` if `n` is zero.
0218   CordRep* SubTree(size_t offset, size_t n);
0219 
0220   // Removes `n` trailing bytes from `tree`, and returns the resulting tree
0221   // or data edge. Returns `tree` if n is zero, and nullptr if n == length.
0222   // This function is logically identical to:
0223   //   result = tree->SubTree(0, tree->length - n);
0224   //   Unref(tree);
0225   //   return result;
0226   // However, the actual implementation will as much as possible perform 'in
0227   // place' modifications on the tree on all nodes and edges that are mutable.
0228   // For example, in a fully privately owned tree with the last edge being a
0229   // flat of length 12, RemoveSuffix(1) will simply set the length of that data
0230   // edge to 11, and reduce the length of all nodes on the edge path by 1.
0231   static CordRep* RemoveSuffix(CordRepBtree* tree, size_t n);
0232 
0233   // Returns the character at the given offset.
0234   char GetCharacter(size_t offset) const;
0235 
0236   // Returns true if this node holds a single data edge, and if so, sets
0237   // `fragment` to reference the contained data. `fragment` is an optional
0238   // output parameter and allowed to be null.
0239   bool IsFlat(absl::string_view* fragment) const;
0240 
0241   // Returns true if the data of `n` bytes starting at offset `offset`
0242   // is contained in a single data edge, and if so, sets fragment to reference
0243   // the contained data. `fragment` is an optional output parameter and allowed
0244   // to be null.
0245   bool IsFlat(size_t offset, size_t n, absl::string_view* fragment) const;
0246 
0247   // Returns a span (mutable range of bytes) of up to `size` bytes into the
0248   // last FLAT data edge inside this tree under the following conditions:
0249   // - none of the nodes down into the FLAT node are shared.
0250   // - the last data edge in this tree is a non-shared FLAT.
0251   // - the referenced FLAT has additional capacity available.
0252   // If all these conditions are met, a non-empty span is returned, and the
0253   // length of the flat node and involved tree nodes have been increased by
0254   // `span.length()`. The caller is responsible for immediately assigning values
0255   // to all uninitialized data reference by the returned span.
0256   // Requires `this->refcount.IsOne()`: this function forces the caller to do
0257   // this fast path check on the top level node, as this is the most commonly
0258   // shared node of a cord tree.
0259   Span<char> GetAppendBuffer(size_t size);
0260 
0261   // Extracts the right-most data edge from this tree iff:
0262   // - the tree and all internal edges to the right-most node are not shared.
0263   // - the right-most node is a FLAT node and not shared.
0264   // - the right-most node has at least the desired extra capacity.
0265   //
0266   // Returns {tree, nullptr} if any of the above conditions are not met.
0267   // This method effectively removes data from the tree. The intent of this
0268   // method is to allow applications appending small string data to use
0269   // pre-existing capacity, and add the modified rep back to the tree.
0270   //
0271   // Simplified such code would look similar to this:
0272   //   void MyTreeBuilder::Append(string_view data) {
0273   //     ExtractResult result = CordRepBtree::ExtractAppendBuffer(tree_, 1);
0274   //     if (CordRep* rep = result.extracted) {
0275   //       size_t available = rep->Capacity() - rep->length;
0276   //       size_t n = std::min(data.size(), n);
0277   //       memcpy(rep->Data(), data.data(), n);
0278   //       rep->length += n;
0279   //       data.remove_prefix(n);
0280   //       if (!result.tree->IsBtree()) {
0281   //         tree_ = CordRepBtree::Create(result.tree);
0282   //       }
0283   //       tree_ = CordRepBtree::Append(tree_, rep);
0284   //     }
0285   //     ...
0286   //     // Remaining edge in `result.tree`.
0287   //   }
0288   static ExtractResult ExtractAppendBuffer(CordRepBtree* tree,
0289                                            size_t extra_capacity = 1);
0290 
0291   // Returns the `height` of the tree. The height of a tree is limited to
0292   // kMaxHeight. `height` is implemented as an `int` as in some places we
0293   // use negative (-1) values for 'data edges'.
0294   int height() const { return static_cast<int>(storage[0]); }
0295 
0296   // Properties: begin, back, end, front/back boundary indexes.
0297   size_t begin() const { return static_cast<size_t>(storage[1]); }
0298   size_t back() const { return static_cast<size_t>(storage[2]) - 1; }
0299   size_t end() const { return static_cast<size_t>(storage[2]); }
0300   size_t index(EdgeType edge) const {
0301     return edge == kFront ? begin() : back();
0302   }
0303 
0304   // Properties: size and capacity.
0305   // `capacity` contains the current capacity of this instance, where
0306   // `kMaxCapacity` contains the maximum capacity of a btree node.
0307   // For now, `capacity` and `kMaxCapacity` return the same value, but this may
0308   // change in the future if we see benefit in dynamically sizing 'small' nodes
0309   // to 'large' nodes for large data trees.
0310   size_t size() const { return end() - begin(); }
0311   size_t capacity() const { return kMaxCapacity; }
0312 
0313   // Edge access
0314   inline CordRep* Edge(size_t index) const;
0315   inline CordRep* Edge(EdgeType edge_type) const;
0316   inline absl::Span<CordRep* const> Edges() const;
0317   inline absl::Span<CordRep* const> Edges(size_t begin, size_t end) const;
0318 
0319   // Returns reference to the data edge at `index`.
0320   // Requires this instance to be a leaf node, and `index` to be valid index.
0321   inline absl::string_view Data(size_t index) const;
0322 
0323   // Diagnostics: returns true if `tree` is valid and internally consistent.
0324   // If `shallow` is false, then the provided top level node and all child nodes
0325   // below it are recursively checked. If `shallow` is true, only the provided
0326   // node in `tree` and the cumulative length, type and height of the direct
0327   // child nodes of `tree` are checked. The value of `shallow` is ignored if the
0328   // internal `cord_btree_exhaustive_validation` diagnostics variable is true,
0329   // in which case the performed validations works as if `shallow` were false.
0330   // This function is intended for debugging and testing purposes only.
0331   static bool IsValid(const CordRepBtree* tree, bool shallow = false);
0332 
0333   // Diagnostics: asserts that the provided tree is valid.
0334   // `AssertValid()` performs a shallow validation by default. `shallow` can be
0335   // set to false in which case an exhaustive validation is performed. This
0336   // function is implemented in terms of calling `IsValid()` and asserting the
0337   // return value to be true. See `IsValid()` for more information.
0338   // This function is intended for debugging and testing purposes only.
0339   static CordRepBtree* AssertValid(CordRepBtree* tree, bool shallow = true);
0340   static const CordRepBtree* AssertValid(const CordRepBtree* tree,
0341                                          bool shallow = true);
0342 
0343   // Diagnostics: dump the contents of this tree to `stream`.
0344   // This function is intended for debugging and testing purposes only.
0345   static void Dump(const CordRep* rep, std::ostream& stream);
0346   static void Dump(const CordRep* rep, absl::string_view label,
0347                    std::ostream& stream);
0348   static void Dump(const CordRep* rep, absl::string_view label,
0349                    bool include_contents, std::ostream& stream);
0350 
0351   // Adds the edge `edge` to this node if possible. `owned` indicates if the
0352   // current node is potentially shared or not with other threads. Returns:
0353   // - {kSelf, <this>}
0354   //   The edge was directly added to this node.
0355   // - {kCopied, <node>}
0356   //   The edge was added to a copy of this node.
0357   // - {kPopped, New(edge, height())}
0358   //   A new leg with the edge was created as this node has no extra capacity.
0359   template <EdgeType edge_type>
0360   inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta);
0361 
0362   // Replaces the front or back edge with the provided new edge. Returns:
0363   // - {kSelf, <this>}
0364   //   The edge was directly set in this node. The old edge is unreffed.
0365   // - {kCopied, <node>}
0366   //   A copy of this node was created with the new edge value.
0367   // In both cases, the function adopts a reference on `edge`.
0368   template <EdgeType edge_type>
0369   OpResult SetEdge(bool owned, CordRep* edge, size_t delta);
0370 
0371   // Creates a new empty node at the specified height.
0372   static CordRepBtree* New(int height = 0);
0373 
0374   // Creates a new node containing `rep`, with the height being computed
0375   // automatically based on the type of `rep`.
0376   static CordRepBtree* New(CordRep* rep);
0377 
0378   // Creates a new node containing both `front` and `back` at height
0379   // `front.height() + 1`. Requires `back.height() == front.height()`.
0380   static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back);
0381 
0382   // Creates a fully balanced tree from the provided tree by rebuilding a new
0383   // tree from all data edges in the input. This function is automatically
0384   // invoked internally when the tree exceeds the maximum height.
0385   static CordRepBtree* Rebuild(CordRepBtree* tree);
0386 
0387  private:
0388   CordRepBtree() = default;
0389   ~CordRepBtree() = default;
0390 
0391   // Initializes the main properties `tag`, `begin`, `end`, `height`.
0392   inline void InitInstance(int height, size_t begin = 0, size_t end = 0);
0393 
0394   // Direct property access begin / end
0395   void set_begin(size_t begin) { storage[1] = static_cast<uint8_t>(begin); }
0396   void set_end(size_t end) { storage[2] = static_cast<uint8_t>(end); }
0397 
0398   // Decreases the value of `begin` by `n`, and returns the new value. Notice
0399   // how this returns the new value unlike atomic::fetch_add which returns the
0400   // old value. This is because this is used to prepend edges at 'begin - 1'.
0401   size_t sub_fetch_begin(size_t n) {
0402     storage[1] -= static_cast<uint8_t>(n);
0403     return storage[1];
0404   }
0405 
0406   // Increases the value of `end` by `n`, and returns the previous value. This
0407   // function is typically used to append edges at 'end'.
0408   size_t fetch_add_end(size_t n) {
0409     const uint8_t current = storage[2];
0410     storage[2] = static_cast<uint8_t>(current + n);
0411     return current;
0412   }
0413 
0414   // Returns the index of the last edge starting on, or before `offset`, with
0415   // `n` containing the relative offset of `offset` inside that edge.
0416   // Requires `offset` < length.
0417   Position IndexOf(size_t offset) const;
0418 
0419   // Returns the index of the last edge starting before `offset`, with `n`
0420   // containing the relative offset of `offset` inside that edge.
0421   // This function is useful to find the edges for some span of bytes ending at
0422   // `offset` (i.e., `n` bytes). For example:
0423   //
0424   //   Position pos = IndexBefore(n)
0425   //   edges = Edges(begin(), pos.index)     // All full edges (may be empty)
0426   //   last = Sub(Edge(pos.index), 0, pos.n) // Last partial edge (may be empty)
0427   //
0428   // Requires 0 < `offset` <= length.
0429   Position IndexBefore(size_t offset) const;
0430 
0431   // Returns the index of the edge ending at (or on) length `length`, and the
0432   // number of bytes inside that edge up to `length`. For example, if we have a
0433   // Node with 2 edges, one of 10 and one of 20 long, then IndexOfLength(27)
0434   // will return {1, 17}, and IndexOfLength(10) will return {0, 10}.
0435   Position IndexOfLength(size_t n) const;
0436 
0437   // Identical to the above function except starting from the position `front`.
0438   // This function is equivalent to `IndexBefore(front.n + offset)`, with
0439   // the difference that this function is optimized to start at `front.index`.
0440   Position IndexBefore(Position front, size_t offset) const;
0441 
0442   // Returns the index of the edge directly beyond the edge containing offset
0443   // `offset`, with `n` containing the distance of that edge from `offset`.
0444   // This function is useful for iteratively finding suffix nodes and remaining
0445   // partial bytes in left-most suffix nodes as for example in CopySuffix.
0446   // Requires `offset` < length.
0447   Position IndexBeyond(size_t offset) const;
0448 
0449   // Creates a new leaf node containing as much data as possible from `data`.
0450   // The data is added either forwards or reversed depending on `edge_type`.
0451   // Callers must check the length of the returned node to determine if all data
0452   // was copied or not.
0453   // See the `Append/Prepend` function for the meaning and purpose of `extra`.
0454   template <EdgeType edge_type>
0455   static CordRepBtree* NewLeaf(absl::string_view data, size_t extra);
0456 
0457   // Creates a raw copy of this Btree node with the specified length, copying
0458   // all properties, but without adding any references to existing edges.
0459   CordRepBtree* CopyRaw(size_t new_length) const;
0460 
0461   // Creates a full copy of this Btree node, adding a reference on all edges.
0462   CordRepBtree* Copy() const;
0463 
0464   // Creates a partial copy of this Btree node, copying all edges up to `end`,
0465   // adding a reference on each copied edge, and sets the length of the newly
0466   // created copy to `new_length`.
0467   CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const;
0468 
0469   // Returns a tree containing the edges [tree->begin(), end) and length
0470   // of `new_length`. This method consumes a reference on the provided
0471   // tree, and logically performs the following operation:
0472   //   result = tree->CopyBeginTo(end, new_length);
0473   //   CordRep::Unref(tree);
0474   //   return result;
0475   static CordRepBtree* ConsumeBeginTo(CordRepBtree* tree, size_t end,
0476                                       size_t new_length);
0477 
0478   // Creates a partial copy of this Btree node, copying all edges starting at
0479   // `begin`, adding a reference on each copied edge, and sets the length of
0480   // the newly created copy to `new_length`.
0481   CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const;
0482 
0483   // Extracts and returns the front edge from the provided tree.
0484   // This method consumes a reference on the provided tree, and logically
0485   // performs the following operation:
0486   //   edge = CordRep::Ref(tree->Edge(kFront));
0487   //   CordRep::Unref(tree);
0488   //   return edge;
0489   static CordRep* ExtractFront(CordRepBtree* tree);
0490 
0491   // Returns a tree containing the result of appending `right` to `left`.
0492   static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right);
0493 
0494   // Fallback functions for `Create()`, `Append()` and `Prepend()` which
0495   // deal with legacy / non conforming input, i.e.: CONCAT trees.
0496   static CordRepBtree* CreateSlow(CordRep* rep);
0497   static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep);
0498   static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep);
0499 
0500   // Recursively rebuilds `tree` into `stack`. If 'consume` is set to true, the
0501   // function will consume a reference on `tree`. `stack` is a null terminated
0502   // array containing the new tree's state, with the current leaf node at
0503   // stack[0], and parent nodes above that, or null for 'top of tree'.
0504   static void Rebuild(CordRepBtree** stack, CordRepBtree* tree, bool consume);
0505 
0506   // Aligns existing edges to start at index 0, to allow for a new edge to be
0507   // added to the back of the current edges.
0508   inline void AlignBegin();
0509 
0510   // Aligns existing edges to end at `capacity`, to allow for a new edge to be
0511   // added in front of the current edges.
0512   inline void AlignEnd();
0513 
0514   // Adds the provided edge to this node.
0515   // Requires this node to have capacity for the edge. Realigns / moves
0516   // existing edges as needed to prepend or append the new edge.
0517   template <EdgeType edge_type>
0518   inline void Add(CordRep* rep);
0519 
0520   // Adds the provided edges to this node.
0521   // Requires this node to have capacity for the edges. Realigns / moves
0522   // existing edges as needed to prepend or append the new edges.
0523   template <EdgeType edge_type>
0524   inline void Add(absl::Span<CordRep* const>);
0525 
0526   // Adds data from `data` to this node until either all data has been consumed,
0527   // or there is no more capacity for additional flat nodes inside this node.
0528   // Requires the current node to be a leaf node, data to be non empty, and the
0529   // current node to have capacity for at least one more data edge.
0530   // Returns any remaining data from `data` that was not added, which is
0531   // depending on the edge type (front / back) either the remaining prefix of
0532   // suffix of the input.
0533   // See the `Append/Prepend` function for the meaning and purpose of `extra`.
0534   template <EdgeType edge_type>
0535   absl::string_view AddData(absl::string_view data, size_t extra);
0536 
0537   // Replace the front or back edge with the provided value.
0538   // Adopts a reference on `edge` and unrefs the old edge.
0539   template <EdgeType edge_type>
0540   inline void SetEdge(CordRep* edge);
0541 
0542   // Returns a partial copy of the current tree containing the first `n` bytes
0543   // of data. `CopyResult` contains both the resulting edge and its height. The
0544   // resulting tree may be less high than the current tree, or even be a single
0545   // matching data edge if `allow_folding` is set to true.
0546   // For example, if `n == 1`, then the result will be the single data edge, and
0547   // height will be set to -1 (one below the owning leaf node). If n == 0, this
0548   // function returns null. Requires `n <= length`
0549   CopyResult CopyPrefix(size_t n, bool allow_folding = true);
0550 
0551   // Returns a partial copy of the current tree containing all data starting
0552   // after `offset`. `CopyResult` contains both the resulting edge and its
0553   // height. The resulting tree may be less high than the current tree, or even
0554   // be a single matching data edge. For example, if `n == length - 1`, then the
0555   // result will be a single data edge, and height will be set to -1 (one below
0556   // the owning leaf node).
0557   // Requires `offset < length`
0558   CopyResult CopySuffix(size_t offset);
0559 
0560   // Returns a OpResult value of {this, kSelf} or {Copy(), kCopied}
0561   // depending on the value of `owned`.
0562   inline OpResult ToOpResult(bool owned);
0563 
0564   // Adds `rep` to the specified tree, returning the modified tree.
0565   template <EdgeType edge_type>
0566   static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep);
0567 
0568   // Adds `data` to the specified tree, returning the modified tree.
0569   // See the `Append/Prepend` function for the meaning and purpose of `extra`.
0570   template <EdgeType edge_type>
0571   static CordRepBtree* AddData(CordRepBtree* tree, absl::string_view data,
0572                                size_t extra = 0);
0573 
0574   // Merges `src` into `dst` with `src` being added either before (kFront) or
0575   // after (kBack) `dst`. Requires the height of `dst` to be greater than or
0576   // equal to the height of `src`.
0577   template <EdgeType edge_type>
0578   static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src);
0579 
0580   // Fallback version of GetAppendBuffer for large trees: GetAppendBuffer()
0581   // implements an inlined version for trees of limited height (3 levels),
0582   // GetAppendBufferSlow implements the logic for large trees.
0583   Span<char> GetAppendBufferSlow(size_t size);
0584 
0585   // `edges_` contains all edges starting from this instance.
0586   // These are explicitly `child` edges only, a cord btree (or any cord tree in
0587   // that respect) does not store `parent` pointers anywhere: multiple trees /
0588   // parents can reference the same shared child edge. The type of these edges
0589   // depends on the height of the node. `Leaf nodes` (height == 0) contain `data
0590   // edges` (external or flat nodes, or sub-strings thereof). All other nodes
0591   // (height > 0) contain pointers to BTREE nodes with a height of `height - 1`.
0592   CordRep* edges_[kMaxCapacity];
0593 
0594   friend class CordRepBtreeTestPeer;
0595   friend class CordRepBtreeNavigator;
0596 };
0597 
0598 inline CordRepBtree* CordRep::btree() {
0599   assert(IsBtree());
0600   return static_cast<CordRepBtree*>(this);
0601 }
0602 
0603 inline const CordRepBtree* CordRep::btree() const {
0604   assert(IsBtree());
0605   return static_cast<const CordRepBtree*>(this);
0606 }
0607 
0608 inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) {
0609   tag = BTREE;
0610   storage[0] = static_cast<uint8_t>(height);
0611   storage[1] = static_cast<uint8_t>(begin);
0612   storage[2] = static_cast<uint8_t>(end);
0613 }
0614 
0615 inline CordRep* CordRepBtree::Edge(size_t index) const {
0616   assert(index >= begin());
0617   assert(index < end());
0618   return edges_[index];
0619 }
0620 
0621 inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const {
0622   return edges_[edge_type == kFront ? begin() : back()];
0623 }
0624 
0625 inline absl::Span<CordRep* const> CordRepBtree::Edges() const {
0626   return {edges_ + begin(), size()};
0627 }
0628 
0629 inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin,
0630                                                       size_t end) const {
0631   assert(begin <= end);
0632   assert(begin >= this->begin());
0633   assert(end <= this->end());
0634   return {edges_ + begin, static_cast<size_t>(end - begin)};
0635 }
0636 
0637 inline absl::string_view CordRepBtree::Data(size_t index) const {
0638   assert(height() == 0);
0639   return EdgeData(Edge(index));
0640 }
0641 
0642 inline CordRepBtree* CordRepBtree::New(int height) {
0643   CordRepBtree* tree = new CordRepBtree;
0644   tree->length = 0;
0645   tree->InitInstance(height);
0646   return tree;
0647 }
0648 
0649 inline CordRepBtree* CordRepBtree::New(CordRep* rep) {
0650   CordRepBtree* tree = new CordRepBtree;
0651   int height = rep->IsBtree() ? rep->btree()->height() + 1 : 0;
0652   tree->length = rep->length;
0653   tree->InitInstance(height, /*begin=*/0, /*end=*/1);
0654   tree->edges_[0] = rep;
0655   return tree;
0656 }
0657 
0658 inline CordRepBtree* CordRepBtree::New(CordRepBtree* front,
0659                                        CordRepBtree* back) {
0660   assert(front->height() == back->height());
0661   CordRepBtree* tree = new CordRepBtree;
0662   tree->length = front->length + back->length;
0663   tree->InitInstance(front->height() + 1, /*begin=*/0, /*end=*/2);
0664   tree->edges_[0] = front;
0665   tree->edges_[1] = back;
0666   return tree;
0667 }
0668 
0669 inline void CordRepBtree::Unref(absl::Span<CordRep* const> edges) {
0670   for (CordRep* edge : edges) {
0671     if (ABSL_PREDICT_FALSE(!edge->refcount.Decrement())) {
0672       CordRep::Destroy(edge);
0673     }
0674   }
0675 }
0676 
0677 inline CordRepBtree* CordRepBtree::CopyRaw(size_t new_length) const {
0678   CordRepBtree* tree = new CordRepBtree;
0679 
0680   // `length` and `refcount` are the first members of `CordRepBtree`.
0681   // We initialize `length` using the given length, have `refcount` be set to
0682   // ref = 1 through its default constructor, and copy all data beyond
0683   // 'refcount' which starts with `tag` using a single memcpy: all contents
0684   // except `refcount` is trivially copyable, and the compiler does not
0685   // efficiently coalesce member-wise copy of these members.
0686   // See https://gcc.godbolt.org/z/qY8zsca6z
0687   // # LINT.IfChange(copy_raw)
0688   tree->length = new_length;
0689   uint8_t* dst = &tree->tag;
0690   const uint8_t* src = &tag;
0691   const ptrdiff_t offset = src - reinterpret_cast<const uint8_t*>(this);
0692   memcpy(dst, src, sizeof(CordRepBtree) - static_cast<size_t>(offset));
0693   return tree;
0694   // # LINT.ThenChange()
0695 }
0696 
0697 inline CordRepBtree* CordRepBtree::Copy() const {
0698   CordRepBtree* tree = CopyRaw(length);
0699   for (CordRep* rep : Edges()) CordRep::Ref(rep);
0700   return tree;
0701 }
0702 
0703 inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin,
0704                                                  size_t new_length) const {
0705   assert(begin >= this->begin());
0706   assert(begin <= this->end());
0707   CordRepBtree* tree = CopyRaw(new_length);
0708   tree->set_begin(begin);
0709   for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
0710   return tree;
0711 }
0712 
0713 inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end,
0714                                                size_t new_length) const {
0715   assert(end <= capacity());
0716   assert(end >= this->begin());
0717   CordRepBtree* tree = CopyRaw(new_length);
0718   tree->set_end(end);
0719   for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
0720   return tree;
0721 }
0722 
0723 inline void CordRepBtree::AlignBegin() {
0724   // The below code itself does not need to be fast as typically we have
0725   // mono-directional append/prepend calls, and `begin` / `end` are typically
0726   // adjusted no more than once. But we want to avoid potential register clobber
0727   // effects, making the compiler emit register save/store/spills, and minimize
0728   // the size of code.
0729   const size_t delta = begin();
0730   if (ABSL_PREDICT_FALSE(delta != 0)) {
0731     const size_t new_end = end() - delta;
0732     set_begin(0);
0733     set_end(new_end);
0734     // TODO(mvels): we can write this using 2 loads / 2 stores depending on
0735     // total size for the kMaxCapacity = 6 case. I.e., we can branch (switch) on
0736     // size, and then do overlapping load/store of up to 4 pointers (inlined as
0737     // XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which is a)
0738     // compact and b) not clobbering any registers.
0739     ABSL_ASSUME(new_end <= kMaxCapacity);
0740 #ifdef __clang__
0741 #pragma unroll 1
0742 #endif
0743     for (size_t i = 0; i < new_end; ++i) {
0744       edges_[i] = edges_[i + delta];
0745     }
0746   }
0747 }
0748 
0749 inline void CordRepBtree::AlignEnd() {
0750   // See comments in `AlignBegin` for motivation on the hand-rolled for loops.
0751   const size_t delta = capacity() - end();
0752   if (delta != 0) {
0753     const size_t new_begin = begin() + delta;
0754     const size_t new_end = end() + delta;
0755     set_begin(new_begin);
0756     set_end(new_end);
0757     ABSL_ASSUME(new_end <= kMaxCapacity);
0758 #ifdef __clang__
0759 #pragma unroll 1
0760 #endif
0761     for (size_t i = new_end - 1; i >= new_begin; --i) {
0762       edges_[i] = edges_[i - delta];
0763     }
0764   }
0765 }
0766 
0767 template <>
0768 inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) {
0769   AlignBegin();
0770   edges_[fetch_add_end(1)] = rep;
0771 }
0772 
0773 template <>
0774 inline void CordRepBtree::Add<CordRepBtree::kBack>(
0775     absl::Span<CordRep* const> edges) {
0776   AlignBegin();
0777   size_t new_end = end();
0778   for (CordRep* edge : edges) edges_[new_end++] = edge;
0779   set_end(new_end);
0780 }
0781 
0782 template <>
0783 inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) {
0784   AlignEnd();
0785   edges_[sub_fetch_begin(1)] = rep;
0786 }
0787 
0788 template <>
0789 inline void CordRepBtree::Add<CordRepBtree::kFront>(
0790     absl::Span<CordRep* const> edges) {
0791   AlignEnd();
0792   size_t new_begin = begin() - edges.size();
0793   set_begin(new_begin);
0794   for (CordRep* edge : edges) edges_[new_begin++] = edge;
0795 }
0796 
0797 template <CordRepBtree::EdgeType edge_type>
0798 inline void CordRepBtree::SetEdge(CordRep* edge) {
0799   const int idx = edge_type == kFront ? begin() : back();
0800   CordRep::Unref(edges_[idx]);
0801   edges_[idx] = edge;
0802 }
0803 
0804 inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) {
0805   return owned ? OpResult{this, kSelf} : OpResult{Copy(), kCopied};
0806 }
0807 
0808 inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const {
0809   assert(offset < length);
0810   size_t index = begin();
0811   while (offset >= edges_[index]->length) offset -= edges_[index++]->length;
0812   return {index, offset};
0813 }
0814 
0815 inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const {
0816   assert(offset > 0);
0817   assert(offset <= length);
0818   size_t index = begin();
0819   while (offset > edges_[index]->length) offset -= edges_[index++]->length;
0820   return {index, offset};
0821 }
0822 
0823 inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front,
0824                                                         size_t offset) const {
0825   size_t index = front.index;
0826   offset = offset + front.n;
0827   while (offset > edges_[index]->length) offset -= edges_[index++]->length;
0828   return {index, offset};
0829 }
0830 
0831 inline CordRepBtree::Position CordRepBtree::IndexOfLength(size_t n) const {
0832   assert(n <= length);
0833   size_t index = back();
0834   size_t strip = length - n;
0835   while (strip >= edges_[index]->length) strip -= edges_[index--]->length;
0836   return {index, edges_[index]->length - strip};
0837 }
0838 
0839 inline CordRepBtree::Position CordRepBtree::IndexBeyond(
0840     const size_t offset) const {
0841   // We need to find the edge which `starting offset` is beyond (>=)`offset`.
0842   // For this we can't use the `offset -= length` logic of IndexOf. Instead, we
0843   // track the offset of the `current edge` in `off`, which we increase as we
0844   // iterate over the edges until we find the matching edge.
0845   size_t off = 0;
0846   size_t index = begin();
0847   while (offset > off) off += edges_[index++]->length;
0848   return {index, off - offset};
0849 }
0850 
0851 inline CordRepBtree* CordRepBtree::Create(CordRep* rep) {
0852   if (IsDataEdge(rep)) return New(rep);
0853   return CreateSlow(rep);
0854 }
0855 
0856 inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) {
0857   assert(refcount.IsOne());
0858   CordRepBtree* tree = this;
0859   const int height = this->height();
0860   CordRepBtree* n1 = tree;
0861   CordRepBtree* n2 = tree;
0862   CordRepBtree* n3 = tree;
0863   switch (height) {
0864     case 3:
0865       tree = tree->Edge(kBack)->btree();
0866       if (!tree->refcount.IsOne()) return {};
0867       n2 = tree;
0868       ABSL_FALLTHROUGH_INTENDED;
0869     case 2:
0870       tree = tree->Edge(kBack)->btree();
0871       if (!tree->refcount.IsOne()) return {};
0872       n1 = tree;
0873       ABSL_FALLTHROUGH_INTENDED;
0874     case 1:
0875       tree = tree->Edge(kBack)->btree();
0876       if (!tree->refcount.IsOne()) return {};
0877       ABSL_FALLTHROUGH_INTENDED;
0878     case 0:
0879       CordRep* edge = tree->Edge(kBack);
0880       if (!edge->refcount.IsOne()) return {};
0881       if (edge->tag < FLAT) return {};
0882       size_t avail = edge->flat()->Capacity() - edge->length;
0883       if (avail == 0) return {};
0884       size_t delta = (std::min)(size, avail);
0885       Span<char> span = {edge->flat()->Data() + edge->length, delta};
0886       edge->length += delta;
0887       switch (height) {
0888         case 3:
0889           n3->length += delta;
0890           ABSL_FALLTHROUGH_INTENDED;
0891         case 2:
0892           n2->length += delta;
0893           ABSL_FALLTHROUGH_INTENDED;
0894         case 1:
0895           n1->length += delta;
0896           ABSL_FALLTHROUGH_INTENDED;
0897         case 0:
0898           tree->length += delta;
0899           return span;
0900       }
0901       break;
0902   }
0903   return GetAppendBufferSlow(size);
0904 }
0905 
0906 extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>(
0907     CordRepBtree* tree, CordRep* rep);
0908 
0909 extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>(
0910     CordRepBtree* tree, CordRep* rep);
0911 
0912 inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) {
0913   if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
0914     return CordRepBtree::AddCordRep<kBack>(tree, rep);
0915   }
0916   return AppendSlow(tree, rep);
0917 }
0918 
0919 inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) {
0920   if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
0921     return CordRepBtree::AddCordRep<kFront>(tree, rep);
0922   }
0923   return PrependSlow(tree, rep);
0924 }
0925 
0926 #ifdef NDEBUG
0927 
0928 inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree,
0929                                                bool /* shallow */) {
0930   return tree;
0931 }
0932 
0933 inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
0934                                                      bool /* shallow */) {
0935   return tree;
0936 }
0937 
0938 #endif
0939 
0940 }  // namespace cord_internal
0941 ABSL_NAMESPACE_END
0942 }  // namespace absl
0943 
0944 #endif  // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_