Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:37:12

0001 //===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 // Defines the basic structure of the syntax tree. There are two kinds of nodes:
0009 //   - leaf nodes correspond to tokens,
0010 //   - tree nodes correspond to language grammar constructs.
0011 //
0012 // The tree is initially built from an AST. Each node of a newly built tree
0013 // covers a continuous subrange of expanded tokens (i.e. tokens after
0014 // preprocessing), the specific tokens coverered are stored in the leaf nodes of
0015 // a tree. A post-order traversal of a tree will visit leaf nodes in an order
0016 // corresponding the original order of expanded tokens.
0017 //
0018 // This is still work in progress and highly experimental, we leave room for
0019 // ourselves to completely change the design and/or implementation.
0020 //===----------------------------------------------------------------------===//
0021 #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H
0022 #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H
0023 
0024 #include "clang/Basic/TokenKinds.h"
0025 #include "clang/Tooling/Syntax/TokenManager.h"
0026 #include "llvm/ADT/iterator.h"
0027 #include "llvm/Support/Allocator.h"
0028 #include <cstdint>
0029 #include <vector>
0030 
0031 namespace clang {
0032 namespace syntax {
0033 
0034 /// A memory arena for syntax trees.
0035 // FIXME: use BumpPtrAllocator directly.
0036 class Arena {
0037 public:
0038   llvm::BumpPtrAllocator &getAllocator() { return Allocator; }
0039 private:
0040   /// Keeps all the allocated nodes and their intermediate data structures.
0041   llvm::BumpPtrAllocator Allocator;
0042 };
0043 
0044 class Tree;
0045 class TreeBuilder;
0046 class FactoryImpl;
0047 class MutationsImpl;
0048 
0049 enum class NodeKind : uint16_t;
0050 enum class NodeRole : uint8_t;
0051 
0052 /// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
0053 /// a Tree (representing language constructrs).
0054 class Node {
0055 protected:
0056   /// Newly created nodes are detached from a tree, parent and sibling links are
0057   /// set when the node is added as a child to another one.
0058   Node(NodeKind Kind);
0059   /// Nodes are allocated on Arenas; the destructor is never called.
0060   ~Node() = default;
0061 
0062 public:
0063   /// Nodes cannot simply be copied without violating tree invariants.
0064   Node(const Node &) = delete;
0065   Node &operator=(const Node &) = delete;
0066   /// Idiomatically, nodes are allocated on an Arena and never moved.
0067   Node(Node &&) = delete;
0068   Node &operator=(Node &&) = delete;
0069 
0070   NodeKind getKind() const { return static_cast<NodeKind>(Kind); }
0071   NodeRole getRole() const { return static_cast<NodeRole>(Role); }
0072 
0073   /// Whether the node is detached from a tree, i.e. does not have a parent.
0074   bool isDetached() const;
0075   /// Whether the node was created from the AST backed by the source code
0076   /// rather than added later through mutation APIs or created with factory
0077   /// functions.
0078   /// When this flag is true, all subtrees are also original.
0079   /// This flag is set to false on any modifications to the node or any of its
0080   /// subtrees, even if this simply involves swapping existing subtrees.
0081   bool isOriginal() const { return Original; }
0082   /// If this function return false, the tree cannot be modified because there
0083   /// is no reasonable way to produce the corresponding textual replacements.
0084   /// This can happen when the node crosses macro expansion boundaries.
0085   ///
0086   /// Note that even if the node is not modifiable, its child nodes can be
0087   /// modifiable.
0088   bool canModify() const { return CanModify; }
0089 
0090   const Tree *getParent() const { return Parent; }
0091   Tree *getParent() { return Parent; }
0092 
0093   const Node *getNextSibling() const { return NextSibling; }
0094   Node *getNextSibling() { return NextSibling; }
0095   const Node *getPreviousSibling() const { return PreviousSibling; }
0096   Node *getPreviousSibling() { return PreviousSibling; }
0097 
0098   /// Dumps the structure of a subtree. For debugging and testing purposes.
0099   std::string dump(const TokenManager &SM) const;
0100   /// Dumps the tokens forming this subtree.
0101   std::string dumpTokens(const TokenManager &SM) const;
0102 
0103   /// Asserts invariants on this node of the tree and its immediate children.
0104   /// Will not recurse into the subtree. No-op if NDEBUG is set.
0105   void assertInvariants() const;
0106   /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set.
0107   void assertInvariantsRecursive() const;
0108 
0109 private:
0110   // Tree is allowed to change the Parent link and Role.
0111   friend class Tree;
0112   // TreeBuilder is allowed to set the Original and CanModify flags.
0113   friend class TreeBuilder;
0114   // MutationsImpl sets roles and CanModify flag.
0115   friend class MutationsImpl;
0116   // FactoryImpl sets CanModify flag.
0117   friend class FactoryImpl;
0118 
0119   void setRole(NodeRole NR);
0120 
0121   Tree *Parent;
0122   Node *NextSibling;
0123   Node *PreviousSibling;
0124   unsigned Kind : 16;
0125   unsigned Role : 8;
0126   unsigned Original : 1;
0127   unsigned CanModify : 1;
0128 };
0129 
0130 /// A leaf node points to a single token.
0131 // FIXME: add TokenKind field (borrow some bits from the Node::kind).
0132 class Leaf final : public Node {
0133 public:
0134   Leaf(TokenManager::Key K);
0135   static bool classof(const Node *N);
0136 
0137   TokenManager::Key getTokenKey() const { return K; }
0138 
0139 private:
0140   TokenManager::Key K;
0141 };
0142 
0143 /// A node that has children and represents a syntactic language construct.
0144 class Tree : public Node {
0145   /// Iterator over children (common base for const/non-const).
0146   /// Not invalidated by tree mutations (holds a stable node pointer).
0147   template <typename DerivedT, typename NodeT>
0148   class ChildIteratorBase
0149       : public llvm::iterator_facade_base<DerivedT, std::forward_iterator_tag,
0150                                           NodeT> {
0151   protected:
0152     NodeT *N = nullptr;
0153     using Base = ChildIteratorBase;
0154 
0155   public:
0156     ChildIteratorBase() = default;
0157     explicit ChildIteratorBase(NodeT *N) : N(N) {}
0158 
0159     friend bool operator==(const DerivedT &LHS, const DerivedT &RHS) {
0160       return LHS.N == RHS.N;
0161     }
0162 
0163     NodeT &operator*() const { return *N; }
0164     DerivedT &operator++() {
0165       N = N->getNextSibling();
0166       return *static_cast<DerivedT *>(this);
0167     }
0168 
0169     /// Truthy if valid (not past-the-end).
0170     /// This allows: if (auto It = find_if(N.children(), ...) )
0171     explicit operator bool() const { return N != nullptr; }
0172     /// The element, or nullptr if past-the-end.
0173     NodeT *asPointer() const { return N; }
0174   };
0175 
0176 public:
0177   static bool classof(const Node *N);
0178 
0179   Node *getFirstChild() { return FirstChild; }
0180   const Node *getFirstChild() const { return FirstChild; }
0181   Node *getLastChild() { return LastChild; }
0182   const Node *getLastChild() const { return LastChild; }
0183 
0184   const Leaf *findFirstLeaf() const;
0185   Leaf *findFirstLeaf() {
0186     return const_cast<Leaf *>(const_cast<const Tree *>(this)->findFirstLeaf());
0187   }
0188 
0189   const Leaf *findLastLeaf() const;
0190   Leaf *findLastLeaf() {
0191     return const_cast<Leaf *>(const_cast<const Tree *>(this)->findLastLeaf());
0192   }
0193 
0194   /// child_iterator is not invalidated by mutations.
0195   struct ChildIterator : ChildIteratorBase<ChildIterator, Node> {
0196     using Base::ChildIteratorBase;
0197   };
0198   struct ConstChildIterator
0199       : ChildIteratorBase<ConstChildIterator, const Node> {
0200     using Base::ChildIteratorBase;
0201     ConstChildIterator() = default;
0202     ConstChildIterator(const ChildIterator &I) : Base(I.asPointer()) {}
0203   };
0204 
0205   llvm::iterator_range<ChildIterator> getChildren() {
0206     return {ChildIterator(getFirstChild()), ChildIterator()};
0207   }
0208   llvm::iterator_range<ConstChildIterator> getChildren() const {
0209     return {ConstChildIterator(getFirstChild()), ConstChildIterator()};
0210   }
0211 
0212   /// Find the first node with a corresponding role.
0213   const Node *findChild(NodeRole R) const;
0214   Node *findChild(NodeRole R) {
0215     return const_cast<Node *>(const_cast<const Tree *>(this)->findChild(R));
0216   }
0217 
0218 protected:
0219   using Node::Node;
0220 
0221 private:
0222   /// Append \p Child to the list of children and sets the parent pointer.
0223   /// A very low-level operation that does not check any invariants, only used
0224   /// by TreeBuilder and FactoryImpl.
0225   /// EXPECTS: Role != Detached.
0226   void appendChildLowLevel(Node *Child, NodeRole Role);
0227   /// Similar but prepends.
0228   void prependChildLowLevel(Node *Child, NodeRole Role);
0229 
0230   /// Like the previous overloads, but does not set role for \p Child.
0231   /// EXPECTS: Child->Role != Detached
0232   void appendChildLowLevel(Node *Child);
0233   void prependChildLowLevel(Node *Child);
0234   friend class TreeBuilder;
0235   friend class FactoryImpl;
0236 
0237   /// Replace a range of children [Begin, End) with a list of
0238   /// new nodes starting at \p New.
0239   /// Only used by MutationsImpl to implement higher-level mutation operations.
0240   /// (!) \p New can be null to model removal of the child range.
0241   /// (!) \p End can be null to model one past the end.
0242   /// (!) \p Begin can be null to model an append.
0243   void replaceChildRangeLowLevel(Node *Begin, Node *End, Node *New);
0244   friend class MutationsImpl;
0245 
0246   Node *FirstChild = nullptr;
0247   Node *LastChild = nullptr;
0248 };
0249 
0250 /// A list of Elements separated or terminated by a fixed token.
0251 ///
0252 /// This type models the following grammar construct:
0253 /// delimited-list(element, delimiter, termination, canBeEmpty)
0254 class List : public Tree {
0255 public:
0256   template <typename Element> struct ElementAndDelimiter {
0257     Element *element;
0258     Leaf *delimiter;
0259   };
0260 
0261   enum class TerminationKind {
0262     Terminated,
0263     MaybeTerminated,
0264     Separated,
0265   };
0266 
0267   using Tree::Tree;
0268   static bool classof(const Node *N);
0269   /// Returns the elements and corresponding delimiters. Missing elements
0270   /// and delimiters are represented as null pointers.
0271   ///
0272   /// For example, in a separated list:
0273   /// "a, b, c"  <=> [("a" , ","), ("b" , "," ), ("c" , null)]
0274   /// "a,  , c"  <=> [("a" , ","), (null, "," ), ("c" , null)]
0275   /// "a, b  c"  <=> [("a" , ","), ("b" , null), ("c" , null)]
0276   /// "a, b,"    <=> [("a" , ","), ("b" , "," ), (null, null)]
0277   ///
0278   /// In a terminated or maybe-terminated list:
0279   /// "a; b; c;" <=> [("a" , ";"), ("b" , ";" ), ("c" , ";" )]
0280   /// "a;  ; c;" <=> [("a" , ";"), (null, ";" ), ("c" , ";" )]
0281   /// "a; b  c;" <=> [("a" , ";"), ("b" , null), ("c" , ";" )]
0282   /// "a; b; c"  <=> [("a" , ";"), ("b" , ";" ), ("c" , null)]
0283   std::vector<ElementAndDelimiter<Node>> getElementsAsNodesAndDelimiters();
0284 
0285   /// Returns the elements of the list. Missing elements are represented
0286   /// as null pointers in the same way as in the return value of
0287   /// `getElementsAsNodesAndDelimiters()`.
0288   std::vector<Node *> getElementsAsNodes();
0289 
0290   // These can't be implemented with the information we have!
0291 
0292   /// Returns the appropriate delimiter for this list.
0293   ///
0294   /// Useful for discovering the correct delimiter to use when adding
0295   /// elements to empty or one-element lists.
0296   clang::tok::TokenKind getDelimiterTokenKind() const;
0297 
0298   TerminationKind getTerminationKind() const;
0299 
0300   /// Whether this list can be empty in syntactically and semantically correct
0301   /// code.
0302   ///
0303   /// This list may be empty when the source code has errors even if
0304   /// canBeEmpty() returns false.
0305   bool canBeEmpty() const;
0306 };
0307 
0308 } // namespace syntax
0309 } // namespace clang
0310 
0311 #endif