Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:44:35

0001 //===- YAMLParser.h - Simple YAML parser ------------------------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 //  This is a YAML 1.2 parser.
0010 //
0011 //  See http://www.yaml.org/spec/1.2/spec.html for the full standard.
0012 //
0013 //  This currently does not implement the following:
0014 //    * Tag resolution.
0015 //    * UTF-16.
0016 //    * BOMs anywhere other than the first Unicode scalar value in the file.
0017 //
0018 //  The most important class here is Stream. This represents a YAML stream with
0019 //  0, 1, or many documents.
0020 //
0021 //  SourceMgr sm;
0022 //  StringRef input = getInput();
0023 //  yaml::Stream stream(input, sm);
0024 //
0025 //  for (yaml::document_iterator di = stream.begin(), de = stream.end();
0026 //       di != de; ++di) {
0027 //    yaml::Node *n = di->getRoot();
0028 //    if (n) {
0029 //      // Do something with n...
0030 //    } else
0031 //      break;
0032 //  }
0033 //
0034 //===----------------------------------------------------------------------===//
0035 
0036 #ifndef LLVM_SUPPORT_YAMLPARSER_H
0037 #define LLVM_SUPPORT_YAMLPARSER_H
0038 
0039 #include "llvm/ADT/StringRef.h"
0040 #include "llvm/Support/Allocator.h"
0041 #include "llvm/Support/SMLoc.h"
0042 #include "llvm/Support/SourceMgr.h"
0043 #include <cassert>
0044 #include <cstddef>
0045 #include <iterator>
0046 #include <map>
0047 #include <memory>
0048 #include <optional>
0049 #include <string>
0050 #include <system_error>
0051 
0052 namespace llvm {
0053 
0054 class MemoryBufferRef;
0055 class raw_ostream;
0056 class Twine;
0057 
0058 namespace yaml {
0059 
0060 class Document;
0061 class document_iterator;
0062 class Node;
0063 class Scanner;
0064 struct Token;
0065 
0066 /// Dump all the tokens in this stream to OS.
0067 /// \returns true if there was an error, false otherwise.
0068 bool dumpTokens(StringRef Input, raw_ostream &);
0069 
0070 /// Scans all tokens in input without outputting anything. This is used
0071 ///        for benchmarking the tokenizer.
0072 /// \returns true if there was an error, false otherwise.
0073 bool scanTokens(StringRef Input);
0074 
0075 /// Escape \a Input for a double quoted scalar; if \p EscapePrintable
0076 /// is true, all UTF8 sequences will be escaped, if \p EscapePrintable is
0077 /// false, those UTF8 sequences encoding printable unicode scalars will not be
0078 /// escaped, but emitted verbatim.
0079 std::string escape(StringRef Input, bool EscapePrintable = true);
0080 
0081 /// Parse \p S as a bool according to https://yaml.org/type/bool.html.
0082 std::optional<bool> parseBool(StringRef S);
0083 
0084 /// This class represents a YAML stream potentially containing multiple
0085 ///        documents.
0086 class Stream {
0087 public:
0088   /// This keeps a reference to the string referenced by \p Input.
0089   Stream(StringRef Input, SourceMgr &, bool ShowColors = true,
0090          std::error_code *EC = nullptr);
0091 
0092   Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true,
0093          std::error_code *EC = nullptr);
0094   ~Stream();
0095 
0096   document_iterator begin();
0097   document_iterator end();
0098   void skip();
0099   bool failed();
0100 
0101   bool validate() {
0102     skip();
0103     return !failed();
0104   }
0105 
0106   void printError(Node *N, const Twine &Msg,
0107                   SourceMgr::DiagKind Kind = SourceMgr::DK_Error);
0108   void printError(const SMRange &Range, const Twine &Msg,
0109                   SourceMgr::DiagKind Kind = SourceMgr::DK_Error);
0110 
0111 private:
0112   friend class Document;
0113 
0114   std::unique_ptr<Scanner> scanner;
0115   std::unique_ptr<Document> CurrentDoc;
0116 };
0117 
0118 /// Abstract base class for all Nodes.
0119 class Node {
0120   virtual void anchor();
0121 
0122 public:
0123   enum NodeKind {
0124     NK_Null,
0125     NK_Scalar,
0126     NK_BlockScalar,
0127     NK_KeyValue,
0128     NK_Mapping,
0129     NK_Sequence,
0130     NK_Alias
0131   };
0132 
0133   Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor,
0134        StringRef Tag);
0135 
0136   // It's not safe to copy YAML nodes; the document is streamed and the position
0137   // is part of the state.
0138   Node(const Node &) = delete;
0139   void operator=(const Node &) = delete;
0140 
0141   void *operator new(size_t Size, BumpPtrAllocator &Alloc,
0142                      size_t Alignment = 16) noexcept {
0143     return Alloc.Allocate(Size, Alignment);
0144   }
0145 
0146   void operator delete(void *Ptr, BumpPtrAllocator &Alloc,
0147                        size_t Size) noexcept {
0148     Alloc.Deallocate(Ptr, Size, 0);
0149   }
0150 
0151   void operator delete(void *) noexcept = delete;
0152 
0153   /// Get the value of the anchor attached to this node. If it does not
0154   ///        have one, getAnchor().size() will be 0.
0155   StringRef getAnchor() const { return Anchor; }
0156 
0157   /// Get the tag as it was written in the document. This does not
0158   ///   perform tag resolution.
0159   StringRef getRawTag() const { return Tag; }
0160 
0161   /// Get the verbatium tag for a given Node. This performs tag resoluton
0162   ///   and substitution.
0163   std::string getVerbatimTag() const;
0164 
0165   SMRange getSourceRange() const { return SourceRange; }
0166   void setSourceRange(SMRange SR) { SourceRange = SR; }
0167 
0168   // These functions forward to Document and Scanner.
0169   Token &peekNext();
0170   Token getNext();
0171   Node *parseBlockNode();
0172   BumpPtrAllocator &getAllocator();
0173   void setError(const Twine &Message, Token &Location) const;
0174   bool failed() const;
0175 
0176   virtual void skip() {}
0177 
0178   unsigned int getType() const { return TypeID; }
0179 
0180 protected:
0181   std::unique_ptr<Document> &Doc;
0182   SMRange SourceRange;
0183 
0184   ~Node() = default;
0185 
0186 private:
0187   unsigned int TypeID;
0188   StringRef Anchor;
0189   /// The tag as typed in the document.
0190   StringRef Tag;
0191 };
0192 
0193 /// A null value.
0194 ///
0195 /// Example:
0196 ///   !!null null
0197 class NullNode final : public Node {
0198   void anchor() override;
0199 
0200 public:
0201   NullNode(std::unique_ptr<Document> &D)
0202       : Node(NK_Null, D, StringRef(), StringRef()) {}
0203 
0204   static bool classof(const Node *N) { return N->getType() == NK_Null; }
0205 };
0206 
0207 /// A scalar node is an opaque datum that can be presented as a
0208 ///        series of zero or more Unicode scalar values.
0209 ///
0210 /// Example:
0211 ///   Adena
0212 class ScalarNode final : public Node {
0213   void anchor() override;
0214 
0215 public:
0216   ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
0217              StringRef Val)
0218       : Node(NK_Scalar, D, Anchor, Tag), Value(Val) {
0219     SMLoc Start = SMLoc::getFromPointer(Val.begin());
0220     SMLoc End = SMLoc::getFromPointer(Val.end());
0221     SourceRange = SMRange(Start, End);
0222   }
0223 
0224   // Return Value without any escaping or folding or other fun YAML stuff. This
0225   // is the exact bytes that are contained in the file (after conversion to
0226   // utf8).
0227   StringRef getRawValue() const { return Value; }
0228 
0229   /// Gets the value of this node as a StringRef.
0230   ///
0231   /// \param Storage is used to store the content of the returned StringRef if
0232   ///        it requires any modification from how it appeared in the source.
0233   ///        This happens with escaped characters and multi-line literals.
0234   StringRef getValue(SmallVectorImpl<char> &Storage) const;
0235 
0236   static bool classof(const Node *N) {
0237     return N->getType() == NK_Scalar;
0238   }
0239 
0240 private:
0241   StringRef Value;
0242 
0243   StringRef getDoubleQuotedValue(StringRef UnquotedValue,
0244                                  SmallVectorImpl<char> &Storage) const;
0245 
0246   static StringRef getSingleQuotedValue(StringRef RawValue,
0247                                         SmallVectorImpl<char> &Storage);
0248 
0249   static StringRef getPlainValue(StringRef RawValue,
0250                                  SmallVectorImpl<char> &Storage);
0251 };
0252 
0253 /// A block scalar node is an opaque datum that can be presented as a
0254 ///        series of zero or more Unicode scalar values.
0255 ///
0256 /// Example:
0257 ///   |
0258 ///     Hello
0259 ///     World
0260 class BlockScalarNode final : public Node {
0261   void anchor() override;
0262 
0263 public:
0264   BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
0265                   StringRef Value, StringRef RawVal)
0266       : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) {
0267     SMLoc Start = SMLoc::getFromPointer(RawVal.begin());
0268     SMLoc End = SMLoc::getFromPointer(RawVal.end());
0269     SourceRange = SMRange(Start, End);
0270   }
0271 
0272   /// Gets the value of this node as a StringRef.
0273   StringRef getValue() const { return Value; }
0274 
0275   static bool classof(const Node *N) {
0276     return N->getType() == NK_BlockScalar;
0277   }
0278 
0279 private:
0280   StringRef Value;
0281 };
0282 
0283 /// A key and value pair. While not technically a Node under the YAML
0284 ///        representation graph, it is easier to treat them this way.
0285 ///
0286 /// TODO: Consider making this not a child of Node.
0287 ///
0288 /// Example:
0289 ///   Section: .text
0290 class KeyValueNode final : public Node {
0291   void anchor() override;
0292 
0293 public:
0294   KeyValueNode(std::unique_ptr<Document> &D)
0295       : Node(NK_KeyValue, D, StringRef(), StringRef()) {}
0296 
0297   /// Parse and return the key.
0298   ///
0299   /// This may be called multiple times.
0300   ///
0301   /// \returns The key, or nullptr if failed() == true.
0302   Node *getKey();
0303 
0304   /// Parse and return the value.
0305   ///
0306   /// This may be called multiple times.
0307   ///
0308   /// \returns The value, or nullptr if failed() == true.
0309   Node *getValue();
0310 
0311   void skip() override {
0312     if (Node *Key = getKey()) {
0313       Key->skip();
0314       if (Node *Val = getValue())
0315         Val->skip();
0316     }
0317   }
0318 
0319   static bool classof(const Node *N) {
0320     return N->getType() == NK_KeyValue;
0321   }
0322 
0323 private:
0324   Node *Key = nullptr;
0325   Node *Value = nullptr;
0326 };
0327 
/// Iterator abstraction over YAML collections, shared by both sequences and
/// maps.
///
/// BaseT must provide a ValueT* member named CurrentEntry and a member
/// function increment(); increment() sets CurrentEntry to null once the
/// collection is exhausted, which turns this iterator into an end iterator.
template <class BaseT, class ValueT> class basic_collection_iterator {
public:
  using iterator_category = std::input_iterator_tag;
  using value_type = ValueT;
  using difference_type = std::ptrdiff_t;
  using pointer = value_type *;
  using reference = value_type &;

  /// A default-constructed iterator has no collection and is the end
  /// iterator.
  basic_collection_iterator() = default;

  /// Attach the iterator to the collection \p B.
  basic_collection_iterator(BaseT *B) : Base(B) {}

  /// Access the collection's current entry; must not be used on an end
  /// iterator.
  ValueT *operator->() const {
    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    ValueT *Entry = Base->CurrentEntry;
    return Entry;
  }

  /// Dereference the collection's current entry; must not be used on an end
  /// iterator.
  ValueT &operator*() const {
    assert(Base && Base->CurrentEntry &&
           "Attempted to dereference end iterator!");
    ValueT *Entry = Base->CurrentEntry;
    return *Entry;
  }

  /// Implicit conversion to a pointer to the current entry; must not be used
  /// on an end iterator.
  operator ValueT *() const {
    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    ValueT *Entry = Base->CurrentEntry;
    return Entry;
  }

  /// Note on EqualityComparable:
  ///
  /// The iterator is not re-entrant; it is meant for parsing YAML on demand.
  /// Once iteration has started, a collection points at exactly one entry at
  /// a time, so Base.CurrentEntry and Other.Base.CurrentEntry are equal iff
  /// Base and Other.Base are equal.
  bool operator==(const basic_collection_iterator &Other) const {
    const bool SameBase = (Base == Other.Base);
    if (SameBase && Base) {
      assert((Base->CurrentEntry == Other.Base->CurrentEntry)
             && "Equal Bases expected to point to equal Entries");
    }

    return SameBase;
  }

  bool operator!=(const basic_collection_iterator &Other) const {
    return Base != Other.Base;
  }

  /// Advance the underlying collection by one entry. When the collection has
  /// no further entry, this iterator becomes the end iterator.
  basic_collection_iterator &operator++() {
    assert(Base && "Attempted to advance iterator past end!");
    Base->increment();
    const bool Exhausted = (Base->CurrentEntry == nullptr);
    if (Exhausted)
      Base = nullptr; // Become an end iterator.
    return *this;
  }

private:
  BaseT *Base = nullptr;
};
0392 
// The following two templates are used for both MappingNode and SequenceNode.

/// Start iterating over \p C: clears its IsAtBeginning flag and returns an
/// iterator that has already been advanced once. A collection may only be
/// iterated a single time.
template <class CollectionType>
typename CollectionType::iterator begin(CollectionType &C) {
  assert(C.IsAtBeginning && "You may only iterate over a collection once!");
  C.IsAtBeginning = false;

  using IterT = typename CollectionType::iterator;
  IterT First(&C);
  ++First;
  return First;
}
0402 
/// Skip every entry of \p C when iteration has not started yet. Does nothing
/// when \p C is no longer at its beginning; skipping from the middle is not
/// supported and is rejected by the assertion.
template <class CollectionType> void skip(CollectionType &C) {
  // TODO: support skipping from the middle of a parsed collection ;/
  assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
  if (!C.IsAtBeginning)
    return;

  typename CollectionType::iterator It = begin(C);
  typename CollectionType::iterator End = C.end();
  while (It != End) {
    It->skip();
    ++It;
  }
}
0411 
0412 /// Represents a YAML map created from either a block map for a flow map.
0413 ///
0414 /// This parses the YAML stream as increment() is called.
0415 ///
0416 /// Example:
0417 ///   Name: _main
0418 ///   Scope: Global
0419 class MappingNode final : public Node {
0420   void anchor() override;
0421 
0422 public:
0423   enum MappingType {
0424     MT_Block,
0425     MT_Flow,
0426     MT_Inline ///< An inline mapping node is used for "[key: value]".
0427   };
0428 
0429   MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
0430               MappingType MT)
0431       : Node(NK_Mapping, D, Anchor, Tag), Type(MT) {}
0432 
0433   friend class basic_collection_iterator<MappingNode, KeyValueNode>;
0434 
0435   using iterator = basic_collection_iterator<MappingNode, KeyValueNode>;
0436 
0437   template <class T> friend typename T::iterator yaml::begin(T &);
0438   template <class T> friend void yaml::skip(T &);
0439 
0440   iterator begin() { return yaml::begin(*this); }
0441 
0442   iterator end() { return iterator(); }
0443 
0444   void skip() override { yaml::skip(*this); }
0445 
0446   static bool classof(const Node *N) {
0447     return N->getType() == NK_Mapping;
0448   }
0449 
0450 private:
0451   MappingType Type;
0452   bool IsAtBeginning = true;
0453   bool IsAtEnd = false;
0454   KeyValueNode *CurrentEntry = nullptr;
0455 
0456   void increment();
0457 };
0458 
0459 /// Represents a YAML sequence created from either a block sequence for a
0460 ///        flow sequence.
0461 ///
0462 /// This parses the YAML stream as increment() is called.
0463 ///
0464 /// Example:
0465 ///   - Hello
0466 ///   - World
0467 class SequenceNode final : public Node {
0468   void anchor() override;
0469 
0470 public:
0471   enum SequenceType {
0472     ST_Block,
0473     ST_Flow,
0474     // Use for:
0475     //
0476     // key:
0477     // - val1
0478     // - val2
0479     //
0480     // As a BlockMappingEntry and BlockEnd are not created in this case.
0481     ST_Indentless
0482   };
0483 
0484   SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
0485                SequenceType ST)
0486       : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST) {}
0487 
0488   friend class basic_collection_iterator<SequenceNode, Node>;
0489 
0490   using iterator = basic_collection_iterator<SequenceNode, Node>;
0491 
0492   template <class T> friend typename T::iterator yaml::begin(T &);
0493   template <class T> friend void yaml::skip(T &);
0494 
0495   void increment();
0496 
0497   iterator begin() { return yaml::begin(*this); }
0498 
0499   iterator end() { return iterator(); }
0500 
0501   void skip() override { yaml::skip(*this); }
0502 
0503   static bool classof(const Node *N) {
0504     return N->getType() == NK_Sequence;
0505   }
0506 
0507 private:
0508   SequenceType SeqType;
0509   bool IsAtBeginning = true;
0510   bool IsAtEnd = false;
0511   bool WasPreviousTokenFlowEntry = true; // Start with an imaginary ','.
0512   Node *CurrentEntry = nullptr;
0513 };
0514 
0515 /// Represents an alias to a Node with an anchor.
0516 ///
0517 /// Example:
0518 ///   *AnchorName
0519 class AliasNode final : public Node {
0520   void anchor() override;
0521 
0522 public:
0523   AliasNode(std::unique_ptr<Document> &D, StringRef Val)
0524       : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
0525 
0526   StringRef getName() const { return Name; }
0527 
0528   static bool classof(const Node *N) { return N->getType() == NK_Alias; }
0529 
0530 private:
0531   StringRef Name;
0532 };
0533 
0534 /// A YAML Stream is a sequence of Documents. A document contains a root
0535 ///        node.
0536 class Document {
0537 public:
0538   Document(Stream &ParentStream);
0539 
0540   /// Root for parsing a node. Returns a single node.
0541   Node *parseBlockNode();
0542 
0543   /// Finish parsing the current document and return true if there are
0544   ///        more. Return false otherwise.
0545   bool skip();
0546 
0547   /// Parse and return the root level node.
0548   Node *getRoot() {
0549     if (Root)
0550       return Root;
0551     return Root = parseBlockNode();
0552   }
0553 
0554   const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; }
0555 
0556 private:
0557   friend class Node;
0558   friend class document_iterator;
0559 
0560   /// Stream to read tokens from.
0561   Stream &stream;
0562 
0563   /// Used to allocate nodes to. All are destroyed without calling their
0564   ///        destructor when the document is destroyed.
0565   BumpPtrAllocator NodeAllocator;
0566 
0567   /// The root node. Used to support skipping a partially parsed
0568   ///        document.
0569   Node *Root;
0570 
0571   /// Maps tag prefixes to their expansion.
0572   std::map<StringRef, StringRef> TagMap;
0573 
0574   Token &peekNext();
0575   Token getNext();
0576   void setError(const Twine &Message, Token &Location) const;
0577   bool failed() const;
0578 
0579   /// Parse %BLAH directives and return true if any were encountered.
0580   bool parseDirectives();
0581 
0582   /// Parse %YAML
0583   void parseYAMLDirective();
0584 
0585   /// Parse %TAG
0586   void parseTAGDirective();
0587 
0588   /// Consume the next token and error if it is not \a TK.
0589   bool expectToken(int TK);
0590 };
0591 
0592 /// Iterator abstraction for Documents over a Stream.
0593 class document_iterator {
0594 public:
0595   document_iterator() = default;
0596   document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {}
0597 
0598   bool operator==(const document_iterator &Other) const {
0599     if (isAtEnd() || Other.isAtEnd())
0600       return isAtEnd() && Other.isAtEnd();
0601 
0602     return Doc == Other.Doc;
0603   }
0604   bool operator!=(const document_iterator &Other) const {
0605     return !(*this == Other);
0606   }
0607 
0608   document_iterator operator++() {
0609     assert(Doc && "incrementing iterator past the end.");
0610     if (!(*Doc)->skip()) {
0611       Doc->reset(nullptr);
0612     } else {
0613       Stream &S = (*Doc)->stream;
0614       Doc->reset(new Document(S));
0615     }
0616     return *this;
0617   }
0618 
0619   Document &operator*() { return **Doc; }
0620 
0621   std::unique_ptr<Document> &operator->() { return *Doc; }
0622 
0623 private:
0624   bool isAtEnd() const { return !Doc || !*Doc; }
0625 
0626   std::unique_ptr<Document> *Doc = nullptr;
0627 };
0628 
0629 } // end namespace yaml
0630 
0631 } // end namespace llvm
0632 
0633 #endif // LLVM_SUPPORT_YAMLPARSER_H