Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:43:46

0001 //===- Markup.h -------------------------------------------------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 ///
0009 /// \file
0010 /// This file declares the log symbolizer markup data model and parser.
0011 ///
0012 /// See https://llvm.org/docs/SymbolizerMarkupFormat.html
0013 ///
0014 //===----------------------------------------------------------------------===//
0015 
0016 #ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
0017 #define LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
0018 
0019 #include "llvm/ADT/SmallVector.h"
0020 #include "llvm/ADT/StringRef.h"
0021 #include "llvm/ADT/StringSet.h"
0022 #include "llvm/Support/Regex.h"
0023 
0024 namespace llvm {
0025 namespace symbolize {
0026 
0027 /// A node of symbolizer markup.
0028 ///
0029 /// If only the Text field is set, this represents a region of text outside a
0030 /// markup element. ANSI SGR control codes are also reported this way; if
0031 /// detected, then the control code will be the entirety of the Text field, and
0032 /// any surrounding text will be reported as preceding and following nodes.
0033 struct MarkupNode {
0034   /// The full text of this node in the input.
0035   StringRef Text;
0036 
0037   /// If this represents an element, the tag. Otherwise, empty.
0038   StringRef Tag;
0039 
0040   /// If this represents an element with fields, a list of the field contents.
0041   /// Otherwise, empty.
0042   SmallVector<StringRef> Fields;
0043 
0044   bool operator==(const MarkupNode &Other) const {
0045     return Text == Other.Text && Tag == Other.Tag && Fields == Other.Fields;
0046   }
0047   bool operator!=(const MarkupNode &Other) const { return !(*this == Other); }
0048 };
0049 
0050 /// Parses a log containing symbolizer markup into a sequence of nodes.
0051 class MarkupParser {
0052 public:
0053   MarkupParser(StringSet<> MultilineTags = {});
0054 
0055   /// Parses an individual \p Line of input.
0056   ///
0057   /// Nodes from the previous parseLine() call that haven't yet been extracted
0058   /// by nextNode() are discarded. The nodes returned by nextNode() may
0059   /// reference the input string, so it must be retained by the caller until the
0060   /// last use.
0061   ///
0062   /// Note that some elements may span multiple lines. If a line ends with the
0063   /// start of one of these elements, then no nodes will be produced until the
0064   /// either the end or something that cannot be part of an element is
0065   /// encountered. This may only occur after multiple calls to parseLine(),
0066   /// corresponding to the lines of the multi-line element.
0067   void parseLine(StringRef Line);
0068 
0069   /// Inform the parser of that the input stream has ended.
0070   ///
0071   /// This allows the parser to finish any deferred processing (e.g., an
0072   /// in-progress multi-line element) and may cause nextNode() to return
0073   /// additional nodes.
0074   void flush();
0075 
0076   /// Returns the next node in the input sequence.
0077   ///
0078   /// Calling nextNode() may invalidate the contents of the node returned by the
0079   /// previous call.
0080   ///
0081   /// \returns the next markup node or std::nullopt if none remain.
0082   std::optional<MarkupNode> nextNode();
0083 
0084   bool isSGR(const MarkupNode &Node) const {
0085     return SGRSyntax.match(Node.Text);
0086   }
0087 
0088 private:
0089   std::optional<MarkupNode> parseElement(StringRef Line);
0090   void parseTextOutsideMarkup(StringRef Text);
0091   std::optional<StringRef> parseMultiLineBegin(StringRef Line);
0092   std::optional<StringRef> parseMultiLineEnd(StringRef Line);
0093 
0094   // Tags of elements that can span multiple lines.
0095   const StringSet<> MultilineTags;
0096 
0097   // Contents of a multi-line element that has finished being parsed. Retained
0098   // to keep returned StringRefs for the contents valid.
0099   std::string FinishedMultiline;
0100 
0101   // Contents of a multi-line element that is still in the process of receiving
0102   // lines.
0103   std::string InProgressMultiline;
0104 
0105   // The line currently being parsed.
0106   StringRef Line;
0107 
0108   // Buffer for nodes parsed from the current line.
0109   SmallVector<MarkupNode> Buffer;
0110 
0111   // Next buffer index to return.
0112   size_t NextIdx;
0113 
0114   // Regular expression matching supported ANSI SGR escape sequences.
0115   const Regex SGRSyntax;
0116 };
0117 
0118 } // end namespace symbolize
0119 } // end namespace llvm
0120 
0121 #endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H