Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:36:50

0001 //== clang/Basic/Sarif.h - SARIF Diagnostics Object Model -------*- C++ -*--==//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 /// \file
0009 /// Defines clang::SarifDocumentWriter, clang::SarifRule, clang::SarifResult.
0010 ///
0011 /// The document built can be accessed as a JSON Object.
0012 /// Several value semantic types are also introduced which represent properties
0013 /// of the SARIF standard, such as 'artifact', 'result', 'rule'.
0014 ///
0015 /// A SARIF (Static Analysis Results Interchange Format) document is a JSON
0016 /// document that describes in detail the results of running static analysis
0017 /// tools on a project. Each (non-trivial) document consists of at least one
0018 /// "run", which are themselves composed of details such as:
0019 /// * Tool: The tool that was run
0020 /// * Rules: The rules applied during the tool run, represented by
0021 ///   \c reportingDescriptor objects in SARIF
0022 /// * Results: The matches for the rules applied against the project(s) being
0023 ///   evaluated, represented by \c result objects in SARIF
0024 ///
0025 /// Reference:
0026 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html">The SARIF standard</a>
0027 /// 2. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317836">SARIF<pre>reportingDescriptor</pre></a>
0028 /// 3. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317638">SARIF<pre>result</pre></a>
0029 //===----------------------------------------------------------------------===//
0030 
0031 #ifndef LLVM_CLANG_BASIC_SARIF_H
0032 #define LLVM_CLANG_BASIC_SARIF_H
0033 
0034 #include "clang/Basic/SourceLocation.h"
0035 #include "clang/Basic/Version.h"
0036 #include "llvm/ADT/ArrayRef.h"
0037 #include "llvm/ADT/SmallVector.h"
0038 #include "llvm/ADT/StringMap.h"
0039 #include "llvm/ADT/StringRef.h"
0040 #include "llvm/Support/JSON.h"
0041 #include <cassert>
0042 #include <cstddef>
0043 #include <cstdint>
0044 #include <initializer_list>
0045 #include <optional>
0046 #include <string>
0047 
0048 namespace clang {
0049 
0050 class SarifDocumentWriter;
0051 class SourceManager;
0052 
0053 namespace detail {
0054 
0055 /// \internal
0056 /// An artifact location is SARIF's way of describing the complete location
0057 /// of an artifact encountered during analysis. The \c artifactLocation object
0058 /// typically consists of a URI, and/or an index to reference the artifact it
0059 /// locates.
0060 ///
0061 /// This builder makes an additional assumption: that every artifact encountered
0062 /// by \c clang will be a physical, top-level artifact. Which is why the static
0063 /// creation method \ref SarifArtifactLocation::create takes a mandatory URI
0064 /// parameter. The official standard states that either a \c URI or \c Index
0065 /// must be available in the object, \c clang picks the \c URI as a reasonable
0066 /// default, because it intends to deal in physical artifacts for now.
0067 ///
0068 /// Reference:
0069 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317427">artifactLocation object</a>
0070 /// 2. \ref SarifArtifact
0071 class SarifArtifactLocation {
0072 private:
0073   friend class clang::SarifDocumentWriter;
0074 
0075   std::optional<uint32_t> Index;
0076   std::string URI;
0077 
0078   SarifArtifactLocation() = delete;
0079   explicit SarifArtifactLocation(const std::string &URI) : URI(URI) {}
0080 
0081 public:
0082   static SarifArtifactLocation create(llvm::StringRef URI) {
0083     return SarifArtifactLocation{URI.str()};
0084   }
0085 
0086   SarifArtifactLocation setIndex(uint32_t Idx) {
0087     Index = Idx;
0088     return *this;
0089   }
0090 };
0091 
0092 /// \internal
0093 /// An artifact in SARIF is any object (a sequence of bytes) addressable by
0094 /// a URI (RFC 3986). The most common type of artifact for clang's use-case
0095 /// would be source files. SARIF's artifact object is described in detail in
0096 /// section 3.24.
0097 ///
0098 /// Since every clang artifact MUST have a location (there being no nested
0099 /// artifacts), the creation method \ref SarifArtifact::create requires a
0100 /// \ref SarifArtifactLocation object.
0101 ///
0102 /// Reference:
0103 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317611">artifact object</a>
0104 class SarifArtifact {
0105 private:
0106   friend class clang::SarifDocumentWriter;
0107 
0108   std::optional<uint32_t> Offset;
0109   std::optional<size_t> Length;
0110   std::string MimeType;
0111   SarifArtifactLocation Location;
0112   llvm::SmallVector<std::string, 4> Roles;
0113 
0114   SarifArtifact() = delete;
0115 
0116   explicit SarifArtifact(const SarifArtifactLocation &Loc) : Location(Loc) {}
0117 
0118 public:
0119   static SarifArtifact create(const SarifArtifactLocation &Loc) {
0120     return SarifArtifact{Loc};
0121   }
0122 
0123   SarifArtifact setOffset(uint32_t ArtifactOffset) {
0124     Offset = ArtifactOffset;
0125     return *this;
0126   }
0127 
0128   SarifArtifact setLength(size_t NumBytes) {
0129     Length = NumBytes;
0130     return *this;
0131   }
0132 
0133   SarifArtifact setRoles(std::initializer_list<llvm::StringRef> ArtifactRoles) {
0134     Roles.assign(ArtifactRoles.begin(), ArtifactRoles.end());
0135     return *this;
0136   }
0137 
0138   SarifArtifact setMimeType(llvm::StringRef ArtifactMimeType) {
0139     MimeType = ArtifactMimeType.str();
0140     return *this;
0141   }
0142 };
0143 
0144 } // namespace detail
0145 
/// The importance attached to a \ref ThreadFlow through
/// \c ThreadFlow::setImportance.
///
/// \c Important is deliberately the first enumerator: it has the value 0 and
/// is therefore what a value-initialized \ref ThreadFlow carries.
enum class ThreadFlowImportance {
  Important,
  Essential,
  Unimportant,
};
0147 
0148 /// The level of severity associated with a \ref SarifResult.
0149 ///
0150 /// Of all the levels, \c None is the only one that is not associated with
0151 /// a failure.
0152 ///
0153 /// A typical mapping for clang's DiagnosticKind to SarifResultLevel would look
0154 /// like:
0155 /// * \c None: \ref clang::DiagnosticsEngine::Level::Remark, \ref clang::DiagnosticsEngine::Level::Ignored
0156 /// * \c Note: \ref clang::DiagnosticsEngine::Level::Note
0157 /// * \c Warning: \ref clang::DiagnosticsEngine::Level::Warning
0158 /// * \c Error could be generated from one of:
0159 ///   - \ref clang::DiagnosticsEngine::Level::Warning with \c -Werror
0160 ///   - \ref clang::DiagnosticsEngine::Level::Error
0161 ///   - \ref clang::DiagnosticsEngine::Level::Fatal when \ref clang::DiagnosticsEngine::ErrorsAsFatal is set.
0162 ///
0163 /// Reference:
0164 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317648">level property</a>
enum class SarifResultLevel {
  None, // The only level that is not associated with a failure.
  Note,
  Warning,
  Error,
};
0166 
0167 /// A thread flow is a sequence of code locations that specify a possible path
0168 /// through a single thread of execution.
0169 /// A thread flow in SARIF is related to a code flow which describes
0170 /// the progress of one or more programs through one or more thread flows.
0171 ///
0172 /// Reference:
0173 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317744">threadFlow object</a>
0174 /// 2. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317740">codeFlow object</a>
0175 class ThreadFlow {
0176   friend class SarifDocumentWriter;
0177 
0178   CharSourceRange Range;
0179   ThreadFlowImportance Importance;
0180   std::string Message;
0181 
0182   ThreadFlow() = default;
0183 
0184 public:
0185   static ThreadFlow create() { return {}; }
0186 
0187   ThreadFlow setRange(const CharSourceRange &ItemRange) {
0188     assert(ItemRange.isCharRange() &&
0189            "ThreadFlows require a character granular source range!");
0190     Range = ItemRange;
0191     return *this;
0192   }
0193 
0194   ThreadFlow setImportance(const ThreadFlowImportance &ItemImportance) {
0195     Importance = ItemImportance;
0196     return *this;
0197   }
0198 
0199   ThreadFlow setMessage(llvm::StringRef ItemMessage) {
0200     Message = ItemMessage.str();
0201     return *this;
0202   }
0203 };
0204 
0205 /// A SARIF Reporting Configuration (\c reportingConfiguration) object contains
0206 /// properties for a \ref SarifRule that can be configured at runtime before
0207 /// analysis begins.
0208 ///
0209 /// Reference:
0210 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317852">reportingConfiguration object</a>
0211 class SarifReportingConfiguration {
0212   friend class clang::SarifDocumentWriter;
0213 
0214   bool Enabled = true;
0215   SarifResultLevel Level = SarifResultLevel::Warning;
0216   float Rank = -1.0f;
0217 
0218   SarifReportingConfiguration() = default;
0219 
0220 public:
0221   static SarifReportingConfiguration create() { return {}; };
0222 
0223   SarifReportingConfiguration disable() {
0224     Enabled = false;
0225     return *this;
0226   }
0227 
0228   SarifReportingConfiguration enable() {
0229     Enabled = true;
0230     return *this;
0231   }
0232 
0233   SarifReportingConfiguration setLevel(SarifResultLevel TheLevel) {
0234     Level = TheLevel;
0235     return *this;
0236   }
0237 
0238   SarifReportingConfiguration setRank(float TheRank) {
0239     assert(TheRank >= 0.0f && "Rule rank cannot be smaller than 0.0");
0240     assert(TheRank <= 100.0f && "Rule rank cannot be larger than 100.0");
0241     Rank = TheRank;
0242     return *this;
0243   }
0244 };
0245 
0246 /// A SARIF rule (\c reportingDescriptor object) contains information that
0247 /// describes a reporting item generated by a tool. A reporting item is
0248 /// either a result of analysis or notification of a condition encountered by
0249 /// the tool. Rules are arbitrary but are identifiable by a hierarchical
0250 /// rule-id.
0251 ///
0252 /// This builder provides an interface to create SARIF \c reportingDescriptor
0253 /// objects via the \ref SarifRule::create static method.
0254 ///
0255 /// Reference:
0256 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317836">reportingDescriptor object</a>
0257 class SarifRule {
0258   friend class clang::SarifDocumentWriter;
0259 
0260   std::string Name;
0261   std::string Id;
0262   std::string Description;
0263   std::string HelpURI;
0264   SarifReportingConfiguration DefaultConfiguration;
0265 
0266   SarifRule() : DefaultConfiguration(SarifReportingConfiguration::create()) {}
0267 
0268 public:
0269   static SarifRule create() { return {}; }
0270 
0271   SarifRule setName(llvm::StringRef RuleName) {
0272     Name = RuleName.str();
0273     return *this;
0274   }
0275 
0276   SarifRule setRuleId(llvm::StringRef RuleId) {
0277     Id = RuleId.str();
0278     return *this;
0279   }
0280 
0281   SarifRule setDescription(llvm::StringRef RuleDesc) {
0282     Description = RuleDesc.str();
0283     return *this;
0284   }
0285 
0286   SarifRule setHelpURI(llvm::StringRef RuleHelpURI) {
0287     HelpURI = RuleHelpURI.str();
0288     return *this;
0289   }
0290 
0291   SarifRule
0292   setDefaultConfiguration(const SarifReportingConfiguration &Configuration) {
0293     DefaultConfiguration = Configuration;
0294     return *this;
0295   }
0296 };
0297 
0298 /// A SARIF result (also called a "reporting item") is a unit of output
0299 /// produced when one of the tool's \c reportingDescriptor encounters a match
0300 /// on the file being analysed by the tool.
0301 ///
0302 /// This builder provides a \ref SarifResult::create static method that can be
0303 /// used to create an empty shell onto which attributes can be added using the
0304 /// \c setX(...) methods.
0305 ///
0306 /// For example:
0307 /// \code{.cpp}
0308 /// SarifResult result = SarifResult::create(...)
0309 ///                         .setRuleId(...)
0310 ///                         .setDiagnosticMessage(...);
0311 /// \endcode
0312 ///
0313 /// Reference:
0314 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317638">SARIF<pre>result</pre></a>
0315 class SarifResult {
0316   friend class clang::SarifDocumentWriter;
0317 
0318   // NOTE:
0319   // This type cannot fit all possible indexes representable by JSON, but is
0320   // chosen because it is the largest unsigned type that can be safely
0321   // converted to an \c int64_t.
0322   uint32_t RuleIdx;
0323   std::string RuleId;
0324   std::string DiagnosticMessage;
0325   llvm::SmallVector<CharSourceRange, 8> Locations;
0326   llvm::SmallVector<ThreadFlow, 8> ThreadFlows;
0327   std::optional<SarifResultLevel> LevelOverride;
0328 
0329   SarifResult() = delete;
0330   explicit SarifResult(uint32_t RuleIdx) : RuleIdx(RuleIdx) {}
0331 
0332 public:
0333   static SarifResult create(uint32_t RuleIdx) { return SarifResult{RuleIdx}; }
0334 
0335   SarifResult setIndex(uint32_t Idx) {
0336     RuleIdx = Idx;
0337     return *this;
0338   }
0339 
0340   SarifResult setRuleId(llvm::StringRef Id) {
0341     RuleId = Id.str();
0342     return *this;
0343   }
0344 
0345   SarifResult setDiagnosticMessage(llvm::StringRef Message) {
0346     DiagnosticMessage = Message.str();
0347     return *this;
0348   }
0349 
0350   SarifResult setLocations(llvm::ArrayRef<CharSourceRange> DiagLocs) {
0351 #ifndef NDEBUG
0352     for (const auto &Loc : DiagLocs) {
0353       assert(Loc.isCharRange() &&
0354              "SARIF Results require character granular source ranges!");
0355     }
0356 #endif
0357     Locations.assign(DiagLocs.begin(), DiagLocs.end());
0358     return *this;
0359   }
0360   SarifResult setThreadFlows(llvm::ArrayRef<ThreadFlow> ThreadFlowResults) {
0361     ThreadFlows.assign(ThreadFlowResults.begin(), ThreadFlowResults.end());
0362     return *this;
0363   }
0364 
0365   SarifResult setDiagnosticLevel(const SarifResultLevel &TheLevel) {
0366     LevelOverride = TheLevel;
0367     return *this;
0368   }
0369 };
0370 
0371 /// This class handles creating a valid SARIF document given various input
0372 /// attributes. However, it requires an ordering among certain method calls:
0373 ///
0374 /// 1. Because every SARIF document must contain at least 1 \c run, callers
0375 ///    must ensure that \ref SarifDocumentWriter::createRun is called before
0376 ///    any other methods.
0377 /// 2. If SarifDocumentWriter::endRun is called, callers MUST call
0378 ///    SarifDocumentWriter::createRun, before invoking any of the result
0379 ///    aggregation methods such as SarifDocumentWriter::appendResult etc.
0380 class SarifDocumentWriter {
0381 private:
0382   const llvm::StringRef SchemaURI{
0383       "https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/"
0384       "sarif-schema-2.1.0.json"};
0385   const llvm::StringRef SchemaVersion{"2.1.0"};
0386 
0387   /// \internal
0388   /// Return a pointer to the current tool. Asserts that a run exists.
0389   llvm::json::Object &getCurrentTool();
0390 
0391   /// \internal
0392   /// Checks if there is a run associated with this document.
0393   ///
0394   /// \return true on success
0395   bool hasRun() const;
0396 
0397   /// \internal
0398   /// Reset portions of the internal state so that the document is ready to
0399   /// receive data for a new run.
0400   void reset();
0401 
0402   /// \internal
0403   /// Return a mutable reference to the current run, after asserting it exists.
0404   ///
0405   /// \note It is undefined behavior to call this if a run does not exist in
0406   /// the SARIF document.
0407   llvm::json::Object &getCurrentRun();
0408 
0409   /// Create a code flow object for the given threadflows.
0410   /// See \ref ThreadFlow.
0411   ///
0412   /// \note It is undefined behavior to call this if a run does not exist in
0413   /// the SARIF document.
0414   llvm::json::Object
0415   createCodeFlow(const llvm::ArrayRef<ThreadFlow> ThreadFlows);
0416 
0417   /// Add the given threadflows to the ones this SARIF document knows about.
0418   llvm::json::Array
0419   createThreadFlows(const llvm::ArrayRef<ThreadFlow> ThreadFlows);
0420 
0421   /// Add the given \ref CharSourceRange to the SARIF document as a physical
0422   /// location, with its corresponding artifact.
0423   llvm::json::Object createPhysicalLocation(const CharSourceRange &R);
0424 
0425 public:
0426   SarifDocumentWriter() = delete;
0427 
0428   /// Create a new empty SARIF document with the given source manager.
0429   SarifDocumentWriter(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
0430 
0431   /// Release resources held by this SARIF document.
0432   ~SarifDocumentWriter() = default;
0433 
0434   /// Create a new run with which any upcoming analysis will be associated.
0435   /// Each run requires specifying the tool that is generating reporting items.
0436   void createRun(const llvm::StringRef ShortToolName,
0437                  const llvm::StringRef LongToolName,
0438                  const llvm::StringRef ToolVersion = CLANG_VERSION_STRING);
0439 
0440   /// If there is a current run, end it.
0441   ///
0442   /// This method collects various book-keeping required to clear and close
0443   /// resources associated with the current run, but may also allocate some
0444   /// for the next run.
0445   ///
0446   /// Calling \ref endRun before associating a run through \ref createRun leads
0447   /// to undefined behaviour.
0448   void endRun();
0449 
0450   /// Associate the given rule with the current run.
0451   ///
0452   /// Returns an integer rule index for the created rule that is unique within
0453   /// the current run, which can then be used to create a \ref SarifResult
0454   /// to add to the current run. Note that a rule must exist before being
0455   /// referenced by a result.
0456   ///
0457   /// \pre
0458   /// There must be a run associated with the document, failing to do so will
0459   /// cause undefined behaviour.
0460   size_t createRule(const SarifRule &Rule);
0461 
0462   /// Append a new result to the currently in-flight run.
0463   ///
0464   /// \pre
0465   /// There must be a run associated with the document, failing to do so will
0466   /// cause undefined behaviour.
0467   /// \pre
0468   /// \c RuleIdx used to create the result must correspond to a rule known by
0469   /// the SARIF document. It must be the value returned by a previous call
0470   /// to \ref createRule.
0471   void appendResult(const SarifResult &SarifResult);
0472 
0473   /// Return the SARIF document in its current state.
0474   /// Calling this will trigger a copy of the internal state including all
0475   /// reported diagnostics, resulting in an expensive call.
0476   llvm::json::Object createDocument();
0477 
0478 private:
0479   /// Source Manager to use for the current SARIF document.
0480   const SourceManager &SourceMgr;
0481 
0482   /// Flag to track the state of this document:
0483   /// A closed document is one on which a new runs must be created.
0484   /// This could be a document that is freshly created, or has recently
0485   /// finished writing to a previous run.
0486   bool Closed = true;
0487 
0488   /// A sequence of SARIF runs.
0489   /// Each run object describes a single run of an analysis tool and contains
0490   /// the output of that run.
0491   ///
0492   /// Reference: <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317484">run object</a>
0493   llvm::json::Array Runs;
0494 
0495   /// The list of rules associated with the most recent active run. These are
0496   /// defined using the diagnostics passed to the SarifDocument. Each rule
0497   /// need not be unique through the result set. E.g. there may be several
0498   /// 'syntax' errors throughout code under analysis, each of which has its
0499   /// own specific diagnostic message (and consequently, RuleId). Rules are
0500   /// also known as "reportingDescriptor" objects in SARIF.
0501   ///
0502   /// Reference: <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317556">rules property</a>
0503   llvm::SmallVector<SarifRule, 32> CurrentRules;
0504 
0505   /// The list of artifacts that have been encountered on the most recent active
0506   /// run. An artifact is defined in SARIF as a sequence of bytes addressable
0507   /// by a URI. A common example for clang's case would be files named by
0508   /// filesystem paths.
0509   llvm::StringMap<detail::SarifArtifact> CurrentArtifacts;
0510 };
0511 } // namespace clang
0512 
0513 #endif // LLVM_CLANG_BASIC_SARIF_H