Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:36:24

0001 //===- DataflowAnalysis.h ---------------------------------------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 //  This file defines base types and functions for building dataflow analyses
0010 //  that run over Control-Flow Graphs (CFGs).
0011 //
0012 //===----------------------------------------------------------------------===//
0013 
0014 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
0015 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
0016 
0017 #include <iterator>
0018 #include <optional>
0019 #include <type_traits>
0020 #include <utility>
0021 #include <vector>
0022 
0023 #include "clang/AST/ASTContext.h"
0024 #include "clang/Analysis/CFG.h"
0025 #include "clang/Analysis/FlowSensitive/AdornedCFG.h"
0026 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
0027 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
0028 #include "clang/Analysis/FlowSensitive/MatchSwitch.h"
0029 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
0030 #include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
0031 #include "llvm/ADT/STLExtras.h"
0032 #include "llvm/ADT/STLFunctionalExtras.h"
0033 #include "llvm/ADT/SmallVector.h"
0034 #include "llvm/Support/Errc.h"
0035 #include "llvm/Support/Error.h"
0036 
0037 namespace clang {
0038 namespace dataflow {
0039 
0040 /// Base class template for dataflow analyses built on a single lattice type.
0041 ///
0042 /// Requirements:
0043 ///
0044 ///  `Derived` must be derived from a specialization of this class template and
0045 ///  must provide the following public members:
0046 ///   * `LatticeT initialElement()` - returns a lattice element that models the
0047 ///     initial state of a basic block;
0048 ///   * `void transfer(const CFGElement &, LatticeT &, Environment &)` - applies
0049 ///     the analysis transfer function for a given CFG element and lattice
0050 ///     element.
0051 ///
0052 ///  `Derived` can optionally provide the following members:
0053 ///  * `void transferBranch(bool Branch, const Stmt *Stmt, TypeErasedLattice &E,
0054 ///                         Environment &Env)` - applies the analysis transfer
0055 ///    function for a given edge from a CFG block of a conditional statement.
0056 ///
0057 ///  `Derived` can optionally override the virtual functions in the
0058 ///  `Environment::ValueModel` interface (which is an indirect base class of
0059 ///  this class).
0060 ///
0061 ///  `LatticeT` is a bounded join-semilattice that is used by `Derived` and must
0062 ///  provide the following public members:
0063 ///   * `LatticeJoinEffect join(const LatticeT &)` - joins the object and the
0064 ///     argument by computing their least upper bound, modifies the object if
0065 ///     necessary, and returns an effect indicating whether any changes were
0066 ///     made to it;
0067 ///     FIXME: make it `static LatticeT join(const LatticeT&, const LatticeT&)`
0068 ///   * `bool operator==(const LatticeT &) const` - returns true if and only if
0069 ///     the object is equal to the argument.
0070 ///
0071 /// `LatticeT` can optionally provide the following members:
0072 ///  * `LatticeJoinEffect widen(const LatticeT &Previous)` - replaces the
0073 ///    lattice element with an  approximation that can reach a fixed point more
0074 ///    quickly than iterated application of the transfer function alone. The
0075 ///    previous value is provided to inform the choice of widened value. The
0076 ///    function must also serve as a comparison operation, by indicating whether
0077 ///    the widened value is equivalent to the previous value with the returned
0078 ///    `LatticeJoinEffect`.
0079 template <typename Derived, typename LatticeT>
0080 class DataflowAnalysis : public TypeErasedDataflowAnalysis {
0081 public:
0082   /// Bounded join-semilattice that is used in the analysis.
0083   using Lattice = LatticeT;
0084 
0085   explicit DataflowAnalysis(ASTContext &Context) : Context(Context) {}
0086 
0087   explicit DataflowAnalysis(ASTContext &Context,
0088                             DataflowAnalysisOptions Options)
0089       : TypeErasedDataflowAnalysis(Options), Context(Context) {}
0090 
0091   ASTContext &getASTContext() final { return Context; }
0092 
0093   TypeErasedLattice typeErasedInitialElement() final {
0094     return {static_cast<Derived *>(this)->initialElement()};
0095   }
0096 
0097   TypeErasedLattice joinTypeErased(const TypeErasedLattice &E1,
0098                                    const TypeErasedLattice &E2) final {
0099     // FIXME: change the signature of join() to avoid copying here.
0100     Lattice L1 = llvm::any_cast<const Lattice &>(E1.Value);
0101     const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value);
0102     L1.join(L2);
0103     return {std::move(L1)};
0104   }
0105 
0106   LatticeJoinEffect widenTypeErased(TypeErasedLattice &Current,
0107                                     const TypeErasedLattice &Previous) final {
0108     Lattice &C = llvm::any_cast<Lattice &>(Current.Value);
0109     const Lattice &P = llvm::any_cast<const Lattice &>(Previous.Value);
0110     return widenInternal(Rank0{}, C, P);
0111   }
0112 
0113   bool isEqualTypeErased(const TypeErasedLattice &E1,
0114                          const TypeErasedLattice &E2) final {
0115     const Lattice &L1 = llvm::any_cast<const Lattice &>(E1.Value);
0116     const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value);
0117     return L1 == L2;
0118   }
0119 
0120   void transferTypeErased(const CFGElement &Element, TypeErasedLattice &E,
0121                           Environment &Env) final {
0122     Lattice &L = llvm::any_cast<Lattice &>(E.Value);
0123     static_cast<Derived *>(this)->transfer(Element, L, Env);
0124   }
0125 
0126   void transferBranchTypeErased(bool Branch, const Stmt *Stmt,
0127                                 TypeErasedLattice &E, Environment &Env) final {
0128     transferBranchInternal(Rank0{}, *static_cast<Derived *>(this), Branch, Stmt,
0129                            E, Env);
0130   }
0131 
0132 private:
0133   // These `Rank` structs are used for template metaprogramming to choose
0134   // between overloads.
0135   struct Rank1 {};
0136   struct Rank0 : Rank1 {};
0137 
0138   // The first-choice implementation: use `widen` when it is available.
0139   template <typename T>
0140   static auto widenInternal(Rank0, T &Current, const T &Prev)
0141       -> decltype(Current.widen(Prev)) {
0142     return Current.widen(Prev);
0143   }
0144 
0145   // The second-choice implementation: `widen` is unavailable. Widening is
0146   // merged with equality checking, so when widening is unimplemented, we
0147   // default to equality checking.
0148   static LatticeJoinEffect widenInternal(Rank1, const Lattice &Current,
0149                                          const Lattice &Prev) {
0150     return Prev == Current ? LatticeJoinEffect::Unchanged
0151                            : LatticeJoinEffect::Changed;
0152   }
0153 
0154   // The first-choice implementation: `transferBranch` is implemented.
0155   template <typename Analysis>
0156   static auto transferBranchInternal(Rank0, Analysis &A, bool Branch,
0157                                      const Stmt *Stmt, TypeErasedLattice &L,
0158                                      Environment &Env)
0159       -> std::void_t<decltype(A.transferBranch(
0160           Branch, Stmt, std::declval<LatticeT &>(), Env))> {
0161     A.transferBranch(Branch, Stmt, llvm::any_cast<Lattice &>(L.Value), Env);
0162   }
0163 
0164   // The second-choice implementation: `transferBranch` is unimplemented. No-op.
0165   template <typename Analysis>
0166   static void transferBranchInternal(Rank1, Analysis &A, bool, const Stmt *,
0167                                      TypeErasedLattice &, Environment &) {}
0168 
0169   ASTContext &Context;
0170 };
0171 
0172 // Model of the program at a given program point.
0173 template <typename LatticeT> struct DataflowAnalysisState {
0174   // Model of a program property.
0175   LatticeT Lattice;
0176 
0177   // Model of the state of the program (store and heap).
0178   Environment Env;
0179 };
0180 
0181 /// A callback to be called with the state before or after visiting a CFG
0182 /// element.
0183 template <typename AnalysisT>
0184 using CFGEltCallback = std::function<void(
0185     const CFGElement &,
0186     const DataflowAnalysisState<typename AnalysisT::Lattice> &)>;
0187 
0188 /// A pair of callbacks to be called with the state before and after visiting a
0189 /// CFG element.
0190 /// Either or both of the callbacks may be null.
0191 template <typename AnalysisT> struct CFGEltCallbacks {
0192   CFGEltCallback<AnalysisT> Before;
0193   CFGEltCallback<AnalysisT> After;
0194 };
0195 
0196 /// A callback for performing diagnosis on a CFG element, called with the state
0197 /// before or after visiting that CFG element. Returns a list of diagnostics
0198 /// to emit (if any).
0199 template <typename AnalysisT, typename Diagnostic>
0200 using DiagnosisCallback = llvm::function_ref<llvm::SmallVector<Diagnostic>(
0201     const CFGElement &, ASTContext &,
0202     const TransferStateForDiagnostics<typename AnalysisT::Lattice> &)>;
0203 
0204 /// A pair of callbacks for performing diagnosis on a CFG element, called with
0205 /// the state before and after visiting that CFG element.
0206 /// Either or both of the callbacks may be null.
0207 template <typename AnalysisT, typename Diagnostic> struct DiagnosisCallbacks {
0208   DiagnosisCallback<AnalysisT, Diagnostic> Before;
0209   DiagnosisCallback<AnalysisT, Diagnostic> After;
0210 };
0211 
0212 /// Default for the maximum number of SAT solver iterations during analysis.
0213 inline constexpr std::int64_t kDefaultMaxSATIterations = 1'000'000'000;
0214 
0215 /// Default for the maximum number of block visits during analysis.
0216 inline constexpr std::int32_t kDefaultMaxBlockVisits = 20'000;
0217 
0218 /// Performs dataflow analysis and returns a mapping from basic block IDs to
0219 /// dataflow analysis states that model the respective basic blocks. The
0220 /// returned vector, if any, will have the same size as the number of CFG
0221 /// blocks, with indices corresponding to basic block IDs. Returns an error if
0222 /// the dataflow analysis cannot be performed successfully. Otherwise, calls
0223 /// `PostAnalysisCallbacks` on each CFG element with the final analysis results
0224 /// before and after that program point.
0225 ///
0226 /// `MaxBlockVisits` caps the number of block visits during analysis. See
0227 /// `runTypeErasedDataflowAnalysis` for a full description. The default value is
0228 /// essentially arbitrary -- large enough to accommodate what seems like any
0229 /// reasonable CFG, but still small enough to limit the cost of hitting the
0230 /// limit.
0231 template <typename AnalysisT>
0232 llvm::Expected<std::vector<
0233     std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
0234 runDataflowAnalysis(const AdornedCFG &ACFG, AnalysisT &Analysis,
0235                     const Environment &InitEnv,
0236                     CFGEltCallbacks<AnalysisT> PostAnalysisCallbacks = {},
0237                     std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
0238   CFGEltCallbacksTypeErased TypeErasedCallbacks;
0239   if (PostAnalysisCallbacks.Before) {
0240     TypeErasedCallbacks.Before =
0241         [&PostAnalysisCallbacks](const CFGElement &Element,
0242                                  const TypeErasedDataflowAnalysisState &State) {
0243           auto *Lattice =
0244               llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value);
0245           // FIXME: we should not be copying the environment here!
0246           // Ultimately the `CFGEltCallback` only gets a const reference anyway.
0247           PostAnalysisCallbacks.Before(
0248               Element, DataflowAnalysisState<typename AnalysisT::Lattice>{
0249                            *Lattice, State.Env.fork()});
0250         };
0251   }
0252   if (PostAnalysisCallbacks.After) {
0253     TypeErasedCallbacks.After =
0254         [&PostAnalysisCallbacks](const CFGElement &Element,
0255                                  const TypeErasedDataflowAnalysisState &State) {
0256           auto *Lattice =
0257               llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value);
0258           // FIXME: we should not be copying the environment here!
0259           // Ultimately the `CFGEltCallback` only gets a const reference anyway.
0260           PostAnalysisCallbacks.After(
0261               Element, DataflowAnalysisState<typename AnalysisT::Lattice>{
0262                            *Lattice, State.Env.fork()});
0263         };
0264   }
0265 
0266   auto TypeErasedBlockStates = runTypeErasedDataflowAnalysis(
0267       ACFG, Analysis, InitEnv, TypeErasedCallbacks, MaxBlockVisits);
0268   if (!TypeErasedBlockStates)
0269     return TypeErasedBlockStates.takeError();
0270 
0271   std::vector<std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
0272       BlockStates;
0273   BlockStates.reserve(TypeErasedBlockStates->size());
0274 
0275   llvm::transform(
0276       std::move(*TypeErasedBlockStates), std::back_inserter(BlockStates),
0277       [](auto &OptState) {
0278         return llvm::transformOptional(
0279             std::move(OptState), [](TypeErasedDataflowAnalysisState &&State) {
0280               return DataflowAnalysisState<typename AnalysisT::Lattice>{
0281                   llvm::any_cast<typename AnalysisT::Lattice>(
0282                       std::move(State.Lattice.Value)),
0283                   std::move(State.Env)};
0284             });
0285       });
0286   return std::move(BlockStates);
0287 }
0288 
0289 // Create an analysis class that is derived from `DataflowAnalysis`. This is an
0290 // SFINAE adapter that allows us to call two different variants of constructor
0291 // (either with or without the optional `Environment` parameter).
0292 // FIXME: Make all classes derived from `DataflowAnalysis` take an `Environment`
0293 // parameter in their constructor so that we can get rid of this abomination.
0294 template <typename AnalysisT>
0295 auto createAnalysis(ASTContext &ASTCtx, Environment &Env)
0296     -> decltype(AnalysisT(ASTCtx, Env)) {
0297   return AnalysisT(ASTCtx, Env);
0298 }
0299 template <typename AnalysisT>
0300 auto createAnalysis(ASTContext &ASTCtx, Environment &Env)
0301     -> decltype(AnalysisT(ASTCtx)) {
0302   return AnalysisT(ASTCtx);
0303 }
0304 
0305 /// Runs a dataflow analysis over the given function and then runs `Diagnoser`
0306 /// over the results. Returns a list of diagnostics for `FuncDecl` or an
0307 /// error. Currently, errors can occur (at least) because the analysis requires
0308 /// too many iterations over the CFG or the SAT solver times out.
0309 ///
0310 /// The default value of `MaxSATIterations` was chosen based on the following
0311 /// observations:
0312 /// - Non-pathological calls to the solver typically require only a few hundred
0313 ///   iterations.
0314 /// - This limit is still low enough to keep runtimes acceptable (on typical
0315 ///   machines) in cases where we hit the limit.
0316 ///
0317 /// `MaxBlockVisits` caps the number of block visits during analysis. See
0318 /// `runDataflowAnalysis` for a full description and explanation of the default
0319 /// value.
0320 template <typename AnalysisT, typename Diagnostic>
0321 llvm::Expected<llvm::SmallVector<Diagnostic>>
0322 diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
0323                  DiagnosisCallbacks<AnalysisT, Diagnostic> Diagnoser,
0324                  std::int64_t MaxSATIterations = kDefaultMaxSATIterations,
0325                  std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
0326   llvm::Expected<AdornedCFG> Context = AdornedCFG::build(FuncDecl);
0327   if (!Context)
0328     return Context.takeError();
0329 
0330   auto Solver = std::make_unique<WatchedLiteralsSolver>(MaxSATIterations);
0331   DataflowAnalysisContext AnalysisContext(*Solver);
0332   Environment Env(AnalysisContext, FuncDecl);
0333   AnalysisT Analysis = createAnalysis<AnalysisT>(ASTCtx, Env);
0334   llvm::SmallVector<Diagnostic> Diagnostics;
0335   CFGEltCallbacksTypeErased PostAnalysisCallbacks;
0336   if (Diagnoser.Before) {
0337     PostAnalysisCallbacks.Before =
0338         [&ASTCtx, &Diagnoser,
0339          &Diagnostics](const CFGElement &Elt,
0340                        const TypeErasedDataflowAnalysisState &State) mutable {
0341           auto EltDiagnostics = Diagnoser.Before(
0342               Elt, ASTCtx,
0343               TransferStateForDiagnostics<typename AnalysisT::Lattice>(
0344                   llvm::any_cast<const typename AnalysisT::Lattice &>(
0345                       State.Lattice.Value),
0346                   State.Env));
0347           llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
0348         };
0349   }
0350   if (Diagnoser.After) {
0351     PostAnalysisCallbacks.After =
0352         [&ASTCtx, &Diagnoser,
0353          &Diagnostics](const CFGElement &Elt,
0354                        const TypeErasedDataflowAnalysisState &State) mutable {
0355           auto EltDiagnostics = Diagnoser.After(
0356               Elt, ASTCtx,
0357               TransferStateForDiagnostics<typename AnalysisT::Lattice>(
0358                   llvm::any_cast<const typename AnalysisT::Lattice &>(
0359                       State.Lattice.Value),
0360                   State.Env));
0361           llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
0362         };
0363   }
0364   if (llvm::Error Err =
0365           runTypeErasedDataflowAnalysis(*Context, Analysis, Env,
0366                                         PostAnalysisCallbacks, MaxBlockVisits)
0367               .takeError())
0368     return std::move(Err);
0369 
0370   if (Solver->reachedLimit())
0371     return llvm::createStringError(llvm::errc::interrupted,
0372                                    "SAT solver timed out");
0373 
0374   return Diagnostics;
0375 }
0376 
0377 /// Overload that takes only one diagnosis callback, which is run on the state
0378 /// after visiting the `CFGElement`. This is provided for backwards
0379 /// compatibility; new callers should call the overload taking
0380 /// `DiagnosisCallbacks` instead.
0381 template <typename AnalysisT, typename Diagnostic>
0382 llvm::Expected<llvm::SmallVector<Diagnostic>>
0383 diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
0384                  DiagnosisCallback<AnalysisT, Diagnostic> Diagnoser,
0385                  std::int64_t MaxSATIterations = kDefaultMaxSATIterations,
0386                  std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
0387   DiagnosisCallbacks<AnalysisT, Diagnostic> Callbacks = {nullptr, Diagnoser};
0388   return diagnoseFunction(FuncDecl, ASTCtx, Callbacks, MaxSATIterations,
0389                           MaxBlockVisits);
0390 }
0391 
0392 /// Abstract base class for dataflow "models": reusable analysis components that
0393 /// model a particular aspect of program semantics in the `Environment`. For
0394 /// example, a model may capture a type and its related functions.
0395 class DataflowModel : public Environment::ValueModel {
0396 public:
0397   /// Return value indicates whether the model processed the `Element`.
0398   virtual bool transfer(const CFGElement &Element, Environment &Env) = 0;
0399 };
0400 
0401 } // namespace dataflow
0402 } // namespace clang
0403 
0404 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H