Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:37:09

0001 //===- SVals.h - Abstract Values for Static Analysis ------------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 //  This file defines SVal, Loc, and NonLoc, classes that represent
0010 //  abstract r-values for use with path-sensitive value tracking.
0011 //
0012 //===----------------------------------------------------------------------===//
0013 
0014 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
0015 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
0016 
0017 #include "clang/AST/Expr.h"
0018 #include "clang/AST/Type.h"
0019 #include "clang/Basic/LLVM.h"
0020 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntPtr.h"
0021 #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
0022 #include "llvm/ADT/APSInt.h"
0023 #include "llvm/ADT/FoldingSet.h"
0024 #include "llvm/ADT/ImmutableList.h"
0025 #include "llvm/ADT/PointerUnion.h"
0026 #include "llvm/ADT/STLForwardCompat.h"
0027 #include "llvm/ADT/iterator_range.h"
0028 #include "llvm/Support/Casting.h"
0029 #include <cassert>
0030 #include <cstdint>
0031 #include <optional>
0032 #include <utility>
0033 
0034 //==------------------------------------------------------------------------==//
0035 //  Base SVal types.
0036 //==------------------------------------------------------------------------==//
0037 
0038 namespace clang {
0039 
0040 class CXXBaseSpecifier;
0041 class FunctionDecl;
0042 class LabelDecl;
0043 
0044 namespace ento {
0045 
0046 class CompoundValData;
0047 class LazyCompoundValData;
0048 class MemRegion;
0049 class PointerToMemberData;
0050 class SValBuilder;
0051 class TypedValueRegion;
0052 
0053 /// SVal - Represents a symbolic expression, which can be either an L-value
0054 /// or an R-value. Every SVal is a (Kind, Data) pair; see the members below.
0055 ///
0056 class SVal {
0057 public:
0058   enum SValKind : unsigned char {
0059 #define BASIC_SVAL(Id, Parent) Id##Kind,
0060 #define LOC_SVAL(Id, Parent) Loc##Id##Kind,
0061 #define NONLOC_SVAL(Id, Parent) NonLoc##Id##Kind,
0062 #define SVAL_RANGE(Id, First, Last)                                            \
0063   BEGIN_##Id = Id##First##Kind, END_##Id = Id##Last##Kind,
0064 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
0065   };
0066 
0067 protected:
0068   const void *Data = nullptr; // Kind-specific payload, read via castDataAs().
0069   SValKind Kind = UndefinedValKind; // A default-constructed SVal is undefined.
0070 
0071   explicit SVal(SValKind Kind, const void *Data = nullptr)
0072       : Data(Data), Kind(Kind) {}
0073 
0074   template <typename T> const T *castDataAs() const {
0075     return static_cast<const T *>(Data);
0076   }
0077 
0078 public:
0079   explicit SVal() = default;
0080 
0081   /// Convert to the specified SVal type via llvm::cast, which asserts that
0082   /// this SVal is of the desired type.
0083   template <typename T> T castAs() const { return llvm::cast<T>(*this); }
0084 
0085   /// Convert to the specified SVal type via llvm::dyn_cast, returning
0086   /// std::nullopt if this SVal is not of the desired type.
0087   template <typename T> std::optional<T> getAs() const {
0088     return llvm::dyn_cast<T>(*this);
0089   }
0090 
0091   SValKind getKind() const { return Kind; }
0092 
0093   StringRef getKindStr() const;
0094 
0095   // This method is required for using SVal in a FoldingSetNode.  It adds the
0096   // Data pointer and the integer value of the kind to ID as the signature.
0097   void Profile(llvm::FoldingSetNodeID &ID) const {
0098     ID.AddPointer(Data);
0099     ID.AddInteger(llvm::to_underlying(getKind()));
0100   }
0101 
0102   bool operator==(SVal R) const { return Kind == R.Kind && Data == R.Data; }
0103   bool operator!=(SVal R) const { return !(*this == R); }
0104 
0105   bool isUnknown() const { return getKind() == UnknownValKind; }
0106 
0107   bool isUndef() const { return getKind() == UndefinedValKind; }
0108 
0109   bool isUnknownOrUndef() const { return isUnknown() || isUndef(); }
0110 
0111   bool isValid() const { return !isUnknownOrUndef(); }
0112 
0113   bool isConstant() const;
0114 
0115   bool isConstant(int I) const;
0116 
0117   bool isZeroConstant() const;
0118 
0119   /// getAsFunctionDecl - If this SVal is a MemRegionVal and wraps a
0120   /// CodeTextRegion wrapping a FunctionDecl, return that FunctionDecl.
0121   /// Otherwise return nullptr.
0122   const FunctionDecl *getAsFunctionDecl() const;
0123 
0124   /// If this SVal is a location and wraps a symbol, return that
0125   /// SymbolRef. Otherwise return a null SymbolRef.
0126   ///
0127   /// Casts are ignored during lookup.
0128   /// \param IncludeBaseRegions The boolean that controls whether the search
0129   /// should continue to the base regions if the region is not symbolic.
0130   SymbolRef getAsLocSymbol(bool IncludeBaseRegions = false) const;
0131 
0132   /// Get the symbol in the SVal or its base region.
0133   SymbolRef getLocSymbolInBase() const;
0134 
0135   /// If this SVal wraps a symbol, return that SymbolRef.
0136   /// Otherwise, return a null SymbolRef.
0137   ///
0138   /// Casts are ignored during lookup.
0139   /// \param IncludeBaseRegions The boolean that controls whether the search
0140   /// should continue to the base regions if the region is not symbolic.
0141   SymbolRef getAsSymbol(bool IncludeBaseRegions = false) const;
0142 
0143   /// If this SVal is loc::ConcreteInt or nonloc::ConcreteInt,
0144   /// return a pointer to the APSInt which is held in it.
0145   /// Otherwise, return nullptr.
0146   const llvm::APSInt *getAsInteger() const;
0147 
0148   const MemRegion *getAsRegion() const;
0149 
0150   /// printJson - Pretty-prints in JSON format.
0151   void printJson(raw_ostream &Out, bool AddQuotes) const;
0152 
0153   void dumpToStream(raw_ostream &OS) const;
0154   void dump() const;
0155 
0156   llvm::iterator_range<SymExpr::symbol_iterator> symbols() const {
0157     if (const SymExpr *SE = getAsSymbol(/*IncludeBaseRegions=*/true))
0158       return SE->symbols();
0159     SymExpr::symbol_iterator end{}; // No symbol found: yield an empty range.
0160     return llvm::make_range(end, end);
0161   }
0162 
0163   /// Try to get a reasonable type for the given value.
0164   ///
0165   /// \returns The best approximation of the value type or Null.
0166   /// In theory, all symbolic values should be typed, but this function
0167   /// is still a WIP and might have a few blind spots.
0168   ///
0169   /// \note This function should not be used when the user has access to the
0170   /// bound expression AST node as well, since AST always has exact types.
0171   ///
0172   /// \note Loc values are interpreted as pointer rvalues for the purposes of
0173   /// this method.
0174   QualType getType(const ASTContext &) const;
0175 };
0176 
0177 inline raw_ostream &operator<<(raw_ostream &Out, clang::ento::SVal Val) {
0178   Val.dumpToStream(Out); // Delegate the formatting to the value itself.
0179   return Out;            // Hand the stream back so insertions can chain.
0180 }
0181 
0182 namespace nonloc {
0183 /// Sub-kinds for NonLoc values: nonloc::IdKind names SVal::NonLocIdKind.
0184 #define NONLOC_SVAL(Id, Parent)                                                \
0185   inline constexpr auto Id##Kind = SVal::SValKind::NonLoc##Id##Kind;
0186 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
0187 } // namespace nonloc
0188 
0189 namespace loc {
0190 /// Sub-kinds for Loc values: loc::IdKind names SVal::LocIdKind.
0191 #define LOC_SVAL(Id, Parent)                                                   \
0192   inline constexpr auto Id##Kind = SVal::SValKind::Loc##Id##Kind;
0193 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
0194 } // namespace loc
0195 
0196 class UndefinedVal : public SVal { // The value of kind UndefinedValKind.
0197 public:
0198   UndefinedVal() : SVal(UndefinedValKind) {}
0199   static bool classof(SVal V) { return V.isUndef(); }
0200 };
0201 
0202 class DefinedOrUnknownSVal : public SVal {
0203 public:
0204   // Calling these is made a compile-time error: isUndef() is tautologically
0205   // false for this class, and isValid() is deleted alongside it.
0206   bool isValid() const = delete;
0207   bool isUndef() const = delete;
0208 
0209   static bool classof(SVal V) { return V.getKind() != UndefinedValKind; }
0210 
0211 protected:
0212   explicit DefinedOrUnknownSVal(SValKind Kind, const void *Data = nullptr)
0213       : SVal(Kind, Data) {}
0214 };
0215 
0216 class UnknownVal : public DefinedOrUnknownSVal {
0217 public:
0218   explicit UnknownVal() : DefinedOrUnknownSVal(UnknownValKind) {}
0219   /// Matches exactly the values whose kind is UnknownValKind.
0220   static bool classof(SVal V) { return V.isUnknown(); }
0221 };
0222 
0223 class DefinedSVal : public DefinedOrUnknownSVal {
0224 public:
0225   // These queries have a fixed answer for a value that is neither unknown
0226   // nor undefined, so calling them is turned into a compile-time error.
0227   bool isValid() const = delete;
0228   bool isUnknownOrUndef() const = delete;
0229   bool isUnknown() const = delete;
0230 
0231   static bool classof(SVal V) { return V.isValid(); }
0232 
0233 protected:
0234   explicit DefinedSVal(SValKind Kind, const void *Data)
0235       : DefinedOrUnknownSVal(Kind, Data) {}
0236 };
0237 
0238 class NonLoc : public DefinedSVal { // Common base of the non-location values.
0239 protected:
0240   NonLoc(SValKind Kind, const void *Data) : DefinedSVal(Kind, Data) {}
0241 
0242 public:
0243   void dumpToStream(raw_ostream &Out) const;
0244   /// True for array, record, any-complex and vector types.
0245   static bool isCompoundType(QualType T) {
0246     const bool IsAggregate = T->isArrayType() || T->isRecordType();
0247     return IsAggregate || T->isAnyComplexType() || T->isVectorType();
0248   }
0249   /// True when V's kind is within [BEGIN_NonLoc, END_NonLoc].
0250   static bool classof(SVal V) {
0251     return !(V.getKind() < BEGIN_NonLoc || END_NonLoc < V.getKind());
0252   }
0253 };
0254 
0255 class Loc : public DefinedSVal { // Common base of the location values.
0256 protected:
0257   Loc(SValKind Kind, const void *Data) : DefinedSVal(Kind, Data) {}
0258 
0259 public:
0260   void dumpToStream(raw_ostream &Out) const;
0261   /// True for any-pointer, block-pointer, reference and null-pointer types.
0262   static bool isLocType(QualType T) {
0263     const bool IsPtr = T->isAnyPointerType() || T->isBlockPointerType();
0264     return IsPtr || T->isReferenceType() || T->isNullPtrType();
0265   }
0266   /// True when V's kind is within [BEGIN_Loc, END_Loc].
0267   static bool classof(SVal V) {
0268     return !(V.getKind() < BEGIN_Loc || END_Loc < V.getKind());
0269   }
0270 };
0271 
0272 //==------------------------------------------------------------------------==//
0273 //  Subclasses of NonLoc.
0274 //==------------------------------------------------------------------------==//
0275 
0276 namespace nonloc {
0277 
0278 /// Represents a symbolic expression that isn't a location.
0279 class SymbolVal : public NonLoc {
0280 public:
0281   SymbolVal() = delete;
0282   /// Wraps \p Sym, which must be non-null and must not have a Loc type.
0283   explicit SymbolVal(SymbolRef Sym) : NonLoc(SymbolValKind, Sym) {
0284     assert(Sym);
0285     assert(!Loc::isLocType(Sym->getType()));
0286   }
0287 
0288   /// Returns the wrapped symbol. The stored pointer is read through the
0289   /// typed castDataAs<>() accessor rather than a C-style cast.
0290   LLVM_ATTRIBUTE_RETURNS_NONNULL
0291   SymbolRef getSymbol() const { return castDataAs<SymExpr>(); }
0292 
0293   /// True when the wrapped symbol is not a SymbolData node.
0294   bool isExpression() const { return !isa<SymbolData>(getSymbol()); }
0295 
0296   static bool classof(SVal V) { return V.getKind() == SymbolValKind; }
0297 };
0298 
0299 /// A non-location value that holds a concrete integer constant.
0300 class ConcreteInt : public NonLoc {
0301 public:
0302   explicit ConcreteInt(APSIntPtr V) : NonLoc(ConcreteIntKind, V.get()) {}
0303   /// Rebuilds the APSIntPtr from the pointer stored by the constructor.
0304   APSIntPtr getValue() const {
0305     // Safe: Data was obtained from an APSIntPtr in the constructor.
0306     const llvm::APSInt *Stored = castDataAs<llvm::APSInt>();
0307     return APSIntPtr::unsafeConstructor(Stored);
0308   }
0309   static bool classof(SVal V) { return ConcreteIntKind == V.getKind(); }
0310 };
0311 
0312 class LocAsInteger : public NonLoc { // A Loc paired with a bit count.
0313   friend class ento::SValBuilder;
0314   using PairTy = std::pair<SVal, uintptr_t>;
0315   // Stores the address of \p data (not a copy), so it must outlive this value.
0316   explicit LocAsInteger(const PairTy &data) : NonLoc(LocAsIntegerKind, &data) {
0317     // We do not need to represent loc::ConcreteInt as LocAsInteger,
0318     // as it'd collapse into a nonloc::ConcreteInt instead.
0319     [[maybe_unused]] SValKind K = data.first.getKind();
0320     assert(K == loc::MemRegionValKind || K == loc::GotoLabelKind);
0321   }
0322 
0323 public:
0324   /// Returns the first element of the stored pair, cast to Loc.
0325   Loc getLoc() const { return castDataAs<PairTy>()->first.castAs<Loc>(); }
0326 
0327   /// Returns the second element of the stored pair, narrowed to unsigned.
0328   unsigned getNumBits() const {
0329     return static_cast<unsigned>(castDataAs<PairTy>()->second);
0330   }
0331 
0332   static bool classof(SVal V) { return LocAsIntegerKind == V.getKind(); }
0333 };
0334 
0335 /// nonloc::CompoundVal is the simplest example of a concrete compound value:
0336 /// it represents a concrete r-value of an initializer-list or a string.
0337 /// Internally, it contains an llvm::ImmutableList of SVal's stored inside the
0338 /// literal.
0339 class CompoundVal : public NonLoc {
0340   friend class ento::SValBuilder;
0341 
0342   explicit CompoundVal(const CompoundValData *D) : NonLoc(CompoundValKind, D) {
0343     assert(D);
0344   }
0345 
0346 public:
0347   using iterator = llvm::ImmutableList<SVal>::iterator;
0348   iterator begin() const;
0349   iterator end() const;
0350 
0351   LLVM_ATTRIBUTE_RETURNS_NONNULL
0352   const CompoundValData *getValue() const {
0353     return castDataAs<CompoundValData>();
0354   }
0355 
0356   static bool classof(SVal V) { return CompoundValKind == V.getKind(); }
0357 };
0358 
0359 /// While nonloc::CompoundVal covers a few simple use cases,
0360 /// nonloc::LazyCompoundVal is a more performant and flexible way to represent
0361 /// an rvalue of record type, so it shows up much more frequently during
0362 /// analysis. This value is an r-value that represents a snapshot of any
0363 /// structure "as a whole" at a given moment during the analysis. Such a value
0364 /// is already quite far from being referred to as "concrete", as many fields
0365 /// inside it would be unknown or symbolic. nonloc::LazyCompoundVal operates by
0366 /// storing two things:
0367 ///   * a reference to the TypedValueRegion being snapshotted (yes, it is always
0368 ///     typed), and also
0369 ///   * a reference to the whole Store object, obtained from the ProgramState in
0370 ///     which the nonloc::LazyCompoundVal was created.
0371 ///
0372 /// Note that the old ProgramState and its Store are kept alive during the
0373 /// analysis because these are immutable functional data structures and each new
0374 /// Store value is represented as "earlier Store" + "additional binding".
0375 ///
0376 /// Essentially, nonloc::LazyCompoundVal is a performance optimization for the
0377 /// analyzer. Because Store is immutable, creating a nonloc::LazyCompoundVal is
0378 /// a very cheap operation. Note that the Store contains all region bindings in
0379 /// the program state, not only those related to the region. Later, if
0380 /// necessary, such a value can be unpacked -- e.g. when assigned to a variable.
0381 ///
0382 /// If you ever need to inspect the contents of the LazyCompoundVal, you can use
0383 /// StoreManager::iterBindings(). It'll iterate through all values in the Store,
0384 /// but you're only interested in the ones that belong to
0385 /// LazyCompoundVal::getRegion(); other bindings are immaterial.
0386 ///
0387 /// NOTE: LazyCompoundVal::getRegion() itself is also immaterial (see the actual
0388 /// method docs for details).
0389 class LazyCompoundVal : public NonLoc {
0390   friend class ento::SValBuilder;
0391 
0392   explicit LazyCompoundVal(const LazyCompoundValData *D)
0393       : NonLoc(LazyCompoundValKind, D) {
0394     assert(D);
0395   }
0396 
0397 public:
0398   /// It might return null.
0399   const void *getStore() const;
0400 
0401   LLVM_ATTRIBUTE_RETURNS_NONNULL
0402   const LazyCompoundValData *getCVData() const {
0403     return castDataAs<LazyCompoundValData>();
0404   }
0405 
0406   /// This function itself is immaterial. It is only an implementation detail.
0407   /// LazyCompoundVal represents only the rvalue, the data (known or unknown)
0408   /// that *was* stored in that region *at some point in the past*. The region
0409   /// should not be used for any purpose other than figuring out what part of
0410   /// the frozen Store you're interested in. The value does not represent the
0411   /// *current* value of that region. Sometimes it may, but this should not be
0412   /// relied upon. Instead, if you want to figure out what region it represents,
0413   /// you typically need to see where you got it from in the first place. The
0414   /// region is absolutely not analogous to the C++ "this" pointer. It is also
0415   /// not a valid way to "materialize" the prvalue into a glvalue in C++,
0416   /// because the region represents the *old* storage (sometimes very old), not
0417   /// the *future* storage.
0418   LLVM_ATTRIBUTE_RETURNS_NONNULL
0419   const TypedValueRegion *getRegion() const;
0420 
0421   static bool classof(SVal V) { return LazyCompoundValKind == V.getKind(); }
0422 };
0423 
0424 /// Value representing pointer-to-member.
0425 ///
0426 /// This value is qualified as NonLoc because neither loading nor storing
0427 /// operations are applied to it. Instead, the analyzer uses the L-value coming
0428 /// from pointer-to-member applied to an object.
0429 /// This SVal is represented by a NamedDecl which can be a member function
0430 /// pointer or a member data pointer and an optional list of CXXBaseSpecifiers.
0431 /// This list is required to accumulate the pointer-to-member cast history to
0432 /// figure out the correct subobject field. In particular, implicit casts grow
0433 /// this list and explicit casts like static_cast shrink this list.
0434 class PointerToMember : public NonLoc {
0435   friend class ento::SValBuilder;
0436 
0437 public:
0438   using PTMDataType =
0439       llvm::PointerUnion<const NamedDecl *, const PointerToMemberData *>;
0440   using iterator = llvm::ImmutableList<const CXXBaseSpecifier *>::iterator;
0441 
0442   /// Decodes the opaque Data pointer back into the pointer union.
0443   const PTMDataType getPTMData() const {
0444     void *Opaque = const_cast<void *>(Data);
0445     return PTMDataType::getFromOpaqueValue(Opaque);
0446   }
0447 
0448   bool isNullMemberPointer() const;
0449   const NamedDecl *getDecl() const;
0450 
0451   /// getDecl() converted with dyn_cast_or_null to the requested decl type.
0452   template <typename AdjustedDecl> const AdjustedDecl *getDeclAs() const {
0453     return dyn_cast_or_null<AdjustedDecl>(getDecl());
0454   }
0455 
0456   iterator begin() const;
0457   iterator end() const;
0458 
0459   static bool classof(SVal V) { return PointerToMemberKind == V.getKind(); }
0460 
0461 private:
0462   explicit PointerToMember(const PTMDataType D)
0463       : NonLoc(PointerToMemberKind, D.getOpaqueValue()) {}
0464 };
0465 
0466 } // namespace nonloc
0467 
0468 //==------------------------------------------------------------------------==//
0469 //  Subclasses of Loc.
0470 //==------------------------------------------------------------------------==//
0471 
0472 namespace loc {
0473 
0474 class GotoLabel : public Loc { // Location value that wraps a LabelDecl.
0475 public:
0476   explicit GotoLabel(const LabelDecl *Label) : Loc(GotoLabelKind, Label) {
0477     assert(Label);
0478   }
0479   /// Returns the label passed to the constructor.
0480   const LabelDecl *getLabel() const { return castDataAs<LabelDecl>(); }
0481 
0482   static bool classof(SVal V) { return GotoLabelKind == V.getKind(); }
0483 };
0484 
0485 class MemRegionVal : public Loc { // Location value that wraps a MemRegion.
0486 public:
0487   explicit MemRegionVal(const MemRegion *r) : Loc(MemRegionValKind, r) {
0488     assert(r);
0489   }
0490 
0491   /// Get the underlying region.
0492   LLVM_ATTRIBUTE_RETURNS_NONNULL
0493   const MemRegion *getRegion() const { return castDataAs<MemRegion>(); }
0494 
0495   /// Get the underlying region and strip casts.
0496   LLVM_ATTRIBUTE_RETURNS_NONNULL
0497   const MemRegion *stripCasts(bool StripBaseCasts = true) const;
0498 
0499   /// Get the underlying region converted with dyn_cast to REGION.
0500   template <typename REGION> const REGION *getRegionAs() const {
0501     return dyn_cast<REGION>(getRegion());
0502   }
0503 
0504   /// Two MemRegionVals are equal when they wrap the same region pointer.
0505   bool operator==(const MemRegionVal &R) const {
0506     return R.getRegion() == getRegion();
0507   }
0508   bool operator!=(const MemRegionVal &R) const {
0509     return !(getRegion() == R.getRegion());
0510   }
0511 
0512   static bool classof(SVal V) { return MemRegionValKind == V.getKind(); }
0513 };
0514 
0515 class ConcreteInt : public Loc { // A location given as a concrete integer.
0516 public:
0517   explicit ConcreteInt(APSIntPtr V) : Loc(ConcreteIntKind, V.get()) {}
0518   /// Rebuilds the APSIntPtr from the pointer stored by the constructor.
0519   APSIntPtr getValue() const {
0520     // Safe: Data was obtained from an APSIntPtr in the constructor.
0521     const llvm::APSInt *Stored = castDataAs<llvm::APSInt>();
0522     return APSIntPtr::unsafeConstructor(Stored);
0523   }
0524   static bool classof(SVal V) { return ConcreteIntKind == V.getKind(); }
0525 };
0526 
0527 } // namespace loc
0528 } // namespace ento
0529 } // namespace clang
0530 
0531 namespace llvm {
0532 /// Casting support for SVal and its subclasses: results are returned by
0533 /// value, and a failed dyn_cast yields an empty std::optional<To>.
0534 template <typename To, typename From>
0535 struct CastInfo<
0536     To, From, std::enable_if_t<std::is_base_of_v<::clang::ento::SVal, From>>>
0537     : public CastIsPossible<To, ::clang::ento::SVal> {
0538   using Self = CastInfo<
0539       To, From, std::enable_if_t<std::is_base_of_v<::clang::ento::SVal, From>>>;
0540   static bool isPossible(const From &V) {
0541     return To::classof(static_cast<const ::clang::ento::SVal &>(V));
0542   }
0543   static std::optional<To> castFailed() { return std::nullopt; }
0544   static To doCast(const From &f) {
0545     return *static_cast<const To *>(cast<::clang::ento::SVal>(&f));
0546   }
0547   static std::optional<To> doCastIfPossible(const From &f) {
0548     if (Self::isPossible(f))
0549       return doCast(f);
0550     return Self::castFailed();
0551   }
0552 };
0553 } // namespace llvm
0554 
0555 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H