Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:37:09

0001 //== RangedConstraintManager.h ----------------------------------*- C++ -*--==//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 //  Ranged constraint manager, built on SimpleConstraintManager.
0010 //
0011 //===----------------------------------------------------------------------===//
0012 
0013 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_RANGEDCONSTRAINTMANAGER_H
0014 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_RANGEDCONSTRAINTMANAGER_H
0015 
0016 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
0017 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
0018 #include "clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h"
0019 #include "llvm/ADT/APSInt.h"
0020 #include "llvm/Support/Allocator.h"
0021 
0022 namespace clang {
0023 
0024 namespace ento {
0025 
0026 /// A Range represents the closed range [from, to].  The caller must
0027 /// guarantee that from <= to.  Note that Range is immutable, so as not
0028 /// to subvert RangeSet's immutability.
0029 class Range {
0030 public:
0031   Range(const llvm::APSInt &From, const llvm::APSInt &To) : Impl(&From, &To) {
0032     assert(From <= To);
0033   }
0034 
0035   Range(const llvm::APSInt &Point) : Range(Point, Point) {}
0036 
0037   bool Includes(const llvm::APSInt &Point) const {
0038     return From() <= Point && Point <= To();
0039   }
0040   const llvm::APSInt &From() const { return *Impl.first; }
0041   const llvm::APSInt &To() const { return *Impl.second; }
0042   const llvm::APSInt *getConcreteValue() const {
0043     return &From() == &To() ? &From() : nullptr;
0044   }
0045 
0046   void Profile(llvm::FoldingSetNodeID &ID) const {
0047     ID.AddPointer(&From());
0048     ID.AddPointer(&To());
0049   }
0050   void dump(raw_ostream &OS) const;
0051   void dump() const;
0052 
0053   // In order to keep non-overlapping ranges sorted, we can compare only From
0054   // points.
0055   bool operator<(const Range &RHS) const { return From() < RHS.From(); }
0056 
0057   bool operator==(const Range &RHS) const { return Impl == RHS.Impl; }
0058   bool operator!=(const Range &RHS) const { return !operator==(RHS); }
0059 
0060 private:
0061   std::pair<const llvm::APSInt *, const llvm::APSInt *> Impl;
0062 };
0063 
0064 /// @class RangeSet is a persistent set of non-overlapping ranges.
0065 ///
0066 /// New RangeSet objects can be ONLY produced by RangeSet::Factory object, which
0067 /// also supports the most common operations performed on range sets.
0068 ///
0069 /// Empty set corresponds to an overly constrained symbol meaning that there
0070 /// are no possible values for that symbol.
0071 class RangeSet {
0072 public:
0073   class Factory;
0074 
0075 private:
0076   // We use llvm::SmallVector as the underlying container for the following
0077   // reasons:
0078   //
0079   //   * Range sets are usually very simple, 1 or 2 ranges.
0080   //     That's why llvm::ImmutableSet is not perfect.
0081   //
0082   //   * Ranges in sets are NOT overlapping, so it is natural to keep them
0083   //     sorted for efficient operations and queries.  For this reason,
0084   //     llvm::SmallSet doesn't fit the requirements, it is not sorted when it
0085   //     is a vector.
0086   //
0087   //   * Range set operations usually a bit harder than add/remove a range.
0088   //     Complex operations might do many of those for just one range set.
0089   //     Formerly it used to be llvm::ImmutableSet, which is inefficient for our
0090   //     purposes as we want to make these operations BOTH immutable AND
0091   //     efficient.
0092   //
0093   //   * Iteration over ranges is widespread and a more cache-friendly
0094   //     structure is preferred.
0095   using ImplType = llvm::SmallVector<Range, 4>;
0096 
0097   struct ContainerType : public ImplType, public llvm::FoldingSetNode {
0098     void Profile(llvm::FoldingSetNodeID &ID) const {
0099       for (const Range &It : *this) {
0100         It.Profile(ID);
0101       }
0102     }
0103   };
0104   // This is a non-owning pointer to an actual container.
0105   // The memory is fully managed by the factory and is alive as long as the
0106   // factory itself is alive.
0107   // It is a pointer as opposed to a reference, so we can easily reassign
0108   // RangeSet objects.
0109   using UnderlyingType = const ContainerType *;
0110   UnderlyingType Impl;
0111 
0112 public:
0113   using const_iterator = ImplType::const_iterator;
0114 
0115   const_iterator begin() const { return Impl->begin(); }
0116   const_iterator end() const { return Impl->end(); }
0117   size_t size() const { return Impl->size(); }
0118 
0119   bool isEmpty() const { return Impl->empty(); }
0120 
0121   class Factory {
0122   public:
0123     Factory(BasicValueFactory &BV) : ValueFactory(BV) {}
0124 
0125     /// Create a new set with all ranges from both LHS and RHS.
0126     /// Possible intersections are not checked here.
0127     ///
0128     /// Complexity: O(N + M)
0129     ///             where N = size(LHS), M = size(RHS)
0130     RangeSet add(RangeSet LHS, RangeSet RHS);
0131     /// Create a new set with all ranges from the original set plus the new one.
0132     /// Possible intersections are not checked here.
0133     ///
0134     /// Complexity: O(N)
0135     ///             where N = size(Original)
0136     RangeSet add(RangeSet Original, Range Element);
0137     /// Create a new set with all ranges from the original set plus the point.
0138     /// Possible intersections are not checked here.
0139     ///
0140     /// Complexity: O(N)
0141     ///             where N = size(Original)
0142     RangeSet add(RangeSet Original, const llvm::APSInt &Point);
0143     /// Create a new set which is a union of two given ranges.
0144     /// Possible intersections are not checked here.
0145     ///
0146     /// Complexity: O(N + M)
0147     ///             where N = size(LHS), M = size(RHS)
0148     RangeSet unite(RangeSet LHS, RangeSet RHS);
0149     /// Create a new set by uniting given range set with the given range.
0150     /// All intersections and adjacent ranges are handled here.
0151     ///
0152     /// Complexity: O(N)
0153     ///             where N = size(Original)
0154     RangeSet unite(RangeSet Original, Range Element);
0155     /// Create a new set by uniting given range set with the given point.
0156     /// All intersections and adjacent ranges are handled here.
0157     ///
0158     /// Complexity: O(N)
0159     ///             where N = size(Original)
0160     RangeSet unite(RangeSet Original, llvm::APSInt Point);
0161     /// Create a new set by uniting given range set with the given range
0162     /// between points. All intersections and adjacent ranges are handled here.
0163     ///
0164     /// Complexity: O(N)
0165     ///             where N = size(Original)
0166     RangeSet unite(RangeSet Original, llvm::APSInt From, llvm::APSInt To);
0167 
0168     RangeSet getEmptySet() { return &EmptySet; }
0169 
0170     /// Create a new set with just one range.
0171     /// @{
0172     RangeSet getRangeSet(Range Origin);
0173     RangeSet getRangeSet(const llvm::APSInt &From, const llvm::APSInt &To) {
0174       return getRangeSet(Range(From, To));
0175     }
0176     RangeSet getRangeSet(const llvm::APSInt &Origin) {
0177       return getRangeSet(Origin, Origin);
0178     }
0179     /// @}
0180 
0181     /// Intersect the given range sets.
0182     ///
0183     /// Complexity: O(N + M)
0184     ///             where N = size(LHS), M = size(RHS)
0185     RangeSet intersect(RangeSet LHS, RangeSet RHS);
0186     /// Intersect the given set with the closed range [Lower, Upper].
0187     ///
0188     /// Unlike the Range type, this range uses modular arithmetic, corresponding
0189     /// to the common treatment of C integer overflow. Thus, if the Lower bound
0190     /// is greater than the Upper bound, the range is taken to wrap around. This
0191     /// is equivalent to taking the intersection with the two ranges [Min,
0192     /// Upper] and [Lower, Max], or, alternatively, /removing/ all integers
0193     /// between Upper and Lower.
0194     ///
0195     /// Complexity: O(N)
0196     ///             where N = size(What)
0197     RangeSet intersect(RangeSet What, llvm::APSInt Lower, llvm::APSInt Upper);
0198     /// Intersect the given range with the given point.
0199     ///
0200     /// The result can be either an empty set or a set containing the given
0201     /// point depending on whether the point is in the range set.
0202     ///
0203     /// Complexity: O(logN)
0204     ///             where N = size(What)
0205     RangeSet intersect(RangeSet What, llvm::APSInt Point);
0206 
0207     /// Delete the given point from the range set.
0208     ///
0209     /// Complexity: O(N)
0210     ///             where N = size(From)
0211     RangeSet deletePoint(RangeSet From, const llvm::APSInt &Point);
0212     /// Negate the given range set.
0213     ///
0214     /// Turn all [A, B] ranges to [-B, -A], when "-" is a C-like unary minus
0215     /// operation under the values of the type.
0216     ///
0217     /// We also handle MIN because applying unary minus to MIN does not change
0218     /// it.
0219     /// Example 1:
0220     /// char x = -128;        // -128 is a MIN value in a range of 'char'
0221     /// char y = -x;          // y: -128
0222     ///
0223     /// Example 2:
0224     /// unsigned char x = 0;  // 0 is a MIN value in a range of 'unsigned char'
0225     /// unsigned char y = -x; // y: 0
0226     ///
0227     /// And it makes us to separate the range
0228     /// like [MIN, N] to [MIN, MIN] U [-N, MAX].
0229     /// For instance, whole range is {-128..127} and subrange is [-128,-126],
0230     /// thus [-128,-127,-126,...] negates to [-128,...,126,127].
0231     ///
0232     /// Negate restores disrupted ranges on bounds,
0233     /// e.g. [MIN, B] => [MIN, MIN] U [-B, MAX] => [MIN, B].
0234     ///
0235     /// Negate is a self-inverse function, i.e. negate(negate(R)) == R.
0236     ///
0237     /// Complexity: O(N)
0238     ///             where N = size(What)
0239     RangeSet negate(RangeSet What);
0240     /// Performs promotions, truncations and conversions of the given set.
0241     ///
0242     /// This function is optimized for each of the six cast cases:
0243     /// - noop
0244     /// - conversion
0245     /// - truncation
0246     /// - truncation-conversion
0247     /// - promotion
0248     /// - promotion-conversion
0249     ///
0250     /// NOTE: This function is NOT self-inverse for truncations, because of
0251     ///       the higher bits loss:
0252     ///     - castTo(castTo(OrigRangeOfInt, char), int) != OrigRangeOfInt.
0253     ///     - castTo(castTo(OrigRangeOfChar, int), char) == OrigRangeOfChar.
0254     ///       But it is self-inverse for all the rest casts.
0255     ///
0256     /// Complexity:
0257     ///     - Noop                               O(1);
0258     ///     - Truncation                         O(N^2);
0259     ///     - Another case                       O(N);
0260     ///     where N = size(What)
0261     RangeSet castTo(RangeSet What, APSIntType Ty);
0262     RangeSet castTo(RangeSet What, QualType T);
0263 
0264     /// Return associated value factory.
0265     BasicValueFactory &getValueFactory() const { return ValueFactory; }
0266 
0267   private:
0268     /// Return a persistent version of the given container.
0269     RangeSet makePersistent(ContainerType &&From);
0270     /// Construct a new persistent version of the given container.
0271     ContainerType *construct(ContainerType &&From);
0272 
0273     RangeSet intersect(const ContainerType &LHS, const ContainerType &RHS);
0274     /// NOTE: This function relies on the fact that all values in the
0275     /// containers are persistent (created via BasicValueFactory::getValue).
0276     ContainerType unite(const ContainerType &LHS, const ContainerType &RHS);
0277 
0278     /// This is a helper function for `castTo` method. Implies not to be used
0279     /// separately.
0280     /// Performs a truncation case of a cast operation.
0281     ContainerType truncateTo(RangeSet What, APSIntType Ty);
0282 
0283     /// This is a helper function for `castTo` method. Implies not to be used
0284     /// separately.
0285     /// Performs a conversion case and a promotion-conversion case for signeds
0286     /// of a cast operation.
0287     ContainerType convertTo(RangeSet What, APSIntType Ty);
0288 
0289     /// This is a helper function for `castTo` method. Implies not to be used
0290     /// separately.
0291     /// Performs a promotion for unsigneds only.
0292     ContainerType promoteTo(RangeSet What, APSIntType Ty);
0293 
0294     // Many operations include producing new APSInt values and that's why
0295     // we need this factory.
0296     BasicValueFactory &ValueFactory;
0297     // Allocator for all the created containers.
0298     // Containers might own their own memory and that's why it is specific
0299     // for the type, so it calls container destructors upon deletion.
0300     llvm::SpecificBumpPtrAllocator<ContainerType> Arena;
0301     // Usually we deal with the same ranges and range sets over and over.
0302     // Here we track all created containers and try not to repeat ourselves.
0303     llvm::FoldingSet<ContainerType> Cache;
0304     static ContainerType EmptySet;
0305   };
0306 
0307   RangeSet(const RangeSet &) = default;
0308   RangeSet &operator=(const RangeSet &) = default;
0309   RangeSet(RangeSet &&) = default;
0310   RangeSet &operator=(RangeSet &&) = default;
0311   ~RangeSet() = default;
0312 
0313   /// Construct a new RangeSet representing '{ [From, To] }'.
0314   RangeSet(Factory &F, const llvm::APSInt &From, const llvm::APSInt &To)
0315       : RangeSet(F.getRangeSet(From, To)) {}
0316 
0317   /// Construct a new RangeSet representing the given point as a range.
0318   RangeSet(Factory &F, const llvm::APSInt &Point)
0319       : RangeSet(F.getRangeSet(Point)) {}
0320 
0321   static void Profile(llvm::FoldingSetNodeID &ID, const RangeSet &RS) {
0322     ID.AddPointer(RS.Impl);
0323   }
0324 
0325   /// Profile - Generates a hash profile of this RangeSet for use
0326   ///  by FoldingSet.
0327   void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, *this); }
0328 
0329   /// getConcreteValue - If a symbol is constrained to equal a specific integer
0330   ///  constant then this method returns that value.  Otherwise, it returns
0331   ///  NULL.
0332   const llvm::APSInt *getConcreteValue() const {
0333     return Impl->size() == 1 ? begin()->getConcreteValue() : nullptr;
0334   }
0335 
0336   /// Get the minimal value covered by the ranges in the set.
0337   ///
0338   /// Complexity: O(1)
0339   const llvm::APSInt &getMinValue() const;
0340   /// Get the maximal value covered by the ranges in the set.
0341   ///
0342   /// Complexity: O(1)
0343   const llvm::APSInt &getMaxValue() const;
0344 
0345   bool isUnsigned() const;
0346   uint32_t getBitWidth() const;
0347   APSIntType getAPSIntType() const;
0348 
0349   /// Test whether the given point is contained by any of the ranges.
0350   ///
0351   /// Complexity: O(logN)
0352   ///             where N = size(this)
0353   bool contains(llvm::APSInt Point) const { return containsImpl(Point); }
0354 
0355   bool containsZero() const {
0356     APSIntType T{getMinValue()};
0357     return contains(T.getZeroValue());
0358   }
0359 
0360   /// Test if the range is the [0,0] range.
0361   ///
0362   /// Complexity: O(1)
0363   bool encodesFalseRange() const {
0364     const llvm::APSInt *Constant = getConcreteValue();
0365     return Constant && Constant->isZero();
0366   }
0367 
0368   /// Test if the range doesn't contain zero.
0369   ///
0370   /// Complexity: O(logN)
0371   ///             where N = size(this)
0372   bool encodesTrueRange() const { return !containsZero(); }
0373 
0374   void dump(raw_ostream &OS) const;
0375   void dump() const;
0376 
0377   bool operator==(const RangeSet &Other) const { return *Impl == *Other.Impl; }
0378   bool operator!=(const RangeSet &Other) const { return !(*this == Other); }
0379 
0380 private:
0381   /* implicit */ RangeSet(ContainerType *RawContainer) : Impl(RawContainer) {}
0382   /* implicit */ RangeSet(UnderlyingType Ptr) : Impl(Ptr) {}
0383 
0384   /// Pin given points to the type represented by the current range set.
0385   ///
0386   /// This makes parameter points to be in-out parameters.
0387   /// In order to maintain consistent types across all of the ranges in the set
0388   /// and to keep all the operations to compare ONLY points of the same type, we
0389   /// need to pin every point before any operation.
0390   ///
0391   /// @Returns true if the given points can be converted to the target type
0392   ///          without changing the values (i.e. trivially) and false otherwise.
0393   /// @{
0394   bool pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const;
0395   bool pin(llvm::APSInt &Point) const;
0396   /// @}
0397 
0398   // This version of this function modifies its arguments (pins it).
0399   bool containsImpl(llvm::APSInt &Point) const;
0400 
0401   friend class Factory;
0402 };
0403 
0404 using ConstraintMap = llvm::ImmutableMap<SymbolRef, RangeSet>;
0405 ConstraintMap getConstraintMap(ProgramStateRef State);
0406 
0407 class RangedConstraintManager : public SimpleConstraintManager {
0408 public:
0409   RangedConstraintManager(ExprEngine *EE, SValBuilder &SB)
0410       : SimpleConstraintManager(EE, SB) {}
0411 
0412   ~RangedConstraintManager() override;
0413 
0414   //===------------------------------------------------------------------===//
0415   // Implementation for interface from SimpleConstraintManager.
0416   //===------------------------------------------------------------------===//
0417 
0418   ProgramStateRef assumeSym(ProgramStateRef State, SymbolRef Sym,
0419                             bool Assumption) override;
0420 
0421   ProgramStateRef assumeSymInclusiveRange(ProgramStateRef State, SymbolRef Sym,
0422                                           const llvm::APSInt &From,
0423                                           const llvm::APSInt &To,
0424                                           bool InRange) override;
0425 
0426   ProgramStateRef assumeSymUnsupported(ProgramStateRef State, SymbolRef Sym,
0427                                        bool Assumption) override;
0428 
0429 protected:
0430   /// Assume a constraint between a symbolic expression and a concrete integer.
0431   virtual ProgramStateRef assumeSymRel(ProgramStateRef State, SymbolRef Sym,
0432                                        BinaryOperator::Opcode op,
0433                                        const llvm::APSInt &Int);
0434 
0435   //===------------------------------------------------------------------===//
0436   // Interface that subclasses must implement.
0437   //===------------------------------------------------------------------===//
0438 
0439   // Each of these is of the form "$Sym+Adj <> V", where "<>" is the comparison
0440   // operation for the method being invoked.
0441 
0442   virtual ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym,
0443                                       const llvm::APSInt &V,
0444                                       const llvm::APSInt &Adjustment) = 0;
0445 
0446   virtual ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym,
0447                                       const llvm::APSInt &V,
0448                                       const llvm::APSInt &Adjustment) = 0;
0449 
0450   virtual ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym,
0451                                       const llvm::APSInt &V,
0452                                       const llvm::APSInt &Adjustment) = 0;
0453 
0454   virtual ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym,
0455                                       const llvm::APSInt &V,
0456                                       const llvm::APSInt &Adjustment) = 0;
0457 
0458   virtual ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym,
0459                                       const llvm::APSInt &V,
0460                                       const llvm::APSInt &Adjustment) = 0;
0461 
0462   virtual ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym,
0463                                       const llvm::APSInt &V,
0464                                       const llvm::APSInt &Adjustment) = 0;
0465 
0466   virtual ProgramStateRef assumeSymWithinInclusiveRange(
0467       ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
0468       const llvm::APSInt &To, const llvm::APSInt &Adjustment) = 0;
0469 
0470   virtual ProgramStateRef assumeSymOutsideInclusiveRange(
0471       ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
0472       const llvm::APSInt &To, const llvm::APSInt &Adjustment) = 0;
0473 
0474   //===------------------------------------------------------------------===//
0475   // Internal implementation.
0476   //===------------------------------------------------------------------===//
0477 private:
0478   static void computeAdjustment(SymbolRef &Sym, llvm::APSInt &Adjustment);
0479 };
0480 
0481 /// Try to simplify a given symbolic expression based on the constraints in
0482 /// State. This is needed because the Environment bindings are not getting
0483 /// updated when a new constraint is added to the State. If the symbol is
0484 /// simplified to a non-symbol (e.g. to a constant) then the original symbol
0485 /// is returned. We use this function in the family of assumeSymNE/EQ/LT/../GE
0486 /// functions where we can work only with symbols. Use the other function
0487 /// (simplifyToSVal) if you are interested in a simplification that may yield
0488 /// a concrete constant value.
0489 SymbolRef simplify(ProgramStateRef State, SymbolRef Sym);
0490 
0491 /// Try to simplify a given symbolic expression's associated `SVal` based on the
0492 /// constraints in State. This is very similar to `simplify`, but this function
0493 /// always returns the simplified SVal. The simplified SVal might be a single
0494 /// constant (i.e. `ConcreteInt`).
0495 SVal simplifyToSVal(ProgramStateRef State, SymbolRef Sym);
0496 
0497 } // namespace ento
0498 } // namespace clang
0499 
0500 REGISTER_FACTORY_WITH_PROGRAMSTATE(ConstraintMap)
0501 
0502 #endif