|
|
|||
File indexing completed on 2026-05-10 08:37:09
0001 //== RangedConstraintManager.h ----------------------------------*- C++ -*--==// 0002 // 0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 0004 // See https://llvm.org/LICENSE.txt for license information. 0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 0006 // 0007 //===----------------------------------------------------------------------===// 0008 // 0009 // Ranged constraint manager, built on SimpleConstraintManager. 0010 // 0011 //===----------------------------------------------------------------------===// 0012 0013 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_RANGEDCONSTRAINTMANAGER_H 0014 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_RANGEDCONSTRAINTMANAGER_H 0015 0016 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" 0017 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 0018 #include "clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h" 0019 #include "llvm/ADT/APSInt.h" 0020 #include "llvm/Support/Allocator.h" 0021 0022 namespace clang { 0023 0024 namespace ento { 0025 0026 /// A Range represents the closed range [from, to]. The caller must 0027 /// guarantee that from <= to. Note that Range is immutable, so as not 0028 /// to subvert RangeSet's immutability. 0029 class Range { 0030 public: 0031 Range(const llvm::APSInt &From, const llvm::APSInt &To) : Impl(&From, &To) { 0032 assert(From <= To); 0033 } 0034 0035 Range(const llvm::APSInt &Point) : Range(Point, Point) {} 0036 0037 bool Includes(const llvm::APSInt &Point) const { 0038 return From() <= Point && Point <= To(); 0039 } 0040 const llvm::APSInt &From() const { return *Impl.first; } 0041 const llvm::APSInt &To() const { return *Impl.second; } 0042 const llvm::APSInt *getConcreteValue() const { 0043 return &From() == &To() ? &From() : nullptr; 0044 } 0045 0046 void Profile(llvm::FoldingSetNodeID &ID) const { 0047 ID.AddPointer(&From()); 0048 ID.AddPointer(&To()); 0049 } 0050 void dump(raw_ostream &OS) const; 0051 void dump() const; 0052 0053 // In order to keep non-overlapping ranges sorted, we can compare only From 0054 // points. 0055 bool operator<(const Range &RHS) const { return From() < RHS.From(); } 0056 0057 bool operator==(const Range &RHS) const { return Impl == RHS.Impl; } 0058 bool operator!=(const Range &RHS) const { return !operator==(RHS); } 0059 0060 private: 0061 std::pair<const llvm::APSInt *, const llvm::APSInt *> Impl; 0062 }; 0063 0064 /// @class RangeSet is a persistent set of non-overlapping ranges. 0065 /// 0066 /// New RangeSet objects can be ONLY produced by RangeSet::Factory object, which 0067 /// also supports the most common operations performed on range sets. 0068 /// 0069 /// Empty set corresponds to an overly constrained symbol meaning that there 0070 /// are no possible values for that symbol. 0071 class RangeSet { 0072 public: 0073 class Factory; 0074 0075 private: 0076 // We use llvm::SmallVector as the underlying container for the following 0077 // reasons: 0078 // 0079 // * Range sets are usually very simple, 1 or 2 ranges. 0080 // That's why llvm::ImmutableSet is not perfect. 0081 // 0082 // * Ranges in sets are NOT overlapping, so it is natural to keep them 0083 // sorted for efficient operations and queries. For this reason, 0084 // llvm::SmallSet doesn't fit the requirements, it is not sorted when it 0085 // is a vector. 0086 // 0087 // * Range set operations usually a bit harder than add/remove a range. 0088 // Complex operations might do many of those for just one range set. 0089 // Formerly it used to be llvm::ImmutableSet, which is inefficient for our 0090 // purposes as we want to make these operations BOTH immutable AND 0091 // efficient. 0092 // 0093 // * Iteration over ranges is widespread and a more cache-friendly 0094 // structure is preferred. 0095 using ImplType = llvm::SmallVector<Range, 4>; 0096 0097 struct ContainerType : public ImplType, public llvm::FoldingSetNode { 0098 void Profile(llvm::FoldingSetNodeID &ID) const { 0099 for (const Range &It : *this) { 0100 It.Profile(ID); 0101 } 0102 } 0103 }; 0104 // This is a non-owning pointer to an actual container. 0105 // The memory is fully managed by the factory and is alive as long as the 0106 // factory itself is alive. 0107 // It is a pointer as opposed to a reference, so we can easily reassign 0108 // RangeSet objects. 0109 using UnderlyingType = const ContainerType *; 0110 UnderlyingType Impl; 0111 0112 public: 0113 using const_iterator = ImplType::const_iterator; 0114 0115 const_iterator begin() const { return Impl->begin(); } 0116 const_iterator end() const { return Impl->end(); } 0117 size_t size() const { return Impl->size(); } 0118 0119 bool isEmpty() const { return Impl->empty(); } 0120 0121 class Factory { 0122 public: 0123 Factory(BasicValueFactory &BV) : ValueFactory(BV) {} 0124 0125 /// Create a new set with all ranges from both LHS and RHS. 0126 /// Possible intersections are not checked here. 0127 /// 0128 /// Complexity: O(N + M) 0129 /// where N = size(LHS), M = size(RHS) 0130 RangeSet add(RangeSet LHS, RangeSet RHS); 0131 /// Create a new set with all ranges from the original set plus the new one. 0132 /// Possible intersections are not checked here. 0133 /// 0134 /// Complexity: O(N) 0135 /// where N = size(Original) 0136 RangeSet add(RangeSet Original, Range Element); 0137 /// Create a new set with all ranges from the original set plus the point. 0138 /// Possible intersections are not checked here. 0139 /// 0140 /// Complexity: O(N) 0141 /// where N = size(Original) 0142 RangeSet add(RangeSet Original, const llvm::APSInt &Point); 0143 /// Create a new set which is a union of two given ranges. 0144 /// Possible intersections are not checked here. 0145 /// 0146 /// Complexity: O(N + M) 0147 /// where N = size(LHS), M = size(RHS) 0148 RangeSet unite(RangeSet LHS, RangeSet RHS); 0149 /// Create a new set by uniting given range set with the given range. 0150 /// All intersections and adjacent ranges are handled here. 0151 /// 0152 /// Complexity: O(N) 0153 /// where N = size(Original) 0154 RangeSet unite(RangeSet Original, Range Element); 0155 /// Create a new set by uniting given range set with the given point. 0156 /// All intersections and adjacent ranges are handled here. 0157 /// 0158 /// Complexity: O(N) 0159 /// where N = size(Original) 0160 RangeSet unite(RangeSet Original, llvm::APSInt Point); 0161 /// Create a new set by uniting given range set with the given range 0162 /// between points. All intersections and adjacent ranges are handled here. 0163 /// 0164 /// Complexity: O(N) 0165 /// where N = size(Original) 0166 RangeSet unite(RangeSet Original, llvm::APSInt From, llvm::APSInt To); 0167 0168 RangeSet getEmptySet() { return &EmptySet; } 0169 0170 /// Create a new set with just one range. 0171 /// @{ 0172 RangeSet getRangeSet(Range Origin); 0173 RangeSet getRangeSet(const llvm::APSInt &From, const llvm::APSInt &To) { 0174 return getRangeSet(Range(From, To)); 0175 } 0176 RangeSet getRangeSet(const llvm::APSInt &Origin) { 0177 return getRangeSet(Origin, Origin); 0178 } 0179 /// @} 0180 0181 /// Intersect the given range sets. 0182 /// 0183 /// Complexity: O(N + M) 0184 /// where N = size(LHS), M = size(RHS) 0185 RangeSet intersect(RangeSet LHS, RangeSet RHS); 0186 /// Intersect the given set with the closed range [Lower, Upper]. 0187 /// 0188 /// Unlike the Range type, this range uses modular arithmetic, corresponding 0189 /// to the common treatment of C integer overflow. Thus, if the Lower bound 0190 /// is greater than the Upper bound, the range is taken to wrap around. This 0191 /// is equivalent to taking the intersection with the two ranges [Min, 0192 /// Upper] and [Lower, Max], or, alternatively, /removing/ all integers 0193 /// between Upper and Lower. 0194 /// 0195 /// Complexity: O(N) 0196 /// where N = size(What) 0197 RangeSet intersect(RangeSet What, llvm::APSInt Lower, llvm::APSInt Upper); 0198 /// Intersect the given range with the given point. 0199 /// 0200 /// The result can be either an empty set or a set containing the given 0201 /// point depending on whether the point is in the range set. 0202 /// 0203 /// Complexity: O(logN) 0204 /// where N = size(What) 0205 RangeSet intersect(RangeSet What, llvm::APSInt Point); 0206 0207 /// Delete the given point from the range set. 0208 /// 0209 /// Complexity: O(N) 0210 /// where N = size(From) 0211 RangeSet deletePoint(RangeSet From, const llvm::APSInt &Point); 0212 /// Negate the given range set. 0213 /// 0214 /// Turn all [A, B] ranges to [-B, -A], when "-" is a C-like unary minus 0215 /// operation under the values of the type. 0216 /// 0217 /// We also handle MIN because applying unary minus to MIN does not change 0218 /// it. 0219 /// Example 1: 0220 /// char x = -128; // -128 is a MIN value in a range of 'char' 0221 /// char y = -x; // y: -128 0222 /// 0223 /// Example 2: 0224 /// unsigned char x = 0; // 0 is a MIN value in a range of 'unsigned char' 0225 /// unsigned char y = -x; // y: 0 0226 /// 0227 /// And it makes us to separate the range 0228 /// like [MIN, N] to [MIN, MIN] U [-N, MAX]. 0229 /// For instance, whole range is {-128..127} and subrange is [-128,-126], 0230 /// thus [-128,-127,-126,...] negates to [-128,...,126,127]. 0231 /// 0232 /// Negate restores disrupted ranges on bounds, 0233 /// e.g. [MIN, B] => [MIN, MIN] U [-B, MAX] => [MIN, B]. 0234 /// 0235 /// Negate is a self-inverse function, i.e. negate(negate(R)) == R. 0236 /// 0237 /// Complexity: O(N) 0238 /// where N = size(What) 0239 RangeSet negate(RangeSet What); 0240 /// Performs promotions, truncations and conversions of the given set. 0241 /// 0242 /// This function is optimized for each of the six cast cases: 0243 /// - noop 0244 /// - conversion 0245 /// - truncation 0246 /// - truncation-conversion 0247 /// - promotion 0248 /// - promotion-conversion 0249 /// 0250 /// NOTE: This function is NOT self-inverse for truncations, because of 0251 /// the higher bits loss: 0252 /// - castTo(castTo(OrigRangeOfInt, char), int) != OrigRangeOfInt. 0253 /// - castTo(castTo(OrigRangeOfChar, int), char) == OrigRangeOfChar. 0254 /// But it is self-inverse for all the rest casts. 0255 /// 0256 /// Complexity: 0257 /// - Noop O(1); 0258 /// - Truncation O(N^2); 0259 /// - Another case O(N); 0260 /// where N = size(What) 0261 RangeSet castTo(RangeSet What, APSIntType Ty); 0262 RangeSet castTo(RangeSet What, QualType T); 0263 0264 /// Return associated value factory. 0265 BasicValueFactory &getValueFactory() const { return ValueFactory; } 0266 0267 private: 0268 /// Return a persistent version of the given container. 0269 RangeSet makePersistent(ContainerType &&From); 0270 /// Construct a new persistent version of the given container. 0271 ContainerType *construct(ContainerType &&From); 0272 0273 RangeSet intersect(const ContainerType &LHS, const ContainerType &RHS); 0274 /// NOTE: This function relies on the fact that all values in the 0275 /// containers are persistent (created via BasicValueFactory::getValue). 0276 ContainerType unite(const ContainerType &LHS, const ContainerType &RHS); 0277 0278 /// This is a helper function for `castTo` method. Implies not to be used 0279 /// separately. 0280 /// Performs a truncation case of a cast operation. 0281 ContainerType truncateTo(RangeSet What, APSIntType Ty); 0282 0283 /// This is a helper function for `castTo` method. Implies not to be used 0284 /// separately. 0285 /// Performs a conversion case and a promotion-conversion case for signeds 0286 /// of a cast operation. 0287 ContainerType convertTo(RangeSet What, APSIntType Ty); 0288 0289 /// This is a helper function for `castTo` method. Implies not to be used 0290 /// separately. 0291 /// Performs a promotion for unsigneds only. 0292 ContainerType promoteTo(RangeSet What, APSIntType Ty); 0293 0294 // Many operations include producing new APSInt values and that's why 0295 // we need this factory. 0296 BasicValueFactory &ValueFactory; 0297 // Allocator for all the created containers. 0298 // Containers might own their own memory and that's why it is specific 0299 // for the type, so it calls container destructors upon deletion. 0300 llvm::SpecificBumpPtrAllocator<ContainerType> Arena; 0301 // Usually we deal with the same ranges and range sets over and over. 0302 // Here we track all created containers and try not to repeat ourselves. 0303 llvm::FoldingSet<ContainerType> Cache; 0304 static ContainerType EmptySet; 0305 }; 0306 0307 RangeSet(const RangeSet &) = default; 0308 RangeSet &operator=(const RangeSet &) = default; 0309 RangeSet(RangeSet &&) = default; 0310 RangeSet &operator=(RangeSet &&) = default; 0311 ~RangeSet() = default; 0312 0313 /// Construct a new RangeSet representing '{ [From, To] }'. 0314 RangeSet(Factory &F, const llvm::APSInt &From, const llvm::APSInt &To) 0315 : RangeSet(F.getRangeSet(From, To)) {} 0316 0317 /// Construct a new RangeSet representing the given point as a range. 0318 RangeSet(Factory &F, const llvm::APSInt &Point) 0319 : RangeSet(F.getRangeSet(Point)) {} 0320 0321 static void Profile(llvm::FoldingSetNodeID &ID, const RangeSet &RS) { 0322 ID.AddPointer(RS.Impl); 0323 } 0324 0325 /// Profile - Generates a hash profile of this RangeSet for use 0326 /// by FoldingSet. 0327 void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, *this); } 0328 0329 /// getConcreteValue - If a symbol is constrained to equal a specific integer 0330 /// constant then this method returns that value. Otherwise, it returns 0331 /// NULL. 0332 const llvm::APSInt *getConcreteValue() const { 0333 return Impl->size() == 1 ? begin()->getConcreteValue() : nullptr; 0334 } 0335 0336 /// Get the minimal value covered by the ranges in the set. 0337 /// 0338 /// Complexity: O(1) 0339 const llvm::APSInt &getMinValue() const; 0340 /// Get the maximal value covered by the ranges in the set. 0341 /// 0342 /// Complexity: O(1) 0343 const llvm::APSInt &getMaxValue() const; 0344 0345 bool isUnsigned() const; 0346 uint32_t getBitWidth() const; 0347 APSIntType getAPSIntType() const; 0348 0349 /// Test whether the given point is contained by any of the ranges. 0350 /// 0351 /// Complexity: O(logN) 0352 /// where N = size(this) 0353 bool contains(llvm::APSInt Point) const { return containsImpl(Point); } 0354 0355 bool containsZero() const { 0356 APSIntType T{getMinValue()}; 0357 return contains(T.getZeroValue()); 0358 } 0359 0360 /// Test if the range is the [0,0] range. 0361 /// 0362 /// Complexity: O(1) 0363 bool encodesFalseRange() const { 0364 const llvm::APSInt *Constant = getConcreteValue(); 0365 return Constant && Constant->isZero(); 0366 } 0367 0368 /// Test if the range doesn't contain zero. 0369 /// 0370 /// Complexity: O(logN) 0371 /// where N = size(this) 0372 bool encodesTrueRange() const { return !containsZero(); } 0373 0374 void dump(raw_ostream &OS) const; 0375 void dump() const; 0376 0377 bool operator==(const RangeSet &Other) const { return *Impl == *Other.Impl; } 0378 bool operator!=(const RangeSet &Other) const { return !(*this == Other); } 0379 0380 private: 0381 /* implicit */ RangeSet(ContainerType *RawContainer) : Impl(RawContainer) {} 0382 /* implicit */ RangeSet(UnderlyingType Ptr) : Impl(Ptr) {} 0383 0384 /// Pin given points to the type represented by the current range set. 0385 /// 0386 /// This makes parameter points to be in-out parameters. 0387 /// In order to maintain consistent types across all of the ranges in the set 0388 /// and to keep all the operations to compare ONLY points of the same type, we 0389 /// need to pin every point before any operation. 0390 /// 0391 /// @Returns true if the given points can be converted to the target type 0392 /// without changing the values (i.e. trivially) and false otherwise. 0393 /// @{ 0394 bool pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const; 0395 bool pin(llvm::APSInt &Point) const; 0396 /// @} 0397 0398 // This version of this function modifies its arguments (pins it). 0399 bool containsImpl(llvm::APSInt &Point) const; 0400 0401 friend class Factory; 0402 }; 0403 0404 using ConstraintMap = llvm::ImmutableMap<SymbolRef, RangeSet>; 0405 ConstraintMap getConstraintMap(ProgramStateRef State); 0406 0407 class RangedConstraintManager : public SimpleConstraintManager { 0408 public: 0409 RangedConstraintManager(ExprEngine *EE, SValBuilder &SB) 0410 : SimpleConstraintManager(EE, SB) {} 0411 0412 ~RangedConstraintManager() override; 0413 0414 //===------------------------------------------------------------------===// 0415 // Implementation for interface from SimpleConstraintManager. 0416 //===------------------------------------------------------------------===// 0417 0418 ProgramStateRef assumeSym(ProgramStateRef State, SymbolRef Sym, 0419 bool Assumption) override; 0420 0421 ProgramStateRef assumeSymInclusiveRange(ProgramStateRef State, SymbolRef Sym, 0422 const llvm::APSInt &From, 0423 const llvm::APSInt &To, 0424 bool InRange) override; 0425 0426 ProgramStateRef assumeSymUnsupported(ProgramStateRef State, SymbolRef Sym, 0427 bool Assumption) override; 0428 0429 protected: 0430 /// Assume a constraint between a symbolic expression and a concrete integer. 0431 virtual ProgramStateRef assumeSymRel(ProgramStateRef State, SymbolRef Sym, 0432 BinaryOperator::Opcode op, 0433 const llvm::APSInt &Int); 0434 0435 //===------------------------------------------------------------------===// 0436 // Interface that subclasses must implement. 0437 //===------------------------------------------------------------------===// 0438 0439 // Each of these is of the form "$Sym+Adj <> V", where "<>" is the comparison 0440 // operation for the method being invoked. 0441 0442 virtual ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym, 0443 const llvm::APSInt &V, 0444 const llvm::APSInt &Adjustment) = 0; 0445 0446 virtual ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym, 0447 const llvm::APSInt &V, 0448 const llvm::APSInt &Adjustment) = 0; 0449 0450 virtual ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym, 0451 const llvm::APSInt &V, 0452 const llvm::APSInt &Adjustment) = 0; 0453 0454 virtual ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym, 0455 const llvm::APSInt &V, 0456 const llvm::APSInt &Adjustment) = 0; 0457 0458 virtual ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym, 0459 const llvm::APSInt &V, 0460 const llvm::APSInt &Adjustment) = 0; 0461 0462 virtual ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym, 0463 const llvm::APSInt &V, 0464 const llvm::APSInt &Adjustment) = 0; 0465 0466 virtual ProgramStateRef assumeSymWithinInclusiveRange( 0467 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From, 0468 const llvm::APSInt &To, const llvm::APSInt &Adjustment) = 0; 0469 0470 virtual ProgramStateRef assumeSymOutsideInclusiveRange( 0471 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From, 0472 const llvm::APSInt &To, const llvm::APSInt &Adjustment) = 0; 0473 0474 //===------------------------------------------------------------------===// 0475 // Internal implementation. 0476 //===------------------------------------------------------------------===// 0477 private: 0478 static void computeAdjustment(SymbolRef &Sym, llvm::APSInt &Adjustment); 0479 }; 0480 0481 /// Try to simplify a given symbolic expression based on the constraints in 0482 /// State. This is needed because the Environment bindings are not getting 0483 /// updated when a new constraint is added to the State. If the symbol is 0484 /// simplified to a non-symbol (e.g. to a constant) then the original symbol 0485 /// is returned. We use this function in the family of assumeSymNE/EQ/LT/../GE 0486 /// functions where we can work only with symbols. Use the other function 0487 /// (simplifyToSVal) if you are interested in a simplification that may yield 0488 /// a concrete constant value. 0489 SymbolRef simplify(ProgramStateRef State, SymbolRef Sym); 0490 0491 /// Try to simplify a given symbolic expression's associated `SVal` based on the 0492 /// constraints in State. This is very similar to `simplify`, but this function 0493 /// always returns the simplified SVal. The simplified SVal might be a single 0494 /// constant (i.e. `ConcreteInt`). 0495 SVal simplifyToSVal(ProgramStateRef State, SymbolRef Sym); 0496 0497 } // namespace ento 0498 } // namespace clang 0499 0500 REGISTER_FACTORY_WITH_PROGRAMSTATE(ConstraintMap) 0501 0502 #endif
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|