|
|
|||
File indexing completed on 2026-05-10 08:43:14
0001 //===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===// 0002 // 0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 0004 // See https://llvm.org/LICENSE.txt for license information. 0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 0006 // 0007 //===----------------------------------------------------------------------===// 0008 /// 0009 /// \file 0010 /// This file defines the interface for the loop cache analysis. 0011 /// 0012 //===----------------------------------------------------------------------===// 0013 0014 #ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H 0015 #define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H 0016 0017 #include "llvm/Analysis/LoopAnalysisManager.h" 0018 #include "llvm/IR/PassManager.h" 0019 #include "llvm/Support/InstructionCost.h" 0020 #include <optional> 0021 0022 namespace llvm { 0023 0024 class AAResults; 0025 class DependenceInfo; 0026 class Instruction; 0027 class LPMUpdater; 0028 class raw_ostream; 0029 class LoopInfo; 0030 class Loop; 0031 class ScalarEvolution; 0032 class SCEV; 0033 class TargetTransformInfo; 0034 0035 using CacheCostTy = InstructionCost; 0036 using LoopVectorTy = SmallVector<Loop *, 8>; 0037 0038 /// Represents a memory reference as a base pointer and a set of indexing 0039 /// operations. For example given the array reference A[i][2j+1][3k+2] in a 0040 /// 3-dim loop nest: 0041 /// for(i=0;i<n;++i) 0042 /// for(j=0;j<m;++j) 0043 /// for(k=0;k<o;++k) 0044 /// ... A[i][2j+1][3k+2] ... 0045 /// We expect: 0046 /// BasePointer -> A 0047 /// Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>] 0048 /// Sizes -> [m][o][4] 0049 class IndexedReference { 0050 friend raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R); 0051 0052 public: 0053 /// Construct an indexed reference given a \p StoreOrLoadInst instruction. 0054 IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, 0055 ScalarEvolution &SE); 0056 0057 bool isValid() const { return IsValid; } 0058 const SCEV *getBasePointer() const { return BasePointer; } 0059 size_t getNumSubscripts() const { return Subscripts.size(); } 0060 const SCEV *getSubscript(unsigned SubNum) const { 0061 assert(SubNum < getNumSubscripts() && "Invalid subscript number"); 0062 return Subscripts[SubNum]; 0063 } 0064 const SCEV *getFirstSubscript() const { 0065 assert(!Subscripts.empty() && "Expecting non-empty container"); 0066 return Subscripts.front(); 0067 } 0068 const SCEV *getLastSubscript() const { 0069 assert(!Subscripts.empty() && "Expecting non-empty container"); 0070 return Subscripts.back(); 0071 } 0072 0073 /// Return true/false if the current object and the indexed reference \p Other 0074 /// are/aren't in the same cache line of size \p CLS. Two references are in 0075 /// the same chace line iff the distance between them in the innermost 0076 /// dimension is less than the cache line size. Return std::nullopt if unsure. 0077 std::optional<bool> hasSpacialReuse(const IndexedReference &Other, 0078 unsigned CLS, AAResults &AA) const; 0079 0080 /// Return true if the current object and the indexed reference \p Other 0081 /// have distance smaller than \p MaxDistance in the dimension associated with 0082 /// the given loop \p L. Return false if the distance is not smaller than \p 0083 /// MaxDistance and std::nullopt if unsure. 0084 std::optional<bool> hasTemporalReuse(const IndexedReference &Other, 0085 unsigned MaxDistance, const Loop &L, 0086 DependenceInfo &DI, AAResults &AA) const; 0087 0088 /// Compute the cost of the reference w.r.t. the given loop \p L when it is 0089 /// considered in the innermost position in the loop nest. 0090 /// The cost is defined as: 0091 /// - equal to one if the reference is loop invariant, or 0092 /// - equal to '(TripCount * stride) / cache_line_size' if: 0093 /// + the reference stride is less than the cache line size, and 0094 /// + the coefficient of this loop's index variable used in all other 0095 /// subscripts is zero 0096 /// - or otherwise equal to 'TripCount'. 0097 CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const; 0098 0099 private: 0100 /// Attempt to delinearize the indexed reference. 0101 bool delinearize(const LoopInfo &LI); 0102 0103 /// Attempt to delinearize \p AccessFn for fixed-size arrays. 0104 bool tryDelinearizeFixedSize(const SCEV *AccessFn, 0105 SmallVectorImpl<const SCEV *> &Subscripts); 0106 0107 /// Return true if the index reference is invariant with respect to loop \p L. 0108 bool isLoopInvariant(const Loop &L) const; 0109 0110 /// Return true if the indexed reference is 'consecutive' in loop \p L. 0111 /// An indexed reference is 'consecutive' if the only coefficient that uses 0112 /// the loop induction variable is the rightmost one, and the access stride is 0113 /// smaller than the cache line size \p CLS. Provide a valid \p Stride value 0114 /// if the indexed reference is 'consecutive'. 0115 bool isConsecutive(const Loop &L, const SCEV *&Stride, unsigned CLS) const; 0116 0117 /// Retrieve the index of the subscript corresponding to the given loop \p 0118 /// L. Return a zero-based positive index if the subscript index is 0119 /// succesfully located and a negative value otherwise. For example given the 0120 /// indexed reference 'A[i][2j+1][3k+2]', the call 0121 /// 'getSubscriptIndex(loop-k)' would return value 2. 0122 int getSubscriptIndex(const Loop &L) const; 0123 0124 /// Return the coefficient used in the rightmost dimension. 0125 const SCEV *getLastCoefficient() const; 0126 0127 /// Return true if the coefficient corresponding to induction variable of 0128 /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L. 0129 bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript, 0130 const Loop &L) const; 0131 0132 /// Verify that the given \p Subscript is 'well formed' (must be a simple add 0133 /// recurrence). 0134 bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const; 0135 0136 /// Return true if the given reference \p Other is definetely aliased with 0137 /// the indexed reference represented by this class. 0138 bool isAliased(const IndexedReference &Other, AAResults &AA) const; 0139 0140 private: 0141 /// True if the reference can be delinearized, false otherwise. 0142 bool IsValid = false; 0143 0144 /// Represent the memory reference instruction. 0145 Instruction &StoreOrLoadInst; 0146 0147 /// The base pointer of the memory reference. 0148 const SCEV *BasePointer = nullptr; 0149 0150 /// The subscript (indexes) of the memory reference. 0151 SmallVector<const SCEV *, 3> Subscripts; 0152 0153 /// The dimensions of the memory reference. 0154 SmallVector<const SCEV *, 3> Sizes; 0155 0156 ScalarEvolution &SE; 0157 }; 0158 0159 /// A reference group represents a set of memory references that exhibit 0160 /// temporal or spacial reuse. Two references belong to the same 0161 /// reference group with respect to a inner loop L iff: 0162 /// 1. they have a loop independent dependency, or 0163 /// 2. they have a loop carried dependence with a small dependence distance 0164 /// (e.g. less than 2) carried by the inner loop, or 0165 /// 3. they refer to the same array, and the subscript in their innermost 0166 /// dimension is less than or equal to 'd' (where 'd' is less than the cache 0167 /// line size) 0168 /// 0169 /// Intuitively a reference group represents memory references that access 0170 /// the same cache line. Conditions 1,2 above account for temporal reuse, while 0171 /// contition 3 accounts for spacial reuse. 0172 using ReferenceGroupTy = SmallVector<std::unique_ptr<IndexedReference>, 8>; 0173 using ReferenceGroupsTy = SmallVector<ReferenceGroupTy, 8>; 0174 0175 /// \c CacheCost represents the estimated cost of a inner loop as the number of 0176 /// cache lines used by the memory references it contains. 0177 /// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum of 0178 /// the cache costs of all of its reference groups when the loop is considered 0179 /// to be in the innermost position in the nest. 0180 /// A reference group represents memory references that fall into the same cache 0181 /// line. Each reference group is analysed with respect to the innermost loop in 0182 /// a loop nest. The cost of a reference is defined as follow: 0183 /// - one if it is loop invariant w.r.t the innermost loop, 0184 /// - equal to the loop trip count divided by the cache line times the 0185 /// reference stride if the reference stride is less than the cache line 0186 /// size (CLS), and the coefficient of this loop's index variable used in all 0187 /// other subscripts is zero (e.g. RefCost = TripCount/(CLS/RefStride)) 0188 /// - equal to the innermost loop trip count if the reference stride is greater 0189 /// or equal to the cache line size CLS. 0190 class CacheCost { 0191 friend raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC); 0192 using LoopTripCountTy = std::pair<const Loop *, unsigned>; 0193 using LoopCacheCostTy = std::pair<const Loop *, CacheCostTy>; 0194 0195 public: 0196 /// Construct a CacheCost object for the loop nest described by \p Loops. 0197 /// The optional parameter \p TRT can be used to specify the max. distance 0198 /// between array elements accessed in a loop so that the elements are 0199 /// classified to have temporal reuse. 0200 CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, 0201 TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, 0202 std::optional<unsigned> TRT = std::nullopt); 0203 0204 /// Create a CacheCost for the loop nest rooted by \p Root. 0205 /// The optional parameter \p TRT can be used to specify the max. distance 0206 /// between array elements accessed in a loop so that the elements are 0207 /// classified to have temporal reuse. 0208 static std::unique_ptr<CacheCost> 0209 getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, 0210 std::optional<unsigned> TRT = std::nullopt); 0211 0212 /// Return the estimated cost of loop \p L if the given loop is part of the 0213 /// loop nest associated with this object. Return -1 otherwise. 0214 CacheCostTy getLoopCost(const Loop &L) const { 0215 auto IT = llvm::find_if(LoopCosts, [&L](const LoopCacheCostTy &LCC) { 0216 return LCC.first == &L; 0217 }); 0218 return (IT != LoopCosts.end()) ? (*IT).second : -1; 0219 } 0220 0221 /// Return the estimated ordered loop costs. 0222 ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; } 0223 0224 private: 0225 /// Calculate the cache footprint of each loop in the nest (when it is 0226 /// considered to be in the innermost position). 0227 void calculateCacheFootprint(); 0228 0229 /// Partition store/load instructions in the loop nest into reference groups. 0230 /// Two or more memory accesses belong in the same reference group if they 0231 /// share the same cache line. 0232 bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const; 0233 0234 /// Calculate the cost of the given loop \p L assuming it is the innermost 0235 /// loop in nest. 0236 CacheCostTy computeLoopCacheCost(const Loop &L, 0237 const ReferenceGroupsTy &RefGroups) const; 0238 0239 /// Compute the cost of a representative reference in reference group \p RG 0240 /// when the given loop \p L is considered as the innermost loop in the nest. 0241 /// The computed cost is an estimate for the number of cache lines used by the 0242 /// reference group. The representative reference cost is defined as: 0243 /// - equal to one if the reference is loop invariant, or 0244 /// - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's 0245 /// induction variable is used only in the reference subscript associated 0246 /// with loop \p L, and (b) the reference stride is less than the cache 0247 /// line size, or 0248 /// - TripCount otherwise 0249 CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG, 0250 const Loop &L) const; 0251 0252 /// Sort the LoopCosts vector by decreasing cache cost. 0253 void sortLoopCosts() { 0254 stable_sort(LoopCosts, 0255 [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) { 0256 return A.second > B.second; 0257 }); 0258 } 0259 0260 private: 0261 /// Loops in the loop nest associated with this object. 0262 LoopVectorTy Loops; 0263 0264 /// Trip counts for the loops in the loop nest associated with this object. 0265 SmallVector<LoopTripCountTy, 3> TripCounts; 0266 0267 /// Cache costs for the loops in the loop nest associated with this object. 0268 SmallVector<LoopCacheCostTy, 3> LoopCosts; 0269 0270 /// The max. distance between array elements accessed in a loop so that the 0271 /// elements are classified to have temporal reuse. 0272 std::optional<unsigned> TRT; 0273 0274 const LoopInfo &LI; 0275 ScalarEvolution &SE; 0276 TargetTransformInfo &TTI; 0277 AAResults &AA; 0278 DependenceInfo &DI; 0279 }; 0280 0281 raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R); 0282 raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC); 0283 0284 /// Printer pass for the \c CacheCost results. 0285 class LoopCachePrinterPass : public PassInfoMixin<LoopCachePrinterPass> { 0286 raw_ostream &OS; 0287 0288 public: 0289 explicit LoopCachePrinterPass(raw_ostream &OS) : OS(OS) {} 0290 0291 PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, 0292 LoopStandardAnalysisResults &AR, LPMUpdater &U); 0293 0294 static bool isRequired() { return true; } 0295 }; 0296 0297 } // namespace llvm 0298 0299 #endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|