|
|
|||
File indexing completed on 2026-05-10 08:43:14
0001 //===- llvm/Analysis/LoopAccessAnalysis.h -----------------------*- C++ -*-===// 0002 // 0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 0004 // See https://llvm.org/LICENSE.txt for license information. 0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 0006 // 0007 //===----------------------------------------------------------------------===// 0008 // 0009 // This file defines the interface for the loop memory dependence framework that 0010 // was originally developed for the Loop Vectorizer. 0011 // 0012 //===----------------------------------------------------------------------===// 0013 0014 #ifndef LLVM_ANALYSIS_LOOPACCESSANALYSIS_H 0015 #define LLVM_ANALYSIS_LOOPACCESSANALYSIS_H 0016 0017 #include "llvm/ADT/EquivalenceClasses.h" 0018 #include "llvm/Analysis/ScalarEvolution.h" 0019 #include "llvm/IR/DiagnosticInfo.h" 0020 #include <optional> 0021 #include <variant> 0022 0023 namespace llvm { 0024 0025 class AAResults; 0026 class DataLayout; 0027 class Loop; 0028 class raw_ostream; 0029 class TargetTransformInfo; 0030 0031 /// Collection of parameters shared beetween the Loop Vectorizer and the 0032 /// Loop Access Analysis. 0033 struct VectorizerParams { 0034 /// Maximum SIMD width. 0035 static const unsigned MaxVectorWidth; 0036 0037 /// VF as overridden by the user. 0038 static unsigned VectorizationFactor; 0039 /// Interleave factor as overridden by the user. 0040 static unsigned VectorizationInterleave; 0041 /// True if force-vector-interleave was specified by the user. 0042 static bool isInterleaveForced(); 0043 0044 /// \When performing memory disambiguation checks at runtime do not 0045 /// make more than this number of comparisons. 0046 static unsigned RuntimeMemoryCheckThreshold; 0047 0048 // When creating runtime checks for nested loops, where possible try to 0049 // write the checks in a form that allows them to be easily hoisted out of 0050 // the outermost loop. For example, we can do this by expanding the range of 0051 // addresses considered to include the entire nested loop so that they are 0052 // loop invariant. 0053 static bool HoistRuntimeChecks; 0054 }; 0055 0056 /// Checks memory dependences among accesses to the same underlying 0057 /// object to determine whether there vectorization is legal or not (and at 0058 /// which vectorization factor). 0059 /// 0060 /// Note: This class will compute a conservative dependence for access to 0061 /// different underlying pointers. Clients, such as the loop vectorizer, will 0062 /// sometimes deal these potential dependencies by emitting runtime checks. 0063 /// 0064 /// We use the ScalarEvolution framework to symbolically evalutate access 0065 /// functions pairs. Since we currently don't restructure the loop we can rely 0066 /// on the program order of memory accesses to determine their safety. 0067 /// At the moment we will only deem accesses as safe for: 0068 /// * A negative constant distance assuming program order. 0069 /// 0070 /// Safe: tmp = a[i + 1]; OR a[i + 1] = x; 0071 /// a[i] = tmp; y = a[i]; 0072 /// 0073 /// The latter case is safe because later checks guarantuee that there can't 0074 /// be a cycle through a phi node (that is, we check that "x" and "y" is not 0075 /// the same variable: a header phi can only be an induction or a reduction, a 0076 /// reduction can't have a memory sink, an induction can't have a memory 0077 /// source). This is important and must not be violated (or we have to 0078 /// resort to checking for cycles through memory). 0079 /// 0080 /// * A positive constant distance assuming program order that is bigger 0081 /// than the biggest memory access. 0082 /// 0083 /// tmp = a[i] OR b[i] = x 0084 /// a[i+2] = tmp y = b[i+2]; 0085 /// 0086 /// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively. 0087 /// 0088 /// * Zero distances and all accesses have the same size. 0089 /// 0090 class MemoryDepChecker { 0091 public: 0092 typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; 0093 typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList; 0094 /// Set of potential dependent memory accesses. 0095 typedef EquivalenceClasses<MemAccessInfo> DepCandidates; 0096 0097 /// Type to keep track of the status of the dependence check. The order of 0098 /// the elements is important and has to be from most permissive to least 0099 /// permissive. 0100 enum class VectorizationSafetyStatus { 0101 // Can vectorize safely without RT checks. All dependences are known to be 0102 // safe. 0103 Safe, 0104 // Can possibly vectorize with RT checks to overcome unknown dependencies. 0105 PossiblySafeWithRtChecks, 0106 // Cannot vectorize due to known unsafe dependencies. 0107 Unsafe, 0108 }; 0109 0110 /// Dependece between memory access instructions. 0111 struct Dependence { 0112 /// The type of the dependence. 0113 enum DepType { 0114 // No dependence. 0115 NoDep, 0116 // We couldn't determine the direction or the distance. 0117 Unknown, 0118 // At least one of the memory access instructions may access a loop 0119 // varying object, e.g. the address of underlying object is loaded inside 0120 // the loop, like A[B[i]]. We cannot determine direction or distance in 0121 // those cases, and also are unable to generate any runtime checks. 0122 IndirectUnsafe, 0123 0124 // Lexically forward. 0125 // 0126 // FIXME: If we only have loop-independent forward dependences (e.g. a 0127 // read and write of A[i]), LAA will locally deem the dependence "safe" 0128 // without querying the MemoryDepChecker. Therefore we can miss 0129 // enumerating loop-independent forward dependences in 0130 // getDependences. Note that as soon as there are different 0131 // indices used to access the same array, the MemoryDepChecker *is* 0132 // queried and the dependence list is complete. 0133 Forward, 0134 // Forward, but if vectorized, is likely to prevent store-to-load 0135 // forwarding. 0136 ForwardButPreventsForwarding, 0137 // Lexically backward. 0138 Backward, 0139 // Backward, but the distance allows a vectorization factor of dependent 0140 // on MinDepDistBytes. 0141 BackwardVectorizable, 0142 // Same, but may prevent store-to-load forwarding. 0143 BackwardVectorizableButPreventsForwarding 0144 }; 0145 0146 /// String version of the types. 0147 static const char *DepName[]; 0148 0149 /// Index of the source of the dependence in the InstMap vector. 0150 unsigned Source; 0151 /// Index of the destination of the dependence in the InstMap vector. 0152 unsigned Destination; 0153 /// The type of the dependence. 0154 DepType Type; 0155 0156 Dependence(unsigned Source, unsigned Destination, DepType Type) 0157 : Source(Source), Destination(Destination), Type(Type) {} 0158 0159 /// Return the source instruction of the dependence. 0160 Instruction *getSource(const MemoryDepChecker &DepChecker) const; 0161 /// Return the destination instruction of the dependence. 0162 Instruction *getDestination(const MemoryDepChecker &DepChecker) const; 0163 0164 /// Dependence types that don't prevent vectorization. 0165 static VectorizationSafetyStatus isSafeForVectorization(DepType Type); 0166 0167 /// Lexically forward dependence. 0168 bool isForward() const; 0169 /// Lexically backward dependence. 0170 bool isBackward() const; 0171 0172 /// May be a lexically backward dependence type (includes Unknown). 0173 bool isPossiblyBackward() const; 0174 0175 /// Print the dependence. \p Instr is used to map the instruction 0176 /// indices to instructions. 0177 void print(raw_ostream &OS, unsigned Depth, 0178 const SmallVectorImpl<Instruction *> &Instrs) const; 0179 }; 0180 0181 MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L, 0182 const DenseMap<Value *, const SCEV *> &SymbolicStrides, 0183 unsigned MaxTargetVectorWidthInBits) 0184 : PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides), 0185 MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {} 0186 0187 /// Register the location (instructions are given increasing numbers) 0188 /// of a write access. 0189 void addAccess(StoreInst *SI); 0190 0191 /// Register the location (instructions are given increasing numbers) 0192 /// of a write access. 0193 void addAccess(LoadInst *LI); 0194 0195 /// Check whether the dependencies between the accesses are safe. 0196 /// 0197 /// Only checks sets with elements in \p CheckDeps. 0198 bool areDepsSafe(const DepCandidates &AccessSets, 0199 const MemAccessInfoList &CheckDeps); 0200 0201 /// No memory dependence was encountered that would inhibit 0202 /// vectorization. 0203 bool isSafeForVectorization() const { 0204 return Status == VectorizationSafetyStatus::Safe; 0205 } 0206 0207 /// Return true if the number of elements that are safe to operate on 0208 /// simultaneously is not bounded. 0209 bool isSafeForAnyVectorWidth() const { 0210 return MaxSafeVectorWidthInBits == UINT_MAX; 0211 } 0212 0213 /// Return the number of elements that are safe to operate on 0214 /// simultaneously, multiplied by the size of the element in bits. 0215 uint64_t getMaxSafeVectorWidthInBits() const { 0216 return MaxSafeVectorWidthInBits; 0217 } 0218 0219 /// In same cases when the dependency check fails we can still 0220 /// vectorize the loop with a dynamic array access check. 0221 bool shouldRetryWithRuntimeCheck() const { 0222 return FoundNonConstantDistanceDependence && 0223 Status == VectorizationSafetyStatus::PossiblySafeWithRtChecks; 0224 } 0225 0226 /// Returns the memory dependences. If null is returned we exceeded 0227 /// the MaxDependences threshold and this information is not 0228 /// available. 0229 const SmallVectorImpl<Dependence> *getDependences() const { 0230 return RecordDependences ? &Dependences : nullptr; 0231 } 0232 0233 void clearDependences() { Dependences.clear(); } 0234 0235 /// The vector of memory access instructions. The indices are used as 0236 /// instruction identifiers in the Dependence class. 0237 const SmallVectorImpl<Instruction *> &getMemoryInstructions() const { 0238 return InstMap; 0239 } 0240 0241 /// Generate a mapping between the memory instructions and their 0242 /// indices according to program order. 0243 DenseMap<Instruction *, unsigned> generateInstructionOrderMap() const { 0244 DenseMap<Instruction *, unsigned> OrderMap; 0245 0246 for (unsigned I = 0; I < InstMap.size(); ++I) 0247 OrderMap[InstMap[I]] = I; 0248 0249 return OrderMap; 0250 } 0251 0252 /// Find the set of instructions that read or write via \p Ptr. 0253 SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr, 0254 bool isWrite) const; 0255 0256 /// Return the program order indices for the access location (Ptr, IsWrite). 0257 /// Returns an empty ArrayRef if there are no accesses for the location. 0258 ArrayRef<unsigned> getOrderForAccess(Value *Ptr, bool IsWrite) const { 0259 auto I = Accesses.find({Ptr, IsWrite}); 0260 if (I != Accesses.end()) 0261 return I->second; 0262 return {}; 0263 } 0264 0265 const Loop *getInnermostLoop() const { return InnermostLoop; } 0266 0267 DenseMap<std::pair<const SCEV *, Type *>, 0268 std::pair<const SCEV *, const SCEV *>> & 0269 getPointerBounds() { 0270 return PointerBounds; 0271 } 0272 0273 private: 0274 /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and 0275 /// applies dynamic knowledge to simplify SCEV expressions and convert them 0276 /// to a more usable form. We need this in case assumptions about SCEV 0277 /// expressions need to be made in order to avoid unknown dependences. For 0278 /// example we might assume a unit stride for a pointer in order to prove 0279 /// that a memory access is strided and doesn't wrap. 0280 PredicatedScalarEvolution &PSE; 0281 const Loop *InnermostLoop; 0282 0283 /// Reference to map of pointer values to 0284 /// their stride symbols, if they have a symbolic stride. 0285 const DenseMap<Value *, const SCEV *> &SymbolicStrides; 0286 0287 /// Maps access locations (ptr, read/write) to program order. 0288 DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses; 0289 0290 /// Memory access instructions in program order. 0291 SmallVector<Instruction *, 16> InstMap; 0292 0293 /// The program order index to be used for the next instruction. 0294 unsigned AccessIdx = 0; 0295 0296 /// The smallest dependence distance in bytes in the loop. This may not be 0297 /// the same as the maximum number of bytes that are safe to operate on 0298 /// simultaneously. 0299 uint64_t MinDepDistBytes = 0; 0300 0301 /// Number of elements (from consecutive iterations) that are safe to 0302 /// operate on simultaneously, multiplied by the size of the element in bits. 0303 /// The size of the element is taken from the memory access that is most 0304 /// restrictive. 0305 uint64_t MaxSafeVectorWidthInBits = -1U; 0306 0307 /// If we see a non-constant dependence distance we can still try to 0308 /// vectorize this loop with runtime checks. 0309 bool FoundNonConstantDistanceDependence = false; 0310 0311 /// Result of the dependence checks, indicating whether the checked 0312 /// dependences are safe for vectorization, require RT checks or are known to 0313 /// be unsafe. 0314 VectorizationSafetyStatus Status = VectorizationSafetyStatus::Safe; 0315 0316 //// True if Dependences reflects the dependences in the 0317 //// loop. If false we exceeded MaxDependences and 0318 //// Dependences is invalid. 0319 bool RecordDependences = true; 0320 0321 /// Memory dependences collected during the analysis. Only valid if 0322 /// RecordDependences is true. 0323 SmallVector<Dependence, 8> Dependences; 0324 0325 /// The maximum width of a target's vector registers multiplied by 2 to also 0326 /// roughly account for additional interleaving. Is used to decide if a 0327 /// backwards dependence with non-constant stride should be classified as 0328 /// backwards-vectorizable or unknown (triggering a runtime check). 0329 unsigned MaxTargetVectorWidthInBits = 0; 0330 0331 /// Mapping of SCEV expressions to their expanded pointer bounds (pair of 0332 /// start and end pointer expressions). 0333 DenseMap<std::pair<const SCEV *, Type *>, 0334 std::pair<const SCEV *, const SCEV *>> 0335 PointerBounds; 0336 0337 /// Cache for the loop guards of InnermostLoop. 0338 std::optional<ScalarEvolution::LoopGuards> LoopGuards; 0339 0340 /// Check whether there is a plausible dependence between the two 0341 /// accesses. 0342 /// 0343 /// Access \p A must happen before \p B in program order. The two indices 0344 /// identify the index into the program order map. 0345 /// 0346 /// This function checks whether there is a plausible dependence (or the 0347 /// absence of such can't be proved) between the two accesses. If there is a 0348 /// plausible dependence but the dependence distance is bigger than one 0349 /// element access it records this distance in \p MinDepDistBytes (if this 0350 /// distance is smaller than any other distance encountered so far). 0351 /// Otherwise, this function returns true signaling a possible dependence. 0352 Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx, 0353 const MemAccessInfo &B, unsigned BIdx); 0354 0355 /// Check whether the data dependence could prevent store-load 0356 /// forwarding. 0357 /// 0358 /// \return false if we shouldn't vectorize at all or avoid larger 0359 /// vectorization factors by limiting MinDepDistBytes. 0360 bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize); 0361 0362 /// Updates the current safety status with \p S. We can go from Safe to 0363 /// either PossiblySafeWithRtChecks or Unsafe and from 0364 /// PossiblySafeWithRtChecks to Unsafe. 0365 void mergeInStatus(VectorizationSafetyStatus S); 0366 0367 struct DepDistanceStrideAndSizeInfo { 0368 const SCEV *Dist; 0369 0370 /// Strides could either be scaled (in bytes, taking the size of the 0371 /// underlying type into account), or unscaled (in indexing units; unscaled 0372 /// stride = scaled stride / size of underlying type). Here, strides are 0373 /// unscaled. 0374 uint64_t MaxStride; 0375 std::optional<uint64_t> CommonStride; 0376 0377 bool ShouldRetryWithRuntimeCheck; 0378 uint64_t TypeByteSize; 0379 bool AIsWrite; 0380 bool BIsWrite; 0381 0382 DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t MaxStride, 0383 std::optional<uint64_t> CommonStride, 0384 bool ShouldRetryWithRuntimeCheck, 0385 uint64_t TypeByteSize, bool AIsWrite, 0386 bool BIsWrite) 0387 : Dist(Dist), MaxStride(MaxStride), CommonStride(CommonStride), 0388 ShouldRetryWithRuntimeCheck(ShouldRetryWithRuntimeCheck), 0389 TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} 0390 }; 0391 0392 /// Get the dependence distance, strides, type size and whether it is a write 0393 /// for the dependence between A and B. Returns a DepType, if we can prove 0394 /// there's no dependence or the analysis fails. Outlined to lambda to limit 0395 /// he scope of various temporary variables, like A/BPtr, StrideA/BPtr and 0396 /// others. Returns either the dependence result, if it could already be 0397 /// determined, or a struct containing (Distance, Stride, TypeSize, AIsWrite, 0398 /// BIsWrite). 0399 std::variant<Dependence::DepType, DepDistanceStrideAndSizeInfo> 0400 getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst, 0401 const MemAccessInfo &B, 0402 Instruction *BInst); 0403 }; 0404 0405 class RuntimePointerChecking; 0406 /// A grouping of pointers. A single memcheck is required between 0407 /// two groups. 0408 struct RuntimeCheckingPtrGroup { 0409 /// Create a new pointer checking group containing a single 0410 /// pointer, with index \p Index in RtCheck. 0411 RuntimeCheckingPtrGroup(unsigned Index, 0412 const RuntimePointerChecking &RtCheck); 0413 0414 /// Tries to add the pointer recorded in RtCheck at index 0415 /// \p Index to this pointer checking group. We can only add a pointer 0416 /// to a checking group if we will still be able to get 0417 /// the upper and lower bounds of the check. Returns true in case 0418 /// of success, false otherwise. 0419 bool addPointer(unsigned Index, const RuntimePointerChecking &RtCheck); 0420 bool addPointer(unsigned Index, const SCEV *Start, const SCEV *End, 0421 unsigned AS, bool NeedsFreeze, ScalarEvolution &SE); 0422 0423 /// The SCEV expression which represents the upper bound of all the 0424 /// pointers in this group. 0425 const SCEV *High; 0426 /// The SCEV expression which represents the lower bound of all the 0427 /// pointers in this group. 0428 const SCEV *Low; 0429 /// Indices of all the pointers that constitute this grouping. 0430 SmallVector<unsigned, 2> Members; 0431 /// Address space of the involved pointers. 0432 unsigned AddressSpace; 0433 /// Whether the pointer needs to be frozen after expansion, e.g. because it 0434 /// may be poison outside the loop. 0435 bool NeedsFreeze = false; 0436 }; 0437 0438 /// A memcheck which made up of a pair of grouped pointers. 0439 typedef std::pair<const RuntimeCheckingPtrGroup *, 0440 const RuntimeCheckingPtrGroup *> 0441 RuntimePointerCheck; 0442 0443 struct PointerDiffInfo { 0444 const SCEV *SrcStart; 0445 const SCEV *SinkStart; 0446 unsigned AccessSize; 0447 bool NeedsFreeze; 0448 0449 PointerDiffInfo(const SCEV *SrcStart, const SCEV *SinkStart, 0450 unsigned AccessSize, bool NeedsFreeze) 0451 : SrcStart(SrcStart), SinkStart(SinkStart), AccessSize(AccessSize), 0452 NeedsFreeze(NeedsFreeze) {} 0453 }; 0454 0455 /// Holds information about the memory runtime legality checks to verify 0456 /// that a group of pointers do not overlap. 0457 class RuntimePointerChecking { 0458 friend struct RuntimeCheckingPtrGroup; 0459 0460 public: 0461 struct PointerInfo { 0462 /// Holds the pointer value that we need to check. 0463 TrackingVH<Value> PointerValue; 0464 /// Holds the smallest byte address accessed by the pointer throughout all 0465 /// iterations of the loop. 0466 const SCEV *Start; 0467 /// Holds the largest byte address accessed by the pointer throughout all 0468 /// iterations of the loop, plus 1. 0469 const SCEV *End; 0470 /// Holds the information if this pointer is used for writing to memory. 0471 bool IsWritePtr; 0472 /// Holds the id of the set of pointers that could be dependent because of a 0473 /// shared underlying object. 0474 unsigned DependencySetId; 0475 /// Holds the id of the disjoint alias set to which this pointer belongs. 0476 unsigned AliasSetId; 0477 /// SCEV for the access. 0478 const SCEV *Expr; 0479 /// True if the pointer expressions needs to be frozen after expansion. 0480 bool NeedsFreeze; 0481 0482 PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End, 0483 bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId, 0484 const SCEV *Expr, bool NeedsFreeze) 0485 : PointerValue(PointerValue), Start(Start), End(End), 0486 IsWritePtr(IsWritePtr), DependencySetId(DependencySetId), 0487 AliasSetId(AliasSetId), Expr(Expr), NeedsFreeze(NeedsFreeze) {} 0488 }; 0489 0490 RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE) 0491 : DC(DC), SE(SE) {} 0492 0493 /// Reset the state of the pointer runtime information. 0494 void reset() { 0495 Need = false; 0496 CanUseDiffCheck = true; 0497 Pointers.clear(); 0498 Checks.clear(); 0499 DiffChecks.clear(); 0500 } 0501 0502 /// Insert a pointer and calculate the start and end SCEVs. 0503 /// We need \p PSE in order to compute the SCEV expression of the pointer 0504 /// according to the assumptions that we've made during the analysis. 0505 /// The method might also version the pointer stride according to \p Strides, 0506 /// and add new predicates to \p PSE. 0507 void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy, 0508 bool WritePtr, unsigned DepSetId, unsigned ASId, 0509 PredicatedScalarEvolution &PSE, bool NeedsFreeze); 0510 0511 /// No run-time memory checking is necessary. 0512 bool empty() const { return Pointers.empty(); } 0513 0514 /// Generate the checks and store it. This also performs the grouping 0515 /// of pointers to reduce the number of memchecks necessary. 0516 void generateChecks(MemoryDepChecker::DepCandidates &DepCands, 0517 bool UseDependencies); 0518 0519 /// Returns the checks that generateChecks created. They can be used to ensure 0520 /// no read/write accesses overlap across all loop iterations. 0521 const SmallVectorImpl<RuntimePointerCheck> &getChecks() const { 0522 return Checks; 0523 } 0524 0525 // Returns an optional list of (pointer-difference expressions, access size) 0526 // pairs that can be used to prove that there are no vectorization-preventing 0527 // dependencies at runtime. There are is a vectorization-preventing dependency 0528 // if any pointer-difference is <u VF * InterleaveCount * access size. Returns 0529 // std::nullopt if pointer-difference checks cannot be used. 0530 std::optional<ArrayRef<PointerDiffInfo>> getDiffChecks() const { 0531 if (!CanUseDiffCheck) 0532 return std::nullopt; 0533 return {DiffChecks}; 0534 } 0535 0536 /// Decide if we need to add a check between two groups of pointers, 0537 /// according to needsChecking. 0538 bool needsChecking(const RuntimeCheckingPtrGroup &M, 0539 const RuntimeCheckingPtrGroup &N) const; 0540 0541 /// Returns the number of run-time checks required according to 0542 /// needsChecking. 0543 unsigned getNumberOfChecks() const { return Checks.size(); } 0544 0545 /// Print the list run-time memory checks necessary. 0546 void print(raw_ostream &OS, unsigned Depth = 0) const; 0547 0548 /// Print \p Checks. 0549 void printChecks(raw_ostream &OS, 0550 const SmallVectorImpl<RuntimePointerCheck> &Checks, 0551 unsigned Depth = 0) const; 0552 0553 /// This flag indicates if we need to add the runtime check. 0554 bool Need = false; 0555 0556 /// Information about the pointers that may require checking. 0557 SmallVector<PointerInfo, 2> Pointers; 0558 0559 /// Holds a partitioning of pointers into "check groups". 0560 SmallVector<RuntimeCheckingPtrGroup, 2> CheckingGroups; 0561 0562 /// Check if pointers are in the same partition 0563 /// 0564 /// \p PtrToPartition contains the partition number for pointers (-1 if the 0565 /// pointer belongs to multiple partitions). 0566 static bool 0567 arePointersInSamePartition(const SmallVectorImpl<int> &PtrToPartition, 0568 unsigned PtrIdx1, unsigned PtrIdx2); 0569 0570 /// Decide whether we need to issue a run-time check for pointer at 0571 /// index \p I and \p J to prove their independence. 0572 bool needsChecking(unsigned I, unsigned J) const; 0573 0574 /// Return PointerInfo for pointer at index \p PtrIdx. 0575 const PointerInfo &getPointerInfo(unsigned PtrIdx) const { 0576 return Pointers[PtrIdx]; 0577 } 0578 0579 ScalarEvolution *getSE() const { return SE; } 0580 0581 private: 0582 /// Groups pointers such that a single memcheck is required 0583 /// between two different groups. This will clear the CheckingGroups vector 0584 /// and re-compute it. We will only group dependecies if \p UseDependencies 0585 /// is true, otherwise we will create a separate group for each pointer. 0586 void groupChecks(MemoryDepChecker::DepCandidates &DepCands, 0587 bool UseDependencies); 0588 0589 /// Generate the checks and return them. 0590 SmallVector<RuntimePointerCheck, 4> generateChecks(); 0591 0592 /// Try to create add a new (pointer-difference, access size) pair to 0593 /// DiffCheck for checking groups \p CGI and \p CGJ. If pointer-difference 0594 /// checks cannot be used for the groups, set CanUseDiffCheck to false. 0595 bool tryToCreateDiffCheck(const RuntimeCheckingPtrGroup &CGI, 0596 const RuntimeCheckingPtrGroup &CGJ); 0597 0598 MemoryDepChecker &DC; 0599 0600 /// Holds a pointer to the ScalarEvolution analysis. 0601 ScalarEvolution *SE; 0602 0603 /// Set of run-time checks required to establish independence of 0604 /// otherwise may-aliasing pointers in the loop. 0605 SmallVector<RuntimePointerCheck, 4> Checks; 0606 0607 /// Flag indicating if pointer-difference checks can be used 0608 bool CanUseDiffCheck = true; 0609 0610 /// A list of (pointer-difference, access size) pairs that can be used to 0611 /// prove that there are no vectorization-preventing dependencies. 0612 SmallVector<PointerDiffInfo> DiffChecks; 0613 }; 0614 0615 /// Drive the analysis of memory accesses in the loop 0616 /// 0617 /// This class is responsible for analyzing the memory accesses of a loop. It 0618 /// collects the accesses and then its main helper the AccessAnalysis class 0619 /// finds and categorizes the dependences in buildDependenceSets. 0620 /// 0621 /// For memory dependences that can be analyzed at compile time, it determines 0622 /// whether the dependence is part of cycle inhibiting vectorization. This work 0623 /// is delegated to the MemoryDepChecker class. 0624 /// 0625 /// For memory dependences that cannot be determined at compile time, it 0626 /// generates run-time checks to prove independence. This is done by 0627 /// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the 0628 /// RuntimePointerCheck class. 0629 /// 0630 /// If pointers can wrap or can't be expressed as affine AddRec expressions by 0631 /// ScalarEvolution, we will generate run-time checks by emitting a 0632 /// SCEVUnionPredicate. 0633 /// 0634 /// Checks for both memory dependences and the SCEV predicates contained in the 0635 /// PSE must be emitted in order for the results of this analysis to be valid. 0636 class LoopAccessInfo { 0637 public: 0638 LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI, 0639 const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, 0640 LoopInfo *LI); 0641 0642 /// Return true we can analyze the memory accesses in the loop and there are 0643 /// no memory dependence cycles. Note that for dependences between loads & 0644 /// stores with uniform addresses, 0645 /// hasStoreStoreDependenceInvolvingLoopInvariantAddress and 0646 /// hasLoadStoreDependenceInvolvingLoopInvariantAddress also need to be 0647 /// checked. 0648 bool canVectorizeMemory() const { return CanVecMem; } 0649 0650 /// Return true if there is a convergent operation in the loop. There may 0651 /// still be reported runtime pointer checks that would be required, but it is 0652 /// not legal to insert them. 0653 bool hasConvergentOp() const { return HasConvergentOp; } 0654 0655 const RuntimePointerChecking *getRuntimePointerChecking() const { 0656 return PtrRtChecking.get(); 0657 } 0658 0659 /// Number of memchecks required to prove independence of otherwise 0660 /// may-alias pointers. 0661 unsigned getNumRuntimePointerChecks() const { 0662 return PtrRtChecking->getNumberOfChecks(); 0663 } 0664 0665 /// Return true if the block BB needs to be predicated in order for the loop 0666 /// to be vectorized. 0667 static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, 0668 DominatorTree *DT); 0669 0670 /// Returns true if value \p V is loop invariant. 0671 bool isInvariant(Value *V) const; 0672 0673 unsigned getNumStores() const { return NumStores; } 0674 unsigned getNumLoads() const { return NumLoads;} 0675 0676 /// The diagnostics report generated for the analysis. E.g. why we 0677 /// couldn't analyze the loop. 0678 const OptimizationRemarkAnalysis *getReport() const { return Report.get(); } 0679 0680 /// the Memory Dependence Checker which can determine the 0681 /// loop-independent and loop-carried dependences between memory accesses. 0682 const MemoryDepChecker &getDepChecker() const { return *DepChecker; } 0683 0684 /// Return the list of instructions that use \p Ptr to read or write 0685 /// memory. 0686 SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr, 0687 bool isWrite) const { 0688 return DepChecker->getInstructionsForAccess(Ptr, isWrite); 0689 } 0690 0691 /// If an access has a symbolic strides, this maps the pointer value to 0692 /// the stride symbol. 0693 const DenseMap<Value *, const SCEV *> &getSymbolicStrides() const { 0694 return SymbolicStrides; 0695 } 0696 0697 /// Print the information about the memory accesses in the loop. 0698 void print(raw_ostream &OS, unsigned Depth = 0) const; 0699 0700 /// Return true if the loop has memory dependence involving two stores to an 0701 /// invariant address, else return false. 0702 bool hasStoreStoreDependenceInvolvingLoopInvariantAddress() const { 0703 return HasStoreStoreDependenceInvolvingLoopInvariantAddress; 0704 } 0705 0706 /// Return true if the loop has memory dependence involving a load and a store 0707 /// to an invariant address, else return false. 0708 bool hasLoadStoreDependenceInvolvingLoopInvariantAddress() const { 0709 return HasLoadStoreDependenceInvolvingLoopInvariantAddress; 0710 } 0711 0712 /// Return the list of stores to invariant addresses. 0713 ArrayRef<StoreInst *> getStoresToInvariantAddresses() const { 0714 return StoresToInvariantAddresses; 0715 } 0716 0717 /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts 0718 /// them to a more usable form. All SCEV expressions during the analysis 0719 /// should be re-written (and therefore simplified) according to PSE. 0720 /// A user of LoopAccessAnalysis will need to emit the runtime checks 0721 /// associated with this predicate. 0722 const PredicatedScalarEvolution &getPSE() const { return *PSE; } 0723 0724 private: 0725 /// Analyze the loop. Returns true if all memory access in the loop can be 0726 /// vectorized. 0727 bool analyzeLoop(AAResults *AA, const LoopInfo *LI, 0728 const TargetLibraryInfo *TLI, DominatorTree *DT); 0729 0730 /// Check if the structure of the loop allows it to be analyzed by this 0731 /// pass. 0732 bool canAnalyzeLoop(); 0733 0734 /// Save the analysis remark. 0735 /// 0736 /// LAA does not directly emits the remarks. Instead it stores it which the 0737 /// client can retrieve and presents as its own analysis 0738 /// (e.g. -Rpass-analysis=loop-vectorize). 0739 OptimizationRemarkAnalysis & 0740 recordAnalysis(StringRef RemarkName, const Instruction *Instr = nullptr); 0741 0742 /// Collect memory access with loop invariant strides. 0743 /// 0744 /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop 0745 /// invariant. 0746 void collectStridedAccess(Value *LoadOrStoreInst); 0747 0748 // Emits the first unsafe memory dependence in a loop. 0749 // Emits nothing if there are no unsafe dependences 0750 // or if the dependences were not recorded. 0751 void emitUnsafeDependenceRemark(); 0752 0753 std::unique_ptr<PredicatedScalarEvolution> PSE; 0754 0755 /// We need to check that all of the pointers in this list are disjoint 0756 /// at runtime. Using std::unique_ptr to make using move ctor simpler. 0757 std::unique_ptr<RuntimePointerChecking> PtrRtChecking; 0758 0759 /// the Memory Dependence Checker which can determine the 0760 /// loop-independent and loop-carried dependences between memory accesses. 0761 std::unique_ptr<MemoryDepChecker> DepChecker; 0762 0763 Loop *TheLoop; 0764 0765 unsigned NumLoads = 0; 0766 unsigned NumStores = 0; 0767 0768 /// Cache the result of analyzeLoop. 0769 bool CanVecMem = false; 0770 bool HasConvergentOp = false; 0771 0772 /// Indicator that there are two non vectorizable stores to the same uniform 0773 /// address. 0774 bool HasStoreStoreDependenceInvolvingLoopInvariantAddress = false; 0775 /// Indicator that there is non vectorizable load and store to the same 0776 /// uniform address. 0777 bool HasLoadStoreDependenceInvolvingLoopInvariantAddress = false; 0778 0779 /// List of stores to invariant addresses. 0780 SmallVector<StoreInst *> StoresToInvariantAddresses; 0781 0782 /// The diagnostics report generated for the analysis. E.g. why we 0783 /// couldn't analyze the loop. 0784 std::unique_ptr<OptimizationRemarkAnalysis> Report; 0785 0786 /// If an access has a symbolic strides, this maps the pointer value to 0787 /// the stride symbol. 0788 DenseMap<Value *, const SCEV *> SymbolicStrides; 0789 }; 0790 0791 /// Return the SCEV corresponding to a pointer with the symbolic stride 0792 /// replaced with constant one, assuming the SCEV predicate associated with 0793 /// \p PSE is true. 0794 /// 0795 /// If necessary this method will version the stride of the pointer according 0796 /// to \p PtrToStride and therefore add further predicates to \p PSE. 0797 /// 0798 /// \p PtrToStride provides the mapping between the pointer value and its 0799 /// stride as collected by LoopVectorizationLegality::collectStridedAccess. 0800 const SCEV * 0801 replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, 0802 const DenseMap<Value *, const SCEV *> &PtrToStride, 0803 Value *Ptr); 0804 0805 /// If the pointer has a constant stride return it in units of the access type 0806 /// size. If the pointer is loop-invariant, return 0. Otherwise return 0807 /// std::nullopt. 0808 /// 0809 /// Ensure that it does not wrap in the address space, assuming the predicate 0810 /// associated with \p PSE is true. 0811 /// 0812 /// If necessary this method will version the stride of the pointer according 0813 /// to \p PtrToStride and therefore add further predicates to \p PSE. 0814 /// The \p Assume parameter indicates if we are allowed to make additional 0815 /// run-time assumptions. 0816 /// 0817 /// Note that the analysis results are defined if-and-only-if the original 0818 /// memory access was defined. If that access was dead, or UB, then the 0819 /// result of this function is undefined. 0820 std::optional<int64_t> 0821 getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, 0822 const Loop *Lp, 0823 const DenseMap<Value *, const SCEV *> &StridesMap = DenseMap<Value *, const SCEV *>(), 0824 bool Assume = false, bool ShouldCheckWrap = true); 0825 0826 /// Returns the distance between the pointers \p PtrA and \p PtrB iff they are 0827 /// compatible and it is possible to calculate the distance between them. This 0828 /// is a simple API that does not depend on the analysis pass. 0829 /// \param StrictCheck Ensure that the calculated distance matches the 0830 /// type-based one after all the bitcasts removal in the provided pointers. 0831 std::optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, 0832 Value *PtrB, const DataLayout &DL, 0833 ScalarEvolution &SE, 0834 bool StrictCheck = false, 0835 bool CheckType = true); 0836 0837 /// Attempt to sort the pointers in \p VL and return the sorted indices 0838 /// in \p SortedIndices, if reordering is required. 0839 /// 0840 /// Returns 'true' if sorting is legal, otherwise returns 'false'. 0841 /// 0842 /// For example, for a given \p VL of memory accesses in program order, a[i+4], 0843 /// a[i+0], a[i+1] and a[i+7], this function will sort the \p VL and save the 0844 /// sorted indices in \p SortedIndices as a[i+0], a[i+1], a[i+4], a[i+7] and 0845 /// saves the mask for actual memory accesses in program order in 0846 /// \p SortedIndices as <1,2,0,3> 0847 bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL, 0848 ScalarEvolution &SE, 0849 SmallVectorImpl<unsigned> &SortedIndices); 0850 0851 /// Returns true if the memory operations \p A and \p B are consecutive. 0852 /// This is a simple API that does not depend on the analysis pass. 0853 bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, 0854 ScalarEvolution &SE, bool CheckType = true); 0855 0856 /// Calculate Start and End points of memory access. 0857 /// Let's assume A is the first access and B is a memory access on N-th loop 0858 /// iteration. Then B is calculated as: 0859 /// B = A + Step*N . 0860 /// Step value may be positive or negative. 0861 /// N is a calculated back-edge taken count: 0862 /// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0 0863 /// Start and End points are calculated in the following way: 0864 /// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt, 0865 /// where SizeOfElt is the size of single memory access in bytes. 0866 /// 0867 /// There is no conflict when the intervals are disjoint: 0868 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End) 0869 std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess( 0870 const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount, 0871 ScalarEvolution *SE, 0872 DenseMap<std::pair<const SCEV *, Type *>, 0873 std::pair<const SCEV *, const SCEV *>> *PointerBounds); 0874 0875 class LoopAccessInfoManager { 0876 /// The cache. 0877 DenseMap<Loop *, std::unique_ptr<LoopAccessInfo>> LoopAccessInfoMap; 0878 0879 // The used analysis passes. 0880 ScalarEvolution &SE; 0881 AAResults &AA; 0882 DominatorTree &DT; 0883 LoopInfo &LI; 0884 TargetTransformInfo *TTI; 0885 const TargetLibraryInfo *TLI = nullptr; 0886 0887 public: 0888 LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT, 0889 LoopInfo &LI, TargetTransformInfo *TTI, 0890 const TargetLibraryInfo *TLI) 0891 : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {} 0892 0893 const LoopAccessInfo &getInfo(Loop &L); 0894 0895 void clear(); 0896 0897 bool invalidate(Function &F, const PreservedAnalyses &PA, 0898 FunctionAnalysisManager::Invalidator &Inv); 0899 }; 0900 0901 /// This analysis provides dependence information for the memory 0902 /// accesses of a loop. 0903 /// 0904 /// It runs the analysis for a loop on demand. This can be initiated by 0905 /// querying the loop access info via AM.getResult<LoopAccessAnalysis>. 0906 /// getResult return a LoopAccessInfo object. See this class for the 0907 /// specifics of what information is provided. 0908 class LoopAccessAnalysis 0909 : public AnalysisInfoMixin<LoopAccessAnalysis> { 0910 friend AnalysisInfoMixin<LoopAccessAnalysis>; 0911 static AnalysisKey Key; 0912 0913 public: 0914 typedef LoopAccessInfoManager Result; 0915 0916 Result run(Function &F, FunctionAnalysisManager &AM); 0917 }; 0918 0919 inline Instruction *MemoryDepChecker::Dependence::getSource( 0920 const MemoryDepChecker &DepChecker) const { 0921 return DepChecker.getMemoryInstructions()[Source]; 0922 } 0923 0924 inline Instruction *MemoryDepChecker::Dependence::getDestination( 0925 const MemoryDepChecker &DepChecker) const { 0926 return DepChecker.getMemoryInstructions()[Destination]; 0927 } 0928 0929 } // End llvm namespace 0930 0931 #endif
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|