File indexing completed on 2026-05-10 08:43:27
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016 #ifndef LLVM_CODEGEN_BASICTTIIMPL_H
0017 #define LLVM_CODEGEN_BASICTTIIMPL_H
0018
0019 #include "llvm/ADT/APInt.h"
0020 #include "llvm/ADT/BitVector.h"
0021 #include "llvm/ADT/SmallPtrSet.h"
0022 #include "llvm/ADT/SmallVector.h"
0023 #include "llvm/Analysis/LoopInfo.h"
0024 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
0025 #include "llvm/Analysis/TargetTransformInfo.h"
0026 #include "llvm/Analysis/TargetTransformInfoImpl.h"
0027 #include "llvm/Analysis/ValueTracking.h"
0028 #include "llvm/CodeGen/ISDOpcodes.h"
0029 #include "llvm/CodeGen/TargetLowering.h"
0030 #include "llvm/CodeGen/TargetSubtargetInfo.h"
0031 #include "llvm/CodeGen/ValueTypes.h"
0032 #include "llvm/CodeGenTypes/MachineValueType.h"
0033 #include "llvm/IR/BasicBlock.h"
0034 #include "llvm/IR/Constant.h"
0035 #include "llvm/IR/Constants.h"
0036 #include "llvm/IR/DataLayout.h"
0037 #include "llvm/IR/DerivedTypes.h"
0038 #include "llvm/IR/InstrTypes.h"
0039 #include "llvm/IR/Instruction.h"
0040 #include "llvm/IR/Instructions.h"
0041 #include "llvm/IR/Intrinsics.h"
0042 #include "llvm/IR/Operator.h"
0043 #include "llvm/IR/Type.h"
0044 #include "llvm/IR/Value.h"
0045 #include "llvm/Support/Casting.h"
0046 #include "llvm/Support/CommandLine.h"
0047 #include "llvm/Support/ErrorHandling.h"
0048 #include "llvm/Support/MathExtras.h"
0049 #include "llvm/Target/TargetMachine.h"
0050 #include "llvm/Target/TargetOptions.h"
0051 #include "llvm/Transforms/Utils/LoopUtils.h"
0052 #include <algorithm>
0053 #include <cassert>
0054 #include <cstdint>
0055 #include <limits>
0056 #include <optional>
0057 #include <utility>
0058
0059 namespace llvm {
0060
0061 class Function;
0062 class GlobalValue;
0063 class LLVMContext;
0064 class ScalarEvolution;
0065 class SCEV;
0066 class TargetMachine;
0067
0068 extern cl::opt<unsigned> PartialUnrollingThreshold;
0069
0070
0071
0072
0073
0074
0075
0076
0077
0078
0079 template <typename T>
0080 class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
0081 private:
0082 using BaseT = TargetTransformInfoImplCRTPBase<T>;
0083 using TTI = TargetTransformInfo;
0084
0085
  /// CRTP helper: cast to the most-derived type so that cost queries below
  /// dispatch to target-specific overrides when the derived class provides one.
  T *thisT() { return static_cast<T *>(this); }
0087
0088
0089
0090 InstructionCost getBroadcastShuffleOverhead(FixedVectorType *VTy,
0091 TTI::TargetCostKind CostKind) {
0092 InstructionCost Cost = 0;
0093
0094
0095 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
0096 CostKind, 0, nullptr, nullptr);
0097
0098 for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
0099 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
0100 CostKind, i, nullptr, nullptr);
0101 }
0102 return Cost;
0103 }
0104
0105
0106
0107 InstructionCost getPermuteShuffleOverhead(FixedVectorType *VTy,
0108 TTI::TargetCostKind CostKind) {
0109 InstructionCost Cost = 0;
0110
0111
0112
0113
0114
0115
0116
0117 for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
0118 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
0119 CostKind, i, nullptr, nullptr);
0120 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
0121 CostKind, i, nullptr, nullptr);
0122 }
0123 return Cost;
0124 }
0125
0126
0127
0128 InstructionCost getExtractSubvectorOverhead(VectorType *VTy,
0129 TTI::TargetCostKind CostKind,
0130 int Index,
0131 FixedVectorType *SubVTy) {
0132 assert(VTy && SubVTy &&
0133 "Can only extract subvectors from vectors");
0134 int NumSubElts = SubVTy->getNumElements();
0135 assert((!isa<FixedVectorType>(VTy) ||
0136 (Index + NumSubElts) <=
0137 (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
0138 "SK_ExtractSubvector index out of range");
0139
0140 InstructionCost Cost = 0;
0141
0142
0143
0144 for (int i = 0; i != NumSubElts; ++i) {
0145 Cost +=
0146 thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
0147 CostKind, i + Index, nullptr, nullptr);
0148 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy,
0149 CostKind, i, nullptr, nullptr);
0150 }
0151 return Cost;
0152 }
0153
0154
0155
0156 InstructionCost getInsertSubvectorOverhead(VectorType *VTy,
0157 TTI::TargetCostKind CostKind,
0158 int Index,
0159 FixedVectorType *SubVTy) {
0160 assert(VTy && SubVTy &&
0161 "Can only insert subvectors into vectors");
0162 int NumSubElts = SubVTy->getNumElements();
0163 assert((!isa<FixedVectorType>(VTy) ||
0164 (Index + NumSubElts) <=
0165 (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
0166 "SK_InsertSubvector index out of range");
0167
0168 InstructionCost Cost = 0;
0169
0170
0171
0172 for (int i = 0; i != NumSubElts; ++i) {
0173 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy,
0174 CostKind, i, nullptr, nullptr);
0175 Cost +=
0176 thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, CostKind,
0177 i + Index, nullptr, nullptr);
0178 }
0179 return Cost;
0180 }
0181
0182
  /// Local query method returning the subtarget info, forwarded to the
  /// CRTP-derived implementation.
  const TargetSubtargetInfo *getST() const {
    return static_cast<const T *>(this)->getST();
  }

  /// Local query method returning the target lowering info, forwarded to the
  /// CRTP-derived implementation.
  const TargetLoweringBase *getTLI() const {
    return static_cast<const T *>(this)->getTLI();
  }

  /// Translate a TTI memory-indexing mode into the equivalent ISD one.
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
    switch (M) {
    case TTI::MIM_Unindexed:
      return ISD::UNINDEXED;
    case TTI::MIM_PreInc:
      return ISD::PRE_INC;
    case TTI::MIM_PreDec:
      return ISD::PRE_DEC;
    case TTI::MIM_PostInc:
      return ISD::POST_INC;
    case TTI::MIM_PostDec:
      return ISD::POST_DEC;
    }
    llvm_unreachable("Unexpected MemIndexedMode");
  }
0207
  /// Shared scalarization cost model for masked loads/stores and
  /// gathers/scatters: the operation is costed as VF scalar memory ops plus
  /// the vector packing/unpacking and (optionally) per-lane control flow.
  InstructionCost getCommonMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                              Align Alignment,
                                              bool VariableMask,
                                              bool IsGatherScatter,
                                              TTI::TargetCostKind CostKind,
                                              unsigned AddressSpace = 0) {
    // Scalable vectors cannot be decomposed into a known number of lanes.
    if (isa<ScalableVectorType>(DataTy))
      return InstructionCost::getInvalid();

    auto *VT = cast<FixedVectorType>(DataTy);
    unsigned VF = VT->getNumElements();

    // For gathers/scatters, each lane's pointer must first be extracted from
    // the vector of addresses.
    InstructionCost AddrExtractCost =
        IsGatherScatter ? getScalarizationOverhead(
                              FixedVectorType::get(
                                  PointerType::get(VT->getContext(), 0), VF),
                              false, true, CostKind)
                        : 0;

    // One scalar load/store per lane.
    InstructionCost MemoryOpCost =
        VF * thisT()->getMemoryOpCost(Opcode, VT->getElementType(), Alignment,
                                      AddressSpace, CostKind);

    // Loads insert each scalar result into the vector; stores extract each
    // operand lane out of it.
    InstructionCost PackingCost =
        getScalarizationOverhead(VT, Opcode != Instruction::Store,
                                 Opcode == Instruction::Store, CostKind);

    InstructionCost ConditionalCost = 0;
    if (VariableMask) {
      // With a non-constant mask every lane is guarded: extract the i1 mask
      // bit, branch on it, and merge with a phi.
      ConditionalCost =
          getScalarizationOverhead(
              FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()), VF),
              false, true, CostKind) +
          VF * (thisT()->getCFInstrCost(Instruction::Br, CostKind) +
                thisT()->getCFInstrCost(Instruction::PHI, CostKind));
    }

    return AddrExtractCost + MemoryOpCost + PackingCost + ConditionalCost;
  }
0260
0261
0262
0263
0264
0265
  /// Returns true if \p Mask is a splat shuffle mask: every non-poison
  /// element selects the same source element. On success, \p Index receives
  /// that source element. Elements referencing beyond two source vectors
  /// (>= NumSrcElts * 2) disqualify the mask.
  static bool isSplatMask(ArrayRef<int> Mask, unsigned NumSrcElts, int &Index) {
    // IsCompared tracks whether at least one element has been checked against
    // the candidate splat index; the last-element conditions below appear to
    // reject masks whose only defining element is the final one (i.e. a splat
    // must be confirmed before the mask ends) — NOTE(review): confirm intent.
    bool IsCompared = false;
    if (int SplatIdx = PoisonMaskElem;
        all_of(enumerate(Mask), [&](const auto &P) {
          if (P.value() == PoisonMaskElem)
            return P.index() != Mask.size() - 1 || IsCompared;
          if (static_cast<unsigned>(P.value()) >= NumSrcElts * 2)
            return false;
          if (SplatIdx == PoisonMaskElem) {
            // First defined element: record the candidate splat index.
            SplatIdx = P.value();
            return P.index() != Mask.size() - 1;
          }
          IsCompared = true;
          return SplatIdx == P.value();
        })) {
      Index = SplatIdx;
      return true;
    }
    return false;
  }
0287
0288 protected:
  /// Only the DataLayout is consumed here; the TargetMachine parameter is
  /// used by derived implementations.
  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
      : BaseT(DL) {}
  virtual ~BasicTTIImplBase() = default;

  using TargetTransformInfoImplBase::DL;
0294
0295 public:
0296
0297
  /// Returns true if the target allows a misaligned access of \p BitWidth
  /// bits in \p AddressSpace; \p Fast (if non-null) receives the target's
  /// fast-path indication.
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) const {
    EVT E = EVT::getIntegerVT(Context, BitWidth);
    return getTLI()->allowsMisalignedMemoryAccesses(
        E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
  }

  /// Inlining is compatible when the callee's subtarget features are a
  /// subset of the caller's, i.e. the caller can execute anything the callee
  /// was compiled for.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    const TargetMachine &TM = getTLI()->getTargetMachine();

    const FeatureBitset &CallerBits =
        TM.getSubtargetImpl(*Caller)->getFeatureBits();
    const FeatureBitset &CalleeBits =
        TM.getSubtargetImpl(*Callee)->getFeatureBits();

    // Subset test: masking the caller's features with the callee's must
    // leave the callee's features unchanged.
    return (CallerBits & CalleeBits) == CalleeBits;
  }
0319
  // Conservative defaults for divergence analysis: no divergence modeling.
  bool hasBranchDivergence(const Function *F = nullptr) { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  // By default no address-space cast is considered valid for rewriting.
  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    return false;
  }

  // Conservatively assume any two address spaces may alias.
  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
    return true;
  }

  // -1 signals that the target has no flat address space.
  unsigned getFlatAddressSpace() {
    return -1;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  // The following queries delegate directly to the TargetMachine.
  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS);
  }

  unsigned getAssumedAddrSpace(const Value *V) const {
    return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
  }

  bool isSingleThreaded() const {
    return getTLI()->getTargetMachine().Options.ThreadModel ==
           ThreadModel::Single;
  }

  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return getTLI()->getTargetMachine().getPredicatedAddrSpace(V);
  }

  // nullptr means "no rewrite performed" for address-space propagation.
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }
0366
  // Immediate-legality queries forwarded to TargetLowering.
  bool isLegalAddImmediate(int64_t imm) {
    return getTLI()->isLegalAddImmediate(imm);
  }

  bool isLegalAddScalableImmediate(int64_t Imm) {
    return getTLI()->isLegalAddScalableImmediate(Imm);
  }

  bool isLegalICmpImmediate(int64_t imm) {
    return getTLI()->isLegalICmpImmediate(imm);
  }

  /// Packs the components into a TargetLowering::AddrMode and asks the
  /// target whether the addressing mode [BaseGV + BaseOffset + HasBaseReg +
  /// Scale*Reg + ScalableOffset*vscale] is legal for type \p Ty.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr,
                             int64_t ScalableOffset = 0) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    AM.ScalableOffset = ScalableOffset;
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
  }

  int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) {
    return getTLI()->getPreferredLargeGEPBaseOffset(MinOffset, MaxOffset);
  }
0395
  /// Returns the smallest vectorization factor (halving from \p VF) for which
  /// the next-smaller store would still be supported by the target, either as
  /// a legal/custom plain store or as a legal truncating store.
  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const {
    // Note: the lambda tests VF / 2, i.e. whether halving is still viable.
    auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {
      auto *SrcTy = FixedVectorType::get(ScalarMemTy, VF / 2);
      EVT VT = getTLI()->getValueType(DL, SrcTy);
      if (getTLI()->isOperationLegal(ISD::STORE, VT) ||
          getTLI()->isOperationCustom(ISD::STORE, VT))
        return true;

      // Otherwise check for a legal truncating store of the value type into
      // the legalized memory type.
      EVT ValVT =
          getTLI()->getValueType(DL, FixedVectorType::get(ScalarValTy, VF / 2));
      EVT LegalizedVT =
          getTLI()->getTypeToTransformTo(ScalarMemTy->getContext(), VT);
      return getTLI()->isTruncStoreLegal(LegalizedVT, ValVT);
    };
    while (VF > 2 && IsSupportedByTarget(VF))
      VF /= 2;
    return VF;
  }
0415
  /// Returns true if an indexed (pre/post inc/dec) load of \p Ty is legal.
  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
                          const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
  }

  /// Returns true if an indexed (pre/post inc/dec) store of \p Ty is legal.
  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
                           const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
  }
0427
  // Loop-strength-reduction policy hooks: fall back to the base defaults.
  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
  }

  bool isNumRegsMajorCostOfLSR() {
    return TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR();
  }

  bool shouldDropLSRSolutionIfLessProfitable() const {
    return TargetTransformInfoImplBase::shouldDropLSRSolutionIfLessProfitable();
  }

  bool isProfitableLSRChainElement(Instruction *I) {
    return TargetTransformInfoImplBase::isProfitableLSRChainElement(I);
  }

  /// Returns 0 when the addressing mode is legal (free), -1 otherwise.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset.getFixed();
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    AM.ScalableOffset = BaseOffset.getScalable();
    if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace))
      return 0;
    return -1;
  }

  bool isTruncateFree(Type *Ty1, Type *Ty2) {
    return getTLI()->isTruncateFree(Ty1, Ty2);
  }

  bool isProfitableToHoist(Instruction *I) {
    return getTLI()->isProfitableToHoist(I);
  }

  bool useAA() const { return getST()->useAA(); }

  /// A type is legal if it maps to a simple, natively supported value type.
  bool isTypeLegal(Type *Ty) {
    EVT VT = getTLI()->getValueType(DL, Ty, true);
    return getTLI()->isTypeLegal(VT);
  }

  /// Number of registers needed to hold a value of type \p Ty.
  unsigned getRegUsageForType(Type *Ty) {
    EVT ETy = getTLI()->getValueType(DL, Ty);
    return getTLI()->getNumRegisters(Ty->getContext(), ETy);
  }

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands, Type *AccessType,
                             TTI::TargetCostKind CostKind) {
    return BaseT::getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
  }
0483
  /// Estimate how many case clusters a switch lowers to: returns 1 when the
  /// whole switch can become a single bit-test or jump table (setting
  /// \p JumpTableSize for the latter), otherwise the raw case count N
  /// (i.e. one comparison/branch per case).
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JumpTableSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) {
    unsigned N = SI.getNumCases();
    const TargetLoweringBase *TLI = getTLI();
    const DataLayout &DL = this->getDataLayout();

    JumpTableSize = 0;
    bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());

    // Early out: no cases, or too many cases to bit-test when jump tables
    // are disallowed.
    if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
      return N;

    // Compute the signed min/max case values for range-based lowering.
    APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
    APInt MinCaseVal = MaxCaseVal;
    for (auto CI : SI.cases()) {
      const APInt &CaseVal = CI.getCaseValue()->getValue();
      if (CaseVal.sgt(MaxCaseVal))
        MaxCaseVal = CaseVal;
      if (CaseVal.slt(MinCaseVal))
        MinCaseVal = CaseVal;
    }

    // Check whether a single bit-test covers all cases.
    if (N <= DL.getIndexSizeInBits(0u)) {
      SmallPtrSet<const BasicBlock *, 4> Dests;
      for (auto I : SI.cases())
        Dests.insert(I.getCaseSuccessor());

      if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
                                     DL))
        return 1;
    }

    // Check whether a jump table is suitable; its size is the case-value
    // range (clamped to avoid uint64_t overflow on the +1).
    if (IsJTAllowed) {
      if (N < 2 || N < TLI->getMinimumJumpTableEntries())
        return N;
      uint64_t Range =
          (MaxCaseVal - MinCaseVal)
              .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
      if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
        JumpTableSize = Range;
        return 1;
      }
    }
    return N;
  }
0542
0543 bool shouldBuildLookupTables() {
0544 const TargetLoweringBase *TLI = getTLI();
0545 return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
0546 TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
0547 }
0548
  /// Relative (PC-relative) lookup tables only make sense for PIC code on
  /// 64-bit targets with a small/kernel code model, excluding AArch64 Darwin.
  bool shouldBuildRelLookupTables() const {
    const TargetMachine &TM = getTLI()->getTargetMachine();

    if (!TM.isPositionIndependent())
      return false;

    // Medium/large code models cannot assume the table fits in 32-bit
    // relative offsets.
    if (TM.getCodeModel() == CodeModel::Medium ||
        TM.getCodeModel() == CodeModel::Large)
      return false;

    const Triple &TargetTriple = TM.getTargetTriple();
    if (!TargetTriple.isArch64Bit())
      return false;

    // Excluded combination; presumably a platform-specific limitation on
    // AArch64 Darwin — see upstream history for rationale.
    if (TargetTriple.getArch() == Triple::aarch64 && TargetTriple.isOSDarwin())
      return false;

    return true;
  }

  /// sqrt is "fast" when the legalized type supports a native FSQRT.
  bool haveFastSqrt(Type *Ty) {
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(DL, Ty);
    return TLI->isTypeLegal(VT) &&
           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
  }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
    return true;
  }

  /// FP ops cost TCC_Basic when FADD is supported for the type, otherwise
  /// they are treated as expensive (likely libcall/expansion).
  InstructionCost getFPOpCost(Type *Ty) {
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(DL, Ty);
    if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
      return TargetTransformInfo::TCC_Basic;
    return TargetTransformInfo::TCC_Expensive;
  }

  /// Keep constant divisors attached to div/rem instructions when the target
  /// would otherwise lower the division expensively (magic-number expansion
  /// depends on seeing the constant).
  bool preferToKeepConstantsAttached(const Instruction &Inst,
                                     const Function &Fn) const {
    switch (Inst.getOpcode()) {
    default:
      break;
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem: {
      if (!isa<ConstantInt>(Inst.getOperand(1)))
        return false;
      EVT VT = getTLI()->getValueType(DL, Inst.getType());
      return !getTLI()->isIntDivCheap(VT, Fn.getAttributes());
    }
    };

    return false;
  }

  // Inliner tuning defaults.
  unsigned getInliningThresholdMultiplier() const { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) { return 0; }
  unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
    return 0;
  }

  int getInlinerVectorBonusPercent() const { return 150; }
0621
  /// Default unrolling policy: enable partial/runtime unrolling with a
  /// threshold derived from the subtarget's loop micro-op buffer size (or
  /// the -partial-unrolling-threshold flag), but refuse to unroll loops
  /// containing real calls.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) {
    // Pick the op budget: explicit flag wins, then the scheduler's loop
    // micro-op buffer size; with neither available, leave defaults alone.
    unsigned MaxOps;
    const TargetSubtargetInfo *ST = getST();
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
      MaxOps = PartialUnrollingThreshold;
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
    else
      return;

    // Scan the loop for calls that actually lower to calls; such loops are
    // not unrolled (and we tell the user why via a remark).
    for (BasicBlock *BB : L->blocks()) {
      for (Instruction &I : *BB) {
        if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
          if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
            // Intrinsics/builtins that don't become calls are fine.
            if (!thisT()->isLoweredToCall(F))
              continue;
          }

          if (ORE) {
            ORE->emit([&]() {
              return OptimizationRemark("TTI", "DontUnroll", L->getStartLoc(),
                                        L->getHeader())
                     << "advising against unrolling the loop because it "
                        "contains a "
                     << ore::NV("Call", &I);
            });
          }
          return;
        }
      }
    }

    // Enable runtime and partial unrolling up to the op budget.
    UP.Partial = UP.Runtime = UP.UpperBound = true;
    UP.PartialThreshold = MaxOps;

    // Avoid unrolling when optimizing for size.
    UP.OptSizeThreshold = 0;
    UP.PartialOptSizeThreshold = 0;

    // Assume two branch/increment instructions of loop overhead per
    // iteration when sizing the unrolled body.
    UP.BEInsns = 2;
  }
0693
  /// Default peeling policy: allow peeling (but not of loop nests), with no
  /// peel count chosen here.
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) {
    PP.PeelCount = 0;
    PP.AllowPeeling = true;
    PP.AllowLoopNestsPeeling = false;
    PP.PeelProfiledIterations = true;
  }

  // The following hooks simply forward to the TargetTransformInfoImplCRTPBase
  // defaults; targets override them in their derived TTI classes.
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) {
    return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }

  unsigned getEpilogueVectorizationMinVF() {
    return BaseT::getEpilogueVectorizationMinVF();
  }

  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) {
    return BaseT::preferPredicateOverEpilogue(TFI);
  }

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) {
    return BaseT::getPreferredTailFoldingStyle(IVUpdateMayOverflow);
  }

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) {
    return BaseT::instCombineIntrinsic(IC, II);
  }

  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) {
    return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
                                                   KnownBitsComputed);
  }

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) {
    return BaseT::simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }

  // Cache and prefetch parameters come straight from the subtarget; virtual
  // so targets can refine them.
  virtual std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    return std::optional<unsigned>(
        getST()->getCacheSize(static_cast<unsigned>(Level)));
  }

  virtual std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    std::optional<unsigned> TargetResult =
        getST()->getCacheAssociativity(static_cast<unsigned>(Level));

    if (TargetResult)
      return TargetResult;

    // Fall back to the base implementation when the subtarget has no answer.
    return BaseT::getCacheAssociativity(Level);
  }

  virtual unsigned getCacheLineSize() const {
    return getST()->getCacheLineSize();
  }

  virtual unsigned getPrefetchDistance() const {
    return getST()->getPrefetchDistance();
  }

  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                        unsigned NumStridedMemAccesses,
                                        unsigned NumPrefetches,
                                        bool HasCall) const {
    return getST()->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                         NumPrefetches, HasCall);
  }

  virtual unsigned getMaxPrefetchIterationsAhead() const {
    return getST()->getMaxPrefetchIterationsAhead();
  }

  virtual bool enableWritePrefetching() const {
    return getST()->enableWritePrefetching();
  }

  virtual bool shouldPrefetchAddressSpace(unsigned AS) const {
    return getST()->shouldPrefetchAddressSpace(AS);
  }

  /// Default register width: 32 bits, fixed.
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  // vscale is unknown by default.
  std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
  std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
  bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
0802
0803
0804
0805
  /// Estimate the overhead of scalarizing \p InTy for the lanes selected by
  /// \p DemandedElts: per demanded lane, one insert (when \p Insert) and/or
  /// one extract (when \p Extract). \p VL, if provided, supplies the scalar
  /// values being inserted so targets can cost them more precisely.
  InstructionCost getScalarizationOverhead(VectorType *InTy,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind,
                                           ArrayRef<Value *> VL = {}) {
    // Scalable vectors have no fixed lane count to iterate over.
    if (isa<ScalableVectorType>(InTy))
      return InstructionCost::getInvalid();
    auto *Ty = cast<FixedVectorType>(InTy);

    assert(DemandedElts.getBitWidth() == Ty->getNumElements() &&
           (VL.empty() || VL.size() == Ty->getNumElements()) &&
           "Vector size mismatch");

    InstructionCost Cost = 0;

    for (int i = 0, e = Ty->getNumElements(); i < e; ++i) {
      if (!DemandedElts[i])
        continue;
      if (Insert) {
        Value *InsertedVal = VL.empty() ? nullptr : VL[i];
        Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty,
                                            CostKind, i, nullptr, InsertedVal);
      }
      if (Extract)
        Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
                                            CostKind, i, nullptr, nullptr);
    }

    return Cost;
  }

  // Target-intrinsic scalarization queries; conservative defaults.
  bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const {
    return false;
  }

  bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                          unsigned ScalarOpdIdx) const {
    return false;
  }

  // -1 denotes the return value for overload-type queries.
  bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
                                              int OpdIdx) const {
    return OpdIdx == -1;
  }

  bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
                                                        int RetIdx) const {
    return RetIdx == 0;
  }
0857
0858
0859 InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert,
0860 bool Extract,
0861 TTI::TargetCostKind CostKind) {
0862 if (isa<ScalableVectorType>(InTy))
0863 return InstructionCost::getInvalid();
0864 auto *Ty = cast<FixedVectorType>(InTy);
0865
0866 APInt DemandedElts = APInt::getAllOnes(Ty->getNumElements());
0867 return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
0868 CostKind);
0869 }
0870
0871
0872
0873
  /// Estimate the cost of extracting the scalar lanes of each unique,
  /// non-constant vector operand in \p Args (operands must be extracted
  /// once no matter how many times they are used).
  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) {
    assert(Args.size() == Tys.size() && "Expected matching Args and Tys");

    InstructionCost Cost = 0;
    SmallPtrSet<const Value*, 4> UniqueOperands;
    for (int I = 0, E = Args.size(); I != E; I++) {
      // Only int/FP/pointer (or vectors thereof) operands contribute.
      const Value *A = Args[I];
      Type *Ty = Tys[I];
      if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy() &&
          !Ty->isPtrOrPtrVectorTy())
        continue;

      // Constants are materialized, not extracted; duplicates cost once.
      if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
        if (auto *VecTy = dyn_cast<VectorType>(Ty))
          Cost += getScalarizationOverhead(VecTy, false,
                                           true, CostKind);
      }
    }

    return Cost;
  }

  /// Combined scalarization cost of a result vector plus its operands: the
  /// result is inserted lane by lane; operands are extracted. Without
  /// operand info, the result type is conservatively assumed to stand in
  /// for a single operand's extraction cost too.
  InstructionCost getScalarizationOverhead(VectorType *RetTy,
                                           ArrayRef<const Value *> Args,
                                           ArrayRef<Type *> Tys,
                                           TTI::TargetCostKind CostKind) {
    InstructionCost Cost = getScalarizationOverhead(
        RetTy, true, false, CostKind);
    if (!Args.empty())
      Cost += getOperandsScalarizationOverhead(Args, Tys, CostKind);
    else
      Cost += getScalarizationOverhead(RetTy, false,
                                       true, CostKind);

    return Cost;
  }
0920
0921
  /// Estimate the cost of legalizing \p Ty: walk the legalization steps the
  /// type would go through, doubling the cost for each vector split or
  /// integer expansion, and return the final (cost, legal MVT) pair.
  std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const {
    LLVMContext &C = Ty->getContext();
    EVT MTy = getTLI()->getValueType(DL, Ty);

    InstructionCost Cost = 1;

    while (true) {
      TargetLoweringBase::LegalizeKind LK = getTLI()->getTypeConversion(C, MTy);

      if (LK.first == TargetLoweringBase::TypeScalarizeScalableVector) {
        // Scalarizing a scalable vector has no computable cost; report an
        // invalid cost (with a placeholder MVT when the type isn't simple).
        MVT VT = MTy.isSimple() ? MTy.getSimpleVT() : MVT::i64;
        return std::make_pair(InstructionCost::getInvalid(), VT);
      }

      if (LK.first == TargetLoweringBase::TypeLegal)
        return std::make_pair(Cost, MTy.getSimpleVT());

      // Splitting a vector or expanding an integer halves the width and
      // doubles the number of operations.
      if (LK.first == TargetLoweringBase::TypeSplitVector ||
          LK.first == TargetLoweringBase::TypeExpandInteger)
        Cost *= 2;

      // Guard against legalization not making progress.
      if (MTy == LK.second)
        return std::make_pair(Cost, MTy.getSimpleVT());

      // Continue legalizing from the converted type.
      MTy = LK.second;
    }
  }

  unsigned getMaxInterleaveFactor(ElementCount VF) { return 1; }
0957
  /// Cost an arithmetic instruction: legal/promoted ops cost the legalization
  /// factor times a base op cost (FP ops weighted 2x), custom-lowered ops
  /// double that, rem may be rewritten as div+mul+sub, and everything else on
  /// fixed vectors is costed by full scalarization.
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr) {
    // Check if any of the operands are vector operands.
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // Only the throughput model below applies; other cost kinds use the
    // base implementation.
    if (CostKind != TTI::TCK_RecipThroughput)
      return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind,
                                           Opd1Info, Opd2Info,
                                           Args, CxtI);

    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

    bool IsFloat = Ty->isFPOrFPVectorTy();
    // Assume that floating point arithmetic operations cost twice as much
    // as integer operations.
    InstructionCost OpCost = (IsFloat ? 2 : 1);

    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1. Multiply by the
      // type-legalization overhead.
      return LT.first * OpCost;
    }

    if (!TLI->isOperationExpand(ISD, LT.second)) {
      // Custom lowering: assume roughly twice the cost of a legal op.
      return LT.first * 2 * OpCost;
    }

    // An 'Expand' of URem and SRem is special because it may default
    // to expanding the operation into a sequence of sub-operations
    // i.e. X % Y -> X-(X/Y)*Y.
    if (ISD == ISD::UREM || ISD == ISD::SREM) {
      bool IsSigned = ISD == ISD::SREM;
      if (TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIVREM : ISD::UDIVREM,
                                        LT.second) ||
          TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIV : ISD::UDIV,
                                        LT.second)) {
        unsigned DivOpc = IsSigned ? Instruction::SDiv : Instruction::UDiv;
        InstructionCost DivCost = thisT()->getArithmeticInstrCost(
            DivOpc, Ty, CostKind, Opd1Info, Opd2Info);
        InstructionCost MulCost =
            thisT()->getArithmeticInstrCost(Instruction::Mul, Ty, CostKind);
        InstructionCost SubCost =
            thisT()->getArithmeticInstrCost(Instruction::Sub, Ty, CostKind);
        return DivCost + MulCost + SubCost;
      }
    }

    // Scalable vectors cannot be scalarized below.
    if (isa<ScalableVectorType>(Ty))
      return InstructionCost::getInvalid();

    // Else, assume that we need to scalarize this op: per-element op cost
    // plus the cost of extracting operands and inserting results.
    if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
      InstructionCost Cost = thisT()->getArithmeticInstrCost(
          Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
          Args, CxtI);
      // Return the cost of multiple scalar invocations plus the
      // scalarization overhead.
      SmallVector<Type *> Tys(Args.size(), Ty);
      return getScalarizationOverhead(VTy, Args, Tys, CostKind) +
             VTy->getNumElements() * Cost;
    }

    // We don't know anything about this scalar instruction.
    return OpCost;
  }
1034
  /// Refine a generic permute shuffle kind into a cheaper, more specific kind
  /// (reverse, broadcast, extract/insert-subvector, select, transpose,
  /// splice) by inspecting the concrete mask; \p Index and \p SubTy are
  /// populated for the subvector kinds.
  TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind,
                                              ArrayRef<int> Mask,
                                              VectorType *Ty, int &Index,
                                              VectorType *&SubTy) const {
    if (Mask.empty())
      return Kind;
    int NumSrcElts = Ty->getElementCount().getKnownMinValue();
    switch (Kind) {
    case TTI::SK_PermuteSingleSrc: {
      if (ShuffleVectorInst::isReverseMask(Mask, NumSrcElts))
        return TTI::SK_Reverse;
      if (ShuffleVectorInst::isZeroEltSplatMask(Mask, NumSrcElts))
        return TTI::SK_Broadcast;
      if (isSplatMask(Mask, NumSrcElts, Index))
        return TTI::SK_Broadcast;
      // Extract-subvector only when the extracted range stays in bounds.
      if (ShuffleVectorInst::isExtractSubvectorMask(Mask, NumSrcElts, Index) &&
          (Index + Mask.size()) <= (size_t)NumSrcElts) {
        SubTy = FixedVectorType::get(Ty->getElementType(), Mask.size());
        return TTI::SK_ExtractSubvector;
      }
      break;
    }
    case TTI::SK_PermuteTwoSrc: {
      int NumSubElts;
      if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask(
                                 Mask, NumSrcElts, NumSubElts, Index)) {
        // Out-of-bounds insert point: keep the generic kind.
        if (Index + NumSubElts > NumSrcElts)
          return Kind;
        SubTy = FixedVectorType::get(Ty->getElementType(), NumSubElts);
        return TTI::SK_InsertSubvector;
      }
      if (ShuffleVectorInst::isSelectMask(Mask, NumSrcElts))
        return TTI::SK_Select;
      if (ShuffleVectorInst::isTransposeMask(Mask, NumSrcElts))
        return TTI::SK_Transpose;
      if (ShuffleVectorInst::isSpliceMask(Mask, NumSrcElts, Index))
        return TTI::SK_Splice;
      break;
    }
    // Already-specific kinds pass through unchanged.
    case TTI::SK_Select:
    case TTI::SK_Reverse:
    case TTI::SK_Broadcast:
    case TTI::SK_Transpose:
    case TTI::SK_InsertSubvector:
    case TTI::SK_ExtractSubvector:
    case TTI::SK_Splice:
      break;
    }
    return Kind;
  }
1085
  /// Default shuffle cost: first refine the kind from the mask, then fall
  /// back to the scalarization-based overhead helpers. Scalable vectors are
  /// only supported for the subvector kinds' source operand (the helpers
  /// require a fixed subvector type).
  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = {},
                                 const Instruction *CxtI = nullptr) {
    switch (improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp)) {
    case TTI::SK_Broadcast:
      if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
        return getBroadcastShuffleOverhead(FVT, CostKind);
      return InstructionCost::getInvalid();
    case TTI::SK_Select:
    case TTI::SK_Splice:
    case TTI::SK_Reverse:
    case TTI::SK_Transpose:
    case TTI::SK_PermuteSingleSrc:
    case TTI::SK_PermuteTwoSrc:
      // All generic permute-like kinds are costed as a full scalarization.
      if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
        return getPermuteShuffleOverhead(FVT, CostKind);
      return InstructionCost::getInvalid();
    case TTI::SK_ExtractSubvector:
      return getExtractSubvectorOverhead(Tp, CostKind, Index,
                                         cast<FixedVectorType>(SubTp));
    case TTI::SK_InsertSubvector:
      return getInsertSubvectorOverhead(Tp, CostKind, Index,
                                        cast<FixedVectorType>(SubTp));
    }
    llvm_unreachable("Unknown TTI::ShuffleKind");
  }
1115
  /// Estimate the cost of a cast instruction \p Opcode from \p Src to \p Dst.
  /// \p CCH gives context (e.g. whether an ext folds into a load); \p I is
  /// the actual instruction when available.
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr) {
    // If the base implementation already treats this cast as free, so do we.
    if (BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I) == 0)
      return 0;

    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
    std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(Src);
    std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst);

    TypeSize SrcSize = SrcLT.second.getSizeInBits();
    TypeSize DstSize = DstLT.second.getSizeInBits();
    bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy();
    bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy();

    // First, look for casts the target reports as free.
    switch (Opcode) {
    default:
      break;
    case Instruction::Trunc:
      // A free truncate costs nothing...
      if (TLI->isTruncateFree(SrcLT.second, DstLT.second))
        return 0;
      // ...otherwise treat it like a bitcast between same-sized legal types.
      [[fallthrough]];
    case Instruction::BitCast:
      // Bitcast between legalized types of the same size and the same
      // int/pointer-ness is a no-op.
      if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
          SrcSize == DstSize)
        return 0;
      break;
    case Instruction::FPExt:
      if (I && getTLI()->isExtFree(I))
        return 0;
      break;
    case Instruction::ZExt:
      if (TLI->isZExtFree(SrcLT.second, DstLT.second))
        return 0;
      [[fallthrough]];
    case Instruction::SExt:
      if (I && getTLI()->isExtFree(I))
        return 0;
      // In a normal (load-feeding) context, an extension that the target can
      // fold into an extending load is free.
      if (CCH == TTI::CastContextHint::Normal) {
        EVT ExtVT = EVT::getEVT(Dst);
        EVT LoadVT = EVT::getEVT(Src);
        unsigned LType =
          ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
        if (DstLT.first == SrcLT.first &&
            TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
          return 0;
      }
      break;
    case Instruction::AddrSpaceCast:
      if (TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
                                   Dst->getPointerAddressSpace()))
        return 0;
      break;
    }

    auto *SrcVTy = dyn_cast<VectorType>(Src);
    auto *DstVTy = dyn_cast<VectorType>(Dst);

    // Legal (or promoted) operation with matching legalization factors:
    // assume one instruction per legalized part.
    if (SrcLT.first == DstLT.first &&
        TLI->isOperationLegalOrPromote(ISD, DstLT.second))
      return SrcLT.first;

    // Scalar-to-scalar cast.
    if (!SrcVTy && !DstVTy) {
      // Not expanded: a single instruction.
      if (!TLI->isOperationExpand(ISD, DstLT.second))
        return 1;
      // Expanded by the target: assume a small library-call-like cost.
      return 4;
    }

    // Vector-to-vector cast.
    if (DstVTy && SrcVTy) {
      // Same legalization factor and same legalized width.
      if (SrcLT.first == DstLT.first && SrcSize == DstSize) {
        // zext is typically a cheap unpack-style operation.
        if (Opcode == Instruction::ZExt)
          return SrcLT.first;
        // sext additionally needs the sign replicated: count it double.
        if (Opcode == Instruction::SExt)
          return SrcLT.first * 2;
        // Anything else the target doesn't expand: one instruction per part.
        if (!TLI->isOperationExpand(ISD, DstLT.second))
          return SrcLT.first * 1;
      }

      // If either side is legalized by splitting, cost the cast recursively
      // on the half-sized vectors (two sub-casts), plus a split overhead
      // when only one side actually splits.
      bool SplitSrc =
          TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
          TargetLowering::TypeSplitVector;
      bool SplitDst =
          TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
          TargetLowering::TypeSplitVector;
      if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isVector() &&
          DstVTy->getElementCount().isVector()) {
        Type *SplitDstTy = VectorType::getHalfElementsVectorType(DstVTy);
        Type *SplitSrcTy = VectorType::getHalfElementsVectorType(SrcVTy);
        T *TTI = static_cast<T *>(this);
        // Only charge the split cost when one side stays whole.
        InstructionCost SplitCost =
            (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
        return SplitCost +
               (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, CCH,
                                          CostKind, I));
      }

      // Scalable vectors can't be scalarized below.
      if (isa<ScalableVectorType>(DstVTy))
        return InstructionCost::getInvalid();

      // Fall back to scalarization: one scalar cast per element plus the
      // extract/insert traffic.
      unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
      InstructionCost Cost = thisT()->getCastInstrCost(
          Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I);

      return getScalarizationOverhead(DstVTy, /*Insert=*/true, /*Extract=*/true,
                                      CostKind) +
             Num * Cost;
    }

    // Mixed scalar/vector bitcast: charge only the side that is a vector
    // (extract all elements of a vector source, or insert all elements of a
    // vector destination).
    if (Opcode == Instruction::BitCast) {
      return (SrcVTy ? getScalarizationOverhead(SrcVTy, /*Insert=*/false,
                                                /*Extract=*/true, CostKind)
                     : 0) +
             (DstVTy ? getScalarizationOverhead(DstVTy, /*Insert=*/true,
                                                /*Extract=*/false, CostKind)
                     : 0);
    }

    llvm_unreachable("Unhandled cast");
  }
1275
1276 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1277 VectorType *VecTy, unsigned Index) {
1278 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1279 return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
1280 CostKind, Index, nullptr, nullptr) +
1281 thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(),
1282 TTI::CastContextHint::None, CostKind);
1283 }
1284
1285 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
1286 const Instruction *I = nullptr) {
1287 return BaseT::getCFInstrCost(Opcode, CostKind, I);
1288 }
1289
  /// Estimate the cost of a compare or select. \p ValTy is the compared /
  /// selected type, \p CondTy the condition type (must exist for selects).
  InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) {
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // Only reciprocal throughput is modelled here; all other cost kinds are
    // handled by the base implementation.
    if (CostKind != TTI::TCK_RecipThroughput)
      return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                       Op1Info, Op2Info, I);

    // A select with a vector condition maps to VSELECT at the ISD level.
    if (ISD == ISD::SELECT) {
      assert(CondTy && "CondTy must exist");
      if (CondTy->isVectorTy())
        ISD = ISD::VSELECT;
    }
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);

    // If the value type legalizes to a vector (or is scalar) and the
    // operation isn't expanded, assume one instruction per legalized part.
    if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
        !TLI->isOperationExpand(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1. Multiply by the
      // type-legalization factor.
      return LT.first * 1;
    }

    // Otherwise, scalarize a fixed-width vector: per-element scalar
    // compare/select cost plus the overhead of inserting the results.
    if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
      // Scalable vectors cannot be scalarized.
      if (isa<ScalableVectorType>(ValTy))
        return InstructionCost::getInvalid();

      unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();
      if (CondTy)
        CondTy = CondTy->getScalarType();
      InstructionCost Cost =
          thisT()->getCmpSelInstrCost(Opcode, ValVTy->getScalarType(), CondTy,
                                      VecPred, CostKind, Op1Info, Op2Info, I);

      // Scalar extraction of the operands is assumed free here; only the
      // insertion of the per-lane results is charged.
      return getScalarizationOverhead(ValVTy, /*Insert=*/true,
                                      /*Extract=*/false, CostKind) +
             Num * Cost;
    }

    // Expanded scalar compare/select: still assume a unit cost.
    return 1;
  }
1344
1345 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1346 TTI::TargetCostKind CostKind,
1347 unsigned Index, Value *Op0, Value *Op1) {
1348 return getRegUsageForType(Val->getScalarType());
1349 }
1350
1351
1352
1353
1354
1355 InstructionCost getVectorInstrCost(
1356 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
1357 Value *Scalar,
1358 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
1359 return thisT()->getVectorInstrCost(Opcode, Val, CostKind, Index, nullptr,
1360 nullptr);
1361 }
1362
1363 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
1364 TTI::TargetCostKind CostKind,
1365 unsigned Index) {
1366 Value *Op0 = nullptr;
1367 Value *Op1 = nullptr;
1368 if (auto *IE = dyn_cast<InsertElementInst>(&I)) {
1369 Op0 = IE->getOperand(0);
1370 Op1 = IE->getOperand(1);
1371 }
1372 return thisT()->getVectorInstrCost(I.getOpcode(), Val, CostKind, Index, Op0,
1373 Op1);
1374 }
1375
  /// Cost of a "replication" shuffle that repeats each of the \p VF source
  /// elements \p ReplicationFactor times. Only the destination lanes set in
  /// \p DemandedDstElts (bit width VF * ReplicationFactor) must hold correct
  /// values. Generic model: extract the demanded source elements, then
  /// insert them into the demanded destination lanes.
  InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
                                            int VF,
                                            const APInt &DemandedDstElts,
                                            TTI::TargetCostKind CostKind) {
    assert(DemandedDstElts.getBitWidth() == (unsigned)VF * ReplicationFactor &&
           "Unexpected size of DemandedDstElts.");

    InstructionCost Cost;

    auto *SrcVT = FixedVectorType::get(EltTy, VF);
    auto *ReplicatedVT = FixedVectorType::get(EltTy, VF * ReplicationFactor);

    // A source element is demanded iff at least one of the destination lanes
    // it replicates into is demanded; ScaleBitMask collapses each group of
    // ReplicationFactor destination bits down to one source bit.
    APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedDstElts, VF);
    // Extract the demanded source elements...
    Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts,
                                              /*Insert=*/false,
                                              /*Extract=*/true, CostKind);
    // ...and insert them into the demanded lanes of the replicated vector.
    Cost += thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts,
                                              /*Insert=*/true,
                                              /*Extract=*/false, CostKind);

    return Cost;
  }
1408
  /// Cost of a plain (unmasked) load or store of type \p Src.
  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
                  const Instruction *I = nullptr) {
    assert(!Src->isVoidTy() && "Invalid type");
    // Types with no MVT representation get a small flat cost.
    if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
      return 4;
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);

    // Assume one memory instruction per legalized part.
    InstructionCost Cost = LT.first;
    if (CostKind != TTI::TCK_RecipThroughput)
      return Cost;

    // NOTE: this local DL deliberately shadows the member used above.
    const DataLayout &DL = this->getDataLayout();
    if (Src->isVectorTy() &&
        // The in-memory vector is narrower than the legalized register type,
        // so a load must extend and a store must truncate.
        TypeSize::isKnownLT(DL.getTypeStoreSizeInBits(Src),
                            LT.second.getSizeInBits())) {
      // Check whether the target supports the required truncating store /
      // extending load natively; otherwise the access is scalarized.
      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
      EVT MemVT = getTLI()->getValueType(DL, Src);
      if (Opcode == Instruction::Store)
        LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
      else
        LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);

      if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
        // Scalarized access: add insertion of loaded elements (for a load)
        // or extraction of the elements to be stored (for a store).
        Cost += getScalarizationOverhead(
            cast<VectorType>(Src), Opcode != Instruction::Store,
            Opcode == Instruction::Store, CostKind);
      }
    }

    return Cost;
  }
1453
1454 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
1455 Align Alignment, unsigned AddressSpace,
1456 TTI::TargetCostKind CostKind) {
1457
1458 return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, true, false,
1459 CostKind);
1460 }
1461
1462 InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1463 const Value *Ptr, bool VariableMask,
1464 Align Alignment,
1465 TTI::TargetCostKind CostKind,
1466 const Instruction *I = nullptr) {
1467 return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask,
1468 true, CostKind);
1469 }
1470
1471 InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
1472 const Value *Ptr, bool VariableMask,
1473 Align Alignment,
1474 TTI::TargetCostKind CostKind,
1475 const Instruction *I) {
1476
1477
1478
1479 return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
1480 Alignment, CostKind, I);
1481 }
1482
  /// Cost of an interleaved load/store group with \p Factor members over the
  /// whole wide vector type \p VecTy; \p Indices lists the live members.
  /// Generic model: one wide (possibly masked) memory op, plus the
  /// insert/extract traffic needed to (de)interleave, plus mask replication
  /// when a mask is required.
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) {
    // The scalarization-based model below cannot handle scalable vectors.
    if (isa<ScalableVectorType>(VecTy))
      return InstructionCost::getInvalid();

    auto *VT = cast<FixedVectorType>(VecTy);

    unsigned NumElts = VT->getNumElements();
    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");

    // Each member of the group is a sub-vector of NumElts / Factor elements.
    unsigned NumSubElts = NumElts / Factor;
    auto *SubVT = FixedVectorType::get(VT->getElementType(), NumSubElts);

    // Cost of the wide memory access itself (masked when a mask is needed
    // either for the condition or for gaps in the group).
    InstructionCost Cost;
    if (UseMaskForCond || UseMaskForGaps)
      Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
                                            AddressSpace, CostKind);
    else
      Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
                                      CostKind);

    // Legalization may split the wide access into several legal-width memory
    // instructions.
    MVT VecTyLT = getTypeLegalizationCost(VecTy).second;
    unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
    unsigned VecTyLTSize = VecTyLT.getStoreSize();

    // When only some members are demanded, some of those legal instructions
    // may cover no demanded lane at all and can be dropped: scale the memory
    // cost by the fraction of legal instructions actually used.
    if (Cost.isValid() && VecTySize > VecTyLTSize) {
      // Number of legal memory instructions the wide access splits into.
      unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize);

      // Number of wide-vector elements covered by each legal instruction.
      unsigned NumEltsPerLegalInst = divideCeil(NumElts, NumLegalInsts);

      // Mark every legal instruction that holds at least one demanded lane.
      BitVector UsedInsts(NumLegalInsts, false);
      for (unsigned Index : Indices)
        for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
          UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);

      // Scale the wide-access cost by used / total legal instructions,
      // rounding up.
      Cost = divideCeil(UsedInsts.count() * *Cost.getValue(), NumLegalInsts);
    }

    // Now add the (de)interleaving shuffle traffic.
    assert(Indices.size() <= Factor &&
           "Interleaved memory op has too many members");

    const APInt DemandedAllSubElts = APInt::getAllOnes(NumSubElts);
    const APInt DemandedAllResultElts = APInt::getAllOnes(NumElts);

    // Lanes of the wide vector owned by live members: member Index occupies
    // lanes Index, Index + Factor, Index + 2*Factor, ...
    APInt DemandedLoadStoreElts = APInt::getZero(NumElts);
    for (unsigned Index : Indices) {
      assert(Index < Factor && "Invalid index for interleaved memory op");
      for (unsigned Elm = 0; Elm < NumSubElts; Elm++)
        DemandedLoadStoreElts.setBit(Index + Elm * Factor);
    }

    if (Opcode == Instruction::Load) {
      // Deinterleave: for each live member, insert its NumSubElts values
      // into a sub-vector, and extract the demanded lanes of the wide load.
      InstructionCost InsSubCost = thisT()->getScalarizationOverhead(
          SubVT, DemandedAllSubElts,
          /*Insert=*/true, /*Extract=*/false, CostKind);
      Cost += Indices.size() * InsSubCost;
      Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
                                                /*Insert=*/false,
                                                /*Extract=*/true, CostKind);
    } else {
      // Interleave: extract every element of each member sub-vector and
      // insert them into the demanded lanes of the wide store.
      InstructionCost ExtSubCost = thisT()->getScalarizationOverhead(
          SubVT, DemandedAllSubElts,
          /*Insert=*/false, /*Extract=*/true, CostKind);
      Cost += ExtSubCost * Indices.size();
      Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
                                                /*Insert=*/true,
                                                /*Extract=*/false, CostKind);
    }

    if (!UseMaskForCond)
      return Cost;

    // Build the wide mask by replicating the per-member mask (one i8 per
    // sub-element) across the whole group.
    Type *I8Type = Type::getInt8Ty(VT->getContext());

    Cost += thisT()->getReplicationShuffleCost(
        I8Type, Factor, NumSubElts,
        UseMaskForGaps ? DemandedLoadStoreElts : DemandedAllResultElts,
        CostKind);

    // With gaps, the replicated mask must additionally be AND'ed with a gap
    // mask that disables the lanes of dead members.
    if (UseMaskForGaps) {
      auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
                                              CostKind);
    }

    return Cost;
  }
1625
1626
  /// Estimate the cost of the intrinsic call described by \p ICA. Handles
  /// free intrinsics, VP (vector-predicated) intrinsics via their functional
  /// equivalents, a set of specially-modelled intrinsics, and falls back to
  /// the type-based cost with scalarization overhead.
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) {
    // If the base implementation already treats the call as free, trust it.
    if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0)
      return 0;

    // Target intrinsics are assumed to be cheap (a single basic op).
    Intrinsic::ID IID = ICA.getID();
    if (Intrinsic::isTargetIntrinsic(IID))
      return TargetTransformInfo::TCC_Basic;

    // VP intrinsics: cost them like their non-predicated functional
    // equivalent where one exists.
    if (VPIntrinsic::isVPIntrinsic(ICA.getID())) {
      std::optional<unsigned> FOp =
          VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
      if (FOp) {
        if (ICA.getID() == Intrinsic::vp_load) {
          Align Alignment;
          if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
            Alignment = VPI->getPointerAlignment().valueOrOne();
          unsigned AS = 0;
          if (ICA.getArgTypes().size() > 1)
            if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes()[0]))
              AS = PtrTy->getAddressSpace();
          return thisT()->getMemoryOpCost(*FOp, ICA.getReturnType(), Alignment,
                                          AS, CostKind);
        }
        if (ICA.getID() == Intrinsic::vp_store) {
          Align Alignment;
          if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
            Alignment = VPI->getPointerAlignment().valueOrOne();
          unsigned AS = 0;
          if (ICA.getArgTypes().size() >= 2)
            if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes()[1]))
              AS = PtrTy->getAddressSpace();
          return thisT()->getMemoryOpCost(*FOp, ICA.getArgTypes()[0], Alignment,
                                          AS, CostKind);
        }
        if (VPBinOpIntrinsic::isVPBinOp(ICA.getID())) {
          return thisT()->getArithmeticInstrCost(*FOp, ICA.getReturnType(),
                                                 CostKind);
        }
        if (VPCastIntrinsic::isVPCast(ICA.getID())) {
          return thisT()->getCastInstrCost(
              *FOp, ICA.getReturnType(), ICA.getArgTypes()[0],
              TTI::CastContextHint::None, CostKind);
        }
        if (VPCmpIntrinsic::isVPCmp(ICA.getID())) {
          // The compare predicate is only available from the instruction.
          if (ICA.getInst()) {
            assert(FOp);
            auto *UI = cast<VPCmpIntrinsic>(ICA.getInst());
            return thisT()->getCmpSelInstrCost(*FOp, ICA.getArgTypes()[0],
                                               ICA.getReturnType(),
                                               UI->getPredicate(), CostKind);
          }
        }
      }

      // No functional opcode: try the functional (non-VP) intrinsic instead.
      std::optional<Intrinsic::ID> FID =
          VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());
      if (FID) {
        // Drop the trailing mask and vector-length arguments, which the
        // functional intrinsic does not take.
        assert(ICA.getArgTypes().size() >= 2 &&
               "Expected VPIntrinsic to have Mask and Vector Length args and "
               "types");
        ArrayRef<Type *> NewTys = ArrayRef(ICA.getArgTypes()).drop_back(2);

        // VP reductions (other than fadd/fmul) also carry a leading
        // start-value argument that the functional reduction doesn't take.
        if (VPReductionIntrinsic::isVPReduction(ICA.getID()) &&
            *FID != Intrinsic::vector_reduce_fadd &&
            *FID != Intrinsic::vector_reduce_fmul)
          NewTys = NewTys.drop_front();

        IntrinsicCostAttributes NewICA(*FID, ICA.getReturnType(), NewTys,
                                       ICA.getFlags());
        return thisT()->getIntrinsicInstrCost(NewICA, CostKind);
      }
    }

    // No argument values available: use the type-based path directly.
    if (ICA.isTypeBasedOnly())
      return getTypeBasedIntrinsicInstrCost(ICA, CostKind);

    Type *RetTy = ICA.getReturnType();

    ElementCount RetVF =
        (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
                             : ElementCount::getFixed(1));
    const IntrinsicInst *I = ICA.getInst();
    const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
    FastMathFlags FMF = ICA.getFlags();
    switch (IID) {
    default:
      break;

    case Intrinsic::powi:
      if (auto *RHSC = dyn_cast<ConstantInt>(Args[1])) {
        bool ShouldOptForSize = I->getParent()->getParent()->hasOptSize();
        if (getTLI()->isBeneficialToExpandPowI(RHSC->getSExtValue(),
                                               ShouldOptForSize)) {
          // powi with a constant exponent expands to repeated squaring:
          // (ActiveBits + PopCount - 2) multiplies, plus a final divide when
          // the exponent is negative.
          APInt Exponent = RHSC->getValue().abs();
          unsigned ActiveBits = Exponent.getActiveBits();
          unsigned PopCount = Exponent.popcount();
          InstructionCost Cost = (ActiveBits + PopCount - 2) *
                                 thisT()->getArithmeticInstrCost(
                                     Instruction::FMul, RetTy, CostKind);
          if (RHSC->isNegative())
            Cost += thisT()->getArithmeticInstrCost(Instruction::FDiv, RetTy,
                                                    CostKind);
          return Cost;
        }
      }
      break;
    case Intrinsic::cttz:
      // Cheap if the target can speculate a scalar cttz.
      if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz(RetTy))
        return TargetTransformInfo::TCC_Basic;
      break;

    case Intrinsic::ctlz:
      // Cheap if the target can speculate a scalar ctlz.
      if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCtlz(RetTy))
        return TargetTransformInfo::TCC_Basic;
      break;

    case Intrinsic::memcpy:
      return thisT()->getMemcpyCost(ICA.getInst());

    case Intrinsic::masked_scatter: {
      // The mask is variable unless it is a compile-time constant.
      const Value *Mask = Args[3];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
      return thisT()->getGatherScatterOpCost(Instruction::Store,
                                             ICA.getArgTypes()[0], Args[1],
                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::masked_gather: {
      const Value *Mask = Args[2];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
      return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::experimental_vp_strided_store: {
      const Value *Data = Args[0];
      const Value *Ptr = Args[1];
      const Value *Mask = Args[3];
      const Value *EVL = Args[4];
      // Effectively variable if either the mask or the EVL is non-constant.
      bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
      Type *EltTy = cast<VectorType>(Data->getType())->getElementType();
      Align Alignment =
          I->getParamAlign(1).value_or(thisT()->DL.getABITypeAlign(EltTy));
      return thisT()->getStridedMemoryOpCost(Instruction::Store,
                                             Data->getType(), Ptr, VarMask,
                                             Alignment, CostKind, I);
    }
    case Intrinsic::experimental_vp_strided_load: {
      const Value *Ptr = Args[0];
      const Value *Mask = Args[2];
      const Value *EVL = Args[3];
      bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
      Type *EltTy = cast<VectorType>(RetTy)->getElementType();
      Align Alignment =
          I->getParamAlign(0).value_or(thisT()->DL.getABITypeAlign(EltTy));
      return thisT()->getStridedMemoryOpCost(Instruction::Load, RetTy, Ptr,
                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::stepvector: {
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      // A fixed-width stepvector is just a constant vector.
      return TargetTransformInfo::TCC_Basic;
    }
    case Intrinsic::vector_extract: {
      // A scalable-result extract cannot be expressed as a shuffle mask;
      // defer to the base implementation.
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
      return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
                                     cast<VectorType>(Args[0]->getType()), {},
                                     CostKind, Index, cast<VectorType>(RetTy));
    }
    case Intrinsic::vector_insert: {
      // A scalable subvector insert cannot be expressed as a shuffle mask;
      // defer to the base implementation.
      if (isa<ScalableVectorType>(Args[1]->getType()))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
      return thisT()->getShuffleCost(
          TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), {},
          CostKind, Index, cast<VectorType>(Args[1]->getType()));
    }
    case Intrinsic::vector_reverse: {
      return thisT()->getShuffleCost(TTI::SK_Reverse,
                                     cast<VectorType>(Args[0]->getType()), {},
                                     CostKind, 0, cast<VectorType>(RetTy));
    }
    case Intrinsic::vector_splice: {
      unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
      return thisT()->getShuffleCost(TTI::SK_Splice,
                                     cast<VectorType>(Args[0]->getType()), {},
                                     CostKind, Index, cast<VectorType>(RetTy));
    }
    case Intrinsic::vector_reduce_add:
    case Intrinsic::vector_reduce_mul:
    case Intrinsic::vector_reduce_and:
    case Intrinsic::vector_reduce_or:
    case Intrinsic::vector_reduce_xor:
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
    case Intrinsic::vector_reduce_fmaximum:
    case Intrinsic::vector_reduce_fminimum:
    case Intrinsic::vector_reduce_umax:
    case Intrinsic::vector_reduce_umin: {
      // Reductions are handled by the type-based path.
      IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, I, 1);
      return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    }
    case Intrinsic::vector_reduce_fadd:
    case Intrinsic::vector_reduce_fmul: {
      // fadd/fmul reductions carry an extra start-value operand.
      IntrinsicCostAttributes Attrs(
          IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, I, 1);
      return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    }
    case Intrinsic::fshl:
    case Intrinsic::fshr: {
      const Value *X = Args[0];
      const Value *Y = Args[1];
      const Value *Z = Args[2];
      const TTI::OperandValueInfo OpInfoX = TTI::getOperandInfo(X);
      const TTI::OperandValueInfo OpInfoY = TTI::getOperandInfo(Y);
      const TTI::OperandValueInfo OpInfoZ = TTI::getOperandInfo(Z);
      const TTI::OperandValueInfo OpInfoBW =
        {TTI::OK_UniformConstantValue,
         isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
                                                     : TTI::OP_None};

      // Cost the generic funnel-shift expansion: shift X left and Y right by
      // amounts derived from Z, then OR the halves together.
      InstructionCost Cost = 0;
      Cost +=
          thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
      Cost +=
          thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
      Cost += thisT()->getArithmeticInstrCost(
          BinaryOperator::Shl, RetTy, CostKind, OpInfoX,
          {OpInfoZ.Kind, TTI::OP_None});
      Cost += thisT()->getArithmeticInstrCost(
          BinaryOperator::LShr, RetTy, CostKind, OpInfoY,
          {OpInfoZ.Kind, TTI::OP_None});

      // A non-constant shift amount also needs reducing modulo the bit
      // width.
      if (!OpInfoZ.isConstant())
        Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
                                                CostKind, OpInfoZ, OpInfoBW);

      // A true funnel shift (X != Y, i.e. not a rotate) must guard against a
      // zero shift amount with a compare and select.
      if (X != Y) {
        Type *CondTy = RetTy->getWithNewBitWidth(1);
        Cost +=
            thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
                                        CmpInst::ICMP_EQ, CostKind);
        Cost +=
            thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
                                        CmpInst::ICMP_EQ, CostKind);
      }
      return Cost;
    }
    case Intrinsic::get_active_lane_mask: {
      EVT ResVT = getTLI()->getValueType(DL, RetTy, true);
      EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);

      // Natively supported: assume one instruction per legalized part.
      if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) {
        return getTypeLegalizationCost(RetTy).first;
      }

      // Otherwise cost the expansion: a saturating add (base index + step
      // vector) followed by an unsigned compare against the limit.
      Type *ExpRetTy = VectorType::get(
          ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
      IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {}, FMF);
      InstructionCost Cost =
          thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,
                                          CmpInst::ICMP_ULT, CostKind);
      return Cost;
    }
    case Intrinsic::experimental_cttz_elts: {
      EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);

      // Natively supported: assume one instruction per legalized part.
      if (!getTLI()->shouldExpandCttzElements(ArgType))
        return getTypeLegalizationCost(RetTy).first;

      // Otherwise cost the generic expansion: a step vector masked with the
      // sign-extended input, reduced with umax, then subtracted.
      bool ZeroIsPoison = !cast<ConstantInt>(Args[1])->isZero();
      ConstantRange VScaleRange(APInt(64, 1), APInt::getZero(64));
      // For scalable inputs, narrow the vscale range from the caller's
      // attributes when available.
      if (isa<ScalableVectorType>(ICA.getArgTypes()[0]) && I && I->getCaller())
        VScaleRange = getVScaleRange(I->getCaller(), 64);

      unsigned EltWidth = getTLI()->getBitWidthForCttzElements(
          RetTy, ArgType.getVectorElementCount(), ZeroIsPoison, &VScaleRange);
      Type *NewEltTy = IntegerType::getIntNTy(RetTy->getContext(), EltWidth);

      Type *NewVecTy = VectorType::get(
          NewEltTy, cast<VectorType>(Args[0]->getType())->getElementCount());

      IntrinsicCostAttributes StepVecAttrs(Intrinsic::stepvector, NewVecTy, {},
                                           FMF);
      InstructionCost Cost =
          thisT()->getIntrinsicInstrCost(StepVecAttrs, CostKind);

      Cost +=
          thisT()->getArithmeticInstrCost(Instruction::Sub, NewVecTy, CostKind);
      Cost += thisT()->getCastInstrCost(Instruction::SExt, NewVecTy,
                                        Args[0]->getType(),
                                        TTI::CastContextHint::None, CostKind);
      Cost +=
          thisT()->getArithmeticInstrCost(Instruction::And, NewVecTy, CostKind);

      IntrinsicCostAttributes ReducAttrs(Intrinsic::vector_reduce_umax,
                                         NewEltTy, NewVecTy, FMF, I, 1);
      Cost += thisT()->getTypeBasedIntrinsicInstrCost(ReducAttrs, CostKind);
      Cost +=
          thisT()->getArithmeticInstrCost(Instruction::Sub, NewEltTy, CostKind);

      return Cost;
    }
    case Intrinsic::experimental_vector_match:
      return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind);
    }

    // Fall back to the type-based cost, pre-computing the scalarization
    // overhead (result insertion + operand extraction) for fixed-width
    // vector returns so the type-based path doesn't have to re-derive it.
    InstructionCost ScalarizationCost = InstructionCost::getInvalid();
    if (RetVF.isVector() && !RetVF.isScalable()) {
      ScalarizationCost = 0;
      if (!RetTy->isVoidTy())
        ScalarizationCost += getScalarizationOverhead(
            cast<VectorType>(RetTy),
            /*Insert=*/true, /*Extract=*/false, CostKind);
      ScalarizationCost +=
          getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind);
    }

    IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I,
                                  ScalarizationCost);
    return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
  }
1994
1995
1996
1997
1998
1999 InstructionCost
2000 getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2001 TTI::TargetCostKind CostKind) {
2002 Intrinsic::ID IID = ICA.getID();
2003 Type *RetTy = ICA.getReturnType();
2004 const SmallVectorImpl<Type *> &Tys = ICA.getArgTypes();
2005 FastMathFlags FMF = ICA.getFlags();
2006 InstructionCost ScalarizationCostPassed = ICA.getScalarizationCost();
2007 bool SkipScalarizationCost = ICA.skipScalarizationCost();
2008
2009 VectorType *VecOpTy = nullptr;
2010 if (!Tys.empty()) {
2011
2012
2013 unsigned VecTyIndex = 0;
2014 if (IID == Intrinsic::vector_reduce_fadd ||
2015 IID == Intrinsic::vector_reduce_fmul)
2016 VecTyIndex = 1;
2017 assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes");
2018 VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]);
2019 }
2020
2021
2022 unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 1 : 10;
2023 unsigned ISD = 0;
2024 switch (IID) {
2025 default: {
2026
2027 if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
2028 return isa<ScalableVectorType>(Ty);
2029 }))
2030 return InstructionCost::getInvalid();
2031
2032
2033 InstructionCost ScalarizationCost =
2034 SkipScalarizationCost ? ScalarizationCostPassed : 0;
2035 unsigned ScalarCalls = 1;
2036 Type *ScalarRetTy = RetTy;
2037 if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
2038 if (!SkipScalarizationCost)
2039 ScalarizationCost = getScalarizationOverhead(
2040 RetVTy, true, false, CostKind);
2041 ScalarCalls = std::max(ScalarCalls,
2042 cast<FixedVectorType>(RetVTy)->getNumElements());
2043 ScalarRetTy = RetTy->getScalarType();
2044 }
2045 SmallVector<Type *, 4> ScalarTys;
2046 for (Type *Ty : Tys) {
2047 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2048 if (!SkipScalarizationCost)
2049 ScalarizationCost += getScalarizationOverhead(
2050 VTy, false, true, CostKind);
2051 ScalarCalls = std::max(ScalarCalls,
2052 cast<FixedVectorType>(VTy)->getNumElements());
2053 Ty = Ty->getScalarType();
2054 }
2055 ScalarTys.push_back(Ty);
2056 }
2057 if (ScalarCalls == 1)
2058 return 1;
2059
2060 IntrinsicCostAttributes ScalarAttrs(IID, ScalarRetTy, ScalarTys, FMF);
2061 InstructionCost ScalarCost =
2062 thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind);
2063
2064 return ScalarCalls * ScalarCost + ScalarizationCost;
2065 }
2066
2067
2068 case Intrinsic::sqrt:
2069 ISD = ISD::FSQRT;
2070 break;
2071 case Intrinsic::sin:
2072 ISD = ISD::FSIN;
2073 break;
2074 case Intrinsic::cos:
2075 ISD = ISD::FCOS;
2076 break;
2077 case Intrinsic::sincos:
2078 ISD = ISD::FSINCOS;
2079 break;
2080 case Intrinsic::tan:
2081 ISD = ISD::FTAN;
2082 break;
2083 case Intrinsic::asin:
2084 ISD = ISD::FASIN;
2085 break;
2086 case Intrinsic::acos:
2087 ISD = ISD::FACOS;
2088 break;
2089 case Intrinsic::atan:
2090 ISD = ISD::FATAN;
2091 break;
2092 case Intrinsic::atan2:
2093 ISD = ISD::FATAN2;
2094 break;
2095 case Intrinsic::sinh:
2096 ISD = ISD::FSINH;
2097 break;
2098 case Intrinsic::cosh:
2099 ISD = ISD::FCOSH;
2100 break;
2101 case Intrinsic::tanh:
2102 ISD = ISD::FTANH;
2103 break;
2104 case Intrinsic::exp:
2105 ISD = ISD::FEXP;
2106 break;
2107 case Intrinsic::exp2:
2108 ISD = ISD::FEXP2;
2109 break;
2110 case Intrinsic::exp10:
2111 ISD = ISD::FEXP10;
2112 break;
2113 case Intrinsic::log:
2114 ISD = ISD::FLOG;
2115 break;
2116 case Intrinsic::log10:
2117 ISD = ISD::FLOG10;
2118 break;
2119 case Intrinsic::log2:
2120 ISD = ISD::FLOG2;
2121 break;
2122 case Intrinsic::fabs:
2123 ISD = ISD::FABS;
2124 break;
2125 case Intrinsic::canonicalize:
2126 ISD = ISD::FCANONICALIZE;
2127 break;
2128 case Intrinsic::minnum:
2129 ISD = ISD::FMINNUM;
2130 break;
2131 case Intrinsic::maxnum:
2132 ISD = ISD::FMAXNUM;
2133 break;
2134 case Intrinsic::minimum:
2135 ISD = ISD::FMINIMUM;
2136 break;
2137 case Intrinsic::maximum:
2138 ISD = ISD::FMAXIMUM;
2139 break;
2140 case Intrinsic::minimumnum:
2141 ISD = ISD::FMINIMUMNUM;
2142 break;
2143 case Intrinsic::maximumnum:
2144 ISD = ISD::FMAXIMUMNUM;
2145 break;
2146 case Intrinsic::copysign:
2147 ISD = ISD::FCOPYSIGN;
2148 break;
2149 case Intrinsic::floor:
2150 ISD = ISD::FFLOOR;
2151 break;
2152 case Intrinsic::ceil:
2153 ISD = ISD::FCEIL;
2154 break;
2155 case Intrinsic::trunc:
2156 ISD = ISD::FTRUNC;
2157 break;
2158 case Intrinsic::nearbyint:
2159 ISD = ISD::FNEARBYINT;
2160 break;
2161 case Intrinsic::rint:
2162 ISD = ISD::FRINT;
2163 break;
2164 case Intrinsic::lrint:
2165 ISD = ISD::LRINT;
2166 break;
2167 case Intrinsic::llrint:
2168 ISD = ISD::LLRINT;
2169 break;
2170 case Intrinsic::round:
2171 ISD = ISD::FROUND;
2172 break;
2173 case Intrinsic::roundeven:
2174 ISD = ISD::FROUNDEVEN;
2175 break;
2176 case Intrinsic::pow:
2177 ISD = ISD::FPOW;
2178 break;
2179 case Intrinsic::fma:
2180 ISD = ISD::FMA;
2181 break;
2182 case Intrinsic::fmuladd:
2183 ISD = ISD::FMA;
2184 break;
2185 case Intrinsic::experimental_constrained_fmuladd:
2186 ISD = ISD::STRICT_FMA;
2187 break;
2188
2189 case Intrinsic::lifetime_start:
2190 case Intrinsic::lifetime_end:
2191 case Intrinsic::sideeffect:
2192 case Intrinsic::pseudoprobe:
2193 case Intrinsic::arithmetic_fence:
2194 return 0;
2195 case Intrinsic::masked_store: {
2196 Type *Ty = Tys[0];
2197 Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
2198 return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0,
2199 CostKind);
2200 }
2201 case Intrinsic::masked_load: {
2202 Type *Ty = RetTy;
2203 Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
2204 return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
2205 CostKind);
2206 }
2207 case Intrinsic::vector_reduce_add:
2208 case Intrinsic::vector_reduce_mul:
2209 case Intrinsic::vector_reduce_and:
2210 case Intrinsic::vector_reduce_or:
2211 case Intrinsic::vector_reduce_xor:
2212 return thisT()->getArithmeticReductionCost(
2213 getArithmeticReductionInstruction(IID), VecOpTy, std::nullopt,
2214 CostKind);
2215 case Intrinsic::vector_reduce_fadd:
2216 case Intrinsic::vector_reduce_fmul:
2217 return thisT()->getArithmeticReductionCost(
2218 getArithmeticReductionInstruction(IID), VecOpTy, FMF, CostKind);
2219 case Intrinsic::vector_reduce_smax:
2220 case Intrinsic::vector_reduce_smin:
2221 case Intrinsic::vector_reduce_umax:
2222 case Intrinsic::vector_reduce_umin:
2223 case Intrinsic::vector_reduce_fmax:
2224 case Intrinsic::vector_reduce_fmin:
2225 case Intrinsic::vector_reduce_fmaximum:
2226 case Intrinsic::vector_reduce_fminimum:
2227 return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
2228 VecOpTy, ICA.getFlags(), CostKind);
2229 case Intrinsic::experimental_vector_match: {
2230 auto *SearchTy = cast<VectorType>(ICA.getArgTypes()[0]);
2231 auto *NeedleTy = cast<FixedVectorType>(ICA.getArgTypes()[1]);
2232 unsigned SearchSize = NeedleTy->getNumElements();
2233
2234
2235
2236 EVT SearchVT = getTLI()->getValueType(DL, SearchTy);
2237 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize))
2238 return getTypeLegalizationCost(RetTy).first;
2239
2240
2241
2242 InstructionCost Cost = 0;
2243 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, NeedleTy,
2244 CostKind, 1, nullptr, nullptr);
2245 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SearchTy,
2246 CostKind, 0, nullptr, nullptr);
2247 Cost += thisT()->getShuffleCost(TTI::SK_Broadcast, SearchTy, std::nullopt,
2248 CostKind, 0, nullptr);
2249 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SearchTy, RetTy,
2250 CmpInst::ICMP_EQ, CostKind);
2251 Cost +=
2252 thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
2253 Cost *= SearchSize;
2254 Cost +=
2255 thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind);
2256 return Cost;
2257 }
2258 case Intrinsic::abs:
2259 ISD = ISD::ABS;
2260 break;
2261 case Intrinsic::fshl:
2262 ISD = ISD::FSHL;
2263 break;
2264 case Intrinsic::fshr:
2265 ISD = ISD::FSHR;
2266 break;
2267 case Intrinsic::smax:
2268 ISD = ISD::SMAX;
2269 break;
2270 case Intrinsic::smin:
2271 ISD = ISD::SMIN;
2272 break;
2273 case Intrinsic::umax:
2274 ISD = ISD::UMAX;
2275 break;
2276 case Intrinsic::umin:
2277 ISD = ISD::UMIN;
2278 break;
2279 case Intrinsic::sadd_sat:
2280 ISD = ISD::SADDSAT;
2281 break;
2282 case Intrinsic::ssub_sat:
2283 ISD = ISD::SSUBSAT;
2284 break;
2285 case Intrinsic::uadd_sat:
2286 ISD = ISD::UADDSAT;
2287 break;
2288 case Intrinsic::usub_sat:
2289 ISD = ISD::USUBSAT;
2290 break;
2291 case Intrinsic::smul_fix:
2292 ISD = ISD::SMULFIX;
2293 break;
2294 case Intrinsic::umul_fix:
2295 ISD = ISD::UMULFIX;
2296 break;
2297 case Intrinsic::sadd_with_overflow:
2298 ISD = ISD::SADDO;
2299 break;
2300 case Intrinsic::ssub_with_overflow:
2301 ISD = ISD::SSUBO;
2302 break;
2303 case Intrinsic::uadd_with_overflow:
2304 ISD = ISD::UADDO;
2305 break;
2306 case Intrinsic::usub_with_overflow:
2307 ISD = ISD::USUBO;
2308 break;
2309 case Intrinsic::smul_with_overflow:
2310 ISD = ISD::SMULO;
2311 break;
2312 case Intrinsic::umul_with_overflow:
2313 ISD = ISD::UMULO;
2314 break;
2315 case Intrinsic::fptosi_sat:
2316 ISD = ISD::FP_TO_SINT_SAT;
2317 break;
2318 case Intrinsic::fptoui_sat:
2319 ISD = ISD::FP_TO_UINT_SAT;
2320 break;
2321 case Intrinsic::ctpop:
2322 ISD = ISD::CTPOP;
2323
2324
2325 SingleCallCost = TargetTransformInfo::TCC_Expensive;
2326 break;
2327 case Intrinsic::ctlz:
2328 ISD = ISD::CTLZ;
2329 break;
2330 case Intrinsic::cttz:
2331 ISD = ISD::CTTZ;
2332 break;
2333 case Intrinsic::bswap:
2334 ISD = ISD::BSWAP;
2335 break;
2336 case Intrinsic::bitreverse:
2337 ISD = ISD::BITREVERSE;
2338 break;
2339 case Intrinsic::ucmp:
2340 ISD = ISD::UCMP;
2341 break;
2342 case Intrinsic::scmp:
2343 ISD = ISD::SCMP;
2344 break;
2345 }
2346
2347 auto *ST = dyn_cast<StructType>(RetTy);
2348 Type *LegalizeTy = ST ? ST->getContainedType(0) : RetTy;
2349 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(LegalizeTy);
2350
2351 const TargetLoweringBase *TLI = getTLI();
2352
2353 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
2354 if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
2355 TLI->isFAbsFree(LT.second)) {
2356 return 0;
2357 }
2358
2359
2360
2361
2362
2363 if (LT.first > 1)
2364 return (LT.first * 2);
2365 else
2366 return (LT.first * 1);
2367 } else if (!TLI->isOperationExpand(ISD, LT.second)) {
2368
2369
2370 return (LT.first * 2);
2371 }
2372
2373 switch (IID) {
2374 case Intrinsic::fmuladd: {
2375
2376
2377
2378 return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
2379 CostKind) +
2380 thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
2381 CostKind);
2382 }
2383 case Intrinsic::experimental_constrained_fmuladd: {
2384 IntrinsicCostAttributes FMulAttrs(
2385 Intrinsic::experimental_constrained_fmul, RetTy, Tys);
2386 IntrinsicCostAttributes FAddAttrs(
2387 Intrinsic::experimental_constrained_fadd, RetTy, Tys);
2388 return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
2389 thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
2390 }
2391 case Intrinsic::smin:
2392 case Intrinsic::smax:
2393 case Intrinsic::umin:
2394 case Intrinsic::umax: {
2395
2396 Type *CondTy = RetTy->getWithNewBitWidth(1);
2397 bool IsUnsigned = IID == Intrinsic::umax || IID == Intrinsic::umin;
2398 CmpInst::Predicate Pred =
2399 IsUnsigned ? CmpInst::ICMP_UGT : CmpInst::ICMP_SGT;
2400 InstructionCost Cost = 0;
2401 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
2402 Pred, CostKind);
2403 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
2404 Pred, CostKind);
2405 return Cost;
2406 }
2407 case Intrinsic::sadd_with_overflow:
2408 case Intrinsic::ssub_with_overflow: {
2409 Type *SumTy = RetTy->getContainedType(0);
2410 Type *OverflowTy = RetTy->getContainedType(1);
2411 unsigned Opcode = IID == Intrinsic::sadd_with_overflow
2412 ? BinaryOperator::Add
2413 : BinaryOperator::Sub;
2414
2415
2416
2417
2418
2419 InstructionCost Cost = 0;
2420 Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
2421 Cost +=
2422 2 * thisT()->getCmpSelInstrCost(Instruction::ICmp, SumTy, OverflowTy,
2423 CmpInst::ICMP_SGT, CostKind);
2424 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
2425 CostKind);
2426 return Cost;
2427 }
2428 case Intrinsic::uadd_with_overflow:
2429 case Intrinsic::usub_with_overflow: {
2430 Type *SumTy = RetTy->getContainedType(0);
2431 Type *OverflowTy = RetTy->getContainedType(1);
2432 unsigned Opcode = IID == Intrinsic::uadd_with_overflow
2433 ? BinaryOperator::Add
2434 : BinaryOperator::Sub;
2435 CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow
2436 ? CmpInst::ICMP_ULT
2437 : CmpInst::ICMP_UGT;
2438
2439 InstructionCost Cost = 0;
2440 Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
2441 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
2442 OverflowTy, Pred, CostKind);
2443 return Cost;
2444 }
2445 case Intrinsic::smul_with_overflow:
2446 case Intrinsic::umul_with_overflow: {
2447 Type *MulTy = RetTy->getContainedType(0);
2448 Type *OverflowTy = RetTy->getContainedType(1);
2449 unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
2450 Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
2451 bool IsSigned = IID == Intrinsic::smul_with_overflow;
2452
2453 unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
2454 TTI::CastContextHint CCH = TTI::CastContextHint::None;
2455
2456 InstructionCost Cost = 0;
2457 Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
2458 Cost +=
2459 thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
2460 Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
2461 CCH, CostKind);
2462 Cost += thisT()->getArithmeticInstrCost(
2463 Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
2464 {TTI::OK_UniformConstantValue, TTI::OP_None});
2465
2466 if (IsSigned)
2467 Cost += thisT()->getArithmeticInstrCost(
2468 Instruction::AShr, MulTy, CostKind,
2469 {TTI::OK_AnyValue, TTI::OP_None},
2470 {TTI::OK_UniformConstantValue, TTI::OP_None});
2471
2472 Cost += thisT()->getCmpSelInstrCost(
2473 BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
2474 return Cost;
2475 }
2476 case Intrinsic::sadd_sat:
2477 case Intrinsic::ssub_sat: {
2478
2479 Type *CondTy = RetTy->getWithNewBitWidth(1);
2480
2481 Type *OpTy = StructType::create({RetTy, CondTy});
2482 Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
2483 ? Intrinsic::sadd_with_overflow
2484 : Intrinsic::ssub_with_overflow;
2485 CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
2486
2487
2488
2489 InstructionCost Cost = 0;
2490 IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
2491 nullptr, ScalarizationCostPassed);
2492 Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
2493 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
2494 Pred, CostKind);
2495 Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
2496 CondTy, Pred, CostKind);
2497 return Cost;
2498 }
2499 case Intrinsic::uadd_sat:
2500 case Intrinsic::usub_sat: {
2501 Type *CondTy = RetTy->getWithNewBitWidth(1);
2502
2503 Type *OpTy = StructType::create({RetTy, CondTy});
2504 Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
2505 ? Intrinsic::uadd_with_overflow
2506 : Intrinsic::usub_with_overflow;
2507
2508 InstructionCost Cost = 0;
2509 IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
2510 nullptr, ScalarizationCostPassed);
2511 Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
2512 Cost +=
2513 thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
2514 CmpInst::BAD_ICMP_PREDICATE, CostKind);
2515 return Cost;
2516 }
2517 case Intrinsic::smul_fix:
2518 case Intrinsic::umul_fix: {
2519 unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
2520 Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
2521
2522 unsigned ExtOp =
2523 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
2524 TTI::CastContextHint CCH = TTI::CastContextHint::None;
2525
2526 InstructionCost Cost = 0;
2527 Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind);
2528 Cost +=
2529 thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
2530 Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
2531 CCH, CostKind);
2532 Cost += thisT()->getArithmeticInstrCost(
2533 Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
2534 {TTI::OK_UniformConstantValue, TTI::OP_None});
2535 Cost += thisT()->getArithmeticInstrCost(
2536 Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
2537 {TTI::OK_UniformConstantValue, TTI::OP_None});
2538 Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
2539 return Cost;
2540 }
2541 case Intrinsic::abs: {
2542
2543 Type *CondTy = RetTy->getWithNewBitWidth(1);
2544 CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
2545 InstructionCost Cost = 0;
2546 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
2547 Pred, CostKind);
2548 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
2549 Pred, CostKind);
2550
2551 Cost += thisT()->getArithmeticInstrCost(
2552 BinaryOperator::Sub, RetTy, CostKind,
2553 {TTI::OK_UniformConstantValue, TTI::OP_None});
2554 return Cost;
2555 }
2556 case Intrinsic::fshl:
2557 case Intrinsic::fshr: {
2558
2559
2560 Type *CondTy = RetTy->getWithNewBitWidth(1);
2561 InstructionCost Cost = 0;
2562 Cost +=
2563 thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
2564 Cost +=
2565 thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
2566 Cost +=
2567 thisT()->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, CostKind);
2568 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
2569 CostKind);
2570 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
2571 CostKind);
2572
2573 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
2574 CmpInst::ICMP_EQ, CostKind);
2575 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
2576 CmpInst::ICMP_EQ, CostKind);
2577 return Cost;
2578 }
2579 case Intrinsic::fptosi_sat:
2580 case Intrinsic::fptoui_sat: {
2581 if (Tys.empty())
2582 break;
2583 Type *FromTy = Tys[0];
2584 bool IsSigned = IID == Intrinsic::fptosi_sat;
2585
2586 InstructionCost Cost = 0;
2587 IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy,
2588 {FromTy, FromTy});
2589 Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind);
2590 IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy,
2591 {FromTy, FromTy});
2592 Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind);
2593 Cost += thisT()->getCastInstrCost(
2594 IsSigned ? Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy,
2595 TTI::CastContextHint::None, CostKind);
2596 if (IsSigned) {
2597 Type *CondTy = RetTy->getWithNewBitWidth(1);
2598 Cost += thisT()->getCmpSelInstrCost(
2599 BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind);
2600 Cost += thisT()->getCmpSelInstrCost(
2601 BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, CostKind);
2602 }
2603 return Cost;
2604 }
2605 case Intrinsic::ucmp:
2606 case Intrinsic::scmp: {
2607 Type *CmpTy = Tys[0];
2608 Type *CondTy = RetTy->getWithNewBitWidth(1);
2609 InstructionCost Cost =
2610 thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, CmpTy, CondTy,
2611 CmpIntrinsic::getGTPredicate(IID),
2612 CostKind) +
2613 thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, CmpTy, CondTy,
2614 CmpIntrinsic::getLTPredicate(IID),
2615 CostKind);
2616
2617 EVT VT = TLI->getValueType(DL, CmpTy, true);
2618 if (TLI->shouldExpandCmpUsingSelects(VT)) {
2619
2620 Cost += 2 * thisT()->getCmpSelInstrCost(
2621 BinaryOperator::Select, RetTy, CondTy,
2622 ICmpInst::BAD_ICMP_PREDICATE, CostKind);
2623 } else {
2624
2625 Cost +=
2626 2 * thisT()->getCastInstrCost(CastInst::ZExt, RetTy, CondTy,
2627 TTI::CastContextHint::None, CostKind);
2628 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy,
2629 CostKind);
2630 }
2631 return Cost;
2632 }
2633 default:
2634 break;
2635 }
2636
2637
2638
2639
2640 if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
2641
2642 if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
2643 return isa<ScalableVectorType>(Ty);
2644 }))
2645 return InstructionCost::getInvalid();
2646
2647 InstructionCost ScalarizationCost =
2648 SkipScalarizationCost
2649 ? ScalarizationCostPassed
2650 : getScalarizationOverhead(RetVTy, true,
2651 false, CostKind);
2652
2653 unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
2654 SmallVector<Type *, 4> ScalarTys;
2655 for (Type *Ty : Tys) {
2656 if (Ty->isVectorTy())
2657 Ty = Ty->getScalarType();
2658 ScalarTys.push_back(Ty);
2659 }
2660 IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
2661 InstructionCost ScalarCost =
2662 thisT()->getIntrinsicInstrCost(Attrs, CostKind);
2663 for (Type *Ty : Tys) {
2664 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2665 if (!ICA.skipScalarizationCost())
2666 ScalarizationCost += getScalarizationOverhead(
2667 VTy, false, true, CostKind);
2668 ScalarCalls = std::max(ScalarCalls,
2669 cast<FixedVectorType>(VTy)->getNumElements());
2670 }
2671 }
2672 return ScalarCalls * ScalarCost + ScalarizationCost;
2673 }
2674
2675
2676 return SingleCallCost;
2677 }
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690 InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2691 ArrayRef<Type *> Tys,
2692 TTI::TargetCostKind CostKind) {
2693 return 10;
2694 }
2695
2696 unsigned getNumberOfParts(Type *Tp) {
2697 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
2698 if (!LT.first.isValid())
2699 return 0;
2700
2701
2702 if (auto *FTp = dyn_cast<FixedVectorType>(Tp);
2703 Tp && LT.second.isFixedLengthVector() &&
2704 !has_single_bit(FTp->getNumElements())) {
2705 if (auto *SubTp = dyn_cast_if_present<FixedVectorType>(
2706 EVT(LT.second).getTypeForEVT(Tp->getContext()));
2707 SubTp && SubTp->getElementType() == FTp->getElementType())
2708 return divideCeil(FTp->getNumElements(), SubTp->getNumElements());
2709 }
2710 return *LT.first.getValue();
2711 }
2712
2713 InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *,
2714 const SCEV *) {
2715 return 0;
2716 }
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
  /// Estimate the arithmetic and shuffle costs of a tree-wise (pairwise)
  /// reduction: repeatedly split the vector in halves and combine them with
  /// the binary \p Opcode until one element remains, then extract it.
  InstructionCost getTreeReductionCost(unsigned Opcode, VectorType *Ty,
                                       TTI::TargetCostKind CostKind) {
    // The lane count of a scalable vector is unknown here, so targets must
    // provide their own cost for scalable types.
    if (isa<ScalableVectorType>(Ty))
      return InstructionCost::getInvalid();

    Type *ScalarTy = Ty->getElementType();
    unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
    if ((Opcode == Instruction::Or || Opcode == Instruction::And) &&
        ScalarTy == IntegerType::getInt1Ty(Ty->getContext()) &&
        NumVecElts >= 2) {
      // An or/and reduction of <N x i1> is priced as a bitcast of the mask to
      // an iN integer plus a single integer compare:
      //   %val = bitcast <N x i1> to iN
      //   %res = icmp <pred> iN %val, <constant>
      Type *ValTy = IntegerType::get(Ty->getContext(), NumVecElts);
      return thisT()->getCastInstrCost(Instruction::BitCast, ValTy, Ty,
                                       TTI::CastContextHint::None, CostKind) +
             thisT()->getCmpSelInstrCost(Instruction::ICmp, ValTy,
                                         CmpInst::makeCmpResultType(ValTy),
                                         CmpInst::BAD_ICMP_PREDICATE, CostKind);
    }
    unsigned NumReduxLevels = Log2_32(NumVecElts);
    InstructionCost ArithCost = 0;
    InstructionCost ShuffleCost = 0;
    std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
    unsigned LongVectorCount = 0;
    unsigned MVTLen =
        LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
    // While the vector is wider than one legal register, halve it with an
    // extract-subvector shuffle and combine the halves with the binary op.
    while (NumVecElts > MVTLen) {
      NumVecElts /= 2;
      VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
      ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, {},
                                             CostKind, NumVecElts, SubTy);
      ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
      Ty = SubTy;
      ++LongVectorCount;
    }

    // The levels handled by the splitting loop above are done; the remaining
    // levels operate on a single (legal-width) register.
    NumReduxLevels -= LongVectorCount;

    // Each remaining reduction level is priced as one single-source shuffle
    // plus one arithmetic op at the legal vector width.
    ShuffleCost +=
        NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
                                                 {}, CostKind, 0, Ty);
    ArithCost +=
        NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
    // Finally, a single extract of lane 0 yields the scalar result.
    return ShuffleCost + ArithCost +
           thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
                                       CostKind, 0, nullptr, nullptr);
  }
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815 InstructionCost getOrderedReductionCost(unsigned Opcode, VectorType *Ty,
2816 TTI::TargetCostKind CostKind) {
2817
2818
2819 if (isa<ScalableVectorType>(Ty))
2820 return InstructionCost::getInvalid();
2821
2822 auto *VTy = cast<FixedVectorType>(Ty);
2823 InstructionCost ExtractCost = getScalarizationOverhead(
2824 VTy, false, true, CostKind);
2825 InstructionCost ArithCost = thisT()->getArithmeticInstrCost(
2826 Opcode, VTy->getElementType(), CostKind);
2827 ArithCost *= VTy->getNumElements();
2828
2829 return ExtractCost + ArithCost;
2830 }
2831
2832 InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2833 std::optional<FastMathFlags> FMF,
2834 TTI::TargetCostKind CostKind) {
2835 assert(Ty && "Unknown reduction vector type");
2836 if (TTI::requiresOrderedReduction(FMF))
2837 return getOrderedReductionCost(Opcode, Ty, CostKind);
2838 return getTreeReductionCost(Opcode, Ty, CostKind);
2839 }
2840
2841
2842
  /// Estimate the cost of a min/max reduction as a pairwise tree: halve the
  /// vector with shuffles, combine the halves via the min/max intrinsic
  /// \p IID, and extract the final lane.
  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                         FastMathFlags FMF,
                                         TTI::TargetCostKind CostKind) {
    // The lane count of a scalable vector is unknown here, so targets must
    // provide their own cost for scalable types.
    if (isa<ScalableVectorType>(Ty))
      return InstructionCost::getInvalid();

    Type *ScalarTy = Ty->getElementType();
    unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
    unsigned NumReduxLevels = Log2_32(NumVecElts);
    InstructionCost MinMaxCost = 0;
    InstructionCost ShuffleCost = 0;
    std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
    unsigned LongVectorCount = 0;
    unsigned MVTLen =
        LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
    // While the vector is wider than one legal register, halve it with an
    // extract-subvector shuffle and combine the halves with the intrinsic.
    while (NumVecElts > MVTLen) {
      NumVecElts /= 2;
      auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);

      ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, {},
                                             CostKind, NumVecElts, SubTy);

      IntrinsicCostAttributes Attrs(IID, SubTy, {SubTy, SubTy}, FMF);
      MinMaxCost += getIntrinsicInstrCost(Attrs, CostKind);
      Ty = SubTy;
      ++LongVectorCount;
    }

    // The levels handled by the splitting loop above are done; the remaining
    // levels operate on a single (legal-width) register.
    NumReduxLevels -= LongVectorCount;

    // Each remaining level is priced as one single-source shuffle plus one
    // min/max intrinsic at the legal vector width.
    ShuffleCost +=
        NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
                                                 {}, CostKind, 0, Ty);
    IntrinsicCostAttributes Attrs(IID, Ty, {Ty, Ty}, FMF);
    MinMaxCost += NumReduxLevels * getIntrinsicInstrCost(Attrs, CostKind);
    // The last min/max was counted above, so only a single extract of lane 0
    // remains to produce the scalar result.
    return ShuffleCost + MinMaxCost +
           thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
                                       CostKind, 0, nullptr, nullptr);
  }
2890
  /// Cost of a reduction whose source lanes are first extended (zero- or
  /// sign-extended per \p IsUnsigned) from the element type of \p Ty to
  /// \p ResTy, with the widened vector then reduced with \p Opcode.
  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *Ty,
                                           FastMathFlags FMF,
                                           TTI::TargetCostKind CostKind) {
    if (auto *FTy = dyn_cast<FixedVectorType>(Ty);
        FTy && IsUnsigned && Opcode == Instruction::Add &&
        FTy->getElementType() == IntegerType::getInt1Ty(Ty->getContext())) {
      // Special case: vector_reduce_add(ZExt(<n x i1>)) is priced as
      // ctpop(bitcast <n x i1> to in), i.e. one bitcast plus one ctpop.
      auto *IntTy =
          IntegerType::get(ResTy->getContext(), FTy->getNumElements());
      IntrinsicCostAttributes ICA(Intrinsic::ctpop, IntTy, {IntTy}, FMF);
      return thisT()->getCastInstrCost(Instruction::BitCast, IntTy, FTy,
                                       TTI::CastContextHint::None, CostKind) +
             thisT()->getIntrinsicInstrCost(ICA, CostKind);
    }

    // Generic fallback: model as reduce(ext(Ty)) — the cost of extending the
    // source vector to the result element type plus the reduction at the
    // widened type.
    VectorType *ExtTy = VectorType::get(ResTy, Ty);
    InstructionCost RedCost =
        thisT()->getArithmeticReductionCost(Opcode, ExtTy, FMF, CostKind);
    InstructionCost ExtCost = thisT()->getCastInstrCost(
        IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
        TTI::CastContextHint::None, CostKind);

    return RedCost + ExtCost;
  }
2918
2919 InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
2920 VectorType *Ty,
2921 TTI::TargetCostKind CostKind) {
2922
2923
2924
2925 VectorType *ExtTy = VectorType::get(ResTy, Ty);
2926 InstructionCost RedCost = thisT()->getArithmeticReductionCost(
2927 Instruction::Add, ExtTy, std::nullopt, CostKind);
2928 InstructionCost ExtCost = thisT()->getCastInstrCost(
2929 IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
2930 TTI::CastContextHint::None, CostKind);
2931
2932 InstructionCost MulCost =
2933 thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
2934
2935 return RedCost + MulCost + 2 * ExtCost;
2936 }
2937
  // Extra cost charged when a vector operation has to be split into halves.
  InstructionCost getVectorSplitCost() { return 1; }
2939
2940
2941 };
2942
2943
2944
/// Concrete public host for BasicTTIImplBase: answers the CRTP hooks with
/// the TargetSubtargetInfo and TargetLoweringBase taken from a
/// TargetMachine for a given function.
class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
  using BaseT = BasicTTIImplBase<BasicTTIImpl>;

  // The CRTP base needs access to the private getST()/getTLI() hooks below.
  friend class BasicTTIImplBase<BasicTTIImpl>;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  // Defined out of line; presumably pulls ST/TLI from TM for F — confirm in
  // the corresponding .cpp.
  explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
};
2959
2960 }
2961
2962 #endif