File indexing completed on 2026-05-10 08:43:19
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
0015 #define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
0016
0017 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
0018 #include "llvm/Analysis/TargetTransformInfo.h"
0019 #include "llvm/Analysis/VectorUtils.h"
0020 #include "llvm/IR/DataLayout.h"
0021 #include "llvm/IR/GetElementPtrTypeIterator.h"
0022 #include "llvm/IR/IntrinsicInst.h"
0023 #include "llvm/IR/Operator.h"
0024 #include "llvm/IR/PatternMatch.h"
0025 #include <optional>
0026 #include <utility>
0027
0028 namespace llvm {
0029
0030 class Function;
0031
/// Base class for use as a mix-in that aids implementing a
/// TargetTransformInfo-compatible class. It stores a reference to the
/// DataLayout and provides conservative default implementations for all of
/// the TTI hooks, which concrete targets then selectively override.
class TargetTransformInfoImplBase {

protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. The move constructor is spelled out because the
  // reference member suppresses the implicit one.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }
0049
0050 InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
0051 ArrayRef<const Value *> Operands, Type *AccessType,
0052 TTI::TargetCostKind CostKind) const {
0053
0054
0055 for (const Value *Operand : Operands)
0056 if (!isa<Constant>(Operand))
0057 return TTI::TCC_Basic;
0058
0059 return TTI::TCC_Free;
0060 }
0061
  /// Conservative default: every case of the switch forms its own cluster and
  /// no jump table is used (JTSize is set to 0).
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0; // No jump table in the default implementation.
    return SI.getNumCases();
  }
0071
  // Inliner tuning knobs; the defaults below are neutral or mirror the
  // generic inline-cost heuristics.
  unsigned getInliningThresholdMultiplier() const { return 1; }
  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const { return 8; }
  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
    return 8;
  }
  int getInliningLastCallToStaticBonus() const {
    // Bonus applied when inlining would eliminate the last call to a static
    // function. NOTE(review): presumably kept in sync with the inliner's
    // LastCallToStaticBonus constant — confirm.
    return 15000;
  }
  unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
0083 unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
0084 return 0;
0085 };
0086
  int getInlinerVectorBonusPercent() const { return 150; }

  // Memcpy is modeled as expensive unless a target knows better.
  InstructionCost getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
    return 64;
  }

  // Although this default value is arbitrary, it is not random: a condition
  // that evaluates the same way a higher percentage of the time than this
  // threshold is considered predictable and is best kept as control flow.
  // The win from the predictable fraction must outweigh the misprediction
  // cost on the remaining fraction for the intended targets.
  BranchProbability getPredictableBranchThreshold() const {
    return BranchProbability(99, 100);
  }

  InstructionCost getBranchMispredictPenalty() const { return 0; }
0108
  // Divergence analysis hooks: a non-GPU-like default has no divergence.
  bool hasBranchDivergence(const Function *F = nullptr) const { return false; }

  bool isSourceOfDivergence(const Value *V) const { return false; }

  bool isAlwaysUniform(const Value *V) const { return false; }

  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    return false;
  }

  // Conservative default: any two address spaces may alias.
  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
    return true;
  }

  // -1 (wrapped to unsigned) means "no flat address space".
  unsigned getFlatAddressSpace() const { return -1; }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
0131 bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
0132 return AS == 0;
0133 };
0134
0135 unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
0136
0137 bool isSingleThreaded() const { return false; }
0138
0139 std::pair<const Value *, unsigned>
0140 getPredicatedAddrSpace(const Value *V) const {
0141 return std::make_pair(nullptr, -1);
0142 }
0143
0144 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
0145 Value *NewV) const {
0146 return nullptr;
0147 }
0148
  /// Whether calling \p F is expected to lower to an actual machine call.
  /// Intrinsics and a list of well-known libm-style routines (which targets
  /// typically lower to a single instruction or short sequence) are treated
  /// as not-a-call; anything internal or unnamed conservatively is a call.
  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // Intrinsics are lowered by the target directly, never to real calls.
    if (F->isIntrinsic())
      return false;

    // Local/unnamed functions can't match the known-name list below.
    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These routines will likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sin" || Name == "sinf" || Name == "sinl" ||
        Name == "cos" || Name == "cosf" || Name == "cosl" ||
        Name == "tan" || Name == "tanf" || Name == "tanl" ||
        Name == "asin" || Name == "asinf" || Name == "asinl" ||
        Name == "acos" || Name == "acosf" || Name == "acosl" ||
        Name == "atan" || Name == "atanf" || Name == "atanl" ||
        Name == "atan2" || Name == "atan2f" || Name == "atan2l"||
        Name == "sinh" || Name == "sinhf" || Name == "sinhl" ||
        Name == "cosh" || Name == "coshf" || Name == "coshl" ||
        Name == "tanh" || Name == "tanhf" || Name == "tanhl" ||
        Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ||
        Name == "exp10" || Name == "exp10l" || Name == "exp10f")
      return false;

    // These are all likely lowered to a few instructions or so.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }
0195
  // Hardware-loop and tail-folding hooks; conservative defaults.
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const {
    return false;
  }

  unsigned getEpilogueVectorizationMinVF() const { return 16; }

  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const { return false; }

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
    return TailFoldingStyle::DataWithoutLaneMask;
  }

  // InstCombine integration points: std::nullopt means "no target-specific
  // simplification available".
  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const {
    return std::nullopt;
  }

  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
    return std::nullopt;
  }

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return std::nullopt;
  }

  // Unroll/peel preference hooks: leave the preferences untouched by default.
  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &,
                               OptimizationRemarkEmitter *) const {}

  void getPeelingPreferences(Loop *, ScalarEvolution &,
                             TTI::PeelingPreferences &) const {}
0237
  // Immediate-legality hooks: nothing is free by default.
  bool isLegalAddImmediate(int64_t Imm) const { return false; }

  bool isLegalAddScalableImmediate(int64_t Imm) const { return false; }

  bool isLegalICmpImmediate(int64_t Imm) const { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr,
                             int64_t ScalableOffset = 0) const {
    // The default implementation only supports the trivial addressing modes:
    // reg and reg + reg (no base global, no constant offset, scale 0 or 1).
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  // Lexicographic LSR cost comparison: register pressure dominates, then the
  // finer-grained cost components break ties.
  bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool isNumRegsMajorCostOfLSR() const { return true; }

  bool shouldDropLSRSolutionIfLessProfitable() const { return false; }

  bool isProfitableLSRChainElement(Instruction *I) const { return false; }

  bool canMacroFuseCmp() const { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const {
    return false;
  }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
    return TTI::AMK_None;
  }
0278
  // Masked / nontemporal / gather-scatter legality hooks. Everything that
  // needs hardware support defaults to "illegal".
  bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads
    // that are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    return false;
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) const {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const {
    return false;
  }

  bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const {
    return false;
  }

  // NOTE(review): this hook is non-const unlike its neighbors — confirm
  // whether that is intentional before relying on it.
  bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
                                    Align Alignment, unsigned AddrSpace) {
    return false;
  }

  bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const {
    return false;
  }
0347
  bool enableOrderedReductions() const { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  bool prefersVectorizedAddressing() const { return true; }

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free; everything else is
    // reported as invalid (-1).
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset.getFixed(), HasBaseReg,
                              Scale, AddrSpace, nullptr,
                              BaseOffset.getScalable()))
      return 0;
    return -1;
  }
0369
  bool LSRWithInstrQueries() const { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  bool isProfitableToHoist(Instruction *I) const { return true; }

  bool useAA() const { return false; }

  bool isTypeLegal(Type *Ty) const { return false; }

  unsigned getRegUsageForType(Type *Ty) const { return 1; }

  bool shouldBuildLookupTables() const { return true; }

  bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }

  bool shouldBuildRelLookupTables() const { return false; }

  bool useColdCCForColdCall(Function &F) const { return false; }

  // Target-intrinsic scalarization queries used by the vectorizers.
  bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const {
    return false;
  }

  bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                          unsigned ScalarOpdIdx) const {
    return false;
  }

  // -1 denotes the return value's overload type.
  bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
                                              int OpdIdx) const {
    return OpdIdx == -1;
  }

  bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
                                                        int RetIdx) const {
    return RetIdx == 0;
  }
0408
  // Scalarization overhead is free in the default model.
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind,
                                           ArrayRef<Value *> VL = {}) const {
    return 0;
  }

  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 0;
  }

  bool supportsEfficientVectorElementLoadStore() const { return false; }

  bool supportsTailCalls() const { return true; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  // An empty options struct disables memcmp expansion.
  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }
0436
  bool enableSelectOptimize() const { return true; }

  bool shouldTreatInstructionLikeSelect(const Instruction *I) {
    // A select with two constant arms is excluded: presumably it lowers
    // cheaply without a branch — confirm against SelectOptimize's heuristics.
    using namespace llvm::PatternMatch;
    if (match(I, m_Select(m_Value(), m_Constant(), m_Constant())))
      return false;

    // Otherwise treat plain selects as candidates, but not the logical
    // and/or select forms.
    return isa<SelectInst>(I) &&
           !match(I, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
                                 m_LogicalOr(m_Value(), m_Value())));
  }
0451
  bool enableInterleavedAccessVectorization() const { return false; }

  bool enableMaskedInterleavedAccessVectorization() const { return false; }

  bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) const {
    return false;
  }

  // Assume popcount must be emulated in software by default.
  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) const { return false; }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  InstructionCost getFPOpCost(Type *Ty) const {
    return TargetTransformInfo::TCC_Basic;
  }

  // Immediate-materialization costs: free/basic defaults.
  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty) const {
    return 0;
  }

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  bool preferToKeepConstantsAttached(const Instruction &Inst,
                                     const Function &Fn) const {
    return false;
  }
0505
  // A generic machine is assumed to have 8 registers per class.
  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
  bool hasConditionalLoadStoreForType(Type *Ty) const { return false; }
0508
0509 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
0510 return Vector ? 1 : 0;
0511 };
0512
  // Class IDs match getRegisterClassForType: 0 = scalar, 1 = vector.
  const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  unsigned getMinVectorRegisterBitWidth() const { return 128; }

  // Scalable-vector tuning knobs: unknown/unsupported by default.
  std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
  std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
  bool isVScaleKnownToBeAPowerOfTwo() const { return false; }

  bool
  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
    return false;
  }

  // An element count of 0 disables the minimum-VF constraint.
  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
    return ElementCount::get(0, IsScalable);
  }

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
  unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }
0545
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }

  // Cache sizes are unknown by default for every modeled level.
  std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }
0563
  // Cache associativity is unknown by default for every modeled level.
  std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  std::optional<unsigned> getMinPageSize() const { return {}; }

  // Software-prefetch tuning: disabled by default.
  unsigned getPrefetchDistance() const { return 0; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    return 1;
  }
  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  bool enableWritePrefetching() const { return false; }
  bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }

  // Partial reductions are unsupported unless a target opts in.
  InstructionCost
  getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB,
                          Type *AccumType, ElementCount VF,
                          TTI::PartialReductionExtendKind OpAExtend,
                          TTI::PartialReductionExtendKind OpBExtend,
                          std::optional<unsigned> BinOp = std::nullopt) const {
    return InstructionCost::getInvalid();
  }

  unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }
0598
  /// Default arithmetic cost: 1 for most operations, with division/remainder
  /// modeled as expensive and a small latency bump for floating point.
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) const {

    // Detect the llvm.experimental.widenable.condition intrinsic among the
    // operands; and/or against it is considered free (it folds away).
    auto IsWidenableCondition = [](const Value *V) {
      if (auto *II = dyn_cast<IntrinsicInst>(V))
        if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
          return true;
      return false;
    };

    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // Division and remainder are assumed expensive.
      return TTI::TCC_Expensive;
    case Instruction::And:
    case Instruction::Or:
      if (any_of(Args, IsWidenableCondition))
        return TTI::TCC_Free;
      break;
    }

    // Assume floating point arithmetic has a somewhat higher latency.
    if (CostKind == TTI::TCK_Latency)
      if (Ty->getScalarType()->isFloatingPointTy())
        return 3;

    return 1;
  }
0639
  // Alternating-opcode vector instructions are unsupported by default.
  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                  unsigned Opcode1,
                                  const SmallBitVector &OpcodeMask,
                                  TTI::TargetCostKind CostKind) const {
    return InstructionCost::getInvalid();
  }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = {},
                                 const Instruction *CxtI = nullptr) const {
    return 1;
  }
0655
  /// Default cast cost: casts that the DataLayout shows to be representation
  /// no-ops are free; everything else costs 1.
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
      // Free when a legal integer type covers the pointer width.
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      // Free when the destination is a legal integer at least as wide as
      // the pointer.
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer bitcasts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // Truncating to a legal (native) integer type is assumed free.
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
        return 0;
      break;
    }
    }
    return 1;
  }
0693
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) const {
    return 1;
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const {
    // A phi is free unless we are costing throughput, where it may require
    // a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     TTI::OperandValueInfo Op1Info,
                                     TTI::OperandValueInfo Op2Info,
                                     const Instruction *I) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0,
                                     Value *Op1) const {
    return 1;
  }

  // Overload carrying extra context: \p ScalarUserAndIdx describes the
  // scalar users (and lane indices) of the extracted value.
  InstructionCost getVectorInstrCost(
      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
      Value *Scalar,
      ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index) const {
    return 1;
  }

  unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                                     const APInt &DemandedDstElts,
                                     TTI::TargetCostKind CostKind) {
    return 1;
  }
0747
  // Memory operation costs: a flat cost of 1 unless the operation has no
  // default lowering at all (strided access, which returns invalid).
  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  TTI::OperandValueInfo OpInfo,
                                  const Instruction *I) const {
    return 1;
  }

  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TTI::TargetCostKind CostKind,
                                    const Instruction *I) const {
    return 1;
  }

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return 1;
  }

  InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return InstructionCost::getInvalid();
  }

  unsigned getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }
0791
  /// Default intrinsic cost: annotation/metadata-like intrinsics that emit no
  /// code are free, histogram-add is unsupported unless a target opts in,
  /// and everything else costs 1.
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::experimental_vector_histogram_add:
      // Targets must opt in to support histogram intrinsics.
      return InstructionCost::getInvalid();
    case Intrinsic::allow_runtime_check:
    case Intrinsic::allow_ubsan_check:
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_assign:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_begin_custom_abi:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
    case Intrinsic::threadlocal_address:
    case Intrinsic::experimental_widenable_condition:
    case Intrinsic::ssa_copy:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }
0842
  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 1;
  }

  // Assume a register of the right size exists for the type.
  unsigned getNumberOfParts(Type *Tp) const { return 1; }

  InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                            const SCEV *) const {
    return 0;
  }

  // Reduction costs default to 1.
  InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *,
                                         FastMathFlags,
                                         TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *Ty,
                                           FastMathFlags FMF,
                                           TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
                                         VectorType *Ty,
                                         TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // 0 means no native support for element-unordered-atomic memory
    // intrinsics; targets that provide lowerings must override this.
    // NOTE(review): the original upstream comment was stripped here —
    // confirm against TargetTransformInfo.h before relying on this reading.
    return 0;
  }
0898
  // nullptr means the intrinsic's result cannot be reused at ExpectedType.
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const {
    return nullptr;
  }

  // Element type for an expanded memcpy loop: i8 normally, or an integer of
  // the atomic element size when an unordered-atomic expansion is requested.
  Type *
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
                            Align SrcAlign, Align DestAlign,
                            std::optional<uint32_t> AtomicElementSize) const {
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                             : Type::getInt8Ty(Context);
  }

  // Residual operand types after the main memcpy loop: one integer op of the
  // chosen element size per remaining chunk.
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicCpySize) const {
    unsigned OpSizeInBytes = AtomicCpySize.value_or(1);
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
      OpsOut.push_back(OpType);
  }
0923
  // Conservative compatibility check: caller and callee must agree exactly on
  // their target-cpu and target-features attributes.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const {
    return DefaultCallPenalty;
  }

  // Same attribute-equality check as areInlineCompatible, applied to ABI
  // compatibility of the given types.
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }
0944
  // Indexed (pre/post-increment) memory ops are unsupported by default.
  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  // Load/store vectorization legality: permissive defaults.
  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const {
    return true;
  }

  bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }

  // Keep the requested vectorization factor unchanged.
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool preferFixedOverScalableIfEqualCost() const { return false; }

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferEpilogueVectorization() const {
    return true;
  }
1005
  bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }

  TTI::ReductionShuffle
  getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const {
    return TTI::ReductionShuffle::SplitHalf;
  }

  unsigned getGISelRematGlobalCost() const { return 1; }

  unsigned getMinTripCountTailFoldingThreshold() const { return 0; }

  // Scalable vectors are unsupported unless a target opts in.
  bool supportsScalableVectors() const { return false; }

  bool enableScalableVectorization() const { return false; }

  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const {
    return false;
  }

  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const {
    return false;
  }

  bool isVectorShiftByScalarCheap(Type *Ty) const { return false; }

  // Default VP strategy: discard the explicit vector length, convert the
  // operation to a non-VP equivalent.
  TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        TargetTransformInfo::VPLegalization::Discard,
        TargetTransformInfo::VPLegalization::Convert);
  }

  bool hasArmWideBranch(bool) const { return false; }

  uint64_t getFeatureMask(const Function &F) const { return 0; }

  bool isMultiversionedFunction(const Function &F) const { return false; }

  unsigned getMaxNumArgs() const { return UINT_MAX; }

  unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const {
    return 0;
  }
1051
1052 protected:
1053
1054
  /// Compute the minimum number of bits needed to represent \p Val, setting
  /// \p isSigned to whether a signed representation is required.
  /// NOTE(review): for signed/negative values the returned width comes from
  /// APInt::getSignificantBits() - 1, i.e. it excludes the sign bit; callers
  /// are expected to account for \p isSigned.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    // Constant vectors: take the widest requirement over all elements.
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // Constant vectors are always fixed-width, so this cast cannot fail.
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned until a negative element is seen.
      isSigned = false;

      // Fallback: the full element width, used when an element is not a
      // ConstantInt (e.g. undef/poison or FP elements).
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Significant bits of this element, excluding the sign bit.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getSignificantBits() - 1;
          // One negative element makes the whole vector signed.
          isSigned |= signedElement;
          // Track the widest element seen so far.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Non-integer element: give up and report the full element width.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    // Scalar integer constant.
    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getSignificantBits() - 1;
    }

    // Sign extension: the source width (minus the duplicated sign bit)
    // bounds the value.
    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    // Zero extension: the full source width bounds the (unsigned) value.
    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    // Anything else: conservatively report the full scalar width, unsigned.
    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }
1108
1109 bool isStridedAccess(const SCEV *Ptr) const {
1110 return Ptr && isa<SCEVAddRecExpr>(Ptr);
1111 }
1112
1113 const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
1114 const SCEV *Ptr) const {
1115 if (!isStridedAccess(Ptr))
1116 return nullptr;
1117 const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
1118 return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
1119 }
1120
1121 bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
1122 int64_t MergeDistance) const {
1123 const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
1124 if (!Step)
1125 return false;
1126 APInt StrideVal = Step->getAPInt();
1127 if (StrideVal.getBitWidth() > 64)
1128 return false;
1129
1130 return StrideVal.getSExtValue() < MergeDistance;
1131 }
1132 };
1133
1134
1135
/// CRTP base that implements the user-facing cost queries in terms of the
/// derived target's hooks (isLegalAddressingMode, getArithmeticInstrCost,
/// getShuffleCost, ...), reached via static_cast<T *>(this).
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getGEPCost;

  /// Estimate the cost of a GEP by folding it into an addressing mode where
  /// possible: returns TCC_Free if the derived target reports the resulting
  /// (base, offset, scale) combination as a legal addressing mode, and
  /// TCC_Basic otherwise.
  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands, Type *AccessType,
                             TTI::TargetCostKind CostKind) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    // If the base is not a global, it must live in a register.
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // A GEP with no indices computes no offset; only a global base costs
    // anything.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    // Accumulate constant indices into BaseOffset and at most one variable
    // index into Scale.
    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();

      // Treat a splat vector index like its (constant) scalar value.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // Struct indices are always constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // Scalable element types have no fixed stride; be conservative.
        if (TargetType->isScalableTy())
          return TTI::TCC_Basic;
        int64_t ElementSize =
            GTI.getSequentialElementStride(DL).getFixedValue();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Only one variable index can be folded into the scale.
          if (Scale != 0)
            // Multiple variable indices: not an addressing-mode shape.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    // If the caller did not supply the type of the eventual memory access,
    // fall back to the last indexed type.
    if (!AccessType)
      AccessType = TargetType;

    // Free if the target can fold the whole computation into the access.
    if (static_cast<T *>(this)->isLegalAddressingMode(
            AccessType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;

    return TTI::TCC_Basic;
  }

  /// Sum the cost of a chain of pointer computations. When all pointers
  /// share \p Base, non-base GEPs with constant indices are free and
  /// variable-index ones cost an add; otherwise each GEP is costed via
  /// getGEPCost.
  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                       const Value *Base,
                                       const TTI::PointersChainInfo &Info,
                                       Type *AccessTy,
                                       TTI::TargetCostKind CostKind) {
    InstructionCost Cost = TTI::TCC_Free;

    for (const Value *V : Ptrs) {
      // Non-GEP pointers contribute nothing here.
      const auto *GEP = dyn_cast<GetElementPtrInst>(V);
      if (!GEP)
        continue;
      if (Info.isSameBase() && V != Base) {
        // Same-base, all-constant-offset GEPs fold away.
        if (GEP->hasAllConstantIndices())
          continue;
        // Variable offset from a shared base: model as one add.
        Cost += static_cast<T *>(this)->getArithmeticInstrCost(
            Instruction::Add, GEP->getType(), CostKind,
            {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
            {});
      } else {
        SmallVector<const Value *> Indices(GEP->indices());
        Cost += static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
                                                   GEP->getPointerOperand(),
                                                   Indices, AccessTy, CostKind);
      }
    }
    return Cost;
  }

  /// Dispatch a generic user \p U (with operands \p Operands) to the
  /// specialized per-opcode cost hooks of the derived target.
  InstructionCost getInstructionCost(const User *U,
                                     ArrayRef<const Value *> Operands,
                                     TTI::TargetCostKind CostKind) {
    using namespace llvm::PatternMatch;

    auto *TargetTTI = static_cast<T *>(this);

    // Non-intrinsic calls are handled up front; intrinsics fall through to
    // the Instruction::Call case below.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        // Calls the target does not lower to a real call are cheap.
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic;

        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // Indirect call: cost by argument count.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    unsigned Opcode = Operator::getOpcode(U);
    // I may be null (e.g. for constant expressions).
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      // Static allocas are folded into the frame; dynamic ones fall through
      // to the default cost.
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      Type *AccessType = nullptr;
      // With a single user we can see the type of the eventual access,
      // which lets getGEPCost check addressing-mode legality precisely.
      if (GEP->hasOneUser() && I)
        AccessType = I->user_back()->getAccessType();

      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   Operands.front(), Operands.drop_front(),
                                   AccessType, CostKind);
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]);
      TTI::OperandValueInfo Op2Info;
      // FNeg is unary, so it has no second operand to classify.
      if (Opcode != Instruction::FNeg)
        Op2Info = TTI::getOperandInfo(Operands[1]);
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                               Op2Info, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast: {
      Type *OpTy = Operands[0]->getType();
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    }
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = Operands[0]->getType();
      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(Operands[0]);
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(), CostKind,
                                        OpInfo, I);
    }
    case Instruction::Load: {
      // Fixed latency estimate for loads.
      if (CostKind == TTI::TCK_Latency)
        return 4;
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      // For code size, a scalar load whose only user is a trunc can be
      // costed as a load of the narrower destination type (the trunc is
      // expected to fold into the load).
      if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
          !LoadType->isVectorTy()) {
        if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
          LoadType = TI->getDestTy();
      }
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(), CostKind,
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
    }
    case Instruction::Select: {
      const Value *Op0, *Op1;
      // select on i1 operands that forms a logical and/or is costed as the
      // corresponding And/Or instruction.
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
        const auto Op1Info = TTI::getOperandInfo(Op0);
        const auto Op2Info = TTI::getOperandInfo(Op1);
        assert(Op0->getType()->getScalarSizeInBits() == 1 &&
               Op1->getType()->getScalarSizeInBits() == 1);

        SmallVector<const Value *, 2> Operands{Op0, Op1};
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, Op1Info, Op2Info, Operands, I);
      }
      const auto Op1Info = TTI::getOperandInfo(Operands[1]);
      const auto Op2Info = TTI::getOperandInfo(Operands[2]);
      Type *CondTy = Operands[0]->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, Op1Info, Op2Info, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      const auto Op1Info = TTI::getOperandInfo(Operands[0]);
      const auto Op2Info = TTI::getOperandInfo(Operands[1]);
      Type *ValTy = Operands[0]->getType();
      // Without an Instruction (constant expression) the predicate is
      // unknown.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, Op1Info, Op2Info, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic;
      // -1 (i.e. ~0u) signals an unknown/variable lane index.
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic;

      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
      ArrayRef<int> Mask = Shuffle->getShuffleMask();
      int NumSubElts, SubIndex;

      // Length-changing shuffles: classify into cheaper specific kinds
      // before falling back to a widened/narrowed permute.
      if (Shuffle->changesLength()) {
        // Padding an identity shuffle with undef lanes is free.
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
          return 0;

        if (Shuffle->isExtractSubvectorMask(SubIndex))
          return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
                                           Mask, CostKind, SubIndex, VecTy,
                                           Operands, Shuffle);

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(
              TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
              FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
              Operands, Shuffle);

        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
          // Only lanes actually written (non-poison) are demanded.
          APInt DemandedDstElts = APInt::getZero(Mask.size());
          for (auto I : enumerate(Mask)) {
            if (I.value() != PoisonMaskElem)
              DemandedDstElts.setBit(I.index());
          }
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,
              DemandedDstElts, CostKind);
        }

        bool IsUnary = isa<UndefValue>(Operands[1]);
        NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
        SmallVector<int, 16> AdjustMask(Mask);

        if (Shuffle->increasesLength()) {
          // Remap second-source lane indices to the widened element count.
          for (int &M : AdjustMask)
            M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;

          return TargetTTI->getShuffleCost(
              IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
              AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
        }

        // Narrowing shuffle: cost it as a full-width permute followed by an
        // extract of the destination width.
        AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);

        InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
            IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
            VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);

        SmallVector<int, 16> ExtractMask(Mask.size());
        std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
        return ShuffleCost + TargetTTI->getShuffleCost(
                                 TTI::SK_ExtractSubvector, VecSrcTy,
                                 ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
      }

      // Same-length shuffles, from cheapest recognized kind to generic.
      if (Shuffle->isIdentity())
        return 0;

      if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind,
                                         0, nullptr, Operands, Shuffle);

      if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind,
                                         0, nullptr, Operands, Shuffle);

      if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask,
                                         CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask,
                                         CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask,
                                         CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(
            TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
            FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
            Shuffle);

      if (Shuffle->isSplice(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
                                         SubIndex, nullptr, Operands, Shuffle);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask,
                                       CostKind, 0, nullptr, Operands, Shuffle);
    }
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic;
      // -1 (i.e. ~0u) signals an unknown/variable lane index.
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      Type *DstTy = Operands[0]->getType();
      return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
    }
    }

    // Unhandled opcode: report "unknown" (-1) for throughput so callers can
    // fall back; otherwise assume a basic cost.
    return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
  }

  /// An instruction is expensive to speculate if its size-and-latency cost
  /// reaches TCC_Expensive.
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) {
    auto *TargetTTI = static_cast<T *>(this);
    SmallVector<const Value *, 4> Ops(I->operand_values());
    InstructionCost Cost = TargetTTI->getInstructionCost(
        I, Ops, TargetTransformInfo::TCK_SizeAndLatency);
    return Cost >= TargetTransformInfo::TCC_Expensive;
  }

  /// Default: per-call-site tail-call support simply defers to the target's
  /// global supportsTailCalls().
  bool supportsTailCallFor(const CallBase *CB) const {
    return static_cast<const T *>(this)->supportsTailCalls();
  }
};
1574 }
1575
1576 #endif