|
|
|||
File indexing completed on 2026-05-10 08:44:40
0001 //===- FunctionSpecialization.h - Function Specialization -----------------===// 0002 // 0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 0004 // See https://llvm.org/LICENSE.txt for license information. 0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 0006 // 0007 //===----------------------------------------------------------------------===// 0008 // 0009 // Overview: 0010 // --------- 0011 // Function Specialization is a transformation which propagates the constant 0012 // parameters of a function call from the caller to the callee. It is part of 0013 // the Inter-Procedural Sparse Conditional Constant Propagation (IPSCCP) pass. 0014 // The transformation runs iteratively a number of times which is controlled 0015 // by the option `funcspec-max-iters`. Running it multiple times is needed 0016 // for specializing recursive functions, but also exposes new opportunities 0017 // arising from specializations which return constant values or contain calls 0018 // which can be specialized. 0019 // 0020 // Function Specialization supports propagating constant parameters like 0021 // function pointers, literal constants and addresses of global variables. 0022 // By propagating function pointers, indirect calls become direct calls. This 0023 // exposes inlining opportunities which we would have otherwise missed. That's 0024 // why function specialization is run before the inliner in the optimization 0025 // pipeline; that is by design. 0026 // 0027 // Cost Model: 0028 // ----------- 0029 // The cost model facilitates a utility for estimating the specialization bonus 0030 // from propagating a constant argument. This is the InstCostVisitor, a class 0031 // that inherits from the InstVisitor. The bonus itself is expressed as codesize 0032 // and latency savings. Codesize savings means the amount of code that becomes 0033 // dead in the specialization from propagating the constant, whereas latency 0034 // savings represents the cycles we are saving from replacing instructions with 0035 // constant values. The InstCostVisitor overrides a set of `visit*` methods to 0036 // be able to handle different types of instructions. These attempt to constant- 0037 // fold the instruction in which case a constant is returned and propagated 0038 // further. 0039 // 0040 // Function pointers are not handled by the InstCostVisitor. They are treated 0041 // separately as they could expose inlining opportunities via indirect call 0042 // promotion. The inlining bonus contributes to the total specialization score. 0043 // 0044 // For a specialization to be profitable its bonus needs to exceed a minimum 0045 // threshold. There are three options for controlling the threshold which are 0046 // expressed as percentages of the original function size: 0047 // * funcspec-min-codesize-savings 0048 // * funcspec-min-latency-savings 0049 // * funcspec-min-inlining-bonus 0050 // There's also an option for controlling the codesize growth from recursive 0051 // specializations. That is `funcspec-max-codesize-growth`. 0052 // 0053 // Once we have all the potential specializations with their score we need to 0054 // choose the best ones, which fit in the module specialization budget. That 0055 // is controlled by the option `funcspec-max-clones`. To find the best `NSpec` 0056 // specializations we use a max-heap. For more details refer to D139346. 0057 // 0058 // Ideas: 0059 // ------ 0060 // - With a function specialization attribute for arguments, we could have 0061 // a direct way to steer function specialization, avoiding the cost-model, 0062 // and thus control compile-times / code-size. 0063 // 0064 // - Perhaps a post-inlining function specialization pass could be more 0065 // aggressive on literal constants. 0066 // 0067 // Limitations: 0068 // ------------ 0069 // - We are unable to consider specializations of functions called from indirect 0070 // callsites whose pointer operand has a lattice value that is known to be 0071 // constant, either from IPSCCP or previous iterations of FuncSpec. This is 0072 // because SCCP has not yet replaced the uses of the known constant. 0073 // 0074 // References: 0075 // ----------- 0076 // 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable 0077 // it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q 0078 // 0079 //===----------------------------------------------------------------------===// 0080 0081 #ifndef LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H 0082 #define LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H 0083 0084 #include "llvm/Analysis/BlockFrequencyInfo.h" 0085 #include "llvm/Analysis/CodeMetrics.h" 0086 #include "llvm/Analysis/InlineCost.h" 0087 #include "llvm/Analysis/TargetTransformInfo.h" 0088 #include "llvm/IR/InstVisitor.h" 0089 #include "llvm/Transforms/Scalar/SCCP.h" 0090 #include "llvm/Transforms/Utils/Cloning.h" 0091 #include "llvm/Transforms/Utils/SCCPSolver.h" 0092 #include "llvm/Transforms/Utils/SizeOpts.h" 0093 0094 namespace llvm { 0095 // Map of potential specializations for each function. The FunctionSpecializer 0096 // keeps the discovered specialisation opportunities for the module in a single 0097 // vector, where the specialisations of each function form a contiguous range. 0098 // This map's value is the beginning and the end of that range. 0099 using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>; 0100 0101 // Just a shorter abbreviation to improve indentation. 0102 using Cost = InstructionCost; 0103 0104 // Map of known constants found during the specialization bonus estimation. 0105 using ConstMap = DenseMap<Value *, Constant *>; 0106 0107 // Specialization signature, used to uniquely designate a specialization within 0108 // a function. 0109 struct SpecSig { 0110 // Hashing support, used to distinguish between ordinary, empty, or tombstone 0111 // keys. 0112 unsigned Key = 0; 0113 SmallVector<ArgInfo, 4> Args; 0114 0115 bool operator==(const SpecSig &Other) const { 0116 if (Key != Other.Key) 0117 return false; 0118 return Args == Other.Args; 0119 } 0120 0121 friend hash_code hash_value(const SpecSig &S) { 0122 return hash_combine(hash_value(S.Key), 0123 hash_combine_range(S.Args.begin(), S.Args.end())); 0124 } 0125 }; 0126 0127 // Specialization instance. 0128 struct Spec { 0129 // Original function. 0130 Function *F; 0131 0132 // Cloned function, a specialized version of the original one. 0133 Function *Clone = nullptr; 0134 0135 // Specialization signature. 0136 SpecSig Sig; 0137 0138 // Profitability of the specialization. 0139 unsigned Score; 0140 0141 // Number of instructions in the specialization. 0142 unsigned CodeSize; 0143 0144 // List of call sites, matching this specialization. 0145 SmallVector<CallBase *> CallSites; 0146 0147 Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSize) 0148 : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {} 0149 Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSize) 0150 : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {} 0151 }; 0152 0153 class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> { 0154 std::function<BlockFrequencyInfo &(Function &)> GetBFI; 0155 Function *F; 0156 const DataLayout &DL; 0157 TargetTransformInfo &TTI; 0158 const SCCPSolver &Solver; 0159 0160 ConstMap KnownConstants; 0161 // Basic blocks known to be unreachable after constant propagation. 0162 DenseSet<BasicBlock *> DeadBlocks; 0163 // PHI nodes we have visited before. 0164 DenseSet<Instruction *> VisitedPHIs; 0165 // PHI nodes we have visited once without successfully constant folding them. 0166 // Once the InstCostVisitor has processed all the specialization arguments, 0167 // it should be possible to determine whether those PHIs can be folded 0168 // (some of their incoming values may have become constant or dead). 0169 SmallVector<Instruction *> PendingPHIs; 0170 0171 ConstMap::iterator LastVisited; 0172 0173 public: 0174 InstCostVisitor(std::function<BlockFrequencyInfo &(Function &)> GetBFI, 0175 Function *F, const DataLayout &DL, TargetTransformInfo &TTI, 0176 SCCPSolver &Solver) 0177 : GetBFI(GetBFI), F(F), DL(DL), TTI(TTI), Solver(Solver) {} 0178 0179 bool isBlockExecutable(BasicBlock *BB) const { 0180 return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(BB); 0181 } 0182 0183 Cost getCodeSizeSavingsForArg(Argument *A, Constant *C); 0184 0185 Cost getCodeSizeSavingsFromPendingPHIs(); 0186 0187 Cost getLatencySavingsForKnownConstants(); 0188 0189 private: 0190 friend class InstVisitor<InstCostVisitor, Constant *>; 0191 0192 Constant *findConstantFor(Value *V) const; 0193 0194 bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ) const; 0195 0196 Cost getCodeSizeSavingsForUser(Instruction *User, Value *Use = nullptr, 0197 Constant *C = nullptr); 0198 0199 Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList); 0200 Cost estimateSwitchInst(SwitchInst &I); 0201 Cost estimateBranchInst(BranchInst &I); 0202 0203 // Transitively Incoming Values (TIV) is a set of Values that can "feed" a 0204 // value to the initial PHI-node. It is defined like this: 0205 // 0206 // * the initial PHI-node belongs to TIV. 0207 // 0208 // * for every PHI-node in TIV, its operands belong to TIV 0209 // 0210 // If TIV for the initial PHI-node (P) contains more than one constant or a 0211 // value that is not a PHI-node, then P cannot be folded to a constant. 0212 // 0213 // As soon as we detect these cases, we bail, without constructing the 0214 // full TIV. 0215 // Otherwise P can be folded to the one constant in TIV. 0216 bool discoverTransitivelyIncomingValues(Constant *Const, PHINode *Root, 0217 DenseSet<PHINode *> &TransitivePHIs); 0218 0219 Constant *visitInstruction(Instruction &I) { return nullptr; } 0220 Constant *visitPHINode(PHINode &I); 0221 Constant *visitFreezeInst(FreezeInst &I); 0222 Constant *visitCallBase(CallBase &I); 0223 Constant *visitLoadInst(LoadInst &I); 0224 Constant *visitGetElementPtrInst(GetElementPtrInst &I); 0225 Constant *visitSelectInst(SelectInst &I); 0226 Constant *visitCastInst(CastInst &I); 0227 Constant *visitCmpInst(CmpInst &I); 0228 Constant *visitUnaryOperator(UnaryOperator &I); 0229 Constant *visitBinaryOperator(BinaryOperator &I); 0230 }; 0231 0232 class FunctionSpecializer { 0233 0234 /// The IPSCCP Solver. 0235 SCCPSolver &Solver; 0236 0237 Module &M; 0238 0239 /// Analysis manager, needed to invalidate analyses. 0240 FunctionAnalysisManager *FAM; 0241 0242 /// Analyses used to help determine if a function should be specialized. 0243 std::function<BlockFrequencyInfo &(Function &)> GetBFI; 0244 std::function<const TargetLibraryInfo &(Function &)> GetTLI; 0245 std::function<TargetTransformInfo &(Function &)> GetTTI; 0246 std::function<AssumptionCache &(Function &)> GetAC; 0247 0248 SmallPtrSet<Function *, 32> Specializations; 0249 SmallPtrSet<Function *, 32> FullySpecialized; 0250 DenseMap<Function *, CodeMetrics> FunctionMetrics; 0251 DenseMap<Function *, unsigned> FunctionGrowth; 0252 unsigned NGlobals = 0; 0253 0254 public: 0255 FunctionSpecializer( 0256 SCCPSolver &Solver, Module &M, FunctionAnalysisManager *FAM, 0257 std::function<BlockFrequencyInfo &(Function &)> GetBFI, 0258 std::function<const TargetLibraryInfo &(Function &)> GetTLI, 0259 std::function<TargetTransformInfo &(Function &)> GetTTI, 0260 std::function<AssumptionCache &(Function &)> GetAC) 0261 : Solver(Solver), M(M), FAM(FAM), GetBFI(GetBFI), GetTLI(GetTLI), 0262 GetTTI(GetTTI), GetAC(GetAC) {} 0263 0264 ~FunctionSpecializer(); 0265 0266 bool run(); 0267 0268 InstCostVisitor getInstCostVisitorFor(Function *F) { 0269 auto &TTI = GetTTI(*F); 0270 return InstCostVisitor(GetBFI, F, M.getDataLayout(), TTI, Solver); 0271 } 0272 0273 private: 0274 Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call); 0275 0276 /// A constant stack value is an AllocaInst that has a single constant 0277 /// value stored to it. Return this constant if such an alloca stack value 0278 /// is a function argument. 0279 Constant *getConstantStackValue(CallInst *Call, Value *Val); 0280 0281 /// See if there are any new constant values for the callers of \p F via 0282 /// stack variables and promote them to global variables. 0283 void promoteConstantStackValues(Function *F); 0284 0285 /// Clean up fully specialized functions. 0286 void removeDeadFunctions(); 0287 0288 /// Remove any ssa_copy intrinsics that may have been introduced. 0289 void cleanUpSSA(); 0290 0291 /// @brief Find potential specialization opportunities. 0292 /// @param F Function to specialize 0293 /// @param FuncSize Cost of specializing a function. 0294 /// @param AllSpecs A vector to add potential specializations to. 0295 /// @param SM A map for a function's specialisation range 0296 /// @return True, if any potential specializations were found 0297 bool findSpecializations(Function *F, unsigned FuncSize, 0298 SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM); 0299 0300 /// Compute the inlining bonus for replacing argument \p A with constant \p C. 0301 unsigned getInliningBonus(Argument *A, Constant *C); 0302 0303 bool isCandidateFunction(Function *F); 0304 0305 /// @brief Create a specialization of \p F and prime the SCCPSolver 0306 /// @param F Function to specialize 0307 /// @param S Which specialization to create 0308 /// @return The new, cloned function 0309 Function *createSpecialization(Function *F, const SpecSig &S); 0310 0311 /// Determine if it is possible to specialise the function for constant values 0312 /// of the formal parameter \p A. 0313 bool isArgumentInteresting(Argument *A); 0314 0315 /// Check if the value \p V (an actual argument) is a constant or can only 0316 /// have a constant value. Return that constant. 0317 Constant *getCandidateConstant(Value *V); 0318 0319 /// @brief Find and update calls to \p F, which match a specialization 0320 /// @param F Orginal function 0321 /// @param Begin Start of a range of possibly matching specialisations 0322 /// @param End End of a range (exclusive) of possibly matching specialisations 0323 void updateCallSites(Function *F, const Spec *Begin, const Spec *End); 0324 }; 0325 } // namespace llvm 0326 0327 #endif // LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|