|
|
|||
File indexing completed on 2026-05-10 08:48:11
0001 //===- LoopGenerators.h - IR helper to create loops -------------*- C++ -*-===// 0002 // 0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 0004 // See https://llvm.org/LICENSE.txt for license information. 0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 0006 // 0007 //===----------------------------------------------------------------------===// 0008 // 0009 // This file contains functions to create scalar and OpenMP parallel loops 0010 // as LLVM-IR. 0011 // 0012 //===----------------------------------------------------------------------===// 0013 #ifndef POLLY_LOOP_GENERATORS_H 0014 #define POLLY_LOOP_GENERATORS_H 0015 0016 #include "polly/CodeGen/IRBuilder.h" 0017 #include "polly/Support/ScopHelper.h" 0018 #include "llvm/ADT/SetVector.h" 0019 0020 namespace polly { 0021 using llvm::AllocaInst; 0022 using llvm::BasicBlock; 0023 using llvm::DataLayout; 0024 using llvm::DominatorTree; 0025 using llvm::Function; 0026 using llvm::ICmpInst; 0027 using llvm::LoopInfo; 0028 using llvm::Module; 0029 using llvm::SetVector; 0030 using llvm::Type; 0031 using llvm::Value; 0032 0033 /// General scheduling types of parallel OpenMP for loops. 0034 /// Initialization values taken from OpenMP's enum in kmp.h: sched_type. 0035 /// Currently, only 'static' scheduling may change from chunked to non-chunked. 0036 enum class OMPGeneralSchedulingType { 0037 StaticChunked = 33, 0038 StaticNonChunked = 34, 0039 Dynamic = 35, 0040 Guided = 36, 0041 Runtime = 37 0042 }; 0043 0044 extern int PollyNumThreads; 0045 extern OMPGeneralSchedulingType PollyScheduling; 0046 extern int PollyChunkSize; 0047 0048 /// Create a scalar do/for-style loop. 0049 /// 0050 /// @param LowerBound The starting value of the induction variable. 0051 /// @param UpperBound The upper bound of the induction variable. 0052 /// @param Stride The value by which the induction variable 0053 /// is incremented. 0054 /// 0055 /// @param Builder The builder used to create the loop. 0056 /// @param P A pointer to the pass that uses this function. 0057 /// It is used to update analysis information. 0058 /// @param LI The loop info we need to update 0059 /// @param DT The dominator tree we need to update 0060 /// @param ExitBlock The block the loop will exit to. 0061 /// @param Predicate The predicate used to generate the upper loop 0062 /// bound. 0063 /// @param Annotator This function can (optionally) take 0064 /// a ScopAnnotator which 0065 /// annotates loops and alias information in the SCoP. 0066 /// @param Parallel If this loop should be marked parallel in 0067 /// the Annotator. 0068 /// @param UseGuard Create a guard in front of the header to check if 0069 /// the loop is executed at least once, otherwise just 0070 /// assume it. 0071 /// @param LoopVectDisabled If the Loop vectorizer should be disabled for this 0072 /// loop. 0073 /// 0074 /// @return Value* The newly created induction variable for this loop. 0075 Value *createLoop(Value *LowerBound, Value *UpperBound, Value *Stride, 0076 PollyIRBuilder &Builder, LoopInfo &LI, DominatorTree &DT, 0077 BasicBlock *&ExitBlock, ICmpInst::Predicate Predicate, 0078 ScopAnnotator *Annotator = nullptr, bool Parallel = false, 0079 bool UseGuard = true, bool LoopVectDisabled = false); 0080 0081 /// Create a DebugLoc representing generated instructions. 0082 /// 0083 /// The IR verifier requires !dbg metadata to be set in some situations. For 0084 /// instance, if an (inlinable) function has debug info, all its call site must 0085 /// have debug info as well. 0086 llvm::DebugLoc createDebugLocForGeneratedCode(Function *F); 0087 0088 /// The ParallelLoopGenerator allows to create parallelized loops 0089 /// 0090 /// To parallelize a loop, we perform the following steps: 0091 /// o Generate a subfunction which will hold the loop body. 0092 /// o Create a struct to hold all outer values needed in the loop body. 0093 /// o Create calls to a runtime library to achieve the actual parallelism. 0094 /// These calls will spawn and join threads, define how the work (here the 0095 /// iterations) are distributed between them and make sure each has access 0096 /// to the struct holding all needed values. 0097 /// 0098 /// At the moment we support only one parallel runtime, OpenMP. 0099 /// 0100 /// If we parallelize the outer loop of the following loop nest, 0101 /// 0102 /// S0; 0103 /// for (int i = 0; i < N; i++) 0104 /// for (int j = 0; j < M; j++) 0105 /// S1(i, j); 0106 /// S2; 0107 /// 0108 /// we will generate the following code (with different runtime function names): 0109 /// 0110 /// S0; 0111 /// auto *values = storeValuesIntoStruct(); 0112 /// // Execute subfunction with multiple threads 0113 /// spawn_threads(subfunction, values); 0114 /// join_threads(); 0115 /// S2; 0116 /// 0117 /// // This function is executed in parallel by different threads 0118 /// void subfunction(values) { 0119 /// while (auto *WorkItem = getWorkItem()) { 0120 /// int LB = WorkItem.begin(); 0121 /// int UB = WorkItem.end(); 0122 /// for (int i = LB; i < UB; i++) 0123 /// for (int j = 0; j < M; j++) 0124 /// S1(i, j); 0125 /// } 0126 /// cleanup_thread(); 0127 /// } 0128 class ParallelLoopGenerator { 0129 public: 0130 /// Create a parallel loop generator for the current function. 0131 ParallelLoopGenerator(PollyIRBuilder &Builder, const DataLayout &DL) 0132 : Builder(Builder), LongType(Type::getIntNTy(Builder.getContext(), 0133 DL.getPointerSizeInBits())), 0134 M(Builder.GetInsertBlock()->getParent()->getParent()), 0135 DLGenerated(createDebugLocForGeneratedCode( 0136 Builder.GetInsertBlock()->getParent())) {} 0137 0138 virtual ~ParallelLoopGenerator() {} 0139 0140 /// Create a parallel loop. 0141 /// 0142 /// This function is the main function to automatically generate a parallel 0143 /// loop with all its components. 0144 /// 0145 /// @param LB The lower bound for the loop we parallelize. 0146 /// @param UB The upper bound for the loop we parallelize. 0147 /// @param Stride The stride of the loop we parallelize. 0148 /// @param Values A set of LLVM-IR Values that should be available in 0149 /// the new loop body. 0150 /// @param VMap A map to allow outside access to the new versions of 0151 /// the values in @p Values. 0152 /// @param LoopBody A pointer to an iterator that is set to point to the 0153 /// body of the created loop. It should be used to insert 0154 /// instructions that form the actual loop body. 0155 /// 0156 /// @return The newly created induction variable for this loop. 0157 Value *createParallelLoop(Value *LB, Value *UB, Value *Stride, 0158 SetVector<Value *> &Values, ValueMapT &VMap, 0159 BasicBlock::iterator *LoopBody); 0160 0161 protected: 0162 /// The IR builder we use to create instructions. 0163 PollyIRBuilder &Builder; 0164 0165 /// The loop info for the generated subfunction. 0166 std::unique_ptr<LoopInfo> SubFnLI; 0167 0168 /// The dominance tree for the generated subfunction. 0169 std::unique_ptr<DominatorTree> SubFnDT; 0170 0171 /// The type of a "long" on this hardware used for backend calls. 0172 Type *LongType; 0173 0174 /// The current module 0175 Module *M; 0176 0177 /// Debug location for generated code without direct link to any specific 0178 /// line. 0179 /// 0180 /// We only set the DebugLoc where the IR Verifier requires us to. Otherwise, 0181 /// absent debug location for optimized code should be fine. 0182 llvm::DebugLoc DLGenerated; 0183 0184 public: 0185 /// Returns the DominatorTree for the generated subfunction. 0186 DominatorTree *getCalleeDominatorTree() const { return SubFnDT.get(); } 0187 0188 /// Returns the LoopInfo for the generated subfunction. 0189 LoopInfo *getCalleeLoopInfo() const { return SubFnLI.get(); } 0190 0191 /// Create a struct for all @p Values and store them in there. 0192 /// 0193 /// @param Values The values which should be stored in the struct. 0194 /// 0195 /// @return The created struct. 0196 AllocaInst *storeValuesIntoStruct(SetVector<Value *> &Values); 0197 0198 /// Extract all values from the @p Struct and construct the mapping. 0199 /// 0200 /// @param Values The values which were stored in the struct. 0201 /// @param Struct The struct holding all the values in @p Values. 0202 /// @param VMap A map to associate every element of @p Values with the 0203 /// new llvm value loaded from the @p Struct. 0204 void extractValuesFromStruct(SetVector<Value *> Values, Type *Ty, 0205 Value *Struct, ValueMapT &VMap); 0206 0207 /// Create the definition of the parallel subfunction. 0208 /// 0209 /// @return A pointer to the subfunction. 0210 Function *createSubFnDefinition(); 0211 0212 /// Create the runtime library calls for spawn and join of the worker threads. 0213 /// Additionally, places a call to the specified subfunction. 0214 /// 0215 /// @param SubFn The subfunction which holds the loop body. 0216 /// @param SubFnParam The parameter for the subfunction (basically the struct 0217 /// filled with the outside values). 0218 /// @param LB The lower bound for the loop we parallelize. 0219 /// @param UB The upper bound for the loop we parallelize. 0220 /// @param Stride The stride of the loop we parallelize. 0221 virtual void deployParallelExecution(Function *SubFn, Value *SubFnParam, 0222 Value *LB, Value *UB, Value *Stride) = 0; 0223 0224 /// Prepare the definition of the parallel subfunction. 0225 /// Creates the argument list and names them (as well as the subfunction). 0226 /// 0227 /// @param F A pointer to the (parallel) subfunction's parent function. 0228 /// 0229 /// @return The pointer to the (parallel) subfunction. 0230 virtual Function *prepareSubFnDefinition(Function *F) const = 0; 0231 0232 /// Create the parallel subfunction. 0233 /// 0234 /// @param Stride The induction variable increment. 0235 /// @param Struct A struct holding all values in @p Values. 0236 /// @param Values A set of LLVM-IR Values that should be available in 0237 /// the new loop body. 0238 /// @param VMap A map to allow outside access to the new versions of 0239 /// the values in @p Values. 0240 /// @param SubFn The newly created subfunction is returned here. 0241 /// 0242 /// @return The newly created induction variable. 0243 virtual std::tuple<Value *, Function *> 0244 createSubFn(Value *Stride, AllocaInst *Struct, SetVector<Value *> UsedValues, 0245 ValueMapT &VMap) = 0; 0246 }; 0247 } // end namespace polly 0248 #endif
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|