Transforms/Vectorize/LoopVectorize.h

0001 //===- LoopVectorize.h ------------------------------------------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
0010 // and generates target-independent LLVM-IR.
0011 // The vectorizer uses the TargetTransformInfo analysis to estimate the costs
0012 // of instructions in order to estimate the profitability of vectorization.
0013 //
0014 // The loop vectorizer combines consecutive loop iterations into a single
0015 // 'wide' iteration. After this transformation the index is incremented
0016 // by the SIMD vector width, and not by one.
0017 //
0018 // This pass has four parts:
0019 // 1. The main loop pass that drives the different parts.
0020 // 2. LoopVectorizationLegality - A unit that checks for the legality
0021 //    of the vectorization.
0022 // 3. InnerLoopVectorizer - A unit that performs the actual
0023 //    widening of instructions.
0024 // 4. LoopVectorizationCostModel - A unit that checks for the profitability
0025 //    of vectorization. It decides on the optimal vector width, which
0026 //    can be one, if vectorization is not profitable.
0027 //
0028 // There is a development effort going on to migrate loop vectorizer to the
0029 // VPlan infrastructure and to introduce outer loop vectorization support (see
0030 // docs/VectorizationPlan.rst and
0031 // http://lists.llvm.org/pipermail/llvm-dev/2017-December/119523.html). For this
0032 // purpose, we temporarily introduced the VPlan-native vectorization path: an
0033 // alternative vectorization path that is natively implemented on top of the
0034 // VPlan infrastructure. See EnableVPlanNativePath for enabling.
0035 //
0036 //===----------------------------------------------------------------------===//
0037 //
0038 // The reduction-variable vectorization is based on the paper:
0039 //  D. Nuzman and R. Henderson. Multi-platform Auto-vectorization.
0040 //
0041 // Variable uniformity checks are inspired by:
0042 //  Karrenberg, R. and Hack, S. Whole Function Vectorization.
0043 //
0044 // The interleaved access vectorization is based on the paper:
0045 //  Dorit Nuzman, Ira Rosen and Ayal Zaks.  Auto-Vectorization of Interleaved
0046 //  Data for SIMD
0047 //
0048 // Other ideas/concepts are from:
0049 //  A. Zaks and D. Nuzman. Autovectorization in GCC-two years later.
0050 //
0051 //  S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua.  An Evaluation of
0052 //  Vectorizing Compilers.
0053 //
0054 //===----------------------------------------------------------------------===//
0055
0056 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H
0057 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H
0058
0059 #include "llvm/IR/PassManager.h"
0060 #include "llvm/Support/CommandLine.h"
0061 #include "llvm/Transforms/Utils/ExtraPassManager.h"
0062 #include <functional>
0063
0064 namespace llvm {
0065
0066 class AssumptionCache;
0067 class BlockFrequencyInfo;
0068 class DemandedBits;
0069 class DominatorTree;
0070 class Function;
0071 class Instruction;
0072 class Loop;
0073 class LoopAccessInfoManager;
0074 class LoopInfo;
0075 class OptimizationRemarkEmitter;
0076 class ProfileSummaryInfo;
0077 class ScalarEvolution;
0078 class TargetLibraryInfo;
0079 class TargetTransformInfo;
0080
0081 extern cl::opt<bool> EnableLoopInterleaving;
0082 extern cl::opt<bool> EnableLoopVectorization;
0083
/// Options selecting which loops are considered for interleaving and for
/// vectorization.
///
/// A default-constructed object has both flags set to false, i.e. all loops
/// are considered for both transformations. This corresponds to
/// EnableLoopInterleaving = true and EnableLoopVectorization = true, which
/// means the interleaving default is consistent with the cl::opt flag, while
/// the vectorization default is not.
/// FIXME: The default for EnableLoopVectorization in the cl::opt should be
/// set to true, and the corresponding change to account for this be made in
/// opt.cpp. The default member initializers below would then become
/// !EnableLoopInterleaving and !EnableLoopVectorization respectively.
struct LoopVectorizeOptions {
  /// false: every loop is a candidate for interleaving.
  /// true:  only loops that explicitly request interleaving are candidates.
  bool InterleaveOnlyWhenForced = false;

  /// false: every loop is a candidate for vectorization.
  /// true:  only loops that explicitly request vectorization are candidates.
  bool VectorizeOnlyWhenForced = false;

  /// Leaves both flags at their in-class defaults (false).
  LoopVectorizeOptions() = default;

  /// Sets both flags explicitly.
  LoopVectorizeOptions(bool InterleaveOnlyWhenForced,
                       bool VectorizeOnlyWhenForced)
      : InterleaveOnlyWhenForced{InterleaveOnlyWhenForced},
        VectorizeOnlyWhenForced{VectorizeOnlyWhenForced} {}

  /// Overwrites InterleaveOnlyWhenForced with \p Value and returns *this so
  /// that setter calls can be chained.
  LoopVectorizeOptions &setInterleaveOnlyWhenForced(bool Value) {
    this->InterleaveOnlyWhenForced = Value;
    return *this;
  }

  /// Overwrites VectorizeOnlyWhenForced with \p Value and returns *this so
  /// that setter calls can be chained.
  LoopVectorizeOptions &setVectorizeOnlyWhenForced(bool Value) {
    this->VectorizeOnlyWhenForced = Value;
    return *this;
  }
};
0119
/// Records which kinds of changes were made. This is the type returned by
/// LoopVectorizePass::runImpl.
struct LoopVectorizeResult {
  /// Flag for "some change was made".
  bool MadeAnyChange;
  /// Flag for "a CFG change was made" (going by the field name).
  bool MadeCFGChange;

  /// Stores both flags exactly as passed; no relationship between the two
  /// values is checked or enforced here.
  LoopVectorizeResult(bool MadeAnyChange, bool MadeCFGChange)
      : MadeAnyChange{MadeAnyChange}, MadeCFGChange{MadeCFGChange} {}
};
0128
0129 /// The LoopVectorize Pass.
0130 struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
0131 private:
0132   /// If false, consider all loops for interleaving.
0133   /// If true, only loops that explicitly request interleaving are considered.
0134   bool InterleaveOnlyWhenForced;
0135
0136   /// If false, consider all loops for vectorization.
0137   /// If true, only loops that explicitly request vectorization are considered.
0138   bool VectorizeOnlyWhenForced;
0139
0140 public:
0141   LoopVectorizePass(LoopVectorizeOptions Opts = {});
0142
0143   ScalarEvolution *SE;
0144   LoopInfo *LI;
0145   TargetTransformInfo *TTI;
0146   DominatorTree *DT;
0147   BlockFrequencyInfo *BFI;
0148   TargetLibraryInfo *TLI;
0149   DemandedBits *DB;
0150   AssumptionCache *AC;
0151   LoopAccessInfoManager *LAIs;
0152   OptimizationRemarkEmitter *ORE;
0153   ProfileSummaryInfo *PSI;
0154
0155   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
0156   void printPipeline(raw_ostream &OS,
0157                      function_ref<StringRef(StringRef)> MapClassName2PassName);
0158
0159   // Shim for old PM.
0160   LoopVectorizeResult runImpl(Function &F);
0161
0162   bool processLoop(Loop *L);
0163 };
0164
0165 /// Reports a vectorization failure: print \p DebugMsg for debugging
0166 /// purposes along with the corresponding optimization remark \p RemarkName.
0167 /// If \p I is passed, it is an instruction that prevents vectorization.
0168 /// Otherwise, the loop \p TheLoop is used for the location of the remark.
0169 void reportVectorizationFailure(const StringRef DebugMsg,
0170     const StringRef OREMsg, const StringRef ORETag,
0171     OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr);
0172
0173 /// Same as above, but the debug message and optimization remark are identical
0174 inline void reportVectorizationFailure(const StringRef DebugMsg,
0175                                        const StringRef ORETag,
0176                                        OptimizationRemarkEmitter *ORE,
0177                                        Loop *TheLoop,
0178                                        Instruction *I = nullptr) {
0179   reportVectorizationFailure(DebugMsg, DebugMsg, ORETag, ORE, TheLoop, I);
0180 }
0181
0182 /// A marker analysis to determine if extra passes should be run after loop
0183 /// vectorization.
0184 struct ShouldRunExtraVectorPasses
0185     : public ShouldRunExtraPasses<ShouldRunExtraVectorPasses>,
0186       public AnalysisInfoMixin<ShouldRunExtraVectorPasses> {
0187   static AnalysisKey Key;
0188 };
0189 } // end namespace llvm
0190
0191 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H