llvm/Analysis/MustExecute.h

0001 //===- MustExecute.h - Is an instruction known to execute--------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 /// \file
0009 /// Contains a collection of routines for determining if a given instruction is
0010 /// guaranteed to execute if a given point in control flow is reached. The most
0011 /// common example is an instruction within a loop being provably executed if we
0012 /// branch to the header of it's containing loop.
0013 ///
0014 /// There are two interfaces available to determine if an instruction is
0015 /// executed once a given point in the control flow is reached:
0016 /// 1) A loop-centric one derived from LoopSafetyInfo.
0017 /// 2) A "must be executed context"-based one implemented in the
0018 ///    MustBeExecutedContextExplorer.
0019 /// Please refer to the class comments for more information.
0020 ///
0021 //===----------------------------------------------------------------------===//
0022
0023 #ifndef LLVM_ANALYSIS_MUSTEXECUTE_H
0024 #define LLVM_ANALYSIS_MUSTEXECUTE_H
0025
0026 #include "llvm/ADT/DenseMap.h"
0027 #include "llvm/ADT/DenseSet.h"
0028 #include "llvm/Analysis/InstructionPrecedenceTracking.h"
0029 #include "llvm/IR/EHPersonalities.h"
0030 #include "llvm/IR/PassManager.h"
0031
0032 namespace llvm {
0033
0034 namespace {
0035 template <typename T> using GetterTy = std::function<T *(const Function &F)>;
0036 }
0037
0038 class BasicBlock;
0039 class DominatorTree;
0040 class Loop;
0041 class LoopInfo;
0042 class PostDominatorTree;
0043 class raw_ostream;
0044
0045 /// Captures loop safety information.
0046 /// It keep information for loop blocks may throw exception or otherwise
0047 /// exit abnormally on any iteration of the loop which might actually execute
0048 /// at runtime.  The primary way to consume this information is via
0049 /// isGuaranteedToExecute below, but some callers bailout or fallback to
0050 /// alternate reasoning if a loop contains any implicit control flow.
0051 /// NOTE: LoopSafetyInfo contains cached information regarding loops and their
0052 /// particular blocks. This information is only dropped on invocation of
0053 /// computeLoopSafetyInfo. If the loop or any of its block is deleted, or if
0054 /// any thrower instructions have been added or removed from them, or if the
0055 /// control flow has changed, or in case of other meaningful modifications, the
0056 /// LoopSafetyInfo needs to be recomputed. If a meaningful modifications to the
0057 /// loop were made and the info wasn't recomputed properly, the behavior of all
0058 /// methods except for computeLoopSafetyInfo is undefined.
0059 class LoopSafetyInfo {
0060   // Used to update funclet bundle operands.
0061   DenseMap<BasicBlock *, ColorVector> BlockColors;
0062
0063 protected:
0064   /// Computes block colors.
0065   void computeBlockColors(const Loop *CurLoop);
0066
0067 public:
0068   /// Returns block colors map that is used to update funclet operand bundles.
0069   const DenseMap<BasicBlock *, ColorVector> &getBlockColors() const;
0070
0071   /// Copy colors of block \p Old into the block \p New.
0072   void copyColors(BasicBlock *New, BasicBlock *Old);
0073
0074   /// Returns true iff the block \p BB potentially may throw exception. It can
0075   /// be false-positive in cases when we want to avoid complex analysis.
0076   virtual bool blockMayThrow(const BasicBlock *BB) const = 0;
0077
0078   /// Returns true iff any block of the loop for which this info is contains an
0079   /// instruction that may throw or otherwise exit abnormally.
0080   virtual bool anyBlockMayThrow() const = 0;
0081
0082   /// Return true if we must reach the block \p BB under assumption that the
0083   /// loop \p CurLoop is entered.
0084   bool allLoopPathsLeadToBlock(const Loop *CurLoop, const BasicBlock *BB,
0085                                const DominatorTree *DT) const;
0086
0087   /// Computes safety information for a loop checks loop body & header for
0088   /// the possibility of may throw exception, it takes LoopSafetyInfo and loop
0089   /// as argument. Updates safety information in LoopSafetyInfo argument.
0090   /// Note: This is defined to clear and reinitialize an already initialized
0091   /// LoopSafetyInfo.  Some callers rely on this fact.
0092   virtual void computeLoopSafetyInfo(const Loop *CurLoop) = 0;
0093
0094   /// Returns true if the instruction in a loop is guaranteed to execute at
0095   /// least once (under the assumption that the loop is entered).
0096   virtual bool isGuaranteedToExecute(const Instruction &Inst,
0097                                      const DominatorTree *DT,
0098                                      const Loop *CurLoop) const = 0;
0099
0100   LoopSafetyInfo() = default;
0101
0102   virtual ~LoopSafetyInfo() = default;
0103 };
0104
0105
0106 /// Simple and conservative implementation of LoopSafetyInfo that can give
0107 /// false-positive answers to its queries in order to avoid complicated
0108 /// analysis.
0109 class SimpleLoopSafetyInfo: public LoopSafetyInfo {
0110   bool MayThrow = false;       // The current loop contains an instruction which
0111                                // may throw.
0112   bool HeaderMayThrow = false; // Same as previous, but specific to loop header
0113
0114 public:
0115   bool blockMayThrow(const BasicBlock *BB) const override;
0116
0117   bool anyBlockMayThrow() const override;
0118
0119   void computeLoopSafetyInfo(const Loop *CurLoop) override;
0120
0121   bool isGuaranteedToExecute(const Instruction &Inst,
0122                              const DominatorTree *DT,
0123                              const Loop *CurLoop) const override;
0124 };
0125
0126 /// This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to
0127 /// give precise answers on "may throw" queries. This implementation uses cache
0128 /// that should be invalidated by calling the methods insertInstructionTo and
0129 /// removeInstruction whenever we modify a basic block's contents by adding or
0130 /// removing instructions.
0131 class ICFLoopSafetyInfo: public LoopSafetyInfo {
0132   bool MayThrow = false;       // The current loop contains an instruction which
0133                                // may throw.
0134   // Contains information about implicit control flow in this loop's blocks.
0135   mutable ImplicitControlFlowTracking ICF;
0136   // Contains information about instruction that may possibly write memory.
0137   mutable MemoryWriteTracking MW;
0138
0139 public:
0140   bool blockMayThrow(const BasicBlock *BB) const override;
0141
0142   bool anyBlockMayThrow() const override;
0143
0144   void computeLoopSafetyInfo(const Loop *CurLoop) override;
0145
0146   bool isGuaranteedToExecute(const Instruction &Inst,
0147                              const DominatorTree *DT,
0148                              const Loop *CurLoop) const override;
0149
0150   /// Returns true if we could not execute a memory-modifying instruction before
0151   /// we enter \p BB under assumption that \p CurLoop is entered.
0152   bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop)
0153       const;
0154
0155   /// Returns true if we could not execute a memory-modifying instruction before
0156   /// we execute \p I under assumption that \p CurLoop is entered.
0157   bool doesNotWriteMemoryBefore(const Instruction &I, const Loop *CurLoop)
0158       const;
0159
0160   /// Inform the safety info that we are planning to insert a new instruction
0161   /// \p Inst into the basic block \p BB. It will make all cache updates to keep
0162   /// it correct after this insertion.
0163   void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB);
0164
0165   /// Inform safety info that we are planning to remove the instruction \p Inst
0166   /// from its block. It will make all cache updates to keep it correct after
0167   /// this removal.
0168   void removeInstruction(const Instruction *Inst);
0169 };
0170
0171 bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI);
0172
0173 struct MustBeExecutedContextExplorer;
0174
0175 /// Enum that allows us to spell out the direction.
0176 enum class ExplorationDirection {
0177   BACKWARD = 0,
0178   FORWARD = 1,
0179 };
0180
0181 /// Must be executed iterators visit stretches of instructions that are
0182 /// guaranteed to be executed together, potentially with other instruction
0183 /// executed in-between.
0184 ///
0185 /// Given the following code, and assuming all statements are single
0186 /// instructions which transfer execution to the successor (see
0187 /// isGuaranteedToTransferExecutionToSuccessor), there are two possible
0188 /// outcomes. If we start the iterator at A, B, or E, we will visit only A, B,
0189 /// and E. If we start at C or D, we will visit all instructions A-E.
0190 ///
0191 /// \code
0192 ///   A;
0193 ///   B;
0194 ///   if (...) {
0195 ///     C;
0196 ///     D;
0197 ///   }
0198 ///   E;
0199 /// \endcode
0200 ///
0201 ///
0202 /// Below is the example extneded with instructions F and G. Now we assume F
0203 /// might not transfer execution to it's successor G. As a result we get the
0204 /// following visit sets:
0205 ///
0206 /// Start Instruction   | Visit Set
0207 /// A                   | A, B,       E, F
0208 ///    B                | A, B,       E, F
0209 ///       C             | A, B, C, D, E, F
0210 ///          D          | A, B, C, D, E, F
0211 ///             E       | A, B,       E, F
0212 ///                F    | A, B,       E, F
0213 ///                   G | A, B,       E, F, G
0214 ///
0215 ///
0216 /// \code
0217 ///   A;
0218 ///   B;
0219 ///   if (...) {
0220 ///     C;
0221 ///     D;
0222 ///   }
0223 ///   E;
0224 ///   F;  // Might not transfer execution to its successor G.
0225 ///   G;
0226 /// \endcode
0227 ///
0228 ///
0229 /// A more complex example involving conditionals, loops, break, and continue
0230 /// is shown below. We again assume all instructions will transmit control to
0231 /// the successor and we assume we can prove the inner loop to be finite. We
0232 /// omit non-trivial branch conditions as the exploration is oblivious to them.
0233 /// Constant branches are assumed to be unconditional in the CFG. The resulting
0234 /// visist sets are shown in the table below.
0235 ///
0236 /// \code
0237 ///   A;
0238 ///   while (true) {
0239 ///     B;
0240 ///     if (...)
0241 ///       C;
0242 ///     if (...)
0243 ///       continue;
0244 ///     D;
0245 ///     if (...)
0246 ///       break;
0247 ///     do {
0248 ///       if (...)
0249 ///         continue;
0250 ///       E;
0251 ///     } while (...);
0252 ///     F;
0253 ///   }
0254 ///   G;
0255 /// \endcode
0256 ///
0257 /// Start Instruction    | Visit Set
0258 /// A                    | A, B
0259 ///    B                 | A, B
0260 ///       C              | A, B, C
0261 ///          D           | A, B,    D
0262 ///             E        | A, B,    D, E, F
0263 ///                F     | A, B,    D,    F
0264 ///                   G  | A, B,    D,       G
0265 ///
0266 ///
0267 /// Note that the examples show optimal visist sets but not necessarily the ones
0268 /// derived by the explorer depending on the available CFG analyses (see
0269 /// MustBeExecutedContextExplorer). Also note that we, depending on the options,
0270 /// the visit set can contain instructions from other functions.
0271 struct MustBeExecutedIterator {
0272   /// Type declarations that make his class an input iterator.
0273   ///{
0274   typedef const Instruction *value_type;
0275   typedef std::ptrdiff_t difference_type;
0276   typedef const Instruction **pointer;
0277   typedef const Instruction *&reference;
0278   typedef std::input_iterator_tag iterator_category;
0279   ///}
0280
0281   using ExplorerTy = MustBeExecutedContextExplorer;
0282
0283   MustBeExecutedIterator(const MustBeExecutedIterator &Other) = default;
0284
0285   MustBeExecutedIterator(MustBeExecutedIterator &&Other)
0286       : Visited(std::move(Other.Visited)), Explorer(Other.Explorer),
0287         CurInst(Other.CurInst), Head(Other.Head), Tail(Other.Tail) {}
0288
0289   MustBeExecutedIterator &operator=(MustBeExecutedIterator &&Other) {
0290     if (this != &Other) {
0291       std::swap(Visited, Other.Visited);
0292       std::swap(CurInst, Other.CurInst);
0293       std::swap(Head, Other.Head);
0294       std::swap(Tail, Other.Tail);
0295     }
0296     return *this;
0297   }
0298
0299   ~MustBeExecutedIterator() = default;
0300
0301   /// Pre- and post-increment operators.
0302   ///{
0303   MustBeExecutedIterator &operator++() {
0304     CurInst = advance();
0305     return *this;
0306   }
0307
0308   MustBeExecutedIterator operator++(int) {
0309     MustBeExecutedIterator tmp(*this);
0310     operator++();
0311     return tmp;
0312   }
0313   ///}
0314
0315   /// Equality and inequality operators. Note that we ignore the history here.
0316   ///{
0317   bool operator==(const MustBeExecutedIterator &Other) const {
0318     return CurInst == Other.CurInst && Head == Other.Head && Tail == Other.Tail;
0319   }
0320
0321   bool operator!=(const MustBeExecutedIterator &Other) const {
0322     return !(*this == Other);
0323   }
0324   ///}
0325
0326   /// Return the underlying instruction.
0327   const Instruction *&operator*() { return CurInst; }
0328   const Instruction *getCurrentInst() const { return CurInst; }
0329
0330   /// Return true if \p I was encountered by this iterator already.
0331   bool count(const Instruction *I) const {
0332     return Visited.count({I, ExplorationDirection::FORWARD}) ||
0333            Visited.count({I, ExplorationDirection::BACKWARD});
0334   }
0335
0336 private:
0337   using VisitedSetTy =
0338       DenseSet<PointerIntPair<const Instruction *, 1, ExplorationDirection>>;
0339
0340   /// Private constructors.
0341   MustBeExecutedIterator(ExplorerTy &Explorer, const Instruction *I);
0342
0343   /// Reset the iterator to its initial state pointing at \p I.
0344   void reset(const Instruction *I);
0345
0346   /// Reset the iterator to point at \p I, keep cached state.
0347   void resetInstruction(const Instruction *I);
0348
0349   /// Try to advance one of the underlying positions (Head or Tail).
0350   ///
0351   /// \return The next instruction in the must be executed context, or nullptr
0352   ///         if none was found.
0353   const Instruction *advance();
0354
0355   /// A set to track the visited instructions in order to deal with endless
0356   /// loops and recursion.
0357   VisitedSetTy Visited;
0358
0359   /// A reference to the explorer that created this iterator.
0360   ExplorerTy &Explorer;
0361
0362   /// The instruction we are currently exposing to the user. There is always an
0363   /// instruction that we know is executed with the given program point,
0364   /// initially the program point itself.
0365   const Instruction *CurInst;
0366
0367   /// Two positions that mark the program points where this iterator will look
0368   /// for the next instruction. Note that the current instruction is either the
0369   /// one pointed to by Head, Tail, or both.
0370   const Instruction *Head, *Tail;
0371
0372   friend struct MustBeExecutedContextExplorer;
0373 };
0374
0375 /// A "must be executed context" for a given program point PP is the set of
0376 /// instructions, potentially before and after PP, that are executed always when
0377 /// PP is reached. The MustBeExecutedContextExplorer an interface to explore
0378 /// "must be executed contexts" in a module through the use of
0379 /// MustBeExecutedIterator.
0380 ///
0381 /// The explorer exposes "must be executed iterators" that traverse the must be
0382 /// executed context. There is little information sharing between iterators as
0383 /// the expected use case involves few iterators for "far apart" instructions.
0384 /// If that changes, we should consider caching more intermediate results.
0385 struct MustBeExecutedContextExplorer {
0386
0387   /// In the description of the parameters we use PP to denote a program point
0388   /// for which the must be executed context is explored, or put differently,
0389   /// for which the MustBeExecutedIterator is created.
0390   ///
0391   /// \param ExploreInterBlock    Flag to indicate if instructions in blocks
0392   ///                             other than the parent of PP should be
0393   ///                             explored.
0394   /// \param ExploreCFGForward    Flag to indicate if instructions located after
0395   ///                             PP in the CFG, e.g., post-dominating PP,
0396   ///                             should be explored.
0397   /// \param ExploreCFGBackward   Flag to indicate if instructions located
0398   ///                             before PP in the CFG, e.g., dominating PP,
0399   ///                             should be explored.
0400   MustBeExecutedContextExplorer(
0401       bool ExploreInterBlock, bool ExploreCFGForward, bool ExploreCFGBackward,
0402       GetterTy<const LoopInfo> LIGetter =
0403           [](const Function &) { return nullptr; },
0404       GetterTy<const DominatorTree> DTGetter =
0405           [](const Function &) { return nullptr; },
0406       GetterTy<const PostDominatorTree> PDTGetter =
0407           [](const Function &) { return nullptr; })
0408       : ExploreInterBlock(ExploreInterBlock),
0409         ExploreCFGForward(ExploreCFGForward),
0410         ExploreCFGBackward(ExploreCFGBackward), LIGetter(LIGetter),
0411         DTGetter(DTGetter), PDTGetter(PDTGetter), EndIterator(*this, nullptr) {}
0412
0413   /// Iterator-based interface. \see MustBeExecutedIterator.
0414   ///{
0415   using iterator = MustBeExecutedIterator;
0416   using const_iterator = const MustBeExecutedIterator;
0417
0418   /// Return an iterator to explore the context around \p PP.
0419   iterator &begin(const Instruction *PP) {
0420     auto &It = InstructionIteratorMap[PP];
0421     if (!It)
0422       It.reset(new iterator(*this, PP));
0423     return *It;
0424   }
0425
0426   /// Return an iterator to explore the cached context around \p PP.
0427   const_iterator &begin(const Instruction *PP) const {
0428     return *InstructionIteratorMap.find(PP)->second;
0429   }
0430
0431   /// Return an universal end iterator.
0432   ///{
0433   iterator &end() { return EndIterator; }
0434   iterator &end(const Instruction *) { return EndIterator; }
0435
0436   const_iterator &end() const { return EndIterator; }
0437   const_iterator &end(const Instruction *) const { return EndIterator; }
0438   ///}
0439
0440   /// Return an iterator range to explore the context around \p PP.
0441   llvm::iterator_range<iterator> range(const Instruction *PP) {
0442     return llvm::make_range(begin(PP), end(PP));
0443   }
0444
0445   /// Return an iterator range to explore the cached context around \p PP.
0446   llvm::iterator_range<const_iterator> range(const Instruction *PP) const {
0447     return llvm::make_range(begin(PP), end(PP));
0448   }
0449   ///}
0450
0451   /// Check \p Pred on all instructions in the context.
0452   ///
0453   /// This method will evaluate \p Pred and return
0454   /// true if \p Pred holds in every instruction.
0455   bool checkForAllContext(const Instruction *PP,
0456                           function_ref<bool(const Instruction *)> Pred) {
0457     for (auto EIt = begin(PP), EEnd = end(PP); EIt != EEnd; ++EIt)
0458       if (!Pred(*EIt))
0459         return false;
0460     return true;
0461   }
0462
0463   /// Helper to look for \p I in the context of \p PP.
0464   ///
0465   /// The context is expanded until \p I was found or no more expansion is
0466   /// possible.
0467   ///
0468   /// \returns True, iff \p I was found.
0469   bool findInContextOf(const Instruction *I, const Instruction *PP) {
0470     auto EIt = begin(PP), EEnd = end(PP);
0471     return findInContextOf(I, EIt, EEnd);
0472   }
0473
0474   /// Helper to look for \p I in the context defined by \p EIt and \p EEnd.
0475   ///
0476   /// The context is expanded until \p I was found or no more expansion is
0477   /// possible.
0478   ///
0479   /// \returns True, iff \p I was found.
0480   bool findInContextOf(const Instruction *I, iterator &EIt, iterator &EEnd) {
0481     bool Found = EIt.count(I);
0482     while (!Found && EIt != EEnd)
0483       Found = (++EIt).getCurrentInst() == I;
0484     return Found;
0485   }
0486
0487   /// Return the next instruction that is guaranteed to be executed after \p PP.
0488   ///
0489   /// \param It              The iterator that is used to traverse the must be
0490   ///                        executed context.
0491   /// \param PP              The program point for which the next instruction
0492   ///                        that is guaranteed to execute is determined.
0493   const Instruction *
0494   getMustBeExecutedNextInstruction(MustBeExecutedIterator &It,
0495                                    const Instruction *PP);
0496   /// Return the previous instr. that is guaranteed to be executed before \p PP.
0497   ///
0498   /// \param It              The iterator that is used to traverse the must be
0499   ///                        executed context.
0500   /// \param PP              The program point for which the previous instr.
0501   ///                        that is guaranteed to execute is determined.
0502   const Instruction *
0503   getMustBeExecutedPrevInstruction(MustBeExecutedIterator &It,
0504                                    const Instruction *PP);
0505
0506   /// Find the next join point from \p InitBB in forward direction.
0507   const BasicBlock *findForwardJoinPoint(const BasicBlock *InitBB);
0508
0509   /// Find the next join point from \p InitBB in backward direction.
0510   const BasicBlock *findBackwardJoinPoint(const BasicBlock *InitBB);
0511
0512   /// Parameter that limit the performed exploration. See the constructor for
0513   /// their meaning.
0514   ///{
0515   const bool ExploreInterBlock;
0516   const bool ExploreCFGForward;
0517   const bool ExploreCFGBackward;
0518   ///}
0519
0520 private:
0521   /// Getters for common CFG analyses: LoopInfo, DominatorTree, and
0522   /// PostDominatorTree.
0523   ///{
0524   GetterTy<const LoopInfo> LIGetter;
0525   GetterTy<const DominatorTree> DTGetter;
0526   GetterTy<const PostDominatorTree> PDTGetter;
0527   ///}
0528
0529   /// Map to cache isGuaranteedToTransferExecutionToSuccessor results.
0530   DenseMap<const BasicBlock *, std::optional<bool>> BlockTransferMap;
0531
0532   /// Map to cache containsIrreducibleCFG results.
0533   DenseMap<const Function *, std::optional<bool>> IrreducibleControlMap;
0534
0535   /// Map from instructions to associated must be executed iterators.
0536   DenseMap<const Instruction *, std::unique_ptr<MustBeExecutedIterator>>
0537       InstructionIteratorMap;
0538
0539   /// A unique end iterator.
0540   MustBeExecutedIterator EndIterator;
0541 };
0542
0543 class MustExecutePrinterPass : public PassInfoMixin<MustExecutePrinterPass> {
0544   raw_ostream &OS;
0545
0546 public:
0547   MustExecutePrinterPass(raw_ostream &OS) : OS(OS) {}
0548   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
0549   static bool isRequired() { return true; }
0550 };
0551
0552 class MustBeExecutedContextPrinterPass
0553     : public PassInfoMixin<MustBeExecutedContextPrinterPass> {
0554   raw_ostream &OS;
0555
0556 public:
0557   MustBeExecutedContextPrinterPass(raw_ostream &OS) : OS(OS) {}
0558   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
0559   static bool isRequired() { return true; }
0560 };
0561
0562 } // namespace llvm
0563
0564 #endif