Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:43:05

0001 //===- GenericUniformityImpl.h -----------------------*- C++ -*------------===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 // This template implementation resides in a separate file so that it
0010 // does not get injected into every .cpp file that includes the
0011 // generic header.
0012 //
0013 // DO NOT INCLUDE THIS FILE WHEN MERELY USING UNIFORMITYINFO.
0014 //
0015 // This file should only be included by files that implement a
0016 // specialization of the relvant templates. Currently these are:
0017 // - UniformityAnalysis.cpp
0018 //
0019 // Note: The DEBUG_TYPE macro should be defined before using this
0020 // file so that any use of LLVM_DEBUG is associated with the
0021 // including file rather than this file.
0022 //
0023 //===----------------------------------------------------------------------===//
0024 ///
0025 /// \file
0026 /// \brief Implementation of uniformity analysis.
0027 ///
0028 /// The algorithm is a fixed point iteration that starts with the assumption
0029 /// that all control flow and all values are uniform. Starting from sources of
0030 /// divergence (whose discovery must be implemented by a CFG- or even
0031 /// target-specific derived class), divergence of values is propagated from
0032 /// definition to uses in a straight-forward way. The main complexity lies in
0033 /// the propagation of the impact of divergent control flow on the divergence of
0034 /// values (sync dependencies).
0035 ///
0036 /// NOTE: In general, no interface exists for a transform to update
0037 /// (Machine)UniformityInfo. Additionally, (Machine)CycleAnalysis is a
0038 /// transitive dependence, but it also does not provide an interface for
0039 /// updating itself. Given that, transforms should not preserve uniformity in
0040 /// their getAnalysisUsage() callback.
0041 ///
0042 //===----------------------------------------------------------------------===//
0043 
0044 #ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H
0045 #define LLVM_ADT_GENERICUNIFORMITYIMPL_H
0046 
0047 #include "llvm/ADT/GenericUniformityInfo.h"
0048 
0049 #include "llvm/ADT/DenseSet.h"
0050 #include "llvm/ADT/STLExtras.h"
0051 #include "llvm/ADT/SmallPtrSet.h"
0052 #include "llvm/ADT/SparseBitVector.h"
0053 #include "llvm/ADT/StringExtras.h"
0054 #include "llvm/Support/raw_ostream.h"
0055 
0056 #define DEBUG_TYPE "uniformity"
0057 
0058 namespace llvm {
0059 
0060 /// Construct a specially modified post-order traversal of cycles.
0061 ///
0062 /// The ModifiedPO is contructed using a virtually modified CFG as follows:
0063 ///
0064 /// 1. The successors of pre-entry nodes (predecessors of an cycle
0065 ///    entry that are outside the cycle) are replaced by the
0066 ///    successors of the successors of the header.
0067 /// 2. Successors of the cycle header are replaced by the exit blocks
0068 ///    of the cycle.
0069 ///
0070 /// Effectively, we produce a depth-first numbering with the following
0071 /// properties:
0072 ///
0073 /// 1. Nodes after a cycle are numbered earlier than the cycle header.
0074 /// 2. The header is numbered earlier than the nodes in the cycle.
0075 /// 3. The numbering of the nodes within the cycle forms an interval
0076 ///    starting with the header.
0077 ///
0078 /// Effectively, the virtual modification arranges the nodes in a
0079 /// cycle as a DAG with the header as the sole leaf, and successors of
0080 /// the header as the roots. A reverse traversal of this numbering has
0081 /// the following invariant on the unmodified original CFG:
0082 ///
0083 ///    Each node is visited after all its predecessors, except if that
0084 ///    predecessor is the cycle header.
0085 ///
0086 template <typename ContextT> class ModifiedPostOrder {
0087 public:
0088   using BlockT = typename ContextT::BlockT;
0089   using FunctionT = typename ContextT::FunctionT;
0090   using DominatorTreeT = typename ContextT::DominatorTreeT;
0091 
0092   using CycleInfoT = GenericCycleInfo<ContextT>;
0093   using CycleT = typename CycleInfoT::CycleT;
0094   using const_iterator = typename std::vector<BlockT *>::const_iterator;
0095 
0096   ModifiedPostOrder(const ContextT &C) : Context(C) {}
0097 
0098   bool empty() const { return m_order.empty(); }
0099   size_t size() const { return m_order.size(); }
0100 
0101   void clear() { m_order.clear(); }
0102   void compute(const CycleInfoT &CI);
0103 
0104   unsigned count(BlockT *BB) const { return POIndex.count(BB); }
0105   const BlockT *operator[](size_t idx) const { return m_order[idx]; }
0106 
0107   void appendBlock(const BlockT &BB, bool isReducibleCycleHeader = false) {
0108     POIndex[&BB] = m_order.size();
0109     m_order.push_back(&BB);
0110     LLVM_DEBUG(dbgs() << "ModifiedPO(" << POIndex[&BB]
0111                       << "): " << Context.print(&BB) << "\n");
0112     if (isReducibleCycleHeader)
0113       ReducibleCycleHeaders.insert(&BB);
0114   }
0115 
0116   unsigned getIndex(const BlockT *BB) const {
0117     assert(POIndex.count(BB));
0118     return POIndex.lookup(BB);
0119   }
0120 
0121   bool isReducibleCycleHeader(const BlockT *BB) const {
0122     return ReducibleCycleHeaders.contains(BB);
0123   }
0124 
0125 private:
0126   SmallVector<const BlockT *> m_order;
0127   DenseMap<const BlockT *, unsigned> POIndex;
0128   SmallPtrSet<const BlockT *, 32> ReducibleCycleHeaders;
0129   const ContextT &Context;
0130 
0131   void computeCyclePO(const CycleInfoT &CI, const CycleT *Cycle,
0132                       SmallPtrSetImpl<const BlockT *> &Finalized);
0133 
0134   void computeStackPO(SmallVectorImpl<const BlockT *> &Stack,
0135                       const CycleInfoT &CI, const CycleT *Cycle,
0136                       SmallPtrSetImpl<const BlockT *> &Finalized);
0137 };
0138 
0139 template <typename> class DivergencePropagator;
0140 
0141 /// \class GenericSyncDependenceAnalysis
0142 ///
0143 /// \brief Locate join blocks for disjoint paths starting at a divergent branch.
0144 ///
0145 /// An analysis per divergent branch that returns the set of basic
0146 /// blocks whose phi nodes become divergent due to divergent control.
0147 /// These are the blocks that are reachable by two disjoint paths from
0148 /// the branch, or cycle exits reachable along a path that is disjoint
0149 /// from a path to the cycle latch.
0150 
0151 // --- Above line is not a doxygen comment; intentionally left blank ---
0152 //
0153 // Originally implemented in SyncDependenceAnalysis.cpp for DivergenceAnalysis.
0154 //
0155 // The SyncDependenceAnalysis is used in the UniformityAnalysis to model
0156 // control-induced divergence in phi nodes.
0157 //
0158 // -- Reference --
0159 // The algorithm is an extension of Section 5 of
0160 //
0161 //   An abstract interpretation for SPMD divergence
0162 //       on reducible control flow graphs.
0163 //   Julian Rosemann, Simon Moll and Sebastian Hack
0164 //   POPL '21
0165 //
0166 //
0167 // -- Sync dependence --
0168 // Sync dependence characterizes the control flow aspect of the
0169 // propagation of branch divergence. For example,
0170 //
0171 //   %cond = icmp slt i32 %tid, 10
0172 //   br i1 %cond, label %then, label %else
0173 // then:
0174 //   br label %merge
0175 // else:
0176 //   br label %merge
0177 // merge:
0178 //   %a = phi i32 [ 0, %then ], [ 1, %else ]
0179 //
0180 // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
0181 // because %tid is not on its use-def chains, %a is sync dependent on %tid
0182 // because the branch "br i1 %cond" depends on %tid and affects which value %a
0183 // is assigned to.
0184 //
0185 //
0186 // -- Reduction to SSA construction --
0187 // There are two disjoint paths from A to X, if a certain variant of SSA
0188 // construction places a phi node in X under the following set-up scheme.
0189 //
0190 // This variant of SSA construction ignores incoming undef values.
0191 // That is paths from the entry without a definition do not result in
0192 // phi nodes.
0193 //
0194 //       entry
0195 //     /      \
0196 //    A        \
0197 //  /   \       Y
0198 // B     C     /
0199 //  \   /  \  /
0200 //    D     E
0201 //     \   /
0202 //       F
0203 //
0204 // Assume that A contains a divergent branch. We are interested
0205 // in the set of all blocks where each block is reachable from A
0206 // via two disjoint paths. This would be the set {D, F} in this
0207 // case.
0208 // To generally reduce this query to SSA construction we introduce
0209 // a virtual variable x and assign to x different values in each
0210 // successor block of A.
0211 //
0212 //           entry
0213 //         /      \
0214 //        A        \
0215 //      /   \       Y
0216 // x = 0   x = 1   /
0217 //      \  /   \  /
0218 //        D     E
0219 //         \   /
0220 //           F
0221 //
0222 // Our flavor of SSA construction for x will construct the following
0223 //
0224 //            entry
0225 //          /      \
0226 //         A        \
0227 //       /   \       Y
0228 // x0 = 0   x1 = 1  /
0229 //       \   /   \ /
0230 //     x2 = phi   E
0231 //         \     /
0232 //         x3 = phi
0233 //
0234 // The blocks D and F contain phi nodes and are thus each reachable
0235 // by two disjoins paths from A.
0236 //
0237 // -- Remarks --
0238 // * In case of cycle exits we need to check for temporal divergence.
0239 //   To this end, we check whether the definition of x differs between the
0240 //   cycle exit and the cycle header (_after_ SSA construction).
0241 //
0242 // * In the presence of irreducible control flow, the fixed point is
0243 //   reached only after multiple iterations. This is because labels
0244 //   reaching the header of a cycle must be repropagated through the
0245 //   cycle. This is true even in a reducible cycle, since the labels
0246 //   may have been produced by a nested irreducible cycle.
0247 //
0248 // * Note that SyncDependenceAnalysis is not concerned with the points
0249 //   of convergence in an irreducible cycle. It's only purpose is to
0250 //   identify join blocks. The "diverged entry" criterion is
0251 //   separately applied on join blocks to determine if an entire
0252 //   irreducible cycle is assumed to be divergent.
0253 //
0254 // * Relevant related work:
0255 //     A simple algorithm for global data flow analysis problems.
0256 //     Matthew S. Hecht and Jeffrey D. Ullman.
0257 //     SIAM Journal on Computing, 4(4):519–532, December 1975.
0258 //
0259 template <typename ContextT> class GenericSyncDependenceAnalysis {
0260 public:
0261   using BlockT = typename ContextT::BlockT;
0262   using DominatorTreeT = typename ContextT::DominatorTreeT;
0263   using FunctionT = typename ContextT::FunctionT;
0264   using ValueRefT = typename ContextT::ValueRefT;
0265   using InstructionT = typename ContextT::InstructionT;
0266 
0267   using CycleInfoT = GenericCycleInfo<ContextT>;
0268   using CycleT = typename CycleInfoT::CycleT;
0269 
0270   using ConstBlockSet = SmallPtrSet<const BlockT *, 4>;
0271   using ModifiedPO = ModifiedPostOrder<ContextT>;
0272 
0273   // * if BlockLabels[B] == C then C is the dominating definition at
0274   //   block B
0275   // * if BlockLabels[B] == nullptr then we haven't seen B yet
0276   // * if BlockLabels[B] == B then:
0277   //   - B is a join point of disjoint paths from X, or,
0278   //   - B is an immediate successor of X (initial value), or,
0279   //   - B is X
0280   using BlockLabelMap = DenseMap<const BlockT *, const BlockT *>;
0281 
0282   /// Information discovered by the sync dependence analysis for each
0283   /// divergent branch.
0284   struct DivergenceDescriptor {
0285     // Join points of diverged paths.
0286     ConstBlockSet JoinDivBlocks;
0287     // Divergent cycle exits
0288     ConstBlockSet CycleDivBlocks;
0289     // Labels assigned to blocks on diverged paths.
0290     BlockLabelMap BlockLabels;
0291   };
0292 
0293   using DivergencePropagatorT = DivergencePropagator<ContextT>;
0294 
0295   GenericSyncDependenceAnalysis(const ContextT &Context,
0296                                 const DominatorTreeT &DT, const CycleInfoT &CI);
0297 
0298   /// \brief Computes divergent join points and cycle exits caused by branch
0299   /// divergence in \p Term.
0300   ///
0301   /// This returns a pair of sets:
0302   /// * The set of blocks which are reachable by disjoint paths from
0303   ///   \p Term.
0304   /// * The set also contains cycle exits if there two disjoint paths:
0305   ///   one from \p Term to the cycle exit and another from \p Term to
0306   ///   the cycle header.
0307   const DivergenceDescriptor &getJoinBlocks(const BlockT *DivTermBlock);
0308 
0309 private:
0310   static DivergenceDescriptor EmptyDivergenceDesc;
0311 
0312   ModifiedPO CyclePO;
0313 
0314   const DominatorTreeT &DT;
0315   const CycleInfoT &CI;
0316 
0317   DenseMap<const BlockT *, std::unique_ptr<DivergenceDescriptor>>
0318       CachedControlDivDescs;
0319 };
0320 
0321 /// \brief Analysis that identifies uniform values in a data-parallel
0322 /// execution.
0323 ///
0324 /// This analysis propagates divergence in a data-parallel context
0325 /// from sources of divergence to all users. It can be instantiated
0326 /// for an IR that provides a suitable SSAContext.
0327 template <typename ContextT> class GenericUniformityAnalysisImpl {
0328 public:
0329   using BlockT = typename ContextT::BlockT;
0330   using FunctionT = typename ContextT::FunctionT;
0331   using ValueRefT = typename ContextT::ValueRefT;
0332   using ConstValueRefT = typename ContextT::ConstValueRefT;
0333   using UseT = typename ContextT::UseT;
0334   using InstructionT = typename ContextT::InstructionT;
0335   using DominatorTreeT = typename ContextT::DominatorTreeT;
0336 
0337   using CycleInfoT = GenericCycleInfo<ContextT>;
0338   using CycleT = typename CycleInfoT::CycleT;
0339 
0340   using SyncDependenceAnalysisT = GenericSyncDependenceAnalysis<ContextT>;
0341   using DivergenceDescriptorT =
0342       typename SyncDependenceAnalysisT::DivergenceDescriptor;
0343   using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
0344 
0345   GenericUniformityAnalysisImpl(const DominatorTreeT &DT, const CycleInfoT &CI,
0346                                 const TargetTransformInfo *TTI)
0347       : Context(CI.getSSAContext()), F(*Context.getFunction()), CI(CI),
0348         TTI(TTI), DT(DT), SDA(Context, DT, CI) {}
0349 
0350   void initialize();
0351 
0352   const FunctionT &getFunction() const { return F; }
0353 
0354   /// \brief Mark \p UniVal as a value that is always uniform.
0355   void addUniformOverride(const InstructionT &Instr);
0356 
0357   /// \brief Examine \p I for divergent outputs and add to the worklist.
0358   void markDivergent(const InstructionT &I);
0359 
0360   /// \brief Mark \p DivVal as a divergent value.
0361   /// \returns Whether the tracked divergence state of \p DivVal changed.
0362   bool markDivergent(ConstValueRefT DivVal);
0363 
0364   /// \brief Mark outputs of \p Instr as divergent.
0365   /// \returns Whether the tracked divergence state of any output has changed.
0366   bool markDefsDivergent(const InstructionT &Instr);
0367 
0368   /// \brief Propagate divergence to all instructions in the region.
0369   /// Divergence is seeded by calls to \p markDivergent.
0370   void compute();
0371 
0372   /// \brief Whether any value was marked or analyzed to be divergent.
0373   bool hasDivergence() const { return !DivergentValues.empty(); }
0374 
0375   /// \brief Whether \p Val will always return a uniform value regardless of its
0376   /// operands
0377   bool isAlwaysUniform(const InstructionT &Instr) const;
0378 
0379   bool hasDivergentDefs(const InstructionT &I) const;
0380 
0381   bool isDivergent(const InstructionT &I) const {
0382     if (I.isTerminator()) {
0383       return DivergentTermBlocks.contains(I.getParent());
0384     }
0385     return hasDivergentDefs(I);
0386   };
0387 
0388   /// \brief Whether \p Val is divergent at its definition.
0389   bool isDivergent(ConstValueRefT V) const { return DivergentValues.count(V); }
0390 
0391   bool isDivergentUse(const UseT &U) const;
0392 
0393   bool hasDivergentTerminator(const BlockT &B) const {
0394     return DivergentTermBlocks.contains(&B);
0395   }
0396 
0397   void print(raw_ostream &out) const;
0398 
0399 protected:
0400   /// \brief Value/block pair representing a single phi input.
0401   struct PhiInput {
0402     ConstValueRefT value;
0403     BlockT *predBlock;
0404 
0405     PhiInput(ConstValueRefT value, BlockT *predBlock)
0406         : value(value), predBlock(predBlock) {}
0407   };
0408 
0409   const ContextT &Context;
0410   const FunctionT &F;
0411   const CycleInfoT &CI;
0412   const TargetTransformInfo *TTI = nullptr;
0413 
0414   // Detected/marked divergent values.
0415   DenseSet<ConstValueRefT> DivergentValues;
0416   SmallPtrSet<const BlockT *, 32> DivergentTermBlocks;
0417 
0418   // Internal worklist for divergence propagation.
0419   std::vector<const InstructionT *> Worklist;
0420 
0421   /// \brief Mark \p Term as divergent and push all Instructions that become
0422   /// divergent as a result on the worklist.
0423   void analyzeControlDivergence(const InstructionT &Term);
0424 
0425 private:
0426   const DominatorTreeT &DT;
0427 
0428   // Recognized cycles with divergent exits.
0429   SmallPtrSet<const CycleT *, 16> DivergentExitCycles;
0430 
0431   // Cycles assumed to be divergent.
0432   //
0433   // We don't use a set here because every insertion needs an explicit
0434   // traversal of all existing members.
0435   SmallVector<const CycleT *> AssumedDivergent;
0436 
0437   // The SDA links divergent branches to divergent control-flow joins.
0438   SyncDependenceAnalysisT SDA;
0439 
0440   // Set of known-uniform values.
0441   SmallPtrSet<const InstructionT *, 32> UniformOverrides;
0442 
0443   /// \brief Mark all nodes in \p JoinBlock as divergent and push them on
0444   /// the worklist.
0445   void taintAndPushAllDefs(const BlockT &JoinBlock);
0446 
0447   /// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on
0448   /// the worklist.
0449   void taintAndPushPhiNodes(const BlockT &JoinBlock);
0450 
0451   /// \brief Identify all Instructions that become divergent because \p DivExit
0452   /// is a divergent cycle exit of \p DivCycle. Mark those instructions as
0453   /// divergent and push them on the worklist.
0454   void propagateCycleExitDivergence(const BlockT &DivExit,
0455                                     const CycleT &DivCycle);
0456 
0457   /// Mark as divergent all external uses of values defined in \p DefCycle.
0458   void analyzeCycleExitDivergence(const CycleT &DefCycle);
0459 
0460   /// \brief Mark as divergent all uses of \p I that are outside \p DefCycle.
0461   void propagateTemporalDivergence(const InstructionT &I,
0462                                    const CycleT &DefCycle);
0463 
0464   /// \brief Push all users of \p Val (in the region) to the worklist.
0465   void pushUsers(const InstructionT &I);
0466   void pushUsers(ConstValueRefT V);
0467 
0468   bool usesValueFromCycle(const InstructionT &I, const CycleT &DefCycle) const;
0469 
0470   /// \brief Whether \p Def is divergent when read in \p ObservingBlock.
0471   bool isTemporalDivergent(const BlockT &ObservingBlock,
0472                            const InstructionT &Def) const;
0473 };
0474 
0475 template <typename ImplT>
0476 void GenericUniformityAnalysisImplDeleter<ImplT>::operator()(ImplT *Impl) {
0477   delete Impl;
0478 }
0479 
0480 /// Compute divergence starting with a divergent branch.
0481 template <typename ContextT> class DivergencePropagator {
0482 public:
0483   using BlockT = typename ContextT::BlockT;
0484   using DominatorTreeT = typename ContextT::DominatorTreeT;
0485   using FunctionT = typename ContextT::FunctionT;
0486   using ValueRefT = typename ContextT::ValueRefT;
0487 
0488   using CycleInfoT = GenericCycleInfo<ContextT>;
0489   using CycleT = typename CycleInfoT::CycleT;
0490 
0491   using ModifiedPO = ModifiedPostOrder<ContextT>;
0492   using SyncDependenceAnalysisT = GenericSyncDependenceAnalysis<ContextT>;
0493   using DivergenceDescriptorT =
0494       typename SyncDependenceAnalysisT::DivergenceDescriptor;
0495   using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
0496 
0497   const ModifiedPO &CyclePOT;
0498   const DominatorTreeT &DT;
0499   const CycleInfoT &CI;
0500   const BlockT &DivTermBlock;
0501   const ContextT &Context;
0502 
0503   // Track blocks that receive a new label. Every time we relabel a
0504   // cycle header, we another pass over the modified post-order in
0505   // order to propagate the header label. The bit vector also allows
0506   // us to skip labels that have not changed.
0507   SparseBitVector<> FreshLabels;
0508 
0509   // divergent join and cycle exit descriptor.
0510   std::unique_ptr<DivergenceDescriptorT> DivDesc;
0511   BlockLabelMapT &BlockLabels;
0512 
0513   DivergencePropagator(const ModifiedPO &CyclePOT, const DominatorTreeT &DT,
0514                        const CycleInfoT &CI, const BlockT &DivTermBlock)
0515       : CyclePOT(CyclePOT), DT(DT), CI(CI), DivTermBlock(DivTermBlock),
0516         Context(CI.getSSAContext()), DivDesc(new DivergenceDescriptorT),
0517         BlockLabels(DivDesc->BlockLabels) {}
0518 
0519   void printDefs(raw_ostream &Out) {
0520     Out << "Propagator::BlockLabels {\n";
0521     for (int BlockIdx = (int)CyclePOT.size() - 1; BlockIdx >= 0; --BlockIdx) {
0522       const auto *Block = CyclePOT[BlockIdx];
0523       const auto *Label = BlockLabels[Block];
0524       Out << Context.print(Block) << "(" << BlockIdx << ") : ";
0525       if (!Label) {
0526         Out << "<null>\n";
0527       } else {
0528         Out << Context.print(Label) << "\n";
0529       }
0530     }
0531     Out << "}\n";
0532   }
0533 
0534   // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this
0535   // causes a divergent join.
0536   bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel) {
0537     const auto *OldLabel = BlockLabels[&SuccBlock];
0538 
0539     LLVM_DEBUG(dbgs() << "labeling " << Context.print(&SuccBlock) << ":\n"
0540                       << "\tpushed label: " << Context.print(&PushedLabel)
0541                       << "\n"
0542                       << "\told label: " << Context.print(OldLabel) << "\n");
0543 
0544     // Early exit if there is no change in the label.
0545     if (OldLabel == &PushedLabel)
0546       return false;
0547 
0548     if (OldLabel != &SuccBlock) {
0549       auto SuccIdx = CyclePOT.getIndex(&SuccBlock);
0550       // Assigning a new label, mark this in FreshLabels.
0551       LLVM_DEBUG(dbgs() << "\tfresh label: " << SuccIdx << "\n");
0552       FreshLabels.set(SuccIdx);
0553     }
0554 
0555     // This is not a join if the succ was previously unlabeled.
0556     if (!OldLabel) {
0557       LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&PushedLabel)
0558                         << "\n");
0559       BlockLabels[&SuccBlock] = &PushedLabel;
0560       return false;
0561     }
0562 
0563     // This is a new join. Label the join block as itself, and not as
0564     // the pushed label.
0565     LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&SuccBlock) << "\n");
0566     BlockLabels[&SuccBlock] = &SuccBlock;
0567 
0568     return true;
0569   }
0570 
0571   // visiting a virtual cycle exit edge from the cycle header --> temporal
0572   // divergence on join
0573   bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label) {
0574     if (!computeJoin(ExitBlock, Label))
0575       return false;
0576 
0577     // Identified a divergent cycle exit
0578     DivDesc->CycleDivBlocks.insert(&ExitBlock);
0579     LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(&ExitBlock)
0580                       << "\n");
0581     return true;
0582   }
0583 
0584   // process \p SuccBlock with reaching definition \p Label
0585   bool visitEdge(const BlockT &SuccBlock, const BlockT &Label) {
0586     if (!computeJoin(SuccBlock, Label))
0587       return false;
0588 
0589     // Divergent, disjoint paths join.
0590     DivDesc->JoinDivBlocks.insert(&SuccBlock);
0591     LLVM_DEBUG(dbgs() << "\tDivergent join: " << Context.print(&SuccBlock)
0592                       << "\n");
0593     return true;
0594   }
0595 
0596   std::unique_ptr<DivergenceDescriptorT> computeJoinPoints() {
0597     assert(DivDesc);
0598 
0599     LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: "
0600                       << Context.print(&DivTermBlock) << "\n");
0601 
0602     // Early stopping criterion
0603     int FloorIdx = CyclePOT.size() - 1;
0604     const BlockT *FloorLabel = nullptr;
0605     int DivTermIdx = CyclePOT.getIndex(&DivTermBlock);
0606 
0607     // Bootstrap with branch targets
0608     auto const *DivTermCycle = CI.getCycle(&DivTermBlock);
0609     for (const auto *SuccBlock : successors(&DivTermBlock)) {
0610       if (DivTermCycle && !DivTermCycle->contains(SuccBlock)) {
0611         // If DivTerm exits the cycle immediately, computeJoin() might
0612         // not reach SuccBlock with a different label. We need to
0613         // check for this exit now.
0614         DivDesc->CycleDivBlocks.insert(SuccBlock);
0615         LLVM_DEBUG(dbgs() << "\tImmediate divergent cycle exit: "
0616                           << Context.print(SuccBlock) << "\n");
0617       }
0618       auto SuccIdx = CyclePOT.getIndex(SuccBlock);
0619       visitEdge(*SuccBlock, *SuccBlock);
0620       FloorIdx = std::min<int>(FloorIdx, SuccIdx);
0621     }
0622 
0623     while (true) {
0624       auto BlockIdx = FreshLabels.find_last();
0625       if (BlockIdx == -1 || BlockIdx < FloorIdx)
0626         break;
0627 
0628       LLVM_DEBUG(dbgs() << "Current labels:\n"; printDefs(dbgs()));
0629 
0630       FreshLabels.reset(BlockIdx);
0631       if (BlockIdx == DivTermIdx) {
0632         LLVM_DEBUG(dbgs() << "Skipping DivTermBlock\n");
0633         continue;
0634       }
0635 
0636       const auto *Block = CyclePOT[BlockIdx];
0637       LLVM_DEBUG(dbgs() << "visiting " << Context.print(Block) << " at index "
0638                         << BlockIdx << "\n");
0639 
0640       const auto *Label = BlockLabels[Block];
0641       assert(Label);
0642 
0643       bool CausedJoin = false;
0644       int LoweredFloorIdx = FloorIdx;
0645 
0646       // If the current block is the header of a reducible cycle that
0647       // contains the divergent branch, then the label should be
0648       // propagated to the cycle exits. Such a header is the "last
0649       // possible join" of any disjoint paths within this cycle. This
0650       // prevents detection of spurious joins at the entries of any
0651       // irreducible child cycles.
0652       //
0653       // This conclusion about the header is true for any choice of DFS:
0654       //
0655       //   If some DFS has a reducible cycle C with header H, then for
0656       //   any other DFS, H is the header of a cycle C' that is a
0657       //   superset of C. For a divergent branch inside the subgraph
0658       //   C, any join node inside C is either H, or some node
0659       //   encountered without passing through H.
0660       //
0661       auto getReducibleParent = [&](const BlockT *Block) -> const CycleT * {
0662         if (!CyclePOT.isReducibleCycleHeader(Block))
0663           return nullptr;
0664         const auto *BlockCycle = CI.getCycle(Block);
0665         if (BlockCycle->contains(&DivTermBlock))
0666           return BlockCycle;
0667         return nullptr;
0668       };
0669 
0670       if (const auto *BlockCycle = getReducibleParent(Block)) {
0671         SmallVector<BlockT *, 4> BlockCycleExits;
0672         BlockCycle->getExitBlocks(BlockCycleExits);
0673         for (auto *BlockCycleExit : BlockCycleExits) {
0674           CausedJoin |= visitCycleExitEdge(*BlockCycleExit, *Label);
0675           LoweredFloorIdx =
0676               std::min<int>(LoweredFloorIdx, CyclePOT.getIndex(BlockCycleExit));
0677         }
0678       } else {
0679         for (const auto *SuccBlock : successors(Block)) {
0680           CausedJoin |= visitEdge(*SuccBlock, *Label);
0681           LoweredFloorIdx =
0682               std::min<int>(LoweredFloorIdx, CyclePOT.getIndex(SuccBlock));
0683         }
0684       }
0685 
0686       // Floor update
0687       if (CausedJoin) {
0688         // 1. Different labels pushed to successors
0689         FloorIdx = LoweredFloorIdx;
0690       } else if (FloorLabel != Label) {
0691         // 2. No join caused BUT we pushed a label that is different than the
0692         // last pushed label
0693         FloorIdx = LoweredFloorIdx;
0694         FloorLabel = Label;
0695       }
0696     }
0697 
0698     LLVM_DEBUG(dbgs() << "Final labeling:\n"; printDefs(dbgs()));
0699 
0700     // Check every cycle containing DivTermBlock for exit divergence.
0701     // A cycle has exit divergence if the label of an exit block does
0702     // not match the label of its header.
0703     for (const auto *Cycle = CI.getCycle(&DivTermBlock); Cycle;
0704          Cycle = Cycle->getParentCycle()) {
0705       if (Cycle->isReducible()) {
0706         // The exit divergence of a reducible cycle is recorded while
0707         // propagating labels.
0708         continue;
0709       }
0710       SmallVector<BlockT *> Exits;
0711       Cycle->getExitBlocks(Exits);
0712       auto *Header = Cycle->getHeader();
0713       auto *HeaderLabel = BlockLabels[Header];
0714       for (const auto *Exit : Exits) {
0715         if (BlockLabels[Exit] != HeaderLabel) {
0716           // Identified a divergent cycle exit
0717           DivDesc->CycleDivBlocks.insert(Exit);
0718           LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(Exit)
0719                             << "\n");
0720         }
0721       }
0722     }
0723 
0724     return std::move(DivDesc);
0725   }
0726 };
0727 
0728 template <typename ContextT>
0729 typename llvm::GenericSyncDependenceAnalysis<ContextT>::DivergenceDescriptor
0730     llvm::GenericSyncDependenceAnalysis<ContextT>::EmptyDivergenceDesc;
0731 
0732 template <typename ContextT>
0733 llvm::GenericSyncDependenceAnalysis<ContextT>::GenericSyncDependenceAnalysis(
0734     const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
0735     : CyclePO(Context), DT(DT), CI(CI) {
0736   CyclePO.compute(CI);
0737 }
0738 
0739 template <typename ContextT>
0740 auto llvm::GenericSyncDependenceAnalysis<ContextT>::getJoinBlocks(
0741     const BlockT *DivTermBlock) -> const DivergenceDescriptor & {
0742   // trivial case
0743   if (succ_size(DivTermBlock) <= 1) {
0744     return EmptyDivergenceDesc;
0745   }
0746 
0747   // already available in cache?
0748   auto ItCached = CachedControlDivDescs.find(DivTermBlock);
0749   if (ItCached != CachedControlDivDescs.end())
0750     return *ItCached->second;
0751 
0752   // compute all join points
0753   DivergencePropagatorT Propagator(CyclePO, DT, CI, *DivTermBlock);
0754   auto DivDesc = Propagator.computeJoinPoints();
0755 
0756   auto printBlockSet = [&](ConstBlockSet &Blocks) {
0757     return Printable([&](raw_ostream &Out) {
0758       Out << "[";
0759       ListSeparator LS;
0760       for (const auto *BB : Blocks) {
0761         Out << LS << CI.getSSAContext().print(BB);
0762       }
0763       Out << "]\n";
0764     });
0765   };
0766 
0767   LLVM_DEBUG(
0768       dbgs() << "\nResult (" << CI.getSSAContext().print(DivTermBlock)
0769              << "):\n  JoinDivBlocks: " << printBlockSet(DivDesc->JoinDivBlocks)
0770              << "  CycleDivBlocks: " << printBlockSet(DivDesc->CycleDivBlocks)
0771              << "\n");
0772   (void)printBlockSet;
0773 
0774   auto ItInserted =
0775       CachedControlDivDescs.try_emplace(DivTermBlock, std::move(DivDesc));
0776   assert(ItInserted.second);
0777   return *ItInserted.first->second;
0778 }
0779 
0780 template <typename ContextT>
0781 void GenericUniformityAnalysisImpl<ContextT>::markDivergent(
0782     const InstructionT &I) {
0783   if (isAlwaysUniform(I))
0784     return;
0785   bool Marked = false;
0786   if (I.isTerminator()) {
0787     Marked = DivergentTermBlocks.insert(I.getParent()).second;
0788     if (Marked) {
0789       LLVM_DEBUG(dbgs() << "marked divergent term block: "
0790                         << Context.print(I.getParent()) << "\n");
0791     }
0792   } else {
0793     Marked = markDefsDivergent(I);
0794   }
0795 
0796   if (Marked)
0797     Worklist.push_back(&I);
0798 }
0799 
0800 template <typename ContextT>
0801 bool GenericUniformityAnalysisImpl<ContextT>::markDivergent(
0802     ConstValueRefT Val) {
0803   if (DivergentValues.insert(Val).second) {
0804     LLVM_DEBUG(dbgs() << "marked divergent: " << Context.print(Val) << "\n");
0805     return true;
0806   }
0807   return false;
0808 }
0809 
0810 template <typename ContextT>
0811 void GenericUniformityAnalysisImpl<ContextT>::addUniformOverride(
0812     const InstructionT &Instr) {
0813   UniformOverrides.insert(&Instr);
0814 }
0815 
0816 // Mark as divergent all external uses of values defined in \p DefCycle.
0817 //
0818 // A value V defined by a block B inside \p DefCycle may be used outside the
0819 // cycle only if the use is a PHI in some exit block, or B dominates some exit
0820 // block. Thus, we check uses as follows:
0821 //
0822 // - Check all PHIs in all exit blocks for inputs defined inside \p DefCycle.
0823 // - For every block B inside \p DefCycle that dominates at least one exit
0824 //   block, check all uses outside \p DefCycle.
0825 //
0826 // FIXME: This function does not distinguish between divergent and uniform
0827 // exits. For each divergent exit, only the values that are live at that exit
0828 // need to be propagated as divergent at their use outside the cycle.
0829 template <typename ContextT>
0830 void GenericUniformityAnalysisImpl<ContextT>::analyzeCycleExitDivergence(
0831     const CycleT &DefCycle) {
0832   SmallVector<BlockT *> Exits;
0833   DefCycle.getExitBlocks(Exits);
0834   for (auto *Exit : Exits) {
0835     for (auto &Phi : Exit->phis()) {
0836       if (usesValueFromCycle(Phi, DefCycle)) {
0837         markDivergent(Phi);
0838       }
0839     }
0840   }
0841 
0842   for (auto *BB : DefCycle.blocks()) {
0843     if (!llvm::any_of(Exits,
0844                      [&](BlockT *Exit) { return DT.dominates(BB, Exit); }))
0845       continue;
0846     for (auto &II : *BB) {
0847       propagateTemporalDivergence(II, DefCycle);
0848     }
0849   }
0850 }
0851 
0852 template <typename ContextT>
0853 void GenericUniformityAnalysisImpl<ContextT>::propagateCycleExitDivergence(
0854     const BlockT &DivExit, const CycleT &InnerDivCycle) {
0855   LLVM_DEBUG(dbgs() << "\tpropCycleExitDiv " << Context.print(&DivExit)
0856                     << "\n");
0857   auto *DivCycle = &InnerDivCycle;
0858   auto *OuterDivCycle = DivCycle;
0859   auto *ExitLevelCycle = CI.getCycle(&DivExit);
0860   const unsigned CycleExitDepth =
0861       ExitLevelCycle ? ExitLevelCycle->getDepth() : 0;
0862 
0863   // Find outer-most cycle that does not contain \p DivExit
0864   while (DivCycle && DivCycle->getDepth() > CycleExitDepth) {
0865     LLVM_DEBUG(dbgs() << "  Found exiting cycle: "
0866                       << Context.print(DivCycle->getHeader()) << "\n");
0867     OuterDivCycle = DivCycle;
0868     DivCycle = DivCycle->getParentCycle();
0869   }
0870   LLVM_DEBUG(dbgs() << "\tOuter-most exiting cycle: "
0871                     << Context.print(OuterDivCycle->getHeader()) << "\n");
0872 
0873   if (!DivergentExitCycles.insert(OuterDivCycle).second)
0874     return;
0875 
0876   // Exit divergence does not matter if the cycle itself is assumed to
0877   // be divergent.
0878   for (const auto *C : AssumedDivergent) {
0879     if (C->contains(OuterDivCycle))
0880       return;
0881   }
0882 
0883   analyzeCycleExitDivergence(*OuterDivCycle);
0884 }
0885 
0886 template <typename ContextT>
0887 void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
0888     const BlockT &BB) {
0889   LLVM_DEBUG(dbgs() << "taintAndPushAllDefs " << Context.print(&BB) << "\n");
0890   for (const auto &I : instrs(BB)) {
0891     // Terminators do not produce values; they are divergent only if
0892     // the condition is divergent. That is handled when the divergent
0893     // condition is placed in the worklist.
0894     if (I.isTerminator())
0895       break;
0896 
0897     markDivergent(I);
0898   }
0899 }
0900 
0901 /// Mark divergent phi nodes in a join block
0902 template <typename ContextT>
0903 void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
0904     const BlockT &JoinBlock) {
0905   LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock)
0906                     << "\n");
0907   for (const auto &Phi : JoinBlock.phis()) {
0908     // FIXME: The non-undef value is not constant per se; it just happens to be
0909     // uniform and may not dominate this PHI. So assuming that the same value
0910     // reaches along all incoming edges may itself be undefined behaviour. This
0911     // particular interpretation of the undef value was added to
0912     // DivergenceAnalysis in the following review:
0913     //
0914     // https://reviews.llvm.org/D19013
0915     if (ContextT::isConstantOrUndefValuePhi(Phi))
0916       continue;
0917     markDivergent(Phi);
0918   }
0919 }
0920 
0921 /// Add \p Candidate to \p Cycles if it is not already contained in \p Cycles.
0922 ///
0923 /// \return true iff \p Candidate was added to \p Cycles.
0924 template <typename CycleT>
0925 static bool insertIfNotContained(SmallVector<CycleT *> &Cycles,
0926                                  CycleT *Candidate) {
0927   if (llvm::any_of(Cycles,
0928                    [Candidate](CycleT *C) { return C->contains(Candidate); }))
0929     return false;
0930   Cycles.push_back(Candidate);
0931   return true;
0932 }
0933 
0934 /// Return the outermost cycle made divergent by branch outside it.
0935 ///
0936 /// If two paths that diverged outside an irreducible cycle join
0937 /// inside that cycle, then that whole cycle is assumed to be
0938 /// divergent. This does not apply if the cycle is reducible.
0939 template <typename CycleT, typename BlockT>
0940 static const CycleT *getExtDivCycle(const CycleT *Cycle,
0941                                     const BlockT *DivTermBlock,
0942                                     const BlockT *JoinBlock) {
0943   assert(Cycle);
0944   assert(Cycle->contains(JoinBlock));
0945 
0946   if (Cycle->contains(DivTermBlock))
0947     return nullptr;
0948 
0949   const auto *OriginalCycle = Cycle;
0950   const auto *Parent = Cycle->getParentCycle();
0951   while (Parent && !Parent->contains(DivTermBlock)) {
0952     Cycle = Parent;
0953     Parent = Cycle->getParentCycle();
0954   }
0955 
0956   // If the original cycle is not the outermost cycle, then the outermost cycle
0957   // is irreducible. If the outermost cycle were reducible, then external
0958   // diverged paths would not reach the original inner cycle.
0959   (void)OriginalCycle;
0960   assert(Cycle == OriginalCycle || !Cycle->isReducible());
0961 
0962   if (Cycle->isReducible()) {
0963     assert(Cycle->getHeader() == JoinBlock);
0964     return nullptr;
0965   }
0966 
0967   LLVM_DEBUG(dbgs() << "cycle made divergent by external branch\n");
0968   return Cycle;
0969 }
0970 
0971 /// Return the outermost cycle made divergent by branch inside it.
0972 ///
0973 /// This checks the "diverged entry" criterion defined in the
0974 /// docs/ConvergenceAnalysis.html.
0975 template <typename ContextT, typename CycleT, typename BlockT,
0976           typename DominatorTreeT>
0977 static const CycleT *
0978 getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
0979                const BlockT *JoinBlock, const DominatorTreeT &DT,
0980                ContextT &Context) {
0981   LLVM_DEBUG(dbgs() << "examine join " << Context.print(JoinBlock)
0982                     << " for internal branch " << Context.print(DivTermBlock)
0983                     << "\n");
0984   if (DT.properlyDominates(DivTermBlock, JoinBlock))
0985     return nullptr;
0986 
0987   // Find the smallest common cycle, if one exists.
0988   assert(Cycle && Cycle->contains(JoinBlock));
0989   while (Cycle && !Cycle->contains(DivTermBlock)) {
0990     Cycle = Cycle->getParentCycle();
0991   }
0992   if (!Cycle || Cycle->isReducible())
0993     return nullptr;
0994 
0995   if (DT.properlyDominates(Cycle->getHeader(), JoinBlock))
0996     return nullptr;
0997 
0998   LLVM_DEBUG(dbgs() << "  header " << Context.print(Cycle->getHeader())
0999                     << " does not dominate join\n");
1000 
1001   const auto *Parent = Cycle->getParentCycle();
1002   while (Parent && !DT.properlyDominates(Parent->getHeader(), JoinBlock)) {
1003     LLVM_DEBUG(dbgs() << "  header " << Context.print(Parent->getHeader())
1004                       << " does not dominate join\n");
1005     Cycle = Parent;
1006     Parent = Parent->getParentCycle();
1007   }
1008 
1009   LLVM_DEBUG(dbgs() << "  cycle made divergent by internal branch\n");
1010   return Cycle;
1011 }
1012 
1013 template <typename ContextT, typename CycleT, typename BlockT,
1014           typename DominatorTreeT>
1015 static const CycleT *
1016 getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
1017                            const BlockT *JoinBlock, const DominatorTreeT &DT,
1018                            ContextT &Context) {
1019   if (!Cycle)
1020     return nullptr;
1021 
1022   // First try to expand Cycle to the largest that contains JoinBlock
1023   // but not DivTermBlock.
1024   const auto *Ext = getExtDivCycle(Cycle, DivTermBlock, JoinBlock);
1025 
1026   // Continue expanding to the largest cycle that contains both.
1027   const auto *Int = getIntDivCycle(Cycle, DivTermBlock, JoinBlock, DT, Context);
1028 
1029   if (Int)
1030     return Int;
1031   return Ext;
1032 }
1033 
1034 template <typename ContextT>
1035 bool GenericUniformityAnalysisImpl<ContextT>::isTemporalDivergent(
1036     const BlockT &ObservingBlock, const InstructionT &Def) const {
1037   const BlockT *DefBlock = Def.getParent();
1038   for (const CycleT *Cycle = CI.getCycle(DefBlock);
1039        Cycle && !Cycle->contains(&ObservingBlock);
1040        Cycle = Cycle->getParentCycle()) {
1041     if (DivergentExitCycles.contains(Cycle)) {
1042       return true;
1043     }
1044   }
1045   return false;
1046 }
1047 
1048 template <typename ContextT>
1049 void GenericUniformityAnalysisImpl<ContextT>::analyzeControlDivergence(
1050     const InstructionT &Term) {
1051   const auto *DivTermBlock = Term.getParent();
1052   DivergentTermBlocks.insert(DivTermBlock);
1053   LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Context.print(DivTermBlock)
1054                     << "\n");
1055 
1056   // Don't propagate divergence from unreachable blocks.
1057   if (!DT.isReachableFromEntry(DivTermBlock))
1058     return;
1059 
1060   const auto &DivDesc = SDA.getJoinBlocks(DivTermBlock);
1061   SmallVector<const CycleT *> DivCycles;
1062 
1063   // Iterate over all blocks now reachable by a disjoint path join
1064   for (const auto *JoinBlock : DivDesc.JoinDivBlocks) {
1065     const auto *Cycle = CI.getCycle(JoinBlock);
1066     LLVM_DEBUG(dbgs() << "visiting join block " << Context.print(JoinBlock)
1067                       << "\n");
1068     if (const auto *Outermost = getOutermostDivergentCycle(
1069             Cycle, DivTermBlock, JoinBlock, DT, Context)) {
1070       LLVM_DEBUG(dbgs() << "found divergent cycle\n");
1071       DivCycles.push_back(Outermost);
1072       continue;
1073     }
1074     taintAndPushPhiNodes(*JoinBlock);
1075   }
1076 
1077   // Sort by order of decreasing depth. This allows later cycles to be skipped
1078   // because they are already contained in earlier ones.
1079   llvm::sort(DivCycles, [](const CycleT *A, const CycleT *B) {
1080     return A->getDepth() > B->getDepth();
1081   });
1082 
1083   // Cycles that are assumed divergent due to the diverged entry
1084   // criterion potentially contain temporal divergence depending on
1085   // the DFS chosen. Conservatively, all values produced in such a
1086   // cycle are assumed divergent. "Cycle invariant" values may be
1087   // assumed uniform, but that requires further analysis.
1088   for (auto *C : DivCycles) {
1089     if (!insertIfNotContained(AssumedDivergent, C))
1090       continue;
1091     LLVM_DEBUG(dbgs() << "process divergent cycle\n");
1092     for (const BlockT *BB : C->blocks()) {
1093       taintAndPushAllDefs(*BB);
1094     }
1095   }
1096 
1097   const auto *BranchCycle = CI.getCycle(DivTermBlock);
1098   assert(DivDesc.CycleDivBlocks.empty() || BranchCycle);
1099   for (const auto *DivExitBlock : DivDesc.CycleDivBlocks) {
1100     propagateCycleExitDivergence(*DivExitBlock, *BranchCycle);
1101   }
1102 }
1103 
1104 template <typename ContextT>
1105 void GenericUniformityAnalysisImpl<ContextT>::compute() {
1106   // Initialize worklist.
1107   auto DivValuesCopy = DivergentValues;
1108   for (const auto DivVal : DivValuesCopy) {
1109     assert(isDivergent(DivVal) && "Worklist invariant violated!");
1110     pushUsers(DivVal);
1111   }
1112 
1113   // All values on the Worklist are divergent.
1114   // Their users may not have been updated yet.
1115   while (!Worklist.empty()) {
1116     const InstructionT *I = Worklist.back();
1117     Worklist.pop_back();
1118 
1119     LLVM_DEBUG(dbgs() << "worklist pop: " << Context.print(I) << "\n");
1120 
1121     if (I->isTerminator()) {
1122       analyzeControlDivergence(*I);
1123       continue;
1124     }
1125 
1126     // propagate value divergence to users
1127     assert(isDivergent(*I) && "Worklist invariant violated!");
1128     pushUsers(*I);
1129   }
1130 }
1131 
1132 template <typename ContextT>
1133 bool GenericUniformityAnalysisImpl<ContextT>::isAlwaysUniform(
1134     const InstructionT &Instr) const {
1135   return UniformOverrides.contains(&Instr);
1136 }
1137 
1138 template <typename ContextT>
1139 GenericUniformityInfo<ContextT>::GenericUniformityInfo(
1140     const DominatorTreeT &DT, const CycleInfoT &CI,
1141     const TargetTransformInfo *TTI) {
1142   DA.reset(new ImplT{DT, CI, TTI});
1143 }
1144 
1145 template <typename ContextT>
1146 void GenericUniformityAnalysisImpl<ContextT>::print(raw_ostream &OS) const {
1147   bool haveDivergentArgs = false;
1148 
1149   // Control flow instructions may be divergent even if their inputs are
1150   // uniform. Thus, although exceedingly rare, it is possible to have a program
1151   // with no divergent values but with divergent control structures.
1152   if (DivergentValues.empty() && DivergentTermBlocks.empty() &&
1153       DivergentExitCycles.empty()) {
1154     OS << "ALL VALUES UNIFORM\n";
1155     return;
1156   }
1157 
1158   for (const auto &entry : DivergentValues) {
1159     const BlockT *parent = Context.getDefBlock(entry);
1160     if (!parent) {
1161       if (!haveDivergentArgs) {
1162         OS << "DIVERGENT ARGUMENTS:\n";
1163         haveDivergentArgs = true;
1164       }
1165       OS << "  DIVERGENT: " << Context.print(entry) << '\n';
1166     }
1167   }
1168 
1169   if (!AssumedDivergent.empty()) {
1170     OS << "CYCLES ASSSUMED DIVERGENT:\n";
1171     for (const CycleT *cycle : AssumedDivergent) {
1172       OS << "  " << cycle->print(Context) << '\n';
1173     }
1174   }
1175 
1176   if (!DivergentExitCycles.empty()) {
1177     OS << "CYCLES WITH DIVERGENT EXIT:\n";
1178     for (const CycleT *cycle : DivergentExitCycles) {
1179       OS << "  " << cycle->print(Context) << '\n';
1180     }
1181   }
1182 
1183   for (auto &block : F) {
1184     OS << "\nBLOCK " << Context.print(&block) << '\n';
1185 
1186     OS << "DEFINITIONS\n";
1187     SmallVector<ConstValueRefT, 16> defs;
1188     Context.appendBlockDefs(defs, block);
1189     for (auto value : defs) {
1190       if (isDivergent(value))
1191         OS << "  DIVERGENT: ";
1192       else
1193         OS << "             ";
1194       OS << Context.print(value) << '\n';
1195     }
1196 
1197     OS << "TERMINATORS\n";
1198     SmallVector<const InstructionT *, 8> terms;
1199     Context.appendBlockTerms(terms, block);
1200     bool divergentTerminators = hasDivergentTerminator(block);
1201     for (auto *T : terms) {
1202       if (divergentTerminators)
1203         OS << "  DIVERGENT: ";
1204       else
1205         OS << "             ";
1206       OS << Context.print(T) << '\n';
1207     }
1208 
1209     OS << "END BLOCK\n";
1210   }
1211 }
1212 
1213 template <typename ContextT>
1214 bool GenericUniformityInfo<ContextT>::hasDivergence() const {
1215   return DA->hasDivergence();
1216 }
1217 
1218 template <typename ContextT>
1219 const typename ContextT::FunctionT &
1220 GenericUniformityInfo<ContextT>::getFunction() const {
1221   return DA->getFunction();
1222 }
1223 
1224 /// Whether \p V is divergent at its definition.
1225 template <typename ContextT>
1226 bool GenericUniformityInfo<ContextT>::isDivergent(ConstValueRefT V) const {
1227   return DA->isDivergent(V);
1228 }
1229 
1230 template <typename ContextT>
1231 bool GenericUniformityInfo<ContextT>::isDivergent(const InstructionT *I) const {
1232   return DA->isDivergent(*I);
1233 }
1234 
1235 template <typename ContextT>
1236 bool GenericUniformityInfo<ContextT>::isDivergentUse(const UseT &U) const {
1237   return DA->isDivergentUse(U);
1238 }
1239 
1240 template <typename ContextT>
1241 bool GenericUniformityInfo<ContextT>::hasDivergentTerminator(const BlockT &B) {
1242   return DA->hasDivergentTerminator(B);
1243 }
1244 
1245 /// \brief T helper function for printing.
1246 template <typename ContextT>
1247 void GenericUniformityInfo<ContextT>::print(raw_ostream &out) const {
1248   DA->print(out);
1249 }
1250 
1251 template <typename ContextT>
1252 void llvm::ModifiedPostOrder<ContextT>::computeStackPO(
1253     SmallVectorImpl<const BlockT *> &Stack, const CycleInfoT &CI,
1254     const CycleT *Cycle, SmallPtrSetImpl<const BlockT *> &Finalized) {
1255   LLVM_DEBUG(dbgs() << "inside computeStackPO\n");
1256   while (!Stack.empty()) {
1257     auto *NextBB = Stack.back();
1258     if (Finalized.count(NextBB)) {
1259       Stack.pop_back();
1260       continue;
1261     }
1262     LLVM_DEBUG(dbgs() << "  visiting " << CI.getSSAContext().print(NextBB)
1263                       << "\n");
1264     auto *NestedCycle = CI.getCycle(NextBB);
1265     if (Cycle != NestedCycle && (!Cycle || Cycle->contains(NestedCycle))) {
1266       LLVM_DEBUG(dbgs() << "  found a cycle\n");
1267       while (NestedCycle->getParentCycle() != Cycle)
1268         NestedCycle = NestedCycle->getParentCycle();
1269 
1270       SmallVector<BlockT *, 3> NestedExits;
1271       NestedCycle->getExitBlocks(NestedExits);
1272       bool PushedNodes = false;
1273       for (auto *NestedExitBB : NestedExits) {
1274         LLVM_DEBUG(dbgs() << "  examine exit: "
1275                           << CI.getSSAContext().print(NestedExitBB) << "\n");
1276         if (Cycle && !Cycle->contains(NestedExitBB))
1277           continue;
1278         if (Finalized.count(NestedExitBB))
1279           continue;
1280         PushedNodes = true;
1281         Stack.push_back(NestedExitBB);
1282         LLVM_DEBUG(dbgs() << "  pushed exit: "
1283                           << CI.getSSAContext().print(NestedExitBB) << "\n");
1284       }
1285       if (!PushedNodes) {
1286         // All loop exits finalized -> finish this node
1287         Stack.pop_back();
1288         computeCyclePO(CI, NestedCycle, Finalized);
1289       }
1290       continue;
1291     }
1292 
1293     LLVM_DEBUG(dbgs() << "  no nested cycle, going into DAG\n");
1294     // DAG-style
1295     bool PushedNodes = false;
1296     for (auto *SuccBB : successors(NextBB)) {
1297       LLVM_DEBUG(dbgs() << "  examine succ: "
1298                         << CI.getSSAContext().print(SuccBB) << "\n");
1299       if (Cycle && !Cycle->contains(SuccBB))
1300         continue;
1301       if (Finalized.count(SuccBB))
1302         continue;
1303       PushedNodes = true;
1304       Stack.push_back(SuccBB);
1305       LLVM_DEBUG(dbgs() << "  pushed succ: " << CI.getSSAContext().print(SuccBB)
1306                         << "\n");
1307     }
1308     if (!PushedNodes) {
1309       // Never push nodes twice
1310       LLVM_DEBUG(dbgs() << "  finishing node: "
1311                         << CI.getSSAContext().print(NextBB) << "\n");
1312       Stack.pop_back();
1313       Finalized.insert(NextBB);
1314       appendBlock(*NextBB);
1315     }
1316   }
1317   LLVM_DEBUG(dbgs() << "exited computeStackPO\n");
1318 }
1319 
1320 template <typename ContextT>
1321 void ModifiedPostOrder<ContextT>::computeCyclePO(
1322     const CycleInfoT &CI, const CycleT *Cycle,
1323     SmallPtrSetImpl<const BlockT *> &Finalized) {
1324   LLVM_DEBUG(dbgs() << "inside computeCyclePO\n");
1325   SmallVector<const BlockT *> Stack;
1326   auto *CycleHeader = Cycle->getHeader();
1327 
1328   LLVM_DEBUG(dbgs() << "  noted header: "
1329                     << CI.getSSAContext().print(CycleHeader) << "\n");
1330   assert(!Finalized.count(CycleHeader));
1331   Finalized.insert(CycleHeader);
1332 
1333   // Visit the header last
1334   LLVM_DEBUG(dbgs() << "  finishing header: "
1335                     << CI.getSSAContext().print(CycleHeader) << "\n");
1336   appendBlock(*CycleHeader, Cycle->isReducible());
1337 
1338   // Initialize with immediate successors
1339   for (auto *BB : successors(CycleHeader)) {
1340     LLVM_DEBUG(dbgs() << "  examine succ: " << CI.getSSAContext().print(BB)
1341                       << "\n");
1342     if (!Cycle->contains(BB))
1343       continue;
1344     if (BB == CycleHeader)
1345       continue;
1346     if (!Finalized.count(BB)) {
1347       LLVM_DEBUG(dbgs() << "  pushed succ: " << CI.getSSAContext().print(BB)
1348                         << "\n");
1349       Stack.push_back(BB);
1350     }
1351   }
1352 
1353   // Compute PO inside region
1354   computeStackPO(Stack, CI, Cycle, Finalized);
1355 
1356   LLVM_DEBUG(dbgs() << "exited computeCyclePO\n");
1357 }
1358 
1359 /// \brief Generically compute the modified post order.
1360 template <typename ContextT>
1361 void llvm::ModifiedPostOrder<ContextT>::compute(const CycleInfoT &CI) {
1362   SmallPtrSet<const BlockT *, 32> Finalized;
1363   SmallVector<const BlockT *> Stack;
1364   auto *F = CI.getFunction();
1365   Stack.reserve(24); // FIXME made-up number
1366   Stack.push_back(&F->front());
1367   computeStackPO(Stack, CI, nullptr, Finalized);
1368 }
1369 
1370 } // namespace llvm
1371 
1372 #undef DEBUG_TYPE
1373 
1374 #endif // LLVM_ADT_GENERICUNIFORMITYIMPL_H