File indexing completed on 2026-05-10 08:43:05
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044 #ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H
0045 #define LLVM_ADT_GENERICUNIFORMITYIMPL_H
0046
0047 #include "llvm/ADT/GenericUniformityInfo.h"
0048
0049 #include "llvm/ADT/DenseSet.h"
0050 #include "llvm/ADT/STLExtras.h"
0051 #include "llvm/ADT/SmallPtrSet.h"
0052 #include "llvm/ADT/SparseBitVector.h"
0053 #include "llvm/ADT/StringExtras.h"
0054 #include "llvm/Support/raw_ostream.h"
0055
0056 #define DEBUG_TYPE "uniformity"
0057
0058 namespace llvm {
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073
0074
0075
0076
0077
0078
0079
0080
0081
0082
0083
0084
0085
0086 template <typename ContextT> class ModifiedPostOrder {
0087 public:
0088 using BlockT = typename ContextT::BlockT;
0089 using FunctionT = typename ContextT::FunctionT;
0090 using DominatorTreeT = typename ContextT::DominatorTreeT;
0091
0092 using CycleInfoT = GenericCycleInfo<ContextT>;
0093 using CycleT = typename CycleInfoT::CycleT;
0094 using const_iterator = typename std::vector<BlockT *>::const_iterator;
0095
0096 ModifiedPostOrder(const ContextT &C) : Context(C) {}
0097
0098 bool empty() const { return m_order.empty(); }
0099 size_t size() const { return m_order.size(); }
0100
0101 void clear() { m_order.clear(); }
0102 void compute(const CycleInfoT &CI);
0103
0104 unsigned count(BlockT *BB) const { return POIndex.count(BB); }
0105 const BlockT *operator[](size_t idx) const { return m_order[idx]; }
0106
0107 void appendBlock(const BlockT &BB, bool isReducibleCycleHeader = false) {
0108 POIndex[&BB] = m_order.size();
0109 m_order.push_back(&BB);
0110 LLVM_DEBUG(dbgs() << "ModifiedPO(" << POIndex[&BB]
0111 << "): " << Context.print(&BB) << "\n");
0112 if (isReducibleCycleHeader)
0113 ReducibleCycleHeaders.insert(&BB);
0114 }
0115
0116 unsigned getIndex(const BlockT *BB) const {
0117 assert(POIndex.count(BB));
0118 return POIndex.lookup(BB);
0119 }
0120
0121 bool isReducibleCycleHeader(const BlockT *BB) const {
0122 return ReducibleCycleHeaders.contains(BB);
0123 }
0124
0125 private:
0126 SmallVector<const BlockT *> m_order;
0127 DenseMap<const BlockT *, unsigned> POIndex;
0128 SmallPtrSet<const BlockT *, 32> ReducibleCycleHeaders;
0129 const ContextT &Context;
0130
0131 void computeCyclePO(const CycleInfoT &CI, const CycleT *Cycle,
0132 SmallPtrSetImpl<const BlockT *> &Finalized);
0133
0134 void computeStackPO(SmallVectorImpl<const BlockT *> &Stack,
0135 const CycleInfoT &CI, const CycleT *Cycle,
0136 SmallPtrSetImpl<const BlockT *> &Finalized);
0137 };
0138
0139 template <typename> class DivergencePropagator;
0140
0141
0142
0143
0144
0145
0146
0147
0148
0149
0150
0151
0152
0153
0154
0155
0156
0157
0158
0159
0160
0161
0162
0163
0164
0165
0166
0167
0168
0169
0170
0171
0172
0173
0174
0175
0176
0177
0178
0179
0180
0181
0182
0183
0184
0185
0186
0187
0188
0189
0190
0191
0192
0193
0194
0195
0196
0197
0198
0199
0200
0201
0202
0203
0204
0205
0206
0207
0208
0209
0210
0211
0212
0213
0214
0215
0216
0217
0218
0219
0220
0221
0222
0223
0224
0225
0226
0227
0228
0229
0230
0231
0232
0233
0234
0235
0236
0237
0238
0239
0240
0241
0242
0243
0244
0245
0246
0247
0248
0249
0250
0251
0252
0253
0254
0255
0256
0257
0258
0259 template <typename ContextT> class GenericSyncDependenceAnalysis {
0260 public:
0261 using BlockT = typename ContextT::BlockT;
0262 using DominatorTreeT = typename ContextT::DominatorTreeT;
0263 using FunctionT = typename ContextT::FunctionT;
0264 using ValueRefT = typename ContextT::ValueRefT;
0265 using InstructionT = typename ContextT::InstructionT;
0266
0267 using CycleInfoT = GenericCycleInfo<ContextT>;
0268 using CycleT = typename CycleInfoT::CycleT;
0269
0270 using ConstBlockSet = SmallPtrSet<const BlockT *, 4>;
0271 using ModifiedPO = ModifiedPostOrder<ContextT>;
0272
0273
0274
0275
0276
0277
0278
0279
0280 using BlockLabelMap = DenseMap<const BlockT *, const BlockT *>;
0281
0282
0283
0284 struct DivergenceDescriptor {
0285
0286 ConstBlockSet JoinDivBlocks;
0287
0288 ConstBlockSet CycleDivBlocks;
0289
0290 BlockLabelMap BlockLabels;
0291 };
0292
0293 using DivergencePropagatorT = DivergencePropagator<ContextT>;
0294
0295 GenericSyncDependenceAnalysis(const ContextT &Context,
0296 const DominatorTreeT &DT, const CycleInfoT &CI);
0297
0298
0299
0300
0301
0302
0303
0304
0305
0306
0307 const DivergenceDescriptor &getJoinBlocks(const BlockT *DivTermBlock);
0308
0309 private:
0310 static DivergenceDescriptor EmptyDivergenceDesc;
0311
0312 ModifiedPO CyclePO;
0313
0314 const DominatorTreeT &DT;
0315 const CycleInfoT &CI;
0316
0317 DenseMap<const BlockT *, std::unique_ptr<DivergenceDescriptor>>
0318 CachedControlDivDescs;
0319 };
0320
0321
0322
0323
0324
0325
0326
0327 template <typename ContextT> class GenericUniformityAnalysisImpl {
0328 public:
0329 using BlockT = typename ContextT::BlockT;
0330 using FunctionT = typename ContextT::FunctionT;
0331 using ValueRefT = typename ContextT::ValueRefT;
0332 using ConstValueRefT = typename ContextT::ConstValueRefT;
0333 using UseT = typename ContextT::UseT;
0334 using InstructionT = typename ContextT::InstructionT;
0335 using DominatorTreeT = typename ContextT::DominatorTreeT;
0336
0337 using CycleInfoT = GenericCycleInfo<ContextT>;
0338 using CycleT = typename CycleInfoT::CycleT;
0339
0340 using SyncDependenceAnalysisT = GenericSyncDependenceAnalysis<ContextT>;
0341 using DivergenceDescriptorT =
0342 typename SyncDependenceAnalysisT::DivergenceDescriptor;
0343 using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
0344
0345 GenericUniformityAnalysisImpl(const DominatorTreeT &DT, const CycleInfoT &CI,
0346 const TargetTransformInfo *TTI)
0347 : Context(CI.getSSAContext()), F(*Context.getFunction()), CI(CI),
0348 TTI(TTI), DT(DT), SDA(Context, DT, CI) {}
0349
0350 void initialize();
0351
0352 const FunctionT &getFunction() const { return F; }
0353
0354
0355 void addUniformOverride(const InstructionT &Instr);
0356
0357
0358 void markDivergent(const InstructionT &I);
0359
0360
0361
0362 bool markDivergent(ConstValueRefT DivVal);
0363
0364
0365
0366 bool markDefsDivergent(const InstructionT &Instr);
0367
0368
0369
0370 void compute();
0371
0372
0373 bool hasDivergence() const { return !DivergentValues.empty(); }
0374
0375
0376
0377 bool isAlwaysUniform(const InstructionT &Instr) const;
0378
0379 bool hasDivergentDefs(const InstructionT &I) const;
0380
0381 bool isDivergent(const InstructionT &I) const {
0382 if (I.isTerminator()) {
0383 return DivergentTermBlocks.contains(I.getParent());
0384 }
0385 return hasDivergentDefs(I);
0386 };
0387
0388
0389 bool isDivergent(ConstValueRefT V) const { return DivergentValues.count(V); }
0390
0391 bool isDivergentUse(const UseT &U) const;
0392
0393 bool hasDivergentTerminator(const BlockT &B) const {
0394 return DivergentTermBlocks.contains(&B);
0395 }
0396
0397 void print(raw_ostream &out) const;
0398
0399 protected:
0400
0401 struct PhiInput {
0402 ConstValueRefT value;
0403 BlockT *predBlock;
0404
0405 PhiInput(ConstValueRefT value, BlockT *predBlock)
0406 : value(value), predBlock(predBlock) {}
0407 };
0408
0409 const ContextT &Context;
0410 const FunctionT &F;
0411 const CycleInfoT &CI;
0412 const TargetTransformInfo *TTI = nullptr;
0413
0414
0415 DenseSet<ConstValueRefT> DivergentValues;
0416 SmallPtrSet<const BlockT *, 32> DivergentTermBlocks;
0417
0418
0419 std::vector<const InstructionT *> Worklist;
0420
0421
0422
0423 void analyzeControlDivergence(const InstructionT &Term);
0424
0425 private:
0426 const DominatorTreeT &DT;
0427
0428
0429 SmallPtrSet<const CycleT *, 16> DivergentExitCycles;
0430
0431
0432
0433
0434
0435 SmallVector<const CycleT *> AssumedDivergent;
0436
0437
0438 SyncDependenceAnalysisT SDA;
0439
0440
0441 SmallPtrSet<const InstructionT *, 32> UniformOverrides;
0442
0443
0444
0445 void taintAndPushAllDefs(const BlockT &JoinBlock);
0446
0447
0448
0449 void taintAndPushPhiNodes(const BlockT &JoinBlock);
0450
0451
0452
0453
0454 void propagateCycleExitDivergence(const BlockT &DivExit,
0455 const CycleT &DivCycle);
0456
0457
0458 void analyzeCycleExitDivergence(const CycleT &DefCycle);
0459
0460
0461 void propagateTemporalDivergence(const InstructionT &I,
0462 const CycleT &DefCycle);
0463
0464
0465 void pushUsers(const InstructionT &I);
0466 void pushUsers(ConstValueRefT V);
0467
0468 bool usesValueFromCycle(const InstructionT &I, const CycleT &DefCycle) const;
0469
0470
0471 bool isTemporalDivergent(const BlockT &ObservingBlock,
0472 const InstructionT &Def) const;
0473 };
0474
0475 template <typename ImplT>
0476 void GenericUniformityAnalysisImplDeleter<ImplT>::operator()(ImplT *Impl) {
0477 delete Impl;
0478 }
0479
0480
0481 template <typename ContextT> class DivergencePropagator {
0482 public:
0483 using BlockT = typename ContextT::BlockT;
0484 using DominatorTreeT = typename ContextT::DominatorTreeT;
0485 using FunctionT = typename ContextT::FunctionT;
0486 using ValueRefT = typename ContextT::ValueRefT;
0487
0488 using CycleInfoT = GenericCycleInfo<ContextT>;
0489 using CycleT = typename CycleInfoT::CycleT;
0490
0491 using ModifiedPO = ModifiedPostOrder<ContextT>;
0492 using SyncDependenceAnalysisT = GenericSyncDependenceAnalysis<ContextT>;
0493 using DivergenceDescriptorT =
0494 typename SyncDependenceAnalysisT::DivergenceDescriptor;
0495 using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
0496
0497 const ModifiedPO &CyclePOT;
0498 const DominatorTreeT &DT;
0499 const CycleInfoT &CI;
0500 const BlockT &DivTermBlock;
0501 const ContextT &Context;
0502
0503
0504
0505
0506
0507 SparseBitVector<> FreshLabels;
0508
0509
0510 std::unique_ptr<DivergenceDescriptorT> DivDesc;
0511 BlockLabelMapT &BlockLabels;
0512
0513 DivergencePropagator(const ModifiedPO &CyclePOT, const DominatorTreeT &DT,
0514 const CycleInfoT &CI, const BlockT &DivTermBlock)
0515 : CyclePOT(CyclePOT), DT(DT), CI(CI), DivTermBlock(DivTermBlock),
0516 Context(CI.getSSAContext()), DivDesc(new DivergenceDescriptorT),
0517 BlockLabels(DivDesc->BlockLabels) {}
0518
0519 void printDefs(raw_ostream &Out) {
0520 Out << "Propagator::BlockLabels {\n";
0521 for (int BlockIdx = (int)CyclePOT.size() - 1; BlockIdx >= 0; --BlockIdx) {
0522 const auto *Block = CyclePOT[BlockIdx];
0523 const auto *Label = BlockLabels[Block];
0524 Out << Context.print(Block) << "(" << BlockIdx << ") : ";
0525 if (!Label) {
0526 Out << "<null>\n";
0527 } else {
0528 Out << Context.print(Label) << "\n";
0529 }
0530 }
0531 Out << "}\n";
0532 }
0533
0534
0535
0536 bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel) {
0537 const auto *OldLabel = BlockLabels[&SuccBlock];
0538
0539 LLVM_DEBUG(dbgs() << "labeling " << Context.print(&SuccBlock) << ":\n"
0540 << "\tpushed label: " << Context.print(&PushedLabel)
0541 << "\n"
0542 << "\told label: " << Context.print(OldLabel) << "\n");
0543
0544
0545 if (OldLabel == &PushedLabel)
0546 return false;
0547
0548 if (OldLabel != &SuccBlock) {
0549 auto SuccIdx = CyclePOT.getIndex(&SuccBlock);
0550
0551 LLVM_DEBUG(dbgs() << "\tfresh label: " << SuccIdx << "\n");
0552 FreshLabels.set(SuccIdx);
0553 }
0554
0555
0556 if (!OldLabel) {
0557 LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&PushedLabel)
0558 << "\n");
0559 BlockLabels[&SuccBlock] = &PushedLabel;
0560 return false;
0561 }
0562
0563
0564
0565 LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&SuccBlock) << "\n");
0566 BlockLabels[&SuccBlock] = &SuccBlock;
0567
0568 return true;
0569 }
0570
0571
0572
0573 bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label) {
0574 if (!computeJoin(ExitBlock, Label))
0575 return false;
0576
0577
0578 DivDesc->CycleDivBlocks.insert(&ExitBlock);
0579 LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(&ExitBlock)
0580 << "\n");
0581 return true;
0582 }
0583
0584
0585 bool visitEdge(const BlockT &SuccBlock, const BlockT &Label) {
0586 if (!computeJoin(SuccBlock, Label))
0587 return false;
0588
0589
0590 DivDesc->JoinDivBlocks.insert(&SuccBlock);
0591 LLVM_DEBUG(dbgs() << "\tDivergent join: " << Context.print(&SuccBlock)
0592 << "\n");
0593 return true;
0594 }
0595
0596 std::unique_ptr<DivergenceDescriptorT> computeJoinPoints() {
0597 assert(DivDesc);
0598
0599 LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: "
0600 << Context.print(&DivTermBlock) << "\n");
0601
0602
0603 int FloorIdx = CyclePOT.size() - 1;
0604 const BlockT *FloorLabel = nullptr;
0605 int DivTermIdx = CyclePOT.getIndex(&DivTermBlock);
0606
0607
0608 auto const *DivTermCycle = CI.getCycle(&DivTermBlock);
0609 for (const auto *SuccBlock : successors(&DivTermBlock)) {
0610 if (DivTermCycle && !DivTermCycle->contains(SuccBlock)) {
0611
0612
0613
0614 DivDesc->CycleDivBlocks.insert(SuccBlock);
0615 LLVM_DEBUG(dbgs() << "\tImmediate divergent cycle exit: "
0616 << Context.print(SuccBlock) << "\n");
0617 }
0618 auto SuccIdx = CyclePOT.getIndex(SuccBlock);
0619 visitEdge(*SuccBlock, *SuccBlock);
0620 FloorIdx = std::min<int>(FloorIdx, SuccIdx);
0621 }
0622
0623 while (true) {
0624 auto BlockIdx = FreshLabels.find_last();
0625 if (BlockIdx == -1 || BlockIdx < FloorIdx)
0626 break;
0627
0628 LLVM_DEBUG(dbgs() << "Current labels:\n"; printDefs(dbgs()));
0629
0630 FreshLabels.reset(BlockIdx);
0631 if (BlockIdx == DivTermIdx) {
0632 LLVM_DEBUG(dbgs() << "Skipping DivTermBlock\n");
0633 continue;
0634 }
0635
0636 const auto *Block = CyclePOT[BlockIdx];
0637 LLVM_DEBUG(dbgs() << "visiting " << Context.print(Block) << " at index "
0638 << BlockIdx << "\n");
0639
0640 const auto *Label = BlockLabels[Block];
0641 assert(Label);
0642
0643 bool CausedJoin = false;
0644 int LoweredFloorIdx = FloorIdx;
0645
0646
0647
0648
0649
0650
0651
0652
0653
0654
0655
0656
0657
0658
0659
0660
0661 auto getReducibleParent = [&](const BlockT *Block) -> const CycleT * {
0662 if (!CyclePOT.isReducibleCycleHeader(Block))
0663 return nullptr;
0664 const auto *BlockCycle = CI.getCycle(Block);
0665 if (BlockCycle->contains(&DivTermBlock))
0666 return BlockCycle;
0667 return nullptr;
0668 };
0669
0670 if (const auto *BlockCycle = getReducibleParent(Block)) {
0671 SmallVector<BlockT *, 4> BlockCycleExits;
0672 BlockCycle->getExitBlocks(BlockCycleExits);
0673 for (auto *BlockCycleExit : BlockCycleExits) {
0674 CausedJoin |= visitCycleExitEdge(*BlockCycleExit, *Label);
0675 LoweredFloorIdx =
0676 std::min<int>(LoweredFloorIdx, CyclePOT.getIndex(BlockCycleExit));
0677 }
0678 } else {
0679 for (const auto *SuccBlock : successors(Block)) {
0680 CausedJoin |= visitEdge(*SuccBlock, *Label);
0681 LoweredFloorIdx =
0682 std::min<int>(LoweredFloorIdx, CyclePOT.getIndex(SuccBlock));
0683 }
0684 }
0685
0686
0687 if (CausedJoin) {
0688
0689 FloorIdx = LoweredFloorIdx;
0690 } else if (FloorLabel != Label) {
0691
0692
0693 FloorIdx = LoweredFloorIdx;
0694 FloorLabel = Label;
0695 }
0696 }
0697
0698 LLVM_DEBUG(dbgs() << "Final labeling:\n"; printDefs(dbgs()));
0699
0700
0701
0702
0703 for (const auto *Cycle = CI.getCycle(&DivTermBlock); Cycle;
0704 Cycle = Cycle->getParentCycle()) {
0705 if (Cycle->isReducible()) {
0706
0707
0708 continue;
0709 }
0710 SmallVector<BlockT *> Exits;
0711 Cycle->getExitBlocks(Exits);
0712 auto *Header = Cycle->getHeader();
0713 auto *HeaderLabel = BlockLabels[Header];
0714 for (const auto *Exit : Exits) {
0715 if (BlockLabels[Exit] != HeaderLabel) {
0716
0717 DivDesc->CycleDivBlocks.insert(Exit);
0718 LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(Exit)
0719 << "\n");
0720 }
0721 }
0722 }
0723
0724 return std::move(DivDesc);
0725 }
0726 };
0727
0728 template <typename ContextT>
0729 typename llvm::GenericSyncDependenceAnalysis<ContextT>::DivergenceDescriptor
0730 llvm::GenericSyncDependenceAnalysis<ContextT>::EmptyDivergenceDesc;
0731
0732 template <typename ContextT>
0733 llvm::GenericSyncDependenceAnalysis<ContextT>::GenericSyncDependenceAnalysis(
0734 const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
0735 : CyclePO(Context), DT(DT), CI(CI) {
0736 CyclePO.compute(CI);
0737 }
0738
0739 template <typename ContextT>
0740 auto llvm::GenericSyncDependenceAnalysis<ContextT>::getJoinBlocks(
0741 const BlockT *DivTermBlock) -> const DivergenceDescriptor & {
0742
0743 if (succ_size(DivTermBlock) <= 1) {
0744 return EmptyDivergenceDesc;
0745 }
0746
0747
0748 auto ItCached = CachedControlDivDescs.find(DivTermBlock);
0749 if (ItCached != CachedControlDivDescs.end())
0750 return *ItCached->second;
0751
0752
0753 DivergencePropagatorT Propagator(CyclePO, DT, CI, *DivTermBlock);
0754 auto DivDesc = Propagator.computeJoinPoints();
0755
0756 auto printBlockSet = [&](ConstBlockSet &Blocks) {
0757 return Printable([&](raw_ostream &Out) {
0758 Out << "[";
0759 ListSeparator LS;
0760 for (const auto *BB : Blocks) {
0761 Out << LS << CI.getSSAContext().print(BB);
0762 }
0763 Out << "]\n";
0764 });
0765 };
0766
0767 LLVM_DEBUG(
0768 dbgs() << "\nResult (" << CI.getSSAContext().print(DivTermBlock)
0769 << "):\n JoinDivBlocks: " << printBlockSet(DivDesc->JoinDivBlocks)
0770 << " CycleDivBlocks: " << printBlockSet(DivDesc->CycleDivBlocks)
0771 << "\n");
0772 (void)printBlockSet;
0773
0774 auto ItInserted =
0775 CachedControlDivDescs.try_emplace(DivTermBlock, std::move(DivDesc));
0776 assert(ItInserted.second);
0777 return *ItInserted.first->second;
0778 }
0779
0780 template <typename ContextT>
0781 void GenericUniformityAnalysisImpl<ContextT>::markDivergent(
0782 const InstructionT &I) {
0783 if (isAlwaysUniform(I))
0784 return;
0785 bool Marked = false;
0786 if (I.isTerminator()) {
0787 Marked = DivergentTermBlocks.insert(I.getParent()).second;
0788 if (Marked) {
0789 LLVM_DEBUG(dbgs() << "marked divergent term block: "
0790 << Context.print(I.getParent()) << "\n");
0791 }
0792 } else {
0793 Marked = markDefsDivergent(I);
0794 }
0795
0796 if (Marked)
0797 Worklist.push_back(&I);
0798 }
0799
0800 template <typename ContextT>
0801 bool GenericUniformityAnalysisImpl<ContextT>::markDivergent(
0802 ConstValueRefT Val) {
0803 if (DivergentValues.insert(Val).second) {
0804 LLVM_DEBUG(dbgs() << "marked divergent: " << Context.print(Val) << "\n");
0805 return true;
0806 }
0807 return false;
0808 }
0809
0810 template <typename ContextT>
0811 void GenericUniformityAnalysisImpl<ContextT>::addUniformOverride(
0812 const InstructionT &Instr) {
0813 UniformOverrides.insert(&Instr);
0814 }
0815
0816
0817
0818
0819
0820
0821
0822
0823
0824
0825
0826
0827
0828
0829 template <typename ContextT>
0830 void GenericUniformityAnalysisImpl<ContextT>::analyzeCycleExitDivergence(
0831 const CycleT &DefCycle) {
0832 SmallVector<BlockT *> Exits;
0833 DefCycle.getExitBlocks(Exits);
0834 for (auto *Exit : Exits) {
0835 for (auto &Phi : Exit->phis()) {
0836 if (usesValueFromCycle(Phi, DefCycle)) {
0837 markDivergent(Phi);
0838 }
0839 }
0840 }
0841
0842 for (auto *BB : DefCycle.blocks()) {
0843 if (!llvm::any_of(Exits,
0844 [&](BlockT *Exit) { return DT.dominates(BB, Exit); }))
0845 continue;
0846 for (auto &II : *BB) {
0847 propagateTemporalDivergence(II, DefCycle);
0848 }
0849 }
0850 }
0851
0852 template <typename ContextT>
0853 void GenericUniformityAnalysisImpl<ContextT>::propagateCycleExitDivergence(
0854 const BlockT &DivExit, const CycleT &InnerDivCycle) {
0855 LLVM_DEBUG(dbgs() << "\tpropCycleExitDiv " << Context.print(&DivExit)
0856 << "\n");
0857 auto *DivCycle = &InnerDivCycle;
0858 auto *OuterDivCycle = DivCycle;
0859 auto *ExitLevelCycle = CI.getCycle(&DivExit);
0860 const unsigned CycleExitDepth =
0861 ExitLevelCycle ? ExitLevelCycle->getDepth() : 0;
0862
0863
0864 while (DivCycle && DivCycle->getDepth() > CycleExitDepth) {
0865 LLVM_DEBUG(dbgs() << " Found exiting cycle: "
0866 << Context.print(DivCycle->getHeader()) << "\n");
0867 OuterDivCycle = DivCycle;
0868 DivCycle = DivCycle->getParentCycle();
0869 }
0870 LLVM_DEBUG(dbgs() << "\tOuter-most exiting cycle: "
0871 << Context.print(OuterDivCycle->getHeader()) << "\n");
0872
0873 if (!DivergentExitCycles.insert(OuterDivCycle).second)
0874 return;
0875
0876
0877
0878 for (const auto *C : AssumedDivergent) {
0879 if (C->contains(OuterDivCycle))
0880 return;
0881 }
0882
0883 analyzeCycleExitDivergence(*OuterDivCycle);
0884 }
0885
0886 template <typename ContextT>
0887 void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
0888 const BlockT &BB) {
0889 LLVM_DEBUG(dbgs() << "taintAndPushAllDefs " << Context.print(&BB) << "\n");
0890 for (const auto &I : instrs(BB)) {
0891
0892
0893
0894 if (I.isTerminator())
0895 break;
0896
0897 markDivergent(I);
0898 }
0899 }
0900
0901
0902 template <typename ContextT>
0903 void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
0904 const BlockT &JoinBlock) {
0905 LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock)
0906 << "\n");
0907 for (const auto &Phi : JoinBlock.phis()) {
0908
0909
0910
0911
0912
0913
0914
0915 if (ContextT::isConstantOrUndefValuePhi(Phi))
0916 continue;
0917 markDivergent(Phi);
0918 }
0919 }
0920
0921
0922
0923
0924 template <typename CycleT>
0925 static bool insertIfNotContained(SmallVector<CycleT *> &Cycles,
0926 CycleT *Candidate) {
0927 if (llvm::any_of(Cycles,
0928 [Candidate](CycleT *C) { return C->contains(Candidate); }))
0929 return false;
0930 Cycles.push_back(Candidate);
0931 return true;
0932 }
0933
0934
0935
0936
0937
0938
0939 template <typename CycleT, typename BlockT>
0940 static const CycleT *getExtDivCycle(const CycleT *Cycle,
0941 const BlockT *DivTermBlock,
0942 const BlockT *JoinBlock) {
0943 assert(Cycle);
0944 assert(Cycle->contains(JoinBlock));
0945
0946 if (Cycle->contains(DivTermBlock))
0947 return nullptr;
0948
0949 const auto *OriginalCycle = Cycle;
0950 const auto *Parent = Cycle->getParentCycle();
0951 while (Parent && !Parent->contains(DivTermBlock)) {
0952 Cycle = Parent;
0953 Parent = Cycle->getParentCycle();
0954 }
0955
0956
0957
0958
0959 (void)OriginalCycle;
0960 assert(Cycle == OriginalCycle || !Cycle->isReducible());
0961
0962 if (Cycle->isReducible()) {
0963 assert(Cycle->getHeader() == JoinBlock);
0964 return nullptr;
0965 }
0966
0967 LLVM_DEBUG(dbgs() << "cycle made divergent by external branch\n");
0968 return Cycle;
0969 }
0970
0971
0972
0973
0974
0975 template <typename ContextT, typename CycleT, typename BlockT,
0976 typename DominatorTreeT>
0977 static const CycleT *
0978 getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
0979 const BlockT *JoinBlock, const DominatorTreeT &DT,
0980 ContextT &Context) {
0981 LLVM_DEBUG(dbgs() << "examine join " << Context.print(JoinBlock)
0982 << " for internal branch " << Context.print(DivTermBlock)
0983 << "\n");
0984 if (DT.properlyDominates(DivTermBlock, JoinBlock))
0985 return nullptr;
0986
0987
0988 assert(Cycle && Cycle->contains(JoinBlock));
0989 while (Cycle && !Cycle->contains(DivTermBlock)) {
0990 Cycle = Cycle->getParentCycle();
0991 }
0992 if (!Cycle || Cycle->isReducible())
0993 return nullptr;
0994
0995 if (DT.properlyDominates(Cycle->getHeader(), JoinBlock))
0996 return nullptr;
0997
0998 LLVM_DEBUG(dbgs() << " header " << Context.print(Cycle->getHeader())
0999 << " does not dominate join\n");
1000
1001 const auto *Parent = Cycle->getParentCycle();
1002 while (Parent && !DT.properlyDominates(Parent->getHeader(), JoinBlock)) {
1003 LLVM_DEBUG(dbgs() << " header " << Context.print(Parent->getHeader())
1004 << " does not dominate join\n");
1005 Cycle = Parent;
1006 Parent = Parent->getParentCycle();
1007 }
1008
1009 LLVM_DEBUG(dbgs() << " cycle made divergent by internal branch\n");
1010 return Cycle;
1011 }
1012
1013 template <typename ContextT, typename CycleT, typename BlockT,
1014 typename DominatorTreeT>
1015 static const CycleT *
1016 getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
1017 const BlockT *JoinBlock, const DominatorTreeT &DT,
1018 ContextT &Context) {
1019 if (!Cycle)
1020 return nullptr;
1021
1022
1023
1024 const auto *Ext = getExtDivCycle(Cycle, DivTermBlock, JoinBlock);
1025
1026
1027 const auto *Int = getIntDivCycle(Cycle, DivTermBlock, JoinBlock, DT, Context);
1028
1029 if (Int)
1030 return Int;
1031 return Ext;
1032 }
1033
1034 template <typename ContextT>
1035 bool GenericUniformityAnalysisImpl<ContextT>::isTemporalDivergent(
1036 const BlockT &ObservingBlock, const InstructionT &Def) const {
1037 const BlockT *DefBlock = Def.getParent();
1038 for (const CycleT *Cycle = CI.getCycle(DefBlock);
1039 Cycle && !Cycle->contains(&ObservingBlock);
1040 Cycle = Cycle->getParentCycle()) {
1041 if (DivergentExitCycles.contains(Cycle)) {
1042 return true;
1043 }
1044 }
1045 return false;
1046 }
1047
1048 template <typename ContextT>
1049 void GenericUniformityAnalysisImpl<ContextT>::analyzeControlDivergence(
1050 const InstructionT &Term) {
1051 const auto *DivTermBlock = Term.getParent();
1052 DivergentTermBlocks.insert(DivTermBlock);
1053 LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Context.print(DivTermBlock)
1054 << "\n");
1055
1056
1057 if (!DT.isReachableFromEntry(DivTermBlock))
1058 return;
1059
1060 const auto &DivDesc = SDA.getJoinBlocks(DivTermBlock);
1061 SmallVector<const CycleT *> DivCycles;
1062
1063
1064 for (const auto *JoinBlock : DivDesc.JoinDivBlocks) {
1065 const auto *Cycle = CI.getCycle(JoinBlock);
1066 LLVM_DEBUG(dbgs() << "visiting join block " << Context.print(JoinBlock)
1067 << "\n");
1068 if (const auto *Outermost = getOutermostDivergentCycle(
1069 Cycle, DivTermBlock, JoinBlock, DT, Context)) {
1070 LLVM_DEBUG(dbgs() << "found divergent cycle\n");
1071 DivCycles.push_back(Outermost);
1072 continue;
1073 }
1074 taintAndPushPhiNodes(*JoinBlock);
1075 }
1076
1077
1078
1079 llvm::sort(DivCycles, [](const CycleT *A, const CycleT *B) {
1080 return A->getDepth() > B->getDepth();
1081 });
1082
1083
1084
1085
1086
1087
1088 for (auto *C : DivCycles) {
1089 if (!insertIfNotContained(AssumedDivergent, C))
1090 continue;
1091 LLVM_DEBUG(dbgs() << "process divergent cycle\n");
1092 for (const BlockT *BB : C->blocks()) {
1093 taintAndPushAllDefs(*BB);
1094 }
1095 }
1096
1097 const auto *BranchCycle = CI.getCycle(DivTermBlock);
1098 assert(DivDesc.CycleDivBlocks.empty() || BranchCycle);
1099 for (const auto *DivExitBlock : DivDesc.CycleDivBlocks) {
1100 propagateCycleExitDivergence(*DivExitBlock, *BranchCycle);
1101 }
1102 }
1103
1104 template <typename ContextT>
1105 void GenericUniformityAnalysisImpl<ContextT>::compute() {
1106
1107 auto DivValuesCopy = DivergentValues;
1108 for (const auto DivVal : DivValuesCopy) {
1109 assert(isDivergent(DivVal) && "Worklist invariant violated!");
1110 pushUsers(DivVal);
1111 }
1112
1113
1114
1115 while (!Worklist.empty()) {
1116 const InstructionT *I = Worklist.back();
1117 Worklist.pop_back();
1118
1119 LLVM_DEBUG(dbgs() << "worklist pop: " << Context.print(I) << "\n");
1120
1121 if (I->isTerminator()) {
1122 analyzeControlDivergence(*I);
1123 continue;
1124 }
1125
1126
1127 assert(isDivergent(*I) && "Worklist invariant violated!");
1128 pushUsers(*I);
1129 }
1130 }
1131
1132 template <typename ContextT>
1133 bool GenericUniformityAnalysisImpl<ContextT>::isAlwaysUniform(
1134 const InstructionT &Instr) const {
1135 return UniformOverrides.contains(&Instr);
1136 }
1137
1138 template <typename ContextT>
1139 GenericUniformityInfo<ContextT>::GenericUniformityInfo(
1140 const DominatorTreeT &DT, const CycleInfoT &CI,
1141 const TargetTransformInfo *TTI) {
1142 DA.reset(new ImplT{DT, CI, TTI});
1143 }
1144
1145 template <typename ContextT>
1146 void GenericUniformityAnalysisImpl<ContextT>::print(raw_ostream &OS) const {
1147 bool haveDivergentArgs = false;
1148
1149
1150
1151
1152 if (DivergentValues.empty() && DivergentTermBlocks.empty() &&
1153 DivergentExitCycles.empty()) {
1154 OS << "ALL VALUES UNIFORM\n";
1155 return;
1156 }
1157
1158 for (const auto &entry : DivergentValues) {
1159 const BlockT *parent = Context.getDefBlock(entry);
1160 if (!parent) {
1161 if (!haveDivergentArgs) {
1162 OS << "DIVERGENT ARGUMENTS:\n";
1163 haveDivergentArgs = true;
1164 }
1165 OS << " DIVERGENT: " << Context.print(entry) << '\n';
1166 }
1167 }
1168
1169 if (!AssumedDivergent.empty()) {
1170 OS << "CYCLES ASSSUMED DIVERGENT:\n";
1171 for (const CycleT *cycle : AssumedDivergent) {
1172 OS << " " << cycle->print(Context) << '\n';
1173 }
1174 }
1175
1176 if (!DivergentExitCycles.empty()) {
1177 OS << "CYCLES WITH DIVERGENT EXIT:\n";
1178 for (const CycleT *cycle : DivergentExitCycles) {
1179 OS << " " << cycle->print(Context) << '\n';
1180 }
1181 }
1182
1183 for (auto &block : F) {
1184 OS << "\nBLOCK " << Context.print(&block) << '\n';
1185
1186 OS << "DEFINITIONS\n";
1187 SmallVector<ConstValueRefT, 16> defs;
1188 Context.appendBlockDefs(defs, block);
1189 for (auto value : defs) {
1190 if (isDivergent(value))
1191 OS << " DIVERGENT: ";
1192 else
1193 OS << " ";
1194 OS << Context.print(value) << '\n';
1195 }
1196
1197 OS << "TERMINATORS\n";
1198 SmallVector<const InstructionT *, 8> terms;
1199 Context.appendBlockTerms(terms, block);
1200 bool divergentTerminators = hasDivergentTerminator(block);
1201 for (auto *T : terms) {
1202 if (divergentTerminators)
1203 OS << " DIVERGENT: ";
1204 else
1205 OS << " ";
1206 OS << Context.print(T) << '\n';
1207 }
1208
1209 OS << "END BLOCK\n";
1210 }
1211 }
1212
1213 template <typename ContextT>
1214 bool GenericUniformityInfo<ContextT>::hasDivergence() const {
1215 return DA->hasDivergence();
1216 }
1217
1218 template <typename ContextT>
1219 const typename ContextT::FunctionT &
1220 GenericUniformityInfo<ContextT>::getFunction() const {
1221 return DA->getFunction();
1222 }
1223
1224
1225 template <typename ContextT>
1226 bool GenericUniformityInfo<ContextT>::isDivergent(ConstValueRefT V) const {
1227 return DA->isDivergent(V);
1228 }
1229
1230 template <typename ContextT>
1231 bool GenericUniformityInfo<ContextT>::isDivergent(const InstructionT *I) const {
1232 return DA->isDivergent(*I);
1233 }
1234
1235 template <typename ContextT>
1236 bool GenericUniformityInfo<ContextT>::isDivergentUse(const UseT &U) const {
1237 return DA->isDivergentUse(U);
1238 }
1239
1240 template <typename ContextT>
1241 bool GenericUniformityInfo<ContextT>::hasDivergentTerminator(const BlockT &B) {
1242 return DA->hasDivergentTerminator(B);
1243 }
1244
1245
1246 template <typename ContextT>
1247 void GenericUniformityInfo<ContextT>::print(raw_ostream &out) const {
1248 DA->print(out);
1249 }
1250
1251 template <typename ContextT>
1252 void llvm::ModifiedPostOrder<ContextT>::computeStackPO(
1253 SmallVectorImpl<const BlockT *> &Stack, const CycleInfoT &CI,
1254 const CycleT *Cycle, SmallPtrSetImpl<const BlockT *> &Finalized) {
1255 LLVM_DEBUG(dbgs() << "inside computeStackPO\n");
1256 while (!Stack.empty()) {
1257 auto *NextBB = Stack.back();
1258 if (Finalized.count(NextBB)) {
1259 Stack.pop_back();
1260 continue;
1261 }
1262 LLVM_DEBUG(dbgs() << " visiting " << CI.getSSAContext().print(NextBB)
1263 << "\n");
1264 auto *NestedCycle = CI.getCycle(NextBB);
1265 if (Cycle != NestedCycle && (!Cycle || Cycle->contains(NestedCycle))) {
1266 LLVM_DEBUG(dbgs() << " found a cycle\n");
1267 while (NestedCycle->getParentCycle() != Cycle)
1268 NestedCycle = NestedCycle->getParentCycle();
1269
1270 SmallVector<BlockT *, 3> NestedExits;
1271 NestedCycle->getExitBlocks(NestedExits);
1272 bool PushedNodes = false;
1273 for (auto *NestedExitBB : NestedExits) {
1274 LLVM_DEBUG(dbgs() << " examine exit: "
1275 << CI.getSSAContext().print(NestedExitBB) << "\n");
1276 if (Cycle && !Cycle->contains(NestedExitBB))
1277 continue;
1278 if (Finalized.count(NestedExitBB))
1279 continue;
1280 PushedNodes = true;
1281 Stack.push_back(NestedExitBB);
1282 LLVM_DEBUG(dbgs() << " pushed exit: "
1283 << CI.getSSAContext().print(NestedExitBB) << "\n");
1284 }
1285 if (!PushedNodes) {
1286
1287 Stack.pop_back();
1288 computeCyclePO(CI, NestedCycle, Finalized);
1289 }
1290 continue;
1291 }
1292
1293 LLVM_DEBUG(dbgs() << " no nested cycle, going into DAG\n");
1294
1295 bool PushedNodes = false;
1296 for (auto *SuccBB : successors(NextBB)) {
1297 LLVM_DEBUG(dbgs() << " examine succ: "
1298 << CI.getSSAContext().print(SuccBB) << "\n");
1299 if (Cycle && !Cycle->contains(SuccBB))
1300 continue;
1301 if (Finalized.count(SuccBB))
1302 continue;
1303 PushedNodes = true;
1304 Stack.push_back(SuccBB);
1305 LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(SuccBB)
1306 << "\n");
1307 }
1308 if (!PushedNodes) {
1309
1310 LLVM_DEBUG(dbgs() << " finishing node: "
1311 << CI.getSSAContext().print(NextBB) << "\n");
1312 Stack.pop_back();
1313 Finalized.insert(NextBB);
1314 appendBlock(*NextBB);
1315 }
1316 }
1317 LLVM_DEBUG(dbgs() << "exited computeStackPO\n");
1318 }
1319
1320 template <typename ContextT>
1321 void ModifiedPostOrder<ContextT>::computeCyclePO(
1322 const CycleInfoT &CI, const CycleT *Cycle,
1323 SmallPtrSetImpl<const BlockT *> &Finalized) {
1324 LLVM_DEBUG(dbgs() << "inside computeCyclePO\n");
1325 SmallVector<const BlockT *> Stack;
1326 auto *CycleHeader = Cycle->getHeader();
1327
1328 LLVM_DEBUG(dbgs() << " noted header: "
1329 << CI.getSSAContext().print(CycleHeader) << "\n");
1330 assert(!Finalized.count(CycleHeader));
1331 Finalized.insert(CycleHeader);
1332
1333
1334 LLVM_DEBUG(dbgs() << " finishing header: "
1335 << CI.getSSAContext().print(CycleHeader) << "\n");
1336 appendBlock(*CycleHeader, Cycle->isReducible());
1337
1338
1339 for (auto *BB : successors(CycleHeader)) {
1340 LLVM_DEBUG(dbgs() << " examine succ: " << CI.getSSAContext().print(BB)
1341 << "\n");
1342 if (!Cycle->contains(BB))
1343 continue;
1344 if (BB == CycleHeader)
1345 continue;
1346 if (!Finalized.count(BB)) {
1347 LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(BB)
1348 << "\n");
1349 Stack.push_back(BB);
1350 }
1351 }
1352
1353
1354 computeStackPO(Stack, CI, Cycle, Finalized);
1355
1356 LLVM_DEBUG(dbgs() << "exited computeCyclePO\n");
1357 }
1358
1359
1360 template <typename ContextT>
1361 void llvm::ModifiedPostOrder<ContextT>::compute(const CycleInfoT &CI) {
1362 SmallPtrSet<const BlockT *, 32> Finalized;
1363 SmallVector<const BlockT *> Stack;
1364 auto *F = CI.getFunction();
1365 Stack.reserve(24);
1366 Stack.push_back(&F->front());
1367 computeStackPO(Stack, CI, nullptr, Finalized);
1368 }
1369
1370 }
1371
1372 #undef DEBUG_TYPE
1373
1374 #endif