File indexing completed on 2026-05-10 08:43:11
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014 #ifndef LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H
0015 #define LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H
0016
0017 #include "llvm/ADT/BitVector.h"
0018 #include "llvm/ADT/DenseMap.h"
0019 #include "llvm/ADT/DenseSet.h"
0020 #include "llvm/ADT/GraphTraits.h"
0021 #include "llvm/ADT/PostOrderIterator.h"
0022 #include "llvm/ADT/SmallPtrSet.h"
0023 #include "llvm/ADT/SmallVector.h"
0024 #include "llvm/ADT/SparseBitVector.h"
0025 #include "llvm/ADT/Twine.h"
0026 #include "llvm/ADT/iterator_range.h"
0027 #include "llvm/IR/BasicBlock.h"
0028 #include "llvm/IR/Function.h"
0029 #include "llvm/IR/ValueHandle.h"
0030 #include "llvm/Support/BlockFrequency.h"
0031 #include "llvm/Support/BranchProbability.h"
0032 #include "llvm/Support/CommandLine.h"
0033 #include "llvm/Support/DOTGraphTraits.h"
0034 #include "llvm/Support/Debug.h"
0035 #include "llvm/Support/Format.h"
0036 #include "llvm/Support/ScaledNumber.h"
0037 #include "llvm/Support/raw_ostream.h"
0038 #include <algorithm>
0039 #include <cassert>
0040 #include <cstddef>
0041 #include <cstdint>
0042 #include <deque>
0043 #include <iterator>
0044 #include <limits>
0045 #include <list>
0046 #include <optional>
0047 #include <queue>
0048 #include <string>
0049 #include <utility>
0050 #include <vector>
0051
0052 #define DEBUG_TYPE "block-freq"
0053
0054 namespace llvm {
0055 extern llvm::cl::opt<bool> CheckBFIUnknownBlockQueries;
0056
0057 extern llvm::cl::opt<bool> UseIterativeBFIInference;
0058 extern llvm::cl::opt<unsigned> IterativeBFIMaxIterationsPerBlock;
0059 extern llvm::cl::opt<double> IterativeBFIPrecision;
0060
0061 class BranchProbabilityInfo;
0062 class Function;
0063 class Loop;
0064 class LoopInfo;
0065 class MachineBasicBlock;
0066 class MachineBranchProbabilityInfo;
0067 class MachineFunction;
0068 class MachineLoop;
0069 class MachineLoopInfo;
0070
0071 namespace bfi_detail {
0072
0073 struct IrreducibleGraph;
0074
0075
0076 template <class BT> struct BlockEdgesAdder;
0077
0078
0079
0080
0081
0082
0083
0084
0085
0086
0087
0088
0089
0090
0091
0092 class BlockMass {
0093 uint64_t Mass = 0;
0094
0095 public:
0096 BlockMass() = default;
0097 explicit BlockMass(uint64_t Mass) : Mass(Mass) {}
0098
0099 static BlockMass getEmpty() { return BlockMass(); }
0100
0101 static BlockMass getFull() {
0102 return BlockMass(std::numeric_limits<uint64_t>::max());
0103 }
0104
0105 uint64_t getMass() const { return Mass; }
0106
0107 bool isFull() const { return Mass == std::numeric_limits<uint64_t>::max(); }
0108 bool isEmpty() const { return !Mass; }
0109
0110 bool operator!() const { return isEmpty(); }
0111
0112
0113
0114
0115 BlockMass &operator+=(BlockMass X) {
0116 uint64_t Sum = Mass + X.Mass;
0117 Mass = Sum < Mass ? std::numeric_limits<uint64_t>::max() : Sum;
0118 return *this;
0119 }
0120
0121
0122
0123
0124
0125 BlockMass &operator-=(BlockMass X) {
0126 uint64_t Diff = Mass - X.Mass;
0127 Mass = Diff > Mass ? 0 : Diff;
0128 return *this;
0129 }
0130
0131 BlockMass &operator*=(BranchProbability P) {
0132 Mass = P.scale(Mass);
0133 return *this;
0134 }
0135
0136 bool operator==(BlockMass X) const { return Mass == X.Mass; }
0137 bool operator!=(BlockMass X) const { return Mass != X.Mass; }
0138 bool operator<=(BlockMass X) const { return Mass <= X.Mass; }
0139 bool operator>=(BlockMass X) const { return Mass >= X.Mass; }
0140 bool operator<(BlockMass X) const { return Mass < X.Mass; }
0141 bool operator>(BlockMass X) const { return Mass > X.Mass; }
0142
0143
0144
0145
0146
0147 ScaledNumber<uint64_t> toScaled() const;
0148
0149 void dump() const;
0150 raw_ostream &print(raw_ostream &OS) const;
0151 };
0152
0153 inline BlockMass operator+(BlockMass L, BlockMass R) {
0154 return BlockMass(L) += R;
0155 }
0156 inline BlockMass operator-(BlockMass L, BlockMass R) {
0157 return BlockMass(L) -= R;
0158 }
0159 inline BlockMass operator*(BlockMass L, BranchProbability R) {
0160 return BlockMass(L) *= R;
0161 }
0162 inline BlockMass operator*(BranchProbability L, BlockMass R) {
0163 return BlockMass(R) *= L;
0164 }
0165
0166 inline raw_ostream &operator<<(raw_ostream &OS, BlockMass X) {
0167 return X.print(OS);
0168 }
0169
0170 }
0171
0172
0173
0174
0175
0176
0177
0178
0179
0180 class BlockFrequencyInfoImplBase {
0181 public:
0182 using Scaled64 = ScaledNumber<uint64_t>;
0183 using BlockMass = bfi_detail::BlockMass;
0184
0185
0186
0187
0188
0189
0190
0191
0192 struct BlockNode {
0193 using IndexType = uint32_t;
0194
0195 IndexType Index;
0196
0197 BlockNode() : Index(std::numeric_limits<uint32_t>::max()) {}
0198 BlockNode(IndexType Index) : Index(Index) {}
0199
0200 bool operator==(const BlockNode &X) const { return Index == X.Index; }
0201 bool operator!=(const BlockNode &X) const { return Index != X.Index; }
0202 bool operator<=(const BlockNode &X) const { return Index <= X.Index; }
0203 bool operator>=(const BlockNode &X) const { return Index >= X.Index; }
0204 bool operator<(const BlockNode &X) const { return Index < X.Index; }
0205 bool operator>(const BlockNode &X) const { return Index > X.Index; }
0206
0207 bool isValid() const { return Index <= getMaxIndex(); }
0208
0209 static size_t getMaxIndex() {
0210 return std::numeric_limits<uint32_t>::max() - 1;
0211 }
0212 };
0213
0214
0215 struct FrequencyData {
0216 Scaled64 Scaled;
0217 uint64_t Integer;
0218 };
0219
0220
0221
0222
0223
0224 struct LoopData {
0225 using ExitMap = SmallVector<std::pair<BlockNode, BlockMass>, 4>;
0226 using NodeList = SmallVector<BlockNode, 4>;
0227 using HeaderMassList = SmallVector<BlockMass, 1>;
0228
0229 LoopData *Parent;
0230 bool IsPackaged = false;
0231 uint32_t NumHeaders = 1;
0232 ExitMap Exits;
0233 NodeList Nodes;
0234 HeaderMassList BackedgeMass;
0235 BlockMass Mass;
0236 Scaled64 Scale;
0237
0238 LoopData(LoopData *Parent, const BlockNode &Header)
0239 : Parent(Parent), Nodes(1, Header), BackedgeMass(1) {}
0240
0241 template <class It1, class It2>
0242 LoopData(LoopData *Parent, It1 FirstHeader, It1 LastHeader, It2 FirstOther,
0243 It2 LastOther)
0244 : Parent(Parent), Nodes(FirstHeader, LastHeader) {
0245 NumHeaders = Nodes.size();
0246 Nodes.insert(Nodes.end(), FirstOther, LastOther);
0247 BackedgeMass.resize(NumHeaders);
0248 }
0249
0250 bool isHeader(const BlockNode &Node) const {
0251 if (isIrreducible())
0252 return std::binary_search(Nodes.begin(), Nodes.begin() + NumHeaders,
0253 Node);
0254 return Node == Nodes[0];
0255 }
0256
0257 BlockNode getHeader() const { return Nodes[0]; }
0258 bool isIrreducible() const { return NumHeaders > 1; }
0259
0260 HeaderMassList::difference_type getHeaderIndex(const BlockNode &B) {
0261 assert(isHeader(B) && "this is only valid on loop header blocks");
0262 if (isIrreducible())
0263 return std::lower_bound(Nodes.begin(), Nodes.begin() + NumHeaders, B) -
0264 Nodes.begin();
0265 return 0;
0266 }
0267
0268 NodeList::const_iterator members_begin() const {
0269 return Nodes.begin() + NumHeaders;
0270 }
0271
0272 NodeList::const_iterator members_end() const { return Nodes.end(); }
0273 iterator_range<NodeList::const_iterator> members() const {
0274 return make_range(members_begin(), members_end());
0275 }
0276 };
0277
0278
0279 struct WorkingData {
0280 BlockNode Node;
0281 LoopData *Loop = nullptr;
0282 BlockMass Mass;
0283
0284 WorkingData(const BlockNode &Node) : Node(Node) {}
0285
0286 bool isLoopHeader() const { return Loop && Loop->isHeader(Node); }
0287
0288 bool isDoubleLoopHeader() const {
0289 return isLoopHeader() && Loop->Parent && Loop->Parent->isIrreducible() &&
0290 Loop->Parent->isHeader(Node);
0291 }
0292
0293 LoopData *getContainingLoop() const {
0294 if (!isLoopHeader())
0295 return Loop;
0296 if (!isDoubleLoopHeader())
0297 return Loop->Parent;
0298 return Loop->Parent->Parent;
0299 }
0300
0301
0302
0303
0304
0305
0306
0307
0308
0309
0310
0311
0312
0313
0314 BlockNode getResolvedNode() const {
0315 auto *L = getPackagedLoop();
0316 return L ? L->getHeader() : Node;
0317 }
0318
0319 LoopData *getPackagedLoop() const {
0320 if (!Loop || !Loop->IsPackaged)
0321 return nullptr;
0322 auto *L = Loop;
0323 while (L->Parent && L->Parent->IsPackaged)
0324 L = L->Parent;
0325 return L;
0326 }
0327
0328
0329
0330
0331
0332
0333 BlockMass &getMass() {
0334 if (!isAPackage())
0335 return Mass;
0336 if (!isADoublePackage())
0337 return Loop->Mass;
0338 return Loop->Parent->Mass;
0339 }
0340
0341
0342 bool isPackaged() const { return getResolvedNode() != Node; }
0343
0344
0345 bool isAPackage() const { return isLoopHeader() && Loop->IsPackaged; }
0346
0347
0348 bool isADoublePackage() const {
0349 return isDoubleLoopHeader() && Loop->Parent->IsPackaged;
0350 }
0351 };
0352
0353
0354
0355
0356
0357
0358
0359
0360
0361
0362
0363
0364
0365
0366 struct Weight {
0367 enum DistType { Local, Exit, Backedge };
0368 DistType Type = Local;
0369 BlockNode TargetNode;
0370 uint64_t Amount = 0;
0371
0372 Weight() = default;
0373 Weight(DistType Type, BlockNode TargetNode, uint64_t Amount)
0374 : Type(Type), TargetNode(TargetNode), Amount(Amount) {}
0375 };
0376
0377
0378
0379
0380
0381
0382
0383
0384
0385 struct Distribution {
0386 using WeightList = SmallVector<Weight, 4>;
0387
0388 WeightList Weights;
0389 uint64_t Total = 0;
0390 bool DidOverflow = false;
0391
0392 Distribution() = default;
0393
0394 void addLocal(const BlockNode &Node, uint64_t Amount) {
0395 add(Node, Amount, Weight::Local);
0396 }
0397
0398 void addExit(const BlockNode &Node, uint64_t Amount) {
0399 add(Node, Amount, Weight::Exit);
0400 }
0401
0402 void addBackedge(const BlockNode &Node, uint64_t Amount) {
0403 add(Node, Amount, Weight::Backedge);
0404 }
0405
0406
0407
0408
0409
0410
0411
0412
0413
0414
0415 void normalize();
0416
0417 private:
0418 void add(const BlockNode &Node, uint64_t Amount, Weight::DistType Type);
0419 };
0420
0421
0422 std::vector<FrequencyData> Freqs;
0423
0424
0425
0426 SparseBitVector<> IsIrrLoopHeader;
0427
0428
0429 std::vector<WorkingData> Working;
0430
0431
0432 std::list<LoopData> Loops;
0433
0434
0435
0436
0437
0438 virtual ~BlockFrequencyInfoImplBase() = default;
0439
0440
0441
0442
0443
0444
0445
0446 bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop,
0447 Distribution &Dist);
0448
0449
0450
0451
0452
0453
0454
0455
0456 bool addToDist(Distribution &Dist, const LoopData *OuterLoop,
0457 const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight);
0458
0459
0460
0461
0462
0463
0464
0465
0466 iterator_range<std::list<LoopData>::iterator>
0467 analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop,
0468 std::list<LoopData>::iterator Insert);
0469
0470
0471
0472
0473
0474
0475
0476 void updateLoopWithIrreducible(LoopData &OuterLoop);
0477
0478
0479
0480
0481
0482
0483
0484 void distributeMass(const BlockNode &Source, LoopData *OuterLoop,
0485 Distribution &Dist);
0486
0487
0488 void computeLoopScale(LoopData &Loop);
0489
0490
0491
0492
0493
0494
0495
0496
0497
0498 void adjustLoopHeaderMass(LoopData &Loop);
0499
0500 void distributeIrrLoopHeaderMass(Distribution &Dist);
0501
0502
0503 void packageLoop(LoopData &Loop);
0504
0505
0506 void unwrapLoops();
0507
0508
0509
0510
0511
0512 void finalizeMetrics();
0513
0514
0515 void clear();
0516
0517 virtual std::string getBlockName(const BlockNode &Node) const;
0518 std::string getLoopName(const LoopData &Loop) const;
0519
0520 virtual raw_ostream &print(raw_ostream &OS) const { return OS; }
0521 void dump() const { print(dbgs()); }
0522
0523 Scaled64 getFloatingBlockFreq(const BlockNode &Node) const;
0524
0525 BlockFrequency getBlockFreq(const BlockNode &Node) const;
0526 std::optional<uint64_t>
0527 getBlockProfileCount(const Function &F, const BlockNode &Node,
0528 bool AllowSynthetic = false) const;
0529 std::optional<uint64_t>
0530 getProfileCountFromFreq(const Function &F, BlockFrequency Freq,
0531 bool AllowSynthetic = false) const;
0532 bool isIrrLoopHeader(const BlockNode &Node);
0533
0534 void setBlockFreq(const BlockNode &Node, BlockFrequency Freq);
0535
0536 BlockFrequency getEntryFreq() const {
0537 assert(!Freqs.empty());
0538 return BlockFrequency(Freqs[0].Integer);
0539 }
0540 };
0541
0542 namespace bfi_detail {
0543
0544 template <class BlockT> struct TypeMap {};
0545 template <> struct TypeMap<BasicBlock> {
0546 using BlockT = BasicBlock;
0547 using BlockKeyT = AssertingVH<const BasicBlock>;
0548 using FunctionT = Function;
0549 using BranchProbabilityInfoT = BranchProbabilityInfo;
0550 using LoopT = Loop;
0551 using LoopInfoT = LoopInfo;
0552 };
0553 template <> struct TypeMap<MachineBasicBlock> {
0554 using BlockT = MachineBasicBlock;
0555 using BlockKeyT = const MachineBasicBlock *;
0556 using FunctionT = MachineFunction;
0557 using BranchProbabilityInfoT = MachineBranchProbabilityInfo;
0558 using LoopT = MachineLoop;
0559 using LoopInfoT = MachineLoopInfo;
0560 };
0561
0562 template <class BlockT, class BFIImplT>
0563 class BFICallbackVH;
0564
0565
0566
0567
0568
0569
0570
0571
0572 template <class BlockT> std::string getBlockName(const BlockT *BB) {
0573 assert(BB && "Unexpected nullptr");
0574 auto MachineName = "BB" + Twine(BB->getNumber());
0575 if (BB->getBasicBlock())
0576 return (MachineName + "[" + BB->getName() + "]").str();
0577 return MachineName.str();
0578 }
0579
0580 template <> inline std::string getBlockName(const BasicBlock *BB) {
0581 assert(BB && "Unexpected nullptr");
0582 return BB->getName().str();
0583 }
0584
0585
0586
0587
0588
0589
0590
0591
0592
0593
0594
0595
0596
0597
0598
0599 struct IrreducibleGraph {
0600 using BFIBase = BlockFrequencyInfoImplBase;
0601
0602 BFIBase &BFI;
0603
0604 using BlockNode = BFIBase::BlockNode;
0605 struct IrrNode {
0606 BlockNode Node;
0607 unsigned NumIn = 0;
0608 std::deque<const IrrNode *> Edges;
0609
0610 IrrNode(const BlockNode &Node) : Node(Node) {}
0611
0612 using iterator = std::deque<const IrrNode *>::const_iterator;
0613
0614 iterator pred_begin() const { return Edges.begin(); }
0615 iterator succ_begin() const { return Edges.begin() + NumIn; }
0616 iterator pred_end() const { return succ_begin(); }
0617 iterator succ_end() const { return Edges.end(); }
0618 };
0619 BlockNode Start;
0620 const IrrNode *StartIrr = nullptr;
0621 std::vector<IrrNode> Nodes;
0622 SmallDenseMap<uint32_t, IrrNode *, 4> Lookup;
0623
0624
0625
0626
0627
0628
0629
0630
0631
0632
0633 template <class BlockEdgesAdder>
0634 IrreducibleGraph(BFIBase &BFI, const BFIBase::LoopData *OuterLoop,
0635 BlockEdgesAdder addBlockEdges) : BFI(BFI) {
0636 initialize(OuterLoop, addBlockEdges);
0637 }
0638
0639 template <class BlockEdgesAdder>
0640 void initialize(const BFIBase::LoopData *OuterLoop,
0641 BlockEdgesAdder addBlockEdges);
0642 void addNodesInLoop(const BFIBase::LoopData &OuterLoop);
0643 void addNodesInFunction();
0644
0645 void addNode(const BlockNode &Node) {
0646 Nodes.emplace_back(Node);
0647 BFI.Working[Node.Index].getMass() = BlockMass::getEmpty();
0648 }
0649
0650 void indexNodes();
0651 template <class BlockEdgesAdder>
0652 void addEdges(const BlockNode &Node, const BFIBase::LoopData *OuterLoop,
0653 BlockEdgesAdder addBlockEdges);
0654 void addEdge(IrrNode &Irr, const BlockNode &Succ,
0655 const BFIBase::LoopData *OuterLoop);
0656 };
0657
0658 template <class BlockEdgesAdder>
0659 void IrreducibleGraph::initialize(const BFIBase::LoopData *OuterLoop,
0660 BlockEdgesAdder addBlockEdges) {
0661 if (OuterLoop) {
0662 addNodesInLoop(*OuterLoop);
0663 for (auto N : OuterLoop->Nodes)
0664 addEdges(N, OuterLoop, addBlockEdges);
0665 } else {
0666 addNodesInFunction();
0667 for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
0668 addEdges(Index, OuterLoop, addBlockEdges);
0669 }
0670 StartIrr = Lookup[Start.Index];
0671 }
0672
0673 template <class BlockEdgesAdder>
0674 void IrreducibleGraph::addEdges(const BlockNode &Node,
0675 const BFIBase::LoopData *OuterLoop,
0676 BlockEdgesAdder addBlockEdges) {
0677 auto L = Lookup.find(Node.Index);
0678 if (L == Lookup.end())
0679 return;
0680 IrrNode &Irr = *L->second;
0681 const auto &Working = BFI.Working[Node.Index];
0682
0683 if (Working.isAPackage())
0684 for (const auto &I : Working.Loop->Exits)
0685 addEdge(Irr, I.first, OuterLoop);
0686 else
0687 addBlockEdges(*this, Irr, OuterLoop);
0688 }
0689
0690 }
0691
0692
0693
0694
0695
0696
0697
0698
0699
0700
0701
0702
0703
0704
0705
0706
0707
0708
0709
0710
0711
0712
0713
0714
0715
0716
0717
0718
0719
0720
0721
0722
0723
0724
0725
0726
0727
0728
0729
0730
0731
0732
0733
0734
0735
0736
0737
0738
0739
0740
0741
0742
0743
0744
0745
0746
0747
0748
0749
0750
0751
0752
0753
0754
0755
0756
0757
0758
0759
0760
0761
0762
0763
0764
0765
0766
0767
0768
0769
0770
0771
0772
0773
0774
0775
0776
0777
0778
0779
0780
0781
0782
0783
0784
0785
0786
0787
0788
0789
0790
0791
0792
0793
0794
0795
0796
0797
0798
0799
0800
0801
0802
0803
0804
0805
0806
0807
0808
0809
0810
0811
0812
0813
0814
0815
0816
0817
0818
0819
0820
0821
0822
0823
0824
0825
0826
0827
0828
0829
0830
0831
0832
0833
0834
0835
0836
0837
0838
0839
0840
0841
0842
0843
0844
0845 template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase {
0846
0847 friend struct bfi_detail::BlockEdgesAdder<BT>;
0848
0849 using BlockT = typename bfi_detail::TypeMap<BT>::BlockT;
0850 using BlockKeyT = typename bfi_detail::TypeMap<BT>::BlockKeyT;
0851 using FunctionT = typename bfi_detail::TypeMap<BT>::FunctionT;
0852 using BranchProbabilityInfoT =
0853 typename bfi_detail::TypeMap<BT>::BranchProbabilityInfoT;
0854 using LoopT = typename bfi_detail::TypeMap<BT>::LoopT;
0855 using LoopInfoT = typename bfi_detail::TypeMap<BT>::LoopInfoT;
0856 using Successor = GraphTraits<const BlockT *>;
0857 using Predecessor = GraphTraits<Inverse<const BlockT *>>;
0858 using BFICallbackVH =
0859 bfi_detail::BFICallbackVH<BlockT, BlockFrequencyInfoImpl>;
0860
0861 const BranchProbabilityInfoT *BPI = nullptr;
0862 const LoopInfoT *LI = nullptr;
0863 const FunctionT *F = nullptr;
0864
0865
0866 std::vector<const BlockT *> RPOT;
0867 DenseMap<BlockKeyT, std::pair<BlockNode, BFICallbackVH>> Nodes;
0868
0869 using rpot_iterator = typename std::vector<const BlockT *>::const_iterator;
0870
0871 rpot_iterator rpot_begin() const { return RPOT.begin(); }
0872 rpot_iterator rpot_end() const { return RPOT.end(); }
0873
0874 size_t getIndex(const rpot_iterator &I) const { return I - rpot_begin(); }
0875
0876 BlockNode getNode(const rpot_iterator &I) const {
0877 return BlockNode(getIndex(I));
0878 }
0879
0880 BlockNode getNode(const BlockT *BB) const { return Nodes.lookup(BB).first; }
0881
0882 const BlockT *getBlock(const BlockNode &Node) const {
0883 assert(Node.Index < RPOT.size());
0884 return RPOT[Node.Index];
0885 }
0886
0887
0888
0889
0890 void initializeRPOT();
0891
0892
0893
0894
0895
0896
0897
0898
0899 void initializeLoops();
0900
0901
0902
0903
0904
0905
0906
0907 bool propagateMassToSuccessors(LoopData *OuterLoop, const BlockNode &Node);
0908
0909
0910
0911
0912
0913
0914
0915
0916
0917 bool computeMassInLoop(LoopData &Loop);
0918
0919
0920
0921
0922
0923
0924
0925
0926
0927 bool tryToComputeMassInFunction();
0928
0929
0930
0931
0932
0933
0934
0935
0936
0937
0938
0939
0940
0941 void computeIrreducibleMass(LoopData *OuterLoop,
0942 std::list<LoopData>::iterator Insert);
0943
0944
0945
0946
0947
0948
0949
0950
0951
0952
0953 void computeMassInLoops();
0954
0955
0956
0957
0958
0959
0960
0961 void computeMassInFunction();
0962
0963 std::string getBlockName(const BlockNode &Node) const override {
0964 return bfi_detail::getBlockName(getBlock(Node));
0965 }
0966
0967
0968
0969
0970
0971
0972
0973
0974
0975
0976
0977 bool needIterativeInference() const;
0978
0979
0980 void applyIterativeInference();
0981
0982 using ProbMatrixType = std::vector<std::vector<std::pair<size_t, Scaled64>>>;
0983
0984
0985 void iterativeInference(const ProbMatrixType &ProbMatrix,
0986 std::vector<Scaled64> &Freq) const;
0987
0988
0989
0990 void findReachableBlocks(std::vector<const BlockT *> &Blocks) const;
0991
0992
0993
0994 void initTransitionProbabilities(
0995 const std::vector<const BlockT *> &Blocks,
0996 const DenseMap<const BlockT *, size_t> &BlockIndex,
0997 ProbMatrixType &ProbMatrix) const;
0998
0999 #ifndef NDEBUG
1000
1001
1002 Scaled64 discrepancy(const ProbMatrixType &ProbMatrix,
1003 const std::vector<Scaled64> &Freq) const;
1004 #endif
1005
1006 public:
1007 BlockFrequencyInfoImpl() = default;
1008
1009 const FunctionT *getFunction() const { return F; }
1010
1011 void calculate(const FunctionT &F, const BranchProbabilityInfoT &BPI,
1012 const LoopInfoT &LI);
1013
1014 using BlockFrequencyInfoImplBase::getEntryFreq;
1015
1016 BlockFrequency getBlockFreq(const BlockT *BB) const {
1017 return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB));
1018 }
1019
1020 std::optional<uint64_t>
1021 getBlockProfileCount(const Function &F, const BlockT *BB,
1022 bool AllowSynthetic = false) const {
1023 return BlockFrequencyInfoImplBase::getBlockProfileCount(F, getNode(BB),
1024 AllowSynthetic);
1025 }
1026
1027 std::optional<uint64_t>
1028 getProfileCountFromFreq(const Function &F, BlockFrequency Freq,
1029 bool AllowSynthetic = false) const {
1030 return BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq,
1031 AllowSynthetic);
1032 }
1033
1034 bool isIrrLoopHeader(const BlockT *BB) {
1035 return BlockFrequencyInfoImplBase::isIrrLoopHeader(getNode(BB));
1036 }
1037
1038 void setBlockFreq(const BlockT *BB, BlockFrequency Freq);
1039
1040 void forgetBlock(const BlockT *BB) {
1041
1042
1043
1044 Nodes.erase(BB);
1045 }
1046
1047 Scaled64 getFloatingBlockFreq(const BlockT *BB) const {
1048 return BlockFrequencyInfoImplBase::getFloatingBlockFreq(getNode(BB));
1049 }
1050
1051 const BranchProbabilityInfoT &getBPI() const { return *BPI; }
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064 raw_ostream &print(raw_ostream &OS) const override;
1065
1066 using BlockFrequencyInfoImplBase::dump;
1067
1068 void verifyMatch(BlockFrequencyInfoImpl<BT> &Other) const;
1069 };
1070
1071 namespace bfi_detail {
1072
1073 template <class BFIImplT>
1074 class BFICallbackVH<BasicBlock, BFIImplT> : public CallbackVH {
1075 BFIImplT *BFIImpl;
1076
1077 public:
1078 BFICallbackVH() = default;
1079
1080 BFICallbackVH(const BasicBlock *BB, BFIImplT *BFIImpl)
1081 : CallbackVH(BB), BFIImpl(BFIImpl) {}
1082
1083 virtual ~BFICallbackVH() = default;
1084
1085 void deleted() override {
1086 BFIImpl->forgetBlock(cast<BasicBlock>(getValPtr()));
1087 }
1088 };
1089
1090
1091
1092 template <class BFIImplT>
1093 class BFICallbackVH<MachineBasicBlock, BFIImplT> {
1094 public:
1095 BFICallbackVH() = default;
1096 BFICallbackVH(const MachineBasicBlock *, BFIImplT *) {}
1097 };
1098
1099 }
1100
1101 template <class BT>
1102 void BlockFrequencyInfoImpl<BT>::calculate(const FunctionT &F,
1103 const BranchProbabilityInfoT &BPI,
1104 const LoopInfoT &LI) {
1105
1106 this->BPI = &BPI;
1107 this->LI = &LI;
1108 this->F = &F;
1109
1110
1111 BlockFrequencyInfoImplBase::clear();
1112 RPOT.clear();
1113 Nodes.clear();
1114
1115
1116 LLVM_DEBUG(dbgs() << "\nblock-frequency: " << F.getName()
1117 << "\n================="
1118 << std::string(F.getName().size(), '=') << "\n");
1119 initializeRPOT();
1120 initializeLoops();
1121
1122
1123
1124 computeMassInLoops();
1125 computeMassInFunction();
1126 unwrapLoops();
1127
1128
1129 if (needIterativeInference())
1130 applyIterativeInference();
1131 finalizeMetrics();
1132
1133 if (CheckBFIUnknownBlockQueries) {
1134
1135
1136
1137 for (const BlockT &BB : F)
1138 if (!Nodes.count(&BB))
1139 setBlockFreq(&BB, BlockFrequency());
1140 }
1141 }
1142
1143 template <class BT>
1144 void BlockFrequencyInfoImpl<BT>::setBlockFreq(const BlockT *BB,
1145 BlockFrequency Freq) {
1146 if (Nodes.count(BB))
1147 BlockFrequencyInfoImplBase::setBlockFreq(getNode(BB), Freq);
1148 else {
1149
1150
1151
1152 BlockNode NewNode(Freqs.size());
1153 Nodes[BB] = {NewNode, BFICallbackVH(BB, this)};
1154 Freqs.emplace_back();
1155 BlockFrequencyInfoImplBase::setBlockFreq(NewNode, Freq);
1156 }
1157 }
1158
1159 template <class BT> void BlockFrequencyInfoImpl<BT>::initializeRPOT() {
1160 const BlockT *Entry = &F->front();
1161 RPOT.reserve(F->size());
1162 std::copy(po_begin(Entry), po_end(Entry), std::back_inserter(RPOT));
1163 std::reverse(RPOT.begin(), RPOT.end());
1164
1165 assert(RPOT.size() - 1 <= BlockNode::getMaxIndex() &&
1166 "More nodes in function than Block Frequency Info supports");
1167
1168 LLVM_DEBUG(dbgs() << "reverse-post-order-traversal\n");
1169 for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E; ++I) {
1170 BlockNode Node = getNode(I);
1171 LLVM_DEBUG(dbgs() << " - " << getIndex(I) << ": " << getBlockName(Node)
1172 << "\n");
1173 Nodes[*I] = {Node, BFICallbackVH(*I, this)};
1174 }
1175
1176 Working.reserve(RPOT.size());
1177 for (size_t Index = 0; Index < RPOT.size(); ++Index)
1178 Working.emplace_back(Index);
1179 Freqs.resize(RPOT.size());
1180 }
1181
1182 template <class BT> void BlockFrequencyInfoImpl<BT>::initializeLoops() {
1183 LLVM_DEBUG(dbgs() << "loop-detection\n");
1184 if (LI->empty())
1185 return;
1186
1187
1188 std::deque<std::pair<const LoopT *, LoopData *>> Q;
1189 for (const LoopT *L : *LI)
1190 Q.emplace_back(L, nullptr);
1191 while (!Q.empty()) {
1192 const LoopT *Loop = Q.front().first;
1193 LoopData *Parent = Q.front().second;
1194 Q.pop_front();
1195
1196 BlockNode Header = getNode(Loop->getHeader());
1197 assert(Header.isValid());
1198
1199 Loops.emplace_back(Parent, Header);
1200 Working[Header.Index].Loop = &Loops.back();
1201 LLVM_DEBUG(dbgs() << " - loop = " << getBlockName(Header) << "\n");
1202
1203 for (const LoopT *L : *Loop)
1204 Q.emplace_back(L, &Loops.back());
1205 }
1206
1207
1208
1209 for (size_t Index = 0; Index < RPOT.size(); ++Index) {
1210
1211 if (Working[Index].isLoopHeader()) {
1212 LoopData *ContainingLoop = Working[Index].getContainingLoop();
1213 if (ContainingLoop)
1214 ContainingLoop->Nodes.push_back(Index);
1215 continue;
1216 }
1217
1218 const LoopT *Loop = LI->getLoopFor(RPOT[Index]);
1219 if (!Loop)
1220 continue;
1221
1222
1223 BlockNode Header = getNode(Loop->getHeader());
1224 assert(Header.isValid());
1225 const auto &HeaderData = Working[Header.Index];
1226 assert(HeaderData.isLoopHeader());
1227
1228 Working[Index].Loop = HeaderData.Loop;
1229 HeaderData.Loop->Nodes.push_back(Index);
1230 LLVM_DEBUG(dbgs() << " - loop = " << getBlockName(Header)
1231 << ": member = " << getBlockName(Index) << "\n");
1232 }
1233 }
1234
1235 template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInLoops() {
1236
1237 for (auto L = Loops.rbegin(), E = Loops.rend(); L != E; ++L) {
1238 if (computeMassInLoop(*L))
1239 continue;
1240 auto Next = std::next(L);
1241 computeIrreducibleMass(&*L, L.base());
1242 L = std::prev(Next);
1243 if (computeMassInLoop(*L))
1244 continue;
1245 llvm_unreachable("unhandled irreducible control flow");
1246 }
1247 }
1248
1249 template <class BT>
1250 bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) {
1251
1252 LLVM_DEBUG(dbgs() << "compute-mass-in-loop: " << getLoopName(Loop) << "\n");
1253
1254 if (Loop.isIrreducible()) {
1255 LLVM_DEBUG(dbgs() << "isIrreducible = true\n");
1256 Distribution Dist;
1257 unsigned NumHeadersWithWeight = 0;
1258 std::optional<uint64_t> MinHeaderWeight;
1259 DenseSet<uint32_t> HeadersWithoutWeight;
1260 HeadersWithoutWeight.reserve(Loop.NumHeaders);
1261 for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
1262 auto &HeaderNode = Loop.Nodes[H];
1263 const BlockT *Block = getBlock(HeaderNode);
1264 IsIrrLoopHeader.set(Loop.Nodes[H].Index);
1265 std::optional<uint64_t> HeaderWeight = Block->getIrrLoopHeaderWeight();
1266 if (!HeaderWeight) {
1267 LLVM_DEBUG(dbgs() << "Missing irr loop header metadata on "
1268 << getBlockName(HeaderNode) << "\n");
1269 HeadersWithoutWeight.insert(H);
1270 continue;
1271 }
1272 LLVM_DEBUG(dbgs() << getBlockName(HeaderNode)
1273 << " has irr loop header weight " << *HeaderWeight
1274 << "\n");
1275 NumHeadersWithWeight++;
1276 uint64_t HeaderWeightValue = *HeaderWeight;
1277 if (!MinHeaderWeight || HeaderWeightValue < MinHeaderWeight)
1278 MinHeaderWeight = HeaderWeightValue;
1279 if (HeaderWeightValue) {
1280 Dist.addLocal(HeaderNode, HeaderWeightValue);
1281 }
1282 }
1283
1284
1285
1286
1287
1288
1289 if (!MinHeaderWeight)
1290 MinHeaderWeight = 1;
1291 for (uint32_t H : HeadersWithoutWeight) {
1292 auto &HeaderNode = Loop.Nodes[H];
1293 assert(!getBlock(HeaderNode)->getIrrLoopHeaderWeight() &&
1294 "Shouldn't have a weight metadata");
1295 uint64_t MinWeight = *MinHeaderWeight;
1296 LLVM_DEBUG(dbgs() << "Giving weight " << MinWeight << " to "
1297 << getBlockName(HeaderNode) << "\n");
1298 if (MinWeight)
1299 Dist.addLocal(HeaderNode, MinWeight);
1300 }
1301 distributeIrrLoopHeaderMass(Dist);
1302 for (const BlockNode &M : Loop.Nodes)
1303 if (!propagateMassToSuccessors(&Loop, M))
1304 llvm_unreachable("unhandled irreducible control flow");
1305 if (NumHeadersWithWeight == 0)
1306
1307 adjustLoopHeaderMass(Loop);
1308 } else {
1309 Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
1310 if (!propagateMassToSuccessors(&Loop, Loop.getHeader()))
1311 llvm_unreachable("irreducible control flow to loop header!?");
1312 for (const BlockNode &M : Loop.members())
1313 if (!propagateMassToSuccessors(&Loop, M))
1314
1315 return false;
1316 }
1317
1318 computeLoopScale(Loop);
1319 packageLoop(Loop);
1320 return true;
1321 }
1322
1323 template <class BT>
1324 bool BlockFrequencyInfoImpl<BT>::tryToComputeMassInFunction() {
1325
1326 LLVM_DEBUG(dbgs() << "compute-mass-in-function\n");
1327 assert(!Working.empty() && "no blocks in function");
1328 assert(!Working[0].isLoopHeader() && "entry block is a loop header");
1329
1330 Working[0].getMass() = BlockMass::getFull();
1331 for (rpot_iterator I = rpot_begin(), IE = rpot_end(); I != IE; ++I) {
1332
1333 BlockNode Node = getNode(I);
1334 if (Working[Node.Index].isPackaged())
1335 continue;
1336
1337 if (!propagateMassToSuccessors(nullptr, Node))
1338 return false;
1339 }
1340 return true;
1341 }
1342
1343 template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInFunction() {
1344 if (tryToComputeMassInFunction())
1345 return;
1346 computeIrreducibleMass(nullptr, Loops.begin());
1347 if (tryToComputeMassInFunction())
1348 return;
1349 llvm_unreachable("unhandled irreducible control flow");
1350 }
1351
1352 template <class BT>
1353 bool BlockFrequencyInfoImpl<BT>::needIterativeInference() const {
1354 if (!UseIterativeBFIInference)
1355 return false;
1356 if (!F->getFunction().hasProfileData())
1357 return false;
1358
1359
1360 for (auto L = Loops.rbegin(), E = Loops.rend(); L != E; ++L) {
1361 if (L->isIrreducible())
1362 return true;
1363 }
1364 return false;
1365 }
1366
1367 template <class BT> void BlockFrequencyInfoImpl<BT>::applyIterativeInference() {
1368
1369
1370
1371
1372 std::vector<const BlockT *> ReachableBlocks;
1373 findReachableBlocks(ReachableBlocks);
1374 if (ReachableBlocks.empty())
1375 return;
1376
1377
1378
1379 DenseMap<const BlockT *, size_t> BlockIndex;
1380
1381 auto Freq = std::vector<Scaled64>(ReachableBlocks.size());
1382 Scaled64 SumFreq;
1383 for (size_t I = 0; I < ReachableBlocks.size(); I++) {
1384 const BlockT *BB = ReachableBlocks[I];
1385 BlockIndex[BB] = I;
1386 Freq[I] = getFloatingBlockFreq(BB);
1387 SumFreq += Freq[I];
1388 }
1389 assert(!SumFreq.isZero() && "empty initial block frequencies");
1390
1391 LLVM_DEBUG(dbgs() << "Applying iterative inference for " << F->getName()
1392 << " with " << ReachableBlocks.size() << " blocks\n");
1393
1394
1395 for (auto &Value : Freq) {
1396 Value /= SumFreq;
1397 }
1398
1399
1400
1401 ProbMatrixType ProbMatrix;
1402 initTransitionProbabilities(ReachableBlocks, BlockIndex, ProbMatrix);
1403
1404
1405 iterativeInference(ProbMatrix, Freq);
1406
1407
1408 for (const BlockT &BB : *F) {
1409 auto Node = getNode(&BB);
1410 if (!Node.isValid())
1411 continue;
1412 if (auto It = BlockIndex.find(&BB); It != BlockIndex.end())
1413 Freqs[Node.Index].Scaled = Freq[It->second];
1414 else
1415 Freqs[Node.Index].Scaled = Scaled64::getZero();
1416 }
1417 }
1418
1419 template <class BT>
1420 void BlockFrequencyInfoImpl<BT>::iterativeInference(
1421 const ProbMatrixType &ProbMatrix, std::vector<Scaled64> &Freq) const {
1422 assert(0.0 < IterativeBFIPrecision && IterativeBFIPrecision < 1.0 &&
1423 "incorrectly specified precision");
1424
1425 const auto Precision =
1426 Scaled64::getInverse(static_cast<uint64_t>(1.0 / IterativeBFIPrecision));
1427 const size_t MaxIterations = IterativeBFIMaxIterationsPerBlock * Freq.size();
1428
1429 #ifndef NDEBUG
1430 LLVM_DEBUG(dbgs() << " Initial discrepancy = "
1431 << discrepancy(ProbMatrix, Freq).toString() << "\n");
1432 #endif
1433
1434
1435 auto Successors = std::vector<std::vector<size_t>>(Freq.size());
1436 for (size_t I = 0; I < Freq.size(); I++) {
1437 for (const auto &Jump : ProbMatrix[I]) {
1438 Successors[Jump.first].push_back(I);
1439 }
1440 }
1441
1442
1443
1444
1445
1446 auto IsActive = BitVector(Freq.size(), false);
1447 std::queue<size_t> ActiveSet;
1448 for (size_t I = 0; I < Freq.size(); I++) {
1449 if (Freq[I] > 0) {
1450 ActiveSet.push(I);
1451 IsActive[I] = true;
1452 }
1453 }
1454
1455
1456 size_t It = 0;
1457 while (It++ < MaxIterations && !ActiveSet.empty()) {
1458 size_t I = ActiveSet.front();
1459 ActiveSet.pop();
1460 IsActive[I] = false;
1461
1462
1463
1464
1465 Scaled64 NewFreq;
1466 Scaled64 OneMinusSelfProb = Scaled64::getOne();
1467 for (const auto &Jump : ProbMatrix[I]) {
1468 if (Jump.first == I) {
1469 OneMinusSelfProb -= Jump.second;
1470 } else {
1471 NewFreq += Freq[Jump.first] * Jump.second;
1472 }
1473 }
1474 if (OneMinusSelfProb != Scaled64::getOne())
1475 NewFreq /= OneMinusSelfProb;
1476
1477
1478
1479 auto Change = Freq[I] >= NewFreq ? Freq[I] - NewFreq : NewFreq - Freq[I];
1480 if (Change > Precision) {
1481 ActiveSet.push(I);
1482 IsActive[I] = true;
1483 for (size_t Succ : Successors[I]) {
1484 if (!IsActive[Succ]) {
1485 ActiveSet.push(Succ);
1486 IsActive[Succ] = true;
1487 }
1488 }
1489 }
1490
1491
1492 Freq[I] = NewFreq;
1493 }
1494
1495 LLVM_DEBUG(dbgs() << " Completed " << It << " inference iterations"
1496 << format(" (%0.0f per block)", double(It) / Freq.size())
1497 << "\n");
1498 #ifndef NDEBUG
1499 LLVM_DEBUG(dbgs() << " Final discrepancy = "
1500 << discrepancy(ProbMatrix, Freq).toString() << "\n");
1501 #endif
1502 }
1503
1504 template <class BT>
1505 void BlockFrequencyInfoImpl<BT>::findReachableBlocks(
1506 std::vector<const BlockT *> &Blocks) const {
1507
1508
1509 std::queue<const BlockT *> Queue;
1510 SmallPtrSet<const BlockT *, 8> Reachable;
1511 const BlockT *Entry = &F->front();
1512 Queue.push(Entry);
1513 Reachable.insert(Entry);
1514 while (!Queue.empty()) {
1515 const BlockT *SrcBB = Queue.front();
1516 Queue.pop();
1517 for (const BlockT *DstBB : children<const BlockT *>(SrcBB)) {
1518 auto EP = BPI->getEdgeProbability(SrcBB, DstBB);
1519 if (EP.isZero())
1520 continue;
1521 if (Reachable.insert(DstBB).second)
1522 Queue.push(DstBB);
1523 }
1524 }
1525
1526
1527
1528 SmallPtrSet<const BlockT *, 8> InverseReachable;
1529 for (const BlockT &BB : *F) {
1530
1531 bool HasSucc = !llvm::children<const BlockT *>(&BB).empty();
1532 if (!HasSucc && Reachable.count(&BB)) {
1533 Queue.push(&BB);
1534 InverseReachable.insert(&BB);
1535 }
1536 }
1537 while (!Queue.empty()) {
1538 const BlockT *SrcBB = Queue.front();
1539 Queue.pop();
1540 for (const BlockT *DstBB : inverse_children<const BlockT *>(SrcBB)) {
1541 auto EP = BPI->getEdgeProbability(DstBB, SrcBB);
1542 if (EP.isZero())
1543 continue;
1544 if (InverseReachable.insert(DstBB).second)
1545 Queue.push(DstBB);
1546 }
1547 }
1548
1549
1550 Blocks.reserve(F->size());
1551 for (const BlockT &BB : *F) {
1552 if (Reachable.count(&BB) && InverseReachable.count(&BB)) {
1553 Blocks.push_back(&BB);
1554 }
1555 }
1556 }
1557
1558 template <class BT>
1559 void BlockFrequencyInfoImpl<BT>::initTransitionProbabilities(
1560 const std::vector<const BlockT *> &Blocks,
1561 const DenseMap<const BlockT *, size_t> &BlockIndex,
1562 ProbMatrixType &ProbMatrix) const {
1563 const size_t NumBlocks = Blocks.size();
1564 auto Succs = std::vector<std::vector<std::pair<size_t, Scaled64>>>(NumBlocks);
1565 auto SumProb = std::vector<Scaled64>(NumBlocks);
1566
1567
1568 for (size_t Src = 0; Src < NumBlocks; Src++) {
1569 const BlockT *BB = Blocks[Src];
1570 SmallPtrSet<const BlockT *, 2> UniqueSuccs;
1571 for (const auto SI : children<const BlockT *>(BB)) {
1572
1573 if (!BlockIndex.contains(SI))
1574 continue;
1575
1576 if (!UniqueSuccs.insert(SI).second)
1577 continue;
1578
1579 auto EP = BPI->getEdgeProbability(BB, SI);
1580 if (EP.isZero())
1581 continue;
1582
1583 auto EdgeProb =
1584 Scaled64::getFraction(EP.getNumerator(), EP.getDenominator());
1585 size_t Dst = BlockIndex.find(SI)->second;
1586 Succs[Src].push_back(std::make_pair(Dst, EdgeProb));
1587 SumProb[Src] += EdgeProb;
1588 }
1589 }
1590
1591
1592 ProbMatrix = ProbMatrixType(NumBlocks);
1593 for (size_t Src = 0; Src < NumBlocks; Src++) {
1594
1595 if (Succs[Src].empty())
1596 continue;
1597
1598 assert(!SumProb[Src].isZero() && "Zero sum probability of non-exit block");
1599 for (auto &Jump : Succs[Src]) {
1600 size_t Dst = Jump.first;
1601 Scaled64 Prob = Jump.second;
1602 ProbMatrix[Dst].push_back(std::make_pair(Src, Prob / SumProb[Src]));
1603 }
1604 }
1605
1606
1607 size_t EntryIdx = BlockIndex.find(&F->front())->second;
1608 for (size_t Src = 0; Src < NumBlocks; Src++) {
1609 if (Succs[Src].empty()) {
1610 ProbMatrix[EntryIdx].push_back(std::make_pair(Src, Scaled64::getOne()));
1611 }
1612 }
1613 }
1614
1615 #ifndef NDEBUG
1616 template <class BT>
1617 BlockFrequencyInfoImplBase::Scaled64 BlockFrequencyInfoImpl<BT>::discrepancy(
1618 const ProbMatrixType &ProbMatrix, const std::vector<Scaled64> &Freq) const {
1619 assert(Freq[0] > 0 && "Incorrectly computed frequency of the entry block");
1620 Scaled64 Discrepancy;
1621 for (size_t I = 0; I < ProbMatrix.size(); I++) {
1622 Scaled64 Sum;
1623 for (const auto &Jump : ProbMatrix[I]) {
1624 Sum += Freq[Jump.first] * Jump.second;
1625 }
1626 Discrepancy += Freq[I] >= Sum ? Freq[I] - Sum : Sum - Freq[I];
1627 }
1628
1629 return Discrepancy / Freq[0];
1630 }
1631 #endif
1632
1633
1634 namespace bfi_detail {
1635
1636 template <class BT> struct BlockEdgesAdder {
1637 using BlockT = BT;
1638 using LoopData = BlockFrequencyInfoImplBase::LoopData;
1639 using Successor = GraphTraits<const BlockT *>;
1640
1641 const BlockFrequencyInfoImpl<BT> &BFI;
1642
1643 explicit BlockEdgesAdder(const BlockFrequencyInfoImpl<BT> &BFI)
1644 : BFI(BFI) {}
1645
1646 void operator()(IrreducibleGraph &G, IrreducibleGraph::IrrNode &Irr,
1647 const LoopData *OuterLoop) {
1648 const BlockT *BB = BFI.RPOT[Irr.Node.Index];
1649 for (const auto *Succ : children<const BlockT *>(BB))
1650 G.addEdge(Irr, BFI.getNode(Succ), OuterLoop);
1651 }
1652 };
1653
1654 }
1655
1656 template <class BT>
1657 void BlockFrequencyInfoImpl<BT>::computeIrreducibleMass(
1658 LoopData *OuterLoop, std::list<LoopData>::iterator Insert) {
1659 LLVM_DEBUG(dbgs() << "analyze-irreducible-in-";
1660 if (OuterLoop) dbgs()
1661 << "loop: " << getLoopName(*OuterLoop) << "\n";
1662 else dbgs() << "function\n");
1663
1664 using namespace bfi_detail;
1665
1666
1667
1668 BlockEdgesAdder<BT> addBlockEdges(*this);
1669 IrreducibleGraph G(*this, OuterLoop, addBlockEdges);
1670
1671 for (auto &L : analyzeIrreducible(G, OuterLoop, Insert))
1672 computeMassInLoop(L);
1673
1674 if (!OuterLoop)
1675 return;
1676 updateLoopWithIrreducible(*OuterLoop);
1677 }
1678
1679
1680 inline uint32_t getWeightFromBranchProb(const BranchProbability Prob) {
1681 return Prob.getNumerator();
1682 }
1683
1684 template <class BT>
1685 bool
1686 BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(LoopData *OuterLoop,
1687 const BlockNode &Node) {
1688 LLVM_DEBUG(dbgs() << " - node: " << getBlockName(Node) << "\n");
1689
1690 Distribution Dist;
1691 if (auto *Loop = Working[Node.Index].getPackagedLoop()) {
1692 assert(Loop != OuterLoop && "Cannot propagate mass in a packaged loop");
1693 if (!addLoopSuccessorsToDist(OuterLoop, *Loop, Dist))
1694
1695 return false;
1696 } else {
1697 const BlockT *BB = getBlock(Node);
1698 for (auto SI = GraphTraits<const BlockT *>::child_begin(BB),
1699 SE = GraphTraits<const BlockT *>::child_end(BB);
1700 SI != SE; ++SI)
1701 if (!addToDist(
1702 Dist, OuterLoop, Node, getNode(*SI),
1703 getWeightFromBranchProb(BPI->getEdgeProbability(BB, SI))))
1704
1705 return false;
1706 }
1707
1708
1709
1710 distributeMass(Node, OuterLoop, Dist);
1711 return true;
1712 }
1713
1714 template <class BT>
1715 raw_ostream &BlockFrequencyInfoImpl<BT>::print(raw_ostream &OS) const {
1716 if (!F)
1717 return OS;
1718 OS << "block-frequency-info: " << F->getName() << "\n";
1719 for (const BlockT &BB : *F) {
1720 OS << " - " << bfi_detail::getBlockName(&BB) << ": float = ";
1721 getFloatingBlockFreq(&BB).print(OS, 5)
1722 << ", int = " << getBlockFreq(&BB).getFrequency();
1723 if (std::optional<uint64_t> ProfileCount =
1724 BlockFrequencyInfoImplBase::getBlockProfileCount(
1725 F->getFunction(), getNode(&BB)))
1726 OS << ", count = " << *ProfileCount;
1727 if (std::optional<uint64_t> IrrLoopHeaderWeight =
1728 BB.getIrrLoopHeaderWeight())
1729 OS << ", irr_loop_header_weight = " << *IrrLoopHeaderWeight;
1730 OS << "\n";
1731 }
1732
1733
1734 OS << "\n";
1735 return OS;
1736 }
1737
1738 template <class BT>
1739 void BlockFrequencyInfoImpl<BT>::verifyMatch(
1740 BlockFrequencyInfoImpl<BT> &Other) const {
1741 bool Match = true;
1742 DenseMap<const BlockT *, BlockNode> ValidNodes;
1743 DenseMap<const BlockT *, BlockNode> OtherValidNodes;
1744 for (auto &Entry : Nodes) {
1745 const BlockT *BB = Entry.first;
1746 if (BB) {
1747 ValidNodes[BB] = Entry.second.first;
1748 }
1749 }
1750 for (auto &Entry : Other.Nodes) {
1751 const BlockT *BB = Entry.first;
1752 if (BB) {
1753 OtherValidNodes[BB] = Entry.second.first;
1754 }
1755 }
1756 unsigned NumValidNodes = ValidNodes.size();
1757 unsigned NumOtherValidNodes = OtherValidNodes.size();
1758 if (NumValidNodes != NumOtherValidNodes) {
1759 Match = false;
1760 dbgs() << "Number of blocks mismatch: " << NumValidNodes << " vs "
1761 << NumOtherValidNodes << "\n";
1762 } else {
1763 for (auto &Entry : ValidNodes) {
1764 const BlockT *BB = Entry.first;
1765 BlockNode Node = Entry.second;
1766 if (auto It = OtherValidNodes.find(BB); It != OtherValidNodes.end()) {
1767 BlockNode OtherNode = It->second;
1768 const auto &Freq = Freqs[Node.Index];
1769 const auto &OtherFreq = Other.Freqs[OtherNode.Index];
1770 if (Freq.Integer != OtherFreq.Integer) {
1771 Match = false;
1772 dbgs() << "Freq mismatch: " << bfi_detail::getBlockName(BB) << " "
1773 << Freq.Integer << " vs " << OtherFreq.Integer << "\n";
1774 }
1775 } else {
1776 Match = false;
1777 dbgs() << "Block " << bfi_detail::getBlockName(BB) << " index "
1778 << Node.Index << " does not exist in Other.\n";
1779 }
1780 }
1781
1782
1783 }
1784 if (!Match) {
1785 dbgs() << "This\n";
1786 print(dbgs());
1787 dbgs() << "Other\n";
1788 Other.print(dbgs());
1789 }
1790 assert(Match && "BFI mismatch");
1791 }
1792
1793
1794
1795
1796 enum GVDAGType { GVDT_None, GVDT_Fraction, GVDT_Integer, GVDT_Count };
1797
1798 template <class BlockFrequencyInfoT, class BranchProbabilityInfoT>
1799 struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits {
1800 using GTraits = GraphTraits<BlockFrequencyInfoT *>;
1801 using NodeRef = typename GTraits::NodeRef;
1802 using EdgeIter = typename GTraits::ChildIteratorType;
1803 using NodeIter = typename GTraits::nodes_iterator;
1804
1805 uint64_t MaxFrequency = 0;
1806
1807 explicit BFIDOTGraphTraitsBase(bool isSimple = false)
1808 : DefaultDOTGraphTraits(isSimple) {}
1809
1810 static StringRef getGraphName(const BlockFrequencyInfoT *G) {
1811 return G->getFunction()->getName();
1812 }
1813
1814 std::string getNodeAttributes(NodeRef Node, const BlockFrequencyInfoT *Graph,
1815 unsigned HotPercentThreshold = 0) {
1816 std::string Result;
1817 if (!HotPercentThreshold)
1818 return Result;
1819
1820
1821 if (!MaxFrequency) {
1822 for (NodeIter I = GTraits::nodes_begin(Graph),
1823 E = GTraits::nodes_end(Graph);
1824 I != E; ++I) {
1825 NodeRef N = *I;
1826 MaxFrequency =
1827 std::max(MaxFrequency, Graph->getBlockFreq(N).getFrequency());
1828 }
1829 }
1830 BlockFrequency Freq = Graph->getBlockFreq(Node);
1831 BlockFrequency HotFreq =
1832 (BlockFrequency(MaxFrequency) *
1833 BranchProbability::getBranchProbability(HotPercentThreshold, 100));
1834
1835 if (Freq < HotFreq)
1836 return Result;
1837
1838 raw_string_ostream OS(Result);
1839 OS << "color=\"red\"";
1840 OS.flush();
1841 return Result;
1842 }
1843
1844 std::string getNodeLabel(NodeRef Node, const BlockFrequencyInfoT *Graph,
1845 GVDAGType GType, int layout_order = -1) {
1846 std::string Result;
1847 raw_string_ostream OS(Result);
1848
1849 if (layout_order != -1)
1850 OS << Node->getName() << "[" << layout_order << "] : ";
1851 else
1852 OS << Node->getName() << " : ";
1853 switch (GType) {
1854 case GVDT_Fraction:
1855 OS << printBlockFreq(*Graph, *Node);
1856 break;
1857 case GVDT_Integer:
1858 OS << Graph->getBlockFreq(Node).getFrequency();
1859 break;
1860 case GVDT_Count: {
1861 auto Count = Graph->getBlockProfileCount(Node);
1862 if (Count)
1863 OS << *Count;
1864 else
1865 OS << "Unknown";
1866 break;
1867 }
1868 case GVDT_None:
1869 llvm_unreachable("If we are not supposed to render a graph we should "
1870 "never reach this point.");
1871 }
1872 return Result;
1873 }
1874
1875 std::string getEdgeAttributes(NodeRef Node, EdgeIter EI,
1876 const BlockFrequencyInfoT *BFI,
1877 const BranchProbabilityInfoT *BPI,
1878 unsigned HotPercentThreshold = 0) {
1879 std::string Str;
1880 if (!BPI)
1881 return Str;
1882
1883 BranchProbability BP = BPI->getEdgeProbability(Node, EI);
1884 uint32_t N = BP.getNumerator();
1885 uint32_t D = BP.getDenominator();
1886 double Percent = 100.0 * N / D;
1887 raw_string_ostream OS(Str);
1888 OS << format("label=\"%.1f%%\"", Percent);
1889
1890 if (HotPercentThreshold) {
1891 BlockFrequency EFreq = BFI->getBlockFreq(Node) * BP;
1892 BlockFrequency HotFreq = BlockFrequency(MaxFrequency) *
1893 BranchProbability(HotPercentThreshold, 100);
1894
1895 if (EFreq >= HotFreq) {
1896 OS << ",color=\"red\"";
1897 }
1898 }
1899
1900 OS.flush();
1901 return Str;
1902 }
1903 };
1904
1905 }
1906
1907 #undef DEBUG_TYPE
1908
1909 #endif