Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:43:32

0001 //===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
0010 //
0011 // Software pipelining (SWP) is an instruction scheduling technique for loops
0012 // that overlap loop iterations and exploits ILP via a compiler transformation.
0013 //
0014 // Swing Modulo Scheduling is an implementation of software pipelining
0015 // that generates schedules that are near optimal in terms of initiation
0016 // interval, register requirements, and stage count. See the papers:
0017 //
0018 // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
0019 // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
0020 // Conference on Parallel Architectures and Compilation Techiniques.
0021 //
0022 // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
0023 // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
0024 // Transactions on Computers, Vol. 50, No. 3, 2001.
0025 //
0026 // "An Implementation of Swing Modulo Scheduling With Extensions for
0027 // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
0028 // Urbana-Champaign, 2005.
0029 //
0030 //
0031 // The SMS algorithm consists of three main steps after computing the minimal
0032 // initiation interval (MII).
0033 // 1) Analyze the dependence graph and compute information about each
0034 //    instruction in the graph.
0035 // 2) Order the nodes (instructions) by priority based upon the heuristics
0036 //    described in the algorithm.
0037 // 3) Attempt to schedule the nodes in the specified order using the MII.
0038 //
0039 //===----------------------------------------------------------------------===//
0040 #ifndef LLVM_CODEGEN_MACHINEPIPELINER_H
0041 #define LLVM_CODEGEN_MACHINEPIPELINER_H
0042 
0043 #include "llvm/ADT/STLExtras.h"
0044 #include "llvm/ADT/SetVector.h"
0045 #include "llvm/CodeGen/DFAPacketizer.h"
0046 #include "llvm/CodeGen/MachineDominators.h"
0047 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
0048 #include "llvm/CodeGen/MachineScheduler.h"
0049 #include "llvm/CodeGen/RegisterClassInfo.h"
0050 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
0051 #include "llvm/CodeGen/ScheduleDAGMutation.h"
0052 #include "llvm/CodeGen/TargetInstrInfo.h"
0053 #include "llvm/CodeGen/WindowScheduler.h"
0054 #include "llvm/InitializePasses.h"
0055 
0056 #include <deque>
0057 
0058 namespace llvm {
0059 
0060 class AAResults;
0061 class NodeSet;
0062 class SMSchedule;
0063 
0064 extern cl::opt<bool> SwpEnableCopyToPhi;
0065 extern cl::opt<int> SwpForceIssueWidth;
0066 
0067 /// The main class in the implementation of the target independent
0068 /// software pipeliner pass.
0069 class MachinePipeliner : public MachineFunctionPass {
0070 public:
0071   MachineFunction *MF = nullptr;
0072   MachineOptimizationRemarkEmitter *ORE = nullptr;
0073   const MachineLoopInfo *MLI = nullptr;
0074   const MachineDominatorTree *MDT = nullptr;
0075   const InstrItineraryData *InstrItins = nullptr;
0076   const TargetInstrInfo *TII = nullptr;
0077   RegisterClassInfo RegClassInfo;
0078   bool disabledByPragma = false;
0079   unsigned II_setByPragma = 0;
0080 
0081 #ifndef NDEBUG
0082   static int NumTries;
0083 #endif
0084 
0085   /// Cache the target analysis information about the loop.
0086   struct LoopInfo {
0087     MachineBasicBlock *TBB = nullptr;
0088     MachineBasicBlock *FBB = nullptr;
0089     SmallVector<MachineOperand, 4> BrCond;
0090     MachineInstr *LoopInductionVar = nullptr;
0091     MachineInstr *LoopCompare = nullptr;
0092     std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo =
0093         nullptr;
0094   };
0095   LoopInfo LI;
0096 
0097   static char ID;
0098 
0099   MachinePipeliner() : MachineFunctionPass(ID) {
0100     initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
0101   }
0102 
0103   bool runOnMachineFunction(MachineFunction &MF) override;
0104 
0105   void getAnalysisUsage(AnalysisUsage &AU) const override;
0106 
0107 private:
0108   void preprocessPhiNodes(MachineBasicBlock &B);
0109   bool canPipelineLoop(MachineLoop &L);
0110   bool scheduleLoop(MachineLoop &L);
0111   bool swingModuloScheduler(MachineLoop &L);
0112   void setPragmaPipelineOptions(MachineLoop &L);
0113   bool runWindowScheduler(MachineLoop &L);
0114   bool useSwingModuloScheduler();
0115   bool useWindowScheduler(bool Changed);
0116 };
0117 
0118 /// Represents a dependence between two instruction.
0119 class SwingSchedulerDDGEdge {
0120   SUnit *Dst = nullptr;
0121   SDep Pred;
0122   unsigned Distance = 0;
0123 
0124 public:
0125   /// Creates an edge corresponding to an edge represented by \p PredOrSucc and
0126   /// \p Dep in the original DAG. This pair has no information about the
0127   /// direction of the edge, so we need to pass an additional argument \p
0128   /// IsSucc.
0129   SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc)
0130       : Dst(PredOrSucc), Pred(Dep), Distance(0u) {
0131     SUnit *Src = Dep.getSUnit();
0132 
0133     if (IsSucc) {
0134       std::swap(Src, Dst);
0135       Pred.setSUnit(Src);
0136     }
0137 
0138     // An anti-dependence to PHI means loop-carried dependence.
0139     if (Pred.getKind() == SDep::Anti && Src->getInstr()->isPHI()) {
0140       Distance = 1;
0141       std::swap(Src, Dst);
0142       auto Reg = Pred.getReg();
0143       Pred = SDep(Src, SDep::Kind::Data, Reg);
0144     }
0145   }
0146 
0147   /// Returns the SUnit from which the edge comes (source node).
0148   SUnit *getSrc() const { return Pred.getSUnit(); }
0149 
0150   /// Returns the SUnit to which the edge points (destination node).
0151   SUnit *getDst() const { return Dst; }
0152 
0153   /// Returns the latency value for the edge.
0154   unsigned getLatency() const { return Pred.getLatency(); }
0155 
0156   /// Sets the latency for the edge.
0157   void setLatency(unsigned Latency) { Pred.setLatency(Latency); }
0158 
0159   /// Returns the distance value for the edge.
0160   unsigned getDistance() const { return Distance; }
0161 
0162   /// Sets the distance value for the edge.
0163   void setDistance(unsigned D) { Distance = D; }
0164 
0165   /// Returns the register associated with the edge.
0166   Register getReg() const { return Pred.getReg(); }
0167 
0168   /// Returns true if the edge represents anti dependence.
0169   bool isAntiDep() const { return Pred.getKind() == SDep::Kind::Anti; }
0170 
0171   /// Returns true if the edge represents output dependence.
0172   bool isOutputDep() const { return Pred.getKind() == SDep::Kind::Output; }
0173 
0174   /// Returns true if the edge represents a dependence that is not data, anti or
0175   /// output dependence.
0176   bool isOrderDep() const { return Pred.getKind() == SDep::Kind::Order; }
0177 
0178   /// Returns true if the edge represents unknown scheduling barrier.
0179   bool isBarrier() const { return Pred.isBarrier(); }
0180 
0181   /// Returns true if the edge represents an artificial dependence.
0182   bool isArtificial() const { return Pred.isArtificial(); }
0183 
0184   /// Tests if this is a Data dependence that is associated with a register.
0185   bool isAssignedRegDep() const { return Pred.isAssignedRegDep(); }
0186 
0187   /// Returns true for DDG nodes that we ignore when computing the cost
0188   /// functions. We ignore the back-edge recurrence in order to avoid unbounded
0189   /// recursion in the calculation of the ASAP, ALAP, etc functions.
0190   bool ignoreDependence(bool IgnoreAnti) const;
0191 };
0192 
0193 /// Represents dependencies between instructions. This class is a wrapper of
0194 /// `SUnits` and its dependencies to manipulate back-edges in a natural way.
0195 /// Currently it only supports back-edges via PHI, which are expressed as
0196 /// anti-dependencies in the original DAG.
0197 /// FIXME: Support any other loop-carried dependencies
0198 class SwingSchedulerDDG {
0199   using EdgesType = SmallVector<SwingSchedulerDDGEdge, 4>;
0200 
0201   struct SwingSchedulerDDGEdges {
0202     EdgesType Preds;
0203     EdgesType Succs;
0204   };
0205 
0206   void initEdges(SUnit *SU);
0207 
0208   SUnit *EntrySU;
0209   SUnit *ExitSU;
0210 
0211   std::vector<SwingSchedulerDDGEdges> EdgesVec;
0212   SwingSchedulerDDGEdges EntrySUEdges;
0213   SwingSchedulerDDGEdges ExitSUEdges;
0214 
0215   void addEdge(const SUnit *SU, const SwingSchedulerDDGEdge &Edge);
0216 
0217   SwingSchedulerDDGEdges &getEdges(const SUnit *SU);
0218   const SwingSchedulerDDGEdges &getEdges(const SUnit *SU) const;
0219 
0220 public:
0221   SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU, SUnit *ExitSU);
0222 
0223   const EdgesType &getInEdges(const SUnit *SU) const;
0224 
0225   const EdgesType &getOutEdges(const SUnit *SU) const;
0226 };
0227 
0228 /// This class builds the dependence graph for the instructions in a loop,
0229 /// and attempts to schedule the instructions using the SMS algorithm.
0230 class SwingSchedulerDAG : public ScheduleDAGInstrs {
0231   MachinePipeliner &Pass;
0232 
0233   std::unique_ptr<SwingSchedulerDDG> DDG;
0234 
0235   /// The minimum initiation interval between iterations for this schedule.
0236   unsigned MII = 0;
0237   /// The maximum initiation interval between iterations for this schedule.
0238   unsigned MAX_II = 0;
0239   /// Set to true if a valid pipelined schedule is found for the loop.
0240   bool Scheduled = false;
0241   MachineLoop &Loop;
0242   LiveIntervals &LIS;
0243   const RegisterClassInfo &RegClassInfo;
0244   unsigned II_setByPragma = 0;
0245   TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr;
0246 
0247   /// A topological ordering of the SUnits, which is needed for changing
0248   /// dependences and iterating over the SUnits.
0249   ScheduleDAGTopologicalSort Topo;
0250 
0251   struct NodeInfo {
0252     int ASAP = 0;
0253     int ALAP = 0;
0254     int ZeroLatencyDepth = 0;
0255     int ZeroLatencyHeight = 0;
0256 
0257     NodeInfo() = default;
0258   };
0259   /// Computed properties for each node in the graph.
0260   std::vector<NodeInfo> ScheduleInfo;
0261 
0262   enum OrderKind { BottomUp = 0, TopDown = 1 };
0263   /// Computed node ordering for scheduling.
0264   SetVector<SUnit *> NodeOrder;
0265 
0266   using NodeSetType = SmallVector<NodeSet, 8>;
0267   using ValueMapTy = DenseMap<unsigned, unsigned>;
0268   using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
0269   using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
0270 
0271   /// Instructions to change when emitting the final schedule.
0272   DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges;
0273 
0274   /// We may create a new instruction, so remember it because it
0275   /// must be deleted when the pass is finished.
0276   DenseMap<MachineInstr*, MachineInstr *> NewMIs;
0277 
0278   /// Ordered list of DAG postprocessing steps.
0279   std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
0280 
0281   /// Helper class to implement Johnson's circuit finding algorithm.
0282   class Circuits {
0283     std::vector<SUnit> &SUnits;
0284     SetVector<SUnit *> Stack;
0285     BitVector Blocked;
0286     SmallVector<SmallPtrSet<SUnit *, 4>, 10> B;
0287     SmallVector<SmallVector<int, 4>, 16> AdjK;
0288     // Node to Index from ScheduleDAGTopologicalSort
0289     std::vector<int> *Node2Idx;
0290     unsigned NumPaths = 0u;
0291     static unsigned MaxPaths;
0292 
0293   public:
0294     Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo)
0295         : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
0296       Node2Idx = new std::vector<int>(SUs.size());
0297       unsigned Idx = 0;
0298       for (const auto &NodeNum : Topo)
0299         Node2Idx->at(NodeNum) = Idx++;
0300     }
0301     Circuits &operator=(const Circuits &other) = delete;
0302     Circuits(const Circuits &other) = delete;
0303     ~Circuits() { delete Node2Idx; }
0304 
0305     /// Reset the data structures used in the circuit algorithm.
0306     void reset() {
0307       Stack.clear();
0308       Blocked.reset();
0309       B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
0310       NumPaths = 0;
0311     }
0312 
0313     void createAdjacencyStructure(SwingSchedulerDAG *DAG);
0314     bool circuit(int V, int S, NodeSetType &NodeSets,
0315                  const SwingSchedulerDAG *DAG, bool HasBackedge = false);
0316     void unblock(int U);
0317   };
0318 
0319   struct CopyToPhiMutation : public ScheduleDAGMutation {
0320     void apply(ScheduleDAGInstrs *DAG) override;
0321   };
0322 
0323 public:
0324   SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
0325                     const RegisterClassInfo &rci, unsigned II,
0326                     TargetInstrInfo::PipelinerLoopInfo *PLI)
0327       : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
0328         RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI),
0329         Topo(SUnits, &ExitSU) {
0330     P.MF->getSubtarget().getSMSMutations(Mutations);
0331     if (SwpEnableCopyToPhi)
0332       Mutations.push_back(std::make_unique<CopyToPhiMutation>());
0333   }
0334 
0335   void schedule() override;
0336   void finishBlock() override;
0337 
0338   /// Return true if the loop kernel has been scheduled.
0339   bool hasNewSchedule() { return Scheduled; }
0340 
0341   /// Return the earliest time an instruction may be scheduled.
0342   int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
0343 
0344   /// Return the latest time an instruction my be scheduled.
0345   int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
0346 
0347   /// The mobility function, which the number of slots in which
0348   /// an instruction may be scheduled.
0349   int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
0350 
0351   /// The depth, in the dependence graph, for a node.
0352   unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
0353 
0354   /// The maximum unweighted length of a path from an arbitrary node to the
0355   /// given node in which each edge has latency 0
0356   int getZeroLatencyDepth(SUnit *Node) {
0357     return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
0358   }
0359 
0360   /// The height, in the dependence graph, for a node.
0361   unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
0362 
0363   /// The maximum unweighted length of a path from the given node to an
0364   /// arbitrary node in which each edge has latency 0
0365   int getZeroLatencyHeight(SUnit *Node) {
0366     return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
0367   }
0368 
0369   bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const;
0370 
0371   void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
0372 
0373   void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
0374 
0375   /// Return the new base register that was stored away for the changed
0376   /// instruction.
0377   unsigned getInstrBaseReg(SUnit *SU) const {
0378     DenseMap<SUnit *, std::pair<unsigned, int64_t>>::const_iterator It =
0379         InstrChanges.find(SU);
0380     if (It != InstrChanges.end())
0381       return It->second.first;
0382     return 0;
0383   }
0384 
0385   void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
0386     Mutations.push_back(std::move(Mutation));
0387   }
0388 
0389   static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
0390 
0391   const SwingSchedulerDDG *getDDG() const { return DDG.get(); }
0392 
0393 private:
0394   void addLoopCarriedDependences(AAResults *AA);
0395   void updatePhiDependences();
0396   void changeDependences();
0397   unsigned calculateResMII();
0398   unsigned calculateRecMII(NodeSetType &RecNodeSets);
0399   void findCircuits(NodeSetType &NodeSets);
0400   void fuseRecs(NodeSetType &NodeSets);
0401   void removeDuplicateNodes(NodeSetType &NodeSets);
0402   void computeNodeFunctions(NodeSetType &NodeSets);
0403   void registerPressureFilter(NodeSetType &NodeSets);
0404   void colocateNodeSets(NodeSetType &NodeSets);
0405   void checkNodeSets(NodeSetType &NodeSets);
0406   void groupRemainingNodes(NodeSetType &NodeSets);
0407   void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
0408                          SetVector<SUnit *> &NodesAdded);
0409   void computeNodeOrder(NodeSetType &NodeSets);
0410   void checkValidNodeOrder(const NodeSetType &Circuits) const;
0411   bool schedulePipeline(SMSchedule &Schedule);
0412   bool computeDelta(MachineInstr &MI, unsigned &Delta) const;
0413   MachineInstr *findDefInLoop(Register Reg);
0414   bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
0415                              unsigned &OffsetPos, unsigned &NewBase,
0416                              int64_t &NewOffset);
0417   void postProcessDAG();
0418   /// Set the Minimum Initiation Interval for this schedule attempt.
0419   void setMII(unsigned ResMII, unsigned RecMII);
0420   /// Set the Maximum Initiation Interval for this schedule attempt.
0421   void setMAX_II();
0422 };
0423 
0424 /// A NodeSet contains a set of SUnit DAG nodes with additional information
0425 /// that assigns a priority to the set.
0426 class NodeSet {
0427   SetVector<SUnit *> Nodes;
0428   bool HasRecurrence = false;
0429   unsigned RecMII = 0;
0430   int MaxMOV = 0;
0431   unsigned MaxDepth = 0;
0432   unsigned Colocate = 0;
0433   SUnit *ExceedPressure = nullptr;
0434   unsigned Latency = 0;
0435 
0436 public:
0437   using iterator = SetVector<SUnit *>::const_iterator;
0438 
0439   NodeSet() = default;
0440   NodeSet(iterator S, iterator E, const SwingSchedulerDAG *DAG)
0441       : Nodes(S, E), HasRecurrence(true) {
0442     // Calculate the latency of this node set.
0443     // Example to demonstrate the calculation:
0444     // Given: N0 -> N1 -> N2 -> N0
0445     // Edges:
0446     // (N0 -> N1, 3)
0447     // (N0 -> N1, 5)
0448     // (N1 -> N2, 2)
0449     // (N2 -> N0, 1)
0450     // The total latency which is a lower bound of the recurrence MII is the
0451     // longest path from N0 back to N0 given only the edges of this node set.
0452     // In this example, the latency is: 5 + 2 + 1 = 8.
0453     //
0454     // Hold a map from each SUnit in the circle to the maximum distance from the
0455     // source node by only considering the nodes.
0456     const SwingSchedulerDDG *DDG = DAG->getDDG();
0457     DenseMap<SUnit *, unsigned> SUnitToDistance;
0458     for (auto *Node : Nodes)
0459       SUnitToDistance[Node] = 0;
0460 
0461     for (unsigned I = 1, E = Nodes.size(); I <= E; ++I) {
0462       SUnit *U = Nodes[I - 1];
0463       SUnit *V = Nodes[I % Nodes.size()];
0464       for (const SwingSchedulerDDGEdge &Succ : DDG->getOutEdges(U)) {
0465         SUnit *SuccSUnit = Succ.getDst();
0466         if (V != SuccSUnit)
0467           continue;
0468         if (SUnitToDistance[U] + Succ.getLatency() > SUnitToDistance[V]) {
0469           SUnitToDistance[V] = SUnitToDistance[U] + Succ.getLatency();
0470         }
0471       }
0472     }
0473     // Handle a back-edge in loop carried dependencies
0474     SUnit *FirstNode = Nodes[0];
0475     SUnit *LastNode = Nodes[Nodes.size() - 1];
0476 
0477     for (auto &PI : DDG->getInEdges(LastNode)) {
0478       // If we have an order dep that is potentially loop carried then a
0479       // back-edge exists between the last node and the first node that isn't
0480       // modeled in the DAG. Handle it manually by adding 1 to the distance of
0481       // the last node.
0482       if (PI.getSrc() != FirstNode || !PI.isOrderDep() ||
0483           !DAG->isLoopCarriedDep(PI))
0484         continue;
0485       SUnitToDistance[FirstNode] =
0486           std::max(SUnitToDistance[FirstNode], SUnitToDistance[LastNode] + 1);
0487     }
0488 
0489     // The latency is the distance from the source node to itself.
0490     Latency = SUnitToDistance[Nodes.front()];
0491   }
0492 
0493   bool insert(SUnit *SU) { return Nodes.insert(SU); }
0494 
0495   void insert(iterator S, iterator E) { Nodes.insert(S, E); }
0496 
0497   template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
0498     return Nodes.remove_if(P);
0499   }
0500 
0501   unsigned count(SUnit *SU) const { return Nodes.count(SU); }
0502 
0503   bool hasRecurrence() { return HasRecurrence; };
0504 
0505   unsigned size() const { return Nodes.size(); }
0506 
0507   bool empty() const { return Nodes.empty(); }
0508 
0509   SUnit *getNode(unsigned i) const { return Nodes[i]; };
0510 
0511   void setRecMII(unsigned mii) { RecMII = mii; };
0512 
0513   void setColocate(unsigned c) { Colocate = c; };
0514 
0515   void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
0516 
0517   bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
0518 
0519   int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
0520 
0521   int getRecMII() { return RecMII; }
0522 
0523   /// Summarize node functions for the entire node set.
0524   void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
0525     for (SUnit *SU : *this) {
0526       MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
0527       MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
0528     }
0529   }
0530 
0531   unsigned getLatency() { return Latency; }
0532 
0533   unsigned getMaxDepth() { return MaxDepth; }
0534 
0535   void clear() {
0536     Nodes.clear();
0537     RecMII = 0;
0538     HasRecurrence = false;
0539     MaxMOV = 0;
0540     MaxDepth = 0;
0541     Colocate = 0;
0542     ExceedPressure = nullptr;
0543   }
0544 
0545   operator SetVector<SUnit *> &() { return Nodes; }
0546 
0547   /// Sort the node sets by importance. First, rank them by recurrence MII,
0548   /// then by mobility (least mobile done first), and finally by depth.
0549   /// Each node set may contain a colocate value which is used as the first
0550   /// tie breaker, if it's set.
0551   bool operator>(const NodeSet &RHS) const {
0552     if (RecMII == RHS.RecMII) {
0553       if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
0554         return Colocate < RHS.Colocate;
0555       if (MaxMOV == RHS.MaxMOV)
0556         return MaxDepth > RHS.MaxDepth;
0557       return MaxMOV < RHS.MaxMOV;
0558     }
0559     return RecMII > RHS.RecMII;
0560   }
0561 
0562   bool operator==(const NodeSet &RHS) const {
0563     return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
0564            MaxDepth == RHS.MaxDepth;
0565   }
0566 
0567   bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
0568 
0569   iterator begin() { return Nodes.begin(); }
0570   iterator end() { return Nodes.end(); }
0571   void print(raw_ostream &os) const;
0572 
0573 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
0574   LLVM_DUMP_METHOD void dump() const;
0575 #endif
0576 };
0577 
0578 // 16 was selected based on the number of ProcResource kinds for all
0579 // existing Subtargets, so that SmallVector don't need to resize too often.
0580 static const int DefaultProcResSize = 16;
0581 
0582 class ResourceManager {
0583 private:
0584   const MCSubtargetInfo *STI;
0585   const MCSchedModel &SM;
0586   const TargetSubtargetInfo *ST;
0587   const TargetInstrInfo *TII;
0588   ScheduleDAGInstrs *DAG;
0589   const bool UseDFA;
0590   /// DFA resources for each slot
0591   llvm::SmallVector<std::unique_ptr<DFAPacketizer>> DFAResources;
0592   /// Modulo Reservation Table. When a resource with ID R is consumed in cycle
0593   /// C, it is counted in MRT[C mod II][R]. (Used when UseDFA == F)
0594   llvm::SmallVector<llvm::SmallVector<uint64_t, DefaultProcResSize>> MRT;
0595   /// The number of scheduled micro operations for each slot. Micro operations
0596   /// are assumed to be scheduled one per cycle, starting with the cycle in
0597   /// which the instruction is scheduled.
0598   llvm::SmallVector<int> NumScheduledMops;
0599   /// Each processor resource is associated with a so-called processor resource
0600   /// mask. This vector allows to correlate processor resource IDs with
0601   /// processor resource masks. There is exactly one element per each processor
0602   /// resource declared by the scheduling model.
0603   llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceMasks;
0604   int InitiationInterval = 0;
0605   /// The number of micro operations that can be scheduled at a cycle.
0606   int IssueWidth;
0607 
0608   int calculateResMIIDFA() const;
0609   /// Check if MRT is overbooked
0610   bool isOverbooked() const;
0611   /// Reserve resources on MRT
0612   void reserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
0613   /// Unreserve resources on MRT
0614   void unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
0615 
0616   /// Return M satisfying Dividend = Divisor * X + M, 0 < M < Divisor.
0617   /// The slot on MRT to reserve a resource for the cycle C is positiveModulo(C,
0618   /// II).
0619   int positiveModulo(int Dividend, int Divisor) const {
0620     assert(Divisor > 0);
0621     int R = Dividend % Divisor;
0622     if (R < 0)
0623       R += Divisor;
0624     return R;
0625   }
0626 
0627 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
0628   LLVM_DUMP_METHOD void dumpMRT() const;
0629 #endif
0630 
0631 public:
0632   ResourceManager(const TargetSubtargetInfo *ST, ScheduleDAGInstrs *DAG)
0633       : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()),
0634         DAG(DAG), UseDFA(ST->useDFAforSMS()),
0635         ProcResourceMasks(SM.getNumProcResourceKinds(), 0),
0636         IssueWidth(SM.IssueWidth) {
0637     initProcResourceVectors(SM, ProcResourceMasks);
0638     if (IssueWidth <= 0)
0639       // If IssueWidth is not specified, set a sufficiently large value
0640       IssueWidth = 100;
0641     if (SwpForceIssueWidth > 0)
0642       IssueWidth = SwpForceIssueWidth;
0643   }
0644 
0645   void initProcResourceVectors(const MCSchedModel &SM,
0646                                SmallVectorImpl<uint64_t> &Masks);
0647 
0648   /// Check if the resources occupied by a machine instruction are available
0649   /// in the current state.
0650   bool canReserveResources(SUnit &SU, int Cycle);
0651 
0652   /// Reserve the resources occupied by a machine instruction and change the
0653   /// current state to reflect that change.
0654   void reserveResources(SUnit &SU, int Cycle);
0655 
0656   int calculateResMII() const;
0657 
0658   /// Initialize resources with the initiation interval II.
0659   void init(int II);
0660 };
0661 
0662 /// This class represents the scheduled code.  The main data structure is a
0663 /// map from scheduled cycle to instructions.  During scheduling, the
0664 /// data structure explicitly represents all stages/iterations.   When
0665 /// the algorithm finshes, the schedule is collapsed into a single stage,
0666 /// which represents instructions from different loop iterations.
0667 ///
0668 /// The SMS algorithm allows negative values for cycles, so the first cycle
0669 /// in the schedule is the smallest cycle value.
0670 class SMSchedule {
0671 private:
0672   /// Map from execution cycle to instructions.
0673   DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
0674 
0675   /// Map from instruction to execution cycle.
0676   std::map<SUnit *, int> InstrToCycle;
0677 
0678   /// Keep track of the first cycle value in the schedule.  It starts
0679   /// as zero, but the algorithm allows negative values.
0680   int FirstCycle = 0;
0681 
0682   /// Keep track of the last cycle value in the schedule.
0683   int LastCycle = 0;
0684 
0685   /// The initiation interval (II) for the schedule.
0686   int InitiationInterval = 0;
0687 
0688   /// Target machine information.
0689   const TargetSubtargetInfo &ST;
0690 
0691   /// Virtual register information.
0692   MachineRegisterInfo &MRI;
0693 
0694   ResourceManager ProcItinResources;
0695 
0696 public:
0697   SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG)
0698       : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
0699         ProcItinResources(&ST, DAG) {}
0700 
0701   void reset() {
0702     ScheduledInstrs.clear();
0703     InstrToCycle.clear();
0704     FirstCycle = 0;
0705     LastCycle = 0;
0706     InitiationInterval = 0;
0707   }
0708 
0709   /// Set the initiation interval for this schedule.
0710   void setInitiationInterval(int ii) {
0711     InitiationInterval = ii;
0712     ProcItinResources.init(ii);
0713   }
0714 
0715   /// Return the initiation interval for this schedule.
0716   int getInitiationInterval() const { return InitiationInterval; }
0717 
0718   /// Return the first cycle in the completed schedule.  This
0719   /// can be a negative value.
0720   int getFirstCycle() const { return FirstCycle; }
0721 
0722   /// Return the last cycle in the finalized schedule.
0723   int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
0724 
0725   /// Return the cycle of the earliest scheduled instruction in the dependence
0726   /// chain.
0727   int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep,
0728                            const SwingSchedulerDDG *DDG);
0729 
0730   /// Return the cycle of the latest scheduled instruction in the dependence
0731   /// chain.
0732   int latestCycleInChain(const SwingSchedulerDDGEdge &Dep,
0733                          const SwingSchedulerDDG *DDG);
0734 
0735   void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II,
0736                     SwingSchedulerDAG *DAG);
0737   bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
0738 
0739   /// Iterators for the cycle to instruction map.
0740   using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
0741   using const_sched_iterator =
0742       DenseMap<int, std::deque<SUnit *>>::const_iterator;
0743 
0744   /// Return true if the instruction is scheduled at the specified stage.
0745   bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
0746     return (stageScheduled(SU) == (int)StageNum);
0747   }
0748 
0749   /// Return the stage for a scheduled instruction.  Return -1 if
0750   /// the instruction has not been scheduled.
0751   int stageScheduled(SUnit *SU) const {
0752     std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
0753     if (it == InstrToCycle.end())
0754       return -1;
0755     return (it->second - FirstCycle) / InitiationInterval;
0756   }
0757 
0758   /// Return the cycle for a scheduled instruction. This function normalizes
0759   /// the first cycle to be 0.
0760   unsigned cycleScheduled(SUnit *SU) const {
0761     std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
0762     assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
0763     return (it->second - FirstCycle) % InitiationInterval;
0764   }
0765 
0766   /// Return the maximum stage count needed for this schedule.
0767   unsigned getMaxStageCount() {
0768     return (LastCycle - FirstCycle) / InitiationInterval;
0769   }
0770 
0771   /// Return the instructions that are scheduled at the specified cycle.
0772   std::deque<SUnit *> &getInstructions(int cycle) {
0773     return ScheduledInstrs[cycle];
0774   }
0775 
0776   SmallSet<SUnit *, 8>
0777   computeUnpipelineableNodes(SwingSchedulerDAG *SSD,
0778                              TargetInstrInfo::PipelinerLoopInfo *PLI);
0779 
0780   std::deque<SUnit *>
0781   reorderInstructions(const SwingSchedulerDAG *SSD,
0782                       const std::deque<SUnit *> &Instrs) const;
0783 
0784   bool
0785   normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD,
0786                                     TargetInstrInfo::PipelinerLoopInfo *PLI);
0787   bool isValidSchedule(SwingSchedulerDAG *SSD);
0788   void finalizeSchedule(SwingSchedulerDAG *SSD);
0789   void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU,
0790                        std::deque<SUnit *> &Insts) const;
0791   bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const;
0792   bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def,
0793                              MachineOperand &MO) const;
0794 
0795   bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU,
0796                                             const SwingSchedulerDDG *DDG) const;
0797   void print(raw_ostream &os) const;
0798   void dump() const;
0799 };
0800 
0801 } // end namespace llvm
0802 
0803 #endif // LLVM_CODEGEN_MACHINEPIPELINER_H