lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @ 0:95c75e76d11b (LLVM 3.4)

author:   Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date:     Thu, 12 Dec 2013 13:56:28 +0900
parents:  (none)
children: 54457678186b
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
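// A rough sketch of that loop (illustrative only; the actual bottom-up loop
// in ListScheduleBottomUp below also handles hazards, pipeline stalls, and
// live physical register interferences):
//
//   while (!AvailableQueue->empty()) {
//     SUnit *SU = PickNodeToScheduleBottomUp(); // highest-priority legal node
//     AdvancePastStalls(SU);                    // honor latency and hazards
//     ScheduleNodeBottomUp(SU);                 // emit it, release its preds
//   }
//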
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds, "Number of nodes unfolded");
STATISTIC(NumDups, "Number of duplicated nodes");
STATISTIC(NumPRCopies, "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);
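
// Usage sketch (an assumption about the standard llc driver, not part of this
// file): the RegisterScheduler entries above make each scheduler selectable
// by name at instruction selection time, e.g.:
//
//   llc -pre-RA-sched=list-burr foo.ll    // bottom-up register reduction
//   llc -pre-RA-sched=source foo.ll       // prefer source order
//   llc -pre-RA-sched=list-hybrid foo.ll  // balance latency vs. pressure
//   llc -pre-RA-sched=list-ilp foo.ll     // balance ILP vs. pressure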

static cl::opt<bool> DisableSchedCycles(
  "disable-sched-cycles", cl::Hidden, cl::init(false),
  cl::desc("Disable cycle-level precision during preRA scheduling"));

// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
static cl::opt<bool> DisableSchedRegPressure(
  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
  cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
  "disable-sched-live-uses", cl::Hidden, cl::init(true),
  cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
  cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
  cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
  "disable-sched-stalls", cl::Hidden, cl::init(true),
  cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
  "disable-sched-critical-path", cl::Hidden, cl::init(false),
  cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
  "disable-sched-height", cl::Hidden, cl::init(false),
  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
static cl::opt<bool> Disable2AddrHack(
  "disable-2addr-hack", cl::Hidden, cl::init(true),
  cl::desc("Disable scheduler's two-address hack"));

static cl::opt<int> MaxReorderWindow(
  "max-sched-reorder", cl::Hidden, cl::init(6),
  cl::desc("Number of instructions to allow ahead of the critical path "
           "in sched=list-ilp"));

static cl::opt<unsigned> AvgIPC(
  "sched-avg-ipc", cl::Hidden, cl::init(1),
  cl::desc("Average inst/cycle when no target itinerary exists."));

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation). Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit*> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// IssueCount - Count instructions issued in this cycle
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount;

  /// LiveRegDefs - A set of physical registers and their definitions that are
  /// "live". These nodes must be scheduled before any other nodes that modify
  /// the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<SUnit*> LiveRegGens;

  // Collect interferences between physical register use/defs.
  // Each interference is an SUnit and set of physical registers.
  SmallVector<SUnit*, 4> Interferences;
  typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT;
  LRegsMapT LRegsMap;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
  // DAG crawling.
  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
    : ScheduleDAGSDNodes(mf),
      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
      Topo(SUnits, NULL) {

    const TargetMachine &tm = mf.getTarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
  }

  ~ScheduleDAGRRList() {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule();

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - Adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
      AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVectorImpl<SUnit*>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);

  void releaseInterferences(unsigned Reg = 0);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = newSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool forceUnitLatencies() const {
    return !NeedLatency;
  }
};
} // end anonymous namespace
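
// For context: the create*ListDAGScheduler factories registered above are
// defined further down in this file (past this hunk). A minimal sketch of
// their shape, assuming the priority-queue types defined later; details of
// the real factories may differ:
//
//   ScheduleDAGSDNodes *
//   llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
//                                    CodeGenOpt::Level OptLevel) {
//     BURegReductionPriorityQueue *PQ = ...;  // heuristic priority queue
//     ScheduleDAGRRList *SD =
//       new ScheduleDAGRRList(*IS->MF, /*needlatency=*/false, PQ, OptLevel);
//     PQ->setScheduleDAG(SD);  // the queue needs a back-pointer to the DAG
//     return SD;
//   }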

/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
                          const TargetLowering *TLI,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI,
                          unsigned &RegClass, unsigned &Cost,
                          const MachineFunction &MF) {
  MVT VT = RegDefPos.GetValue();

  // Special handling for untyped values. These values can only come from
  // the expansion of custom DAG-to-DAG patterns.
  if (VT == MVT::Untyped) {
    const SDNode *Node = RegDefPos.GetNode();

    // Special handling for CopyFromReg of untyped values.
    if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
      unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Opcode = Node->getMachineOpcode();
    if (Opcode == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Idx = RegDefPos.GetIdx();
    const MCInstrDesc Desc = TII->get(Opcode);
    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
    RegClass = RC->getID();
    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
    // better way to determine it.
    Cost = 1;
  } else {
    RegClass = TLI->getRepRegClassFor(VT)->getID();
    Cost = TLI->getRepRegClassCostFor(VT);
  }
}

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " '" << BB->getName() << "' **********\n");

  CurCycle = 0;
  IssueCount = 0;
  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
  NumLiveRegs = 0;
  // Allocate slots for each physical register, plus one for a special register
  // to track the virtual resource of a calling sequence.
  LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
  LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
  CallSeqEndForStart.clear();
  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();

  AvailableQueue->releaseState();

  DEBUG({
      dbgs() << "*** Final schedule ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!forceUnitLatencies()) {
    // Updating predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue;
    // avoid adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}

/// IsChainDependent - Test if Outer is reachable from Inner through
/// chain dependencies.
static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
                             unsigned NestLevel,
                             const TargetInstrInfo *TII) {
  SDNode *N = Outer;
  for (;;) {
    if (N == Inner)
      return true;
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
          return true;
      return false;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() ==
          (unsigned)TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
      } else if (N->getMachineOpcode() ==
                 (unsigned)TII->getCallFrameSetupOpcode()) {
        if (NestLevel == 0)
          return false;
        --NestLevel;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other) {
        N = N->getOperand(i).getNode();
        goto found_chain_operand;
      }
    return false;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return false;
  }
}

/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
/// the corresponding (lowered) CALLSEQ_BEGIN node.
///
/// NestLevel and MaxNest are used in recursion to indicate the current level
/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
/// level seen so far.
///
/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
static SDNode *
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
                 const TargetInstrInfo *TII) {
  for (;;) {
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      SDNode *Best = 0;
      unsigned BestMaxNest = MaxNest;
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        unsigned MyNestLevel = NestLevel;
        unsigned MyMaxNest = MaxNest;
        if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
                                           MyNestLevel, MyMaxNest, TII))
          if (!Best || (MyMaxNest > BestMaxNest)) {
            Best = New;
            BestMaxNest = MyMaxNest;
          }
      }
      assert(Best);
      MaxNest = BestMaxNest;
      return Best;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() ==
          (unsigned)TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
        MaxNest = std::max(MaxNest, NestLevel);
      } else if (N->getMachineOpcode() ==
                 (unsigned)TII->getCallFrameSetupOpcode()) {
        assert(NestLevel != 0);
        --NestLevel;
        if (NestLevel == 0)
          return N;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other) {
        N = N->getOperand(i).getNode();
        goto found_chain_operand;
      }
    return 0;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return 0;
  }
}

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[I->getReg()] = I->getSUnit();
      if (!LiveRegGens[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[I->getReg()] = SU;
      }
    }
  }

  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
  // these nodes, to prevent other calls from being interscheduled with them.
  unsigned CallResource = TRI->getNumRegs();
  if (!LiveRegDefs[CallResource])
    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
      if (Node->isMachineOpcode() &&
          Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
        unsigned NestLevel = 0;
        unsigned MaxNest = 0;
        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);

        SUnit *Def = &SUnits[N->getNodeId()];
        CallSeqEndForStart[Def] = SU;

        ++NumLiveRegs;
        LiveRegDefs[CallResource] = Def;
        LiveRegGens[CallResource] = SU;
        break;
      }
}

/// Check to see if any of the pending instructions are ready to issue. If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle = PendingQueue[i]->getHeight();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}

/// Move the scheduler state forward to the specified cycle.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  IssueCount = 0;
  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      HazardRec->RecedeCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
  // available Q to release pending nodes at least once before popping.
  ReleasePending();
}

/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = SU->getHeight();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
      HazardRec->getHazardType(SU, -Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::LIFETIME_START:
  case ISD::LIFETIME_END:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
  case ISD::EH_LABEL:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);
}

static void resetVRegCycle(SUnit *SU);

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << " Height [" << SU->getHeight()
          << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->scheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
      releaseInterferences(I->getReg());
    }
  }
  // Release the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  if (LiveRegDefs[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = NULL;
        LiveRegGens[CallResource] = NULL;
        releaseInterferences(CallResource);
      }
    }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// own and its predecessors' states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
      releaseInterferences(I->getReg());
    }
  }

  // Reclaim the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isMachineOpcode() &&
        SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
      ++NumLiveRegs;
      LiveRegDefs[CallResource] = SU;
      LiveRegGens[CallResource] = CallSeqEndForStart[SU];
    }
  }

  // Release the special call resource dependence, if this is the end
  // of a call.
  if (LiveRegGens[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = NULL;
        LiveRegGens[CallResource] = NULL;
        releaseInterferences(CallResource);
      }
    }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[I->getReg()] = SU;
      if (LiveRegGens[I->getReg()] == NULL ||
          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
        LiveRegGens[I->getReg()] = I->getSUnit();
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->unscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack scheduling to the cycle of BtSU in order to
/// schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  if (SU->getNode()->getGluedNode())
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    // Unfolding an x86 DEC64m operation results in a store, dec, and load,
    // which can't be handled here, so quit.
    if (NewNodes.size() == 3)
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location that produces the same type of value
    // but has different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);

      InitNumRegDefsLeft(LoadSU);
      computeLatency(LoadSU);
    }

    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    InitNumRegDefsLeft(NewSU);
    computeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
      // Balance register pressure.
      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
        --NewSU->NumRegDefsLeft;
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    SDep D(LoadSU, SDep::Data, 0);
    D.setLatency(LoadSU->Latency);
    AddPred(NewSU, D);

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                                 const TargetRegisterClass *DestRC,
                                                 const TargetRegisterClass *SrcRC,
                                                 SmallVectorImpl<SUnit*> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  SDep FromDep(SU, SDep::Data, Reg);
  FromDep.setLatency(SU->Latency);
  AddPred(CopyFromSU, FromDep);
  SDep ToDep(CopyFromSU, SDep::Data, 0);
  ToDep.setLatency(CopyFromSU->Latency);
  AddPred(CopyToSU, ToDep);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = MCID.getNumDefs();
  for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// CheckForLiveRegDef - Update the LRegs vector if the specified register def
/// of the specified SUnit clobbers any "live" registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVectorImpl<unsigned> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {

    // Check if this alias of Reg is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI)) {
      LRegs.push_back(*AliasI);
    }
  }
}

/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
/// by RegMask, and add them to LRegs.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
                                     std::vector<SUnit*> &LiveRegDefs,
                                     SmallSet<unsigned, 4> &RegAdded,
                                     SmallVectorImpl<unsigned> &LRegs) {
  // Look at all live registers. Skip Reg0 and the special CallResource.
  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
    if (!LiveRegDefs[i]) continue;
    if (LiveRegDefs[i] == SU) continue;
    if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
    if (RegAdded.insert(i))
      LRegs.push_back(i);
  }
}

/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static const uint32_t *getNodeRegMask(const SDNode *N) {
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    if (const RegisterMaskSDNode *Op =
        dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode()))
      return Op->getRegMask();
  return NULL;
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i;  // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    // If we're in the middle of scheduling a call, don't begin scheduling
    // another call. Also, don't allow any physical registers to be live across
    // the call.
    if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
      // Check the special calling-sequence resource.
      unsigned CallResource = TRI->getNumRegs();
      if (LiveRegDefs[CallResource]) {
        SDNode *Gen = LiveRegGens[CallResource]->getNode();
        while (SDNode *Glued = Gen->getGluedNode())
          Gen = Glued;
        if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
          LRegs.push_back(CallResource);
      }
    }
    if (const uint32_t *RegMask = getNodeRegMask(Node))
      CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);

    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (!MCID.ImplicitDefs)
      continue;
    for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}

void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = Interferences.size(); i > 0; --i) {
    SUnit *SU = Interferences[i-1];
    LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
    if (Reg) {
      SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
      if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end())
        continue;
    }
    SU->isPending = false;
    // The interfering node may no longer be available due to backtracking.
    // Furthermore, it may have been made available again, in which case it is
    // now already in the AvailableQueue.
    if (SU->isAvailable && !SU->NodeQueueId) {
      DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
      AvailableQueue->push(SU);
    }
    if (i < Interferences.size())
      Interferences[i-1] = Interferences.back();
    Interferences.pop_back();
    LRegsMap.erase(LRegsPos);
  }
}

/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SUnit *CurSU = AvailableQueue->empty() ? 0 : AvailableQueue->pop();
  while (CurSU) {
    SmallVector<unsigned, 4> LRegs;
    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
      break;
    DEBUG(dbgs() << " Interfering reg " <<
          (LRegs[0] == TRI->getNumRegs() ? "CallResource"
           : TRI->getName(LRegs[0]))
          << " SU #" << CurSU->NodeNum << '\n');
    std::pair<LRegsMapT::iterator, bool> LRegsPair =
      LRegsMap.insert(std::make_pair(CurSU, LRegs));
    if (LRegsPair.second) {
      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      Interferences.push_back(CurSU);
    }
    else {
      assert(CurSU->isPending && "Interferences are pending");
      // Update the interference with current live regs.
      LRegsPair.first->second = LRegs;
    }
    CurSU = AvailableQueue->pop();
  }
  if (CurSU)
    return CurSU;

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    SUnit *TrySU = Interferences[i];
    SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
    SUnit *BtSU = NULL;
    unsigned LiveCycle = UINT_MAX;
    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
      unsigned Reg = LRegs[j];
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
        BtSU = LiveRegGens[Reg];
        LiveCycle = BtSU->getHeight();
      }
    }
    if (!WillCreateCycle(TrySU, BtSU)) {
      // BacktrackBottomUp mutates Interferences!
      BacktrackBottomUp(TrySU, BtSU);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (BtSU->isAvailable) {
        BtSU->isAvailable = false;
        if (!BtSU->isPending)
          AvailableQueue->remove(BtSU);
      }
      DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
            << TrySU->NodeNum << ")\n");
      AddPred(TrySU, SDep(BtSU, SDep::Artificial));

      // If one or more successors has been unscheduled, then the current
      // node is no longer available.
      if (!TrySU->isAvailable)
        CurSU = AvailableQueue->pop();
      else {
        AvailableQueue->remove(TrySU);
        CurSU = TrySU;
      }
      // Interferences has been mutated. We must break.
      break;
    }
  }

  if (!CurSU) {
1430 // Can't backtrack. If it's too expensive to copy the value, then try to | |
1431 // duplicate the nodes that produce these "too expensive to copy" | |
1432 // values to break the dependency. If even that doesn't work, | |
1433 // insert cross class copies. | |
1434 // If it's not too expensive, i.e. cost != -1, issue copies. | |
1435 SUnit *TrySU = Interferences[0]; | |
1436 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; | |
1437 assert(LRegs.size() == 1 && "Can't handle this yet!"); | |
1438 unsigned Reg = LRegs[0]; | |
1439 SUnit *LRDef = LiveRegDefs[Reg]; | |
1440 EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); | |
1441 const TargetRegisterClass *RC = | |
1442 TRI->getMinimalPhysRegClass(Reg, VT); | |
1443 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); | |
1444 | |
1445 // If the cross copy register class is the same as RC, then it must be | |
1446 // possible to copy the value directly. Do not try to duplicate the def. | |
1447 // If the cross copy register class is not the same as RC, then it's | |
1448 // possible to copy the value, but it requires cross register class | |
1449 // copies and is expensive. | |
1450 // If the cross copy register class is null, then it's not possible to | |
1451 // copy the value at all. | |
1452 SUnit *NewDef = 0; | |
1453 if (DestRC != RC) { | |
1454 NewDef = CopyAndMoveSuccessors(LRDef); | |
1455 if (!DestRC && !NewDef) | |
1456 report_fatal_error("Can't handle live physical register dependency!"); | |
1457 } | |
1458 if (!NewDef) { | |
1459 // Issue copies, these can be expensive cross register class copies. | |
1460 SmallVector<SUnit*, 2> Copies; | |
1461 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); | |
1462 DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum | |
1463 << " to SU #" << Copies.front()->NodeNum << "\n"); | |
1464 AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); | |
1465 NewDef = Copies.back(); | |
1466 } | |
1467 | |
1468 DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum | |
1469 << " to SU #" << TrySU->NodeNum << "\n"); | |
1470 LiveRegDefs[Reg] = NewDef; | |
1471 AddPred(NewDef, SDep(TrySU, SDep::Artificial)); | |
1472 TrySU->isAvailable = false; | |
1473 CurSU = NewDef; | |
1474 } | |
1475 assert(CurSU && "Unable to resolve live physical register dependencies!"); | |
1476 return CurSU; | |
1477 } | |
1478 | |
1479 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up | |
1480 /// schedulers. | |
1481 void ScheduleDAGRRList::ListScheduleBottomUp() { | |
1482 // Release any predecessors of the special Exit node. | |
1483 ReleasePredecessors(&ExitSU); | |
1484 | |
1485 // Add root to Available queue. | |
1486 if (!SUnits.empty()) { | |
1487 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; | |
1488 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); | |
1489 RootSU->isAvailable = true; | |
1490 AvailableQueue->push(RootSU); | |
1491 } | |
1492 | |
1493 // While Available queue is not empty, grab the node with the highest | |
1494 // priority. If it is not ready, put it back. Schedule the node. | |
1495 Sequence.reserve(SUnits.size()); | |
1496 while (!AvailableQueue->empty() || !Interferences.empty()) { | |
1497 DEBUG(dbgs() << "\nExamining Available:\n"; | |
1498 AvailableQueue->dump(this)); | |
1499 | |
1500 // Pick the best node to schedule taking all constraints into | |
1501 // consideration. | |
1502 SUnit *SU = PickNodeToScheduleBottomUp(); | |
1503 | |
1504 AdvancePastStalls(SU); | |
1505 | |
1506 ScheduleNodeBottomUp(SU); | |
1507 | |
1508 while (AvailableQueue->empty() && !PendingQueue.empty()) { | |
1509 // Advance the cycle to free resources. Skip ahead to the next ready SU. | |
1510 assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized"); | |
1511 AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); | |
1512 } | |
1513 } | |
1514 | |
1515 // The sequence was built bottom-up, i.e. in reverse; flip it for emission. | |
1516 std::reverse(Sequence.begin(), Sequence.end()); | |
1517 | |
1518 #ifndef NDEBUG | |
1519 VerifyScheduledSequence(/*isBottomUp=*/true); | |
1520 #endif | |
1521 } | |
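// Illustrative sketch (not part of the scheduler): a condensed, standalone
// version of the bottom-up driver loop above on a toy DAG, ignoring hazards,
// stalls, and interference handling. All names, priorities, and node ids are
// hypothetical; disabled so the file still compiles:
#if 0
#include <algorithm>
#include <cstdio>
#include <vector>

struct ToyNode {
  int Id;
  int Priority;           // Stand-in for the scheduler's heuristics.
  std::vector<int> Preds;  // Edges toward operands (scheduled later).
  int SuccsLeft;           // Unscheduled users; 0 means "available".
};

int main() {
  // Node 2 uses nodes 0 and 1; bottom-up scheduling starts at node 2.
  std::vector<ToyNode> Nodes = {
      {0, 5, {}, 1}, {1, 7, {}, 1}, {2, 1, {0, 1}, 0}};
  std::vector<int> Available = {2}, Sequence;
  while (!Available.empty()) {
    // Pop the best available node (linear scan, like popFromQueueImpl below).
    std::vector<int>::iterator Best = std::max_element(
        Available.begin(), Available.end(),
        [&](int A, int B) { return Nodes[A].Priority < Nodes[B].Priority; });
    int Id = *Best;
    *Best = Available.back();
    Available.pop_back();
    Sequence.push_back(Id);
    // Scheduling a node may make its predecessors available.
    for (int P : Nodes[Id].Preds)
      if (--Nodes[P].SuccsLeft == 0)
        Available.push_back(P);
  }
  // Bottom-up order was built in reverse; flip it, as above.
  std::reverse(Sequence.begin(), Sequence.end());
  for (int Id : Sequence)
    std::printf("SU(%d)\n", Id); // Emits SU(0), SU(1), SU(2).
  return 0;
}
#endif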
1522 | |
1523 //===----------------------------------------------------------------------===// | |
1524 // RegReductionPriorityQueue Definition | |
1525 //===----------------------------------------------------------------------===// | |
1526 // | |
1527 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers | |
1528 // to reduce register pressure. | |
1529 // | |
1530 namespace { | |
1531 class RegReductionPQBase; | |
1532 | |
1533 struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { | |
1534 bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } | |
1535 }; | |
1536 | |
1537 #ifndef NDEBUG | |
1538 template<class SF> | |
1539 struct reverse_sort : public queue_sort { | |
1540 SF &SortFunc; | |
1541 reverse_sort(SF &sf) : SortFunc(sf) {} | |
1542 reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {} | |
1543 | |
1544 bool operator()(SUnit* left, SUnit* right) const { | |
1545 // reverse left/right rather than simply !SortFunc(left, right) | |
1546 // to expose different paths in the comparison logic. | |
1547 return SortFunc(right, left); | |
1548 } | |
1549 }; | |
1550 #endif // NDEBUG | |
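// Illustrative sketch (not part of the scheduler): reverse_sort flips
// preferences by swapping arguments rather than negating the result. Besides
// exposing different comparison paths, the swap keeps the predicate a strict
// weak ordering: !Cmp(a, b) is also true for equivalent elements, which
// standard sorts reject. A standalone demonstration, disabled here:
#if 0
#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> V = {3, 1, 3, 2};
  auto Cmp = [](int A, int B) { return A < B; };
  // Swapped arguments: still a valid strict weak ordering.
  std::sort(V.begin(), V.end(), [&](int A, int B) { return Cmp(B, A); });
  assert((V == std::vector<int>{3, 3, 2, 1})); // Valid descending sort.
  // By contrast, [&](int A, int B) { return !Cmp(A, B); } returns true for
  // equal elements, violating strict weak ordering: undefined behavior.
  return 0;
}
#endif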
1551 | |
1552 /// bu_ls_rr_sort - Priority function for bottom up register pressure | |
1553 /// reduction scheduler. | |
1554 struct bu_ls_rr_sort : public queue_sort { | |
1555 enum { | |
1556 IsBottomUp = true, | |
1557 HasReadyFilter = false | |
1558 }; | |
1559 | |
1560 RegReductionPQBase *SPQ; | |
1561 bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} | |
1562 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} | |
1563 | |
1564 bool operator()(SUnit* left, SUnit* right) const; | |
1565 }; | |
1566 | |
1567 // src_ls_rr_sort - Priority function for source order scheduler. | |
1568 struct src_ls_rr_sort : public queue_sort { | |
1569 enum { | |
1570 IsBottomUp = true, | |
1571 HasReadyFilter = false | |
1572 }; | |
1573 | |
1574 RegReductionPQBase *SPQ; | |
1575 src_ls_rr_sort(RegReductionPQBase *spq) | |
1576 : SPQ(spq) {} | |
1577 src_ls_rr_sort(const src_ls_rr_sort &RHS) | |
1578 : SPQ(RHS.SPQ) {} | |
1579 | |
1580 bool operator()(SUnit* left, SUnit* right) const; | |
1581 }; | |
1582 | |
1583 // hybrid_ls_rr_sort - Priority function for hybrid scheduler. | |
1584 struct hybrid_ls_rr_sort : public queue_sort { | |
1585 enum { | |
1586 IsBottomUp = true, | |
1587 HasReadyFilter = false | |
1588 }; | |
1589 | |
1590 RegReductionPQBase *SPQ; | |
1591 hybrid_ls_rr_sort(RegReductionPQBase *spq) | |
1592 : SPQ(spq) {} | |
1593 hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) | |
1594 : SPQ(RHS.SPQ) {} | |
1595 | |
1596 bool isReady(SUnit *SU, unsigned CurCycle) const; | |
1597 | |
1598 bool operator()(SUnit* left, SUnit* right) const; | |
1599 }; | |
1600 | |
1601 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) | |
1602 // scheduler. | |
1603 struct ilp_ls_rr_sort : public queue_sort { | |
1604 enum { | |
1605 IsBottomUp = true, | |
1606 HasReadyFilter = false | |
1607 }; | |
1608 | |
1609 RegReductionPQBase *SPQ; | |
1610 ilp_ls_rr_sort(RegReductionPQBase *spq) | |
1611 : SPQ(spq) {} | |
1612 ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) | |
1613 : SPQ(RHS.SPQ) {} | |
1614 | |
1615 bool isReady(SUnit *SU, unsigned CurCycle) const; | |
1616 | |
1617 bool operator()(SUnit* left, SUnit* right) const; | |
1618 }; | |
1619 | |
1620 class RegReductionPQBase : public SchedulingPriorityQueue { | |
1621 protected: | |
1622 std::vector<SUnit*> Queue; | |
1623 unsigned CurQueueId; | |
1624 bool TracksRegPressure; | |
1625 bool SrcOrder; | |
1626 | |
1627 // SUnits - The SUnits for the current graph. | |
1628 std::vector<SUnit> *SUnits; | |
1629 | |
1630 MachineFunction &MF; | |
1631 const TargetInstrInfo *TII; | |
1632 const TargetRegisterInfo *TRI; | |
1633 const TargetLowering *TLI; | |
1634 ScheduleDAGRRList *scheduleDAG; | |
1635 | |
1636 // SethiUllmanNumbers - The SethiUllman number for each node. | |
1637 std::vector<unsigned> SethiUllmanNumbers; | |
1638 | |
1639 /// RegPressure - Tracking current reg pressure per register class. | |
1640 /// | |
1641 std::vector<unsigned> RegPressure; | |
1642 | |
1643 /// RegLimit - Tracking the number of allocatable registers per register | |
1644 /// class. | |
1645 std::vector<unsigned> RegLimit; | |
1646 | |
1647 public: | |
1648 RegReductionPQBase(MachineFunction &mf, | |
1649 bool hasReadyFilter, | |
1650 bool tracksrp, | |
1651 bool srcorder, | |
1652 const TargetInstrInfo *tii, | |
1653 const TargetRegisterInfo *tri, | |
1654 const TargetLowering *tli) | |
1655 : SchedulingPriorityQueue(hasReadyFilter), | |
1656 CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), | |
1657 MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { | |
1658 if (TracksRegPressure) { | |
1659 unsigned NumRC = TRI->getNumRegClasses(); | |
1660 RegLimit.resize(NumRC); | |
1661 RegPressure.resize(NumRC); | |
1662 std::fill(RegLimit.begin(), RegLimit.end(), 0); | |
1663 std::fill(RegPressure.begin(), RegPressure.end(), 0); | |
1664 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), | |
1665 E = TRI->regclass_end(); I != E; ++I) | |
1666 RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF); | |
1667 } | |
1668 } | |
1669 | |
1670 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { | |
1671 scheduleDAG = scheduleDag; | |
1672 } | |
1673 | |
1674 ScheduleHazardRecognizer* getHazardRec() { | |
1675 return scheduleDAG->getHazardRec(); | |
1676 } | |
1677 | |
1678 void initNodes(std::vector<SUnit> &sunits); | |
1679 | |
1680 void addNode(const SUnit *SU); | |
1681 | |
1682 void updateNode(const SUnit *SU); | |
1683 | |
1684 void releaseState() { | |
1685 SUnits = 0; | |
1686 SethiUllmanNumbers.clear(); | |
1687 std::fill(RegPressure.begin(), RegPressure.end(), 0); | |
1688 } | |
1689 | |
1690 unsigned getNodePriority(const SUnit *SU) const; | |
1691 | |
1692 unsigned getNodeOrdering(const SUnit *SU) const { | |
1693 if (!SU->getNode()) return 0; | |
1694 | |
1695 return SU->getNode()->getIROrder(); | |
1696 } | |
1697 | |
1698 bool empty() const { return Queue.empty(); } | |
1699 | |
1700 void push(SUnit *U) { | |
1701 assert(!U->NodeQueueId && "Node in the queue already"); | |
1702 U->NodeQueueId = ++CurQueueId; | |
1703 Queue.push_back(U); | |
1704 } | |
1705 | |
1706 void remove(SUnit *SU) { | |
1707 assert(!Queue.empty() && "Queue is empty!"); | |
1708 assert(SU->NodeQueueId != 0 && "Not in queue!"); | |
1709 std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), | |
1710 SU); | |
1711 if (I != prior(Queue.end())) | |
1712 std::swap(*I, Queue.back()); | |
1713 Queue.pop_back(); | |
1714 SU->NodeQueueId = 0; | |
1715 } | |
1716 | |
1717 bool tracksRegPressure() const { return TracksRegPressure; } | |
1718 | |
1719 void dumpRegPressure() const; | |
1720 | |
1721 bool HighRegPressure(const SUnit *SU) const; | |
1722 | |
1723 bool MayReduceRegPressure(SUnit *SU) const; | |
1724 | |
1725 int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; | |
1726 | |
1727 void scheduledNode(SUnit *SU); | |
1728 | |
1729 void unscheduledNode(SUnit *SU); | |
1730 | |
1731 protected: | |
1732 bool canClobber(const SUnit *SU, const SUnit *Op); | |
1733 void AddPseudoTwoAddrDeps(); | |
1734 void PrescheduleNodesWithMultipleUses(); | |
1735 void CalculateSethiUllmanNumbers(); | |
1736 }; | |
1737 | |
1738 template<class SF> | |
1739 static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { | |
1740 std::vector<SUnit *>::iterator Best = Q.begin(); | |
1741 for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), | |
1742 E = Q.end(); I != E; ++I) | |
1743 if (Picker(*Best, *I)) | |
1744 Best = I; | |
1745 SUnit *V = *Best; | |
1746 if (Best != prior(Q.end())) | |
1747 std::swap(*Best, Q.back()); | |
1748 Q.pop_back(); | |
1749 return V; | |
1750 } | |
1751 | |
1752 template<class SF> | |
1753 SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { | |
1754 #ifndef NDEBUG | |
1755 if (DAG->StressSched) { | |
1756 reverse_sort<SF> RPicker(Picker); | |
1757 return popFromQueueImpl(Q, RPicker); | |
1758 } | |
1759 #endif | |
1760 (void)DAG; | |
1761 return popFromQueueImpl(Q, Picker); | |
1762 } | |
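// Illustrative sketch (not part of the scheduler): the pickers in this file
// return true when their *left* argument should be scheduled *after* the
// right one, so popFromQueueImpl replaces Best whenever Picker(*Best, *I)
// says Best loses to I, then removes the winner with the same O(1)
// swap-with-back used by remove(). A toy version with hypothetical names,
// disabled so the file still compiles:
#if 0
#include <cassert>
#include <vector>

// True if L should be scheduled after R, i.e. R has higher priority.
static bool picksRightFirst(int L, int R) { return L < R; }

static int popBest(std::vector<int> &Q) {
  unsigned BestIdx = 0;
  for (unsigned I = 1; I < Q.size(); ++I)
    if (picksRightFirst(Q[BestIdx], Q[I])) // Best loses; adopt I.
      BestIdx = I;
  int Best = Q[BestIdx];
  Q[BestIdx] = Q.back(); // O(1) unordered removal.
  Q.pop_back();
  return Best;
}

int main() {
  std::vector<int> Q = {4, 9, 2};
  assert(popBest(Q) == 9 && popBest(Q) == 4 && popBest(Q) == 2);
  return 0;
}
#endif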
1763 | |
1764 template<class SF> | |
1765 class RegReductionPriorityQueue : public RegReductionPQBase { | |
1766 SF Picker; | |
1767 | |
1768 public: | |
1769 RegReductionPriorityQueue(MachineFunction &mf, | |
1770 bool tracksrp, | |
1771 bool srcorder, | |
1772 const TargetInstrInfo *tii, | |
1773 const TargetRegisterInfo *tri, | |
1774 const TargetLowering *tli) | |
1775 : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, | |
1776 tii, tri, tli), | |
1777 Picker(this) {} | |
1778 | |
1779 bool isBottomUp() const { return SF::IsBottomUp; } | |
1780 | |
1781 bool isReady(SUnit *U) const { | |
1782 return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); | |
1783 } | |
1784 | |
1785 SUnit *pop() { | |
1786 if (Queue.empty()) return NULL; | |
1787 | |
1788 SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); | |
1789 V->NodeQueueId = 0; | |
1790 return V; | |
1791 } | |
1792 | |
1793 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |
1794 void dump(ScheduleDAG *DAG) const { | |
1795 // Emulate pop() without clobbering NodeQueueIds. | |
1796 std::vector<SUnit*> DumpQueue = Queue; | |
1797 SF DumpPicker = Picker; | |
1798 while (!DumpQueue.empty()) { | |
1799 SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); | |
1800 dbgs() << "Height " << SU->getHeight() << ": "; | |
1801 SU->dump(DAG); | |
1802 } | |
1803 } | |
1804 #endif | |
1805 }; | |
1806 | |
1807 typedef RegReductionPriorityQueue<bu_ls_rr_sort> | |
1808 BURegReductionPriorityQueue; | |
1809 | |
1810 typedef RegReductionPriorityQueue<src_ls_rr_sort> | |
1811 SrcRegReductionPriorityQueue; | |
1812 | |
1813 typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> | |
1814 HybridBURRPriorityQueue; | |
1815 | |
1816 typedef RegReductionPriorityQueue<ilp_ls_rr_sort> | |
1817 ILPBURRPriorityQueue; | |
1818 } // end anonymous namespace | |
1819 | |
1820 //===----------------------------------------------------------------------===// | |
1821 // Static Node Priority for Register Pressure Reduction | |
1822 //===----------------------------------------------------------------------===// | |
1823 | |
1824 // Check for special nodes that bypass scheduling heuristics. | |
1825 // Currently this pushes TokenFactor nodes down, but may be used for other | |
1826 // pseudo-ops as well. | |
1827 // | |
1828 // Return -1 to schedule right above left, 1 for left above right. | |
1829 // Return 0 if no bias exists. | |
1830 static int checkSpecialNodes(const SUnit *left, const SUnit *right) { | |
1831 bool LSchedLow = left->isScheduleLow; | |
1832 bool RSchedLow = right->isScheduleLow; | |
1833 if (LSchedLow != RSchedLow) | |
1834 return LSchedLow < RSchedLow ? 1 : -1; | |
1835 return 0; | |
1836 } | |
1837 | |
1838 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. | |
1839 /// Smaller number is the higher priority. | |
1840 static unsigned | |
1841 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { | |
1842 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; | |
1843 if (SethiUllmanNumber != 0) | |
1844 return SethiUllmanNumber; | |
1845 | |
1846 unsigned Extra = 0; | |
1847 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); | |
1848 I != E; ++I) { | |
1849 if (I->isCtrl()) continue; // ignore chain preds | |
1850 SUnit *PredSU = I->getSUnit(); | |
1851 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); | |
1852 if (PredSethiUllman > SethiUllmanNumber) { | |
1853 SethiUllmanNumber = PredSethiUllman; | |
1854 Extra = 0; | |
1855 } else if (PredSethiUllman == SethiUllmanNumber) | |
1856 ++Extra; | |
1857 } | |
1858 | |
1859 SethiUllmanNumber += Extra; | |
1860 | |
1861 if (SethiUllmanNumber == 0) | |
1862 SethiUllmanNumber = 1; | |
1863 | |
1864 return SethiUllmanNumber; | |
1865 } | |
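// Illustrative sketch (not part of the scheduler): the recurrence above gives
// a leaf the number 1, and gives an interior node the maximum over its data
// operands plus one for every additional operand that ties that maximum. For
// (a + b) * (c + d): a..d get 1, each add gets 2, the multiply gets 3. A
// standalone recomputation on a toy DAG, disabled so the file still compiles:
#if 0
#include <cassert>
#include <vector>

struct ToyExpr { std::vector<int> Ops; }; // Indices of operand nodes.

static unsigned suNumber(const std::vector<ToyExpr> &G, int N,
                         std::vector<unsigned> &Memo) {
  unsigned &Num = Memo[N];
  if (Num != 0)
    return Num; // Already computed.
  unsigned Extra = 0;
  for (int Op : G[N].Ops) {
    unsigned OpNum = suNumber(G, Op, Memo);
    if (OpNum > Num) { Num = OpNum; Extra = 0; }
    else if (OpNum == Num) ++Extra; // Another operand ties the maximum.
  }
  Num += Extra;
  if (Num == 0)
    Num = 1; // A leaf still needs one register to hold its value.
  return Num;
}

int main() {
  // Nodes 0..3 = a, b, c, d; 4 = a+b; 5 = c+d; 6 = (a+b)*(c+d).
  std::vector<ToyExpr> G = {{{}}, {{}}, {{}}, {{}},
                            {{0, 1}}, {{2, 3}}, {{4, 5}}};
  std::vector<unsigned> Memo(G.size(), 0);
  assert(suNumber(G, 6, Memo) == 3); // The full expression needs 3 registers.
  return 0;
}
#endif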
1866 | |
1867 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all | |
1868 /// scheduling units. | |
1869 void RegReductionPQBase::CalculateSethiUllmanNumbers() { | |
1870 SethiUllmanNumbers.assign(SUnits->size(), 0); | |
1871 | |
1872 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) | |
1873 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); | |
1874 } | |
1875 | |
1876 void RegReductionPQBase::addNode(const SUnit *SU) { | |
1877 unsigned SUSize = SethiUllmanNumbers.size(); | |
1878 if (SUnits->size() > SUSize) | |
1879 SethiUllmanNumbers.resize(SUSize*2, 0); | |
1880 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); | |
1881 } | |
1882 | |
1883 void RegReductionPQBase::updateNode(const SUnit *SU) { | |
1884 SethiUllmanNumbers[SU->NodeNum] = 0; | |
1885 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); | |
1886 } | |
1887 | |
1888 // Lower priority means schedule further down. For bottom-up scheduling, lower | |
1889 // priority SUs are scheduled before higher priority SUs. | |
1890 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { | |
1891 assert(SU->NodeNum < SethiUllmanNumbers.size()); | |
1892 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; | |
1893 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) | |
1894 // CopyToReg should be close to its uses to facilitate coalescing and | |
1895 // avoid spilling. | |
1896 return 0; | |
1897 if (Opc == TargetOpcode::EXTRACT_SUBREG || | |
1898 Opc == TargetOpcode::SUBREG_TO_REG || | |
1899 Opc == TargetOpcode::INSERT_SUBREG) | |
1900 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be | |
1901 // close to their uses to facilitate coalescing. | |
1902 return 0; | |
1903 if (SU->NumSuccs == 0 && SU->NumPreds != 0) | |
1904 // If SU does not have a register use, i.e. it doesn't produce a value | |
1905 // that would be consumed (e.g. store), then it terminates a chain of | |
1906 // computation. Give it a large SethiUllman number so it will be | |
1907 // scheduled right before its predecessors, so that it doesn't lengthen | |
1908 // their live ranges. | |
1909 return 0xffff; | |
1910 if (SU->NumPreds == 0 && SU->NumSuccs != 0) | |
1911 // If SU does not have a register def, schedule it close to its uses | |
1912 // because it does not lengthen any live ranges. | |
1913 return 0; | |
1914 #if 1 | |
1915 return SethiUllmanNumbers[SU->NodeNum]; | |
1916 #else | |
1917 unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; | |
1918 if (SU->isCallOp) { | |
1919 // FIXME: This assumes all of the defs are used as call operands. | |
1920 int NP = (int)Priority - SU->getNode()->getNumValues(); | |
1921 return (NP > 0) ? NP : 0; | |
1922 } | |
1923 return Priority; | |
1924 #endif | |
1925 } | |
1926 | |
1927 //===----------------------------------------------------------------------===// | |
1928 // Register Pressure Tracking | |
1929 //===----------------------------------------------------------------------===// | |
1930 | |
1931 void RegReductionPQBase::dumpRegPressure() const { | |
1932 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |
1933 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), | |
1934 E = TRI->regclass_end(); I != E; ++I) { | |
1935 const TargetRegisterClass *RC = *I; | |
1936 unsigned Id = RC->getID(); | |
1937 unsigned RP = RegPressure[Id]; | |
1938 if (!RP) continue; | |
1939 DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] | |
1940 << '\n'); | |
1941 } | |
1942 #endif | |
1943 } | |
1944 | |
1945 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { | |
1946 if (!TLI) | |
1947 return false; | |
1948 | |
1949 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); | |
1950 I != E; ++I) { | |
1951 if (I->isCtrl()) | |
1952 continue; | |
1953 SUnit *PredSU = I->getSUnit(); | |
1954 // NumRegDefsLeft is zero when enough uses of this node have been scheduled | |
1955 // to cover the number of registers defined (they are all live). | |
1956 if (PredSU->NumRegDefsLeft == 0) { | |
1957 continue; | |
1958 } | |
1959 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); | |
1960 RegDefPos.IsValid(); RegDefPos.Advance()) { | |
1961 unsigned RCId, Cost; | |
1962 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); | |
1963 | |
1964 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) | |
1965 return true; | |
1966 } | |
1967 } | |
1968 return false; | |
1969 } | |
1970 | |
1971 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { | |
1972 const SDNode *N = SU->getNode(); | |
1973 | |
1974 if (!N->isMachineOpcode() || !SU->NumSuccs) | |
1975 return false; | |
1976 | |
1977 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); | |
1978 for (unsigned i = 0; i != NumDefs; ++i) { | |
1979 MVT VT = N->getSimpleValueType(i); | |
1980 if (!N->hasAnyUseOfValue(i)) | |
1981 continue; | |
1982 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); | |
1983 if (RegPressure[RCId] >= RegLimit[RCId]) | |
1984 return true; | |
1985 } | |
1986 return false; | |
1987 } | |
1988 | |
1989 // Compute the register pressure contribution of this instruction by counting | |
1990 // up for uses that are not live and down for defs. Only count register classes | |
1991 // that are already under high pressure. As a side effect, compute the number of | |
1992 // uses of registers that are already live. | |
1993 // | |
1994 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure | |
1995 // so could probably be factored. | |
1996 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { | |
1997 LiveUses = 0; | |
1998 int PDiff = 0; | |
1999 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); | |
2000 I != E; ++I) { | |
2001 if (I->isCtrl()) | |
2002 continue; | |
2003 SUnit *PredSU = I->getSUnit(); | |
2004 // NumRegDefsLeft is zero when enough uses of this node have been scheduled | |
2005 // to cover the number of registers defined (they are all live). | |
2006 if (PredSU->NumRegDefsLeft == 0) { | |
2007 if (PredSU->getNode()->isMachineOpcode()) | |
2008 ++LiveUses; | |
2009 continue; | |
2010 } | |
2011 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); | |
2012 RegDefPos.IsValid(); RegDefPos.Advance()) { | |
2013 MVT VT = RegDefPos.GetValue(); | |
2014 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); | |
2015 if (RegPressure[RCId] >= RegLimit[RCId]) | |
2016 ++PDiff; | |
2017 } | |
2018 } | |
2019 const SDNode *N = SU->getNode(); | |
2020 | |
2021 if (!N || !N->isMachineOpcode() || !SU->NumSuccs) | |
2022 return PDiff; | |
2023 | |
2024 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); | |
2025 for (unsigned i = 0; i != NumDefs; ++i) { | |
2026 MVT VT = N->getSimpleValueType(i); | |
2027 if (!N->hasAnyUseOfValue(i)) | |
2028 continue; | |
2029 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); | |
2030 if (RegPressure[RCId] >= RegLimit[RCId]) | |
2031 --PDiff; | |
2032 } | |
2033 return PDiff; | |
2034 } | |
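// Illustrative sketch (not part of the scheduler): the sign convention above
// is bottom-up. Scheduling SU makes its operands' defs live (pressure goes
// up) and satisfies SU's own defs (pressure goes down), and only classes
// already at their limit are counted. A toy recomputation with a single
// register class and hypothetical numbers, disabled here:
#if 0
#include <cassert>

int main() {
  const unsigned Limit = 4;
  const unsigned Pressure = 5;    // This class is already over its limit.
  const unsigned OperandDefs = 2; // Values SU's operands would make live.
  const unsigned OwnUsedDefs = 1; // Values SU defines that die when scheduled.
  int PDiff = 0;
  for (unsigned i = 0; i != OperandDefs; ++i)
    if (Pressure >= Limit)
      ++PDiff;                    // Counting up for uses that are not live.
  for (unsigned i = 0; i != OwnUsedDefs; ++i)
    if (Pressure >= Limit)
      --PDiff;                    // Counting down for defs.
  assert(PDiff == 1);             // Positive: scheduling SU adds pressure.
  return 0;
}
#endif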
2035 | |
2036 void RegReductionPQBase::scheduledNode(SUnit *SU) { | |
2037 if (!TracksRegPressure) | |
2038 return; | |
2039 | |
2040 if (!SU->getNode()) | |
2041 return; | |
2042 | |
2043 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); | |
2044 I != E; ++I) { | |
2045 if (I->isCtrl()) | |
2046 continue; | |
2047 SUnit *PredSU = I->getSUnit(); | |
2048 // NumRegDefsLeft is zero when enough uses of this node have been scheduled | |
2049 // to cover the number of registers defined (they are all live). | |
2050 if (PredSU->NumRegDefsLeft == 0) { | |
2051 continue; | |
2052 } | |
2053 // FIXME: The ScheduleDAG currently loses information about which of a | |
2054 // node's values is consumed by each dependence. Consequently, if the node | |
2055 // defines multiple register classes, we don't know which to pressurize | |
2056 // here. Instead the following loop consumes the register defs in an | |
2057 // arbitrary order. At least it handles the common case of clustered loads | |
2058 // to the same class. For precise liveness, each SDep needs to indicate the | |
2059 // result number. But that tightly couples the ScheduleDAG with the | |
2060 // SelectionDAG making updates tricky. A simpler hack would be to attach a | |
2061 // value type or register class to SDep. | |
2062 // | |
2063 // The most important aspect of register tracking is balancing the increase | |
2064 // here with the reduction further below. Note that this SU may use multiple | |
2065 // defs in PredSU. They can't be determined here, but we've already | |
2066 // compensated by reducing NumRegDefsLeft in PredSU during | |
2067 // ScheduleDAGSDNodes::AddSchedEdges. | |
2068 --PredSU->NumRegDefsLeft; | |
2069 unsigned SkipRegDefs = PredSU->NumRegDefsLeft; | |
2070 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); | |
2071 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { | |
2072 if (SkipRegDefs) | |
2073 continue; | |
2074 | |
2075 unsigned RCId, Cost; | |
2076 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); | |
2077 RegPressure[RCId] += Cost; | |
2078 break; | |
2079 } | |
2080 } | |
2081 | |
2082 // We should have this assert, but there may be dead SDNodes that never | |
2083 // materialize as SUnits, so they don't appear to generate liveness. | |
2084 //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); | |
2085 int SkipRegDefs = (int)SU->NumRegDefsLeft; | |
2086 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); | |
2087 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { | |
2088 if (SkipRegDefs > 0) | |
2089 continue; | |
2090 unsigned RCId, Cost; | |
2091 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); | |
2092 if (RegPressure[RCId] < Cost) { | |
2093 // Register pressure tracking is imprecise. This can happen. But we try | |
2094 // hard not to let it happen because it likely results in poor scheduling. | |
2095 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); | |
2096 RegPressure[RCId] = 0; | |
2097 } | |
2098 else { | |
2099 RegPressure[RCId] -= Cost; | |
2100 } | |
2101 } | |
2102 dumpRegPressure(); | |
2103 } | |
2104 | |
2105 void RegReductionPQBase::unscheduledNode(SUnit *SU) { | |
2106 if (!TracksRegPressure) | |
2107 return; | |
2108 | |
2109 const SDNode *N = SU->getNode(); | |
2110 if (!N) return; | |
2111 | |
2112 if (!N->isMachineOpcode()) { | |
2113 if (N->getOpcode() != ISD::CopyToReg) | |
2114 return; | |
2115 } else { | |
2116 unsigned Opc = N->getMachineOpcode(); | |
2117 if (Opc == TargetOpcode::EXTRACT_SUBREG || | |
2118 Opc == TargetOpcode::INSERT_SUBREG || | |
2119 Opc == TargetOpcode::SUBREG_TO_REG || | |
2120 Opc == TargetOpcode::REG_SEQUENCE || | |
2121 Opc == TargetOpcode::IMPLICIT_DEF) | |
2122 return; | |
2123 } | |
2124 | |
2125 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); | |
2126 I != E; ++I) { | |
2127 if (I->isCtrl()) | |
2128 continue; | |
2129 SUnit *PredSU = I->getSUnit(); | |
2130 // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only | |
2131 // counts data deps. | |
2132 if (PredSU->NumSuccsLeft != PredSU->Succs.size()) | |
2133 continue; | |
2134 const SDNode *PN = PredSU->getNode(); | |
2135 if (!PN->isMachineOpcode()) { | |
2136 if (PN->getOpcode() == ISD::CopyFromReg) { | |
2137 MVT VT = PN->getSimpleValueType(0); | |
2138 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); | |
2139 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); | |
2140 } | |
2141 continue; | |
2142 } | |
2143 unsigned POpc = PN->getMachineOpcode(); | |
2144 if (POpc == TargetOpcode::IMPLICIT_DEF) | |
2145 continue; | |
2146 if (POpc == TargetOpcode::EXTRACT_SUBREG || | |
2147 POpc == TargetOpcode::INSERT_SUBREG || | |
2148 POpc == TargetOpcode::SUBREG_TO_REG) { | |
2149 MVT VT = PN->getSimpleValueType(0); | |
2150 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); | |
2151 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); | |
2152 continue; | |
2153 } | |
2154 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); | |
2155 for (unsigned i = 0; i != NumDefs; ++i) { | |
2156 MVT VT = PN->getSimpleValueType(i); | |
2157 if (!PN->hasAnyUseOfValue(i)) | |
2158 continue; | |
2159 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); | |
2160 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) | |
2161 // Register pressure tracking is imprecise. This can happen. | |
2162 RegPressure[RCId] = 0; | |
2163 else | |
2164 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); | |
2165 } | |
2166 } | |
2167 | |
2168 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() | |
2169 // may transfer data dependencies to CopyToReg. | |
2170 if (SU->NumSuccs && N->isMachineOpcode()) { | |
2171 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); | |
2172 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { | |
2173 MVT VT = N->getSimpleValueType(i); | |
2174 if (VT == MVT::Glue || VT == MVT::Other) | |
2175 continue; | |
2176 if (!N->hasAnyUseOfValue(i)) | |
2177 continue; | |
2178 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); | |
2179 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); | |
2180 } | |
2181 } | |
2182 | |
2183 dumpRegPressure(); | |
2184 } | |
2185 | |
2186 //===----------------------------------------------------------------------===// | |
2187 // Dynamic Node Priority for Register Pressure Reduction | |
2188 //===----------------------------------------------------------------------===// | |
2189 | |
2190 /// closestSucc - Returns the scheduled cycle of the successor which is | |
2191 /// closest to the current cycle. | |
2192 static unsigned closestSucc(const SUnit *SU) { | |
2193 unsigned MaxHeight = 0; | |
2194 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); | |
2195 I != E; ++I) { | |
2196 if (I->isCtrl()) continue; // ignore chain succs | |
2197 unsigned Height = I->getSUnit()->getHeight(); | |
2198 // If there are a bunch of CopyToRegs stacked up, they should be considered | |
2199 // to be at the same position. | |
2200 if (I->getSUnit()->getNode() && | |
2201 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) | |
2202 Height = closestSucc(I->getSUnit())+1; | |
2203 if (Height > MaxHeight) | |
2204 MaxHeight = Height; | |
2205 } | |
2206 return MaxHeight; | |
2207 } | |
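// Illustrative sketch (not part of the scheduler): closestSucc() treats a
// chain of CopyToReg successors as sitting at their eventual consumer's
// position, recursing through the copies. A toy version over heights, with
// hypothetical data, disabled so the file still compiles:
#if 0
#include <algorithm>
#include <cassert>
#include <vector>

struct ToySU {
  bool IsCopy;
  unsigned Height;
  std::vector<int> Succs;
};

static unsigned closest(const std::vector<ToySU> &G, int N) {
  unsigned Max = 0;
  for (int S : G[N].Succs) {
    unsigned H = G[S].Height;
    if (G[S].IsCopy)              // Look through stacked copies.
      H = closest(G, S) + 1;
    Max = std::max(Max, H);
  }
  return Max;
}

int main() {
  // Node 2 -> node 1 (a copy) -> node 0 (real consumer at height 7).
  std::vector<ToySU> G = {{false, 7, {}}, {true, 0, {0}}, {false, 0, {1}}};
  assert(closest(G, 2) == 8); // The copy reports its consumer's height + 1.
  return 0;
}
#endif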
2208 | |
2209 /// calcMaxScratches - Returns a cost estimate of the worst case requirement | |
2210 /// for scratch registers, i.e. number of data dependencies. | |
2211 static unsigned calcMaxScratches(const SUnit *SU) { | |
2212 unsigned Scratches = 0; | |
2213 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); | |
2214 I != E; ++I) { | |
2215 if (I->isCtrl()) continue; // ignore chain preds | |
2216 Scratches++; | |
2217 } | |
2218 return Scratches; | |
2219 } | |
2220 | |
2221 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are | |
2222 /// CopyFromReg from a virtual register. | |
2223 static bool hasOnlyLiveInOpers(const SUnit *SU) { | |
2224 bool RetVal = false; | |
2225 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); | |
2226 I != E; ++I) { | |
2227 if (I->isCtrl()) continue; | |
2228 const SUnit *PredSU = I->getSUnit(); | |
2229 if (PredSU->getNode() && | |
2230 PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { | |
2231 unsigned Reg = | |
2232 cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); | |
2233 if (TargetRegisterInfo::isVirtualRegister(Reg)) { | |
2234 RetVal = true; | |
2235 continue; | |
2236 } | |
2237 } | |
2238 return false; | |
2239 } | |
2240 return RetVal; | |
2241 } | |
2242 | |
2243 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are | |
2244 /// CopyToReg to a virtual register. This SU def is probably a liveout and | |
2245 /// it has no other use. It should be scheduled closer to the terminator. | |
2246 static bool hasOnlyLiveOutUses(const SUnit *SU) { | |
2247 bool RetVal = false; | |
2248 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); | |
2249 I != E; ++I) { | |
2250 if (I->isCtrl()) continue; | |
2251 const SUnit *SuccSU = I->getSUnit(); | |
2252 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { | |
2253 unsigned Reg = | |
2254 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); | |
2255 if (TargetRegisterInfo::isVirtualRegister(Reg)) { | |
2256 RetVal = true; | |
2257 continue; | |
2258 } | |
2259 } | |
2260 return false; | |
2261 } | |
2262 return RetVal; | |
2263 } | |
2264 | |
2265 // Set isVRegCycle for a node with only live-in operands and live-out uses. Also | |
2266 // set isVRegCycle for its CopyFromReg operands. | |
2267 // | |
2268 // This is only relevant for single-block loops, in which case the VRegCycle | |
2269 // node is likely an induction variable whose operand and target virtual | |
2270 // registers should be coalesced (e.g. pre/post increment values). Setting the | |
2271 // isVRegCycle flag helps the scheduler prioritize other uses of the same | |
2272 // CopyFromReg so that this node becomes the virtual register "kill". This | |
2273 // avoids interference between the values live in and out of the block and | |
2274 // eliminates a copy inside the loop. | |
2275 static void initVRegCycle(SUnit *SU) { | |
2276 if (DisableSchedVRegCycle) | |
2277 return; | |
2278 | |
2279 if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) | |
2280 return; | |
2281 | |
2282 DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); | |
2283 | |
2284 SU->isVRegCycle = true; | |
2285 | |
2286 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); | |
2287 I != E; ++I) { | |
2288 if (I->isCtrl()) continue; | |
2289 I->getSUnit()->isVRegCycle = true; | |
2290 } | |
2291 } | |
2292 | |
2293 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of | |
2294 // CopyFromReg operands. We should no longer penalize other uses of this VReg. | |
2295 static void resetVRegCycle(SUnit *SU) { | |
2296 if (!SU->isVRegCycle) | |
2297 return; | |
2298 | |
2299 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); | |
2300 I != E; ++I) { | |
2301 if (I->isCtrl()) continue; // ignore chain preds | |
2302 SUnit *PredSU = I->getSUnit(); | |
2303 if (PredSU->isVRegCycle) { | |
2304 assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && | |
2305 "VRegCycle def must be CopyFromReg"); | |
2306 I->getSUnit()->isVRegCycle = 0; | |
2307 } | |
2308 } | |
2309 } | |
2310 | |
2311 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This | |
2312 // means a node that defines the VRegCycle has not been scheduled yet. | |
2313 static bool hasVRegCycleUse(const SUnit *SU) { | |
2314 // If this SU also defines the VReg, don't hoist it as a "use". | |
2315 if (SU->isVRegCycle) | |
2316 return false; | |
2317 | |
2318 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); | |
2319 I != E; ++I) { | |
2320 if (I->isCtrl()) continue; // ignore chain preds | |
2321 if (I->getSUnit()->isVRegCycle && | |
2322 I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { | |
2323 DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); | |
2324 return true; | |
2325 } | |
2326 } | |
2327 return false; | |
2328 } | |
2329 | |
2330 // Check for either a dependence (latency) or resource (hazard) stall. | |
2331 // | |
2332 // Note: The ScheduleHazardRecognizer interface requires a non-const SU. | |
2333 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { | |
2334 if ((int)SPQ->getCurCycle() < Height) return true; | |
2335 if (SPQ->getHazardRec()->getHazardType(SU, 0) | |
2336 != ScheduleHazardRecognizer::NoHazard) | |
2337 return true; | |
2338 return false; | |
2339 } | |
2340 | |
2341 // Return -1 if left has higher priority, 1 if right has higher priority. | |
2342 // Return 0 if latency-based priority is equivalent. | |
2343 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, | |
2344 RegReductionPQBase *SPQ) { | |
2345 // Scheduling an instruction that uses a VReg whose postincrement has not yet | |
2346 // been scheduled will induce a copy. Model this as an extra cycle of latency. | |
2347 int LPenalty = hasVRegCycleUse(left) ? 1 : 0; | |
2348 int RPenalty = hasVRegCycleUse(right) ? 1 : 0; | |
2349 int LHeight = (int)left->getHeight() + LPenalty; | |
2350 int RHeight = (int)right->getHeight() + RPenalty; | |
2351 | |
2352 bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) && | |
2353 BUHasStall(left, LHeight, SPQ); | |
2354 bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) && | |
2355 BUHasStall(right, RHeight, SPQ); | |
2356 | |
2357 // If scheduling one of the nodes will cause a pipeline stall, delay it. | |
2358 // If scheduling both of the nodes will cause pipeline stalls, sort them | |
2359 // according to their height. | |
2360 if (LStall) { | |
2361 if (!RStall) | |
2362 return 1; | |
2363 if (LHeight != RHeight) | |
2364 return LHeight > RHeight ? 1 : -1; | |
2365 } else if (RStall) | |
2366 return -1; | |
2367 | |
2368 // If either node is scheduling for latency, sort them by height/depth | |
2369 // and latency. | |
2370 if (!checkPref || (left->SchedulingPref == Sched::ILP || | |
2371 right->SchedulingPref == Sched::ILP)) { | |
2372 // If neither instruction stalls (!LStall && !RStall) and the HazardRecognizer | |
2373 // is enabled, grouping instructions by cycle, then height is already | |
2374 // covered, so only depth matters. We also reach this point if both stall | |
2375 // but have the same height. | |
2376 if (!SPQ->getHazardRec()->isEnabled()) { | |
2377 if (LHeight != RHeight) | |
2378 return LHeight > RHeight ? 1 : -1; | |
2379 } | |
2380 int LDepth = left->getDepth() - LPenalty; | |
2381 int RDepth = right->getDepth() - RPenalty; | |
2382 if (LDepth != RDepth) { | |
2383 DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum | |
2384 << ") depth " << LDepth << " vs SU (" << right->NodeNum | |
2385 << ") depth " << RDepth << "\n"); | |
2386 return LDepth < RDepth ? 1 : -1; | |
2387 } | |
2388 if (left->Latency != right->Latency) | |
2389 return left->Latency > right->Latency ? 1 : -1; | |
2390 } | |
2391 return 0; | |
2392 } | |
2393 | |
2394 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { | |
2395 // Schedule physical register definitions close to their use. This is | |
2396 // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as | |
2397 // long as shortening physreg live ranges is generally good, we can defer | |
2398 // creating a subtarget hook. | |
2399 if (!DisableSchedPhysRegJoin) { | |
2400 bool LHasPhysReg = left->hasPhysRegDefs; | |
2401 bool RHasPhysReg = right->hasPhysRegDefs; | |
2402 if (LHasPhysReg != RHasPhysReg) { | |
2403 #ifndef NDEBUG | |
2404 static const char *const PhysRegMsg[] = { " has no physreg", | |
2405 " defines a physreg" }; | |
2406 #endif | |
2407 DEBUG(dbgs() << " SU (" << left->NodeNum << ") " | |
2408 << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " | |
2409 << PhysRegMsg[RHasPhysReg] << "\n"); | |
2410 return LHasPhysReg < RHasPhysReg; | |
2411 } | |
2412 } | |
2413 | |
2414 // Prioritize by Sethi-Ullman number and push CopyToReg nodes down. | |
2415 unsigned LPriority = SPQ->getNodePriority(left); | |
2416 unsigned RPriority = SPQ->getNodePriority(right); | |
2417 | |
2418 // Be really careful about hoisting call operands above previous calls. | |
2419 // Only allow it if it would reduce register pressure. | |
2420 if (left->isCall && right->isCallOp) { | |
2421 unsigned RNumVals = right->getNode()->getNumValues(); | |
2422 RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0; | |
2423 } | |
2424 if (right->isCall && left->isCallOp) { | |
2425 unsigned LNumVals = left->getNode()->getNumValues(); | |
2426 LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; | |
2427 } | |
2428 | |
2429 if (LPriority != RPriority) | |
2430 return LPriority > RPriority; | |
2431 | |
2432 // If one or both of the nodes are calls and their Sethi-Ullman numbers are | |
2433 // the same, then keep source order. | |
2434 if (left->isCall || right->isCall) { | |
2435 unsigned LOrder = SPQ->getNodeOrdering(left); | |
2436 unsigned ROrder = SPQ->getNodeOrdering(right); | |
2437 | |
2438 // Prefer an ordering where the lower the non-zero order number, the higher | |
2439 // the preference. | |
2440 if ((LOrder || ROrder) && LOrder != ROrder) | |
2441 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); | |
2442 } | |
2443 | |
2444 // Try to schedule def + use closer when Sethi-Ullman numbers are the same. | |
2445 // e.g. | |
2446 // t1 = op t2, c1 | |
2447 // t3 = op t4, c2 | |
2448 // | |
2449 // and the following instructions are both ready. | |
2450 // t2 = op c3 | |
2451 // t4 = op c4 | |
2452 // | |
2453 // Then schedule t2 = op first. | |
2454 // i.e. | |
2455 // t4 = op c4 | |
2456 // t2 = op c3 | |
2457 // t1 = op t2, c1 | |
2458 // t3 = op t4, c2 | |
2459 // | |
2460 // This creates more short live intervals. | |
2461 unsigned LDist = closestSucc(left); | |
2462 unsigned RDist = closestSucc(right); | |
2463 if (LDist != RDist) | |
2464 return LDist < RDist; | |
2465 | |
2466 // How many registers become live when the node is scheduled. | |
2467 unsigned LScratch = calcMaxScratches(left); | |
2468 unsigned RScratch = calcMaxScratches(right); | |
2469 if (LScratch != RScratch) | |
2470 return LScratch > RScratch; | |
2471 | |
2472 // Comparing latency against a call makes little sense unless the node | |
2473 // is register pressure-neutral. | |
2474 if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0)) | |
2475 return (left->NodeQueueId > right->NodeQueueId); | |
2476 | |
2477 // Do not compare latencies when one or both of the nodes are calls. | |
2478 if (!DisableSchedCycles && | |
2479 !(left->isCall || right->isCall)) { | |
2480 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); | |
2481 if (result != 0) | |
2482 return result > 0; | |
2483 } | |
2484 else { | |
2485 if (left->getHeight() != right->getHeight()) | |
2486 return left->getHeight() > right->getHeight(); | |
2487 | |
2488 if (left->getDepth() != right->getDepth()) | |
2489 return left->getDepth() < right->getDepth(); | |
2490 } | |
2491 | |
2492 assert(left->NodeQueueId && right->NodeQueueId && | |
2493 "NodeQueueId cannot be zero"); | |
2494 return (left->NodeQueueId > right->NodeQueueId); | |
2495 } | |
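// Illustrative sketch (not part of the scheduler): BURRSort is a cascade of
// tie-breakers in which each key decides only when every earlier key
// compares equal, and the final NodeQueueId test yields a total order, so
// two distinct nodes never compare as equivalent. A condensed, standalone
// version of the pattern with hypothetical keys, disabled here:
#if 0
#include <cassert>

struct Cand { unsigned PriorityNum, Scratches, QueueId; };

// True if Right should be scheduled before Left (the pickers' convention).
static bool rightFirst(const Cand &L, const Cand &R) {
  if (L.PriorityNum != R.PriorityNum)
    return L.PriorityNum > R.PriorityNum; // Smaller priority number wins.
  if (L.Scratches != R.Scratches)
    return L.Scratches > R.Scratches;     // Fewer scratch registers wins.
  return L.QueueId > R.QueueId;           // FIFO among exact ties.
}

int main() {
  Cand A = {2, 1, 10}, B = {2, 3, 11}, C = {2, 3, 12};
  assert(rightFirst(B, A));  // A wins on the second key.
  assert(!rightFirst(A, B));
  assert(rightFirst(C, B));  // An exact tie falls through to QueueId.
  return 0;
}
#endif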
2496 | |
2497 // Bottom up | |
2498 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { | |
2499 if (int res = checkSpecialNodes(left, right)) | |
2500 return res > 0; | |
2501 | |
2502 return BURRSort(left, right, SPQ); | |
2503 } | |
2504 | |
2505 // Source order, otherwise bottom up. | |
2506 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { | |
2507 if (int res = checkSpecialNodes(left, right)) | |
2508 return res > 0; | |
2509 | |
2510 unsigned LOrder = SPQ->getNodeOrdering(left); | |
2511 unsigned ROrder = SPQ->getNodeOrdering(right); | |
2512 | |
2513 // Prefer an ordering where the lower the non-zero order number, the higher | |
2514 // the preference. | |
2515 if ((LOrder || ROrder) && LOrder != ROrder) | |
2516 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); | |
2517 | |
2518 return BURRSort(left, right, SPQ); | |
2519 } | |
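// Illustrative sketch (not part of the scheduler): the source-order test
// above packs "a zero order means no IR position and always loses" into one
// expression. The return value follows the pickers' convention, so "left
// wins" means left is popped later and, after the final reversal, lands
// earlier in the emitted code. A spelled-out equivalent with its edge cases,
// disabled so the file still compiles:
#if 0
#include <cassert>

// True if the left node should land earlier in the final schedule: it has a
// nonzero order that is smaller, or the right node has no order at all.
static bool leftWinsSourceOrder(unsigned LOrder, unsigned ROrder) {
  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
}

int main() {
  assert(leftWinsSourceOrder(3, 7));  // Both ordered: lower wins.
  assert(!leftWinsSourceOrder(7, 3));
  assert(leftWinsSourceOrder(3, 0));  // An unordered right always loses.
  assert(!leftWinsSourceOrder(0, 3)); // An unordered left always loses.
  return 0;
}
#endif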
2520 | |
2521 // If the time between now and when the instruction will be ready can cover | |
2522 // the spill code, then avoid adding it to the ready queue. This gives long | |
2523 // stalls highest priority and allows hoisting across calls. It should also | |
2524 // speed up processing the available queue. | |
2525 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { | |
2526 static const unsigned ReadyDelay = 3; | |
2527 | |
2528 if (SPQ->MayReduceRegPressure(SU)) return true; | |
2529 | |
2530 if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; | |
2531 | |
2532 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) | |
2533 != ScheduleHazardRecognizer::NoHazard) | |
2534 return false; | |
2535 | |
2536 return true; | |
2537 } | |
2538 | |
2539 // Return true if right should be scheduled with higher priority than left. | |
2540 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { | |
2541 if (int res = checkSpecialNodes(left, right)) | |
2542 return res > 0; | |
2543 | |
2544 if (left->isCall || right->isCall) | |
2545 // No way to compute latency of calls. | |
2546 return BURRSort(left, right, SPQ); | |
2547 | |
2548 bool LHigh = SPQ->HighRegPressure(left); | |
2549 bool RHigh = SPQ->HighRegPressure(right); | |
2550 // Avoid causing spills. If register pressure is high, schedule for | |
2551 // register pressure reduction. | |
2552 if (LHigh && !RHigh) { | |
2553 DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" | |
2554 << right->NodeNum << ")\n"); | |
2555 return true; | |
2556 } | |
2557 else if (!LHigh && RHigh) { | |
2558 DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" | |
2559 << left->NodeNum << ")\n"); | |
2560 return false; | |
2561 } | |
2562 if (!LHigh && !RHigh) { | |
2563 int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); | |
2564 if (result != 0) | |
2565 return result > 0; | |
2566 } | |
2567 return BURRSort(left, right, SPQ); | |
2568 } | |
2569 | |
2570 // Schedule as many instructions in each cycle as possible, so don't make an | |
2571 // instruction available unless it is ready in the current cycle. | |
2572 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { | |
2573 if (SU->getHeight() > CurCycle) return false; | |
2574 | |
2575 if (SPQ->getHazardRec()->getHazardType(SU, 0) | |
2576 != ScheduleHazardRecognizer::NoHazard) | |
2577 return false; | |
2578 | |
2579 return true; | |
2580 } | |
2581 | |
2582 static bool canEnableCoalescing(SUnit *SU) { | |
2583 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; | |
2584 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) | |
2585 // CopyToReg should be close to its uses to facilitate coalescing and | |
2586 // avoid spilling. | |
2587 return true; | |
2588 | |
2589 if (Opc == TargetOpcode::EXTRACT_SUBREG || | |
2590 Opc == TargetOpcode::SUBREG_TO_REG || | |
2591 Opc == TargetOpcode::INSERT_SUBREG) | |
2592 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be | |
2593 // close to their uses to facilitate coalescing. | |
2594 return true; | |
2595 | |
2596 if (SU->NumPreds == 0 && SU->NumSuccs != 0) | |
2597 // If SU does not have a register def, schedule it close to its uses | |
2598 // because it does not lengthen any live ranges. | |
2599 return true; | |
2600 | |
2601 return false; | |
2602 } | |
2603 | |
2604 // list-ilp is currently an experimental scheduler that allows various | |
2605 // heuristics to be enabled prior to the normal register reduction logic. | |
2606 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { | |
2607 if (int res = checkSpecialNodes(left, right)) | |
2608 return res > 0; | |
2609 | |
2610 if (left->isCall || right->isCall) | |
2611 // No way to compute latency of calls. | |
2612 return BURRSort(left, right, SPQ); | |
2613 | |
2614 unsigned LLiveUses = 0, RLiveUses = 0; | |
2615 int LPDiff = 0, RPDiff = 0; | |
2616 if (!DisableSchedRegPressure || !DisableSchedLiveUses) { | |
2617 LPDiff = SPQ->RegPressureDiff(left, LLiveUses); | |
2618 RPDiff = SPQ->RegPressureDiff(right, RLiveUses); | |
2619 } | |
2620 if (!DisableSchedRegPressure && LPDiff != RPDiff) { | |
2621 DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff | |
2622 << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); | |
2623 return LPDiff > RPDiff; | |
2624 } | |
2625 | |
2626 if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { | |
2627 bool LReduce = canEnableCoalescing(left); | |
2628 bool RReduce = canEnableCoalescing(right); | |
2629 if (LReduce && !RReduce) return false; | |
2630 if (RReduce && !LReduce) return true; | |
2631 } | |
2632 | |
2633 if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { | |
2634 DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses | |
2635 << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); | |
2636 return LLiveUses < RLiveUses; | |
2637 } | |
2638 | |
2639 if (!DisableSchedStalls) { | |
2640 bool LStall = BUHasStall(left, left->getHeight(), SPQ); | |
2641 bool RStall = BUHasStall(right, right->getHeight(), SPQ); | |
2642 if (LStall != RStall) | |
2643 return left->getHeight() > right->getHeight(); | |
2644 } | |
2645 | |
2646 if (!DisableSchedCriticalPath) { | |
2647 int spread = (int)left->getDepth() - (int)right->getDepth(); | |
2648 if (std::abs(spread) > MaxReorderWindow) { | |
2649 DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " | |
2650 << left->getDepth() << " != SU(" << right->NodeNum << "): " | |
2651 << right->getDepth() << "\n"); | |
2652 return left->getDepth() < right->getDepth(); | |
2653 } | |
2654 } | |
2655 | |
2656 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { | |
2657 int spread = (int)left->getHeight() - (int)right->getHeight(); | |
2658 if (std::abs(spread) > MaxReorderWindow) | |
2659 return left->getHeight() > right->getHeight(); | |
2660 } | |
2661 | |
2662 return BURRSort(left, right, SPQ); | |
2663 } | |
2664 | |
2665 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { | |
2666 SUnits = &sunits; | |
2667 // Add pseudo dependency edges for two-address nodes. | |
2668 if (!Disable2AddrHack) | |
2669 AddPseudoTwoAddrDeps(); | |
2670 // Reroute edges to nodes with multiple uses. | |
2671 if (!TracksRegPressure && !SrcOrder) | |
2672 PrescheduleNodesWithMultipleUses(); | |
2673 // Calculate node priorities. | |
2674 CalculateSethiUllmanNumbers(); | |
2675 | |
2676 // For single block loops, mark nodes that look like canonical IV increments. | |
2677 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) { | |
2678 for (unsigned i = 0, e = sunits.size(); i != e; ++i) { | |
2679 initVRegCycle(&sunits[i]); | |
2680 } | |
2681 } | |
2682 } | |
2683 | |
2684 //===----------------------------------------------------------------------===// | |
2685 // Preschedule for Register Pressure | |
2686 //===----------------------------------------------------------------------===// | |
2687 | |
2688 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { | |
2689 if (SU->isTwoAddress) { | |
2690 unsigned Opc = SU->getNode()->getMachineOpcode(); | |
2691 const MCInstrDesc &MCID = TII->get(Opc); | |
2692 unsigned NumRes = MCID.getNumDefs(); | |
2693 unsigned NumOps = MCID.getNumOperands() - NumRes; | |
2694 for (unsigned i = 0; i != NumOps; ++i) { | |
2695 if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) { | |
2696 SDNode *DU = SU->getNode()->getOperand(i).getNode(); | |
2697 if (DU->getNodeId() != -1 && | |
2698 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) | |
2699 return true; | |
2700 } | |
2701 } | |
2702 } | |
2703 return false; | |
2704 } | |
2705 | |
2706 /// canClobberReachingPhysRegUse - True if SU would clobber one of its | |
2707 /// successor's explicit physregs whose definition can reach DepSU. | |
2708 /// i.e. DepSU should not be scheduled above SU. | |
2709 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, | |
2710 ScheduleDAGRRList *scheduleDAG, | |
2711 const TargetInstrInfo *TII, | |
2712 const TargetRegisterInfo *TRI) { | |
2713 const uint16_t *ImpDefs | |
2714 = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); | |
2715 const uint32_t *RegMask = getNodeRegMask(SU->getNode()); | |
2716 if (!ImpDefs && !RegMask) | |
2717 return false; | |
2718 | |
2719 for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); | |
2720 SI != SE; ++SI) { | |
2721 SUnit *SuccSU = SI->getSUnit(); | |
2722 for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(), | |
2723 PE = SuccSU->Preds.end(); PI != PE; ++PI) { | |
2724 if (!PI->isAssignedRegDep()) | |
2725 continue; | |
2726 | |
2727 if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) && | |
2728 scheduleDAG->IsReachable(DepSU, PI->getSUnit())) | |
2729 return true; | |
2730 | |
2731 if (ImpDefs) | |
2732 for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef) | |
2733 // Return true if SU clobbers this physical register use and the | |
2734 // definition of the register reaches from DepSU. IsReachable queries | |
2735 // a topological forward sort of the DAG (following the successors). | |
2736 if (TRI->regsOverlap(*ImpDef, PI->getReg()) && | |
2737 scheduleDAG->IsReachable(DepSU, PI->getSUnit())) | |
2738 return true; | |
2739 } | |
2740 } | |
2741 return false; | |
2742 } | |
2743 | |
2744 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's | |
2745 /// physical register defs. | |
2746 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, | |
2747 const TargetInstrInfo *TII, | |
2748 const TargetRegisterInfo *TRI) { | |
2749 SDNode *N = SuccSU->getNode(); | |
2750 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); | |
2751 const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); | |
2752 assert(ImpDefs && "Caller should check hasPhysRegDefs"); | |
2753 for (const SDNode *SUNode = SU->getNode(); SUNode; | |
2754 SUNode = SUNode->getGluedNode()) { | |
2755 if (!SUNode->isMachineOpcode()) | |
2756 continue; | |
2757 const uint16_t *SUImpDefs = | |
2758 TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); | |
2759 const uint32_t *SURegMask = getNodeRegMask(SUNode); | |
2760 if (!SUImpDefs && !SURegMask) | |
2761 continue; | |
2762 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { | |
2763 EVT VT = N->getValueType(i); | |
2764 if (VT == MVT::Glue || VT == MVT::Other) | |
2765 continue; | |
2766 if (!N->hasAnyUseOfValue(i)) | |
2767 continue; | |
2768 unsigned Reg = ImpDefs[i - NumDefs]; | |
2769 if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) | |
2770 return true; | |
2771 if (!SUImpDefs) | |
2772 continue; | |
2773 for (;*SUImpDefs; ++SUImpDefs) { | |
2774 unsigned SUReg = *SUImpDefs; | |
2775 if (TRI->regsOverlap(Reg, SUReg)) | |
2776 return true; | |
2777 } | |
2778 } | |
2779 } | |
2780 return false; | |
2781 } | |
2782 | |
2783 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses | |
2784 /// are not handled well by the general register pressure reduction | |
2785 /// heuristics. When presented with code like this: | |
2786 /// | |
2787 /// N | |
2788 /// / | | |
2789 /// / | | |
2790 /// U store | |
2791 /// | | |
2792 /// ... | |
2793 /// | |
2794 /// the heuristics tend to push the store up, but since the | |
2795 /// operand of the store has another use (U), this would increase | |
2796 /// the length of that other use (the U->N edge). | |
2797 /// | |
2798 /// This function transforms code like the above to route U's | |
2799 /// dependence through the store when possible, like this: | |
2800 /// | |
2801 /// N | |
2802 /// || | |
2803 /// || | |
2804 /// store | |
2805 /// | | |
2806 /// U | |
2807 /// | | |
2808 /// ... | |
2809 /// | |
2810 /// This results in the store being scheduled immediately | |
2811 /// after N, which shortens the U->N live range, reducing | |
2812 /// register pressure. | |
2813 /// | |
void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
         EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires
    // additional support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't
    // behave like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
         EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
                 << " next to PredSU #" << PredSU->NodeNum
                 << " to guide scheduling in the presence of multiple uses\n");
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address but one is commutable while the other is not,
/// favor the one that's not commutable.
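///
/// For example, in this hypothetical two-address fragment (t0..t2 are
/// virtual registers, and the ADD's result is tied to its first operand):
/// \code
///   t1 = ADD t0, ...   ; two-address: overwrites t0
///   t2 = SUB t0, ...   ; another reader of t0
/// \endcode
/// an artificial edge making the ADD's SUnit depend on the SUB's forces the
/// SUB to be emitted before t0 is overwritten, so no extra copy of t0 is
/// needed to satisfy the two-address constraint.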
void RegReductionPQBase::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
      continue;

    bool isLiveOut = hasOnlyLiveOutUses(SU);
    unsigned Opc = Node->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    unsigned NumRes = MCID.getNumDefs();
    unsigned NumOps = MCID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
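      // Only operands carrying a TIED_TO constraint are def&use operands;
      // anything else cannot create the two-address hazard handled here.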
      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
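      // A NodeId of -1 means the operand's node has no SUnit assigned to
      // it, so there is nothing to constrain.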
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) &&
            (!canClobber(SuccSU, DUSU) ||
             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial));
        }
      }
    }
  }
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//
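// Note: these factories are normally reached through the scheduler registry
// (llc's -pre-RA-sched option, e.g. -pre-RA-sched=list-burr) rather than
// being called directly.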

llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
                                 CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  BURegReductionPriorityQueue *PQ =
    new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  SrcRegReductionPriorityQueue *PQ =
    new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = IS->getTargetLowering();

  HybridBURRPriorityQueue *PQ =
    new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);

  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
                                CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = IS->getTargetLowering();

  ILPBURRPriorityQueue *PQ =
    new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}