Mercurial > hg > CbC > CbC_llvm
comparison lib/CodeGen/PeepholeOptimizer.cpp @ 95:afa8332a0e37 LLVM3.8
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Oct 2015 17:48:58 +0900 |
parents | 60c9769439b8 |
children | 7d135dc70f03 |
comparison
equal
deleted
inserted
replaced
84:f3e34b893a5f | 95:afa8332a0e37 |
---|---|
41 // "sub" with "subs" and eliminate the "cmp" instruction. | 41 // "sub" with "subs" and eliminate the "cmp" instruction. |
42 // | 42 // |
43 // - Optimize Loads: | 43 // - Optimize Loads: |
44 // | 44 // |
45 // Loads that can be folded into a later instruction. A load is foldable | 45 // Loads that can be folded into a later instruction. A load is foldable |
46 // if it loads to virtual registers and the virtual register defined has | 46 // if it loads to virtual registers and the virtual register defined has |
47 // a single use. | 47 // a single use. |
48 // | 48 // |
49 // - Optimize Copies and Bitcast (more generally, target specific copies): | 49 // - Optimize Copies and Bitcast (more generally, target specific copies): |
50 // | 50 // |
51 // Rewrite copies and bitcasts to avoid cross register bank copies | 51 // Rewrite copies and bitcasts to avoid cross register bank copies |
74 #include "llvm/CodeGen/MachineDominators.h" | 74 #include "llvm/CodeGen/MachineDominators.h" |
75 #include "llvm/CodeGen/MachineInstrBuilder.h" | 75 #include "llvm/CodeGen/MachineInstrBuilder.h" |
76 #include "llvm/CodeGen/MachineRegisterInfo.h" | 76 #include "llvm/CodeGen/MachineRegisterInfo.h" |
77 #include "llvm/Support/CommandLine.h" | 77 #include "llvm/Support/CommandLine.h" |
78 #include "llvm/Support/Debug.h" | 78 #include "llvm/Support/Debug.h" |
79 #include "llvm/Support/raw_ostream.h" | |
79 #include "llvm/Target/TargetInstrInfo.h" | 80 #include "llvm/Target/TargetInstrInfo.h" |
80 #include "llvm/Target/TargetRegisterInfo.h" | 81 #include "llvm/Target/TargetRegisterInfo.h" |
81 #include "llvm/Target/TargetSubtargetInfo.h" | 82 #include "llvm/Target/TargetSubtargetInfo.h" |
82 #include <utility> | 83 #include <utility> |
83 using namespace llvm; | 84 using namespace llvm; |
94 cl::desc("Disable the peephole optimizer")); | 95 cl::desc("Disable the peephole optimizer")); |
95 | 96 |
96 static cl::opt<bool> | 97 static cl::opt<bool> |
97 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), | 98 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), |
98 cl::desc("Disable advanced copy optimization")); | 99 cl::desc("Disable advanced copy optimization")); |
100 | |
101 // Limit the number of PHI instructions to process | |
102 // in PeepholeOptimizer::getNextSource. | |
103 static cl::opt<unsigned> RewritePHILimit( | |
104 "rewrite-phi-limit", cl::Hidden, cl::init(10), | |
105 cl::desc("Limit the length of PHI chains to lookup")); | |
99 | 106 |
100 STATISTIC(NumReuse, "Number of extension results reused"); | 107 STATISTIC(NumReuse, "Number of extension results reused"); |
101 STATISTIC(NumCmps, "Number of compares eliminated"); | 108 STATISTIC(NumCmps, "Number of compares eliminated"); |
102 STATISTIC(NumImmFold, "Number of move immediate folded"); | 109 STATISTIC(NumImmFold, "Number of move immediate folded"); |
103 STATISTIC(NumLoadFold, "Number of loads folded"); | 110 STATISTIC(NumLoadFold, "Number of loads folded"); |
104 STATISTIC(NumSelects, "Number of selects optimized"); | 111 STATISTIC(NumSelects, "Number of selects optimized"); |
105 STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); | 112 STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); |
106 STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); | 113 STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); |
107 | 114 |
108 namespace { | 115 namespace { |
116 class ValueTrackerResult; | |
117 | |
109 class PeepholeOptimizer : public MachineFunctionPass { | 118 class PeepholeOptimizer : public MachineFunctionPass { |
110 const TargetInstrInfo *TII; | 119 const TargetInstrInfo *TII; |
111 const TargetRegisterInfo *TRI; | 120 const TargetRegisterInfo *TRI; |
112 MachineRegisterInfo *MRI; | 121 MachineRegisterInfo *MRI; |
113 MachineDominatorTree *DT; // Machine dominator tree | 122 MachineDominatorTree *DT; // Machine dominator tree |
127 AU.addRequired<MachineDominatorTree>(); | 136 AU.addRequired<MachineDominatorTree>(); |
128 AU.addPreserved<MachineDominatorTree>(); | 137 AU.addPreserved<MachineDominatorTree>(); |
129 } | 138 } |
130 } | 139 } |
131 | 140 |
141 /// \brief Track Def -> Use info used for rewriting copies. | |
142 typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult> | |
143 RewriteMapTy; | |
144 | |
132 private: | 145 private: |
133 bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); | 146 bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); |
134 bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, | 147 bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, |
135 SmallPtrSetImpl<MachineInstr*> &LocalMIs); | 148 SmallPtrSetImpl<MachineInstr*> &LocalMIs); |
136 bool optimizeSelect(MachineInstr *MI, | 149 bool optimizeSelect(MachineInstr *MI, |
137 SmallPtrSetImpl<MachineInstr *> &LocalMIs); | 150 SmallPtrSetImpl<MachineInstr *> &LocalMIs); |
138 bool optimizeCondBranch(MachineInstr *MI); | 151 bool optimizeCondBranch(MachineInstr *MI); |
139 bool optimizeCopyOrBitcast(MachineInstr *MI); | |
140 bool optimizeCoalescableCopy(MachineInstr *MI); | 152 bool optimizeCoalescableCopy(MachineInstr *MI); |
141 bool optimizeUncoalescableCopy(MachineInstr *MI, | 153 bool optimizeUncoalescableCopy(MachineInstr *MI, |
142 SmallPtrSetImpl<MachineInstr *> &LocalMIs); | 154 SmallPtrSetImpl<MachineInstr *> &LocalMIs); |
143 bool findNextSource(unsigned &Reg, unsigned &SubReg); | 155 bool findNextSource(unsigned Reg, unsigned SubReg, |
156 RewriteMapTy &RewriteMap); | |
144 bool isMoveImmediate(MachineInstr *MI, | 157 bool isMoveImmediate(MachineInstr *MI, |
145 SmallSet<unsigned, 4> &ImmDefRegs, | 158 SmallSet<unsigned, 4> &ImmDefRegs, |
146 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); | 159 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); |
147 bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, | 160 bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, |
148 SmallSet<unsigned, 4> &ImmDefRegs, | 161 SmallSet<unsigned, 4> &ImmDefRegs, |
149 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); | 162 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); |
163 | |
164 /// \brief If copy instruction \p MI is a virtual register copy, track it in | |
165 /// the set \p CopiedFromRegs and \p CopyMIs. If this virtual register was | |
166 /// previously seen as a copy, replace the uses of this copy with the | |
167 /// previously seen copy's destination register. | |
168 bool foldRedundantCopy(MachineInstr *MI, | |
169 SmallSet<unsigned, 4> &CopiedFromRegs, | |
170 DenseMap<unsigned, MachineInstr*> &CopyMIs); | |
171 | |
150 bool isLoadFoldable(MachineInstr *MI, | 172 bool isLoadFoldable(MachineInstr *MI, |
151 SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); | 173 SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); |
152 | 174 |
153 /// \brief Check whether \p MI is understood by the register coalescer | 175 /// \brief Check whether \p MI is understood by the register coalescer |
154 /// but may require some rewriting. | 176 /// but may require some rewriting. |
165 bool isUncoalescableCopy(const MachineInstr &MI) { | 187 bool isUncoalescableCopy(const MachineInstr &MI) { |
166 return MI.isBitcast() || | 188 return MI.isBitcast() || |
167 (!DisableAdvCopyOpt && | 189 (!DisableAdvCopyOpt && |
168 (MI.isRegSequenceLike() || MI.isInsertSubregLike() || | 190 (MI.isRegSequenceLike() || MI.isInsertSubregLike() || |
169 MI.isExtractSubregLike())); | 191 MI.isExtractSubregLike())); |
192 } | |
193 }; | |
194 | |
195 /// \brief Helper class to hold a reply for ValueTracker queries. Contains the | |
196 /// returned sources for a given search and the instructions where the sources | |
197 /// were tracked from. | |
198 class ValueTrackerResult { | |
199 private: | |
200 /// Track all sources found by one ValueTracker query. | |
201 SmallVector<TargetInstrInfo::RegSubRegPair, 2> RegSrcs; | |
202 | |
203 /// Instruction using the sources in 'RegSrcs'. | |
204 const MachineInstr *Inst; | |
205 | |
206 public: | |
207 ValueTrackerResult() : Inst(nullptr) {} | |
208 ValueTrackerResult(unsigned Reg, unsigned SubReg) : Inst(nullptr) { | |
209 addSource(Reg, SubReg); | |
210 } | |
211 | |
212 bool isValid() const { return getNumSources() > 0; } | |
213 | |
214 void setInst(const MachineInstr *I) { Inst = I; } | |
215 const MachineInstr *getInst() const { return Inst; } | |
216 | |
217 void clear() { | |
218 RegSrcs.clear(); | |
219 Inst = nullptr; | |
220 } | |
221 | |
222 void addSource(unsigned SrcReg, unsigned SrcSubReg) { | |
223 RegSrcs.push_back(TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg)); | |
224 } | |
225 | |
226 void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) { | |
227 assert(Idx < getNumSources() && "Reg pair source out of index"); | |
228 RegSrcs[Idx] = TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg); | |
229 } | |
230 | |
231 int getNumSources() const { return RegSrcs.size(); } | |
232 | |
233 unsigned getSrcReg(int Idx) const { | |
234 assert(Idx < getNumSources() && "Reg source out of index"); | |
235 return RegSrcs[Idx].Reg; | |
236 } | |
237 | |
238 unsigned getSrcSubReg(int Idx) const { | |
239 assert(Idx < getNumSources() && "SubReg source out of index"); | |
240 return RegSrcs[Idx].SubReg; | |
241 } | |
242 | |
243 bool operator==(const ValueTrackerResult &Other) { | |
244 if (Other.getInst() != getInst()) | |
245 return false; | |
246 | |
247 if (Other.getNumSources() != getNumSources()) | |
248 return false; | |
249 | |
250 for (int i = 0, e = Other.getNumSources(); i != e; ++i) | |
251 if (Other.getSrcReg(i) != getSrcReg(i) || | |
252 Other.getSrcSubReg(i) != getSrcSubReg(i)) | |
253 return false; | |
254 return true; | |
170 } | 255 } |
171 }; | 256 }; |
172 | 257 |
173 /// \brief Helper class to track the possible sources of a value defined by | 258 /// \brief Helper class to track the possible sources of a value defined by |
174 /// a (chain of) copy related instructions. | 259 /// a (chain of) copy related instructions. |
210 /// tracking. | 295 /// tracking. |
211 const TargetInstrInfo *TII; | 296 const TargetInstrInfo *TII; |
212 | 297 |
213 /// \brief Dispatcher to the right underlying implementation of | 298 /// \brief Dispatcher to the right underlying implementation of |
214 /// getNextSource. | 299 /// getNextSource. |
215 bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg); | 300 ValueTrackerResult getNextSourceImpl(); |
216 /// \brief Specialized version of getNextSource for Copy instructions. | 301 /// \brief Specialized version of getNextSource for Copy instructions. |
217 bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg); | 302 ValueTrackerResult getNextSourceFromCopy(); |
218 /// \brief Specialized version of getNextSource for Bitcast instructions. | 303 /// \brief Specialized version of getNextSource for Bitcast instructions. |
219 bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg); | 304 ValueTrackerResult getNextSourceFromBitcast(); |
220 /// \brief Specialized version of getNextSource for RegSequence | 305 /// \brief Specialized version of getNextSource for RegSequence |
221 /// instructions. | 306 /// instructions. |
222 bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg); | 307 ValueTrackerResult getNextSourceFromRegSequence(); |
223 /// \brief Specialized version of getNextSource for InsertSubreg | 308 /// \brief Specialized version of getNextSource for InsertSubreg |
224 /// instructions. | 309 /// instructions. |
225 bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg); | 310 ValueTrackerResult getNextSourceFromInsertSubreg(); |
226 /// \brief Specialized version of getNextSource for ExtractSubreg | 311 /// \brief Specialized version of getNextSource for ExtractSubreg |
227 /// instructions. | 312 /// instructions. |
228 bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg); | 313 ValueTrackerResult getNextSourceFromExtractSubreg(); |
229 /// \brief Specialized version of getNextSource for SubregToReg | 314 /// \brief Specialized version of getNextSource for SubregToReg |
230 /// instructions. | 315 /// instructions. |
231 bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg); | 316 ValueTrackerResult getNextSourceFromSubregToReg(); |
317 /// \brief Specialized version of getNextSource for PHI instructions. | |
318 ValueTrackerResult getNextSourceFromPHI(); | |
232 | 319 |
233 public: | 320 public: |
234 /// \brief Create a ValueTracker instance for the value defined by \p Reg. | 321 /// \brief Create a ValueTracker instance for the value defined by \p Reg. |
235 /// \p DefSubReg represents the sub register index the value tracker will | 322 /// \p DefSubReg represents the sub register index the value tracker will |
236 /// track. It does not need to match the sub register index used in the | 323 /// track. It does not need to match the sub register index used in the |
273 Reg = Def->getOperand(DefIdx).getReg(); | 360 Reg = Def->getOperand(DefIdx).getReg(); |
274 } | 361 } |
275 | 362 |
276 /// \brief Following the use-def chain, get the next available source | 363 /// \brief Following the use-def chain, get the next available source |
277 /// for the tracked value. | 364 /// for the tracked value. |
278 /// When the returned value is not nullptr, \p SrcReg gives the register | 365 /// \return A ValueTrackerResult containing a set of registers |
279 /// that contain the tracked value. | 366 /// and sub registers with tracked values. A ValueTrackerResult with |
280 /// \note The sub register index returned in \p SrcSubReg must be used | 367 /// an empty set of registers means no source was found. |
281 /// on \p SrcReg to access the actual value. | 368 ValueTrackerResult getNextSource(); |
282 /// \return Unless the returned value is nullptr (i.e., no source found), | |
283 /// \p SrcReg gives the register of the next source used in the returned | |
284 /// instruction and \p SrcSubReg the sub-register index to be used on that | |
285 /// source to get the tracked value. When nullptr is returned, no | |
286 /// alternative source has been found. | |
287 const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg); | |
288 | 369 |
289 /// \brief Get the last register where the initial value can be found. | 370 /// \brief Get the last register where the initial value can be found. |
290 /// Initially this is the register of the definition. | 371 /// Initially this is the register of the definition. |
291 /// Then, after each successful call to getNextSource, this is the | 372 /// Then, after each successful call to getNextSource, this is the |
292 /// register of the last source. | 373 /// register of the last source. |
409 } | 490 } |
410 } | 491 } |
411 | 492 |
412 if (ExtendLife && !ExtendedUses.empty()) | 493 if (ExtendLife && !ExtendedUses.empty()) |
413 // Extend the liveness of the extension result. | 494 // Extend the liveness of the extension result. |
414 std::copy(ExtendedUses.begin(), ExtendedUses.end(), | 495 Uses.append(ExtendedUses.begin(), ExtendedUses.end()); |
415 std::back_inserter(Uses)); | |
416 | 496 |
417 // Now replace all uses. | 497 // Now replace all uses. |
418 bool Changed = false; | 498 bool Changed = false; |
419 if (!Uses.empty()) { | 499 if (!Uses.empty()) { |
420 SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; | 500 SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; |
504 // generated | 584 // generated |
505 bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { | 585 bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { |
506 return TII->optimizeCondBranch(MI); | 586 return TII->optimizeCondBranch(MI); |
507 } | 587 } |
508 | 588 |
509 /// \brief Check if the registers defined by the pair (RegisterClass, SubReg) | |
510 /// share the same register file. | |
511 static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, | |
512 const TargetRegisterClass *DefRC, | |
513 unsigned DefSubReg, | |
514 const TargetRegisterClass *SrcRC, | |
515 unsigned SrcSubReg) { | |
516 // Same register class. | |
517 if (DefRC == SrcRC) | |
518 return true; | |
519 | |
520 // Both operands are sub registers. Check if they share a register class. | |
521 unsigned SrcIdx, DefIdx; | |
522 if (SrcSubReg && DefSubReg) | |
523 return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, | |
524 SrcIdx, DefIdx) != nullptr; | |
525 // At most one of the register is a sub register, make it Src to avoid | |
526 // duplicating the test. | |
527 if (!SrcSubReg) { | |
528 std::swap(DefSubReg, SrcSubReg); | |
529 std::swap(DefRC, SrcRC); | |
530 } | |
531 | |
532 // One of the register is a sub register, check if we can get a superclass. | |
533 if (SrcSubReg) | |
534 return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; | |
535 // Plain copy. | |
536 return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; | |
537 } | |
538 | |
539 /// \brief Try to find the next source that share the same register file | 589 /// \brief Try to find the next source that share the same register file |
540 /// for the value defined by \p Reg and \p SubReg. | 590 /// for the value defined by \p Reg and \p SubReg. |
541 /// When true is returned, \p Reg and \p SubReg are updated with the | 591 /// When true is returned, the \p RewriteMap can be used by the client to |
542 /// register number and sub-register index of the new source. | 592 /// retrieve all Def -> Use along the way up to the next source. Any found |
593 /// Use that is not itself a key for another entry, is the next source to | |
594 /// use. During the search for the next source, multiple sources can be found | |
595 /// given multiple incoming sources of a PHI instruction. In this case, we | |
596 /// look in each PHI source for the next source; all found next sources must | |
597 /// share the same register file as \p Reg and \p SubReg. The client should | |
598 /// then be capable to rewrite all intermediate PHIs to get the next source. | |
543 /// \return False if no alternative sources are available. True otherwise. | 599 /// \return False if no alternative sources are available. True otherwise. |
544 bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) { | 600 bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg, |
601 RewriteMapTy &RewriteMap) { | |
545 // Do not try to find a new source for a physical register. | 602 // Do not try to find a new source for a physical register. |
546 // So far we do not have any motivating example for doing that. | 603 // So far we do not have any motivating example for doing that. |
547 // Thus, instead of maintaining untested code, we will revisit that if | 604 // Thus, instead of maintaining untested code, we will revisit that if |
548 // that changes at some point. | 605 // that changes at some point. |
549 if (TargetRegisterInfo::isPhysicalRegister(Reg)) | 606 if (TargetRegisterInfo::isPhysicalRegister(Reg)) |
550 return false; | 607 return false; |
551 | |
552 const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); | 608 const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); |
553 unsigned DefSubReg = SubReg; | 609 |
554 | 610 SmallVector<TargetInstrInfo::RegSubRegPair, 4> SrcToLook; |
555 unsigned Src; | 611 TargetInstrInfo::RegSubRegPair CurSrcPair(Reg, SubReg); |
556 unsigned SrcSubReg; | 612 SrcToLook.push_back(CurSrcPair); |
557 bool ShouldRewrite = false; | 613 |
558 | 614 unsigned PHICount = 0; |
559 // Follow the chain of copies until we reach the top of the use-def chain | 615 while (!SrcToLook.empty() && PHICount < RewritePHILimit) { |
560 // or find a more suitable source. | 616 TargetInstrInfo::RegSubRegPair Pair = SrcToLook.pop_back_val(); |
561 ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII); | 617 // As explained above, do not handle physical registers |
562 do { | 618 if (TargetRegisterInfo::isPhysicalRegister(Pair.Reg)) |
563 unsigned CopySrcReg, CopySrcSubReg; | 619 return false; |
564 if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg)) | 620 |
565 break; | 621 CurSrcPair = Pair; |
566 Src = CopySrcReg; | 622 ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, |
567 SrcSubReg = CopySrcSubReg; | 623 !DisableAdvCopyOpt, TII); |
568 | 624 ValueTrackerResult Res; |
569 // Do not extend the live-ranges of physical registers as they add | 625 bool ShouldRewrite = false; |
570 // constraints to the register allocator. | 626 |
571 // Moreover, if we want to extend the live-range of a physical register, | 627 do { |
572 // unlike SSA virtual register, we will have to check that they are not | 628 // Follow the chain of copies until we reach the top of the use-def chain |
573 // redefine before the related use. | 629 // or find a more suitable source. |
574 if (TargetRegisterInfo::isPhysicalRegister(Src)) | 630 Res = ValTracker.getNextSource(); |
575 break; | 631 if (!Res.isValid()) |
576 | 632 break; |
577 const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); | 633 |
578 | 634 // Insert the Def -> Use entry for the recently found source. |
579 // If this source does not incur a cross register bank copy, use it. | 635 ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair); |
580 ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC, | 636 if (CurSrcRes.isValid()) { |
581 SrcSubReg); | 637 assert(CurSrcRes == Res && "ValueTrackerResult found must match"); |
582 } while (!ShouldRewrite); | 638 // An existent entry with multiple sources is a PHI cycle we must avoid. |
639 // Otherwise it's an entry with a valid next source we already found. | |
640 if (CurSrcRes.getNumSources() > 1) { | |
641 DEBUG(dbgs() << "findNextSource: found PHI cycle, aborting...\n"); | |
642 return false; | |
643 } | |
644 break; | |
645 } | |
646 RewriteMap.insert(std::make_pair(CurSrcPair, Res)); | |
647 | |
648 // ValueTrackerResult usually have one source unless it's the result from | |
649 // a PHI instruction. Add the found PHI edges to be looked up further. | |
650 unsigned NumSrcs = Res.getNumSources(); | |
651 if (NumSrcs > 1) { | |
652 PHICount++; | |
653 for (unsigned i = 0; i < NumSrcs; ++i) | |
654 SrcToLook.push_back(TargetInstrInfo::RegSubRegPair( | |
655 Res.getSrcReg(i), Res.getSrcSubReg(i))); | |
656 break; | |
657 } | |
658 | |
659 CurSrcPair.Reg = Res.getSrcReg(0); | |
660 CurSrcPair.SubReg = Res.getSrcSubReg(0); | |
661 // Do not extend the live-ranges of physical registers as they add | |
662 // constraints to the register allocator. Moreover, if we want to extend | |
663 // the live-range of a physical register, unlike SSA virtual register, | |
664 // we will have to check that they aren't redefine before the related use. | |
665 if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) | |
666 return false; | |
667 | |
668 const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg); | |
669 ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC, | |
670 CurSrcPair.SubReg); | |
671 } while (!ShouldRewrite); | |
672 | |
673 // Continue looking for new sources... | |
674 if (Res.isValid()) | |
675 continue; | |
676 | |
677 // Do not continue searching for a new source if the there's at least | |
678 // one use-def which cannot be rewritten. | |
679 if (!ShouldRewrite) | |
680 return false; | |
681 } | |
682 | |
683 if (PHICount >= RewritePHILimit) { | |
684 DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); | |
685 return false; | |
686 } | |
583 | 687 |
584 // If we did not find a more suitable source, there is nothing to optimize. | 688 // If we did not find a more suitable source, there is nothing to optimize. |
585 if (!ShouldRewrite || Src == Reg) | 689 if (CurSrcPair.Reg == Reg) |
586 return false; | 690 return false; |
587 | 691 |
588 Reg = Src; | |
589 SubReg = SrcSubReg; | |
590 return true; | 692 return true; |
693 } | |
694 | |
695 /// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are | |
696 /// guaranteed to have the same register class. This is necessary whenever we | |
697 /// successfully traverse a PHI instruction and find suitable sources coming | |
698 /// from its edges. By inserting a new PHI, we provide a rewritten PHI def | |
699 /// suitable to be used in a new COPY instruction. | |
700 static MachineInstr * | |
701 insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, | |
702 const SmallVectorImpl<TargetInstrInfo::RegSubRegPair> &SrcRegs, | |
703 MachineInstr *OrigPHI) { | |
704 assert(!SrcRegs.empty() && "No sources to create a PHI instruction?"); | |
705 | |
706 const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg); | |
707 unsigned NewVR = MRI->createVirtualRegister(NewRC); | |
708 MachineBasicBlock *MBB = OrigPHI->getParent(); | |
709 MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(), | |
710 TII->get(TargetOpcode::PHI), NewVR); | |
711 | |
712 unsigned MBBOpIdx = 2; | |
713 for (auto RegPair : SrcRegs) { | |
714 MIB.addReg(RegPair.Reg, 0, RegPair.SubReg); | |
715 MIB.addMBB(OrigPHI->getOperand(MBBOpIdx).getMBB()); | |
716 // Since we're extended the lifetime of RegPair.Reg, clear the | |
717 // kill flags to account for that and make RegPair.Reg reaches | |
718 // the new PHI. | |
719 MRI->clearKillFlags(RegPair.Reg); | |
720 MBBOpIdx += 2; | |
721 } | |
722 | |
723 return MIB; | |
591 } | 724 } |
592 | 725 |
593 namespace { | 726 namespace { |
594 /// \brief Helper class to rewrite the arguments of a copy-like instruction. | 727 /// \brief Helper class to rewrite the arguments of a copy-like instruction. |
595 class CopyRewriter { | 728 class CopyRewriter { |
622 /// the only source this instruction has: | 755 /// the only source this instruction has: |
623 /// (SrcReg, SrcSubReg) = (src, srcSubIdx). | 756 /// (SrcReg, SrcSubReg) = (src, srcSubIdx). |
624 /// This source defines the whole definition, i.e., | 757 /// This source defines the whole definition, i.e., |
625 /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). | 758 /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). |
626 /// | 759 /// |
627 /// The second and subsequent calls will return false, has there is only one | 760 /// The second and subsequent calls will return false, as there is only one |
628 /// rewritable source. | 761 /// rewritable source. |
629 /// | 762 /// |
630 /// \return True if a rewritable source has been found, false otherwise. | 763 /// \return True if a rewritable source has been found, false otherwise. |
631 /// The output arguments are valid if and only if true is returned. | 764 /// The output arguments are valid if and only if true is returned. |
632 virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, | 765 virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, |
633 unsigned &TrackReg, | 766 unsigned &TrackReg, |
634 unsigned &TrackSubReg) { | 767 unsigned &TrackSubReg) { |
635 // If CurrentSrcIdx == 1, this means this function has already been | 768 // If CurrentSrcIdx == 1, this means this function has already been called |
636 // called once. CopyLike has one defintiion and one argument, thus, | 769 // once. CopyLike has one definition and one argument, thus, there is |
637 // there is nothing else to rewrite. | 770 // nothing else to rewrite. |
638 if (!CopyLike.isCopy() || CurrentSrcIdx == 1) | 771 if (!CopyLike.isCopy() || CurrentSrcIdx == 1) |
639 return false; | 772 return false; |
640 // This is the first call to getNextRewritableSource. | 773 // This is the first call to getNextRewritableSource. |
641 // Move the CurrentSrcIdx to remember that we made that call. | 774 // Move the CurrentSrcIdx to remember that we made that call. |
642 CurrentSrcIdx = 1; | 775 CurrentSrcIdx = 1; |
651 return true; | 784 return true; |
652 } | 785 } |
653 | 786 |
654 /// \brief Rewrite the current source with \p NewReg and \p NewSubReg | 787 /// \brief Rewrite the current source with \p NewReg and \p NewSubReg |
655 /// if possible. | 788 /// if possible. |
656 /// \return True if the rewritting was possible, false otherwise. | 789 /// \return True if the rewriting was possible, false otherwise. |
657 virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { | 790 virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { |
658 if (!CopyLike.isCopy() || CurrentSrcIdx != 1) | 791 if (!CopyLike.isCopy() || CurrentSrcIdx != 1) |
659 return false; | 792 return false; |
660 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); | 793 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); |
661 MOSrc.setReg(NewReg); | 794 MOSrc.setReg(NewReg); |
662 MOSrc.setSubReg(NewSubReg); | 795 MOSrc.setSubReg(NewSubReg); |
663 return true; | 796 return true; |
797 } | |
798 | |
799 /// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find | |
800 /// the new source to use for rewrite. If \p HandleMultipleSources is true and | |
801 /// multiple sources for a given \p Def are found along the way, we found a | |
802 /// PHI instructions that needs to be rewritten. | |
803 /// TODO: HandleMultipleSources should be removed once we test PHI handling | |
804 /// with coalescable copies. | |
805 TargetInstrInfo::RegSubRegPair | |
806 getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, | |
807 TargetInstrInfo::RegSubRegPair Def, | |
808 PeepholeOptimizer::RewriteMapTy &RewriteMap, | |
809 bool HandleMultipleSources = true) { | |
810 | |
811 TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg); | |
812 do { | |
813 ValueTrackerResult Res = RewriteMap.lookup(LookupSrc); | |
814 // If there are no entries on the map, LookupSrc is the new source. | |
815 if (!Res.isValid()) | |
816 return LookupSrc; | |
817 | |
818 // There's only one source for this definition, keep searching... | |
819 unsigned NumSrcs = Res.getNumSources(); | |
820 if (NumSrcs == 1) { | |
821 LookupSrc.Reg = Res.getSrcReg(0); | |
822 LookupSrc.SubReg = Res.getSrcSubReg(0); | |
823 continue; | |
824 } | |
825 | |
826 // TODO: Remove once multiple srcs w/ coalescable copies are supported. | |
827 if (!HandleMultipleSources) | |
828 break; | |
829 | |
830 // Multiple sources, recurse into each source to find a new source | |
831 // for it. Then, rewrite the PHI accordingly to its new edges. | |
832 SmallVector<TargetInstrInfo::RegSubRegPair, 4> NewPHISrcs; | |
833 for (unsigned i = 0; i < NumSrcs; ++i) { | |
834 TargetInstrInfo::RegSubRegPair PHISrc(Res.getSrcReg(i), | |
835 Res.getSrcSubReg(i)); | |
836 NewPHISrcs.push_back( | |
837 getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources)); | |
838 } | |
839 | |
840 // Build the new PHI node and return its def register as the new source. | |
841 MachineInstr *OrigPHI = const_cast<MachineInstr *>(Res.getInst()); | |
842 MachineInstr *NewPHI = insertPHI(MRI, TII, NewPHISrcs, OrigPHI); | |
843 DEBUG(dbgs() << "-- getNewSource\n"); | |
844 DEBUG(dbgs() << " Replacing: " << *OrigPHI); | |
845 DEBUG(dbgs() << " With: " << *NewPHI); | |
846 const MachineOperand &MODef = NewPHI->getOperand(0); | |
847 return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg()); | |
848 | |
849 } while (1); | |
850 | |
851 return TargetInstrInfo::RegSubRegPair(0, 0); | |
852 } | |
853 | |
854 /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap | |
855 /// and create a new COPY instruction. More info about RewriteMap in | |
856 /// PeepholeOptimizer::findNextSource. Right now this is only used to handle | |
857 /// Uncoalescable copies, since they are copy like instructions that aren't | |
858 /// recognized by the register allocator. | |
859 virtual MachineInstr * | |
860 RewriteSource(TargetInstrInfo::RegSubRegPair Def, | |
861 PeepholeOptimizer::RewriteMapTy &RewriteMap) { | |
862 return nullptr; | |
863 } | |
864 }; | |
865 | |
866 /// \brief Helper class to rewrite uncoalescable copy like instructions | |
867 /// into new COPY (coalescable friendly) instructions. | |
868 class UncoalescableRewriter : public CopyRewriter { | |
869 protected: | |
870 const TargetInstrInfo &TII; | |
871 MachineRegisterInfo &MRI; | |
872 /// The number of defs in the bitcast | |
873 unsigned NumDefs; | |
874 | |
875 public: | |
876 UncoalescableRewriter(MachineInstr &MI, const TargetInstrInfo &TII, | |
877 MachineRegisterInfo &MRI) | |
878 : CopyRewriter(MI), TII(TII), MRI(MRI) { | |
879 NumDefs = MI.getDesc().getNumDefs(); | |
880 } | |
881 | |
882 /// \brief Get the next rewritable def source (TrackReg, TrackSubReg) | |
883 /// All such sources need to be considered rewritable in order to | |
884 /// rewrite a uncoalescable copy-like instruction. This method return | |
885 /// each definition that must be checked if rewritable. | |
886 /// | |
887 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, | |
888 unsigned &TrackReg, | |
889 unsigned &TrackSubReg) override { | |
890 // Find the next non-dead definition and continue from there. | |
891 if (CurrentSrcIdx == NumDefs) | |
892 return false; | |
893 | |
894 while (CopyLike.getOperand(CurrentSrcIdx).isDead()) { | |
895 ++CurrentSrcIdx; | |
896 if (CurrentSrcIdx == NumDefs) | |
897 return false; | |
898 } | |
899 | |
900 // What we track are the alternative sources of the definition. | |
901 const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx); | |
902 TrackReg = MODef.getReg(); | |
903 TrackSubReg = MODef.getSubReg(); | |
904 | |
905 CurrentSrcIdx++; | |
906 return true; | |
907 } | |
908 | |
909 /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap | |
910 /// and create a new COPY instruction. More info about RewriteMap in | |
911 /// PeepholeOptimizer::findNextSource. Right now this is only used to handle | |
912 /// Uncoalescable copies, since they are copy like instructions that aren't | |
913 /// recognized by the register allocator. | |
914 MachineInstr * | |
915 RewriteSource(TargetInstrInfo::RegSubRegPair Def, | |
916 PeepholeOptimizer::RewriteMapTy &RewriteMap) override { | |
917 assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && | |
918 "We do not rewrite physical registers"); | |
919 | |
920 // Find the new source to use in the COPY rewrite. | |
921 TargetInstrInfo::RegSubRegPair NewSrc = | |
922 getNewSource(&MRI, &TII, Def, RewriteMap); | |
923 | |
924 // Insert the COPY. | |
925 const TargetRegisterClass *DefRC = MRI.getRegClass(Def.Reg); | |
926 unsigned NewVR = MRI.createVirtualRegister(DefRC); | |
927 | |
928 MachineInstr *NewCopy = | |
929 BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), | |
930 TII.get(TargetOpcode::COPY), NewVR) | |
931 .addReg(NewSrc.Reg, 0, NewSrc.SubReg); | |
932 | |
933 NewCopy->getOperand(0).setSubReg(Def.SubReg); | |
934 if (Def.SubReg) | |
935 NewCopy->getOperand(0).setIsUndef(); | |
936 | |
937 DEBUG(dbgs() << "-- RewriteSource\n"); | |
938 DEBUG(dbgs() << " Replacing: " << CopyLike); | |
939 DEBUG(dbgs() << " With: " << *NewCopy); | |
940 MRI.replaceRegWith(Def.Reg, NewVR); | |
941 MRI.clearKillFlags(NewVR); | |
942 | |
943 // We extended the lifetime of NewSrc.Reg, clear the kill flags to | |
944 // account for that. | |
945 MRI.clearKillFlags(NewSrc.Reg); | |
946 | |
947 return NewCopy; | |
664 } | 948 } |
665 }; | 949 }; |
666 | 950 |
667 /// \brief Specialized rewriter for INSERT_SUBREG instruction. | 951 /// \brief Specialized rewriter for INSERT_SUBREG instruction. |
668 class InsertSubregRewriter : public CopyRewriter { | 952 class InsertSubregRewriter : public CopyRewriter { |
697 | 981 |
698 // We want to track something that is compatible with the | 982 // We want to track something that is compatible with the |
699 // partial definition. | 983 // partial definition. |
700 TrackReg = MODef.getReg(); | 984 TrackReg = MODef.getReg(); |
701 if (MODef.getSubReg()) | 985 if (MODef.getSubReg()) |
702 // Bails if we have to compose sub-register indices. | 986 // Bail if we have to compose sub-register indices. |
703 return false; | 987 return false; |
704 TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); | 988 TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); |
705 return true; | 989 return true; |
706 } | 990 } |
707 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { | 991 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { |
738 return false; | 1022 return false; |
739 // We are looking at v1 = EXTRACT_SUBREG v0, sub0. | 1023 // We are looking at v1 = EXTRACT_SUBREG v0, sub0. |
740 CurrentSrcIdx = 1; | 1024 CurrentSrcIdx = 1; |
741 const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); | 1025 const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); |
742 SrcReg = MOExtractedReg.getReg(); | 1026 SrcReg = MOExtractedReg.getReg(); |
743 // If we have to compose sub-register indices, bails out. | 1027 // If we have to compose sub-register indices, bail out. |
744 if (MOExtractedReg.getSubReg()) | 1028 if (MOExtractedReg.getSubReg()) |
745 return false; | 1029 return false; |
746 | 1030 |
747 SrcSubReg = CopyLike.getOperand(2).getImm(); | 1031 SrcSubReg = CopyLike.getOperand(2).getImm(); |
748 | 1032 |
816 if (CurrentSrcIdx >= CopyLike.getNumOperands()) | 1100 if (CurrentSrcIdx >= CopyLike.getNumOperands()) |
817 return false; | 1101 return false; |
818 } | 1102 } |
819 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); | 1103 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); |
820 SrcReg = MOInsertedReg.getReg(); | 1104 SrcReg = MOInsertedReg.getReg(); |
821 // If we have to compose sub-register indices, bails out. | 1105 // If we have to compose sub-register indices, bail out. |
822 if ((SrcSubReg = MOInsertedReg.getSubReg())) | 1106 if ((SrcSubReg = MOInsertedReg.getSubReg())) |
823 return false; | 1107 return false; |
824 | 1108 |
825 // We want to track something that is compatible with the related | 1109 // We want to track something that is compatible with the related |
826 // partial definition. | 1110 // partial definition. |
827 TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); | 1111 TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); |
828 | 1112 |
829 const MachineOperand &MODef = CopyLike.getOperand(0); | 1113 const MachineOperand &MODef = CopyLike.getOperand(0); |
830 TrackReg = MODef.getReg(); | 1114 TrackReg = MODef.getReg(); |
831 // If we have to compose sub-registers, bails. | 1115 // If we have to compose sub-registers, bail. |
832 return MODef.getSubReg() == 0; | 1116 return MODef.getSubReg() == 0; |
833 } | 1117 } |
834 | 1118 |
835 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { | 1119 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { |
836 // We cannot rewrite out of bound operands. | 1120 // We cannot rewrite out of bound operands. |
848 | 1132 |
849 /// \brief Get the appropriated CopyRewriter for \p MI. | 1133 /// \brief Get the appropriated CopyRewriter for \p MI. |
850 /// \return A pointer to a dynamically allocated CopyRewriter or nullptr | 1134 /// \return A pointer to a dynamically allocated CopyRewriter or nullptr |
851 /// if no rewriter works for \p MI. | 1135 /// if no rewriter works for \p MI. |
852 static CopyRewriter *getCopyRewriter(MachineInstr &MI, | 1136 static CopyRewriter *getCopyRewriter(MachineInstr &MI, |
853 const TargetInstrInfo &TII) { | 1137 const TargetInstrInfo &TII, |
1138 MachineRegisterInfo &MRI) { | |
1139 // Handle uncoalescable copy-like instructions. | |
1140 if (MI.isBitcast() || (MI.isRegSequenceLike() || MI.isInsertSubregLike() || | |
1141 MI.isExtractSubregLike())) | |
1142 return new UncoalescableRewriter(MI, TII, MRI); | |
1143 | |
854 switch (MI.getOpcode()) { | 1144 switch (MI.getOpcode()) { |
855 default: | 1145 default: |
856 return nullptr; | 1146 return nullptr; |
857 case TargetOpcode::COPY: | 1147 case TargetOpcode::COPY: |
858 return new CopyRewriter(MI); | 1148 return new CopyRewriter(MI); |
872 /// class. | 1162 /// class. |
873 /// Two register classes are considered to be compatible if they share | 1163 /// Two register classes are considered to be compatible if they share |
874 /// the same register bank. | 1164 /// the same register bank. |
875 /// New copies issued by this optimization are register allocator | 1165 /// New copies issued by this optimization are register allocator |
876 /// friendly. This optimization does not remove any copy as it may | 1166 /// friendly. This optimization does not remove any copy as it may |
877 /// overconstraint the register allocator, but replaces some operands | 1167 /// overconstrain the register allocator, but replaces some operands |
878 /// when possible. | 1168 /// when possible. |
879 /// \pre isCoalescableCopy(*MI) is true. | 1169 /// \pre isCoalescableCopy(*MI) is true. |
880 /// \return True, when \p MI has been rewritten. False otherwise. | 1170 /// \return True, when \p MI has been rewritten. False otherwise. |
881 bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { | 1171 bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { |
882 assert(MI && isCoalescableCopy(*MI) && "Invalid argument"); | 1172 assert(MI && isCoalescableCopy(*MI) && "Invalid argument"); |
887 if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) | 1177 if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) |
888 return false; | 1178 return false; |
889 | 1179 |
890 bool Changed = false; | 1180 bool Changed = false; |
891 // Get the right rewriter for the current copy. | 1181 // Get the right rewriter for the current copy. |
892 std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII)); | 1182 std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); |
893 // If none exists, bails out. | 1183 // If none exists, bail out. |
894 if (!CpyRewriter) | 1184 if (!CpyRewriter) |
895 return false; | 1185 return false; |
896 // Rewrite each rewritable source. | 1186 // Rewrite each rewritable source. |
897 unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; | 1187 unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; |
898 while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, | 1188 while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, |
899 TrackSubReg)) { | 1189 TrackSubReg)) { |
900 unsigned NewSrc = TrackReg; | 1190 // Keep track of PHI nodes and its incoming edges when looking for sources. |
901 unsigned NewSubReg = TrackSubReg; | 1191 RewriteMapTy RewriteMap; |
902 // Try to find a more suitable source. | 1192 // Try to find a more suitable source. If we failed to do so, or get the |
903 // If we failed to do so, or get the actual source, | 1193 // actual source, move to the next source. |
904 // move to the next source. | 1194 if (!findNextSource(TrackReg, TrackSubReg, RewriteMap)) |
905 if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc) | |
906 continue; | 1195 continue; |
1196 | |
1197 // Get the new source to rewrite. TODO: Only enable handling of multiple | |
1198 // sources (PHIs) once we have a motivating example and testcases for it. | |
1199 TargetInstrInfo::RegSubRegPair TrackPair(TrackReg, TrackSubReg); | |
1200 TargetInstrInfo::RegSubRegPair NewSrc = CpyRewriter->getNewSource( | |
1201 MRI, TII, TrackPair, RewriteMap, false /* multiple sources */); | |
1202 if (SrcReg == NewSrc.Reg || NewSrc.Reg == 0) | |
1203 continue; | |
1204 | |
907 // Rewrite source. | 1205 // Rewrite source. |
908 if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) { | 1206 if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) { |
909 // We may have extended the live-range of NewSrc, account for that. | 1207 // We may have extended the live-range of NewSrc, account for that. |
910 MRI->clearKillFlags(NewSrc); | 1208 MRI->clearKillFlags(NewSrc.Reg); |
911 Changed = true; | 1209 Changed = true; |
912 } | 1210 } |
913 } | 1211 } |
914 // TODO: We could have a clean-up method to tidy the instruction. | 1212 // TODO: We could have a clean-up method to tidy the instruction. |
915 // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0 | 1213 // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0 |
916 // => v0 = COPY v1 | 1214 // => v0 = COPY v1 |
917 // Currently we haven't seen motivating example for that and we | 1215 // Currently we haven't seen motivating example for that and we |
918 // want to avoid untested code. | 1216 // want to avoid untested code. |
919 NumRewrittenCopies += Changed == true; | 1217 NumRewrittenCopies += Changed; |
920 return Changed; | 1218 return Changed; |
921 } | 1219 } |
922 | 1220 |
923 /// \brief Optimize copy-like instructions to create | 1221 /// \brief Optimize copy-like instructions to create |
924 /// register coalescer friendly instruction. | 1222 /// register coalescer friendly instruction. |
934 bool PeepholeOptimizer::optimizeUncoalescableCopy( | 1232 bool PeepholeOptimizer::optimizeUncoalescableCopy( |
935 MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) { | 1233 MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) { |
936 assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); | 1234 assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); |
937 | 1235 |
938 // Check if we can rewrite all the values defined by this instruction. | 1236 // Check if we can rewrite all the values defined by this instruction. |
939 SmallVector< | 1237 SmallVector<TargetInstrInfo::RegSubRegPair, 4> RewritePairs; |
940 std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>, | 1238 // Get the right rewriter for the current copy. |
941 4> RewritePairs; | 1239 std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); |
942 for (const MachineOperand &MODef : MI->defs()) { | 1240 // If none exists, bail out. |
943 if (MODef.isDead()) | 1241 if (!CpyRewriter) |
944 // We can ignore those. | 1242 return false; |
945 continue; | 1243 |
946 | 1244 // Rewrite each rewritable source by generating new COPYs. This works |
1245 // differently from optimizeCoalescableCopy since it first makes sure that all | |
1246 // definitions can be rewritten. | |
1247 RewriteMapTy RewriteMap; | |
1248 unsigned Reg, SubReg, CopyDefReg, CopyDefSubReg; | |
1249 while (CpyRewriter->getNextRewritableSource(Reg, SubReg, CopyDefReg, | |
1250 CopyDefSubReg)) { | |
947 // If a physical register is here, this is probably for a good reason. | 1251 // If a physical register is here, this is probably for a good reason. |
948 // Do not rewrite that. | 1252 // Do not rewrite that. |
949 if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) | 1253 if (TargetRegisterInfo::isPhysicalRegister(CopyDefReg)) |
950 return false; | 1254 return false; |
951 | 1255 |
952 // If we do not know how to rewrite this definition, there is no point | 1256 // If we do not know how to rewrite this definition, there is no point |
953 // in trying to kill this instruction. | 1257 // in trying to kill this instruction. |
954 TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg()); | 1258 TargetInstrInfo::RegSubRegPair Def(CopyDefReg, CopyDefSubReg); |
955 TargetInstrInfo::RegSubRegPair Src = Def; | 1259 if (!findNextSource(Def.Reg, Def.SubReg, RewriteMap)) |
956 if (!findNextSource(Src.Reg, Src.SubReg)) | |
957 return false; | 1260 return false; |
958 RewritePairs.push_back(std::make_pair(Def, Src)); | 1261 |
959 } | 1262 RewritePairs.push_back(Def); |
1263 } | |
1264 | |
960 // The change is possible for all defs, do it. | 1265 // The change is possible for all defs, do it. |
961 for (const auto &PairDefSrc : RewritePairs) { | 1266 for (const auto &Def : RewritePairs) { |
962 const auto &Def = PairDefSrc.first; | |
963 const auto &Src = PairDefSrc.second; | |
964 // Rewrite the "copy" in a way the register coalescer understands. | 1267 // Rewrite the "copy" in a way the register coalescer understands. |
965 assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && | 1268 MachineInstr *NewCopy = CpyRewriter->RewriteSource(Def, RewriteMap); |
966 "We do not rewrite physical registers"); | 1269 assert(NewCopy && "Should be able to always generate a new copy"); |
967 const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); | |
968 unsigned NewVR = MRI->createVirtualRegister(DefRC); | |
969 MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), | |
970 TII->get(TargetOpcode::COPY), | |
971 NewVR).addReg(Src.Reg, 0, Src.SubReg); | |
972 NewCopy->getOperand(0).setSubReg(Def.SubReg); | |
973 if (Def.SubReg) | |
974 NewCopy->getOperand(0).setIsUndef(); | |
975 LocalMIs.insert(NewCopy); | 1270 LocalMIs.insert(NewCopy); |
976 MRI->replaceRegWith(Def.Reg, NewVR); | 1271 } |
977 MRI->clearKillFlags(NewVR); | 1272 |
978 // We extended the lifetime of Src. | |
979 // Clear the kill flags to account for that. | |
980 MRI->clearKillFlags(Src.Reg); | |
981 } | |
982 // MI is now dead. | 1273 // MI is now dead. |
983 MI->eraseFromParent(); | 1274 MI->eraseFromParent(); |
984 ++NumUncoalescableCopies; | 1275 ++NumUncoalescableCopies; |
985 return true; | 1276 return true; |
986 } | 1277 } |
1051 } | 1342 } |
1052 } | 1343 } |
1053 return false; | 1344 return false; |
1054 } | 1345 } |
1055 | 1346 |
1347 // FIXME: This is very simple and misses some cases which should be handled when | |
1348 // motivating examples are found. | |
1349 // | |
1350 // The copy rewriting logic should look at uses as well as defs and be able to | |
1351 // eliminate copies across blocks. | |
1352 // | |
1353 // Later copies that are subregister extracts will also not be eliminated since | |
1354 // only the first copy is considered. | |
1355 // | |
1356 // e.g. | |
1357 // %vreg1 = COPY %vreg0 | |
1358 // %vreg2 = COPY %vreg0:sub1 | |
1359 // | |
1360 // Should replace %vreg2 uses with %vreg1:sub1 | |
1361 bool PeepholeOptimizer::foldRedundantCopy( | |
1362 MachineInstr *MI, | |
1363 SmallSet<unsigned, 4> &CopySrcRegs, | |
1364 DenseMap<unsigned, MachineInstr *> &CopyMIs) { | |
1365 assert(MI->isCopy()); | |
1366 | |
1367 unsigned SrcReg = MI->getOperand(1).getReg(); | |
1368 if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) | |
1369 return false; | |
1370 | |
1371 unsigned DstReg = MI->getOperand(0).getReg(); | |
1372 if (!TargetRegisterInfo::isVirtualRegister(DstReg)) | |
1373 return false; | |
1374 | |
1375 if (CopySrcRegs.insert(SrcReg).second) { | |
1376 // First copy of this reg seen. | |
1377 CopyMIs.insert(std::make_pair(SrcReg, MI)); | |
1378 return false; | |
1379 } | |
1380 | |
1381 MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second; | |
1382 | |
1383 unsigned SrcSubReg = MI->getOperand(1).getSubReg(); | |
1384 unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg(); | |
1385 | |
1386 // Can't replace different subregister extracts. | |
1387 if (SrcSubReg != PrevSrcSubReg) | |
1388 return false; | |
1389 | |
1390 unsigned PrevDstReg = PrevCopy->getOperand(0).getReg(); | |
1391 | |
1392 // Only replace if the copy register class is the same. | |
1393 // | |
1394 // TODO: If we have multiple copies to different register classes, we may want | |
1395 // to track multiple copies of the same source register. | |
1396 if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg)) | |
1397 return false; | |
1398 | |
1399 MRI->replaceRegWith(DstReg, PrevDstReg); | |
1400 | |
1401 // Lifetime of the previous copy has been extended. | |
1402 MRI->clearKillFlags(PrevDstReg); | |
1403 return true; | |
1404 } | |
1405 | |
1056 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { | 1406 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { |
1057 if (skipOptnoneFunction(*MF.getFunction())) | 1407 if (skipOptnoneFunction(*MF.getFunction())) |
1058 return false; | 1408 return false; |
1059 | 1409 |
1060 DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); | 1410 DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); |
1084 SmallPtrSet<MachineInstr*, 16> LocalMIs; | 1434 SmallPtrSet<MachineInstr*, 16> LocalMIs; |
1085 SmallSet<unsigned, 4> ImmDefRegs; | 1435 SmallSet<unsigned, 4> ImmDefRegs; |
1086 DenseMap<unsigned, MachineInstr*> ImmDefMIs; | 1436 DenseMap<unsigned, MachineInstr*> ImmDefMIs; |
1087 SmallSet<unsigned, 16> FoldAsLoadDefCandidates; | 1437 SmallSet<unsigned, 16> FoldAsLoadDefCandidates; |
1088 | 1438 |
1439 // Set of virtual registers that are copied from. | |
1440 SmallSet<unsigned, 4> CopySrcRegs; | |
1441 DenseMap<unsigned, MachineInstr *> CopySrcMIs; | |
1442 | |
1089 for (MachineBasicBlock::iterator | 1443 for (MachineBasicBlock::iterator |
1090 MII = I->begin(), MIE = I->end(); MII != MIE; ) { | 1444 MII = I->begin(), MIE = I->end(); MII != MIE; ) { |
1091 MachineInstr *MI = &*MII; | 1445 MachineInstr *MI = &*MII; |
1092 // We may be erasing MI below, increment MII now. | 1446 // We may be erasing MI below, increment MII now. |
1093 ++MII; | 1447 ++MII; |
1095 | 1449 |
1096 // Skip debug values. They should not affect this peephole optimization. | 1450 // Skip debug values. They should not affect this peephole optimization. |
1097 if (MI->isDebugValue()) | 1451 if (MI->isDebugValue()) |
1098 continue; | 1452 continue; |
1099 | 1453 |
1100 // If there exists an instruction which belongs to the following | 1454 // If we run into an instruction we can't fold across, discard |
1101 // categories, we will discard the load candidates. | 1455 // the load candidates. |
1456 if (MI->isLoadFoldBarrier()) | |
1457 FoldAsLoadDefCandidates.clear(); | |
1458 | |
1102 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || | 1459 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || |
1103 MI->isKill() || MI->isInlineAsm() || | 1460 MI->isKill() || MI->isInlineAsm() || |
1104 MI->hasUnmodeledSideEffects()) { | 1461 MI->hasUnmodeledSideEffects()) |
1105 FoldAsLoadDefCandidates.clear(); | |
1106 continue; | 1462 continue; |
1107 } | |
1108 if (MI->mayStore() || MI->isCall()) | |
1109 FoldAsLoadDefCandidates.clear(); | |
1110 | 1463 |
1111 if ((isUncoalescableCopy(*MI) && | 1464 if ((isUncoalescableCopy(*MI) && |
1112 optimizeUncoalescableCopy(MI, LocalMIs)) || | 1465 optimizeUncoalescableCopy(MI, LocalMIs)) || |
1113 (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || | 1466 (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || |
1114 (MI->isSelect() && optimizeSelect(MI, LocalMIs))) { | 1467 (MI->isSelect() && optimizeSelect(MI, LocalMIs))) { |
1123 continue; | 1476 continue; |
1124 } | 1477 } |
1125 | 1478 |
1126 if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) { | 1479 if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) { |
1127 // MI is just rewritten. | 1480 // MI is just rewritten. |
1481 Changed = true; | |
1482 continue; | |
1483 } | |
1484 | |
1485 if (MI->isCopy() && foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs)) { | |
1486 LocalMIs.erase(MI); | |
1487 MI->eraseFromParent(); | |
1128 Changed = true; | 1488 Changed = true; |
1129 continue; | 1489 continue; |
1130 } | 1490 } |
1131 | 1491 |
1132 if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { | 1492 if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { |
1188 } | 1548 } |
1189 | 1549 |
1190 return Changed; | 1550 return Changed; |
1191 } | 1551 } |
1192 | 1552 |
1193 bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg, | 1553 ValueTrackerResult ValueTracker::getNextSourceFromCopy() { |
1194 unsigned &SrcSubReg) { | |
1195 assert(Def->isCopy() && "Invalid definition"); | 1554 assert(Def->isCopy() && "Invalid definition"); |
1196 // Copy instruction are supposed to be: Def = Src. | 1555 // Copy instruction are supposed to be: Def = Src. |
1197 // If someone breaks this assumption, bad things will happen everywhere. | 1556 // If someone breaks this assumption, bad things will happen everywhere. |
1198 assert(Def->getNumOperands() == 2 && "Invalid number of operands"); | 1557 assert(Def->getNumOperands() == 2 && "Invalid number of operands"); |
1199 | 1558 |
1200 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) | 1559 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) |
1201 // If we look for a different subreg, it means we want a subreg of src. | 1560 // If we look for a different subreg, it means we want a subreg of src. |
1202 // Bails as we do not support composing subreg yet. | 1561 // Bails as we do not support composing subregs yet. |
1203 return false; | 1562 return ValueTrackerResult(); |
1204 // Otherwise, we want the whole source. | 1563 // Otherwise, we want the whole source. |
1205 const MachineOperand &Src = Def->getOperand(1); | 1564 const MachineOperand &Src = Def->getOperand(1); |
1206 SrcReg = Src.getReg(); | 1565 return ValueTrackerResult(Src.getReg(), Src.getSubReg()); |
1207 SrcSubReg = Src.getSubReg(); | 1566 } |
1208 return true; | 1567 |
1209 } | 1568 ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { |
1210 | |
1211 bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg, | |
1212 unsigned &SrcSubReg) { | |
1213 assert(Def->isBitcast() && "Invalid definition"); | 1569 assert(Def->isBitcast() && "Invalid definition"); |
1214 | 1570 |
1215 // Bail if there are effects that a plain copy will not expose. | 1571 // Bail if there are effects that a plain copy will not expose. |
1216 if (Def->hasUnmodeledSideEffects()) | 1572 if (Def->hasUnmodeledSideEffects()) |
1217 return false; | 1573 return ValueTrackerResult(); |
1218 | 1574 |
1219 // Bitcasts with more than one def are not supported. | 1575 // Bitcasts with more than one def are not supported. |
1220 if (Def->getDesc().getNumDefs() != 1) | 1576 if (Def->getDesc().getNumDefs() != 1) |
1221 return false; | 1577 return ValueTrackerResult(); |
1222 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) | 1578 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) |
1223 // If we look for a different subreg, it means we want a subreg of the src. | 1579 // If we look for a different subreg, it means we want a subreg of the src. |
1224 // Bails as we do not support composing subreg yet. | 1580 // Bails as we do not support composing subregs yet. |
1225 return false; | 1581 return ValueTrackerResult(); |
1226 | 1582 |
1227 unsigned SrcIdx = Def->getNumOperands(); | 1583 unsigned SrcIdx = Def->getNumOperands(); |
1228 for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; | 1584 for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; |
1229 ++OpIdx) { | 1585 ++OpIdx) { |
1230 const MachineOperand &MO = Def->getOperand(OpIdx); | 1586 const MachineOperand &MO = Def->getOperand(OpIdx); |
1231 if (!MO.isReg() || !MO.getReg()) | 1587 if (!MO.isReg() || !MO.getReg()) |
1232 continue; | 1588 continue; |
1233 assert(!MO.isDef() && "We should have skipped all the definitions by now"); | 1589 assert(!MO.isDef() && "We should have skipped all the definitions by now"); |
1234 if (SrcIdx != EndOpIdx) | 1590 if (SrcIdx != EndOpIdx) |
1235 // Multiple sources? | 1591 // Multiple sources? |
1236 return false; | 1592 return ValueTrackerResult(); |
1237 SrcIdx = OpIdx; | 1593 SrcIdx = OpIdx; |
1238 } | 1594 } |
1239 const MachineOperand &Src = Def->getOperand(SrcIdx); | 1595 const MachineOperand &Src = Def->getOperand(SrcIdx); |
1240 SrcReg = Src.getReg(); | 1596 return ValueTrackerResult(Src.getReg(), Src.getSubReg()); |
1241 SrcSubReg = Src.getSubReg(); | 1597 } |
1242 return true; | 1598 |
1243 } | 1599 ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() { |
1244 | |
1245 bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg, | |
1246 unsigned &SrcSubReg) { | |
1247 assert((Def->isRegSequence() || Def->isRegSequenceLike()) && | 1600 assert((Def->isRegSequence() || Def->isRegSequenceLike()) && |
1248 "Invalid definition"); | 1601 "Invalid definition"); |
1249 | 1602 |
1250 if (Def->getOperand(DefIdx).getSubReg()) | 1603 if (Def->getOperand(DefIdx).getSubReg()) |
1251 // If we are composing subreg, bails out. | 1604 // If we are composing subregs, bail out. |
1252 // The case we are checking is Def.<subreg> = REG_SEQUENCE. | 1605 // The case we are checking is Def.<subreg> = REG_SEQUENCE. |
1253 // This should almost never happen as the SSA property is tracked at | 1606 // This should almost never happen as the SSA property is tracked at |
1254 // the register level (as opposed to the subreg level). | 1607 // the register level (as opposed to the subreg level). |
1255 // I.e., | 1608 // I.e., |
1256 // Def.sub0 = | 1609 // Def.sub0 = |
1260 // However, some code could theoretically generates a single | 1613 // However, some code could theoretically generates a single |
1261 // Def.sub0 (i.e, not defining the other subregs) and we would | 1614 // Def.sub0 (i.e, not defining the other subregs) and we would |
1262 // have this case. | 1615 // have this case. |
1263 // If we can ascertain (or force) that this never happens, we could | 1616 // If we can ascertain (or force) that this never happens, we could |
1264 // turn that into an assertion. | 1617 // turn that into an assertion. |
1265 return false; | 1618 return ValueTrackerResult(); |
1266 | 1619 |
1267 if (!TII) | 1620 if (!TII) |
1268 // We could handle the REG_SEQUENCE here, but we do not want to | 1621 // We could handle the REG_SEQUENCE here, but we do not want to |
1269 // duplicate the code from the generic TII. | 1622 // duplicate the code from the generic TII. |
1270 return false; | 1623 return ValueTrackerResult(); |
1271 | 1624 |
1272 SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs; | 1625 SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs; |
1273 if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) | 1626 if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) |
1274 return false; | 1627 return ValueTrackerResult(); |
1275 | 1628 |
1276 // We are looking at: | 1629 // We are looking at: |
1277 // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... | 1630 // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... |
1278 // Check if one of the operand defines the subreg we are interested in. | 1631 // Check if one of the operand defines the subreg we are interested in. |
1279 for (auto &RegSeqInput : RegSeqInputRegs) { | 1632 for (auto &RegSeqInput : RegSeqInputRegs) { |
1280 if (RegSeqInput.SubIdx == DefSubReg) { | 1633 if (RegSeqInput.SubIdx == DefSubReg) { |
1281 if (RegSeqInput.SubReg) | 1634 if (RegSeqInput.SubReg) |
1282 // Bails if we have to compose sub registers. | 1635 // Bail if we have to compose sub registers. |
1283 return false; | 1636 return ValueTrackerResult(); |
1284 | 1637 |
1285 SrcReg = RegSeqInput.Reg; | 1638 return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg); |
1286 SrcSubReg = RegSeqInput.SubReg; | |
1287 return true; | |
1288 } | 1639 } |
1289 } | 1640 } |
1290 | 1641 |
1291 // If the subreg we are tracking is super-defined by another subreg, | 1642 // If the subreg we are tracking is super-defined by another subreg, |
1292 // we could follow this value. However, this would require to compose | 1643 // we could follow this value. However, this would require to compose |
1293 // the subreg and we do not do that for now. | 1644 // the subreg and we do not do that for now. |
1294 return false; | 1645 return ValueTrackerResult(); |
1295 } | 1646 } |
1296 | 1647 |
1297 bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, | 1648 ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() { |
1298 unsigned &SrcSubReg) { | |
1299 assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) && | 1649 assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) && |
1300 "Invalid definition"); | 1650 "Invalid definition"); |
1301 | 1651 |
1302 if (Def->getOperand(DefIdx).getSubReg()) | 1652 if (Def->getOperand(DefIdx).getSubReg()) |
1303 // If we are composing subreg, bails out. | 1653 // If we are composing subreg, bail out. |
1304 // Same remark as getNextSourceFromRegSequence. | 1654 // Same remark as getNextSourceFromRegSequence. |
1305 // I.e., this may be turned into an assert. | 1655 // I.e., this may be turned into an assert. |
1306 return false; | 1656 return ValueTrackerResult(); |
1307 | 1657 |
1308 if (!TII) | 1658 if (!TII) |
1309 // We could handle the REG_SEQUENCE here, but we do not want to | 1659 // We could handle the REG_SEQUENCE here, but we do not want to |
1310 // duplicate the code from the generic TII. | 1660 // duplicate the code from the generic TII. |
1311 return false; | 1661 return ValueTrackerResult(); |
1312 | 1662 |
1313 TargetInstrInfo::RegSubRegPair BaseReg; | 1663 TargetInstrInfo::RegSubRegPair BaseReg; |
1314 TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; | 1664 TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; |
1315 if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) | 1665 if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) |
1316 return false; | 1666 return ValueTrackerResult(); |
1317 | 1667 |
1318 // We are looking at: | 1668 // We are looking at: |
1319 // Def = INSERT_SUBREG v0, v1, sub1 | 1669 // Def = INSERT_SUBREG v0, v1, sub1 |
1320 // There are two cases: | 1670 // There are two cases: |
1321 // 1. DefSubReg == sub1, get v1. | 1671 // 1. DefSubReg == sub1, get v1. |
1322 // 2. DefSubReg != sub1, the value may be available through v0. | 1672 // 2. DefSubReg != sub1, the value may be available through v0. |
1323 | 1673 |
1324 // #1 Check if the inserted register matches the required sub index. | 1674 // #1 Check if the inserted register matches the required sub index. |
1325 if (InsertedReg.SubIdx == DefSubReg) { | 1675 if (InsertedReg.SubIdx == DefSubReg) { |
1326 SrcReg = InsertedReg.Reg; | 1676 return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg); |
1327 SrcSubReg = InsertedReg.SubReg; | |
1328 return true; | |
1329 } | 1677 } |
1330 // #2 Otherwise, if the sub register we are looking for is not partial | 1678 // #2 Otherwise, if the sub register we are looking for is not partial |
1331 // defined by the inserted element, we can look through the main | 1679 // defined by the inserted element, we can look through the main |
1332 // register (v0). | 1680 // register (v0). |
1333 const MachineOperand &MODef = Def->getOperand(DefIdx); | 1681 const MachineOperand &MODef = Def->getOperand(DefIdx); |
1334 // If the result register (Def) and the base register (v0) do not | 1682 // If the result register (Def) and the base register (v0) do not |
1335 // have the same register class or if we have to compose | 1683 // have the same register class or if we have to compose |
1336 // subregisters, bails out. | 1684 // subregisters, bail out. |
1337 if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) || | 1685 if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) || |
1338 BaseReg.SubReg) | 1686 BaseReg.SubReg) |
1339 return false; | 1687 return ValueTrackerResult(); |
1340 | 1688 |
1341 // Get the TRI and check if the inserted sub-register overlaps with the | 1689 // Get the TRI and check if the inserted sub-register overlaps with the |
1342 // sub-register we are tracking. | 1690 // sub-register we are tracking. |
1343 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); | 1691 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); |
1344 if (!TRI || | 1692 if (!TRI || |
1345 (TRI->getSubRegIndexLaneMask(DefSubReg) & | 1693 (TRI->getSubRegIndexLaneMask(DefSubReg) & |
1346 TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) | 1694 TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) |
1347 return false; | 1695 return ValueTrackerResult(); |
1348 // At this point, the value is available in v0 via the same subreg | 1696 // At this point, the value is available in v0 via the same subreg |
1349 // we used for Def. | 1697 // we used for Def. |
1350 SrcReg = BaseReg.Reg; | 1698 return ValueTrackerResult(BaseReg.Reg, DefSubReg); |
1351 SrcSubReg = DefSubReg; | 1699 } |
1352 return true; | 1700 |
1353 } | 1701 ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() { |
1354 | |
1355 bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg, | |
1356 unsigned &SrcSubReg) { | |
1357 assert((Def->isExtractSubreg() || | 1702 assert((Def->isExtractSubreg() || |
1358 Def->isExtractSubregLike()) && "Invalid definition"); | 1703 Def->isExtractSubregLike()) && "Invalid definition"); |
1359 // We are looking at: | 1704 // We are looking at: |
1360 // Def = EXTRACT_SUBREG v0, sub0 | 1705 // Def = EXTRACT_SUBREG v0, sub0 |
1361 | 1706 |
1362 // Bails if we have to compose sub registers. | 1707 // Bail if we have to compose sub registers. |
1363 // Indeed, if DefSubReg != 0, we would have to compose it with sub0. | 1708 // Indeed, if DefSubReg != 0, we would have to compose it with sub0. |
1364 if (DefSubReg) | 1709 if (DefSubReg) |
1365 return false; | 1710 return ValueTrackerResult(); |
1366 | 1711 |
1367 if (!TII) | 1712 if (!TII) |
1368 // We could handle the EXTRACT_SUBREG here, but we do not want to | 1713 // We could handle the EXTRACT_SUBREG here, but we do not want to |
1369 // duplicate the code from the generic TII. | 1714 // duplicate the code from the generic TII. |
1370 return false; | 1715 return ValueTrackerResult(); |
1371 | 1716 |
1372 TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; | 1717 TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; |
1373 if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) | 1718 if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) |
1374 return false; | 1719 return ValueTrackerResult(); |
1375 | 1720 |
1376 // Bails if we have to compose sub registers. | 1721 // Bail if we have to compose sub registers. |
1377 // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. | 1722 // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. |
1378 if (ExtractSubregInputReg.SubReg) | 1723 if (ExtractSubregInputReg.SubReg) |
1379 return false; | 1724 return ValueTrackerResult(); |
1380 // Otherwise, the value is available in the v0.sub0. | 1725 // Otherwise, the value is available in the v0.sub0. |
1381 SrcReg = ExtractSubregInputReg.Reg; | 1726 return ValueTrackerResult(ExtractSubregInputReg.Reg, ExtractSubregInputReg.SubIdx); |
1382 SrcSubReg = ExtractSubregInputReg.SubIdx; | 1727 } |
1383 return true; | 1728 |
1384 } | 1729 ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() { |
1385 | |
1386 bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg, | |
1387 unsigned &SrcSubReg) { | |
1388 assert(Def->isSubregToReg() && "Invalid definition"); | 1730 assert(Def->isSubregToReg() && "Invalid definition"); |
1389 // We are looking at: | 1731 // We are looking at: |
1390 // Def = SUBREG_TO_REG Imm, v0, sub0 | 1732 // Def = SUBREG_TO_REG Imm, v0, sub0 |
1391 | 1733 |
1392 // Bails if we have to compose sub registers. | 1734 // Bail if we have to compose sub registers. |
1393 // If DefSubReg != sub0, we would have to check that all the bits | 1735 // If DefSubReg != sub0, we would have to check that all the bits |
1394 // we track are included in sub0 and if yes, we would have to | 1736 // we track are included in sub0 and if yes, we would have to |
1395 // determine the right subreg in v0. | 1737 // determine the right subreg in v0. |
1396 if (DefSubReg != Def->getOperand(3).getImm()) | 1738 if (DefSubReg != Def->getOperand(3).getImm()) |
1397 return false; | 1739 return ValueTrackerResult(); |
1398 // Bails if we have to compose sub registers. | 1740 // Bail if we have to compose sub registers. |
1399 // Likewise, if v0.subreg != 0, we would have to compose it with sub0. | 1741 // Likewise, if v0.subreg != 0, we would have to compose it with sub0. |
1400 if (Def->getOperand(2).getSubReg()) | 1742 if (Def->getOperand(2).getSubReg()) |
1401 return false; | 1743 return ValueTrackerResult(); |
1402 | 1744 |
1403 SrcReg = Def->getOperand(2).getReg(); | 1745 return ValueTrackerResult(Def->getOperand(2).getReg(), |
1404 SrcSubReg = Def->getOperand(3).getImm(); | 1746 Def->getOperand(3).getImm()); |
1405 return true; | 1747 } |
1406 } | 1748 |
1407 | 1749 /// \brief Explore each PHI incoming operand and return its sources |
1408 bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) { | 1750 ValueTrackerResult ValueTracker::getNextSourceFromPHI() { |
1751 assert(Def->isPHI() && "Invalid definition"); | |
1752 ValueTrackerResult Res; | |
1753 | |
1754 // If we look for a different subreg, bail as we do not support composing | |
1755 // subregs yet. | |
1756 if (Def->getOperand(0).getSubReg() != DefSubReg) | |
1757 return ValueTrackerResult(); | |
1758 | |
1759 // Return all register sources for PHI instructions. | |
1760 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) { | |
1761 auto &MO = Def->getOperand(i); | |
1762 assert(MO.isReg() && "Invalid PHI instruction"); | |
1763 Res.addSource(MO.getReg(), MO.getSubReg()); | |
1764 } | |
1765 | |
1766 return Res; | |
1767 } | |
1768 | |
1769 ValueTrackerResult ValueTracker::getNextSourceImpl() { | |
1409 assert(Def && "This method needs a valid definition"); | 1770 assert(Def && "This method needs a valid definition"); |
1410 | 1771 |
1411 assert( | 1772 assert( |
1412 (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && | 1773 (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && |
1413 Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); | 1774 Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); |
1414 if (Def->isCopy()) | 1775 if (Def->isCopy()) |
1415 return getNextSourceFromCopy(SrcReg, SrcSubReg); | 1776 return getNextSourceFromCopy(); |
1416 if (Def->isBitcast()) | 1777 if (Def->isBitcast()) |
1417 return getNextSourceFromBitcast(SrcReg, SrcSubReg); | 1778 return getNextSourceFromBitcast(); |
1418 // All the remaining cases involve "complex" instructions. | 1779 // All the remaining cases involve "complex" instructions. |
1419 // Bails if we did not ask for the advanced tracking. | 1780 // Bail if we did not ask for the advanced tracking. |
1420 if (!UseAdvancedTracking) | 1781 if (!UseAdvancedTracking) |
1421 return false; | 1782 return ValueTrackerResult(); |
1422 if (Def->isRegSequence() || Def->isRegSequenceLike()) | 1783 if (Def->isRegSequence() || Def->isRegSequenceLike()) |
1423 return getNextSourceFromRegSequence(SrcReg, SrcSubReg); | 1784 return getNextSourceFromRegSequence(); |
1424 if (Def->isInsertSubreg() || Def->isInsertSubregLike()) | 1785 if (Def->isInsertSubreg() || Def->isInsertSubregLike()) |
1425 return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg); | 1786 return getNextSourceFromInsertSubreg(); |
1426 if (Def->isExtractSubreg() || Def->isExtractSubregLike()) | 1787 if (Def->isExtractSubreg() || Def->isExtractSubregLike()) |
1427 return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg); | 1788 return getNextSourceFromExtractSubreg(); |
1428 if (Def->isSubregToReg()) | 1789 if (Def->isSubregToReg()) |
1429 return getNextSourceFromSubregToReg(SrcReg, SrcSubReg); | 1790 return getNextSourceFromSubregToReg(); |
1430 return false; | 1791 if (Def->isPHI()) |
1431 } | 1792 return getNextSourceFromPHI(); |
1432 | 1793 return ValueTrackerResult(); |
1433 const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg, | 1794 } |
1434 unsigned &SrcSubReg) { | 1795 |
1796 ValueTrackerResult ValueTracker::getNextSource() { | |
1435 // If we reach a point where we cannot move up in the use-def chain, | 1797 // If we reach a point where we cannot move up in the use-def chain, |
1436 // there is nothing we can get. | 1798 // there is nothing we can get. |
1437 if (!Def) | 1799 if (!Def) |
1438 return nullptr; | 1800 return ValueTrackerResult(); |
1439 | 1801 |
1440 const MachineInstr *PrevDef = nullptr; | 1802 ValueTrackerResult Res = getNextSourceImpl(); |
1441 // Try to find the next source. | 1803 if (Res.isValid()) { |
1442 if (getNextSourceImpl(SrcReg, SrcSubReg)) { | |
1443 // Update definition, definition index, and subregister for the | 1804 // Update definition, definition index, and subregister for the |
1444 // next call of getNextSource. | 1805 // next call of getNextSource. |
1445 // Update the current register. | 1806 // Update the current register. |
1446 Reg = SrcReg; | 1807 bool OneRegSrc = Res.getNumSources() == 1; |
1447 // Update the return value before moving up in the use-def chain. | 1808 if (OneRegSrc) |
1448 PrevDef = Def; | 1809 Reg = Res.getSrcReg(0); |
1810 // Update the result before moving up in the use-def chain | |
1811 // with the instruction containing the last found sources. | |
1812 Res.setInst(Def); | |
1813 | |
1449 // If we can still move up in the use-def chain, move to the next | 1814 // If we can still move up in the use-def chain, move to the next |
1450 // defintion. | 1815 // definition. |
1451 if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { | 1816 if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) { |
1452 Def = MRI.getVRegDef(Reg); | 1817 Def = MRI.getVRegDef(Reg); |
1453 DefIdx = MRI.def_begin(Reg).getOperandNo(); | 1818 DefIdx = MRI.def_begin(Reg).getOperandNo(); |
1454 DefSubReg = SrcSubReg; | 1819 DefSubReg = Res.getSrcSubReg(0); |
1455 return PrevDef; | 1820 return Res; |
1456 } | 1821 } |
1457 } | 1822 } |
1458 // If we end up here, this means we will not be able to find another source | 1823 // If we end up here, this means we will not be able to find another source |
1459 // for the next iteration. | 1824 // for the next iteration. Make sure any new call to getNextSource bails out |
1460 // Make sure any new call to getNextSource bails out early by cutting the | 1825 // early by cutting the use-def chain. |
1461 // use-def chain. | |
1462 Def = nullptr; | 1826 Def = nullptr; |
1463 return PrevDef; | 1827 return Res; |
1464 } | 1828 } |