comparison lib/CodeGen/PeepholeOptimizer.cpp @ 95:afa8332a0e37 LLVM3.8

LLVM 3.8
author Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date Tue, 13 Oct 2015 17:48:58 +0900
parents 60c9769439b8
children 7d135dc70f03
comparison
equal deleted inserted replaced
84:f3e34b893a5f 95:afa8332a0e37
41 // "sub" with "subs" and eliminate the "cmp" instruction. 41 // "sub" with "subs" and eliminate the "cmp" instruction.
42 // 42 //
43 // - Optimize Loads: 43 // - Optimize Loads:
44 // 44 //
45 // Loads that can be folded into a later instruction. A load is foldable 45 // Loads that can be folded into a later instruction. A load is foldable
46 // if it loads to virtual registers and the virtual register defined has 46 // if it loads to virtual registers and the virtual register defined has
47 // a single use. 47 // a single use.
48 // 48 //
49 // - Optimize Copies and Bitcast (more generally, target specific copies): 49 // - Optimize Copies and Bitcast (more generally, target specific copies):
50 // 50 //
51 // Rewrite copies and bitcasts to avoid cross register bank copies 51 // Rewrite copies and bitcasts to avoid cross register bank copies
74 #include "llvm/CodeGen/MachineDominators.h" 74 #include "llvm/CodeGen/MachineDominators.h"
75 #include "llvm/CodeGen/MachineInstrBuilder.h" 75 #include "llvm/CodeGen/MachineInstrBuilder.h"
76 #include "llvm/CodeGen/MachineRegisterInfo.h" 76 #include "llvm/CodeGen/MachineRegisterInfo.h"
77 #include "llvm/Support/CommandLine.h" 77 #include "llvm/Support/CommandLine.h"
78 #include "llvm/Support/Debug.h" 78 #include "llvm/Support/Debug.h"
79 #include "llvm/Support/raw_ostream.h"
79 #include "llvm/Target/TargetInstrInfo.h" 80 #include "llvm/Target/TargetInstrInfo.h"
80 #include "llvm/Target/TargetRegisterInfo.h" 81 #include "llvm/Target/TargetRegisterInfo.h"
81 #include "llvm/Target/TargetSubtargetInfo.h" 82 #include "llvm/Target/TargetSubtargetInfo.h"
82 #include <utility> 83 #include <utility>
83 using namespace llvm; 84 using namespace llvm;
94 cl::desc("Disable the peephole optimizer")); 95 cl::desc("Disable the peephole optimizer"));
95 96
96 static cl::opt<bool> 97 static cl::opt<bool>
97 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), 98 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
98 cl::desc("Disable advanced copy optimization")); 99 cl::desc("Disable advanced copy optimization"));
100
101 // Limit the number of PHI instructions to process
102 // in PeepholeOptimizer::getNextSource.
103 static cl::opt<unsigned> RewritePHILimit(
104 "rewrite-phi-limit", cl::Hidden, cl::init(10),
105 cl::desc("Limit the length of PHI chains to lookup"));
99 106
100 STATISTIC(NumReuse, "Number of extension results reused"); 107 STATISTIC(NumReuse, "Number of extension results reused");
101 STATISTIC(NumCmps, "Number of compares eliminated"); 108 STATISTIC(NumCmps, "Number of compares eliminated");
102 STATISTIC(NumImmFold, "Number of move immediate folded"); 109 STATISTIC(NumImmFold, "Number of move immediate folded");
103 STATISTIC(NumLoadFold, "Number of loads folded"); 110 STATISTIC(NumLoadFold, "Number of loads folded");
104 STATISTIC(NumSelects, "Number of selects optimized"); 111 STATISTIC(NumSelects, "Number of selects optimized");
105 STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); 112 STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
106 STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); 113 STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
107 114
108 namespace { 115 namespace {
116 class ValueTrackerResult;
117
109 class PeepholeOptimizer : public MachineFunctionPass { 118 class PeepholeOptimizer : public MachineFunctionPass {
110 const TargetInstrInfo *TII; 119 const TargetInstrInfo *TII;
111 const TargetRegisterInfo *TRI; 120 const TargetRegisterInfo *TRI;
112 MachineRegisterInfo *MRI; 121 MachineRegisterInfo *MRI;
113 MachineDominatorTree *DT; // Machine dominator tree 122 MachineDominatorTree *DT; // Machine dominator tree
127 AU.addRequired<MachineDominatorTree>(); 136 AU.addRequired<MachineDominatorTree>();
128 AU.addPreserved<MachineDominatorTree>(); 137 AU.addPreserved<MachineDominatorTree>();
129 } 138 }
130 } 139 }
131 140
141 /// \brief Track Def -> Use info used for rewriting copies.
142 typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>
143 RewriteMapTy;
144
132 private: 145 private:
133 bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); 146 bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
134 bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 147 bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
135 SmallPtrSetImpl<MachineInstr*> &LocalMIs); 148 SmallPtrSetImpl<MachineInstr*> &LocalMIs);
136 bool optimizeSelect(MachineInstr *MI, 149 bool optimizeSelect(MachineInstr *MI,
137 SmallPtrSetImpl<MachineInstr *> &LocalMIs); 150 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
138 bool optimizeCondBranch(MachineInstr *MI); 151 bool optimizeCondBranch(MachineInstr *MI);
139 bool optimizeCopyOrBitcast(MachineInstr *MI);
140 bool optimizeCoalescableCopy(MachineInstr *MI); 152 bool optimizeCoalescableCopy(MachineInstr *MI);
141 bool optimizeUncoalescableCopy(MachineInstr *MI, 153 bool optimizeUncoalescableCopy(MachineInstr *MI,
142 SmallPtrSetImpl<MachineInstr *> &LocalMIs); 154 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
143 bool findNextSource(unsigned &Reg, unsigned &SubReg); 155 bool findNextSource(unsigned Reg, unsigned SubReg,
156 RewriteMapTy &RewriteMap);
144 bool isMoveImmediate(MachineInstr *MI, 157 bool isMoveImmediate(MachineInstr *MI,
145 SmallSet<unsigned, 4> &ImmDefRegs, 158 SmallSet<unsigned, 4> &ImmDefRegs,
146 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 159 DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
147 bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, 160 bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
148 SmallSet<unsigned, 4> &ImmDefRegs, 161 SmallSet<unsigned, 4> &ImmDefRegs,
149 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 162 DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
163
164 /// \brief If copy instruction \p MI is a virtual register copy, track it in
165 /// the set \p CopiedFromRegs and \p CopyMIs. If this virtual register was
166 /// previously seen as a copy, replace the uses of this copy with the
167 /// previously seen copy's destination register.
168 bool foldRedundantCopy(MachineInstr *MI,
169 SmallSet<unsigned, 4> &CopiedFromRegs,
170 DenseMap<unsigned, MachineInstr*> &CopyMIs);
171
150 bool isLoadFoldable(MachineInstr *MI, 172 bool isLoadFoldable(MachineInstr *MI,
151 SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); 173 SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
152 174
153 /// \brief Check whether \p MI is understood by the register coalescer 175 /// \brief Check whether \p MI is understood by the register coalescer
154 /// but may require some rewriting. 176 /// but may require some rewriting.
165 bool isUncoalescableCopy(const MachineInstr &MI) { 187 bool isUncoalescableCopy(const MachineInstr &MI) {
166 return MI.isBitcast() || 188 return MI.isBitcast() ||
167 (!DisableAdvCopyOpt && 189 (!DisableAdvCopyOpt &&
168 (MI.isRegSequenceLike() || MI.isInsertSubregLike() || 190 (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
169 MI.isExtractSubregLike())); 191 MI.isExtractSubregLike()));
192 }
193 };
194
195 /// \brief Helper class to hold a reply for ValueTracker queries. Contains the
196 /// returned sources for a given search and the instructions where the sources
197 /// were tracked from.
198 class ValueTrackerResult {
199 private:
200 /// Track all sources found by one ValueTracker query.
201 SmallVector<TargetInstrInfo::RegSubRegPair, 2> RegSrcs;
202
203 /// Instruction using the sources in 'RegSrcs'.
204 const MachineInstr *Inst;
205
206 public:
207 ValueTrackerResult() : Inst(nullptr) {}
208 ValueTrackerResult(unsigned Reg, unsigned SubReg) : Inst(nullptr) {
209 addSource(Reg, SubReg);
210 }
211
212 bool isValid() const { return getNumSources() > 0; }
213
214 void setInst(const MachineInstr *I) { Inst = I; }
215 const MachineInstr *getInst() const { return Inst; }
216
217 void clear() {
218 RegSrcs.clear();
219 Inst = nullptr;
220 }
221
222 void addSource(unsigned SrcReg, unsigned SrcSubReg) {
223 RegSrcs.push_back(TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg));
224 }
225
226 void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) {
227 assert(Idx < getNumSources() && "Reg pair source out of index");
228 RegSrcs[Idx] = TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg);
229 }
230
231 int getNumSources() const { return RegSrcs.size(); }
232
233 unsigned getSrcReg(int Idx) const {
234 assert(Idx < getNumSources() && "Reg source out of index");
235 return RegSrcs[Idx].Reg;
236 }
237
238 unsigned getSrcSubReg(int Idx) const {
239 assert(Idx < getNumSources() && "SubReg source out of index");
240 return RegSrcs[Idx].SubReg;
241 }
242
243 bool operator==(const ValueTrackerResult &Other) {
244 if (Other.getInst() != getInst())
245 return false;
246
247 if (Other.getNumSources() != getNumSources())
248 return false;
249
250 for (int i = 0, e = Other.getNumSources(); i != e; ++i)
251 if (Other.getSrcReg(i) != getSrcReg(i) ||
252 Other.getSrcSubReg(i) != getSrcSubReg(i))
253 return false;
254 return true;
170 } 255 }
171 }; 256 };
172 257
173 /// \brief Helper class to track the possible sources of a value defined by 258 /// \brief Helper class to track the possible sources of a value defined by
174 /// a (chain of) copy related instructions. 259 /// a (chain of) copy related instructions.
210 /// tracking. 295 /// tracking.
211 const TargetInstrInfo *TII; 296 const TargetInstrInfo *TII;
212 297
213 /// \brief Dispatcher to the right underlying implementation of 298 /// \brief Dispatcher to the right underlying implementation of
214 /// getNextSource. 299 /// getNextSource.
215 bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg); 300 ValueTrackerResult getNextSourceImpl();
216 /// \brief Specialized version of getNextSource for Copy instructions. 301 /// \brief Specialized version of getNextSource for Copy instructions.
217 bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg); 302 ValueTrackerResult getNextSourceFromCopy();
218 /// \brief Specialized version of getNextSource for Bitcast instructions. 303 /// \brief Specialized version of getNextSource for Bitcast instructions.
219 bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg); 304 ValueTrackerResult getNextSourceFromBitcast();
220 /// \brief Specialized version of getNextSource for RegSequence 305 /// \brief Specialized version of getNextSource for RegSequence
221 /// instructions. 306 /// instructions.
222 bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg); 307 ValueTrackerResult getNextSourceFromRegSequence();
223 /// \brief Specialized version of getNextSource for InsertSubreg 308 /// \brief Specialized version of getNextSource for InsertSubreg
224 /// instructions. 309 /// instructions.
225 bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg); 310 ValueTrackerResult getNextSourceFromInsertSubreg();
226 /// \brief Specialized version of getNextSource for ExtractSubreg 311 /// \brief Specialized version of getNextSource for ExtractSubreg
227 /// instructions. 312 /// instructions.
228 bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg); 313 ValueTrackerResult getNextSourceFromExtractSubreg();
229 /// \brief Specialized version of getNextSource for SubregToReg 314 /// \brief Specialized version of getNextSource for SubregToReg
230 /// instructions. 315 /// instructions.
231 bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg); 316 ValueTrackerResult getNextSourceFromSubregToReg();
317 /// \brief Specialized version of getNextSource for PHI instructions.
318 ValueTrackerResult getNextSourceFromPHI();
232 319
233 public: 320 public:
234 /// \brief Create a ValueTracker instance for the value defined by \p Reg. 321 /// \brief Create a ValueTracker instance for the value defined by \p Reg.
235 /// \p DefSubReg represents the sub register index the value tracker will 322 /// \p DefSubReg represents the sub register index the value tracker will
236 /// track. It does not need to match the sub register index used in the 323 /// track. It does not need to match the sub register index used in the
273 Reg = Def->getOperand(DefIdx).getReg(); 360 Reg = Def->getOperand(DefIdx).getReg();
274 } 361 }
275 362
276 /// \brief Following the use-def chain, get the next available source 363 /// \brief Following the use-def chain, get the next available source
277 /// for the tracked value. 364 /// for the tracked value.
278 /// When the returned value is not nullptr, \p SrcReg gives the register 365 /// \return A ValueTrackerResult containing a set of registers
279 /// that contain the tracked value. 366 /// and sub registers with tracked values. A ValueTrackerResult with
280 /// \note The sub register index returned in \p SrcSubReg must be used 367 /// an empty set of registers means no source was found.
281 /// on \p SrcReg to access the actual value. 368 ValueTrackerResult getNextSource();
282 /// \return Unless the returned value is nullptr (i.e., no source found),
283 /// \p SrcReg gives the register of the next source used in the returned
284 /// instruction and \p SrcSubReg the sub-register index to be used on that
285 /// source to get the tracked value. When nullptr is returned, no
286 /// alternative source has been found.
287 const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg);
288 369
289 /// \brief Get the last register where the initial value can be found. 370 /// \brief Get the last register where the initial value can be found.
290 /// Initially this is the register of the definition. 371 /// Initially this is the register of the definition.
291 /// Then, after each successful call to getNextSource, this is the 372 /// Then, after each successful call to getNextSource, this is the
292 /// register of the last source. 373 /// register of the last source.
409 } 490 }
410 } 491 }
411 492
412 if (ExtendLife && !ExtendedUses.empty()) 493 if (ExtendLife && !ExtendedUses.empty())
413 // Extend the liveness of the extension result. 494 // Extend the liveness of the extension result.
414 std::copy(ExtendedUses.begin(), ExtendedUses.end(), 495 Uses.append(ExtendedUses.begin(), ExtendedUses.end());
415 std::back_inserter(Uses));
416 496
417 // Now replace all uses. 497 // Now replace all uses.
418 bool Changed = false; 498 bool Changed = false;
419 if (!Uses.empty()) { 499 if (!Uses.empty()) {
420 SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; 500 SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
504 // generated 584 // generated
505 bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { 585 bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) {
506 return TII->optimizeCondBranch(MI); 586 return TII->optimizeCondBranch(MI);
507 } 587 }
508 588
509 /// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
510 /// share the same register file.
511 static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
512 const TargetRegisterClass *DefRC,
513 unsigned DefSubReg,
514 const TargetRegisterClass *SrcRC,
515 unsigned SrcSubReg) {
516 // Same register class.
517 if (DefRC == SrcRC)
518 return true;
519
520 // Both operands are sub registers. Check if they share a register class.
521 unsigned SrcIdx, DefIdx;
522 if (SrcSubReg && DefSubReg)
523 return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
524 SrcIdx, DefIdx) != nullptr;
525 // At most one of the register is a sub register, make it Src to avoid
526 // duplicating the test.
527 if (!SrcSubReg) {
528 std::swap(DefSubReg, SrcSubReg);
529 std::swap(DefRC, SrcRC);
530 }
531
532 // One of the register is a sub register, check if we can get a superclass.
533 if (SrcSubReg)
534 return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr;
535 // Plain copy.
536 return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
537 }
538
539 /// \brief Try to find the next source that share the same register file 589 /// \brief Try to find the next source that share the same register file
540 /// for the value defined by \p Reg and \p SubReg. 590 /// for the value defined by \p Reg and \p SubReg.
541 /// When true is returned, \p Reg and \p SubReg are updated with the 591 /// When true is returned, the \p RewriteMap can be used by the client to
542 /// register number and sub-register index of the new source. 592 /// retrieve all Def -> Use along the way up to the next source. Any found
593 /// Use that is not itself a key for another entry, is the next source to
594 /// use. During the search for the next source, multiple sources can be found
595 /// given multiple incoming sources of a PHI instruction. In this case, we
596 /// look in each PHI source for the next source; all found next sources must
597 /// share the same register file as \p Reg and \p SubReg. The client should
598 /// then be capable to rewrite all intermediate PHIs to get the next source.
543 /// \return False if no alternative sources are available. True otherwise. 599 /// \return False if no alternative sources are available. True otherwise.
544 bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) { 600 bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg,
601 RewriteMapTy &RewriteMap) {
545 // Do not try to find a new source for a physical register. 602 // Do not try to find a new source for a physical register.
546 // So far we do not have any motivating example for doing that. 603 // So far we do not have any motivating example for doing that.
547 // Thus, instead of maintaining untested code, we will revisit that if 604 // Thus, instead of maintaining untested code, we will revisit that if
548 // that changes at some point. 605 // that changes at some point.
549 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 606 if (TargetRegisterInfo::isPhysicalRegister(Reg))
550 return false; 607 return false;
551
552 const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); 608 const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
553 unsigned DefSubReg = SubReg; 609
554 610 SmallVector<TargetInstrInfo::RegSubRegPair, 4> SrcToLook;
555 unsigned Src; 611 TargetInstrInfo::RegSubRegPair CurSrcPair(Reg, SubReg);
556 unsigned SrcSubReg; 612 SrcToLook.push_back(CurSrcPair);
557 bool ShouldRewrite = false; 613
558 614 unsigned PHICount = 0;
559 // Follow the chain of copies until we reach the top of the use-def chain 615 while (!SrcToLook.empty() && PHICount < RewritePHILimit) {
560 // or find a more suitable source. 616 TargetInstrInfo::RegSubRegPair Pair = SrcToLook.pop_back_val();
561 ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII); 617 // As explained above, do not handle physical registers
562 do { 618 if (TargetRegisterInfo::isPhysicalRegister(Pair.Reg))
563 unsigned CopySrcReg, CopySrcSubReg; 619 return false;
564 if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg)) 620
565 break; 621 CurSrcPair = Pair;
566 Src = CopySrcReg; 622 ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI,
567 SrcSubReg = CopySrcSubReg; 623 !DisableAdvCopyOpt, TII);
568 624 ValueTrackerResult Res;
569 // Do not extend the live-ranges of physical registers as they add 625 bool ShouldRewrite = false;
570 // constraints to the register allocator. 626
571 // Moreover, if we want to extend the live-range of a physical register, 627 do {
572 // unlike SSA virtual register, we will have to check that they are not 628 // Follow the chain of copies until we reach the top of the use-def chain
573 // redefine before the related use. 629 // or find a more suitable source.
574 if (TargetRegisterInfo::isPhysicalRegister(Src)) 630 Res = ValTracker.getNextSource();
575 break; 631 if (!Res.isValid())
576 632 break;
577 const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); 633
578 634 // Insert the Def -> Use entry for the recently found source.
579 // If this source does not incur a cross register bank copy, use it. 635 ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
580 ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC, 636 if (CurSrcRes.isValid()) {
581 SrcSubReg); 637 assert(CurSrcRes == Res && "ValueTrackerResult found must match");
582 } while (!ShouldRewrite); 638 // An existent entry with multiple sources is a PHI cycle we must avoid.
639 // Otherwise it's an entry with a valid next source we already found.
640 if (CurSrcRes.getNumSources() > 1) {
641 DEBUG(dbgs() << "findNextSource: found PHI cycle, aborting...\n");
642 return false;
643 }
644 break;
645 }
646 RewriteMap.insert(std::make_pair(CurSrcPair, Res));
647
648 // ValueTrackerResult usually have one source unless it's the result from
649 // a PHI instruction. Add the found PHI edges to be looked up further.
650 unsigned NumSrcs = Res.getNumSources();
651 if (NumSrcs > 1) {
652 PHICount++;
653 for (unsigned i = 0; i < NumSrcs; ++i)
654 SrcToLook.push_back(TargetInstrInfo::RegSubRegPair(
655 Res.getSrcReg(i), Res.getSrcSubReg(i)));
656 break;
657 }
658
659 CurSrcPair.Reg = Res.getSrcReg(0);
660 CurSrcPair.SubReg = Res.getSrcSubReg(0);
661 // Do not extend the live-ranges of physical registers as they add
662 // constraints to the register allocator. Moreover, if we want to extend
663 // the live-range of a physical register, unlike SSA virtual register,
664 // we will have to check that they aren't redefine before the related use.
665 if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
666 return false;
667
668 const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
669 ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC,
670 CurSrcPair.SubReg);
671 } while (!ShouldRewrite);
672
673 // Continue looking for new sources...
674 if (Res.isValid())
675 continue;
676
677 // Do not continue searching for a new source if the there's at least
678 // one use-def which cannot be rewritten.
679 if (!ShouldRewrite)
680 return false;
681 }
682
683 if (PHICount >= RewritePHILimit) {
684 DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
685 return false;
686 }
583 687
584 // If we did not find a more suitable source, there is nothing to optimize. 688 // If we did not find a more suitable source, there is nothing to optimize.
585 if (!ShouldRewrite || Src == Reg) 689 if (CurSrcPair.Reg == Reg)
586 return false; 690 return false;
587 691
588 Reg = Src;
589 SubReg = SrcSubReg;
590 return true; 692 return true;
693 }
694
695 /// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are
696 /// guaranteed to have the same register class. This is necessary whenever we
697 /// successfully traverse a PHI instruction and find suitable sources coming
698 /// from its edges. By inserting a new PHI, we provide a rewritten PHI def
699 /// suitable to be used in a new COPY instruction.
700 static MachineInstr *
701 insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
702 const SmallVectorImpl<TargetInstrInfo::RegSubRegPair> &SrcRegs,
703 MachineInstr *OrigPHI) {
704 assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
705
706 const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg);
707 unsigned NewVR = MRI->createVirtualRegister(NewRC);
708 MachineBasicBlock *MBB = OrigPHI->getParent();
709 MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(),
710 TII->get(TargetOpcode::PHI), NewVR);
711
712 unsigned MBBOpIdx = 2;
713 for (auto RegPair : SrcRegs) {
714 MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
715 MIB.addMBB(OrigPHI->getOperand(MBBOpIdx).getMBB());
716 // Since we're extended the lifetime of RegPair.Reg, clear the
717 // kill flags to account for that and make RegPair.Reg reaches
718 // the new PHI.
719 MRI->clearKillFlags(RegPair.Reg);
720 MBBOpIdx += 2;
721 }
722
723 return MIB;
591 } 724 }
592 725
593 namespace { 726 namespace {
594 /// \brief Helper class to rewrite the arguments of a copy-like instruction. 727 /// \brief Helper class to rewrite the arguments of a copy-like instruction.
595 class CopyRewriter { 728 class CopyRewriter {
622 /// the only source this instruction has: 755 /// the only source this instruction has:
623 /// (SrcReg, SrcSubReg) = (src, srcSubIdx). 756 /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
624 /// This source defines the whole definition, i.e., 757 /// This source defines the whole definition, i.e.,
625 /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). 758 /// (TrackReg, TrackSubReg) = (dst, dstSubIdx).
626 /// 759 ///
627 /// The second and subsequent calls will return false, has there is only one 760 /// The second and subsequent calls will return false, as there is only one
628 /// rewritable source. 761 /// rewritable source.
629 /// 762 ///
630 /// \return True if a rewritable source has been found, false otherwise. 763 /// \return True if a rewritable source has been found, false otherwise.
631 /// The output arguments are valid if and only if true is returned. 764 /// The output arguments are valid if and only if true is returned.
632 virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 765 virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
633 unsigned &TrackReg, 766 unsigned &TrackReg,
634 unsigned &TrackSubReg) { 767 unsigned &TrackSubReg) {
635 // If CurrentSrcIdx == 1, this means this function has already been 768 // If CurrentSrcIdx == 1, this means this function has already been called
636 // called once. CopyLike has one defintiion and one argument, thus, 769 // once. CopyLike has one definition and one argument, thus, there is
637 // there is nothing else to rewrite. 770 // nothing else to rewrite.
638 if (!CopyLike.isCopy() || CurrentSrcIdx == 1) 771 if (!CopyLike.isCopy() || CurrentSrcIdx == 1)
639 return false; 772 return false;
640 // This is the first call to getNextRewritableSource. 773 // This is the first call to getNextRewritableSource.
641 // Move the CurrentSrcIdx to remember that we made that call. 774 // Move the CurrentSrcIdx to remember that we made that call.
642 CurrentSrcIdx = 1; 775 CurrentSrcIdx = 1;
651 return true; 784 return true;
652 } 785 }
653 786
654 /// \brief Rewrite the current source with \p NewReg and \p NewSubReg 787 /// \brief Rewrite the current source with \p NewReg and \p NewSubReg
655 /// if possible. 788 /// if possible.
656 /// \return True if the rewritting was possible, false otherwise. 789 /// \return True if the rewriting was possible, false otherwise.
657 virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { 790 virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) {
658 if (!CopyLike.isCopy() || CurrentSrcIdx != 1) 791 if (!CopyLike.isCopy() || CurrentSrcIdx != 1)
659 return false; 792 return false;
660 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); 793 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
661 MOSrc.setReg(NewReg); 794 MOSrc.setReg(NewReg);
662 MOSrc.setSubReg(NewSubReg); 795 MOSrc.setSubReg(NewSubReg);
663 return true; 796 return true;
797 }
798
799 /// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find
800 /// the new source to use for rewrite. If \p HandleMultipleSources is true and
801 /// multiple sources for a given \p Def are found along the way, we found a
802 /// PHI instructions that needs to be rewritten.
803 /// TODO: HandleMultipleSources should be removed once we test PHI handling
804 /// with coalescable copies.
805 TargetInstrInfo::RegSubRegPair
806 getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
807 TargetInstrInfo::RegSubRegPair Def,
808 PeepholeOptimizer::RewriteMapTy &RewriteMap,
809 bool HandleMultipleSources = true) {
810
811 TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
812 do {
813 ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
814 // If there are no entries on the map, LookupSrc is the new source.
815 if (!Res.isValid())
816 return LookupSrc;
817
818 // There's only one source for this definition, keep searching...
819 unsigned NumSrcs = Res.getNumSources();
820 if (NumSrcs == 1) {
821 LookupSrc.Reg = Res.getSrcReg(0);
822 LookupSrc.SubReg = Res.getSrcSubReg(0);
823 continue;
824 }
825
826 // TODO: Remove once multiple srcs w/ coalescable copies are supported.
827 if (!HandleMultipleSources)
828 break;
829
830 // Multiple sources, recurse into each source to find a new source
831 // for it. Then, rewrite the PHI accordingly to its new edges.
832 SmallVector<TargetInstrInfo::RegSubRegPair, 4> NewPHISrcs;
833 for (unsigned i = 0; i < NumSrcs; ++i) {
834 TargetInstrInfo::RegSubRegPair PHISrc(Res.getSrcReg(i),
835 Res.getSrcSubReg(i));
836 NewPHISrcs.push_back(
837 getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
838 }
839
840 // Build the new PHI node and return its def register as the new source.
841 MachineInstr *OrigPHI = const_cast<MachineInstr *>(Res.getInst());
842 MachineInstr *NewPHI = insertPHI(MRI, TII, NewPHISrcs, OrigPHI);
843 DEBUG(dbgs() << "-- getNewSource\n");
844 DEBUG(dbgs() << " Replacing: " << *OrigPHI);
845 DEBUG(dbgs() << " With: " << *NewPHI);
846 const MachineOperand &MODef = NewPHI->getOperand(0);
847 return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg());
848
849 } while (1);
850
851 return TargetInstrInfo::RegSubRegPair(0, 0);
852 }
853
854 /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap
855 /// and create a new COPY instruction. More info about RewriteMap in
856 /// PeepholeOptimizer::findNextSource. Right now this is only used to handle
857 /// Uncoalescable copies, since they are copy like instructions that aren't
858 /// recognized by the register allocator.
859 virtual MachineInstr *
860 RewriteSource(TargetInstrInfo::RegSubRegPair Def,
861 PeepholeOptimizer::RewriteMapTy &RewriteMap) {
862 return nullptr;
863 }
864 };
865
866 /// \brief Helper class to rewrite uncoalescable copy like instructions
867 /// into new COPY (coalescable friendly) instructions.
868 class UncoalescableRewriter : public CopyRewriter {
869 protected:
870 const TargetInstrInfo &TII;
871 MachineRegisterInfo &MRI;
872 /// The number of defs in the bitcast
873 unsigned NumDefs;
874
875 public:
876 UncoalescableRewriter(MachineInstr &MI, const TargetInstrInfo &TII,
877 MachineRegisterInfo &MRI)
878 : CopyRewriter(MI), TII(TII), MRI(MRI) {
879 NumDefs = MI.getDesc().getNumDefs();
880 }
881
882 /// \brief Get the next rewritable def source (TrackReg, TrackSubReg)
883 /// All such sources need to be considered rewritable in order to
884 /// rewrite a uncoalescable copy-like instruction. This method return
885 /// each definition that must be checked if rewritable.
886 ///
887 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
888 unsigned &TrackReg,
889 unsigned &TrackSubReg) override {
890 // Find the next non-dead definition and continue from there.
891 if (CurrentSrcIdx == NumDefs)
892 return false;
893
894 while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
895 ++CurrentSrcIdx;
896 if (CurrentSrcIdx == NumDefs)
897 return false;
898 }
899
900 // What we track are the alternative sources of the definition.
901 const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
902 TrackReg = MODef.getReg();
903 TrackSubReg = MODef.getSubReg();
904
905 CurrentSrcIdx++;
906 return true;
907 }
908
909 /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap
910 /// and create a new COPY instruction. More info about RewriteMap in
911 /// PeepholeOptimizer::findNextSource. Right now this is only used to handle
912 /// Uncoalescable copies, since they are copy like instructions that aren't
913 /// recognized by the register allocator.
914 MachineInstr *
915 RewriteSource(TargetInstrInfo::RegSubRegPair Def,
916 PeepholeOptimizer::RewriteMapTy &RewriteMap) override {
917 assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
918 "We do not rewrite physical registers");
919
920 // Find the new source to use in the COPY rewrite.
921 TargetInstrInfo::RegSubRegPair NewSrc =
922 getNewSource(&MRI, &TII, Def, RewriteMap);
923
924 // Insert the COPY.
925 const TargetRegisterClass *DefRC = MRI.getRegClass(Def.Reg);
926 unsigned NewVR = MRI.createVirtualRegister(DefRC);
927
928 MachineInstr *NewCopy =
929 BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
930 TII.get(TargetOpcode::COPY), NewVR)
931 .addReg(NewSrc.Reg, 0, NewSrc.SubReg);
932
933 NewCopy->getOperand(0).setSubReg(Def.SubReg);
934 if (Def.SubReg)
935 NewCopy->getOperand(0).setIsUndef();
936
937 DEBUG(dbgs() << "-- RewriteSource\n");
938 DEBUG(dbgs() << " Replacing: " << CopyLike);
939 DEBUG(dbgs() << " With: " << *NewCopy);
940 MRI.replaceRegWith(Def.Reg, NewVR);
941 MRI.clearKillFlags(NewVR);
942
943 // We extended the lifetime of NewSrc.Reg, clear the kill flags to
944 // account for that.
945 MRI.clearKillFlags(NewSrc.Reg);
946
947 return NewCopy;
664 } 948 }
665 }; 949 };
666 950
667 /// \brief Specialized rewriter for INSERT_SUBREG instruction. 951 /// \brief Specialized rewriter for INSERT_SUBREG instruction.
668 class InsertSubregRewriter : public CopyRewriter { 952 class InsertSubregRewriter : public CopyRewriter {
697 981
698 // We want to track something that is compatible with the 982 // We want to track something that is compatible with the
699 // partial definition. 983 // partial definition.
700 TrackReg = MODef.getReg(); 984 TrackReg = MODef.getReg();
701 if (MODef.getSubReg()) 985 if (MODef.getSubReg())
702 // Bails if we have to compose sub-register indices. 986 // Bail if we have to compose sub-register indices.
703 return false; 987 return false;
704 TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); 988 TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm();
705 return true; 989 return true;
706 } 990 }
707 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 991 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
738 return false; 1022 return false;
739 // We are looking at v1 = EXTRACT_SUBREG v0, sub0. 1023 // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
740 CurrentSrcIdx = 1; 1024 CurrentSrcIdx = 1;
741 const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); 1025 const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
742 SrcReg = MOExtractedReg.getReg(); 1026 SrcReg = MOExtractedReg.getReg();
743 // If we have to compose sub-register indices, bails out. 1027 // If we have to compose sub-register indices, bail out.
744 if (MOExtractedReg.getSubReg()) 1028 if (MOExtractedReg.getSubReg())
745 return false; 1029 return false;
746 1030
747 SrcSubReg = CopyLike.getOperand(2).getImm(); 1031 SrcSubReg = CopyLike.getOperand(2).getImm();
748 1032
816 if (CurrentSrcIdx >= CopyLike.getNumOperands()) 1100 if (CurrentSrcIdx >= CopyLike.getNumOperands())
817 return false; 1101 return false;
818 } 1102 }
819 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); 1103 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
820 SrcReg = MOInsertedReg.getReg(); 1104 SrcReg = MOInsertedReg.getReg();
821 // If we have to compose sub-register indices, bails out. 1105 // If we have to compose sub-register indices, bail out.
822 if ((SrcSubReg = MOInsertedReg.getSubReg())) 1106 if ((SrcSubReg = MOInsertedReg.getSubReg()))
823 return false; 1107 return false;
824 1108
825 // We want to track something that is compatible with the related 1109 // We want to track something that is compatible with the related
826 // partial definition. 1110 // partial definition.
827 TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); 1111 TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
828 1112
829 const MachineOperand &MODef = CopyLike.getOperand(0); 1113 const MachineOperand &MODef = CopyLike.getOperand(0);
830 TrackReg = MODef.getReg(); 1114 TrackReg = MODef.getReg();
831 // If we have to compose sub-registers, bails. 1115 // If we have to compose sub-registers, bail.
832 return MODef.getSubReg() == 0; 1116 return MODef.getSubReg() == 0;
833 } 1117 }
834 1118
835 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 1119 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
836 // We cannot rewrite out of bound operands. 1120 // We cannot rewrite out of bound operands.
848 1132
849 /// \brief Get the appropriated CopyRewriter for \p MI. 1133 /// \brief Get the appropriated CopyRewriter for \p MI.
850 /// \return A pointer to a dynamically allocated CopyRewriter or nullptr 1134 /// \return A pointer to a dynamically allocated CopyRewriter or nullptr
851 /// if no rewriter works for \p MI. 1135 /// if no rewriter works for \p MI.
852 static CopyRewriter *getCopyRewriter(MachineInstr &MI, 1136 static CopyRewriter *getCopyRewriter(MachineInstr &MI,
853 const TargetInstrInfo &TII) { 1137 const TargetInstrInfo &TII,
1138 MachineRegisterInfo &MRI) {
1139 // Handle uncoalescable copy-like instructions.
1140 if (MI.isBitcast() || (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
1141 MI.isExtractSubregLike()))
1142 return new UncoalescableRewriter(MI, TII, MRI);
1143
854 switch (MI.getOpcode()) { 1144 switch (MI.getOpcode()) {
855 default: 1145 default:
856 return nullptr; 1146 return nullptr;
857 case TargetOpcode::COPY: 1147 case TargetOpcode::COPY:
858 return new CopyRewriter(MI); 1148 return new CopyRewriter(MI);
872 /// class. 1162 /// class.
873 /// Two register classes are considered to be compatible if they share 1163 /// Two register classes are considered to be compatible if they share
874 /// the same register bank. 1164 /// the same register bank.
875 /// New copies issued by this optimization are register allocator 1165 /// New copies issued by this optimization are register allocator
876 /// friendly. This optimization does not remove any copy as it may 1166 /// friendly. This optimization does not remove any copy as it may
877 /// overconstraint the register allocator, but replaces some operands 1167 /// overconstrain the register allocator, but replaces some operands
878 /// when possible. 1168 /// when possible.
879 /// \pre isCoalescableCopy(*MI) is true. 1169 /// \pre isCoalescableCopy(*MI) is true.
880 /// \return True, when \p MI has been rewritten. False otherwise. 1170 /// \return True, when \p MI has been rewritten. False otherwise.
881 bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { 1171 bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) {
882 assert(MI && isCoalescableCopy(*MI) && "Invalid argument"); 1172 assert(MI && isCoalescableCopy(*MI) && "Invalid argument");
887 if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) 1177 if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
888 return false; 1178 return false;
889 1179
890 bool Changed = false; 1180 bool Changed = false;
891 // Get the right rewriter for the current copy. 1181 // Get the right rewriter for the current copy.
892 std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII)); 1182 std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI));
893 // If none exists, bails out. 1183 // If none exists, bail out.
894 if (!CpyRewriter) 1184 if (!CpyRewriter)
895 return false; 1185 return false;
896 // Rewrite each rewritable source. 1186 // Rewrite each rewritable source.
897 unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; 1187 unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg;
898 while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, 1188 while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg,
899 TrackSubReg)) { 1189 TrackSubReg)) {
900 unsigned NewSrc = TrackReg; 1190 // Keep track of PHI nodes and its incoming edges when looking for sources.
901 unsigned NewSubReg = TrackSubReg; 1191 RewriteMapTy RewriteMap;
902 // Try to find a more suitable source. 1192 // Try to find a more suitable source. If we failed to do so, or get the
903 // If we failed to do so, or get the actual source, 1193 // actual source, move to the next source.
904 // move to the next source. 1194 if (!findNextSource(TrackReg, TrackSubReg, RewriteMap))
905 if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc)
906 continue; 1195 continue;
1196
1197 // Get the new source to rewrite. TODO: Only enable handling of multiple
1198 // sources (PHIs) once we have a motivating example and testcases for it.
1199 TargetInstrInfo::RegSubRegPair TrackPair(TrackReg, TrackSubReg);
1200 TargetInstrInfo::RegSubRegPair NewSrc = CpyRewriter->getNewSource(
1201 MRI, TII, TrackPair, RewriteMap, false /* multiple sources */);
1202 if (SrcReg == NewSrc.Reg || NewSrc.Reg == 0)
1203 continue;
1204
907 // Rewrite source. 1205 // Rewrite source.
908 if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) { 1206 if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
909 // We may have extended the live-range of NewSrc, account for that. 1207 // We may have extended the live-range of NewSrc, account for that.
910 MRI->clearKillFlags(NewSrc); 1208 MRI->clearKillFlags(NewSrc.Reg);
911 Changed = true; 1209 Changed = true;
912 } 1210 }
913 } 1211 }
914 // TODO: We could have a clean-up method to tidy the instruction. 1212 // TODO: We could have a clean-up method to tidy the instruction.
915 // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0 1213 // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
916 // => v0 = COPY v1 1214 // => v0 = COPY v1
917 // Currently we haven't seen motivating example for that and we 1215 // Currently we haven't seen motivating example for that and we
918 // want to avoid untested code. 1216 // want to avoid untested code.
919 NumRewrittenCopies += Changed == true; 1217 NumRewrittenCopies += Changed;
920 return Changed; 1218 return Changed;
921 } 1219 }
922 1220
923 /// \brief Optimize copy-like instructions to create 1221 /// \brief Optimize copy-like instructions to create
924 /// register coalescer friendly instruction. 1222 /// register coalescer friendly instruction.
934 bool PeepholeOptimizer::optimizeUncoalescableCopy( 1232 bool PeepholeOptimizer::optimizeUncoalescableCopy(
935 MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) { 1233 MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
936 assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); 1234 assert(MI && isUncoalescableCopy(*MI) && "Invalid argument");
937 1235
938 // Check if we can rewrite all the values defined by this instruction. 1236 // Check if we can rewrite all the values defined by this instruction.
939 SmallVector< 1237 SmallVector<TargetInstrInfo::RegSubRegPair, 4> RewritePairs;
940 std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>, 1238 // Get the right rewriter for the current copy.
941 4> RewritePairs; 1239 std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI));
942 for (const MachineOperand &MODef : MI->defs()) { 1240 // If none exists, bail out.
943 if (MODef.isDead()) 1241 if (!CpyRewriter)
944 // We can ignore those. 1242 return false;
945 continue; 1243
946 1244 // Rewrite each rewritable source by generating new COPYs. This works
1245 // differently from optimizeCoalescableCopy since it first makes sure that all
1246 // definitions can be rewritten.
1247 RewriteMapTy RewriteMap;
1248 unsigned Reg, SubReg, CopyDefReg, CopyDefSubReg;
1249 while (CpyRewriter->getNextRewritableSource(Reg, SubReg, CopyDefReg,
1250 CopyDefSubReg)) {
947 // If a physical register is here, this is probably for a good reason. 1251 // If a physical register is here, this is probably for a good reason.
948 // Do not rewrite that. 1252 // Do not rewrite that.
949 if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) 1253 if (TargetRegisterInfo::isPhysicalRegister(CopyDefReg))
950 return false; 1254 return false;
951 1255
952 // If we do not know how to rewrite this definition, there is no point 1256 // If we do not know how to rewrite this definition, there is no point
953 // in trying to kill this instruction. 1257 // in trying to kill this instruction.
954 TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg()); 1258 TargetInstrInfo::RegSubRegPair Def(CopyDefReg, CopyDefSubReg);
955 TargetInstrInfo::RegSubRegPair Src = Def; 1259 if (!findNextSource(Def.Reg, Def.SubReg, RewriteMap))
956 if (!findNextSource(Src.Reg, Src.SubReg))
957 return false; 1260 return false;
958 RewritePairs.push_back(std::make_pair(Def, Src)); 1261
959 } 1262 RewritePairs.push_back(Def);
1263 }
1264
960 // The change is possible for all defs, do it. 1265 // The change is possible for all defs, do it.
961 for (const auto &PairDefSrc : RewritePairs) { 1266 for (const auto &Def : RewritePairs) {
962 const auto &Def = PairDefSrc.first;
963 const auto &Src = PairDefSrc.second;
964 // Rewrite the "copy" in a way the register coalescer understands. 1267 // Rewrite the "copy" in a way the register coalescer understands.
965 assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && 1268 MachineInstr *NewCopy = CpyRewriter->RewriteSource(Def, RewriteMap);
966 "We do not rewrite physical registers"); 1269 assert(NewCopy && "Should be able to always generate a new copy");
967 const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
968 unsigned NewVR = MRI->createVirtualRegister(DefRC);
969 MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
970 TII->get(TargetOpcode::COPY),
971 NewVR).addReg(Src.Reg, 0, Src.SubReg);
972 NewCopy->getOperand(0).setSubReg(Def.SubReg);
973 if (Def.SubReg)
974 NewCopy->getOperand(0).setIsUndef();
975 LocalMIs.insert(NewCopy); 1270 LocalMIs.insert(NewCopy);
976 MRI->replaceRegWith(Def.Reg, NewVR); 1271 }
977 MRI->clearKillFlags(NewVR); 1272
978 // We extended the lifetime of Src.
979 // Clear the kill flags to account for that.
980 MRI->clearKillFlags(Src.Reg);
981 }
982 // MI is now dead. 1273 // MI is now dead.
983 MI->eraseFromParent(); 1274 MI->eraseFromParent();
984 ++NumUncoalescableCopies; 1275 ++NumUncoalescableCopies;
985 return true; 1276 return true;
986 } 1277 }
1051 } 1342 }
1052 } 1343 }
1053 return false; 1344 return false;
1054 } 1345 }
1055 1346
1347 // FIXME: This is very simple and misses some cases which should be handled when
1348 // motivating examples are found.
1349 //
1350 // The copy rewriting logic should look at uses as well as defs and be able to
1351 // eliminate copies across blocks.
1352 //
1353 // Later copies that are subregister extracts will also not be eliminated since
1354 // only the first copy is considered.
1355 //
1356 // e.g.
1357 // %vreg1 = COPY %vreg0
1358 // %vreg2 = COPY %vreg0:sub1
1359 //
1360 // Should replace %vreg2 uses with %vreg1:sub1
1361 bool PeepholeOptimizer::foldRedundantCopy(
1362 MachineInstr *MI,
1363 SmallSet<unsigned, 4> &CopySrcRegs,
1364 DenseMap<unsigned, MachineInstr *> &CopyMIs) {
1365 assert(MI->isCopy());
1366
1367 unsigned SrcReg = MI->getOperand(1).getReg();
1368 if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
1369 return false;
1370
1371 unsigned DstReg = MI->getOperand(0).getReg();
1372 if (!TargetRegisterInfo::isVirtualRegister(DstReg))
1373 return false;
1374
1375 if (CopySrcRegs.insert(SrcReg).second) {
1376 // First copy of this reg seen.
1377 CopyMIs.insert(std::make_pair(SrcReg, MI));
1378 return false;
1379 }
1380
1381 MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second;
1382
1383 unsigned SrcSubReg = MI->getOperand(1).getSubReg();
1384 unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg();
1385
1386 // Can't replace different subregister extracts.
1387 if (SrcSubReg != PrevSrcSubReg)
1388 return false;
1389
1390 unsigned PrevDstReg = PrevCopy->getOperand(0).getReg();
1391
1392 // Only replace if the copy register class is the same.
1393 //
1394 // TODO: If we have multiple copies to different register classes, we may want
1395 // to track multiple copies of the same source register.
1396 if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
1397 return false;
1398
1399 MRI->replaceRegWith(DstReg, PrevDstReg);
1400
1401 // Lifetime of the previous copy has been extended.
1402 MRI->clearKillFlags(PrevDstReg);
1403 return true;
1404 }
1405
1056 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { 1406 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
1057 if (skipOptnoneFunction(*MF.getFunction())) 1407 if (skipOptnoneFunction(*MF.getFunction()))
1058 return false; 1408 return false;
1059 1409
1060 DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); 1410 DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
1084 SmallPtrSet<MachineInstr*, 16> LocalMIs; 1434 SmallPtrSet<MachineInstr*, 16> LocalMIs;
1085 SmallSet<unsigned, 4> ImmDefRegs; 1435 SmallSet<unsigned, 4> ImmDefRegs;
1086 DenseMap<unsigned, MachineInstr*> ImmDefMIs; 1436 DenseMap<unsigned, MachineInstr*> ImmDefMIs;
1087 SmallSet<unsigned, 16> FoldAsLoadDefCandidates; 1437 SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
1088 1438
1439 // Set of virtual registers that are copied from.
1440 SmallSet<unsigned, 4> CopySrcRegs;
1441 DenseMap<unsigned, MachineInstr *> CopySrcMIs;
1442
1089 for (MachineBasicBlock::iterator 1443 for (MachineBasicBlock::iterator
1090 MII = I->begin(), MIE = I->end(); MII != MIE; ) { 1444 MII = I->begin(), MIE = I->end(); MII != MIE; ) {
1091 MachineInstr *MI = &*MII; 1445 MachineInstr *MI = &*MII;
1092 // We may be erasing MI below, increment MII now. 1446 // We may be erasing MI below, increment MII now.
1093 ++MII; 1447 ++MII;
1095 1449
1096 // Skip debug values. They should not affect this peephole optimization. 1450 // Skip debug values. They should not affect this peephole optimization.
1097 if (MI->isDebugValue()) 1451 if (MI->isDebugValue())
1098 continue; 1452 continue;
1099 1453
1100 // If there exists an instruction which belongs to the following 1454 // If we run into an instruction we can't fold across, discard
1101 // categories, we will discard the load candidates. 1455 // the load candidates.
1456 if (MI->isLoadFoldBarrier())
1457 FoldAsLoadDefCandidates.clear();
1458
1102 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || 1459 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
1103 MI->isKill() || MI->isInlineAsm() || 1460 MI->isKill() || MI->isInlineAsm() ||
1104 MI->hasUnmodeledSideEffects()) { 1461 MI->hasUnmodeledSideEffects())
1105 FoldAsLoadDefCandidates.clear();
1106 continue; 1462 continue;
1107 }
1108 if (MI->mayStore() || MI->isCall())
1109 FoldAsLoadDefCandidates.clear();
1110 1463
1111 if ((isUncoalescableCopy(*MI) && 1464 if ((isUncoalescableCopy(*MI) &&
1112 optimizeUncoalescableCopy(MI, LocalMIs)) || 1465 optimizeUncoalescableCopy(MI, LocalMIs)) ||
1113 (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || 1466 (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
1114 (MI->isSelect() && optimizeSelect(MI, LocalMIs))) { 1467 (MI->isSelect() && optimizeSelect(MI, LocalMIs))) {
1123 continue; 1476 continue;
1124 } 1477 }
1125 1478
1126 if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) { 1479 if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) {
1127 // MI is just rewritten. 1480 // MI is just rewritten.
1481 Changed = true;
1482 continue;
1483 }
1484
1485 if (MI->isCopy() && foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs)) {
1486 LocalMIs.erase(MI);
1487 MI->eraseFromParent();
1128 Changed = true; 1488 Changed = true;
1129 continue; 1489 continue;
1130 } 1490 }
1131 1491
1132 if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { 1492 if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
1188 } 1548 }
1189 1549
1190 return Changed; 1550 return Changed;
1191 } 1551 }
1192 1552
1193 bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg, 1553 ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
1194 unsigned &SrcSubReg) {
1195 assert(Def->isCopy() && "Invalid definition"); 1554 assert(Def->isCopy() && "Invalid definition");
1196 // Copy instruction are supposed to be: Def = Src. 1555 // Copy instruction are supposed to be: Def = Src.
1197 // If someone breaks this assumption, bad things will happen everywhere. 1556 // If someone breaks this assumption, bad things will happen everywhere.
1198 assert(Def->getNumOperands() == 2 && "Invalid number of operands"); 1557 assert(Def->getNumOperands() == 2 && "Invalid number of operands");
1199 1558
1200 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) 1559 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
1201 // If we look for a different subreg, it means we want a subreg of src. 1560 // If we look for a different subreg, it means we want a subreg of src.
1202 // Bails as we do not support composing subreg yet. 1561 // Bails as we do not support composing subregs yet.
1203 return false; 1562 return ValueTrackerResult();
1204 // Otherwise, we want the whole source. 1563 // Otherwise, we want the whole source.
1205 const MachineOperand &Src = Def->getOperand(1); 1564 const MachineOperand &Src = Def->getOperand(1);
1206 SrcReg = Src.getReg(); 1565 return ValueTrackerResult(Src.getReg(), Src.getSubReg());
1207 SrcSubReg = Src.getSubReg(); 1566 }
1208 return true; 1567
1209 } 1568 ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
1210
1211 bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg,
1212 unsigned &SrcSubReg) {
1213 assert(Def->isBitcast() && "Invalid definition"); 1569 assert(Def->isBitcast() && "Invalid definition");
1214 1570
1215 // Bail if there are effects that a plain copy will not expose. 1571 // Bail if there are effects that a plain copy will not expose.
1216 if (Def->hasUnmodeledSideEffects()) 1572 if (Def->hasUnmodeledSideEffects())
1217 return false; 1573 return ValueTrackerResult();
1218 1574
1219 // Bitcasts with more than one def are not supported. 1575 // Bitcasts with more than one def are not supported.
1220 if (Def->getDesc().getNumDefs() != 1) 1576 if (Def->getDesc().getNumDefs() != 1)
1221 return false; 1577 return ValueTrackerResult();
1222 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) 1578 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
1223 // If we look for a different subreg, it means we want a subreg of the src. 1579 // If we look for a different subreg, it means we want a subreg of the src.
1224 // Bails as we do not support composing subreg yet. 1580 // Bails as we do not support composing subregs yet.
1225 return false; 1581 return ValueTrackerResult();
1226 1582
1227 unsigned SrcIdx = Def->getNumOperands(); 1583 unsigned SrcIdx = Def->getNumOperands();
1228 for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; 1584 for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
1229 ++OpIdx) { 1585 ++OpIdx) {
1230 const MachineOperand &MO = Def->getOperand(OpIdx); 1586 const MachineOperand &MO = Def->getOperand(OpIdx);
1231 if (!MO.isReg() || !MO.getReg()) 1587 if (!MO.isReg() || !MO.getReg())
1232 continue; 1588 continue;
1233 assert(!MO.isDef() && "We should have skipped all the definitions by now"); 1589 assert(!MO.isDef() && "We should have skipped all the definitions by now");
1234 if (SrcIdx != EndOpIdx) 1590 if (SrcIdx != EndOpIdx)
1235 // Multiple sources? 1591 // Multiple sources?
1236 return false; 1592 return ValueTrackerResult();
1237 SrcIdx = OpIdx; 1593 SrcIdx = OpIdx;
1238 } 1594 }
1239 const MachineOperand &Src = Def->getOperand(SrcIdx); 1595 const MachineOperand &Src = Def->getOperand(SrcIdx);
1240 SrcReg = Src.getReg(); 1596 return ValueTrackerResult(Src.getReg(), Src.getSubReg());
1241 SrcSubReg = Src.getSubReg(); 1597 }
1242 return true; 1598
1243 } 1599 ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
1244
1245 bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg,
1246 unsigned &SrcSubReg) {
1247 assert((Def->isRegSequence() || Def->isRegSequenceLike()) && 1600 assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
1248 "Invalid definition"); 1601 "Invalid definition");
1249 1602
1250 if (Def->getOperand(DefIdx).getSubReg()) 1603 if (Def->getOperand(DefIdx).getSubReg())
1251 // If we are composing subreg, bails out. 1604 // If we are composing subregs, bail out.
1252 // The case we are checking is Def.<subreg> = REG_SEQUENCE. 1605 // The case we are checking is Def.<subreg> = REG_SEQUENCE.
1253 // This should almost never happen as the SSA property is tracked at 1606 // This should almost never happen as the SSA property is tracked at
1254 // the register level (as opposed to the subreg level). 1607 // the register level (as opposed to the subreg level).
1255 // I.e., 1608 // I.e.,
1256 // Def.sub0 = 1609 // Def.sub0 =
1260 // However, some code could theoretically generates a single 1613 // However, some code could theoretically generates a single
1261 // Def.sub0 (i.e, not defining the other subregs) and we would 1614 // Def.sub0 (i.e, not defining the other subregs) and we would
1262 // have this case. 1615 // have this case.
1263 // If we can ascertain (or force) that this never happens, we could 1616 // If we can ascertain (or force) that this never happens, we could
1264 // turn that into an assertion. 1617 // turn that into an assertion.
1265 return false; 1618 return ValueTrackerResult();
1266 1619
1267 if (!TII) 1620 if (!TII)
1268 // We could handle the REG_SEQUENCE here, but we do not want to 1621 // We could handle the REG_SEQUENCE here, but we do not want to
1269 // duplicate the code from the generic TII. 1622 // duplicate the code from the generic TII.
1270 return false; 1623 return ValueTrackerResult();
1271 1624
1272 SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs; 1625 SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs;
1273 if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) 1626 if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
1274 return false; 1627 return ValueTrackerResult();
1275 1628
1276 // We are looking at: 1629 // We are looking at:
1277 // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... 1630 // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
1278 // Check if one of the operand defines the subreg we are interested in. 1631 // Check if one of the operand defines the subreg we are interested in.
1279 for (auto &RegSeqInput : RegSeqInputRegs) { 1632 for (auto &RegSeqInput : RegSeqInputRegs) {
1280 if (RegSeqInput.SubIdx == DefSubReg) { 1633 if (RegSeqInput.SubIdx == DefSubReg) {
1281 if (RegSeqInput.SubReg) 1634 if (RegSeqInput.SubReg)
1282 // Bails if we have to compose sub registers. 1635 // Bail if we have to compose sub registers.
1283 return false; 1636 return ValueTrackerResult();
1284 1637
1285 SrcReg = RegSeqInput.Reg; 1638 return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
1286 SrcSubReg = RegSeqInput.SubReg;
1287 return true;
1288 } 1639 }
1289 } 1640 }
1290 1641
1291 // If the subreg we are tracking is super-defined by another subreg, 1642 // If the subreg we are tracking is super-defined by another subreg,
1292 // we could follow this value. However, this would require to compose 1643 // we could follow this value. However, this would require to compose
1293 // the subreg and we do not do that for now. 1644 // the subreg and we do not do that for now.
1294 return false; 1645 return ValueTrackerResult();
1295 } 1646 }
1296 1647
1297 bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, 1648 ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
1298 unsigned &SrcSubReg) {
1299 assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) && 1649 assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
1300 "Invalid definition"); 1650 "Invalid definition");
1301 1651
1302 if (Def->getOperand(DefIdx).getSubReg()) 1652 if (Def->getOperand(DefIdx).getSubReg())
1303 // If we are composing subreg, bails out. 1653 // If we are composing subreg, bail out.
1304 // Same remark as getNextSourceFromRegSequence. 1654 // Same remark as getNextSourceFromRegSequence.
1305 // I.e., this may be turned into an assert. 1655 // I.e., this may be turned into an assert.
1306 return false; 1656 return ValueTrackerResult();
1307 1657
1308 if (!TII) 1658 if (!TII)
1309 // We could handle the REG_SEQUENCE here, but we do not want to 1659 // We could handle the REG_SEQUENCE here, but we do not want to
1310 // duplicate the code from the generic TII. 1660 // duplicate the code from the generic TII.
1311 return false; 1661 return ValueTrackerResult();
1312 1662
1313 TargetInstrInfo::RegSubRegPair BaseReg; 1663 TargetInstrInfo::RegSubRegPair BaseReg;
1314 TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; 1664 TargetInstrInfo::RegSubRegPairAndIdx InsertedReg;
1315 if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) 1665 if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
1316 return false; 1666 return ValueTrackerResult();
1317 1667
1318 // We are looking at: 1668 // We are looking at:
1319 // Def = INSERT_SUBREG v0, v1, sub1 1669 // Def = INSERT_SUBREG v0, v1, sub1
1320 // There are two cases: 1670 // There are two cases:
1321 // 1. DefSubReg == sub1, get v1. 1671 // 1. DefSubReg == sub1, get v1.
1322 // 2. DefSubReg != sub1, the value may be available through v0. 1672 // 2. DefSubReg != sub1, the value may be available through v0.
1323 1673
1324 // #1 Check if the inserted register matches the required sub index. 1674 // #1 Check if the inserted register matches the required sub index.
1325 if (InsertedReg.SubIdx == DefSubReg) { 1675 if (InsertedReg.SubIdx == DefSubReg) {
1326 SrcReg = InsertedReg.Reg; 1676 return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
1327 SrcSubReg = InsertedReg.SubReg;
1328 return true;
1329 } 1677 }
1330 // #2 Otherwise, if the sub register we are looking for is not partial 1678 // #2 Otherwise, if the sub register we are looking for is not partial
1331 // defined by the inserted element, we can look through the main 1679 // defined by the inserted element, we can look through the main
1332 // register (v0). 1680 // register (v0).
1333 const MachineOperand &MODef = Def->getOperand(DefIdx); 1681 const MachineOperand &MODef = Def->getOperand(DefIdx);
1334 // If the result register (Def) and the base register (v0) do not 1682 // If the result register (Def) and the base register (v0) do not
1335 // have the same register class or if we have to compose 1683 // have the same register class or if we have to compose
1336 // subregisters, bails out. 1684 // subregisters, bail out.
1337 if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) || 1685 if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
1338 BaseReg.SubReg) 1686 BaseReg.SubReg)
1339 return false; 1687 return ValueTrackerResult();
1340 1688
1341 // Get the TRI and check if the inserted sub-register overlaps with the 1689 // Get the TRI and check if the inserted sub-register overlaps with the
1342 // sub-register we are tracking. 1690 // sub-register we are tracking.
1343 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); 1691 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
1344 if (!TRI || 1692 if (!TRI ||
1345 (TRI->getSubRegIndexLaneMask(DefSubReg) & 1693 (TRI->getSubRegIndexLaneMask(DefSubReg) &
1346 TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) 1694 TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0)
1347 return false; 1695 return ValueTrackerResult();
1348 // At this point, the value is available in v0 via the same subreg 1696 // At this point, the value is available in v0 via the same subreg
1349 // we used for Def. 1697 // we used for Def.
1350 SrcReg = BaseReg.Reg; 1698 return ValueTrackerResult(BaseReg.Reg, DefSubReg);
1351 SrcSubReg = DefSubReg; 1699 }
1352 return true; 1700
1353 } 1701 ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
1354
1355 bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg,
1356 unsigned &SrcSubReg) {
1357 assert((Def->isExtractSubreg() || 1702 assert((Def->isExtractSubreg() ||
1358 Def->isExtractSubregLike()) && "Invalid definition"); 1703 Def->isExtractSubregLike()) && "Invalid definition");
1359 // We are looking at: 1704 // We are looking at:
1360 // Def = EXTRACT_SUBREG v0, sub0 1705 // Def = EXTRACT_SUBREG v0, sub0
1361 1706
1362 // Bails if we have to compose sub registers. 1707 // Bail if we have to compose sub registers.
1363 // Indeed, if DefSubReg != 0, we would have to compose it with sub0. 1708 // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
1364 if (DefSubReg) 1709 if (DefSubReg)
1365 return false; 1710 return ValueTrackerResult();
1366 1711
1367 if (!TII) 1712 if (!TII)
1368 // We could handle the EXTRACT_SUBREG here, but we do not want to 1713 // We could handle the EXTRACT_SUBREG here, but we do not want to
1369 // duplicate the code from the generic TII. 1714 // duplicate the code from the generic TII.
1370 return false; 1715 return ValueTrackerResult();
1371 1716
1372 TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; 1717 TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg;
1373 if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) 1718 if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
1374 return false; 1719 return ValueTrackerResult();
1375 1720
1376 // Bails if we have to compose sub registers. 1721 // Bail if we have to compose sub registers.
1377 // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. 1722 // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
1378 if (ExtractSubregInputReg.SubReg) 1723 if (ExtractSubregInputReg.SubReg)
1379 return false; 1724 return ValueTrackerResult();
1380 // Otherwise, the value is available in the v0.sub0. 1725 // Otherwise, the value is available in the v0.sub0.
1381 SrcReg = ExtractSubregInputReg.Reg; 1726 return ValueTrackerResult(ExtractSubregInputReg.Reg, ExtractSubregInputReg.SubIdx);
1382 SrcSubReg = ExtractSubregInputReg.SubIdx; 1727 }
1383 return true; 1728
1384 } 1729 ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
1385
1386 bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg,
1387 unsigned &SrcSubReg) {
1388 assert(Def->isSubregToReg() && "Invalid definition"); 1730 assert(Def->isSubregToReg() && "Invalid definition");
1389 // We are looking at: 1731 // We are looking at:
1390 // Def = SUBREG_TO_REG Imm, v0, sub0 1732 // Def = SUBREG_TO_REG Imm, v0, sub0
1391 1733
1392 // Bails if we have to compose sub registers. 1734 // Bail if we have to compose sub registers.
1393 // If DefSubReg != sub0, we would have to check that all the bits 1735 // If DefSubReg != sub0, we would have to check that all the bits
1394 // we track are included in sub0 and if yes, we would have to 1736 // we track are included in sub0 and if yes, we would have to
1395 // determine the right subreg in v0. 1737 // determine the right subreg in v0.
1396 if (DefSubReg != Def->getOperand(3).getImm()) 1738 if (DefSubReg != Def->getOperand(3).getImm())
1397 return false; 1739 return ValueTrackerResult();
1398 // Bails if we have to compose sub registers. 1740 // Bail if we have to compose sub registers.
1399 // Likewise, if v0.subreg != 0, we would have to compose it with sub0. 1741 // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
1400 if (Def->getOperand(2).getSubReg()) 1742 if (Def->getOperand(2).getSubReg())
1401 return false; 1743 return ValueTrackerResult();
1402 1744
1403 SrcReg = Def->getOperand(2).getReg(); 1745 return ValueTrackerResult(Def->getOperand(2).getReg(),
1404 SrcSubReg = Def->getOperand(3).getImm(); 1746 Def->getOperand(3).getImm());
1405 return true; 1747 }
1406 } 1748
1407 1749 /// \brief Explore each PHI incoming operand and return its sources
1408 bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) { 1750 ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
1751 assert(Def->isPHI() && "Invalid definition");
1752 ValueTrackerResult Res;
1753
1754 // If we look for a different subreg, bail as we do not support composing
1755 // subregs yet.
1756 if (Def->getOperand(0).getSubReg() != DefSubReg)
1757 return ValueTrackerResult();
1758
1759 // Return all register sources for PHI instructions.
1760 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
1761 auto &MO = Def->getOperand(i);
1762 assert(MO.isReg() && "Invalid PHI instruction");
1763 Res.addSource(MO.getReg(), MO.getSubReg());
1764 }
1765
1766 return Res;
1767 }
1768
1769 ValueTrackerResult ValueTracker::getNextSourceImpl() {
1409 assert(Def && "This method needs a valid definition"); 1770 assert(Def && "This method needs a valid definition");
1410 1771
1411 assert( 1772 assert(
1412 (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && 1773 (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
1413 Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); 1774 Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
1414 if (Def->isCopy()) 1775 if (Def->isCopy())
1415 return getNextSourceFromCopy(SrcReg, SrcSubReg); 1776 return getNextSourceFromCopy();
1416 if (Def->isBitcast()) 1777 if (Def->isBitcast())
1417 return getNextSourceFromBitcast(SrcReg, SrcSubReg); 1778 return getNextSourceFromBitcast();
1418 // All the remaining cases involve "complex" instructions. 1779 // All the remaining cases involve "complex" instructions.
1419 // Bails if we did not ask for the advanced tracking. 1780 // Bail if we did not ask for the advanced tracking.
1420 if (!UseAdvancedTracking) 1781 if (!UseAdvancedTracking)
1421 return false; 1782 return ValueTrackerResult();
1422 if (Def->isRegSequence() || Def->isRegSequenceLike()) 1783 if (Def->isRegSequence() || Def->isRegSequenceLike())
1423 return getNextSourceFromRegSequence(SrcReg, SrcSubReg); 1784 return getNextSourceFromRegSequence();
1424 if (Def->isInsertSubreg() || Def->isInsertSubregLike()) 1785 if (Def->isInsertSubreg() || Def->isInsertSubregLike())
1425 return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg); 1786 return getNextSourceFromInsertSubreg();
1426 if (Def->isExtractSubreg() || Def->isExtractSubregLike()) 1787 if (Def->isExtractSubreg() || Def->isExtractSubregLike())
1427 return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg); 1788 return getNextSourceFromExtractSubreg();
1428 if (Def->isSubregToReg()) 1789 if (Def->isSubregToReg())
1429 return getNextSourceFromSubregToReg(SrcReg, SrcSubReg); 1790 return getNextSourceFromSubregToReg();
1430 return false; 1791 if (Def->isPHI())
1431 } 1792 return getNextSourceFromPHI();
1432 1793 return ValueTrackerResult();
1433 const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg, 1794 }
1434 unsigned &SrcSubReg) { 1795
1796 ValueTrackerResult ValueTracker::getNextSource() {
1435 // If we reach a point where we cannot move up in the use-def chain, 1797 // If we reach a point where we cannot move up in the use-def chain,
1436 // there is nothing we can get. 1798 // there is nothing we can get.
1437 if (!Def) 1799 if (!Def)
1438 return nullptr; 1800 return ValueTrackerResult();
1439 1801
1440 const MachineInstr *PrevDef = nullptr; 1802 ValueTrackerResult Res = getNextSourceImpl();
1441 // Try to find the next source. 1803 if (Res.isValid()) {
1442 if (getNextSourceImpl(SrcReg, SrcSubReg)) {
1443 // Update definition, definition index, and subregister for the 1804 // Update definition, definition index, and subregister for the
1444 // next call of getNextSource. 1805 // next call of getNextSource.
1445 // Update the current register. 1806 // Update the current register.
1446 Reg = SrcReg; 1807 bool OneRegSrc = Res.getNumSources() == 1;
1447 // Update the return value before moving up in the use-def chain. 1808 if (OneRegSrc)
1448 PrevDef = Def; 1809 Reg = Res.getSrcReg(0);
1810 // Update the result before moving up in the use-def chain
1811 // with the instruction containing the last found sources.
1812 Res.setInst(Def);
1813
1449 // If we can still move up in the use-def chain, move to the next 1814 // If we can still move up in the use-def chain, move to the next
1450 // defintion. 1815 // definition.
1451 if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { 1816 if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) {
1452 Def = MRI.getVRegDef(Reg); 1817 Def = MRI.getVRegDef(Reg);
1453 DefIdx = MRI.def_begin(Reg).getOperandNo(); 1818 DefIdx = MRI.def_begin(Reg).getOperandNo();
1454 DefSubReg = SrcSubReg; 1819 DefSubReg = Res.getSrcSubReg(0);
1455 return PrevDef; 1820 return Res;
1456 } 1821 }
1457 } 1822 }
1458 // If we end up here, this means we will not be able to find another source 1823 // If we end up here, this means we will not be able to find another source
1459 // for the next iteration. 1824 // for the next iteration. Make sure any new call to getNextSource bails out
1460 // Make sure any new call to getNextSource bails out early by cutting the 1825 // early by cutting the use-def chain.
1461 // use-def chain.
1462 Def = nullptr; 1826 Def = nullptr;
1463 return PrevDef; 1827 return Res;
1464 } 1828 }