comparison lib/Target/R600/SIInstrInfo.cpp @ 77:54457678186b (branch: LLVM 3.6)
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
---|---
date | Mon, 08 Sep 2014 22:06:00 +0900
parents | e4204d083e25
children | 60c9769439b8
34:e874dbf0ad9d | 77:54457678186b |
---|---|
15 | 15 |
16 #include "SIInstrInfo.h" | 16 #include "SIInstrInfo.h" |
17 #include "AMDGPUTargetMachine.h" | 17 #include "AMDGPUTargetMachine.h" |
18 #include "SIDefines.h" | 18 #include "SIDefines.h" |
19 #include "SIMachineFunctionInfo.h" | 19 #include "SIMachineFunctionInfo.h" |
20 #include "llvm/CodeGen/MachineFrameInfo.h" | |
20 #include "llvm/CodeGen/MachineInstrBuilder.h" | 21 #include "llvm/CodeGen/MachineInstrBuilder.h" |
21 #include "llvm/CodeGen/MachineRegisterInfo.h" | 22 #include "llvm/CodeGen/MachineRegisterInfo.h" |
23 #include "llvm/IR/Function.h" | |
22 #include "llvm/MC/MCInstrDesc.h" | 24 #include "llvm/MC/MCInstrDesc.h" |
23 | 25 |
24 using namespace llvm; | 26 using namespace llvm; |
25 | 27 |
26 SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm) | 28 SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st) |
27 : AMDGPUInstrInfo(tm), | 29 : AMDGPUInstrInfo(st), |
28 RI(tm) | 30 RI(st) { } |
29 { } | |
30 | |
31 const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const { | |
32 return RI; | |
33 } | |
34 | 31 |
35 //===----------------------------------------------------------------------===// | 32 //===----------------------------------------------------------------------===// |
36 // TargetInstrInfo callbacks | 33 // TargetInstrInfo callbacks |
37 //===----------------------------------------------------------------------===// | 34 //===----------------------------------------------------------------------===// |
35 | |
36 static unsigned getNumOperandsNoGlue(SDNode *Node) { | |
37 unsigned N = Node->getNumOperands(); | |
38 while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) | |
39 --N; | |
40 return N; | |
41 } | |
42 | |
43 static SDValue findChainOperand(SDNode *Load) { | |
44 SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1); | |
45 assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node"); | |
46 return LastOp; | |
47 } | |
48 | |
49 /// \brief Returns true if both nodes have the same value for the given | |
50 /// operand \p OpName, or if both nodes do not have this operand. |
51 static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) { | |
52 unsigned Opc0 = N0->getMachineOpcode(); | |
53 unsigned Opc1 = N1->getMachineOpcode(); | |
54 | |
55 int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName); | |
56 int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName); | |
57 | |
58 if (Op0Idx == -1 && Op1Idx == -1) | |
59 return true; | |
60 | |
61 | |
62 if ((Op0Idx == -1 && Op1Idx != -1) || | |
63 (Op1Idx == -1 && Op0Idx != -1)) | |
64 return false; | |
65 | |
66 // getNamedOperandIdx returns the index for the MachineInstr's operands, | |
67 // which includes the result as the first operand. We are indexing into the | |
68 // MachineSDNode's operands, so we need to skip the result operand to get | |
69 // the real index. | |
70 --Op0Idx; | |
71 --Op1Idx; | |
72 | |
73 return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx); | |
74 } | |
75 | |
76 bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, | |
77 int64_t &Offset0, | |
78 int64_t &Offset1) const { | |
79 if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode()) | |
80 return false; | |
81 | |
82 unsigned Opc0 = Load0->getMachineOpcode(); | |
83 unsigned Opc1 = Load1->getMachineOpcode(); | |
84 | |
85 // Make sure both are actually loads. | |
86 if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad()) | |
87 return false; | |
88 | |
89 if (isDS(Opc0) && isDS(Opc1)) { | |
90 assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)); | |
91 | |
92 // TODO: Also shouldn't see read2st | |
93 assert(Opc0 != AMDGPU::DS_READ2_B32 && | |
94 Opc0 != AMDGPU::DS_READ2_B64 && | |
95 Opc1 != AMDGPU::DS_READ2_B32 && | |
96 Opc1 != AMDGPU::DS_READ2_B64); | |
97 | |
98 // Check base reg. | |
99 if (Load0->getOperand(1) != Load1->getOperand(1)) | |
100 return false; | |
101 | |
102 // Check chain. | |
103 if (findChainOperand(Load0) != findChainOperand(Load1)) | |
104 return false; | |
105 | |
106 Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue(); | |
107 Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue(); | |
108 return true; | |
109 } | |
110 | |
111 if (isSMRD(Opc0) && isSMRD(Opc1)) { | |
112 assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)); | |
113 | |
114 // Check base reg. | |
115 if (Load0->getOperand(0) != Load1->getOperand(0)) | |
116 return false; | |
117 | |
118 // Check chain. | |
119 if (findChainOperand(Load0) != findChainOperand(Load1)) | |
120 return false; | |
121 | |
122 Offset0 = cast<ConstantSDNode>(Load0->getOperand(1))->getZExtValue(); | |
123 Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue(); | |
124 return true; | |
125 } | |
126 | |
127 // MUBUF and MTBUF can access the same addresses. | |
128 if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) { | |
129 | |
130 // MUBUF and MTBUF have vaddr at different indices. | |
131 if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) || | |
132 findChainOperand(Load0) != findChainOperand(Load1) || | |
133 !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) || | |
134 !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc)) | |
135 return false; | |
136 | |
137 int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset); | |
138 int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset); | |
139 | |
140 if (OffIdx0 == -1 || OffIdx1 == -1) | |
141 return false; | |
142 | |
143 // getNamedOperandIdx returns the index for MachineInstrs. Since they | |
144 // include the output in the operand list, but SDNodes don't, we need to |
145 // subtract one from the index. |
146 --OffIdx0; | |
147 --OffIdx1; | |
148 | |
149 SDValue Off0 = Load0->getOperand(OffIdx0); | |
150 SDValue Off1 = Load1->getOperand(OffIdx1); | |
151 | |
152 // The offset might be a FrameIndexSDNode. | |
153 if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1)) | |
154 return false; | |
155 | |
156 Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue(); | |
157 Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue(); | |
158 return true; | |
159 } | |
160 | |
161 return false; | |
162 } | |
163 | |
164 bool SIInstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, | |
165 unsigned &BaseReg, unsigned &Offset, | |
166 const TargetRegisterInfo *TRI) const { | |
167 unsigned Opc = LdSt->getOpcode(); | |
168 if (isDS(Opc)) { | |
169 const MachineOperand *OffsetImm = getNamedOperand(*LdSt, | |
170 AMDGPU::OpName::offset); | |
171 if (OffsetImm) { | |
172 // Normal, single offset LDS instruction. | |
173 const MachineOperand *AddrReg = getNamedOperand(*LdSt, | |
174 AMDGPU::OpName::addr); | |
175 | |
176 BaseReg = AddrReg->getReg(); | |
177 Offset = OffsetImm->getImm(); | |
178 return true; | |
179 } | |
180 | |
181 // The 2 offset instructions use offset0 and offset1 instead. We can treat | |
182 // these as a load with a single offset if the 2 offsets are consecutive. We | |
183 // will use this for some partially aligned loads. | |
184 const MachineOperand *Offset0Imm = getNamedOperand(*LdSt, | |
185 AMDGPU::OpName::offset0); | |
186 const MachineOperand *Offset1Imm = getNamedOperand(*LdSt, | |
187 AMDGPU::OpName::offset1); | |
188 | |
189 uint8_t Offset0 = Offset0Imm->getImm(); | |
190 uint8_t Offset1 = Offset1Imm->getImm(); | |
191 assert(Offset1 > Offset0); | |
192 | |
193 if (Offset1 - Offset0 == 1) { | |
194 // Each of these offsets is in element sized units, so we need to convert | |
195 // to bytes of the individual reads. | |
196 | |
197 unsigned EltSize; | |
198 if (LdSt->mayLoad()) | |
199 EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2; | |
200 else { | |
201 assert(LdSt->mayStore()); | |
202 int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); | |
203 EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize(); | |
204 } | |
205 | |
206 const MachineOperand *AddrReg = getNamedOperand(*LdSt, | |
207 AMDGPU::OpName::addr); | |
208 BaseReg = AddrReg->getReg(); | |
209 Offset = EltSize * Offset0; | |
210 return true; | |
211 } | |
212 | |
213 return false; | |
214 } | |
215 | |
216 if (isMUBUF(Opc) || isMTBUF(Opc)) { | |
217 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1) | |
218 return false; | |
219 | |
220 const MachineOperand *AddrReg = getNamedOperand(*LdSt, | |
221 AMDGPU::OpName::vaddr); | |
222 if (!AddrReg) | |
223 return false; | |
224 | |
225 const MachineOperand *OffsetImm = getNamedOperand(*LdSt, | |
226 AMDGPU::OpName::offset); | |
227 BaseReg = AddrReg->getReg(); | |
228 Offset = OffsetImm->getImm(); | |
229 return true; | |
230 } | |
231 | |
232 if (isSMRD(Opc)) { | |
233 const MachineOperand *OffsetImm = getNamedOperand(*LdSt, | |
234 AMDGPU::OpName::offset); | |
235 if (!OffsetImm) | |
236 return false; | |
237 | |
238 const MachineOperand *SBaseReg = getNamedOperand(*LdSt, | |
239 AMDGPU::OpName::sbase); | |
240 BaseReg = SBaseReg->getReg(); | |
241 Offset = OffsetImm->getImm(); | |
242 return true; | |
243 } | |
244 | |
245 return false; | |
246 } | |
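
The read2/write2 handling above only merges the two element-sized offsets when they are consecutive, then rescales to bytes. A standalone sketch of that arithmetic (mergeOffsets is a hypothetical helper, not the LLVM API):

```cpp
#include <cstdint>
#include <iostream>
#include <optional>

// Returns the combined byte offset if the two element-sized offsets are
// consecutive, mirroring the offset0/offset1 handling above.
std::optional<unsigned> mergeOffsets(uint8_t Off0, uint8_t Off1,
                                     unsigned EltSizeBytes) {
  if (Off1 - Off0 != 1)
    return std::nullopt;      // not consecutive: cannot treat as one load
  return EltSizeBytes * Off0; // convert element units to bytes
}

int main() {
  // e.g. a two-element DS read of 4-byte elements at offsets 2 and 3.
  if (auto Off = mergeOffsets(2, 3, 4))
    std::cout << "merged byte offset = " << *Off << "\n"; // prints 8
}
```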
38 | 247 |
39 void | 248 void |
40 SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, | 249 SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, |
41 MachineBasicBlock::iterator MI, DebugLoc DL, | 250 MachineBasicBlock::iterator MI, DebugLoc DL, |
42 unsigned DestReg, unsigned SrcReg, | 251 unsigned DestReg, unsigned SrcReg, |
190 MachineBasicBlock::iterator MI, | 399 MachineBasicBlock::iterator MI, |
191 unsigned SrcReg, bool isKill, | 400 unsigned SrcReg, bool isKill, |
192 int FrameIndex, | 401 int FrameIndex, |
193 const TargetRegisterClass *RC, | 402 const TargetRegisterClass *RC, |
194 const TargetRegisterInfo *TRI) const { | 403 const TargetRegisterInfo *TRI) const { |
195 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); | 404 MachineFunction *MF = MBB.getParent(); |
196 SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>(); | 405 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); |
197 DebugLoc DL = MBB.findDebugLoc(MI); | 406 DebugLoc DL = MBB.findDebugLoc(MI); |
198 unsigned KillFlag = isKill ? RegState::Kill : 0; | 407 |
199 | 408 if (RI.hasVGPRs(RC)) { |
200 if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) { | 409 LLVMContext &Ctx = MF->getFunction()->getContext(); |
201 unsigned Lane = MFI->SpillTracker.getNextLane(MRI); | 410 Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Can't spill VGPR!"); |
202 BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), | 411 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0) |
203 MFI->SpillTracker.LaneVGPR) | 412 .addReg(SrcReg); |
204 .addReg(SrcReg, KillFlag) | 413 } else if (RI.isSGPRClass(RC)) { |
205 .addImm(Lane); | 414 // We are only allowed to create one new instruction when spilling |
206 MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, | 415 // registers, so we need to use pseudo instruction for spilling |
207 Lane); | 416 // SGPRs. |
417 unsigned Opcode; | |
418 switch (RC->getSize() * 8) { | |
419 case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break; | |
420 case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break; | |
421 case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break; | |
422 case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break; | |
423 case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break; | |
424 default: llvm_unreachable("Cannot spill register class"); | |
425 } | |
426 | |
427 FrameInfo->setObjectAlignment(FrameIndex, 4); | |
428 BuildMI(MBB, MI, DL, get(Opcode)) | |
429 .addReg(SrcReg) | |
430 .addFrameIndex(FrameIndex); | |
208 } else { | 431 } else { |
209 for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) { | 432 llvm_unreachable("VGPR spilling not supported"); |
210 unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); | |
211 BuildMI(MBB, MI, MBB.findDebugLoc(MI), get(AMDGPU::COPY), SubReg) | |
212 .addReg(SrcReg, 0, RI.getSubRegFromChannel(i)); | |
213 storeRegToStackSlot(MBB, MI, SubReg, isKill, FrameIndex + i, | |
214 &AMDGPU::SReg_32RegClass, TRI); | |
215 } | |
216 } | 433 } |
217 } | 434 } |
218 | 435 |
219 void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, | 436 void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, |
220 MachineBasicBlock::iterator MI, | 437 MachineBasicBlock::iterator MI, |
221 unsigned DestReg, int FrameIndex, | 438 unsigned DestReg, int FrameIndex, |
222 const TargetRegisterClass *RC, | 439 const TargetRegisterClass *RC, |
223 const TargetRegisterInfo *TRI) const { | 440 const TargetRegisterInfo *TRI) const { |
224 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); | 441 MachineFunction *MF = MBB.getParent(); |
225 SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>(); | 442 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); |
226 DebugLoc DL = MBB.findDebugLoc(MI); | 443 DebugLoc DL = MBB.findDebugLoc(MI); |
227 if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) { | 444 |
228 SIMachineFunctionInfo::SpilledReg Spill = | 445 if (RI.hasVGPRs(RC)) { |
229 MFI->SpillTracker.getSpilledReg(FrameIndex); | 446 LLVMContext &Ctx = MF->getFunction()->getContext(); |
230 assert(Spill.VGPR); | 447 Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Can't retrieve spilled VGPR!"); |
231 BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg) | 448 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) |
232 .addReg(Spill.VGPR) | 449 .addImm(0); |
233 .addImm(Spill.Lane); | 450 } else if (RI.isSGPRClass(RC)){ |
451 unsigned Opcode; | |
452 switch(RC->getSize() * 8) { | |
453 case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break; | |
454 case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break; | |
455 case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break; | |
456 case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break; | |
457 case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break; | |
458 default: llvm_unreachable("Cannot spill register class"); | |
459 } | |
460 | |
461 FrameInfo->setObjectAlignment(FrameIndex, 4); | |
462 BuildMI(MBB, MI, DL, get(Opcode), DestReg) | |
463 .addFrameIndex(FrameIndex); | |
234 } else { | 464 } else { |
235 for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) { | 465 llvm_unreachable("VGPR spilling not supported"); |
236 unsigned Flags = RegState::Define; | 466 } |
237 if (i == 0) { | 467 } |
238 Flags |= RegState::Undef; | 468 |
239 } | 469 void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI, |
240 unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); | 470 int Count) const { |
241 loadRegFromStackSlot(MBB, MI, SubReg, FrameIndex + i, | 471 while (Count > 0) { |
242 &AMDGPU::SReg_32RegClass, TRI); | 472 int Arg; |
243 BuildMI(MBB, MI, DL, get(AMDGPU::COPY)) | 473 if (Count >= 8) |
244 .addReg(DestReg, Flags, RI.getSubRegFromChannel(i)) | 474 Arg = 7; |
245 .addReg(SubReg); | 475 else |
246 } | 476 Arg = Count - 1; |
247 } | 477 Count -= 8; |
478 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP)) | |
479 .addImm(Arg); | |
480 } | |
481 } | |
482 | |
483 bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { | |
484 MachineBasicBlock &MBB = *MI->getParent(); | |
485 DebugLoc DL = MBB.findDebugLoc(MI); | |
486 switch (MI->getOpcode()) { | |
487 default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); | |
488 | |
489 case AMDGPU::SI_CONSTDATA_PTR: { | |
490 unsigned Reg = MI->getOperand(0).getReg(); | |
491 unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0); | |
492 unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1); | |
493 | |
494 BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg); | |
495 | |
496 // Add 32-bit offset from this instruction to the start of the constant data. | |
497 BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo) | |
498 .addReg(RegLo) | |
499 .addTargetIndex(AMDGPU::TI_CONSTDATA_START) | |
500 .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit); | |
501 BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi) | |
502 .addReg(RegHi) | |
503 .addImm(0) | |
504 .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit) | |
505 .addReg(AMDGPU::SCC, RegState::Implicit); | |
506 MI->eraseFromParent(); | |
507 break; | |
508 } | |
509 } | |
510 return true; | |
248 } | 511 } |
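
The S_ADD_U32/S_ADDC_U32 pair above is a 64-bit add done in 32-bit halves, with SCC carrying between them. A standalone model of that carry chain (assumed instruction semantics, simplified):

```cpp
#include <cassert>
#include <cstdint>

// Model: the low add sets a carry flag (SCC), the high add consumes it.
uint64_t add64(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi) {
  uint32_t Lo = ALo + BLo;
  bool SCC = Lo < ALo;                     // carry out of the low add
  uint32_t Hi = AHi + BHi + (SCC ? 1 : 0); // S_ADDC_U32 consumes SCC
  return ((uint64_t)Hi << 32) | Lo;
}

int main() {
  assert(add64(0xFFFFFFFFu, 0, 1, 0) == 0x100000000ull);
  return 0;
}
```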
249 | 512 |
250 MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, | 513 MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, |
251 bool NewMI) const { | 514 bool NewMI) const { |
252 | 515 |
253 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); | |
254 if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg()) | 516 if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg()) |
255 return 0; | 517 return nullptr; |
256 | 518 |
257 // Cannot commute VOP2 if src0 is SGPR. | 519 // Make sure it's legal to commute operands for VOP2. |
258 if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() && | 520 if (isVOP2(MI->getOpcode()) && |
259 RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg()))) | 521 (!isOperandLegal(MI, 1, &MI->getOperand(2)) || |
260 return 0; | 522 !isOperandLegal(MI, 2, &MI->getOperand(1)))) |
523 return nullptr; | |
261 | 524 |
262 if (!MI->getOperand(2).isReg()) { | 525 if (!MI->getOperand(2).isReg()) { |
263 // XXX: Commute instructions with FPImm operands | 526 // XXX: Commute instructions with FPImm operands |
264 if (NewMI || MI->getOperand(2).isFPImm() || | 527 if (NewMI || MI->getOperand(2).isFPImm() || |
265 (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) { | 528 (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) { |
266 return 0; | 529 return nullptr; |
267 } | 530 } |
268 | 531 |
269 // XXX: Commute VOP3 instructions with abs and neg set. | 532 // XXX: Commute VOP3 instructions with abs and neg set. |
270 if (isVOP3(MI->getOpcode()) && | 533 const MachineOperand *Abs = getNamedOperand(*MI, AMDGPU::OpName::abs); |
271 (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(), | 534 const MachineOperand *Neg = getNamedOperand(*MI, AMDGPU::OpName::neg); |
272 AMDGPU::OpName::abs)).getImm() || | 535 const MachineOperand *Src0Mods = getNamedOperand(*MI, |
273 MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(), | 536 AMDGPU::OpName::src0_modifiers); |
274 AMDGPU::OpName::neg)).getImm())) | 537 const MachineOperand *Src1Mods = getNamedOperand(*MI, |
275 return 0; | 538 AMDGPU::OpName::src1_modifiers); |
539 const MachineOperand *Src2Mods = getNamedOperand(*MI, | |
540 AMDGPU::OpName::src2_modifiers); | |
541 | |
542 if ((Abs && Abs->getImm()) || (Neg && Neg->getImm()) || | |
543 (Src0Mods && Src0Mods->getImm()) || (Src1Mods && Src1Mods->getImm()) || | |
544 (Src2Mods && Src2Mods->getImm())) | |
545 return nullptr; | |
276 | 546 |
277 unsigned Reg = MI->getOperand(1).getReg(); | 547 unsigned Reg = MI->getOperand(1).getReg(); |
548 unsigned SubReg = MI->getOperand(1).getSubReg(); | |
278 MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm()); | 549 MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm()); |
279 MI->getOperand(2).ChangeToRegister(Reg, false); | 550 MI->getOperand(2).ChangeToRegister(Reg, false); |
551 MI->getOperand(2).setSubReg(SubReg); | |
280 } else { | 552 } else { |
281 MI = TargetInstrInfo::commuteInstruction(MI, NewMI); | 553 MI = TargetInstrInfo::commuteInstruction(MI, NewMI); |
282 } | 554 } |
283 | 555 |
284 if (MI) | 556 if (MI) |
309 bool | 581 bool |
310 SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { | 582 SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { |
311 return RC != &AMDGPU::EXECRegRegClass; | 583 return RC != &AMDGPU::EXECRegRegClass; |
312 } | 584 } |
313 | 585 |
314 int SIInstrInfo::isMIMG(uint16_t Opcode) const { | 586 bool |
587 SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI, | |
588 AliasAnalysis *AA) const { | |
589 switch(MI->getOpcode()) { | |
590 default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA); | |
591 case AMDGPU::S_MOV_B32: | |
592 case AMDGPU::S_MOV_B64: | |
593 case AMDGPU::V_MOV_B32_e32: | |
594 return MI->getOperand(1).isImm(); | |
595 } | |
596 } | |
597 | |
598 namespace llvm { | |
599 namespace AMDGPU { | |
600 // Helper function generated by tablegen. We are wrapping this with | |
601 // an SIInstrInfo function that returns bool rather than int. | |
602 int isDS(uint16_t Opcode); | |
603 } | |
604 } | |
605 | |
606 bool SIInstrInfo::isDS(uint16_t Opcode) const { | |
607 return ::AMDGPU::isDS(Opcode) != -1; | |
608 } | |
609 | |
610 bool SIInstrInfo::isMIMG(uint16_t Opcode) const { | |
315 return get(Opcode).TSFlags & SIInstrFlags::MIMG; | 611 return get(Opcode).TSFlags & SIInstrFlags::MIMG; |
316 } | 612 } |
317 | 613 |
318 int SIInstrInfo::isSMRD(uint16_t Opcode) const { | 614 bool SIInstrInfo::isSMRD(uint16_t Opcode) const { |
319 return get(Opcode).TSFlags & SIInstrFlags::SMRD; | 615 return get(Opcode).TSFlags & SIInstrFlags::SMRD; |
616 } | |
617 | |
618 bool SIInstrInfo::isMUBUF(uint16_t Opcode) const { | |
619 return get(Opcode).TSFlags & SIInstrFlags::MUBUF; | |
620 } | |
621 | |
622 bool SIInstrInfo::isMTBUF(uint16_t Opcode) const { | |
623 return get(Opcode).TSFlags & SIInstrFlags::MTBUF; | |
320 } | 624 } |
321 | 625 |
322 bool SIInstrInfo::isVOP1(uint16_t Opcode) const { | 626 bool SIInstrInfo::isVOP1(uint16_t Opcode) const { |
323 return get(Opcode).TSFlags & SIInstrFlags::VOP1; | 627 return get(Opcode).TSFlags & SIInstrFlags::VOP1; |
324 } | 628 } |
337 | 641 |
338 bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const { | 642 bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const { |
339 return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU; | 643 return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU; |
340 } | 644 } |
341 | 645 |
646 bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { | |
647 int32_t Val = Imm.getSExtValue(); | |
648 if (Val >= -16 && Val <= 64) | |
649 return true; | |
650 | |
651 // The actual type of the operand does not seem to matter as long | |
652 // as the bits match one of the inline immediate values. For example: | |
653 // | |
654 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, | |
655 // so it is a legal inline immediate. | |
656 // | |
657 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in | |
658 // floating-point, so it is a legal inline immediate. | |
659 | |
660 return (APInt::floatToBits(0.0f) == Imm) || | |
661 (APInt::floatToBits(1.0f) == Imm) || | |
662 (APInt::floatToBits(-1.0f) == Imm) || | |
663 (APInt::floatToBits(0.5f) == Imm) || | |
664 (APInt::floatToBits(-0.5f) == Imm) || | |
665 (APInt::floatToBits(2.0f) == Imm) || | |
666 (APInt::floatToBits(-2.0f) == Imm) || | |
667 (APInt::floatToBits(4.0f) == Imm) || | |
668 (APInt::floatToBits(-4.0f) == Imm); | |
669 } | |
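
As the comment above says, only the bit pattern matters. A self-contained approximation of the same test (floatBits is a stand-in for APInt::floatToBits):

```cpp
#include <cstdint>
#include <cstring>

// Raw float encoding, standing in for APInt::floatToBits.
static uint32_t floatBits(float F) {
  uint32_t B;
  std::memcpy(&B, &F, sizeof(B));
  return B;
}

// Mirrors the check above: small signed integers and a fixed set of
// float encodings are legal inline immediates.
bool isInlineImm(uint32_t Bits) {
  int32_t Val = (int32_t)Bits;
  if (Val >= -16 && Val <= 64)
    return true;
  const float Legal[] = {0.0f, 0.5f, -0.5f, 1.0f, -1.0f,
                         2.0f, -2.0f, 4.0f, -4.0f};
  for (float F : Legal)
    if (floatBits(F) == Bits)
      return true;
  return false;
}

int main() { return isInlineImm(floatBits(1.0f)) ? 0 : 1; }
```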
670 | |
342 bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const { | 671 bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const { |
343 if(MO.isImm()) { | 672 if (MO.isImm()) |
344 return MO.getImm() >= -16 && MO.getImm() <= 64; | 673 return isInlineConstant(APInt(32, MO.getImm(), true)); |
345 } | 674 |
346 if (MO.isFPImm()) { | 675 if (MO.isFPImm()) { |
347 return MO.getFPImm()->isExactlyValue(0.0) || | 676 APFloat FpImm = MO.getFPImm()->getValueAPF(); |
348 MO.getFPImm()->isExactlyValue(0.5) || | 677 return isInlineConstant(FpImm.bitcastToAPInt()); |
349 MO.getFPImm()->isExactlyValue(-0.5) || | 678 } |
350 MO.getFPImm()->isExactlyValue(1.0) || | 679 |
351 MO.getFPImm()->isExactlyValue(-1.0) || | |
352 MO.getFPImm()->isExactlyValue(2.0) || | |
353 MO.getFPImm()->isExactlyValue(-2.0) || | |
354 MO.getFPImm()->isExactlyValue(4.0) || | |
355 MO.getFPImm()->isExactlyValue(-4.0); | |
356 } | |
357 return false; | 680 return false; |
358 } | 681 } |
359 | 682 |
360 bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const { | 683 bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const { |
361 return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO); | 684 return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO); |
685 } | |
686 | |
687 static bool compareMachineOp(const MachineOperand &Op0, | |
688 const MachineOperand &Op1) { | |
689 if (Op0.getType() != Op1.getType()) | |
690 return false; | |
691 | |
692 switch (Op0.getType()) { | |
693 case MachineOperand::MO_Register: | |
694 return Op0.getReg() == Op1.getReg(); | |
695 case MachineOperand::MO_Immediate: | |
696 return Op0.getImm() == Op1.getImm(); | |
697 case MachineOperand::MO_FPImmediate: | |
698 return Op0.getFPImm() == Op1.getFPImm(); | |
699 default: | |
700 llvm_unreachable("Didn't expect to be comparing these operand types"); | |
701 } | |
702 } | |
703 | |
704 bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo, | |
705 const MachineOperand &MO) const { | |
706 const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo]; | |
707 | |
708 assert(MO.isImm() || MO.isFPImm()); | |
709 | |
710 if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE) | |
711 return true; | |
712 | |
713 if (OpInfo.RegClass < 0) | |
714 return false; | |
715 | |
716 return RI.regClassCanUseImmediate(OpInfo.RegClass); | |
717 } | |
718 | |
719 bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) { | |
720 switch (AS) { | |
721 case AMDGPUAS::GLOBAL_ADDRESS: { | |
722 // MUBUF instructions have a 12-bit offset in bytes. |
723 return isUInt<12>(OffsetSize); | |
724 } | |
725 case AMDGPUAS::CONSTANT_ADDRESS: { | |
726 // SMRD instructions have an 8-bit offset in dwords. | |
727 return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4); | |
728 } | |
729 case AMDGPUAS::LOCAL_ADDRESS: | |
730 case AMDGPUAS::REGION_ADDRESS: { | |
731 // The single offset versions have a 16-bit offset in bytes. | |
732 return isUInt<16>(OffsetSize); | |
733 } | |
734 case AMDGPUAS::PRIVATE_ADDRESS: | |
735 // Indirect register addressing does not use any offsets. | |
736 default: | |
737 return false; |
738 } | |
739 } | |
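
A standalone restatement of the width rules above, useful for seeing the units at a glance: MUBUF takes a 12-bit byte offset, SMRD an 8-bit dword offset, DS a 16-bit byte offset (the enum values here are illustrative, not the AMDGPUAS constants):

```cpp
#include <cstdint>

enum AddressSpace { GLOBAL, CONSTANT, LOCAL, PRIVATE }; // illustrative

// Mirrors canFoldOffset: each address space has a different offset
// field width and unit.
bool canFoldOffset(unsigned OffsetSize, AddressSpace AS) {
  switch (AS) {
  case GLOBAL:   return OffsetSize < (1u << 12);               // 12-bit bytes
  case CONSTANT: return (OffsetSize % 4 == 0) &&
                        (OffsetSize / 4) < (1u << 8);          // 8-bit dwords
  case LOCAL:    return OffsetSize < (1u << 16);               // 16-bit bytes
  default:       return false; // private/indirect addressing: no offset
  }
}

int main() { return canFoldOffset(4092, GLOBAL) ? 0 : 1; }
```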
740 | |
741 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { | |
742 return AMDGPU::getVOPe32(Opcode) != -1; | |
743 } | |
744 | |
745 bool SIInstrInfo::hasModifiers(unsigned Opcode) const { | |
746 // The src0_modifier operand is present on all instructions | |
747 // that have modifiers. | |
748 | |
749 return AMDGPU::getNamedOperandIdx(Opcode, | |
750 AMDGPU::OpName::src0_modifiers) != -1; | |
362 } | 751 } |
363 | 752 |
364 bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, | 753 bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, |
365 StringRef &ErrInfo) const { | 754 StringRef &ErrInfo) const { |
366 uint16_t Opcode = MI->getOpcode(); | 755 uint16_t Opcode = MI->getOpcode(); |
367 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); | 756 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); |
368 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); | 757 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); |
369 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); | 758 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); |
759 | |
760 // Make sure the number of operands is correct. | |
761 const MCInstrDesc &Desc = get(Opcode); | |
762 if (!Desc.isVariadic() && | |
763 Desc.getNumOperands() != MI->getNumExplicitOperands()) { | |
764 ErrInfo = "Instruction has wrong number of operands."; | |
765 return false; | |
766 } | |
767 | |
768 // Make sure the register classes are correct | |
769 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { | |
770 switch (Desc.OpInfo[i].OperandType) { | |
771 case MCOI::OPERAND_REGISTER: { | |
772 int RegClass = Desc.OpInfo[i].RegClass; | |
773 if (!RI.regClassCanUseImmediate(RegClass) && | |
774 (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm())) { | |
775 // Handle some special cases: | |
776 // Src0 of VOP1, VOP2, or VOPC can be an immediate no matter what |
777 // the register class is. |
778 if (i != Src0Idx || (!isVOP1(Opcode) && !isVOP2(Opcode) && | |
779 !isVOPC(Opcode))) { | |
780 ErrInfo = "Expected register, but got immediate"; | |
781 return false; | |
782 } | |
783 } | |
784 } | |
785 break; | |
786 case MCOI::OPERAND_IMMEDIATE: | |
787 // Check if this operand is an immediate. | |
788 // FrameIndex operands will be replaced by immediates, so they are | |
789 // allowed. | |
790 if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm() && | |
791 !MI->getOperand(i).isFI()) { | |
792 ErrInfo = "Expected immediate, but got non-immediate"; | |
793 return false; | |
794 } | |
795 // Fall-through | |
796 default: | |
797 continue; | |
798 } | |
799 | |
800 if (!MI->getOperand(i).isReg()) | |
801 continue; | |
802 | |
803 int RegClass = Desc.OpInfo[i].RegClass; | |
804 if (RegClass != -1) { | |
805 unsigned Reg = MI->getOperand(i).getReg(); | |
806 if (TargetRegisterInfo::isVirtualRegister(Reg)) | |
807 continue; | |
808 | |
809 const TargetRegisterClass *RC = RI.getRegClass(RegClass); | |
810 if (!RC->contains(Reg)) { | |
811 ErrInfo = "Operand has incorrect register class."; | |
812 return false; | |
813 } | |
814 } | |
815 } | |
816 | |
370 | 817 |
371 // Verify VOP* | 818 // Verify VOP* |
372 if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) { | 819 if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) { |
373 unsigned ConstantBusCount = 0; | 820 unsigned ConstantBusCount = 0; |
374 unsigned SGPRUsed = AMDGPU::NoRegister; | 821 unsigned SGPRUsed = AMDGPU::NoRegister; |
424 if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) { | 871 if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) { |
425 ErrInfo = "VOP3 src2 cannot be a literal constant."; | 872 ErrInfo = "VOP3 src2 cannot be a literal constant."; |
426 return false; | 873 return false; |
427 } | 874 } |
428 } | 875 } |
876 | |
877 // Verify misc. restrictions on specific instructions. | |
878 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || | |
879 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { | |
880 MI->dump(); | |
881 | |
882 const MachineOperand &Src0 = MI->getOperand(2); | |
883 const MachineOperand &Src1 = MI->getOperand(3); | |
884 const MachineOperand &Src2 = MI->getOperand(4); | |
885 if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { | |
886 if (!compareMachineOp(Src0, Src1) && | |
887 !compareMachineOp(Src0, Src2)) { | |
888 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; | |
889 return false; | |
890 } | |
891 } | |
892 } | |
893 | |
429 return true; | 894 return true; |
430 } | 895 } |
431 | 896 |
432 unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { | 897 unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { |
433 switch (MI.getOpcode()) { | 898 switch (MI.getOpcode()) { |
434 default: return AMDGPU::INSTRUCTION_LIST_END; | 899 default: return AMDGPU::INSTRUCTION_LIST_END; |
435 case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; | 900 case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; |
436 case AMDGPU::COPY: return AMDGPU::COPY; | 901 case AMDGPU::COPY: return AMDGPU::COPY; |
437 case AMDGPU::PHI: return AMDGPU::PHI; | 902 case AMDGPU::PHI: return AMDGPU::PHI; |
438 case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32; | 903 case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; |
904 case AMDGPU::S_MOV_B32: | |
905 return MI.getOperand(1).isReg() ? | |
906 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; | |
907 case AMDGPU::S_ADD_I32: | |
908 case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32; | |
439 case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32; | 909 case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32; |
440 case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32; | 910 case AMDGPU::S_SUB_I32: |
911 case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32; | |
441 case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; | 912 case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; |
913 case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32; | |
914 case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32; | |
915 case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32; | |
916 case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32; | |
917 case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32; | |
918 case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32; | |
919 case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32; | |
920 case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32; | |
442 case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; | 921 case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; |
443 case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64; | 922 case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64; |
444 case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; | 923 case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; |
445 case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64; | 924 case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64; |
446 case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; | 925 case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; |
447 case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64; | 926 case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64; |
927 case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32; | |
928 case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32; | |
929 case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32; | |
930 case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32; | |
931 case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; | |
932 case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; | |
933 case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; | |
934 case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32; | |
935 case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32; | |
936 case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32; | |
937 case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32; | |
938 case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32; | |
939 case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32; | |
940 case AMDGPU::S_LOAD_DWORD_IMM: | |
941 case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64; | |
942 case AMDGPU::S_LOAD_DWORDX2_IMM: | |
943 case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; | |
944 case AMDGPU::S_LOAD_DWORDX4_IMM: | |
945 case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; | |
946 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32; | |
947 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; | |
948 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; | |
448 } | 949 } |
449 } | 950 } |
450 | 951 |
451 bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const { | 952 bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const { |
452 return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END; | 953 return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END; |
466 | 967 |
467 bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const { | 968 bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const { |
468 switch (MI.getOpcode()) { | 969 switch (MI.getOpcode()) { |
469 case AMDGPU::COPY: | 970 case AMDGPU::COPY: |
470 case AMDGPU::REG_SEQUENCE: | 971 case AMDGPU::REG_SEQUENCE: |
972 case AMDGPU::PHI: | |
973 case AMDGPU::INSERT_SUBREG: | |
471 return RI.hasVGPRs(getOpRegClass(MI, 0)); | 974 return RI.hasVGPRs(getOpRegClass(MI, 0)); |
472 default: | 975 default: |
473 return RI.hasVGPRs(getOpRegClass(MI, OpNo)); | 976 return RI.hasVGPRs(getOpRegClass(MI, OpNo)); |
474 } | 977 } |
475 } | 978 } |
486 } else if (RI.isSGPRClass(RC)) { | 989 } else if (RI.isSGPRClass(RC)) { |
487 Opcode = AMDGPU::S_MOV_B32; | 990 Opcode = AMDGPU::S_MOV_B32; |
488 } | 991 } |
489 | 992 |
490 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); | 993 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); |
994 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC)) { | |
995 VRC = &AMDGPU::VReg_64RegClass; | |
996 } else { | |
997 VRC = &AMDGPU::VReg_32RegClass; | |
998 } | |
491 unsigned Reg = MRI.createVirtualRegister(VRC); | 999 unsigned Reg = MRI.createVirtualRegister(VRC); |
492 BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode), | 1000 BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode), |
493 Reg).addOperand(MO); | 1001 Reg).addOperand(MO); |
494 MO.ChangeToRegister(Reg, false); | 1002 MO.ChangeToRegister(Reg, false); |
495 } | 1003 } |
496 | 1004 |
1005 unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, | |
1006 MachineRegisterInfo &MRI, | |
1007 MachineOperand &SuperReg, | |
1008 const TargetRegisterClass *SuperRC, | |
1009 unsigned SubIdx, | |
1010 const TargetRegisterClass *SubRC) | |
1011 const { | |
1012 assert(SuperReg.isReg()); | |
1013 | |
1014 unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC); | |
1015 unsigned SubReg = MRI.createVirtualRegister(SubRC); | |
1016 | |
1017 // Just in case the super register is itself a sub-register, copy it to a new | |
1018 // value so we don't need to worry about merging its subreg index with the | |
1019 // SubIdx passed to this function. The register coalescer should be able to | |
1020 // eliminate this extra copy. | |
1021 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY), | |
1022 NewSuperReg) | |
1023 .addOperand(SuperReg); | |
1024 | |
1025 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY), | |
1026 SubReg) | |
1027 .addReg(NewSuperReg, 0, SubIdx); | |
1028 return SubReg; | |
1029 } | |
1030 | |
1031 MachineOperand SIInstrInfo::buildExtractSubRegOrImm( | |
1032 MachineBasicBlock::iterator MII, | |
1033 MachineRegisterInfo &MRI, | |
1034 MachineOperand &Op, | |
1035 const TargetRegisterClass *SuperRC, | |
1036 unsigned SubIdx, | |
1037 const TargetRegisterClass *SubRC) const { | |
1038 if (Op.isImm()) { | |
1039 // XXX - Is there a better way to do this? | |
1040 if (SubIdx == AMDGPU::sub0) | |
1041 return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF); | |
1042 if (SubIdx == AMDGPU::sub1) | |
1043 return MachineOperand::CreateImm(Op.getImm() >> 32); | |
1044 | |
1045 llvm_unreachable("Unhandled register index for immediate"); | |
1046 } | |
1047 | |
1048 unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC, | |
1049 SubIdx, SubRC); | |
1050 return MachineOperand::CreateReg(SubReg, false); | |
1051 } | |
1052 | |
1053 unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist, | |
1054 MachineBasicBlock::iterator MI, | |
1055 MachineRegisterInfo &MRI, | |
1056 const TargetRegisterClass *RC, | |
1057 const MachineOperand &Op) const { | |
1058 MachineBasicBlock *MBB = MI->getParent(); | |
1059 DebugLoc DL = MI->getDebugLoc(); | |
1060 unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); | |
1061 unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); | |
1062 unsigned Dst = MRI.createVirtualRegister(RC); | |
1063 | |
1064 MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), | |
1065 LoDst) | |
1066 .addImm(Op.getImm() & 0xFFFFFFFF); | |
1067 MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), | |
1068 HiDst) | |
1069 .addImm(Op.getImm() >> 32); | |
1070 | |
1071 BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst) | |
1072 .addReg(LoDst) | |
1073 .addImm(AMDGPU::sub0) | |
1074 .addReg(HiDst) | |
1075 .addImm(AMDGPU::sub1); | |
1076 | |
1077 Worklist.push_back(Lo); | |
1078 Worklist.push_back(Hi); | |
1079 | |
1080 return Dst; | |
1081 } | |
1082 | |
1083 bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, | |
1084 const MachineOperand *MO) const { | |
1085 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); | |
1086 const MCInstrDesc &InstDesc = get(MI->getOpcode()); | |
1087 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx]; | |
1088 const TargetRegisterClass *DefinedRC = | |
1089 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; | |
1090 if (!MO) | |
1091 MO = &MI->getOperand(OpIdx); | |
1092 | |
1093 if (MO->isReg()) { | |
1094 assert(DefinedRC); | |
1095 const TargetRegisterClass *RC = MRI.getRegClass(MO->getReg()); | |
1096 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)); | |
1097 } | |
1098 | |
1099 | |
1100 // Handle non-register types that are treated like immediates. | |
1101 assert(MO->isImm() || MO->isFPImm() || MO->isTargetIndex() || MO->isFI()); | |
1102 | |
1103 if (!DefinedRC) | |
1104 // This operand expects an immediate. |
1105 return true; | |
1106 | |
1107 return RI.regClassCanUseImmediate(DefinedRC); | |
1108 } | |
1109 | |
497 void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { | 1110 void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { |
498 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); | 1111 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); |
1112 | |
499 int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), | 1113 int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), |
500 AMDGPU::OpName::src0); | 1114 AMDGPU::OpName::src0); |
501 int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), | 1115 int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), |
502 AMDGPU::OpName::src1); | 1116 AMDGPU::OpName::src1); |
503 int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), | 1117 int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), |
504 AMDGPU::OpName::src2); | 1118 AMDGPU::OpName::src2); |
505 | 1119 |
506 // Legalize VOP2 | 1120 // Legalize VOP2 |
507 if (isVOP2(MI->getOpcode()) && Src1Idx != -1) { | 1121 if (isVOP2(MI->getOpcode()) && Src1Idx != -1) { |
508 MachineOperand &Src0 = MI->getOperand(Src0Idx); | 1122 // Legalize src0 |
509 MachineOperand &Src1 = MI->getOperand(Src1Idx); | 1123 if (!isOperandLegal(MI, Src0Idx)) |
510 | |
511 // If the instruction implicitly reads VCC, we can't have any SGPR operands, | |
512 // so move any. | |
513 bool ReadsVCC = MI->readsRegister(AMDGPU::VCC, &RI); | |
514 if (ReadsVCC && Src0.isReg() && | |
515 RI.isSGPRClass(MRI.getRegClass(Src0.getReg()))) { | |
516 legalizeOpWithMove(MI, Src0Idx); | 1124 legalizeOpWithMove(MI, Src0Idx); |
1125 | |
1126 // Legalize src1 | |
1127 if (isOperandLegal(MI, Src1Idx)) | |
517 return; | 1128 return; |
518 } | 1129 |
519 | 1130 // Usually src0 of VOP2 instructions allow more types of inputs |
520 if (ReadsVCC && Src1.isReg() && | 1131 // than src1, so try to commute the instruction to decrease our |
521 RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) { | 1132 // chances of having to insert a MOV instruction to legalize src1. |
522 legalizeOpWithMove(MI, Src1Idx); | 1133 if (MI->isCommutable()) { |
523 return; | 1134 if (commuteInstruction(MI)) |
524 } | 1135 // If we are successful in commuting, then we know MI is legal, so |
525 | 1136 // we are done. |
526 // Legalize VOP2 instructions where src1 is not a VGPR. An SGPR input must | 1137 return; |
527 // be the first operand, and there can only be one. | 1138 } |
528 if (Src1.isImm() || Src1.isFPImm() || | 1139 |
529 (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) { | 1140 legalizeOpWithMove(MI, Src1Idx); |
530 if (MI->isCommutable()) { | 1141 return; |
531 if (commuteInstruction(MI)) | |
532 return; | |
533 } | |
534 legalizeOpWithMove(MI, Src1Idx); | |
535 } | |
536 } | 1142 } |
537 | 1143 |
538 // XXX - Do any VOP3 instructions read VCC? | 1144 // XXX - Do any VOP3 instructions read VCC? |
539 // Legalize VOP3 | 1145 // Legalize VOP3 |
540 if (isVOP3(MI->getOpcode())) { | 1146 if (isVOP3(MI->getOpcode())) { |
566 // legalize it. | 1172 // legalize it. |
567 legalizeOpWithMove(MI, Idx); | 1173 legalizeOpWithMove(MI, Idx); |
568 } | 1174 } |
569 } | 1175 } |
570 | 1176 |
571 // Legalize REG_SEQUENCE | 1177 // Legalize REG_SEQUENCE and PHI |
572 // The register class of the operands must be the same type as the register | 1178 // The register class of the operands must be the same type as the register |
573 // class of the output. | 1179 // class of the output. |
574 if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) { | 1180 if (MI->getOpcode() == AMDGPU::REG_SEQUENCE || |
575 const TargetRegisterClass *RC = NULL, *SRC = NULL, *VRC = NULL; | 1181 MI->getOpcode() == AMDGPU::PHI) { |
1182 const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr; | |
576 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) { | 1183 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) { |
577 if (!MI->getOperand(i).isReg() || | 1184 if (!MI->getOperand(i).isReg() || |
578 !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg())) | 1185 !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg())) |
579 continue; | 1186 continue; |
580 const TargetRegisterClass *OpRC = | 1187 const TargetRegisterClass *OpRC = |
603 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) { | 1210 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) { |
604 if (!MI->getOperand(i).isReg() || | 1211 if (!MI->getOperand(i).isReg() || |
605 !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg())) | 1212 !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg())) |
606 continue; | 1213 continue; |
607 unsigned DstReg = MRI.createVirtualRegister(RC); | 1214 unsigned DstReg = MRI.createVirtualRegister(RC); |
608 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), | 1215 MachineBasicBlock *InsertBB; |
1216 MachineBasicBlock::iterator Insert; | |
1217 if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) { | |
1218 InsertBB = MI->getParent(); | |
1219 Insert = MI; | |
1220 } else { | |
1221 // MI is a PHI instruction. | |
1222 InsertBB = MI->getOperand(i + 1).getMBB(); | |
1223 Insert = InsertBB->getFirstTerminator(); | |
1224 } | |
1225 BuildMI(*InsertBB, Insert, MI->getDebugLoc(), | |
609 get(AMDGPU::COPY), DstReg) | 1226 get(AMDGPU::COPY), DstReg) |
610 .addOperand(MI->getOperand(i)); | 1227 .addOperand(MI->getOperand(i)); |
611 MI->getOperand(i).setReg(DstReg); | 1228 MI->getOperand(i).setReg(DstReg); |
612 } | 1229 } |
613 } | 1230 } |
1231 | |
1232 // Legalize INSERT_SUBREG | |
1233 // src0 must have the same register class as dst | |
1234 if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) { | |
1235 unsigned Dst = MI->getOperand(0).getReg(); | |
1236 unsigned Src0 = MI->getOperand(1).getReg(); | |
1237 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); | |
1238 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0); | |
1239 if (DstRC != Src0RC) { | |
1240 MachineBasicBlock &MBB = *MI->getParent(); | |
1241 unsigned NewSrc0 = MRI.createVirtualRegister(DstRC); | |
1242 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0) | |
1243 .addReg(Src0); | |
1244 MI->getOperand(1).setReg(NewSrc0); | |
1245 } | |
1246 return; | |
1247 } | |
1248 | |
1249 // Legalize MUBUF* instructions | |
1250 // FIXME: If we start using the non-addr64 instructions for compute, we | |
1251 // may need to legalize them here. | |
1252 int SRsrcIdx = | |
1253 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc); | |
1254 if (SRsrcIdx != -1) { | |
1255 // We have an MUBUF instruction | |
1256 MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx); | |
1257 unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass; | |
1258 if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()), | |
1259 RI.getRegClass(SRsrcRC))) { | |
1260 // The operands are legal. | |
1261 // FIXME: We may need to legalize operands besides srsrc. |
1262 return; | |
1263 } | |
1264 | |
1265 MachineBasicBlock &MBB = *MI->getParent(); | |
1266 // Extract the ptr from the resource descriptor. |
1267 | |
1268 // SRsrcPtrLo = srsrc:sub0 | |
1269 unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc, | |
1270 &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass); | |
1271 | |
1272 // SRsrcPtrHi = srsrc:sub1 | |
1273 unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc, | |
1274 &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass); | |
1275 | |
1276 // Create an empty resource descriptor | |
1277 unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); | |
1278 unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); | |
1279 unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); | |
1280 unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); | |
1281 | |
1282 // Zero64 = 0 | |
1283 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64), | |
1284 Zero64) | |
1285 .addImm(0); | |
1286 | |
1287 // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0} | |
1288 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), | |
1289 SRsrcFormatLo) | |
1290 .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF); | |
1291 | |
1292 // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32} | |
1293 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), | |
1294 SRsrcFormatHi) | |
1295 .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32); | |
1296 | |
1297 // NewSRsrc = {Zero64, SRsrcFormat} | |
1298 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), | |
1299 NewSRsrc) | |
1300 .addReg(Zero64) | |
1301 .addImm(AMDGPU::sub0_sub1) | |
1302 .addReg(SRsrcFormatLo) | |
1303 .addImm(AMDGPU::sub2) | |
1304 .addReg(SRsrcFormatHi) | |
1305 .addImm(AMDGPU::sub3); | |
1306 | |
1307 MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr); | |
1308 unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); | |
1309 unsigned NewVAddrLo; | |
1310 unsigned NewVAddrHi; | |
1311 if (VAddr) { | |
1312 // This is already an ADDR64 instruction so we need to add the pointer | |
1313 // extracted from the resource descriptor to the current value of VAddr. | |
1314 NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); | |
1315 NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); | |
1316 | |
1317 // NewVaddrLo = SRsrcPtrLo + VAddr:sub0 | |
1318 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32), | |
1319 NewVAddrLo) | |
1320 .addReg(SRsrcPtrLo) | |
1321 .addReg(VAddr->getReg(), 0, AMDGPU::sub0) | |
1322 .addReg(AMDGPU::VCC, RegState::ImplicitDefine); | |
1323 | |
1324 // NewVaddrHi = SRsrcPtrHi + VAddr:sub1 | |
1325 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32), | |
1326 NewVAddrHi) | |
1327 .addReg(SRsrcPtrHi) | |
1328 .addReg(VAddr->getReg(), 0, AMDGPU::sub1) | |
1329 .addReg(AMDGPU::VCC, RegState::ImplicitDefine) | |
1330 .addReg(AMDGPU::VCC, RegState::Implicit); | |
1331 | |
1332 } else { | |
1333 // This instruction is the _OFFSET variant, so we need to convert it to |
1334 // ADDR64. | |
1335 MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata); | |
1336 MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset); | |
1337 MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset); | |
1338 assert(SOffset->isImm() && SOffset->getImm() == 0 && "Legalizing MUBUF " | |
1339 "with non-zero soffset is not implemented"); | |
1340 (void)SOffset; | |
1341 | |
1342 // Create the new instruction. | |
1343 unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode()); | |
1344 MachineInstr *Addr64 = | |
1345 BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode)) | |
1346 .addOperand(*VData) | |
1347 .addOperand(*SRsrc) | |
1348 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. | |
1349 // This will be replaced later | |
1350 // with the new value of vaddr. | |
1351 .addOperand(*Offset); | |
1352 | |
1353 MI->removeFromParent(); | |
1354 MI = Addr64; | |
1355 | |
1356 NewVAddrLo = SRsrcPtrLo; | |
1357 NewVAddrHi = SRsrcPtrHi; | |
1358 VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr); | |
1359 SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc); | |
1360 } | |
1361 | |
1362 // NewVaddr = {NewVaddrHi, NewVaddrLo} | |
1363 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), | |
1364 NewVAddr) | |
1365 .addReg(NewVAddrLo) | |
1366 .addImm(AMDGPU::sub0) | |
1367 .addReg(NewVAddrHi) | |
1368 .addImm(AMDGPU::sub1); | |
1369 | |
1370 | |
1371 // Update the instruction to use NewVaddr | |
1372 VAddr->setReg(NewVAddr); | |
1373 // Update the instruction to use NewSRsrc | |
1374 SRsrc->setReg(NewSRsrc); | |
1375 } | |
1376 } | |
1377 | |
1378 void SIInstrInfo::splitSMRD(MachineInstr *MI, | |
1379 const TargetRegisterClass *HalfRC, | |
1380 unsigned HalfImmOp, unsigned HalfSGPROp, | |
1381 MachineInstr *&Lo, MachineInstr *&Hi) const { | |
1382 | |
1383 DebugLoc DL = MI->getDebugLoc(); | |
1384 MachineBasicBlock *MBB = MI->getParent(); | |
1385 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); | |
1386 unsigned RegLo = MRI.createVirtualRegister(HalfRC); | |
1387 unsigned RegHi = MRI.createVirtualRegister(HalfRC); | |
1388 unsigned HalfSize = HalfRC->getSize(); | |
1389 const MachineOperand *OffOp = | |
1390 getNamedOperand(*MI, AMDGPU::OpName::offset); | |
1391 const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase); | |
1392 | |
1393 if (OffOp) { | |
1394 // Handle the _IMM variant | |
1395 unsigned LoOffset = OffOp->getImm(); | |
1396 unsigned HiOffset = LoOffset + (HalfSize / 4); | |
1397 Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo) | |
1398 .addOperand(*SBase) | |
1399 .addImm(LoOffset); | |
1400 | |
1401 if (!isUInt<8>(HiOffset)) { | |
1402 unsigned OffsetSGPR = | |
1403 MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); | |
1404 BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR) | |
1405 .addImm(HiOffset << 2); // The immediate offset is in dwords, | |
1406 // but offset in register is in bytes. | |
1407 Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi) | |
1408 .addOperand(*SBase) | |
1409 .addReg(OffsetSGPR); | |
1410 } else { | |
1411 Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi) | |
1412 .addOperand(*SBase) | |
1413 .addImm(HiOffset); | |
1414 } | |
1415 } else { | |
1416 // Handle the _SGPR variant | |
1417 MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff); | |
1418 Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo) | |
1419 .addOperand(*SBase) | |
1420 .addOperand(*SOff); | |
1421 unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); | |
1422 BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR) | |
1423 .addOperand(*SOff) | |
1424 .addImm(HalfSize); | |
1425 Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp)) | |
1426 .addOperand(*SBase) | |
1427 .addReg(OffsetSGPR); | |
1428 } | |
1429 | |
1430 unsigned SubLo, SubHi; | |
1431 switch (HalfSize) { | |
1432 case 4: | |
1433 SubLo = AMDGPU::sub0; | |
1434 SubHi = AMDGPU::sub1; | |
1435 break; | |
1436 case 8: | |
1437 SubLo = AMDGPU::sub0_sub1; | |
1438 SubHi = AMDGPU::sub2_sub3; | |
1439 break; | |
1440 case 16: | |
1441 SubLo = AMDGPU::sub0_sub1_sub2_sub3; | |
1442 SubHi = AMDGPU::sub4_sub5_sub6_sub7; | |
1443 break; | |
1444 case 32: | |
1445 SubLo = AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7; | |
1446 SubHi = AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15; | |
1447 break; | |
1448 default: | |
1449 llvm_unreachable("Unhandled HalfSize"); | |
1450 } | |
1451 | |
1452 BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE)) | |
1453 .addOperand(MI->getOperand(0)) | |
1454 .addReg(RegLo) | |
1455 .addImm(SubLo) | |
1456 .addReg(RegHi) | |
1457 .addImm(SubHi); | |
1458 } | |
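
The offset arithmetic above is easy to misread: the _IMM form counts dwords and its field is 8 bits, while the register form counts bytes. A standalone sketch of the hi-half offset selection (illustrative values only):

```cpp
#include <iostream>

// For a load split into two halves of HalfSizeBytes each, compute the
// hi half's offset in dwords and decide whether it still fits the
// 8-bit immediate field; otherwise fall back to a byte offset in an SGPR.
void hiHalfOffset(unsigned LoOffsetDwords, unsigned HalfSizeBytes) {
  unsigned HiDwords = LoOffsetDwords + HalfSizeBytes / 4;
  if (HiDwords < 256)
    std::cout << "imm form, offset " << HiDwords << " dwords\n";
  else
    std::cout << "sgpr form, offset " << (HiDwords << 2) << " bytes\n";
}

int main() { hiHalfOffset(4, 16); } // 128-bit halves: hi offset = 8 dwords
```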
1459 | |
1460 void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const { | |
1461 MachineBasicBlock *MBB = MI->getParent(); | |
1462 switch (MI->getOpcode()) { | |
1463 case AMDGPU::S_LOAD_DWORD_IMM: | |
1464 case AMDGPU::S_LOAD_DWORD_SGPR: | |
1465 case AMDGPU::S_LOAD_DWORDX2_IMM: | |
1466 case AMDGPU::S_LOAD_DWORDX2_SGPR: | |
1467 case AMDGPU::S_LOAD_DWORDX4_IMM: | |
1468 case AMDGPU::S_LOAD_DWORDX4_SGPR: { | |
1469 unsigned NewOpcode = getVALUOp(*MI); | |
1470 unsigned RegOffset; | |
1471 unsigned ImmOffset; | |
1472 | |
1473 if (MI->getOperand(2).isReg()) { | |
1474 RegOffset = MI->getOperand(2).getReg(); | |
1475 ImmOffset = 0; | |
1476 } else { | |
1477 assert(MI->getOperand(2).isImm()); | |
1478 // SMRD instructions take a dword offset and MUBUF instructions |
1479 // take a byte offset. | |
1480 ImmOffset = MI->getOperand(2).getImm() << 2; | |
1481 RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); | |
1482 if (isUInt<12>(ImmOffset)) { | |
1483 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), | |
1484 RegOffset) | |
1485 .addImm(0); | |
1486 } else { | |
1487 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), | |
1488 RegOffset) | |
1489 .addImm(ImmOffset); | |
1490 ImmOffset = 0; | |
1491 } | |
1492 } | |
1493 | |
1494 unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); | |
1495 unsigned DWord0 = RegOffset; | |
1496 unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); | |
1497 unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); | |
1498 unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); | |
1499 | |
1500 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1) | |
1501 .addImm(0); | |
1502 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2) | |
1503 .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF); | |
1504 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3) | |
1505 .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32); | |
1506 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc) | |
1507 .addReg(DWord0) | |
1508 .addImm(AMDGPU::sub0) | |
1509 .addReg(DWord1) | |
1510 .addImm(AMDGPU::sub1) | |
1511 .addReg(DWord2) | |
1512 .addImm(AMDGPU::sub2) | |
1513 .addReg(DWord3) | |
1514 .addImm(AMDGPU::sub3); | |
1515 MI->setDesc(get(NewOpcode)); | |
1516 if (MI->getOperand(2).isReg()) { | |
1517 MI->getOperand(2).setReg(MI->getOperand(1).getReg()); | |
1518 } else { | |
1519 MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false); | |
1520 } | |
1521 MI->getOperand(1).setReg(SRsrc); | |
1522 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset)); | |
1523 | |
1524 const TargetRegisterClass *NewDstRC = | |
1525 RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass); | |
1526 | |
1527 unsigned DstReg = MI->getOperand(0).getReg(); | |
1528 unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC); | |
1529 MRI.replaceRegWith(DstReg, NewDstReg); | |
1530 break; | |
1531 } | |
1532 case AMDGPU::S_LOAD_DWORDX8_IMM: | |
1533 case AMDGPU::S_LOAD_DWORDX8_SGPR: { | |
1534 MachineInstr *Lo, *Hi; | |
1535 splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM, | |
1536 AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi); | |
1537 MI->eraseFromParent(); | |
1538 moveSMRDToVALU(Lo, MRI); | |
1539 moveSMRDToVALU(Hi, MRI); | |
1540 break; | |
1541 } | |
1542 | |
1543 case AMDGPU::S_LOAD_DWORDX16_IMM: | |
1544 case AMDGPU::S_LOAD_DWORDX16_SGPR: { | |
1545 MachineInstr *Lo, *Hi; | |
1546 splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM, | |
1547 AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi); | |
1548 MI->eraseFromParent(); | |
1549 moveSMRDToVALU(Lo, MRI); | |
1550 moveSMRDToVALU(Hi, MRI); | |
1551 break; | |
1552 } | |
1553 } | |
614 } | 1554 } |
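Two things worth noting about the lowering above. First, the DWORD/X2/X4 case assembles a 128-bit SReg_128 resource descriptor dword by dword, splitting the 64-bit RSRC_DATA_FORMAT constant into its low and high halves for DWord2 and DWord3. A self-contained sketch of that split (the constant here is a placeholder, not the real descriptor bits):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t RsrcDataFormat = 0xf00000000000ULL; // placeholder bits
      uint32_t DWord2 = RsrcDataFormat & 0xFFFFFFFF;     // low half, as above
      uint32_t DWord3 = RsrcDataFormat >> 32;            // high half, as above
      assert((static_cast<uint64_t>(DWord3) << 32 | DWord2) == RsrcDataFormat);
    }

Second, the DWORDX8 and DWORDX16 cases never lower directly: each splits the load in half with splitSMRD and feeds both halves back into moveSMRDToVALU, so an X16 load bottoms out as two X8 loads and then four X4 loads handled by the first case.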
615 | 1555 |
616 void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { | 1556 void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { |
617 SmallVector<MachineInstr *, 128> Worklist; | 1557 SmallVector<MachineInstr *, 128> Worklist; |
618 Worklist.push_back(&TopInst); | 1558 Worklist.push_back(&TopInst); |
619 | 1559 |
620 while (!Worklist.empty()) { | 1560 while (!Worklist.empty()) { |
621 MachineInstr *Inst = Worklist.pop_back_val(); | 1561 MachineInstr *Inst = Worklist.pop_back_val(); |
1562 MachineBasicBlock *MBB = Inst->getParent(); | |
1563 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); | |
1564 | |
1565 unsigned Opcode = Inst->getOpcode(); | |
622 unsigned NewOpcode = getVALUOp(*Inst); | 1566 unsigned NewOpcode = getVALUOp(*Inst); |
623 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) | 1567 |
1568 // Handle some special cases | |
1569 switch (Opcode) { | |
1570 default: | |
1571 if (isSMRD(Inst->getOpcode())) { | |
1572 moveSMRDToVALU(Inst, MRI); | |
1573 } | |
1574 break; | |
1575 case AMDGPU::S_MOV_B64: { | |
1576 DebugLoc DL = Inst->getDebugLoc(); | |
1577 | |
1578 // If the source operand is a register, we can replace this with a | |
1579 // copy. | |
1580 if (Inst->getOperand(1).isReg()) { | |
1581 MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY)) | |
1582 .addOperand(Inst->getOperand(0)) | |
1583 .addOperand(Inst->getOperand(1)); | |
1584 Worklist.push_back(Copy); | |
1585 } else { | |
1586 // Otherwise, we need to split this into two movs, because there is | |
1587 // no 64-bit VALU move instruction. | |
1588 unsigned Reg = Inst->getOperand(0).getReg(); | |
1589 unsigned Dst = split64BitImm(Worklist, | |
1590 Inst, | |
1591 MRI, | |
1592 MRI.getRegClass(Reg), | |
1593 Inst->getOperand(1)); | |
1594 MRI.replaceRegWith(Reg, Dst); | |
1595 } | |
1596 Inst->eraseFromParent(); | |
624 continue; | 1597 continue; |
625 | 1598 } |
626 MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo(); | 1599 case AMDGPU::S_AND_B64: |
1600 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32); | |
1601 Inst->eraseFromParent(); | |
1602 continue; | |
1603 | |
1604 case AMDGPU::S_OR_B64: | |
1605 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32); | |
1606 Inst->eraseFromParent(); | |
1607 continue; | |
1608 | |
1609 case AMDGPU::S_XOR_B64: | |
1610 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32); | |
1611 Inst->eraseFromParent(); | |
1612 continue; | |
1613 | |
1614 case AMDGPU::S_NOT_B64: | |
1615 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32); | |
1616 Inst->eraseFromParent(); | |
1617 continue; | |
1618 | |
1619 case AMDGPU::S_BCNT1_I32_B64: | |
1620 splitScalar64BitBCNT(Worklist, Inst); | |
1621 Inst->eraseFromParent(); | |
1622 continue; | |
1623 | |
1624 case AMDGPU::S_BFE_U64: | |
1625 case AMDGPU::S_BFE_I64: | |
1626 case AMDGPU::S_BFM_B64: | |
1627 llvm_unreachable("Moving this op to VALU not implemented"); | |
1628 } | |
1629 | |
1630 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { | |
1631 // We cannot move this instruction to the VALU, so we should try to | |
1632 // legalize its operands instead. | |
1633 legalizeOperands(Inst); | |
1634 continue; | |
1635 } | |
627 | 1636 |
628 // Use the new VALU Opcode. | 1637 // Use the new VALU Opcode. |
629 const MCInstrDesc &NewDesc = get(NewOpcode); | 1638 const MCInstrDesc &NewDesc = get(NewOpcode); |
630 Inst->setDesc(NewDesc); | 1639 Inst->setDesc(NewDesc); |
631 | 1640 |
636 MachineOperand &Op = Inst->getOperand(i); | 1645 MachineOperand &Op = Inst->getOperand(i); |
637 if (Op.isReg() && Op.getReg() == AMDGPU::SCC) | 1646 if (Op.isReg() && Op.getReg() == AMDGPU::SCC) |
638 Inst->RemoveOperand(i); | 1647 Inst->RemoveOperand(i); |
639 } | 1648 } |
640 | 1649 |
641 // Add the implicit and explicit register definitions. | 1650 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
642 if (NewDesc.ImplicitUses) { | 1651 // We are converting these to a BFE, so we need to add the missing |
643 for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) { | 1652 // operands for the size and offset. |
644 unsigned Reg = NewDesc.ImplicitUses[i]; | 1653 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16; |
645 Inst->addOperand(MachineOperand::CreateReg(Reg, false, true)); | 1654 Inst->addOperand(MachineOperand::CreateImm(0)); |
646 } | 1655 Inst->addOperand(MachineOperand::CreateImm(Size)); |
647 } | 1656 |
648 | 1657 } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) { |
649 if (NewDesc.ImplicitDefs) { | 1658 // The VALU version adds the second operand to the result, so insert an |
650 for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) { | 1659 // extra 0 operand. |
651 unsigned Reg = NewDesc.ImplicitDefs[i]; | 1660 Inst->addOperand(MachineOperand::CreateImm(0)); |
652 Inst->addOperand(MachineOperand::CreateReg(Reg, true, true)); | 1661 } |
653 } | 1662 |
654 } | 1663 addDescImplicitUseDef(NewDesc, Inst); |
655 | 1664 |
656 legalizeOperands(Inst); | 1665 if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { |
1666 const MachineOperand &OffsetWidthOp = Inst->getOperand(2); | |
1667 // If we need to move this to VGPRs, we need to unpack the second operand | |
1668 // back into the 2 separate ones for bit offset and width. | |
1669 assert(OffsetWidthOp.isImm() && | |
1670 "Scalar BFE is only implemented for constant width and offset"); | |
1671 uint32_t Imm = OffsetWidthOp.getImm(); | |
1672 | |
1673 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. | |
1674 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. | |
1675 Inst->RemoveOperand(2); // Remove old immediate. | |
1676 Inst->addOperand(MachineOperand::CreateImm(Offset)); | |
1677 Inst->addOperand(MachineOperand::CreateImm(BitWidth)); | |
1678 } | |
657 | 1679 |
658 // Update the destination register class. | 1680 // Update the destination register class. |
1681 | |
659 const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0); | 1682 const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0); |
660 | 1683 |
661 switch (Inst->getOpcode()) { | 1684 switch (Opcode) { |
662 // For target instructions, getOpRegClass just returns the virtual | 1685 // For target instructions, getOpRegClass just returns the virtual |
663 // register class associated with the operand, so we need to find an | 1686 // register class associated with the operand, so we need to find an |
664 // equivalent VGPR register class in order to move the instruction to the | 1687 // equivalent VGPR register class in order to move the instruction to the |
665 // VALU. | 1688 // VALU. |
666 case AMDGPU::COPY: | 1689 case AMDGPU::COPY: |
667 case AMDGPU::PHI: | 1690 case AMDGPU::PHI: |
668 case AMDGPU::REG_SEQUENCE: | 1691 case AMDGPU::REG_SEQUENCE: |
1692 case AMDGPU::INSERT_SUBREG: | |
669 if (RI.hasVGPRs(NewDstRC)) | 1693 if (RI.hasVGPRs(NewDstRC)) |
670 continue; | 1694 continue; |
671 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); | 1695 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); |
672 if (!NewDstRC) | 1696 if (!NewDstRC) |
673 continue; | 1697 continue; |
678 | 1702 |
679 unsigned DstReg = Inst->getOperand(0).getReg(); | 1703 unsigned DstReg = Inst->getOperand(0).getReg(); |
680 unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC); | 1704 unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC); |
681 MRI.replaceRegWith(DstReg, NewDstReg); | 1705 MRI.replaceRegWith(DstReg, NewDstReg); |
682 | 1706 |
1707 // Legalize the operands | |
1708 legalizeOperands(Inst); | |
1709 | |
683 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg), | 1710 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg), |
684 E = MRI.use_end(); I != E; ++I) { | 1711 E = MRI.use_end(); I != E; ++I) { |
685 MachineInstr &UseMI = *I; | 1712 MachineInstr &UseMI = *I->getParent(); |
686 if (!canReadVGPR(UseMI, I.getOperandNo())) { | 1713 if (!canReadVGPR(UseMI, I.getOperandNo())) { |
687 Worklist.push_back(&UseMI); | 1714 Worklist.push_back(&UseMI); |
688 } | 1715 } |
689 } | 1716 } |
690 } | 1717 } |
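One encoding detail behind the S_BFE_I32/S_BFE_U32 fix-up in the loop above: the scalar BFE packs the bit offset into bits [5:0] and the field width into bits [22:16] of a single immediate, which is why the code unpacks one operand into two before moving to the VALU. A self-contained sketch of that layout (the helper name is ours, not LLVM's):

    #include <cassert>
    #include <cstdint>

    // Pack offset/width the way the scalar BFE immediate is decoded above.
    static uint32_t packScalarBFE(uint32_t Offset, uint32_t Width) {
      return (Offset & 0x3f) | ((Width & 0x7f) << 16);
    }

    int main() {
      uint32_t Imm = packScalarBFE(0, 8);     // offset 0, width 8 bits
      assert((Imm & 0x3f) == 0);              // offset, bits [5:0]
      assert(((Imm & 0x7f0000) >> 16) == 8);  // width, bits [22:16]
    }

The S_SEXT_I32_I8/I16 case takes the opposite route for the same reason: its VALU form is a BFE, so the missing offset (0) and width (8 or 16) are appended as two separate immediates.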
700 return RegIndex; | 1727 return RegIndex; |
701 } | 1728 } |
702 | 1729 |
703 const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { | 1730 const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { |
704 return &AMDGPU::VReg_32RegClass; | 1731 return &AMDGPU::VReg_32RegClass; |
1732 } | |
1733 | |
1734 void SIInstrInfo::splitScalar64BitUnaryOp( | |
1735 SmallVectorImpl<MachineInstr *> &Worklist, | |
1736 MachineInstr *Inst, | |
1737 unsigned Opcode) const { | |
1738 MachineBasicBlock &MBB = *Inst->getParent(); | |
1739 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); | |
1740 | |
1741 MachineOperand &Dest = Inst->getOperand(0); | |
1742 MachineOperand &Src0 = Inst->getOperand(1); | |
1743 DebugLoc DL = Inst->getDebugLoc(); | |
1744 | |
1745 MachineBasicBlock::iterator MII = Inst; | |
1746 | |
1747 const MCInstrDesc &InstDesc = get(Opcode); | |
1748 const TargetRegisterClass *Src0RC = Src0.isReg() ? | |
1749 MRI.getRegClass(Src0.getReg()) : | |
1750 &AMDGPU::SGPR_32RegClass; | |
1751 | |
1752 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); | |
1753 | |
1754 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, | |
1755 AMDGPU::sub0, Src0SubRC); | |
1756 | |
1757 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); | |
1758 const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0); | |
1759 | |
1760 unsigned DestSub0 = MRI.createVirtualRegister(DestRC); | |
1761 MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0) | |
1762 .addOperand(SrcReg0Sub0); | |
1763 | |
1764 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, | |
1765 AMDGPU::sub1, Src0SubRC); | |
1766 | |
1767 unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC); | |
1768 MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1) | |
1769 .addOperand(SrcReg0Sub1); | |
1770 | |
1771 unsigned FullDestReg = MRI.createVirtualRegister(DestRC); | |
1772 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) | |
1773 .addReg(DestSub0) | |
1774 .addImm(AMDGPU::sub0) | |
1775 .addReg(DestSub1) | |
1776 .addImm(AMDGPU::sub1); | |
1777 | |
1778 MRI.replaceRegWith(Dest.getReg(), FullDestReg); | |
1779 | |
1780 // Try to legalize the operands in case we need to swap the order to keep it | |
1781 // valid. | |
1782 Worklist.push_back(LoHalf); | |
1783 Worklist.push_back(HiHalf); | |
1784 } | |
1785 | |
1786 void SIInstrInfo::splitScalar64BitBinaryOp( | |
1787 SmallVectorImpl<MachineInstr *> &Worklist, | |
1788 MachineInstr *Inst, | |
1789 unsigned Opcode) const { | |
1790 MachineBasicBlock &MBB = *Inst->getParent(); | |
1791 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); | |
1792 | |
1793 MachineOperand &Dest = Inst->getOperand(0); | |
1794 MachineOperand &Src0 = Inst->getOperand(1); | |
1795 MachineOperand &Src1 = Inst->getOperand(2); | |
1796 DebugLoc DL = Inst->getDebugLoc(); | |
1797 | |
1798 MachineBasicBlock::iterator MII = Inst; | |
1799 | |
1800 const MCInstrDesc &InstDesc = get(Opcode); | |
1801 const TargetRegisterClass *Src0RC = Src0.isReg() ? | |
1802 MRI.getRegClass(Src0.getReg()) : | |
1803 &AMDGPU::SGPR_32RegClass; | |
1804 | |
1805 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); | |
1806 const TargetRegisterClass *Src1RC = Src1.isReg() ? | |
1807 MRI.getRegClass(Src1.getReg()) : | |
1808 &AMDGPU::SGPR_32RegClass; | |
1809 | |
1810 const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); | |
1811 | |
1812 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, | |
1813 AMDGPU::sub0, Src0SubRC); | |
1814 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, | |
1815 AMDGPU::sub0, Src1SubRC); | |
1816 | |
1817 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); | |
1818 const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0); | |
1819 | |
1820 unsigned DestSub0 = MRI.createVirtualRegister(DestRC); | |
1821 MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0) | |
1822 .addOperand(SrcReg0Sub0) | |
1823 .addOperand(SrcReg1Sub0); | |
1824 | |
1825 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, | |
1826 AMDGPU::sub1, Src0SubRC); | |
1827 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, | |
1828 AMDGPU::sub1, Src1SubRC); | |
1829 | |
1830 unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC); | |
1831 MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1) | |
1832 .addOperand(SrcReg0Sub1) | |
1833 .addOperand(SrcReg1Sub1); | |
1834 | |
1835 unsigned FullDestReg = MRI.createVirtualRegister(DestRC); | |
1836 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) | |
1837 .addReg(DestSub0) | |
1838 .addImm(AMDGPU::sub0) | |
1839 .addReg(DestSub1) | |
1840 .addImm(AMDGPU::sub1); | |
1841 | |
1842 MRI.replaceRegWith(Dest.getReg(), FullDestReg); | |
1843 | |
1844 // Try to legalize the operands in case we need to swap the order to keep it | |
1845 // valid. | |
1846 Worklist.push_back(LoHalf); | |
1847 Worklist.push_back(HiHalf); | |
1848 } | |
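Both split helpers above rely on the same identity: for the ops they are used with (S_NOT, S_AND, S_OR, S_XOR), every result bit depends only on the same bit position of the inputs, so the low and high dwords can be computed independently and recombined with REG_SEQUENCE. A self-contained check of that identity for the binary case:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t A = 0x0123456789ABCDEFULL, B = 0xFEDCBA9876543210ULL;
      // What LoHalf and HiHalf compute, respectively.
      uint32_t Lo = static_cast<uint32_t>(A) & static_cast<uint32_t>(B);
      uint32_t Hi = static_cast<uint32_t>(A >> 32) &
                    static_cast<uint32_t>(B >> 32);
      // What the REG_SEQUENCE of the two halves reassembles.
      assert((static_cast<uint64_t>(Hi) << 32 | Lo) == (A & B));
    }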
1849 | |
1850 void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist, | |
1851 MachineInstr *Inst) const { | |
1852 MachineBasicBlock &MBB = *Inst->getParent(); | |
1853 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); | |
1854 | |
1855 MachineBasicBlock::iterator MII = Inst; | |
1856 DebugLoc DL = Inst->getDebugLoc(); | |
1857 | |
1858 MachineOperand &Dest = Inst->getOperand(0); | |
1859 MachineOperand &Src = Inst->getOperand(1); | |
1860 | |
1861 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32); | |
1862 const TargetRegisterClass *SrcRC = Src.isReg() ? | |
1863 MRI.getRegClass(Src.getReg()) : | |
1864 &AMDGPU::SGPR_32RegClass; | |
1865 | |
1866 unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); | |
1867 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); | |
1868 | |
1869 const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0); | |
1870 | |
1871 MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, | |
1872 AMDGPU::sub0, SrcSubRC); | |
1873 MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, | |
1874 AMDGPU::sub1, SrcSubRC); | |
1875 | |
1876 MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg) | |
1877 .addOperand(SrcRegSub0) | |
1878 .addImm(0); | |
1879 | |
1880 MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg) | |
1881 .addOperand(SrcRegSub1) | |
1882 .addReg(MidReg); | |
1883 | |
1884 MRI.replaceRegWith(Dest.getReg(), ResultReg); | |
1885 | |
1886 Worklist.push_back(First); | |
1887 Worklist.push_back(Second); | |
1888 } | |
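splitScalar64BitBCNT leans on V_BCNT_U32_B32 computing popcount(src0) + src1: the first instruction counts the low dword with src1 = 0, and the second counts the high dword while folding in the first result through MidReg. A self-contained sketch of that chaining (helper names are ours):

    #include <cassert>
    #include <cstdint>

    static uint32_t bcnt32(uint32_t V) {
      uint32_t N = 0;
      for (; V; V &= V - 1) // clear the lowest set bit
        ++N;
      return N;
    }

    int main() {
      uint64_t X = 0xF0F0F0F00000FFFFULL;
      uint32_t Mid = bcnt32(static_cast<uint32_t>(X)) + 0;             // first V_BCNT
      uint32_t Result = bcnt32(static_cast<uint32_t>(X >> 32)) + Mid;  // second V_BCNT
      assert(Result == 32); // 16 set bits in each half
    }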
1889 | |
1890 void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc, | |
1891 MachineInstr *Inst) const { | |
1892 // Add the implicit register uses and definitions. | |
1893 if (NewDesc.ImplicitUses) { | |
1894 for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) { | |
1895 unsigned Reg = NewDesc.ImplicitUses[i]; | |
1896 Inst->addOperand(MachineOperand::CreateReg(Reg, false, true)); | |
1897 } | |
1898 } | |
1899 | |
1900 if (NewDesc.ImplicitDefs) { | |
1901 for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) { | |
1902 unsigned Reg = NewDesc.ImplicitDefs[i]; | |
1903 Inst->addOperand(MachineOperand::CreateReg(Reg, true, true)); | |
1904 } | |
1905 } | |
705 } | 1906 } |
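For reading the two loops above: MachineOperand::CreateReg takes the register followed by isDef and isImp flags, so the ImplicitUses loop appends implicit uses and the ImplicitDefs loop appends implicit defs:

    // MachineOperand::CreateReg(Reg, /*isDef=*/false, /*isImp=*/true)  -> implicit use
    // MachineOperand::CreateReg(Reg, /*isDef=*/true,  /*isImp=*/true)  -> implicit def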
706 | 1907 |
707 MachineInstrBuilder SIInstrInfo::buildIndirectWrite( | 1908 MachineInstrBuilder SIInstrInfo::buildIndirectWrite( |
708 MachineBasicBlock *MBB, | 1909 MachineBasicBlock *MBB, |
709 MachineBasicBlock::iterator I, | 1910 MachineBasicBlock::iterator I, |
765 Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index)); | 1966 Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index)); |
766 | 1967 |
767 for (int Index = std::max(0, Begin - 15); Index <= End; ++Index) | 1968 for (int Index = std::max(0, Begin - 15); Index <= End; ++Index) |
768 Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index)); | 1969 Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index)); |
769 } | 1970 } |
1971 | |
1972 MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, | |
1973 unsigned OperandName) const { | |
1974 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName); | |
1975 if (Idx == -1) | |
1976 return nullptr; | |
1977 | |
1978 return &MI.getOperand(Idx); | |
1979 } |
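A hedged usage sketch of getNamedOperand: it returns nullptr when the opcode has no operand with that name, so callers can probe safely, as splitSMRD does with sbase and soff above. This fragment assumes the surrounding SIInstrInfo context of this file; the wrapper name is ours:

    // Fragment only, not standalone: relies on this file's headers.
    static unsigned getSBaseRegOrZero(const SIInstrInfo *TII, MachineInstr &MI) {
      if (MachineOperand *SBase = TII->getNamedOperand(MI, AMDGPU::OpName::sbase))
        return SBase->getReg(); // only SMRD-style opcodes define sbase
      return 0;                 // opcode has no sbase operand
    }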