comparison lib/Target/R600/SIInstrInfo.cpp @ 83:60c9769439b8 LLVM3.7

LLVM 3.7
author Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
date Wed, 18 Feb 2015 14:55:36 +0900
parents 54457678186b
comparison
78:af83660cff7b 83:60c9769439b8
19 #include "SIMachineFunctionInfo.h" 19 #include "SIMachineFunctionInfo.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h" 21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h" 22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/IR/Function.h" 23 #include "llvm/IR/Function.h"
24 #include "llvm/CodeGen/RegisterScavenging.h"
24 #include "llvm/MC/MCInstrDesc.h" 25 #include "llvm/MC/MCInstrDesc.h"
26 #include "llvm/Support/Debug.h"
25 27
26 using namespace llvm; 28 using namespace llvm;
27 29
28 SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st) 30 SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
29 : AMDGPUInstrInfo(st), 31 : AMDGPUInstrInfo(st), RI(st) {}
30 RI(st) { }
31 32
32 //===----------------------------------------------------------------------===// 33 //===----------------------------------------------------------------------===//
33 // TargetInstrInfo callbacks 34 // TargetInstrInfo callbacks
34 //===----------------------------------------------------------------------===// 35 //===----------------------------------------------------------------------===//
35 36
85 // Make sure both are actually loads. 86 // Make sure both are actually loads.
86 if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad()) 87 if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
87 return false; 88 return false;
88 89
89 if (isDS(Opc0) && isDS(Opc1)) { 90 if (isDS(Opc0) && isDS(Opc1)) {
90 assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)); 91
91 92 // FIXME: Handle this case:
92 // TODO: Also shouldn't see read2st 93 if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
93 assert(Opc0 != AMDGPU::DS_READ2_B32 && 94 return false;
94 Opc0 != AMDGPU::DS_READ2_B64 &&
95 Opc1 != AMDGPU::DS_READ2_B32 &&
96 Opc1 != AMDGPU::DS_READ2_B64);
97 95
98 // Check base reg. 96 // Check base reg.
99 if (Load0->getOperand(1) != Load1->getOperand(1)) 97 if (Load0->getOperand(1) != Load1->getOperand(1))
100 return false; 98 return false;
101 99
102 // Check chain. 100 // Check chain.
103 if (findChainOperand(Load0) != findChainOperand(Load1)) 101 if (findChainOperand(Load0) != findChainOperand(Load1))
102 return false;
103
104 // Skip read2 / write2 variants for simplicity.
105 // TODO: We should report true if the used offsets are adjacent (excluding
106 // st64 versions).
107 if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
108 AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
104 return false; 109 return false;
105 110
106 Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue(); 111 Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
107 Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue(); 112 Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
108 return true; 113 return true;
157 Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue(); 162 Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
158 return true; 163 return true;
159 } 164 }
160 165
161 return false; 166 return false;
167 }
168
169 static bool isStride64(unsigned Opc) {
170 switch (Opc) {
171 case AMDGPU::DS_READ2ST64_B32:
172 case AMDGPU::DS_READ2ST64_B64:
173 case AMDGPU::DS_WRITE2ST64_B32:
174 case AMDGPU::DS_WRITE2ST64_B64:
175 return true;
176 default:
177 return false;
178 }
162 } 179 }
163 180
164 bool SIInstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, 181 bool SIInstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt,
165 unsigned &BaseReg, unsigned &Offset, 182 unsigned &BaseReg, unsigned &Offset,
166 const TargetRegisterInfo *TRI) const { 183 const TargetRegisterInfo *TRI) const {
201 assert(LdSt->mayStore()); 218 assert(LdSt->mayStore());
202 int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); 219 int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
203 EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize(); 220 EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
204 } 221 }
205 222
223 if (isStride64(Opc))
224 EltSize *= 64;
225
206 const MachineOperand *AddrReg = getNamedOperand(*LdSt, 226 const MachineOperand *AddrReg = getNamedOperand(*LdSt,
207 AMDGPU::OpName::addr); 227 AMDGPU::OpName::addr);
208 BaseReg = AddrReg->getReg(); 228 BaseReg = AddrReg->getReg();
209 Offset = EltSize * Offset0; 229 Offset = EltSize * Offset0;
210 return true; 230 return true;
239 AMDGPU::OpName::sbase); 259 AMDGPU::OpName::sbase);
240 BaseReg = SBaseReg->getReg(); 260 BaseReg = SBaseReg->getReg();
241 Offset = OffsetImm->getImm(); 261 Offset = OffsetImm->getImm();
242 return true; 262 return true;
243 } 263 }
264
265 return false;
266 }
267
268 bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
269 MachineInstr *SecondLdSt,
270 unsigned NumLoads) const {
271 unsigned Opc0 = FirstLdSt->getOpcode();
272 unsigned Opc1 = SecondLdSt->getOpcode();
273
274 // TODO: This needs finer tuning
275 if (NumLoads > 4)
276 return false;
277
278 if (isDS(Opc0) && isDS(Opc1))
279 return true;
280
281 if (isSMRD(Opc0) && isSMRD(Opc1))
282 return true;
283
284 if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1)))
285 return true;
244 286
245 return false; 287 return false;
246 } 288 }
247 289
248 void 290 void
281 }; 323 };
282 324
283 unsigned Opcode; 325 unsigned Opcode;
284 const int16_t *SubIndices; 326 const int16_t *SubIndices;
285 327
286 if (AMDGPU::M0 == DestReg) {
287 // Check if M0 isn't already set to this value
288 for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
289 I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {
290
291 if (!I->definesRegister(AMDGPU::M0))
292 continue;
293
294 unsigned Opc = I->getOpcode();
295 if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
296 break;
297
298 if (!I->readsRegister(SrcReg))
299 break;
300
301 // The copy isn't necessary
302 return;
303 }
304 }
305
306 if (AMDGPU::SReg_32RegClass.contains(DestReg)) { 328 if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
307 assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); 329 assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
308 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) 330 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
309 .addReg(SrcReg, getKillRegState(KillSrc)); 331 .addReg(SrcReg, getKillRegState(KillSrc));
310 return; 332 return;
311 333
312 } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) { 334 } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
335 if (DestReg == AMDGPU::VCC) {
336 if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
337 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
338 .addReg(SrcReg, getKillRegState(KillSrc));
339 } else {
340 // FIXME: Hack until VReg_1 removed.
341 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
342 BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC)
343 .addImm(0)
344 .addReg(SrcReg, getKillRegState(KillSrc));
345 }
346
347 return;
348 }
349
313 assert(AMDGPU::SReg_64RegClass.contains(SrcReg)); 350 assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
314 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) 351 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
315 .addReg(SrcReg, getKillRegState(KillSrc)); 352 .addReg(SrcReg, getKillRegState(KillSrc));
316 return; 353 return;
317 354
328 } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) { 365 } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
329 assert(AMDGPU::SReg_512RegClass.contains(SrcReg)); 366 assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
330 Opcode = AMDGPU::S_MOV_B32; 367 Opcode = AMDGPU::S_MOV_B32;
331 SubIndices = Sub0_15; 368 SubIndices = Sub0_15;
332 369
333 } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) { 370 } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) {
334 assert(AMDGPU::VReg_32RegClass.contains(SrcReg) || 371 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
335 AMDGPU::SReg_32RegClass.contains(SrcReg)); 372 AMDGPU::SReg_32RegClass.contains(SrcReg));
336 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) 373 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
337 .addReg(SrcReg, getKillRegState(KillSrc)); 374 .addReg(SrcReg, getKillRegState(KillSrc));
338 return; 375 return;
339 376
383 420
384 unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const { 421 unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
385 int NewOpc; 422 int NewOpc;
386 423
387 // Try to map original to commuted opcode 424 // Try to map original to commuted opcode
388 if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1) 425 NewOpc = AMDGPU::getCommuteRev(Opcode);
426 // Check if the commuted (REV) opcode exists on the target.
427 if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
389 return NewOpc; 428 return NewOpc;
390 429
391 // Try to map commuted to original opcode 430 // Try to map commuted to original opcode
392 if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1) 431 NewOpc = AMDGPU::getCommuteOrig(Opcode);
432 // Check if the original (non-REV) opcode exists on the target.
433 if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
393 return NewOpc; 434 return NewOpc;
394 435
395 return Opcode; 436 return Opcode;
437 }
438
439 unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
440
441 if (DstRC->getSize() == 4) {
442 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
443 } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) {
444 return AMDGPU::S_MOV_B64;
445 } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) {
446 return AMDGPU::V_MOV_B64_PSEUDO;
447 }
448 return AMDGPU::COPY;
396 } 449 }
397 450
398 void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 451 void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
399 MachineBasicBlock::iterator MI, 452 MachineBasicBlock::iterator MI,
400 unsigned SrcReg, bool isKill, 453 unsigned SrcReg, bool isKill,
401 int FrameIndex, 454 int FrameIndex,
402 const TargetRegisterClass *RC, 455 const TargetRegisterClass *RC,
403 const TargetRegisterInfo *TRI) const { 456 const TargetRegisterInfo *TRI) const {
404 MachineFunction *MF = MBB.getParent(); 457 MachineFunction *MF = MBB.getParent();
458 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
405 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 459 MachineFrameInfo *FrameInfo = MF->getFrameInfo();
406 DebugLoc DL = MBB.findDebugLoc(MI); 460 DebugLoc DL = MBB.findDebugLoc(MI);
407 461 int Opcode = -1;
408 if (RI.hasVGPRs(RC)) { 462
409 LLVMContext &Ctx = MF->getFunction()->getContext(); 463 if (RI.isSGPRClass(RC)) {
410 Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Can't spill VGPR!");
411 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
412 .addReg(SrcReg);
413 } else if (RI.isSGPRClass(RC)) {
414 // We are only allowed to create one new instruction when spilling 464 // We are only allowed to create one new instruction when spilling
415 // registers, so we need to use pseudo instruction for spilling 465 // registers, so we need to use pseudo instruction for spilling
416 // SGPRs. 466 // SGPRs.
417 unsigned Opcode;
418 switch (RC->getSize() * 8) { 467 switch (RC->getSize() * 8) {
419 case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break; 468 case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break;
420 case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break; 469 case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break;
421 case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break; 470 case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
422 case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break; 471 case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
423 case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break; 472 case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
424 default: llvm_unreachable("Cannot spill register class"); 473 }
425 } 474 } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
426 475 MFI->setHasSpilledVGPRs();
476
477 switch(RC->getSize() * 8) {
478 case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break;
479 case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break;
480 case 96: Opcode = AMDGPU::SI_SPILL_V96_SAVE; break;
481 case 128: Opcode = AMDGPU::SI_SPILL_V128_SAVE; break;
482 case 256: Opcode = AMDGPU::SI_SPILL_V256_SAVE; break;
483 case 512: Opcode = AMDGPU::SI_SPILL_V512_SAVE; break;
484 }
485 }
486
487 if (Opcode != -1) {
427 FrameInfo->setObjectAlignment(FrameIndex, 4); 488 FrameInfo->setObjectAlignment(FrameIndex, 4);
428 BuildMI(MBB, MI, DL, get(Opcode)) 489 BuildMI(MBB, MI, DL, get(Opcode))
429 .addReg(SrcReg) 490 .addReg(SrcReg)
430 .addFrameIndex(FrameIndex); 491 .addFrameIndex(FrameIndex)
492 // Place-holder registers; these will be filled in by
493 // SIPrepareScratchRegs.
494 .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
495 .addReg(AMDGPU::SGPR0, RegState::Undef);
431 } else { 496 } else {
432 llvm_unreachable("VGPR spilling not supported"); 497 LLVMContext &Ctx = MF->getFunction()->getContext();
498 Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
499 " spill register");
500 BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
501 .addReg(SrcReg);
433 } 502 }
434 } 503 }
435 504
436 void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 505 void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
437 MachineBasicBlock::iterator MI, 506 MachineBasicBlock::iterator MI,
438 unsigned DestReg, int FrameIndex, 507 unsigned DestReg, int FrameIndex,
439 const TargetRegisterClass *RC, 508 const TargetRegisterClass *RC,
440 const TargetRegisterInfo *TRI) const { 509 const TargetRegisterInfo *TRI) const {
441 MachineFunction *MF = MBB.getParent(); 510 MachineFunction *MF = MBB.getParent();
511 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
442 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 512 MachineFrameInfo *FrameInfo = MF->getFrameInfo();
443 DebugLoc DL = MBB.findDebugLoc(MI); 513 DebugLoc DL = MBB.findDebugLoc(MI);
444 514 int Opcode = -1;
445 if (RI.hasVGPRs(RC)) { 515
446 LLVMContext &Ctx = MF->getFunction()->getContext(); 516 if (RI.isSGPRClass(RC)){
447 Ctx.emitError("SIInstrInfo::loadRegToStackSlot - Can't retrieve spilled VGPR!");
448 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
449 .addImm(0);
450 } else if (RI.isSGPRClass(RC)){
451 unsigned Opcode;
452 switch(RC->getSize() * 8) { 517 switch(RC->getSize() * 8) {
453 case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break; 518 case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
454 case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break; 519 case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
455 case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break; 520 case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
456 case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break; 521 case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
457 case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break; 522 case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
458 default: llvm_unreachable("Cannot spill register class"); 523 }
459 } 524 } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
460 525 switch(RC->getSize() * 8) {
526 case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break;
527 case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break;
528 case 96: Opcode = AMDGPU::SI_SPILL_V96_RESTORE; break;
529 case 128: Opcode = AMDGPU::SI_SPILL_V128_RESTORE; break;
530 case 256: Opcode = AMDGPU::SI_SPILL_V256_RESTORE; break;
531 case 512: Opcode = AMDGPU::SI_SPILL_V512_RESTORE; break;
532 }
533 }
534
535 if (Opcode != -1) {
461 FrameInfo->setObjectAlignment(FrameIndex, 4); 536 FrameInfo->setObjectAlignment(FrameIndex, 4);
462 BuildMI(MBB, MI, DL, get(Opcode), DestReg) 537 BuildMI(MBB, MI, DL, get(Opcode), DestReg)
463 .addFrameIndex(FrameIndex); 538 .addFrameIndex(FrameIndex)
539 // Place-holder registers; these will be filled in by
540 // SIPrepareScratchRegs.
541 .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
542 .addReg(AMDGPU::SGPR0, RegState::Undef);
543
464 } else { 544 } else {
465 llvm_unreachable("VGPR spilling not supported"); 545 LLVMContext &Ctx = MF->getFunction()->getContext();
466 } 546 Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
547 " restore register");
548 BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
549 }
550 }
551
552 /// \param @Offset Offset in bytes of the FrameIndex being spilled
553 unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
554 MachineBasicBlock::iterator MI,
555 RegScavenger *RS, unsigned TmpReg,
556 unsigned FrameOffset,
557 unsigned Size) const {
558 MachineFunction *MF = MBB.getParent();
559 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
560 const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
561 const SIRegisterInfo *TRI =
562 static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
563 DebugLoc DL = MBB.findDebugLoc(MI);
564 unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
565 unsigned WavefrontSize = ST.getWavefrontSize();
566
567 unsigned TIDReg = MFI->getTIDReg();
568 if (!MFI->hasCalculatedTID()) {
569 MachineBasicBlock &Entry = MBB.getParent()->front();
570 MachineBasicBlock::iterator Insert = Entry.front();
571 DebugLoc DL = Insert->getDebugLoc();
572
573 TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
574 if (TIDReg == AMDGPU::NoRegister)
575 return TIDReg;
576
577
578 if (MFI->getShaderType() == ShaderType::COMPUTE &&
579 WorkGroupSize > WavefrontSize) {
580
581 unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X);
582 unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y);
583 unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
584 unsigned InputPtrReg =
585 TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
586 static const unsigned TIDIGRegs[3] = {
587 TIDIGXReg, TIDIGYReg, TIDIGZReg
588 };
589 for (unsigned Reg : TIDIGRegs) {
590 if (!Entry.isLiveIn(Reg))
591 Entry.addLiveIn(Reg);
592 }
593
594 RS->enterBasicBlock(&Entry);
595 unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
596 unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
597 BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
598 .addReg(InputPtrReg)
599 .addImm(SI::KernelInputOffsets::NGROUPS_Z);
600 BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
601 .addReg(InputPtrReg)
602 .addImm(SI::KernelInputOffsets::NGROUPS_Y);
603
604 // NGROUPS.X * NGROUPS.Y
605 BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
606 .addReg(STmp1)
607 .addReg(STmp0);
608 // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
609 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
610 .addReg(STmp1)
611 .addReg(TIDIGXReg);
612 // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
613 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
614 .addReg(STmp0)
615 .addReg(TIDIGYReg)
616 .addReg(TIDReg);
617 // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
618 BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
619 .addReg(TIDReg)
620 .addReg(TIDIGZReg);
621 } else {
622 // Get the wave id
623 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
624 TIDReg)
625 .addImm(-1)
626 .addImm(0);
627
628 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
629 TIDReg)
630 .addImm(-1)
631 .addReg(TIDReg);
632 }
633
634 BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
635 TIDReg)
636 .addImm(2)
637 .addReg(TIDReg);
638 MFI->setTIDReg(TIDReg);
639 }
640
641 // Add FrameIndex to LDS offset
642 unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
643 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
644 .addImm(LDSOffset)
645 .addReg(TIDReg);
646
647 return TmpReg;
467 } 648 }
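
[Editorial sketch, not part of the changeset.] The address formed at the end of calculateLDSSpillAddress reduces to simple arithmetic: the LDS already in use, plus the frame offset scaled by the work-group size, plus four bytes per thread id (the V_LSHLREV_B32 by 2). A standalone restatement with made-up numbers; the function name and all values below are illustrative only:

#include <cstdint>
#include <iostream>

// Recompute the spill address in plain arithmetic:
// existing LDS usage + frame offset scaled by work-group size
// + the lane's own 4-byte slot (TID << 2).
static uint32_t ldsSpillAddress(uint32_t LDSSize, uint32_t FrameOffset,
                                uint32_t WorkGroupSize, uint32_t TID) {
  uint32_t LDSOffset = LDSSize + FrameOffset * WorkGroupSize; // per-object base
  return LDSOffset + 4 * TID;                                 // per-lane slot
}

int main() {
  // Hypothetical kernel: 1 KiB of LDS already used, frame offset 8,
  // 256 work-items per group, lane id 37.
  std::cout << ldsSpillAddress(1024, 8, 256, 37) << '\n'; // 1024 + 2048 + 148 = 3220
}
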
468 649
469 void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI, 650 void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
470 int Count) const { 651 int Count) const {
471 while (Count > 0) { 652 while (Count > 0) {
504 .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit) 685 .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit)
505 .addReg(AMDGPU::SCC, RegState::Implicit); 686 .addReg(AMDGPU::SCC, RegState::Implicit);
506 MI->eraseFromParent(); 687 MI->eraseFromParent();
507 break; 688 break;
508 } 689 }
690 case AMDGPU::SGPR_USE:
691 // This is just a placeholder for register allocation.
692 MI->eraseFromParent();
693 break;
694
695 case AMDGPU::V_MOV_B64_PSEUDO: {
696 unsigned Dst = MI->getOperand(0).getReg();
697 unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
698 unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
699
700 const MachineOperand &SrcOp = MI->getOperand(1);
701 // FIXME: Will this work for 64-bit floating point immediates?
702 assert(!SrcOp.isFPImm());
703 if (SrcOp.isImm()) {
704 APInt Imm(64, SrcOp.getImm());
705 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
706 .addImm(Imm.getLoBits(32).getZExtValue())
707 .addReg(Dst, RegState::Implicit);
708 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
709 .addImm(Imm.getHiBits(32).getZExtValue())
710 .addReg(Dst, RegState::Implicit);
711 } else {
712 assert(SrcOp.isReg());
713 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
714 .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
715 .addReg(Dst, RegState::Implicit);
716 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
717 .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
718 .addReg(Dst, RegState::Implicit);
719 }
720 MI->eraseFromParent();
721 break;
722 }
509 } 723 }
510 return true; 724 return true;
511 } 725 }
512 726
513 MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, 727 MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
514 bool NewMI) const { 728 bool NewMI) const {
515 729
516 if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg()) 730 if (MI->getNumOperands() < 3)
517 return nullptr; 731 return nullptr;
518 732
519 // Make sure it's legal to commute operands for VOP2. 733 int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
734 AMDGPU::OpName::src0);
735 assert(Src0Idx != -1 && "Should always have src0 operand");
736
737 MachineOperand &Src0 = MI->getOperand(Src0Idx);
738 if (!Src0.isReg())
739 return nullptr;
740
741 int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
742 AMDGPU::OpName::src1);
743 if (Src1Idx == -1)
744 return nullptr;
745
746 MachineOperand &Src1 = MI->getOperand(Src1Idx);
747
748 // Make sure it's legal to commute operands for VOP2.
520 if (isVOP2(MI->getOpcode()) && 749 if (isVOP2(MI->getOpcode()) &&
521 (!isOperandLegal(MI, 1, &MI->getOperand(2)) || 750 (!isOperandLegal(MI, Src0Idx, &Src1) ||
522 !isOperandLegal(MI, 2, &MI->getOperand(1)))) 751 !isOperandLegal(MI, Src1Idx, &Src0))) {
523 return nullptr; 752 return nullptr;
524 753 }
525 if (!MI->getOperand(2).isReg()) { 754
526 // XXX: Commute instructions with FPImm operands 755 if (!Src1.isReg()) {
527 if (NewMI || MI->getOperand(2).isFPImm() || 756 // Allow commuting instructions with Imm operands.
757 if (NewMI || !Src1.isImm() ||
528 (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) { 758 (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
529 return nullptr; 759 return nullptr;
530 } 760 }
531 761
532 // XXX: Commute VOP3 instructions with abs and neg set. 762 // Be sure to copy the source modifiers to the right place.
533 const MachineOperand *Abs = getNamedOperand(*MI, AMDGPU::OpName::abs); 763 if (MachineOperand *Src0Mods
534 const MachineOperand *Neg = getNamedOperand(*MI, AMDGPU::OpName::neg); 764 = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
535 const MachineOperand *Src0Mods = getNamedOperand(*MI, 765 MachineOperand *Src1Mods
536 AMDGPU::OpName::src0_modifiers); 766 = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers);
537 const MachineOperand *Src1Mods = getNamedOperand(*MI, 767
538 AMDGPU::OpName::src1_modifiers); 768 int Src0ModsVal = Src0Mods->getImm();
539 const MachineOperand *Src2Mods = getNamedOperand(*MI, 769 if (!Src1Mods && Src0ModsVal != 0)
540 AMDGPU::OpName::src2_modifiers); 770 return nullptr;
541 771
542 if ((Abs && Abs->getImm()) || (Neg && Neg->getImm()) || 772 // XXX - This assert might be a lie. It might be useful to have a neg
543 (Src0Mods && Src0Mods->getImm()) || (Src1Mods && Src1Mods->getImm()) || 773 // modifier with 0.0.
544 (Src2Mods && Src2Mods->getImm())) 774 int Src1ModsVal = Src1Mods->getImm();
545 return nullptr; 775 assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");
546 776
547 unsigned Reg = MI->getOperand(1).getReg(); 777 Src1Mods->setImm(Src0ModsVal);
548 unsigned SubReg = MI->getOperand(1).getSubReg(); 778 Src0Mods->setImm(Src1ModsVal);
549 MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm()); 779 }
550 MI->getOperand(2).ChangeToRegister(Reg, false); 780
551 MI->getOperand(2).setSubReg(SubReg); 781 unsigned Reg = Src0.getReg();
782 unsigned SubReg = Src0.getSubReg();
783 if (Src1.isImm())
784 Src0.ChangeToImmediate(Src1.getImm());
785 else
786 llvm_unreachable("Should only have immediates");
787
788 Src1.ChangeToRegister(Reg, false);
789 Src1.setSubReg(SubReg);
552 } else { 790 } else {
553 MI = TargetInstrInfo::commuteInstruction(MI, NewMI); 791 MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
554 } 792 }
555 793
556 if (MI) 794 if (MI)
557 MI->setDesc(get(commuteOpcode(MI->getOpcode()))); 795 MI->setDesc(get(commuteOpcode(MI->getOpcode())));
558 796
559 return MI; 797 return MI;
798 }
799
800 // This needs to be implemented because the source modifiers may be inserted
801 // between the true commutable operands, and the base
802 // TargetInstrInfo::commuteInstruction uses it.
803 bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
804 unsigned &SrcOpIdx1,
805 unsigned &SrcOpIdx2) const {
806 const MCInstrDesc &MCID = MI->getDesc();
807 if (!MCID.isCommutable())
808 return false;
809
810 unsigned Opc = MI->getOpcode();
811 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
812 if (Src0Idx == -1)
813 return false;
814
815 // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
816 // immediate.
817 if (!MI->getOperand(Src0Idx).isReg())
818 return false;
819
820 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
821 if (Src1Idx == -1)
822 return false;
823
824 if (!MI->getOperand(Src1Idx).isReg())
825 return false;
826
827 // If any source modifiers are set, the generic instruction commuting won't
828 // understand how to copy the source modifiers.
829 if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
830 hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
831 return false;
832
833 SrcOpIdx1 = Src0Idx;
834 SrcOpIdx2 = Src1Idx;
835 return true;
560 } 836 }
561 837
562 MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB, 838 MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
563 MachineBasicBlock::iterator I, 839 MachineBasicBlock::iterator I,
564 unsigned DstReg, 840 unsigned DstReg,
593 case AMDGPU::V_MOV_B32_e32: 869 case AMDGPU::V_MOV_B32_e32:
594 return MI->getOperand(1).isImm(); 870 return MI->getOperand(1).isImm();
595 } 871 }
596 } 872 }
597 873
598 namespace llvm { 874 static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
599 namespace AMDGPU { 875 int WidthB, int OffsetB) {
600 // Helper function generated by tablegen. We are wrapping this with 876 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
601 // an SIInstrInfo function that returns bool rather than int. 877 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
602 int isDS(uint16_t Opcode); 878 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
603 } 879 return LowOffset + LowWidth <= HighOffset;
604 } 880 }
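
[Editorial sketch, not part of the changeset.] offsetsDoNotOverlap above is an interval-disjointness test: accesses [OffsetA, OffsetA + WidthA) and [OffsetB, OffsetB + WidthB) are independent exactly when the lower range ends at or before the higher one begins. A standalone restatement with two worked cases (the name rangesDoNotOverlap is illustrative):

#include <iostream>

// Disjointness of two byte ranges [OffsetA, OffsetA + WidthA) and
// [OffsetB, OffsetB + WidthB), mirroring offsetsDoNotOverlap above.
static bool rangesDoNotOverlap(int WidthA, int OffsetA, int WidthB, int OffsetB) {
  int LowOffset  = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth   = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}

int main() {
  std::cout << std::boolalpha
            << rangesDoNotOverlap(4, 8, 4, 12) << '\n'  // true:  [8,12) and [12,16) touch but do not overlap
            << rangesDoNotOverlap(8, 8, 4, 12) << '\n'; // false: [8,16) covers [12,16)
}
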
605 881
606 bool SIInstrInfo::isDS(uint16_t Opcode) const { 882 bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
607 return ::AMDGPU::isDS(Opcode) != -1; 883 MachineInstr *MIb) const {
608 } 884 unsigned BaseReg0, Offset0;
609 885 unsigned BaseReg1, Offset1;
610 bool SIInstrInfo::isMIMG(uint16_t Opcode) const { 886
611 return get(Opcode).TSFlags & SIInstrFlags::MIMG; 887 if (getLdStBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
612 } 888 getLdStBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
613 889 assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() &&
614 bool SIInstrInfo::isSMRD(uint16_t Opcode) const { 890 "read2 / write2 not expected here yet");
615 return get(Opcode).TSFlags & SIInstrFlags::SMRD; 891 unsigned Width0 = (*MIa->memoperands_begin())->getSize();
616 } 892 unsigned Width1 = (*MIb->memoperands_begin())->getSize();
617 893 if (BaseReg0 == BaseReg1 &&
618 bool SIInstrInfo::isMUBUF(uint16_t Opcode) const { 894 offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
619 return get(Opcode).TSFlags & SIInstrFlags::MUBUF; 895 return true;
620 } 896 }
621 897 }
622 bool SIInstrInfo::isMTBUF(uint16_t Opcode) const { 898
623 return get(Opcode).TSFlags & SIInstrFlags::MTBUF; 899 return false;
624 } 900 }
625 901
626 bool SIInstrInfo::isVOP1(uint16_t Opcode) const { 902 bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
627 return get(Opcode).TSFlags & SIInstrFlags::VOP1; 903 MachineInstr *MIb,
628 } 904 AliasAnalysis *AA) const {
629 905 unsigned Opc0 = MIa->getOpcode();
630 bool SIInstrInfo::isVOP2(uint16_t Opcode) const { 906 unsigned Opc1 = MIb->getOpcode();
631 return get(Opcode).TSFlags & SIInstrFlags::VOP2; 907
632 } 908 assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
633 909 "MIa must load from or modify a memory location");
634 bool SIInstrInfo::isVOP3(uint16_t Opcode) const { 910 assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
635 return get(Opcode).TSFlags & SIInstrFlags::VOP3; 911 "MIb must load from or modify a memory location");
636 } 912
637 913 if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects())
638 bool SIInstrInfo::isVOPC(uint16_t Opcode) const { 914 return false;
639 return get(Opcode).TSFlags & SIInstrFlags::VOPC; 915
640 } 916 // XXX - Can we relax this between address spaces?
641 917 if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
642 bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const { 918 return false;
643 return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU; 919
920 // TODO: Should we check the address space from the MachineMemOperand? That
921 // would allow us to distinguish objects we know don't alias based on the
922 // underlying address space, even if it was lowered to a different one,
923 // e.g. private accesses lowered to use MUBUF instructions on a scratch
924 // buffer.
925 if (isDS(Opc0)) {
926 if (isDS(Opc1))
927 return checkInstOffsetsDoNotOverlap(MIa, MIb);
928
929 return !isFLAT(Opc1);
930 }
931
932 if (isMUBUF(Opc0) || isMTBUF(Opc0)) {
933 if (isMUBUF(Opc1) || isMTBUF(Opc1))
934 return checkInstOffsetsDoNotOverlap(MIa, MIb);
935
936 return !isFLAT(Opc1) && !isSMRD(Opc1);
937 }
938
939 if (isSMRD(Opc0)) {
940 if (isSMRD(Opc1))
941 return checkInstOffsetsDoNotOverlap(MIa, MIb);
942
943 return !isFLAT(Opc1) && !isMUBUF(Opc0) && !isMTBUF(Opc0);
944 }
945
946 if (isFLAT(Opc0)) {
947 if (isFLAT(Opc1))
948 return checkInstOffsetsDoNotOverlap(MIa, MIb);
949
950 return false;
951 }
952
953 return false;
644 } 954 }
645 955
646 bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { 956 bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
647 int32_t Val = Imm.getSExtValue(); 957 int64_t SVal = Imm.getSExtValue();
648 if (Val >= -16 && Val <= 64) 958 if (SVal >= -16 && SVal <= 64)
649 return true; 959 return true;
960
961 if (Imm.getBitWidth() == 64) {
962 uint64_t Val = Imm.getZExtValue();
963 return (DoubleToBits(0.0) == Val) ||
964 (DoubleToBits(1.0) == Val) ||
965 (DoubleToBits(-1.0) == Val) ||
966 (DoubleToBits(0.5) == Val) ||
967 (DoubleToBits(-0.5) == Val) ||
968 (DoubleToBits(2.0) == Val) ||
969 (DoubleToBits(-2.0) == Val) ||
970 (DoubleToBits(4.0) == Val) ||
971 (DoubleToBits(-4.0) == Val);
972 }
650 973
651 // The actual type of the operand does not seem to matter as long 974 // The actual type of the operand does not seem to matter as long
652 // as the bits match one of the inline immediate values. For example: 975 // as the bits match one of the inline immediate values. For example:
653 // 976 //
654 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, 977 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
655 // so it is a legal inline immediate. 978 // so it is a legal inline immediate.
656 // 979 //
657 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in 980 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
658 // floating-point, so it is a legal inline immediate. 981 // floating-point, so it is a legal inline immediate.
659 982 uint32_t Val = Imm.getZExtValue();
660 return (APInt::floatToBits(0.0f) == Imm) || 983
661 (APInt::floatToBits(1.0f) == Imm) || 984 return (FloatToBits(0.0f) == Val) ||
662 (APInt::floatToBits(-1.0f) == Imm) || 985 (FloatToBits(1.0f) == Val) ||
663 (APInt::floatToBits(0.5f) == Imm) || 986 (FloatToBits(-1.0f) == Val) ||
664 (APInt::floatToBits(-0.5f) == Imm) || 987 (FloatToBits(0.5f) == Val) ||
665 (APInt::floatToBits(2.0f) == Imm) || 988 (FloatToBits(-0.5f) == Val) ||
666 (APInt::floatToBits(-2.0f) == Imm) || 989 (FloatToBits(2.0f) == Val) ||
667 (APInt::floatToBits(4.0f) == Imm) || 990 (FloatToBits(-2.0f) == Val) ||
668 (APInt::floatToBits(-4.0f) == Imm); 991 (FloatToBits(4.0f) == Val) ||
669 } 992 (FloatToBits(-4.0f) == Val);
670 993 }
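
[Editorial sketch, not part of the changeset.] The comment block above is about bit patterns: an operand qualifies as an inline immediate when its raw bits equal a small integer in [-16, 64] or the encoding of one of the allowed floating-point constants. A minimal, self-contained sketch of the floating-point half of that check; floatToBits and isInlineFloatBits are illustrative names standing in for FloatToBits and the comparison chain above:

#include <cstdint>
#include <cstring>
#include <iostream>

// Reinterpret a float's bits as a 32-bit integer (what FloatToBits does).
static uint32_t floatToBits(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  return Bits;
}

// Does this 32-bit pattern match one of the allowed inline float constants?
static bool isInlineFloatBits(uint32_t Val) {
  const float Allowed[] = {0.0f, 1.0f, -1.0f, 0.5f, -0.5f, 2.0f, -2.0f, 4.0f, -4.0f};
  for (float F : Allowed)
    if (floatToBits(F) == Val)
      return true;
  return false;
}

int main() {
  std::cout << std::boolalpha
            << isInlineFloatBits(0x3f800000u) << '\n'  // true: 0x3f800000 encodes 1.0f
            << isInlineFloatBits(0x3f000001u) << '\n'; // false: not an allowed pattern
}
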
671 bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const { 994
672 if (MO.isImm()) 995 bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
673 return isInlineConstant(APInt(32, MO.getImm(), true)); 996 unsigned OpSize) const {
674 997 if (MO.isImm()) {
675 if (MO.isFPImm()) { 998 // MachineOperand provides no way to tell the true operand size, since it
676 APFloat FpImm = MO.getFPImm()->getValueAPF(); 999 // only records a 64-bit value. We need to know the size to determine if a
677 return isInlineConstant(FpImm.bitcastToAPInt()); 1000 // 32-bit floating point immediate bit pattern is legal for an integer
1001 // immediate. It would be for any 32-bit integer operand, but would not be
1002 // for a 64-bit one.
1003
1004 unsigned BitSize = 8 * OpSize;
1005 return isInlineConstant(APInt(BitSize, MO.getImm(), true));
678 } 1006 }
679 1007
680 return false; 1008 return false;
681 } 1009 }
682 1010
683 bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const { 1011 bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO,
684 return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO); 1012 unsigned OpSize) const {
1013 return MO.isImm() && !isInlineConstant(MO, OpSize);
685 } 1014 }
686 1015
687 static bool compareMachineOp(const MachineOperand &Op0, 1016 static bool compareMachineOp(const MachineOperand &Op0,
688 const MachineOperand &Op1) { 1017 const MachineOperand &Op1) {
689 if (Op0.getType() != Op1.getType()) 1018 if (Op0.getType() != Op1.getType())
692 switch (Op0.getType()) { 1021 switch (Op0.getType()) {
693 case MachineOperand::MO_Register: 1022 case MachineOperand::MO_Register:
694 return Op0.getReg() == Op1.getReg(); 1023 return Op0.getReg() == Op1.getReg();
695 case MachineOperand::MO_Immediate: 1024 case MachineOperand::MO_Immediate:
696 return Op0.getImm() == Op1.getImm(); 1025 return Op0.getImm() == Op1.getImm();
697 case MachineOperand::MO_FPImmediate:
698 return Op0.getFPImm() == Op1.getFPImm();
699 default: 1026 default:
700 llvm_unreachable("Didn't expect to be comparing these operand types"); 1027 llvm_unreachable("Didn't expect to be comparing these operand types");
701 } 1028 }
702 } 1029 }
703 1030
704 bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo, 1031 bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
705 const MachineOperand &MO) const { 1032 const MachineOperand &MO) const {
706 const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo]; 1033 const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo];
707 1034
708 assert(MO.isImm() || MO.isFPImm()); 1035 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
709 1036
710 if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE) 1037 if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
711 return true; 1038 return true;
712 1039
713 if (OpInfo.RegClass < 0) 1040 if (OpInfo.RegClass < 0)
714 return false; 1041 return false;
715 1042
716 return RI.regClassCanUseImmediate(OpInfo.RegClass); 1043 unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize();
717 } 1044 if (isLiteralConstant(MO, OpSize))
718 1045 return RI.opCanUseLiteralConstant(OpInfo.OperandType);
719 bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) { 1046
1047 return RI.opCanUseInlineConstant(OpInfo.OperandType);
1048 }
1049
1050 bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) const {
720 switch (AS) { 1051 switch (AS) {
721 case AMDGPUAS::GLOBAL_ADDRESS: { 1052 case AMDGPUAS::GLOBAL_ADDRESS: {
722 // MUBUF instructions have a 12-bit offset in bytes. 1053 // MUBUF instructions have a 12-bit offset in bytes.
723 return isUInt<12>(OffsetSize); 1054 return isUInt<12>(OffsetSize);
724 } 1055 }
725 case AMDGPUAS::CONSTANT_ADDRESS: { 1056 case AMDGPUAS::CONSTANT_ADDRESS: {
726 // SMRD instructions have an 8-bit offset in dwords. 1057 // SMRD instructions have an 8-bit offset in dwords on SI and
727 return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4); 1058 // a 20-bit offset in bytes on VI.
1059 if (RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
1060 return isUInt<20>(OffsetSize);
1061 else
1062 return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
728 } 1063 }
729 case AMDGPUAS::LOCAL_ADDRESS: 1064 case AMDGPUAS::LOCAL_ADDRESS:
730 case AMDGPUAS::REGION_ADDRESS: { 1065 case AMDGPUAS::REGION_ADDRESS: {
731 // The single offset versions have a 16-bit offset in bytes. 1066 // The single offset versions have a 16-bit offset in bytes.
732 return isUInt<16>(OffsetSize); 1067 return isUInt<16>(OffsetSize);
737 return 0; 1072 return 0;
738 } 1073 }
739 } 1074 }
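
[Editorial sketch, not part of the changeset.] As the comments note, the foldable offset width depends on the address space: MUBUF takes a 12-bit byte offset, SMRD an 8-bit dword offset on SI (a 20-bit byte offset on VI), and DS a 16-bit byte offset. A self-contained sketch of the SI-generation checks; the enum tags and function name are illustrative stand-ins, not the AMDGPUAS constants:

#include <cstdint>
#include <iostream>

// Same idea as llvm::isUInt<N>: does the value fit in N unsigned bits?
template <unsigned N> static bool isUIntN(uint64_t V) {
  return V < (uint64_t(1) << N);
}

enum AddrSpace { Global, Constant, Local }; // illustrative tags

// SI-only sketch of the offset-folding rule described above.
static bool canFoldOffsetSI(unsigned OffsetSize, AddrSpace AS) {
  switch (AS) {
  case Global:   return isUIntN<12>(OffsetSize);                           // MUBUF: 12-bit byte offset
  case Constant: return OffsetSize % 4 == 0 && isUIntN<8>(OffsetSize / 4); // SMRD: 8-bit dword offset
  case Local:    return isUIntN<16>(OffsetSize);                           // DS: 16-bit byte offset
  }
  return false;
}

int main() {
  std::cout << std::boolalpha
            << canFoldOffsetSI(4095, Global)   << '\n'  // true: fits in 12 bits
            << canFoldOffsetSI(4096, Global)   << '\n'  // false: needs 13 bits
            << canFoldOffsetSI(1020, Constant) << '\n'  // true: 255 dwords
            << canFoldOffsetSI(1022, Constant) << '\n'; // false: not dword-aligned
}
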
740 1075
741 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { 1076 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
742 return AMDGPU::getVOPe32(Opcode) != -1; 1077 int Op32 = AMDGPU::getVOPe32(Opcode);
1078 if (Op32 == -1)
1079 return false;
1080
1081 return pseudoToMCOpcode(Op32) != -1;
743 } 1082 }
744 1083
745 bool SIInstrInfo::hasModifiers(unsigned Opcode) const { 1084 bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
746 // The src0_modifier operand is present on all instructions 1085 // The src0_modifier operand is present on all instructions
747 // that have modifiers. 1086 // that have modifiers.
748 1087
749 return AMDGPU::getNamedOperandIdx(Opcode, 1088 return AMDGPU::getNamedOperandIdx(Opcode,
750 AMDGPU::OpName::src0_modifiers) != -1; 1089 AMDGPU::OpName::src0_modifiers) != -1;
751 } 1090 }
752 1091
1092 bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
1093 unsigned OpName) const {
1094 const MachineOperand *Mods = getNamedOperand(MI, OpName);
1095 return Mods && Mods->getImm();
1096 }
1097
1098 bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
1099 const MachineOperand &MO,
1100 unsigned OpSize) const {
1101 // Literal constants use the constant bus.
1102 if (isLiteralConstant(MO, OpSize))
1103 return true;
1104
1105 if (!MO.isReg() || !MO.isUse())
1106 return false;
1107
1108 if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
1109 return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
1110
1111 // FLAT_SCR is just an SGPR pair.
1112 if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
1113 return true;
1114
1115 // EXEC register uses the constant bus.
1116 if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
1117 return true;
1118
1119 // SGPRs use the constant bus
1120 if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
1121 (!MO.isImplicit() &&
1122 (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
1123 AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
1124 return true;
1125 }
1126
1127 return false;
1128 }
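
[Editorial sketch, not part of the changeset.] usesConstantBus above classifies the operands that read the scalar constant bus: literal constants and SGPR-like registers (SGPRs, M0, VCC, EXEC, FLAT_SCR). verifyInstruction further down uses it to enforce at most one constant-bus read per VOP* instruction, counting a repeated SGPR only once. A toy model of that counting rule; SrcOp and constantBusOK are invented names for illustration:

#include <iostream>
#include <string>
#include <vector>

// Toy operand: either an SGPR (identified by name) or a literal constant.
// VGPR operands never touch the constant bus, so they are simply omitted.
struct SrcOp {
  bool IsSGPR;
  std::string SGPRName; // meaningful only when IsSGPR is true
};

// Sketch of the rule enforced in verifyInstruction: every literal uses the
// bus, each *distinct* SGPR uses it once, and at most one use is allowed.
static bool constantBusOK(const std::vector<SrcOp> &Srcs) {
  unsigned Count = 0;
  std::string SGPRUsed;
  for (const SrcOp &S : Srcs) {
    if (S.IsSGPR) {
      if (S.SGPRName != SGPRUsed) {
        ++Count;
        SGPRUsed = S.SGPRName;
      }
    } else {
      ++Count; // literal constant
    }
  }
  return Count <= 1;
}

int main() {
  std::cout << std::boolalpha
            << constantBusOK({{true, "s0"}, {true, "s0"}}) << '\n'  // true: same SGPR reused
            << constantBusOK({{true, "s0"}, {true, "s1"}}) << '\n'  // false: two different SGPRs
            << constantBusOK({{false, ""}, {true, "s0"}}) << '\n';  // false: literal + SGPR
}
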
1129
753 bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, 1130 bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
754 StringRef &ErrInfo) const { 1131 StringRef &ErrInfo) const {
755 uint16_t Opcode = MI->getOpcode(); 1132 uint16_t Opcode = MI->getOpcode();
1133 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
756 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 1134 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
757 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 1135 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
758 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 1136 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
759 1137
760 // Make sure the number of operands is correct. 1138 // Make sure the number of operands is correct.
765 return false; 1143 return false;
766 } 1144 }
767 1145
768 // Make sure the register classes are correct 1146 // Make sure the register classes are correct
769 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { 1147 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
1148 if (MI->getOperand(i).isFPImm()) {
1149 ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
1150 "all fp values to integers.";
1151 return false;
1152 }
1153
770 switch (Desc.OpInfo[i].OperandType) { 1154 switch (Desc.OpInfo[i].OperandType) {
771 case MCOI::OPERAND_REGISTER: { 1155 case MCOI::OPERAND_REGISTER:
772 int RegClass = Desc.OpInfo[i].RegClass; 1156 if (MI->getOperand(i).isImm()) {
773 if (!RI.regClassCanUseImmediate(RegClass) && 1157 ErrInfo = "Illegal immediate value for operand.";
774 (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm())) { 1158 return false;
775 // Handle some special cases: 1159 }
776 // Src0 of VOP1, VOP2, VOPC can be an immediate no matter what 1160 break;
777 // the register class. 1161 case AMDGPU::OPERAND_REG_IMM32:
778 if (i != Src0Idx || (!isVOP1(Opcode) && !isVOP2(Opcode) && 1162 break;
779 !isVOPC(Opcode))) { 1163 case AMDGPU::OPERAND_REG_INLINE_C:
780 ErrInfo = "Expected register, but got immediate"; 1164 if (MI->getOperand(i).isImm()) {
1165 int RegClass = Desc.OpInfo[i].RegClass;
1166 const TargetRegisterClass *RC = RI.getRegClass(RegClass);
1167 if (!isInlineConstant(MI->getOperand(i), RC->getSize())) {
1168 ErrInfo = "Illegal immediate value for operand.";
781 return false; 1169 return false;
782 } 1170 }
783 } 1171 }
784 }
785 break; 1172 break;
786 case MCOI::OPERAND_IMMEDIATE: 1173 case MCOI::OPERAND_IMMEDIATE:
787 // Check if this operand is an immediate. 1174 // Check if this operand is an immediate.
788 // FrameIndex operands will be replaced by immediates, so they are 1175 // FrameIndex operands will be replaced by immediates, so they are
789 // allowed. 1176 // allowed.
790 if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm() && 1177 if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) {
791 !MI->getOperand(i).isFI()) {
792 ErrInfo = "Expected immediate, but got non-immediate"; 1178 ErrInfo = "Expected immediate, but got non-immediate";
793 return false; 1179 return false;
794 } 1180 }
795 // Fall-through 1181 // Fall-through
796 default: 1182 default:
815 } 1201 }
816 1202
817 1203
818 // Verify VOP* 1204 // Verify VOP*
819 if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) { 1205 if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
1206 // Only look at the true operands. Only a real operand can use the constant
1207 // bus, and we don't want to check pseudo-operands like the source modifier
1208 // flags.
1209 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
1210
820 unsigned ConstantBusCount = 0; 1211 unsigned ConstantBusCount = 0;
821 unsigned SGPRUsed = AMDGPU::NoRegister; 1212 unsigned SGPRUsed = AMDGPU::NoRegister;
822 for (int i = 0, e = MI->getNumOperands(); i != e; ++i) { 1213 for (int OpIdx : OpIndices) {
823 const MachineOperand &MO = MI->getOperand(i); 1214 if (OpIdx == -1)
824 if (MO.isReg() && MO.isUse() && 1215 break;
825 !TargetRegisterInfo::isVirtualRegister(MO.getReg())) { 1216 const MachineOperand &MO = MI->getOperand(OpIdx);
826 1217 if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) {
827 // EXEC register uses the constant bus. 1218 if (MO.isReg()) {
828 if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC) 1219 if (MO.getReg() != SGPRUsed)
1220 ++ConstantBusCount;
1221 SGPRUsed = MO.getReg();
1222 } else {
829 ++ConstantBusCount; 1223 ++ConstantBusCount;
830
831 // SGPRs use the constant bus
832 if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
833 (!MO.isImplicit() &&
834 (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
835 AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
836 if (SGPRUsed != MO.getReg()) {
837 ++ConstantBusCount;
838 SGPRUsed = MO.getReg();
839 }
840 } 1224 }
841 } 1225 }
842 // Literal constants use the constant bus.
843 if (isLiteralConstant(MO))
844 ++ConstantBusCount;
845 } 1226 }
846 if (ConstantBusCount > 1) { 1227 if (ConstantBusCount > 1) {
847 ErrInfo = "VOP* instruction uses the constant bus more than once"; 1228 ErrInfo = "VOP* instruction uses the constant bus more than once";
848 return false; 1229 return false;
849 } 1230 }
850 } 1231 }
851 1232
852 // Verify SRC1 for VOP2 and VOPC 1233 // Verify SRC1 for VOP2 and VOPC
853 if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) { 1234 if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
854 const MachineOperand &Src1 = MI->getOperand(Src1Idx); 1235 const MachineOperand &Src1 = MI->getOperand(Src1Idx);
855 if (Src1.isImm() || Src1.isFPImm()) { 1236 if (Src1.isImm()) {
856 ErrInfo = "VOP[2C] src1 cannot be an immediate."; 1237 ErrInfo = "VOP[2C] src1 cannot be an immediate.";
857 return false; 1238 return false;
858 } 1239 }
859 } 1240 }
860 1241
861 // Verify VOP3 1242 // Verify VOP3
862 if (isVOP3(Opcode)) { 1243 if (isVOP3(Opcode)) {
863 if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) { 1244 if (Src0Idx != -1 &&
1245 isLiteralConstant(MI->getOperand(Src0Idx), getOpSize(Opcode, Src0Idx))) {
864 ErrInfo = "VOP3 src0 cannot be a literal constant."; 1246 ErrInfo = "VOP3 src0 cannot be a literal constant.";
865 return false; 1247 return false;
866 } 1248 }
867 if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) { 1249 if (Src1Idx != -1 &&
1250 isLiteralConstant(MI->getOperand(Src1Idx), getOpSize(Opcode, Src1Idx))) {
868 ErrInfo = "VOP3 src1 cannot be a literal constant."; 1251 ErrInfo = "VOP3 src1 cannot be a literal constant.";
869 return false; 1252 return false;
870 } 1253 }
871 if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) { 1254 if (Src2Idx != -1 &&
1255 isLiteralConstant(MI->getOperand(Src2Idx), getOpSize(Opcode, Src2Idx))) {
872 ErrInfo = "VOP3 src2 cannot be a literal constant."; 1256 ErrInfo = "VOP3 src2 cannot be a literal constant.";
873 return false; 1257 return false;
874 } 1258 }
875 } 1259 }
876 1260
877 // Verify misc. restrictions on specific instructions. 1261 // Verify misc. restrictions on specific instructions.
878 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || 1262 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
879 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { 1263 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
880 MI->dump(); 1264 const MachineOperand &Src0 = MI->getOperand(Src0Idx);
881 1265 const MachineOperand &Src1 = MI->getOperand(Src1Idx);
882 const MachineOperand &Src0 = MI->getOperand(2); 1266 const MachineOperand &Src2 = MI->getOperand(Src2Idx);
883 const MachineOperand &Src1 = MI->getOperand(3);
884 const MachineOperand &Src2 = MI->getOperand(4);
885 if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { 1267 if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
886 if (!compareMachineOp(Src0, Src1) && 1268 if (!compareMachineOp(Src0, Src1) &&
887 !compareMachineOp(Src0, Src2)) { 1269 !compareMachineOp(Src0, Src2)) {
888 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; 1270 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
889 return false; 1271 return false;
941 case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64; 1323 case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
942 case AMDGPU::S_LOAD_DWORDX2_IMM: 1324 case AMDGPU::S_LOAD_DWORDX2_IMM:
943 case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; 1325 case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
944 case AMDGPU::S_LOAD_DWORDX4_IMM: 1326 case AMDGPU::S_LOAD_DWORDX4_IMM:
945 case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; 1327 case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
946 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32; 1328 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
947 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; 1329 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
948 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; 1330 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
949 } 1331 }
950 } 1332 }
951 1333
956 const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, 1338 const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
957 unsigned OpNo) const { 1339 unsigned OpNo) const {
958 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 1340 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
959 const MCInstrDesc &Desc = get(MI.getOpcode()); 1341 const MCInstrDesc &Desc = get(MI.getOpcode());
960 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || 1342 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
961 Desc.OpInfo[OpNo].RegClass == -1) 1343 Desc.OpInfo[OpNo].RegClass == -1) {
962 return MRI.getRegClass(MI.getOperand(OpNo).getReg()); 1344 unsigned Reg = MI.getOperand(OpNo).getReg();
1345
1346 if (TargetRegisterInfo::isVirtualRegister(Reg))
1347 return MRI.getRegClass(Reg);
1348 return RI.getPhysRegClass(Reg);
1349 }
963 1350
964 unsigned RCID = Desc.OpInfo[OpNo].RegClass; 1351 unsigned RCID = Desc.OpInfo[OpNo].RegClass;
965 return RI.getRegClass(RCID); 1352 return RI.getRegClass(RCID);
966 } 1353 }
967 1354
977 } 1364 }
978 } 1365 }
979 1366
980 void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const { 1367 void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
981 MachineBasicBlock::iterator I = MI; 1368 MachineBasicBlock::iterator I = MI;
1369 MachineBasicBlock *MBB = MI->getParent();
982 MachineOperand &MO = MI->getOperand(OpIdx); 1370 MachineOperand &MO = MI->getOperand(OpIdx);
983 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 1371 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
984 unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass; 1372 unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
985 const TargetRegisterClass *RC = RI.getRegClass(RCID); 1373 const TargetRegisterClass *RC = RI.getRegClass(RCID);
986 unsigned Opcode = AMDGPU::V_MOV_B32_e32; 1374 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
987 if (MO.isReg()) { 1375 if (MO.isReg())
988 Opcode = AMDGPU::COPY; 1376 Opcode = AMDGPU::COPY;
989 } else if (RI.isSGPRClass(RC)) { 1377 else if (RI.isSGPRClass(RC))
990 Opcode = AMDGPU::S_MOV_B32; 1378 Opcode = AMDGPU::S_MOV_B32;
991 } 1379
992 1380
993 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); 1381 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
994 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC)) { 1382 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
995 VRC = &AMDGPU::VReg_64RegClass; 1383 VRC = &AMDGPU::VReg_64RegClass;
996 } else { 1384 else
997 VRC = &AMDGPU::VReg_32RegClass; 1385 VRC = &AMDGPU::VGPR_32RegClass;
998 } 1386
999 unsigned Reg = MRI.createVirtualRegister(VRC); 1387 unsigned Reg = MRI.createVirtualRegister(VRC);
1000 BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode), 1388 DebugLoc DL = MBB->findDebugLoc(I);
1001 Reg).addOperand(MO); 1389 BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg)
1390 .addOperand(MO);
1002 MO.ChangeToRegister(Reg, false); 1391 MO.ChangeToRegister(Reg, false);
1003 } 1392 }
1004 1393
1005 unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, 1394 unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
1006 MachineRegisterInfo &MRI, 1395 MachineRegisterInfo &MRI,
1016 1405
1017 // Just in case the super register is itself a sub-register, copy it to a new 1406 // Just in case the super register is itself a sub-register, copy it to a new
1018 // value so we don't need to worry about merging its subreg index with the 1407 // value so we don't need to worry about merging its subreg index with the
1019 // SubIdx passed to this function. The register coalescer should be able to 1408 // SubIdx passed to this function. The register coalescer should be able to
1020 // eliminate this extra copy. 1409 // eliminate this extra copy.
1021 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY), 1410 MachineBasicBlock *MBB = MI->getParent();
1022 NewSuperReg) 1411 DebugLoc DL = MI->getDebugLoc();
1023 .addOperand(SuperReg); 1412
1024 1413 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
1025 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY), 1414 .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
1026 SubReg) 1415
1027 .addReg(NewSuperReg, 0, SubIdx); 1416 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
1417 .addReg(NewSuperReg, 0, SubIdx);
1418
1028 return SubReg; 1419 return SubReg;
1029 } 1420 }
1030 1421
1031 MachineOperand SIInstrInfo::buildExtractSubRegOrImm( 1422 MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
1032 MachineBasicBlock::iterator MII, 1423 MachineBasicBlock::iterator MII,
1078 Worklist.push_back(Hi); 1469 Worklist.push_back(Hi);
1079 1470
1080 return Dst; 1471 return Dst;
1081 } 1472 }
1082 1473
1474 // Change the order of operands from (0, 1, 2) to (0, 2, 1)
1475 void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
1476 assert(Inst->getNumExplicitOperands() == 3);
1477 MachineOperand Op1 = Inst->getOperand(1);
1478 Inst->RemoveOperand(1);
1479 Inst->addOperand(Op1);
1480 }
1481
1083 bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, 1482 bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
1084 const MachineOperand *MO) const { 1483 const MachineOperand *MO) const {
1085 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 1484 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
1086 const MCInstrDesc &InstDesc = get(MI->getOpcode()); 1485 const MCInstrDesc &InstDesc = get(MI->getOpcode());
1087 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx]; 1486 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
1088 const TargetRegisterClass *DefinedRC = 1487 const TargetRegisterClass *DefinedRC =
1089 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; 1488 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
1090 if (!MO) 1489 if (!MO)
1091 MO = &MI->getOperand(OpIdx); 1490 MO = &MI->getOperand(OpIdx);
1092 1491
1492 if (isVALU(InstDesc.Opcode) &&
1493 usesConstantBus(MRI, *MO, DefinedRC->getSize())) {
1494 unsigned SGPRUsed =
1495 MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister;
1496 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1497 if (i == OpIdx)
1498 continue;
1499 const MachineOperand &Op = MI->getOperand(i);
1500 if (Op.isReg() && Op.getReg() != SGPRUsed &&
1501 usesConstantBus(MRI, Op, getOpSize(*MI, i))) {
1502 return false;
1503 }
1504 }
1505 }
1506
1093 if (MO->isReg()) { 1507 if (MO->isReg()) {
1094 assert(DefinedRC); 1508 assert(DefinedRC);
1095 const TargetRegisterClass *RC = MRI.getRegClass(MO->getReg()); 1509 const TargetRegisterClass *RC = MRI.getRegClass(MO->getReg());
1096 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)); 1510
1511 // In order to be legal, the common sub-class must be equal to the
1512 // class of the current operand. For example:
1513 //
1514 // v_mov_b32 s0 ; Operand defined as vsrc_32
1515 // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
1516 //
1517 // s_sendmsg 0, s0 ; Operand defined as m0reg
1518 // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
1519
1520 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
1097 } 1521 }
1098 1522
1099 1523
1100 // Handle non-register types that are treated like immediates. 1524 // Handle non-register types that are treated like immediates.
1101 assert(MO->isImm() || MO->isFPImm() || MO->isTargetIndex() || MO->isFI()); 1525 assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
1102 1526
1103 if (!DefinedRC) 1527 if (!DefinedRC) {
1104 // This opperand expects an immediate 1528 // This operand expects an immediate.
1105 return true; 1529 return true;
1106 1530 }
1107 return RI.regClassCanUseImmediate(DefinedRC); 1531
1532 return isImmOperandLegal(MI, OpIdx, *MO);
1108 } 1533 }
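To make the constant-bus check above concrete, a hedged reading using the V_FMA_F32 case this patch cites later in findUsedSGPR:

    // Asking whether %sgpr1 is legal as src1 of
    //   V_FMA_F32 %vgpr0, %sgpr0, %sgpr1, %vgpr2
    // returns false: src0 already reads %sgpr0 through the constant bus, and a
    // VALU instruction may read only one SGPR (or literal) that way. Reusing
    // the same SGPR is fine:
    //   V_FMA_F32 %vgpr0, %sgpr0, %sgpr0, %vgpr2   ; legal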
1109 1534
1110 void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { 1535 void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
1111 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 1536 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
1112 1537
1142 } 1567 }
1143 1568
1144 // XXX - Do any VOP3 instructions read VCC? 1569 // XXX - Do any VOP3 instructions read VCC?
1145 // Legalize VOP3 1570 // Legalize VOP3
1146 if (isVOP3(MI->getOpcode())) { 1571 if (isVOP3(MI->getOpcode())) {
1147 int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx}; 1572 int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };
1148 unsigned SGPRReg = AMDGPU::NoRegister; 1573
1574 // Find the one SGPR operand we are allowed to use.
1575 unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
1576
1149 for (unsigned i = 0; i < 3; ++i) { 1577 for (unsigned i = 0; i < 3; ++i) {
1150 int Idx = VOP3Idx[i]; 1578 int Idx = VOP3Idx[i];
1151 if (Idx == -1) 1579 if (Idx == -1)
1152 continue; 1580 break;
1153 MachineOperand &MO = MI->getOperand(Idx); 1581 MachineOperand &MO = MI->getOperand(Idx);
1154 1582
1155 if (MO.isReg()) { 1583 if (MO.isReg()) {
1156 if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) 1584 if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
1157 continue; // VGPRs are legal 1585 continue; // VGPRs are legal
1161 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) { 1589 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
1162 SGPRReg = MO.getReg(); 1590 SGPRReg = MO.getReg();
1163 // We can use one SGPR in each VOP3 instruction. 1591 // We can use one SGPR in each VOP3 instruction.
1164 continue; 1592 continue;
1165 } 1593 }
1166 } else if (!isLiteralConstant(MO)) { 1594 } else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) {
1167 // If it is not a register and not a literal constant, then it must be 1595 // If it is not a register and not a literal constant, then it must be
1168 // an inline constant which is always legal. 1596 // an inline constant which is always legal.
1169 continue; 1597 continue;
1170 } 1598 }
1171 // If we make it this far, then the operand is not legal and we must 1599 // If we make it this far, then the operand is not legal and we must
1265 MachineBasicBlock &MBB = *MI->getParent(); 1693 MachineBasicBlock &MBB = *MI->getParent();
1266 // Extract the ptr from the resource descriptor. 1694 // Extract the ptr from the resource descriptor.
1267 1695
1268 // SRsrcPtrLo = srsrc:sub0 1696 // SRsrcPtrLo = srsrc:sub0
1269 unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc, 1697 unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc,
1270 &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass); 1698 &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VGPR_32RegClass);
1271 1699
1272 // SRsrcPtrHi = srsrc:sub1 1700 // SRsrcPtrHi = srsrc:sub1
1273 unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc, 1701 unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc,
1274 &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass); 1702 &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VGPR_32RegClass);
1275 1703
1276 // Create an empty resource descriptor 1704 // Create an empty resource descriptor
1277 unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); 1705 unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
1278 unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 1706 unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1279 unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 1707 unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1280 unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); 1708 unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
1709 uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
1281 1710
1282 // Zero64 = 0 1711 // Zero64 = 0
1283 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64), 1712 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
1284 Zero64) 1713 Zero64)
1285 .addImm(0); 1714 .addImm(0);
1286 1715
1287 // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0} 1716 // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
1288 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), 1717 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
1289 SRsrcFormatLo) 1718 SRsrcFormatLo)
1290 .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF); 1719 .addImm(RsrcDataFormat & 0xFFFFFFFF);
1291 1720
1292 // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32} 1721 // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
1293 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), 1722 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
1294 SRsrcFormatHi) 1723 SRsrcFormatHi)
1295 .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32); 1724 .addImm(RsrcDataFormat >> 32);
1296 1725
1297 // NewSRsrc = {Zero64, SRsrcFormat} 1726 // NewSRsrc = {Zero64, SRsrcFormat}
1298 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), 1727 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
1299 NewSRsrc) 1728 NewSRsrc)
1300 .addReg(Zero64) 1729 .addReg(Zero64)
1309 unsigned NewVAddrLo; 1738 unsigned NewVAddrLo;
1310 unsigned NewVAddrHi; 1739 unsigned NewVAddrHi;
1311 if (VAddr) { 1740 if (VAddr) {
1312 // This is already an ADDR64 instruction so we need to add the pointer 1741 // This is already an ADDR64 instruction so we need to add the pointer
1313 // extracted from the resource descriptor to the current value of VAddr. 1742 // extracted from the resource descriptor to the current value of VAddr.
1314 NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); 1743 NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1315 NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); 1744 NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1316 1745
1317 // NewVaddrLo = SRsrcPtrLo + VAddr:sub0 1746 // NewVaddrLo = SRsrcPtrLo + VAddr:sub0
1318 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32), 1747 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
1319 NewVAddrLo) 1748 NewVAddrLo)
1320 .addReg(SRsrcPtrLo) 1749 .addReg(SRsrcPtrLo)
1333 // This instruction is the _OFFSET variant, so we need to convert it to 1762 // This instruction is the _OFFSET variant, so we need to convert it to
1334 // ADDR64. 1763 // ADDR64.
1335 MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata); 1764 MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
1336 MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset); 1765 MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
1337 MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset); 1766 MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
1338 assert(SOffset->isImm() && SOffset->getImm() == 0 && "Legalizing MUBUF "
1339 "with non-zero soffset is not implemented");
1340 (void)SOffset;
1341 1767
1342 // Create the new instruction. 1768 // Create the new instruction.
1343 unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode()); 1769 unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
1344 MachineInstr *Addr64 = 1770 MachineInstr *Addr64 =
1345 BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode)) 1771 BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
1346 .addOperand(*VData) 1772 .addOperand(*VData)
1347 .addOperand(*SRsrc) 1773 .addOperand(*SRsrc)
1348 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. 1774 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
1349 // This will be replaced later 1775 // This will be replaced later
1350 // with the new value of vaddr. 1776 // with the new value of vaddr.
1777 .addOperand(*SOffset)
1351 .addOperand(*Offset); 1778 .addOperand(*Offset);
1352 1779
1353 MI->removeFromParent(); 1780 MI->removeFromParent();
1354 MI = Addr64; 1781 MI = Addr64;
1355 1782
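Summarizing the descriptor rewrite in this hunk as a sketch (a reading of the code above, not a hardware reference):

    // NewSRsrc = { Zero64,                      // dwords 0-1: base pointer = 0
    //              RsrcDataFormat & 0xFFFFFFFF, // dword 2
    //              RsrcDataFormat >> 32 }       // dword 3
    // The 64-bit pointer pulled out of srsrc:sub0/sub1 is added into vaddr
    // instead, which is why the _OFFSET forms are rewritten to _ADDR64.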
1388 unsigned HalfSize = HalfRC->getSize(); 1815 unsigned HalfSize = HalfRC->getSize();
1389 const MachineOperand *OffOp = 1816 const MachineOperand *OffOp =
1390 getNamedOperand(*MI, AMDGPU::OpName::offset); 1817 getNamedOperand(*MI, AMDGPU::OpName::offset);
1391 const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase); 1818 const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase);
1392 1819
1820 // The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes
1821 // on VI.
1393 if (OffOp) { 1822 if (OffOp) {
1823 bool isVI = RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
1824 unsigned OffScale = isVI ? 1 : 4;
1394 // Handle the _IMM variant 1825 // Handle the _IMM variant
1395 unsigned LoOffset = OffOp->getImm(); 1826 unsigned LoOffset = OffOp->getImm() * OffScale;
1396 unsigned HiOffset = LoOffset + (HalfSize / 4); 1827 unsigned HiOffset = LoOffset + HalfSize;
1397 Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo) 1828 Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo)
1398 .addOperand(*SBase) 1829 .addOperand(*SBase)
1399 .addImm(LoOffset); 1830 .addImm(LoOffset / OffScale);
1400 1831
1401 if (!isUInt<8>(HiOffset)) { 1832 if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) {
1402 unsigned OffsetSGPR = 1833 unsigned OffsetSGPR =
1403 MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 1834 MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
1404 BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR) 1835 BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR)
1405 .addImm(HiOffset << 2); // The immediate offset is in dwords, 1836 .addImm(HiOffset); // The offset in register is in bytes.
1406 // but offset in register is in bytes.
1407 Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi) 1837 Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
1408 .addOperand(*SBase) 1838 .addOperand(*SBase)
1409 .addReg(OffsetSGPR); 1839 .addReg(OffsetSGPR);
1410 } else { 1840 } else {
1411 Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi) 1841 Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
1412 .addOperand(*SBase) 1842 .addOperand(*SBase)
1413 .addImm(HiOffset); 1843 .addImm(HiOffset / OffScale);
1414 } 1844 }
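A worked example of the offset scaling above, assuming a 256-bit load split into two 128-bit halves (HalfSize = 16 bytes) and an original immediate of 4:

    // SI (OffScale = 4, immediates encoded in dwords):
    //   LoOffset = 4 * 4  = 16 bytes -> lo half keeps imm 16 / 4 = 4
    //   HiOffset = 16 + 16 = 32 bytes -> isUInt<8>(32 / 4) holds, hi half gets imm 8
    // VI (OffScale = 1, immediates encoded in bytes):
    //   LoOffset = 4, HiOffset = 20 -> checked against isUInt<20>, hi half gets imm 20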
1415 } else { 1845 } else {
1416 // Handle the _SGPR variant 1846 // Handle the _SGPR variant
1417 MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff); 1847 MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
1418 Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo) 1848 Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
1473 if (MI->getOperand(2).isReg()) { 1903 if (MI->getOperand(2).isReg()) {
1474 RegOffset = MI->getOperand(2).getReg(); 1904 RegOffset = MI->getOperand(2).getReg();
1475 ImmOffset = 0; 1905 ImmOffset = 0;
1476 } else { 1906 } else {
1477 assert(MI->getOperand(2).isImm()); 1907 assert(MI->getOperand(2).isImm());
1478 // SMRD instructions take a dword offset and MUBUF instructions 1908 // SMRD instructions take a dword offset on SI and a byte offset on VI
1479 // take a byte offset. 1909 // and MUBUF instructions always take a byte offset.
1480 ImmOffset = MI->getOperand(2).getImm() << 2; 1910 ImmOffset = MI->getOperand(2).getImm();
1911 if (RI.ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
1912 ImmOffset <<= 2;
1481 RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 1913 RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1914
1482 if (isUInt<12>(ImmOffset)) { 1915 if (isUInt<12>(ImmOffset)) {
1483 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), 1916 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
1484 RegOffset) 1917 RegOffset)
1485 .addImm(0); 1918 .addImm(0);
1486 } else { 1919 } else {
1494 unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); 1927 unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
1495 unsigned DWord0 = RegOffset; 1928 unsigned DWord0 = RegOffset;
1496 unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 1929 unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1497 unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 1930 unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1498 unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 1931 unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1932 uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
1499 1933
1500 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1) 1934 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
1501 .addImm(0); 1935 .addImm(0);
1502 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2) 1936 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
1503 .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF); 1937 .addImm(RsrcDataFormat & 0xFFFFFFFF);
1504 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3) 1938 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
1505 .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32); 1939 .addImm(RsrcDataFormat >> 32);
1506 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc) 1940 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
1507 .addReg(DWord0) 1941 .addReg(DWord0)
1508 .addImm(AMDGPU::sub0) 1942 .addImm(AMDGPU::sub0)
1509 .addReg(DWord1) 1943 .addReg(DWord1)
1510 .addImm(AMDGPU::sub1) 1944 .addImm(AMDGPU::sub1)
1517 MI->getOperand(2).setReg(MI->getOperand(1).getReg()); 1951 MI->getOperand(2).setReg(MI->getOperand(1).getReg());
1518 } else { 1952 } else {
1519 MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false); 1953 MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
1520 } 1954 }
1521 MI->getOperand(1).setReg(SRsrc); 1955 MI->getOperand(1).setReg(SRsrc);
1956 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0));
1522 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset)); 1957 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
1523 1958
1524 const TargetRegisterClass *NewDstRC = 1959 const TargetRegisterClass *NewDstRC =
1525 RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass); 1960 RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);
1526 1961
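The dword-versus-byte distinction shows up again in the ImmOffset handling above; a small worked example assuming the original SMRD immediate was 16:

    // SI/CI (generation <= SEA_ISLANDS): SMRD immediates are dwords, MUBUF
    // offsets are bytes, so ImmOffset = 16 << 2 = 64.
    // VI: the SMRD immediate is already in bytes, so ImmOffset stays 16.
    // The isUInt<12> check above then decides whether that byte value is small
    // enough to stay in the immediate field (with RegOffset zeroed via S_MOV_B32).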
1619 case AMDGPU::S_BCNT1_I32_B64: 2054 case AMDGPU::S_BCNT1_I32_B64:
1620 splitScalar64BitBCNT(Worklist, Inst); 2055 splitScalar64BitBCNT(Worklist, Inst);
1621 Inst->eraseFromParent(); 2056 Inst->eraseFromParent();
1622 continue; 2057 continue;
1623 2058
2059 case AMDGPU::S_BFE_I64: {
2060 splitScalar64BitBFE(Worklist, Inst);
2061 Inst->eraseFromParent();
2062 continue;
2063 }
2064
2065 case AMDGPU::S_LSHL_B32:
2066 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2067 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
2068 swapOperands(Inst);
2069 }
2070 break;
2071 case AMDGPU::S_ASHR_I32:
2072 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2073 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
2074 swapOperands(Inst);
2075 }
2076 break;
2077 case AMDGPU::S_LSHR_B32:
2078 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2079 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
2080 swapOperands(Inst);
2081 }
2082 break;
2083 case AMDGPU::S_LSHL_B64:
2084 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2085 NewOpcode = AMDGPU::V_LSHLREV_B64;
2086 swapOperands(Inst);
2087 }
2088 break;
2089 case AMDGPU::S_ASHR_I64:
2090 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2091 NewOpcode = AMDGPU::V_ASHRREV_I64;
2092 swapOperands(Inst);
2093 }
2094 break;
2095 case AMDGPU::S_LSHR_B64:
2096 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2097 NewOpcode = AMDGPU::V_LSHRREV_B64;
2098 swapOperands(Inst);
2099 }
2100 break;
2101
1624 case AMDGPU::S_BFE_U64: 2102 case AMDGPU::S_BFE_U64:
1625 case AMDGPU::S_BFE_I64:
1626 case AMDGPU::S_BFM_B64: 2103 case AMDGPU::S_BFM_B64:
1627 llvm_unreachable("Moving this op to VALU not implemented"); 2104 llvm_unreachable("Moving this op to VALU not implemented");
1628 } 2105 }
1629 2106
1630 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { 2107 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
1726 assert(Channel == 0); 2203 assert(Channel == 0);
1727 return RegIndex; 2204 return RegIndex;
1728 } 2205 }
1729 2206
1730 const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { 2207 const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
1731 return &AMDGPU::VReg_32RegClass; 2208 return &AMDGPU::VGPR_32RegClass;
1732 } 2209 }
1733 2210
1734 void SIInstrInfo::splitScalar64BitUnaryOp( 2211 void SIInstrInfo::splitScalar64BitUnaryOp(
1735 SmallVectorImpl<MachineInstr *> &Worklist, 2212 SmallVectorImpl<MachineInstr *> &Worklist,
1736 MachineInstr *Inst, 2213 MachineInstr *Inst,
1856 DebugLoc DL = Inst->getDebugLoc(); 2333 DebugLoc DL = Inst->getDebugLoc();
1857 2334
1858 MachineOperand &Dest = Inst->getOperand(0); 2335 MachineOperand &Dest = Inst->getOperand(0);
1859 MachineOperand &Src = Inst->getOperand(1); 2336 MachineOperand &Src = Inst->getOperand(1);
1860 2337
1861 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32); 2338 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
1862 const TargetRegisterClass *SrcRC = Src.isReg() ? 2339 const TargetRegisterClass *SrcRC = Src.isReg() ?
1863 MRI.getRegClass(Src.getReg()) : 2340 MRI.getRegClass(Src.getReg()) :
1864 &AMDGPU::SGPR_32RegClass; 2341 &AMDGPU::SGPR_32RegClass;
1865 2342
1866 unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2343 unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1883 2360
1884 MRI.replaceRegWith(Dest.getReg(), ResultReg); 2361 MRI.replaceRegWith(Dest.getReg(), ResultReg);
1885 2362
1886 Worklist.push_back(First); 2363 Worklist.push_back(First);
1887 Worklist.push_back(Second); 2364 Worklist.push_back(Second);
2365 }
2366
2367 void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
2368 MachineInstr *Inst) const {
2369 MachineBasicBlock &MBB = *Inst->getParent();
2370 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2371 MachineBasicBlock::iterator MII = Inst;
2372 DebugLoc DL = Inst->getDebugLoc();
2373
2374 MachineOperand &Dest = Inst->getOperand(0);
2375 uint32_t Imm = Inst->getOperand(2).getImm();
2376 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
2377 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
2378
2379 (void) Offset;
2380
2381 // Only sext_inreg cases handled.
2382 assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 &&
2383 BitWidth <= 32 &&
2384 Offset == 0 &&
2385 "Not implemented");
2386
2387 if (BitWidth < 32) {
2388 unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2389 unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2390 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2391
2392 BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
2393 .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0)
2394 .addImm(0)
2395 .addImm(BitWidth);
2396
2397 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
2398 .addImm(31)
2399 .addReg(MidRegLo);
2400
2401 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
2402 .addReg(MidRegLo)
2403 .addImm(AMDGPU::sub0)
2404 .addReg(MidRegHi)
2405 .addImm(AMDGPU::sub1);
2406
2407 MRI.replaceRegWith(Dest.getReg(), ResultReg);
2408 return;
2409 }
2410
2411 MachineOperand &Src = Inst->getOperand(1);
2412 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2413 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2414
2415 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
2416 .addImm(31)
2417 .addReg(Src.getReg(), 0, AMDGPU::sub0);
2418
2419 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
2420 .addReg(Src.getReg(), 0, AMDGPU::sub0)
2421 .addImm(AMDGPU::sub0)
2422 .addReg(TmpReg)
2423 .addImm(AMDGPU::sub1);
2424
2425 MRI.replaceRegWith(Dest.getReg(), ResultReg);
1888 } 2426 }
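A worked example for the BFE encoding handled above, taking the common sext-from-16-bits case:

    // S_BFE_I64 %dst, %src, 0x100000       ; Imm = (width 16 << 16) | (offset 0)
    //   Offset   = 0x100000 & 0x3f              = 0
    //   BitWidth = (0x100000 & 0x7f0000) >> 16  = 16
    // BitWidth < 32, so the low half becomes V_BFE_I32 %lo, %src.sub0, 0, 16 and
    // the high half replicates the sign bit with V_ASHRREV_I32 %hi, 31, %lo.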
1889 2427
1890 void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc, 2428 void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
1891 MachineInstr *Inst) const { 2429 MachineInstr *Inst) const {
1892 // Add the implicit and explicit register definitions. 2430 // Add the implicit and explicit register definitions.
1903 Inst->addOperand(MachineOperand::CreateReg(Reg, true, true)); 2441 Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
1904 } 2442 }
1905 } 2443 }
1906 } 2444 }
1907 2445
2446 unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
2447 int OpIndices[3]) const {
2448 const MCInstrDesc &Desc = get(MI->getOpcode());
2449
2450 // Find the one SGPR operand we are allowed to use.
2451 unsigned SGPRReg = AMDGPU::NoRegister;
2452
2453 // First we need to consider the instruction's operand requirements before
2454 // legalizing. Some operands are required to be SGPRs, such as implicit uses
2455 // of VCC, but we are still bound by the constant bus requirement to only use
2456 // one.
2457 //
2458 // If the operand's class is an SGPR, we can never move it.
2459
2460 for (const MachineOperand &MO : MI->implicit_operands()) {
2461 // We only care about reads.
2462 if (MO.isDef())
2463 continue;
2464
2465 if (MO.getReg() == AMDGPU::VCC)
2466 return AMDGPU::VCC;
2467
2468 if (MO.getReg() == AMDGPU::FLAT_SCR)
2469 return AMDGPU::FLAT_SCR;
2470 }
2471
2472 unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
2473 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
2474
2475 for (unsigned i = 0; i < 3; ++i) {
2476 int Idx = OpIndices[i];
2477 if (Idx == -1)
2478 break;
2479
2480 const MachineOperand &MO = MI->getOperand(Idx);
2481 if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass))
2482 SGPRReg = MO.getReg();
2483
2484 if (MO.isReg() && RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
2485 UsedSGPRs[i] = MO.getReg();
2486 }
2487
2488 if (SGPRReg != AMDGPU::NoRegister)
2489 return SGPRReg;
2490
2491 // We don't have a required SGPR operand, so we have a bit more freedom in
2492 // selecting operands to move.
2493
2494 // Try to select the most used SGPR. If an SGPR is equal to one of the
2495 // others, we choose that.
2496 //
2497 // e.g.
2498 // V_FMA_F32 v0, s0, s0, s0 -> No moves
2499 // V_FMA_F32 v0, s0, s1, s0 -> Move s1
2500
2501 if (UsedSGPRs[0] != AMDGPU::NoRegister) {
2502 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
2503 SGPRReg = UsedSGPRs[0];
2504 }
2505
2506 if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
2507 if (UsedSGPRs[1] == UsedSGPRs[2])
2508 SGPRReg = UsedSGPRs[1];
2509 }
2510
2511 return SGPRReg;
2512 }
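One hedged example of the implicit-use early return above: an instruction carrying an implicit read of VCC (V_CNDMASK_B32_e32, for instance) makes findUsedSGPR return AMDGPU::VCC right away, so under this patch's single-constant-bus model any distinct SGPR among the explicit sources is the operand the caller has to move:

    //   V_CNDMASK_B32_e32 %dst, %sgpr0, %vgpr1   ; implicit read of $vcc
    //   -> findUsedSGPR returns VCC; %sgpr0 would be a second constant-bus read
    //      and is copied to a VGPR during legalization.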
2513
1908 MachineInstrBuilder SIInstrInfo::buildIndirectWrite( 2514 MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
1909 MachineBasicBlock *MBB, 2515 MachineBasicBlock *MBB,
1910 MachineBasicBlock::iterator I, 2516 MachineBasicBlock::iterator I,
1911 unsigned ValueReg, 2517 unsigned ValueReg,
1912 unsigned Address, unsigned OffsetReg) const { 2518 unsigned Address, unsigned OffsetReg) const {
1913 const DebugLoc &DL = MBB->findDebugLoc(I); 2519 const DebugLoc &DL = MBB->findDebugLoc(I);
1914 unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister( 2520 unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
1915 getIndirectIndexBegin(*MBB->getParent())); 2521 getIndirectIndexBegin(*MBB->getParent()));
1916 2522
1917 return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1)) 2523 return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
1918 .addReg(IndirectBaseReg, RegState::Define) 2524 .addReg(IndirectBaseReg, RegState::Define)
1919 .addOperand(I->getOperand(0)) 2525 .addOperand(I->getOperand(0))
1927 MachineBasicBlock *MBB, 2533 MachineBasicBlock *MBB,
1928 MachineBasicBlock::iterator I, 2534 MachineBasicBlock::iterator I,
1929 unsigned ValueReg, 2535 unsigned ValueReg,
1930 unsigned Address, unsigned OffsetReg) const { 2536 unsigned Address, unsigned OffsetReg) const {
1931 const DebugLoc &DL = MBB->findDebugLoc(I); 2537 const DebugLoc &DL = MBB->findDebugLoc(I);
1932 unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister( 2538 unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
1933 getIndirectIndexBegin(*MBB->getParent())); 2539 getIndirectIndexBegin(*MBB->getParent()));
1934 2540
1935 return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC)) 2541 return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
1936 .addOperand(I->getOperand(0)) 2542 .addOperand(I->getOperand(0))
1937 .addOperand(I->getOperand(1)) 2543 .addOperand(I->getOperand(1))
1949 if (End == -1) 2555 if (End == -1)
1950 return; 2556 return;
1951 2557
1952 2558
1953 for (int Index = Begin; Index <= End; ++Index) 2559 for (int Index = Begin; Index <= End; ++Index)
1954 Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index)); 2560 Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index));
1955 2561
1956 for (int Index = std::max(0, Begin - 1); Index <= End; ++Index) 2562 for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
1957 Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index)); 2563 Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));
1958 2564
1959 for (int Index = std::max(0, Begin - 2); Index <= End; ++Index) 2565 for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
1968 for (int Index = std::max(0, Begin - 15); Index <= End; ++Index) 2574 for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
1969 Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index)); 2575 Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
1970 } 2576 }
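A worked example of the overlap reservation above, assuming the indirect index range is registers 8 through 10 (values illustrative):

    // VGPR_32  : indices 8..10 reserved
    // VReg_64  : indices 7..10 (a pair starting at 7 still overlaps index 8)
    // ...
    // VReg_512 : indices max(0, 8 - 15) = 0..10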
1971 2577
1972 MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, 2578 MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
1973 unsigned OperandName) const { 2579 unsigned OperandName) const {
1974 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName); 2580 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
1975 if (Idx == -1) 2581 if (Idx == -1)
1976 return nullptr; 2582 return nullptr;
1977 2583
1978 return &MI.getOperand(Idx); 2584 return &MI.getOperand(Idx);
1979 } 2585 }
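Usage is as in the SMRD splitting code above: the helper returns nullptr when the opcode has no operand of that name, e.g.

    const MachineOperand *OffOp = getNamedOperand(*MI, AMDGPU::OpName::offset);
    if (OffOp) {
      // _IMM variant; the _SGPR forms have no offset operand and yield nullptr.
    }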
2586
2587 uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
2588 uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
2589 if (ST.isAmdHsaOS())
2590 RsrcDataFormat |= (1ULL << 56);
2591
2592 return RsrcDataFormat;
2593 }
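The value produced here feeds the S_MOV_B32 pairs earlier in the patch in place of the raw AMDGPU::RSRC_DATA_FORMAT constant; on amdhsa it differs only in bit 56 (a restatement of the code above, with an illustrative variable name):

    uint64_t Fmt = getDefaultRsrcDataFormat();
    // non-HSA: Fmt == AMDGPU::RSRC_DATA_FORMAT
    // amdhsa : Fmt == AMDGPU::RSRC_DATA_FORMAT | (1ULL << 56)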