comparison lib/Target/R600/SIInstrInfo.cpp @ 83:60c9769439b8 (tag: LLVM 3.7)
author   | Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
date     | Wed, 18 Feb 2015 14:55:36 +0900
parents  | 54457678186b
children | (none)
78:af83660cff7b | 83:60c9769439b8
19 #include "SIMachineFunctionInfo.h" | 19 #include "SIMachineFunctionInfo.h" |
20 #include "llvm/CodeGen/MachineFrameInfo.h" | 20 #include "llvm/CodeGen/MachineFrameInfo.h" |
21 #include "llvm/CodeGen/MachineInstrBuilder.h" | 21 #include "llvm/CodeGen/MachineInstrBuilder.h" |
22 #include "llvm/CodeGen/MachineRegisterInfo.h" | 22 #include "llvm/CodeGen/MachineRegisterInfo.h" |
23 #include "llvm/IR/Function.h" | 23 #include "llvm/IR/Function.h" |
24 #include "llvm/CodeGen/RegisterScavenging.h" | |
24 #include "llvm/MC/MCInstrDesc.h" | 25 #include "llvm/MC/MCInstrDesc.h" |
26 #include "llvm/Support/Debug.h" | |
25 | 27 |
26 using namespace llvm; | 28 using namespace llvm; |
27 | 29 |
28 SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st) | 30 SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st) |
29 : AMDGPUInstrInfo(st), | 31 : AMDGPUInstrInfo(st), RI(st) {} |
30 RI(st) { } | |
31 | 32 |
32 //===----------------------------------------------------------------------===// | 33 //===----------------------------------------------------------------------===// |
33 // TargetInstrInfo callbacks | 34 // TargetInstrInfo callbacks |
34 //===----------------------------------------------------------------------===// | 35 //===----------------------------------------------------------------------===// |
35 | 36 |
85 // Make sure both are actually loads. | 86 // Make sure both are actually loads. |
86 if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad()) | 87 if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad()) |
87 return false; | 88 return false; |
88 | 89 |
89 if (isDS(Opc0) && isDS(Opc1)) { | 90 if (isDS(Opc0) && isDS(Opc1)) { |
90 assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)); | 91 |
91 | 92 // FIXME: Handle this case: |
92 // TODO: Also shouldn't see read2st | 93 if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1)) |
93 assert(Opc0 != AMDGPU::DS_READ2_B32 && | 94 return false; |
94 Opc0 != AMDGPU::DS_READ2_B64 && | |
95 Opc1 != AMDGPU::DS_READ2_B32 && | |
96 Opc1 != AMDGPU::DS_READ2_B64); | |
97 | 95 |
98 // Check base reg. | 96 // Check base reg. |
99 if (Load0->getOperand(1) != Load1->getOperand(1)) | 97 if (Load0->getOperand(1) != Load1->getOperand(1)) |
100 return false; | 98 return false; |
101 | 99 |
102 // Check chain. | 100 // Check chain. |
103 if (findChainOperand(Load0) != findChainOperand(Load1)) | 101 if (findChainOperand(Load0) != findChainOperand(Load1)) |
102 return false; | |
103 | |
104 // Skip read2 / write2 variants for simplicity. | |
105 // TODO: We should report true if the used offsets are adjacent (excluded | |
106 // st64 versions). | |
107 if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 || | |
108 AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1) | |
104 return false; | 109 return false; |
105 | 110 |
106 Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue(); | 111 Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue(); |
107 Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue(); | 112 Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue(); |
108 return true; | 113 return true; |
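This hunk downgrades the DS-pair asserts to graceful bail-outs: mismatched operand counts and read2/write2 variants now return false instead of asserting. A minimal standalone sketch of the resulting check, with a simplified stand-in struct instead of real SelectionDAG nodes:

```cpp
#include <cstdint>

// Hypothetical flattened view of a DS load; the real code queries SDNodes.
struct DSLoad {
  unsigned NumOperands; // operand count, glue excluded
  int BaseReg;          // operand 1: base register
  int Chain;            // chain operand
  bool IsRead2;         // read2/write2 variant (has a data1 operand)
  uint64_t OffsetImm;   // operand 2: encoded offset immediate
};

bool getDSPairOffsets(const DSLoad &L0, const DSLoad &L1,
                      unsigned &Offset0, unsigned &Offset1) {
  if (L0.NumOperands != L1.NumOperands) // was an assert before this change
    return false;
  if (L0.BaseReg != L1.BaseReg || L0.Chain != L1.Chain)
    return false;
  if (L0.IsRead2 || L1.IsRead2)         // skip read2/write2 for simplicity
    return false;
  Offset0 = static_cast<unsigned>(L0.OffsetImm);
  Offset1 = static_cast<unsigned>(L1.OffsetImm);
  return true;
}
```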
157 Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue(); | 162 Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue(); |
158 return true; | 163 return true; |
159 } | 164 } |
160 | 165 |
161 return false; | 166 return false; |
167 } | |
168 | |
169 static bool isStride64(unsigned Opc) { | |
170 switch (Opc) { | |
171 case AMDGPU::DS_READ2ST64_B32: | |
172 case AMDGPU::DS_READ2ST64_B64: | |
173 case AMDGPU::DS_WRITE2ST64_B32: | |
174 case AMDGPU::DS_WRITE2ST64_B64: | |
175 return true; | |
176 default: | |
177 return false; | |
178 } | |
162 } | 179 } |
163 | 180 |
164 bool SIInstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, | 181 bool SIInstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, |
165 unsigned &BaseReg, unsigned &Offset, | 182 unsigned &BaseReg, unsigned &Offset, |
166 const TargetRegisterInfo *TRI) const { | 183 const TargetRegisterInfo *TRI) const { |
201 assert(LdSt->mayStore()); | 218 assert(LdSt->mayStore()); |
202 int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); | 219 int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); |
203 EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize(); | 220 EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize(); |
204 } | 221 } |
205 | 222 |
223 if (isStride64(Opc)) | |
224 EltSize *= 64; | |
225 | |
206 const MachineOperand *AddrReg = getNamedOperand(*LdSt, | 226 const MachineOperand *AddrReg = getNamedOperand(*LdSt, |
207 AMDGPU::OpName::addr); | 227 AMDGPU::OpName::addr); |
208 BaseReg = AddrReg->getReg(); | 228 BaseReg = AddrReg->getReg(); |
209 Offset = EltSize * Offset0; | 229 Offset = EltSize * Offset0; |
210 return true; | 230 return true; |
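The new isStride64 helper feeds the offset computation just above: ST64 read2/write2 forms place their elements 64 slots apart, so EltSize is scaled by 64 before multiplying the encoded offset. The arithmetic in isolation, with the opcode check folded into a bool:

```cpp
#include <cassert>

unsigned byteOffset(unsigned EltSize, bool Stride64, unsigned EncodedOffset) {
  if (Stride64)
    EltSize *= 64; // DS_*2ST64_* stride between the two elements
  return EltSize * EncodedOffset;
}

int main() {
  assert(byteOffset(4, false, 3) == 12);  // e.g. DS_READ2_B32, offset0 = 3
  assert(byteOffset(4, true, 3) == 768);  // e.g. DS_READ2ST64_B32: 3 * 64 * 4
}
```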
239 AMDGPU::OpName::sbase); | 259 AMDGPU::OpName::sbase); |
240 BaseReg = SBaseReg->getReg(); | 260 BaseReg = SBaseReg->getReg(); |
241 Offset = OffsetImm->getImm(); | 261 Offset = OffsetImm->getImm(); |
242 return true; | 262 return true; |
243 } | 263 } |
264 | |
265 return false; | |
266 } | |
267 | |
268 bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, | |
269 MachineInstr *SecondLdSt, | |
270 unsigned NumLoads) const { | |
271 unsigned Opc0 = FirstLdSt->getOpcode(); | |
272 unsigned Opc1 = SecondLdSt->getOpcode(); | |
273 | |
274 // TODO: This needs finer tuning | |
275 if (NumLoads > 4) | |
276 return false; | |
277 | |
278 if (isDS(Opc0) && isDS(Opc1)) | |
279 return true; | |
280 | |
281 if (isSMRD(Opc0) && isSMRD(Opc1)) | |
282 return true; | |
283 | |
284 if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) | |
285 return true; | |
244 | 286 |
245 return false; | 287 return false; |
246 } | 288 } |
247 | 289 |
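shouldClusterLoads is new here: at most four loads are clustered, and only when both come from the same memory-instruction family (the two buffer formats count as one family). A sketch of the predicate, with a hypothetical MemKind enum standing in for the opcode queries:

```cpp
enum class MemKind { DS, SMRD, MUBUF, MTBUF, Other };

bool shouldCluster(MemKind A, MemKind B, unsigned NumLoads) {
  if (NumLoads > 4) // TODO in the real code: needs finer tuning
    return false;
  auto IsBuf = [](MemKind K) {
    return K == MemKind::MUBUF || K == MemKind::MTBUF;
  };
  return (A == MemKind::DS && B == MemKind::DS) ||
         (A == MemKind::SMRD && B == MemKind::SMRD) ||
         (IsBuf(A) && IsBuf(B));
}
```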
248 void | 290 void |
281 }; | 323 }; |
282 | 324 |
283 unsigned Opcode; | 325 unsigned Opcode; |
284 const int16_t *SubIndices; | 326 const int16_t *SubIndices; |
285 | 327 |
286 if (AMDGPU::M0 == DestReg) { | |
287 // Check if M0 isn't already set to this value | |
288 for (MachineBasicBlock::reverse_iterator E = MBB.rend(), | |
289 I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) { | |
290 | |
291 if (!I->definesRegister(AMDGPU::M0)) | |
292 continue; | |
293 | |
294 unsigned Opc = I->getOpcode(); | |
295 if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32) | |
296 break; | |
297 | |
298 if (!I->readsRegister(SrcReg)) | |
299 break; | |
300 | |
301 // The copy isn't necessary | |
302 return; | |
303 } | |
304 } | |
305 | |
306 if (AMDGPU::SReg_32RegClass.contains(DestReg)) { | 328 if (AMDGPU::SReg_32RegClass.contains(DestReg)) { |
307 assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); | 329 assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); |
308 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) | 330 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) |
309 .addReg(SrcReg, getKillRegState(KillSrc)); | 331 .addReg(SrcReg, getKillRegState(KillSrc)); |
310 return; | 332 return; |
311 | 333 |
312 } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) { | 334 } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) { |
335 if (DestReg == AMDGPU::VCC) { | |
336 if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { | |
337 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) | |
338 .addReg(SrcReg, getKillRegState(KillSrc)); | |
339 } else { | |
340 // FIXME: Hack until VReg_1 removed. | |
341 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); | |
342 BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC) | |
343 .addImm(0) | |
344 .addReg(SrcReg, getKillRegState(KillSrc)); | |
345 } | |
346 | |
347 return; | |
348 } | |
349 | |
313 assert(AMDGPU::SReg_64RegClass.contains(SrcReg)); | 350 assert(AMDGPU::SReg_64RegClass.contains(SrcReg)); |
314 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) | 351 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) |
315 .addReg(SrcReg, getKillRegState(KillSrc)); | 352 .addReg(SrcReg, getKillRegState(KillSrc)); |
316 return; | 353 return; |
317 | 354 |
328 } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) { | 365 } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) { |
329 assert(AMDGPU::SReg_512RegClass.contains(SrcReg)); | 366 assert(AMDGPU::SReg_512RegClass.contains(SrcReg)); |
330 Opcode = AMDGPU::S_MOV_B32; | 367 Opcode = AMDGPU::S_MOV_B32; |
331 SubIndices = Sub0_15; | 368 SubIndices = Sub0_15; |
332 | 369 |
333 } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) { | 370 } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) { |
334 assert(AMDGPU::VReg_32RegClass.contains(SrcReg) || | 371 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || |
335 AMDGPU::SReg_32RegClass.contains(SrcReg)); | 372 AMDGPU::SReg_32RegClass.contains(SrcReg)); |
336 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) | 373 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) |
337 .addReg(SrcReg, getKillRegState(KillSrc)); | 374 .addReg(SrcReg, getKillRegState(KillSrc)); |
338 return; | 375 return; |
339 | 376 |
383 | 420 |
384 unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const { | 421 unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const { |
385 int NewOpc; | 422 int NewOpc; |
386 | 423 |
387 // Try to map original to commuted opcode | 424 // Try to map original to commuted opcode |
388 if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1) | 425 NewOpc = AMDGPU::getCommuteRev(Opcode); |
426 // Check if the commuted (REV) opcode exists on the target. | |
427 if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1) | |
389 return NewOpc; | 428 return NewOpc; |
390 | 429 |
391 // Try to map commuted to original opcode | 430 // Try to map commuted to original opcode |
392 if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1) | 431 NewOpc = AMDGPU::getCommuteOrig(Opcode); |
432 // Check if the original (non-REV) opcode exists on the target. | |
433 if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1) | |
393 return NewOpc; | 434 return NewOpc; |
394 | 435 |
395 return Opcode; | 436 return Opcode; |
437 } | |
438 | |
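commuteOpcode now validates that a commuted (REV) opcode actually encodes on the current target before returning it; pseudoToMCOpcode returning -1 means "no encoding". A sketch of the lookup-then-validate pattern, with a plain std::map standing in for the TableGen-generated tables:

```cpp
#include <map>

// Returns -1 when no commuted form is known (mirrors getCommuteRev).
int getCommuteRev(unsigned Opc, const std::map<unsigned, int> &RevTable) {
  auto It = RevTable.find(Opc);
  return It == RevTable.end() ? -1 : It->second;
}

unsigned commuteOpcode(unsigned Opc, const std::map<unsigned, int> &RevTable,
                       bool (*ExistsOnTarget)(int)) {
  int NewOpc = getCommuteRev(Opc, RevTable);
  if (NewOpc != -1 && ExistsOnTarget(NewOpc)) // the new pseudoToMCOpcode guard
    return NewOpc;
  return Opc; // fall back to the original opcode
}
```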
439 unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { | |
440 | |
441 if (DstRC->getSize() == 4) { | |
442 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; | |
443 } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) { | |
444 return AMDGPU::S_MOV_B64; | |
445 } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) { | |
446 return AMDGPU::V_MOV_B64_PSEUDO; | |
447 } | |
448 return AMDGPU::COPY; | |
396 } | 449 } |
397 | 450 |
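getMovOpcode picks a move by operand size and register bank; 64-bit VGPR moves go through the new V_MOV_B64_PSEUDO expanded later in this change. A small sketch of the selection, returning opcode names as strings purely for illustration:

```cpp
#include <string>

std::string movOpcodeFor(unsigned SizeInBytes, bool IsSGPR) {
  if (SizeInBytes == 4)
    return IsSGPR ? "S_MOV_B32" : "V_MOV_B32_e32";
  if (SizeInBytes == 8)
    return IsSGPR ? "S_MOV_B64" : "V_MOV_B64_PSEUDO";
  return "COPY"; // larger classes fall back to a generic copy
}
```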
398 void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, | 451 void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, |
399 MachineBasicBlock::iterator MI, | 452 MachineBasicBlock::iterator MI, |
400 unsigned SrcReg, bool isKill, | 453 unsigned SrcReg, bool isKill, |
401 int FrameIndex, | 454 int FrameIndex, |
402 const TargetRegisterClass *RC, | 455 const TargetRegisterClass *RC, |
403 const TargetRegisterInfo *TRI) const { | 456 const TargetRegisterInfo *TRI) const { |
404 MachineFunction *MF = MBB.getParent(); | 457 MachineFunction *MF = MBB.getParent(); |
458 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); | |
405 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); | 459 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); |
406 DebugLoc DL = MBB.findDebugLoc(MI); | 460 DebugLoc DL = MBB.findDebugLoc(MI); |
407 | 461 int Opcode = -1; |
408 if (RI.hasVGPRs(RC)) { | 462 |
409 LLVMContext &Ctx = MF->getFunction()->getContext(); | 463 if (RI.isSGPRClass(RC)) { |
410 Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Can't spill VGPR!"); | |
411 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0) | |
412 .addReg(SrcReg); | |
413 } else if (RI.isSGPRClass(RC)) { | |
414 // We are only allowed to create one new instruction when spilling | 464 // We are only allowed to create one new instruction when spilling |
415 // registers, so we need to use pseudo instruction for spilling | 465 // registers, so we need to use pseudo instruction for spilling |
416 // SGPRs. | 466 // SGPRs. |
417 unsigned Opcode; | |
418 switch (RC->getSize() * 8) { | 467 switch (RC->getSize() * 8) { |
419 case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break; | 468 case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break; |
420 case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break; | 469 case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break; |
421 case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break; | 470 case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break; |
422 case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break; | 471 case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break; |
423 case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break; | 472 case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break; |
424 default: llvm_unreachable("Cannot spill register class"); | 473 } |
425 } | 474 } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) { |
426 | 475 MFI->setHasSpilledVGPRs(); |
476 | |
477 switch(RC->getSize() * 8) { | |
478 case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break; | |
479 case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break; | |
480 case 96: Opcode = AMDGPU::SI_SPILL_V96_SAVE; break; | |
481 case 128: Opcode = AMDGPU::SI_SPILL_V128_SAVE; break; | |
482 case 256: Opcode = AMDGPU::SI_SPILL_V256_SAVE; break; | |
483 case 512: Opcode = AMDGPU::SI_SPILL_V512_SAVE; break; | |
484 } | |
485 } | |
486 | |
487 if (Opcode != -1) { | |
427 FrameInfo->setObjectAlignment(FrameIndex, 4); | 488 FrameInfo->setObjectAlignment(FrameIndex, 4); |
428 BuildMI(MBB, MI, DL, get(Opcode)) | 489 BuildMI(MBB, MI, DL, get(Opcode)) |
429 .addReg(SrcReg) | 490 .addReg(SrcReg) |
430 .addFrameIndex(FrameIndex); | 491 .addFrameIndex(FrameIndex) |
492 // Place-holder registers, these will be filled in by | |
493 // SIPrepareScratchRegs. | |
494 .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) | |
495 .addReg(AMDGPU::SGPR0, RegState::Undef); | |
431 } else { | 496 } else { |
432 llvm_unreachable("VGPR spilling not supported"); | 497 LLVMContext &Ctx = MF->getFunction()->getContext(); |
498 Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to" | |
499 " spill register"); | |
500 BuildMI(MBB, MI, DL, get(AMDGPU::KILL)) | |
501 .addReg(SrcReg); | |
433 } | 502 } |
434 } | 503 } |
435 | 504 |
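The store path now selects a spill pseudo by register-class size, for SGPRs always and for VGPRs when spilling is enabled, with -1 meaning "emit an error instead of silently miscompiling". A sketch of just the selection; returning the bit width in place of real opcode enums is illustration only:

```cpp
int selectSpillOpcode(unsigned RegBits, bool IsSGPR, bool VGPRSpillEnabled) {
  if (IsSGPR) {
    switch (RegBits) { // stands in for SI_SPILL_S{32..512}_SAVE
    case 32: case 64: case 128: case 256: case 512:
      return static_cast<int>(RegBits);
    }
  } else if (VGPRSpillEnabled) {
    switch (RegBits) { // stands in for SI_SPILL_V{32..512}_SAVE
    case 32: case 64: case 96: case 128: case 256: case 512:
      return static_cast<int>(RegBits);
    }
  }
  return -1; // unknown class: caller reports an error
}
```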
436 void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, | 505 void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, |
437 MachineBasicBlock::iterator MI, | 506 MachineBasicBlock::iterator MI, |
438 unsigned DestReg, int FrameIndex, | 507 unsigned DestReg, int FrameIndex, |
439 const TargetRegisterClass *RC, | 508 const TargetRegisterClass *RC, |
440 const TargetRegisterInfo *TRI) const { | 509 const TargetRegisterInfo *TRI) const { |
441 MachineFunction *MF = MBB.getParent(); | 510 MachineFunction *MF = MBB.getParent(); |
511 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); | |
442 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); | 512 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); |
443 DebugLoc DL = MBB.findDebugLoc(MI); | 513 DebugLoc DL = MBB.findDebugLoc(MI); |
444 | 514 int Opcode = -1; |
445 if (RI.hasVGPRs(RC)) { | 515 |
446 LLVMContext &Ctx = MF->getFunction()->getContext(); | 516 if (RI.isSGPRClass(RC)){ |
447 Ctx.emitError("SIInstrInfo::loadRegToStackSlot - Can't retrieve spilled VGPR!"); | |
448 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) | |
449 .addImm(0); | |
450 } else if (RI.isSGPRClass(RC)){ | |
451 unsigned Opcode; | |
452 switch(RC->getSize() * 8) { | 517 switch(RC->getSize() * 8) { |
453 case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break; | 518 case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break; |
454 case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break; | 519 case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break; |
455 case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break; | 520 case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break; |
456 case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break; | 521 case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break; |
457 case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break; | 522 case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break; |
458 default: llvm_unreachable("Cannot spill register class"); | 523 } |
459 } | 524 } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) { |
460 | 525 switch(RC->getSize() * 8) { |
526 case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break; | |
527 case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break; | |
528 case 96: Opcode = AMDGPU::SI_SPILL_V96_RESTORE; break; | |
529 case 128: Opcode = AMDGPU::SI_SPILL_V128_RESTORE; break; | |
530 case 256: Opcode = AMDGPU::SI_SPILL_V256_RESTORE; break; | |
531 case 512: Opcode = AMDGPU::SI_SPILL_V512_RESTORE; break; | |
532 } | |
533 } | |
534 | |
535 if (Opcode != -1) { | |
461 FrameInfo->setObjectAlignment(FrameIndex, 4); | 536 FrameInfo->setObjectAlignment(FrameIndex, 4); |
462 BuildMI(MBB, MI, DL, get(Opcode), DestReg) | 537 BuildMI(MBB, MI, DL, get(Opcode), DestReg) |
463 .addFrameIndex(FrameIndex); | 538 .addFrameIndex(FrameIndex) |
539 // Place-holder registers, these will be filled in by | |
540 // SIPrepareScratchRegs. | |
541 .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) | |
542 .addReg(AMDGPU::SGPR0, RegState::Undef); | |
543 | |
464 } else { | 544 } else { |
465 llvm_unreachable("VGPR spilling not supported"); | 545 LLVMContext &Ctx = MF->getFunction()->getContext(); |
466 } | 546 Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to" |
547 " restore register"); | |
548 BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg); | |
549 } | |
550 } | |
551 | |
552 /// \param FrameOffset Offset in bytes of the FrameIndex being spilled |
553 unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB, | |
554 MachineBasicBlock::iterator MI, | |
555 RegScavenger *RS, unsigned TmpReg, | |
556 unsigned FrameOffset, | |
557 unsigned Size) const { | |
558 MachineFunction *MF = MBB.getParent(); | |
559 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); | |
560 const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>(); | |
561 const SIRegisterInfo *TRI = | |
562 static_cast<const SIRegisterInfo*>(ST.getRegisterInfo()); | |
563 DebugLoc DL = MBB.findDebugLoc(MI); | |
564 unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF); | |
565 unsigned WavefrontSize = ST.getWavefrontSize(); | |
566 | |
567 unsigned TIDReg = MFI->getTIDReg(); | |
568 if (!MFI->hasCalculatedTID()) { | |
569 MachineBasicBlock &Entry = MBB.getParent()->front(); | |
570 MachineBasicBlock::iterator Insert = Entry.front(); | |
571 DebugLoc DL = Insert->getDebugLoc(); | |
572 | |
573 TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass); | |
574 if (TIDReg == AMDGPU::NoRegister) | |
575 return TIDReg; | |
576 | |
577 | |
578 if (MFI->getShaderType() == ShaderType::COMPUTE && | |
579 WorkGroupSize > WavefrontSize) { | |
580 | |
581 unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X); | |
582 unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y); | |
583 unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z); | |
584 unsigned InputPtrReg = | |
585 TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR); | |
586 static const unsigned TIDIGRegs[3] = { | |
587 TIDIGXReg, TIDIGYReg, TIDIGZReg | |
588 }; | |
589 for (unsigned Reg : TIDIGRegs) { | |
590 if (!Entry.isLiveIn(Reg)) | |
591 Entry.addLiveIn(Reg); | |
592 } | |
593 | |
594 RS->enterBasicBlock(&Entry); | |
595 unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); | |
596 unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); | |
597 BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0) | |
598 .addReg(InputPtrReg) | |
599 .addImm(SI::KernelInputOffsets::NGROUPS_Z); | |
600 BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1) | |
601 .addReg(InputPtrReg) | |
602 .addImm(SI::KernelInputOffsets::NGROUPS_Y); | |
603 | |
604 // NGROUPS.X * NGROUPS.Y | |
605 BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1) | |
606 .addReg(STmp1) | |
607 .addReg(STmp0); | |
608 // (NGROUPS.X * NGROUPS.Y) * TIDIG.X | |
609 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg) | |
610 .addReg(STmp1) | |
611 .addReg(TIDIGXReg); | |
612 // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X) |
613 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg) | |
614 .addReg(STmp0) | |
615 .addReg(TIDIGYReg) | |
616 .addReg(TIDReg); | |
617 // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z |
618 BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg) | |
619 .addReg(TIDReg) | |
620 .addReg(TIDIGZReg); | |
621 } else { | |
622 // Get the wave id | |
623 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64), | |
624 TIDReg) | |
625 .addImm(-1) | |
626 .addImm(0); | |
627 | |
628 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64), | |
629 TIDReg) | |
630 .addImm(-1) | |
631 .addReg(TIDReg); | |
632 } | |
633 | |
634 BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32), | |
635 TIDReg) | |
636 .addImm(2) | |
637 .addReg(TIDReg); | |
638 MFI->setTIDReg(TIDReg); | |
639 } | |
640 | |
641 // Add FrameIndex to LDS offset | |
642 unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize); | |
643 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg) | |
644 .addImm(LDSOffset) | |
645 .addReg(TIDReg); | |
646 | |
647 return TmpReg; | |
467 } | 648 } |
468 | 649 |
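calculateLDSSpillAddress lands at `LDSSize + FrameOffset * WorkGroupSize + TID * 4`; the earlier V_LSHLREV_B32 by 2 supplies the `* 4`, so every work-item gets its own dword slot. The address arithmetic by itself:

```cpp
#include <cassert>

unsigned ldsSpillAddress(unsigned LDSSize, unsigned FrameOffset,
                         unsigned WorkGroupSize, unsigned TID) {
  return LDSSize + FrameOffset * WorkGroupSize + TID * 4;
}

int main() {
  // e.g. 1 KiB of static LDS, frame offset 8, 256 work-items, thread 5
  assert(ldsSpillAddress(1024, 8, 256, 5) == 1024 + 2048 + 20);
}
```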
469 void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI, | 650 void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI, |
470 int Count) const { | 651 int Count) const { |
471 while (Count > 0) { | 652 while (Count > 0) { |
504 .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit) | 685 .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit) |
505 .addReg(AMDGPU::SCC, RegState::Implicit); | 686 .addReg(AMDGPU::SCC, RegState::Implicit); |
506 MI->eraseFromParent(); | 687 MI->eraseFromParent(); |
507 break; | 688 break; |
508 } | 689 } |
690 case AMDGPU::SGPR_USE: | |
691 // This is just a placeholder for register allocation. | |
692 MI->eraseFromParent(); | |
693 break; | |
694 | |
695 case AMDGPU::V_MOV_B64_PSEUDO: { | |
696 unsigned Dst = MI->getOperand(0).getReg(); | |
697 unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); | |
698 unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); | |
699 | |
700 const MachineOperand &SrcOp = MI->getOperand(1); | |
701 // FIXME: Will this work for 64-bit floating point immediates? | |
702 assert(!SrcOp.isFPImm()); | |
703 if (SrcOp.isImm()) { | |
704 APInt Imm(64, SrcOp.getImm()); | |
705 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) | |
706 .addImm(Imm.getLoBits(32).getZExtValue()) | |
707 .addReg(Dst, RegState::Implicit); | |
708 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) | |
709 .addImm(Imm.getHiBits(32).getZExtValue()) | |
710 .addReg(Dst, RegState::Implicit); | |
711 } else { | |
712 assert(SrcOp.isReg()); | |
713 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) | |
714 .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0)) | |
715 .addReg(Dst, RegState::Implicit); | |
716 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) | |
717 .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1)) | |
718 .addReg(Dst, RegState::Implicit); | |
719 } | |
720 MI->eraseFromParent(); | |
721 break; | |
722 } | |
509 } | 723 } |
510 return true; | 724 return true; |
511 } | 725 } |
512 | 726 |
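The V_MOV_B64_PSEUDO expansion splits a 64-bit immediate into two 32-bit moves targeting the sub0/sub1 halves of the destination pair, since there is no real 64-bit VALU move. The bit manipulation in isolation:

```cpp
#include <cassert>
#include <cstdint>

void splitImm64(uint64_t Imm, uint32_t &Lo, uint32_t &Hi) {
  Lo = static_cast<uint32_t>(Imm & 0xffffffffu); // V_MOV_B32 to DstLo (sub0)
  Hi = static_cast<uint32_t>(Imm >> 32);         // V_MOV_B32 to DstHi (sub1)
}

int main() {
  uint32_t Lo, Hi;
  splitImm64(0x123456789abcdef0ull, Lo, Hi);
  assert(Lo == 0x9abcdef0u && Hi == 0x12345678u);
}
```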
513 MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, | 727 MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, |
514 bool NewMI) const { | 728 bool NewMI) const { |
515 | 729 |
516 if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg()) | 730 if (MI->getNumOperands() < 3) |
517 return nullptr; | 731 return nullptr; |
518 | 732 |
519 // Make sure it's legal to commute operands for VOP2. | 733 int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), |
734 AMDGPU::OpName::src0); | |
735 assert(Src0Idx != -1 && "Should always have src0 operand"); | |
736 | |
737 MachineOperand &Src0 = MI->getOperand(Src0Idx); | |
738 if (!Src0.isReg()) | |
739 return nullptr; | |
740 | |
741 int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), | |
742 AMDGPU::OpName::src1); | |
743 if (Src1Idx == -1) | |
744 return nullptr; | |
745 | |
746 MachineOperand &Src1 = MI->getOperand(Src1Idx); | |
747 | |
748 // Make sure it's legal to commute operands for VOP2. | |
520 if (isVOP2(MI->getOpcode()) && | 749 if (isVOP2(MI->getOpcode()) && |
521 (!isOperandLegal(MI, 1, &MI->getOperand(2)) || | 750 (!isOperandLegal(MI, Src0Idx, &Src1) || |
522 !isOperandLegal(MI, 2, &MI->getOperand(1)))) | 751 !isOperandLegal(MI, Src1Idx, &Src0))) { |
523 return nullptr; | 752 return nullptr; |
524 | 753 } |
525 if (!MI->getOperand(2).isReg()) { | 754 |
526 // XXX: Commute instructions with FPImm operands | 755 if (!Src1.isReg()) { |
527 if (NewMI || MI->getOperand(2).isFPImm() || | 756 // Allow commuting instructions with Imm operands. |
757 if (NewMI || !Src1.isImm() || | |
528 (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) { | 758 (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) { |
529 return nullptr; | 759 return nullptr; |
530 } | 760 } |
531 | 761 |
532 // XXX: Commute VOP3 instructions with abs and neg set. | 762 // Be sure to copy the source modifiers to the right place. |
533 const MachineOperand *Abs = getNamedOperand(*MI, AMDGPU::OpName::abs); | 763 if (MachineOperand *Src0Mods |
534 const MachineOperand *Neg = getNamedOperand(*MI, AMDGPU::OpName::neg); | 764 = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) { |
535 const MachineOperand *Src0Mods = getNamedOperand(*MI, | 765 MachineOperand *Src1Mods |
536 AMDGPU::OpName::src0_modifiers); | 766 = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers); |
537 const MachineOperand *Src1Mods = getNamedOperand(*MI, | 767 |
538 AMDGPU::OpName::src1_modifiers); | 768 int Src0ModsVal = Src0Mods->getImm(); |
539 const MachineOperand *Src2Mods = getNamedOperand(*MI, | 769 if (!Src1Mods && Src0ModsVal != 0) |
540 AMDGPU::OpName::src2_modifiers); | 770 return nullptr; |
541 | 771 |
542 if ((Abs && Abs->getImm()) || (Neg && Neg->getImm()) || | 772 // XXX - This assert might be a lie. It might be useful to have a neg |
543 (Src0Mods && Src0Mods->getImm()) || (Src1Mods && Src1Mods->getImm()) || | 773 // modifier with 0.0. |
544 (Src2Mods && Src2Mods->getImm())) | 774 int Src1ModsVal = Src1Mods->getImm(); |
545 return nullptr; | 775 assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates"); |
546 | 776 |
547 unsigned Reg = MI->getOperand(1).getReg(); | 777 Src1Mods->setImm(Src0ModsVal); |
548 unsigned SubReg = MI->getOperand(1).getSubReg(); | 778 Src0Mods->setImm(Src1ModsVal); |
549 MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm()); | 779 } |
550 MI->getOperand(2).ChangeToRegister(Reg, false); | 780 |
551 MI->getOperand(2).setSubReg(SubReg); | 781 unsigned Reg = Src0.getReg(); |
782 unsigned SubReg = Src0.getSubReg(); | |
783 if (Src1.isImm()) | |
784 Src0.ChangeToImmediate(Src1.getImm()); | |
785 else | |
786 llvm_unreachable("Should only have immediates"); | |
787 | |
788 Src1.ChangeToRegister(Reg, false); | |
789 Src1.setSubReg(SubReg); | |
552 } else { | 790 } else { |
553 MI = TargetInstrInfo::commuteInstruction(MI, NewMI); | 791 MI = TargetInstrInfo::commuteInstruction(MI, NewMI); |
554 } | 792 } |
555 | 793 |
556 if (MI) | 794 if (MI) |
557 MI->setDesc(get(commuteOpcode(MI->getOpcode()))); | 795 MI->setDesc(get(commuteOpcode(MI->getOpcode()))); |
558 | 796 |
559 return MI; | 797 return MI; |
798 } | |
799 | |
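Commuting a register/immediate pair now also swaps the src0_modifiers/src1_modifiers operands, so the register keeps its neg/abs bits when it moves to the src1 slot. A simplified model with the modifiers stored alongside the sources:

```cpp
#include <utility>

// Hypothetical flattened VOP instruction; modifiers are separate operands
// in the real MachineInstr.
struct VOP {
  bool Src0IsReg; long Src0; // register id or immediate payload
  bool Src1IsReg; long Src1;
  int Src0Mods, Src1Mods;    // 0 = no modifiers
};

bool commuteRegImm(VOP &MI) {
  if (!MI.Src0IsReg || MI.Src1IsReg)
    return false;            // only the reg/imm shape is handled here
  if (MI.Src1Mods != 0)
    return false;            // "Not expecting modifiers with immediates"
  std::swap(MI.Src0, MI.Src1);
  std::swap(MI.Src0IsReg, MI.Src1IsReg);
  std::swap(MI.Src0Mods, MI.Src1Mods); // modifiers travel with their operand
  return true;
}
```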
800 // This needs to be implemented because the source modifiers may be inserted | |
801 // between the true commutable operands, and the base | |
802 // TargetInstrInfo::commuteInstruction uses it. | |
803 bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI, | |
804 unsigned &SrcOpIdx1, | |
805 unsigned &SrcOpIdx2) const { | |
806 const MCInstrDesc &MCID = MI->getDesc(); | |
807 if (!MCID.isCommutable()) | |
808 return false; | |
809 | |
810 unsigned Opc = MI->getOpcode(); | |
811 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); | |
812 if (Src0Idx == -1) | |
813 return false; | |
814 | |
815 // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on | |
816 // immediate. | |
817 if (!MI->getOperand(Src0Idx).isReg()) | |
818 return false; | |
819 | |
820 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); | |
821 if (Src1Idx == -1) | |
822 return false; | |
823 | |
824 if (!MI->getOperand(Src1Idx).isReg()) | |
825 return false; | |
826 | |
827 // If any source modifiers are set, the generic instruction commuting won't | |
828 // understand how to copy the source modifiers. | |
829 if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) || | |
830 hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers)) | |
831 return false; | |
832 | |
833 SrcOpIdx1 = Src0Idx; | |
834 SrcOpIdx2 = Src1Idx; | |
835 return true; | |
560 } | 836 } |
561 | 837 |
562 MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB, | 838 MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB, |
563 MachineBasicBlock::iterator I, | 839 MachineBasicBlock::iterator I, |
564 unsigned DstReg, | 840 unsigned DstReg, |
593 case AMDGPU::V_MOV_B32_e32: | 869 case AMDGPU::V_MOV_B32_e32: |
594 return MI->getOperand(1).isImm(); | 870 return MI->getOperand(1).isImm(); |
595 } | 871 } |
596 } | 872 } |
597 | 873 |
598 namespace llvm { | 874 static bool offsetsDoNotOverlap(int WidthA, int OffsetA, |
599 namespace AMDGPU { | 875 int WidthB, int OffsetB) { |
600 // Helper function generated by tablegen. We are wrapping this with | 876 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; |
601 // an SIInstrInfo function that returns bool rather than int. | 877 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; |
602 int isDS(uint16_t Opcode); | 878 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; |
603 } | 879 return LowOffset + LowWidth <= HighOffset; |
604 } | 880 } |
605 | 881 |
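offsetsDoNotOverlap is a plain interval-disjointness test: the access that starts lower must end at or before the higher one begins. It compiles standalone:

```cpp
#include <cassert>

bool offsetsDoNotOverlap(int WidthA, int OffsetA, int WidthB, int OffsetB) {
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}

int main() {
  assert(offsetsDoNotOverlap(4, 0, 4, 4));  // [0,4) and [4,8) are disjoint
  assert(!offsetsDoNotOverlap(8, 0, 4, 4)); // [0,8) overlaps [4,8)
}
```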
606 bool SIInstrInfo::isDS(uint16_t Opcode) const { | 882 bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa, |
607 return ::AMDGPU::isDS(Opcode) != -1; | 883 MachineInstr *MIb) const { |
608 } | 884 unsigned BaseReg0, Offset0; |
609 | 885 unsigned BaseReg1, Offset1; |
610 bool SIInstrInfo::isMIMG(uint16_t Opcode) const { | 886 |
611 return get(Opcode).TSFlags & SIInstrFlags::MIMG; | 887 if (getLdStBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) && |
612 } | 888 getLdStBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) { |
613 | 889 assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() && |
614 bool SIInstrInfo::isSMRD(uint16_t Opcode) const { | 890 "read2 / write2 not expected here yet"); |
615 return get(Opcode).TSFlags & SIInstrFlags::SMRD; | 891 unsigned Width0 = (*MIa->memoperands_begin())->getSize(); |
616 } | 892 unsigned Width1 = (*MIb->memoperands_begin())->getSize(); |
617 | 893 if (BaseReg0 == BaseReg1 && |
618 bool SIInstrInfo::isMUBUF(uint16_t Opcode) const { | 894 offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) { |
619 return get(Opcode).TSFlags & SIInstrFlags::MUBUF; | 895 return true; |
620 } | 896 } |
621 | 897 } |
622 bool SIInstrInfo::isMTBUF(uint16_t Opcode) const { | 898 |
623 return get(Opcode).TSFlags & SIInstrFlags::MTBUF; | 899 return false; |
624 } | 900 } |
625 | 901 |
626 bool SIInstrInfo::isVOP1(uint16_t Opcode) const { | 902 bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, |
627 return get(Opcode).TSFlags & SIInstrFlags::VOP1; | 903 MachineInstr *MIb, |
628 } | 904 AliasAnalysis *AA) const { |
629 | 905 unsigned Opc0 = MIa->getOpcode(); |
630 bool SIInstrInfo::isVOP2(uint16_t Opcode) const { | 906 unsigned Opc1 = MIb->getOpcode(); |
631 return get(Opcode).TSFlags & SIInstrFlags::VOP2; | 907 |
632 } | 908 assert(MIa && (MIa->mayLoad() || MIa->mayStore()) && |
633 | 909 "MIa must load from or modify a memory location"); |
634 bool SIInstrInfo::isVOP3(uint16_t Opcode) const { | 910 assert(MIb && (MIb->mayLoad() || MIb->mayStore()) && |
635 return get(Opcode).TSFlags & SIInstrFlags::VOP3; | 911 "MIb must load from or modify a memory location"); |
636 } | 912 |
637 | 913 if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects()) |
638 bool SIInstrInfo::isVOPC(uint16_t Opcode) const { | 914 return false; |
639 return get(Opcode).TSFlags & SIInstrFlags::VOPC; | 915 |
640 } | 916 // XXX - Can we relax this between address spaces? |
641 | 917 if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef()) |
642 bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const { | 918 return false; |
643 return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU; | 919 |
920 // TODO: Should we check the address space from the MachineMemOperand? That | |
921 // would allow us to distinguish objects we know don't alias based on the | |
921 // underlying address space, even if it was lowered to a different one, |
923 // e.g. private accesses lowered to use MUBUF instructions on a scratch | |
924 // buffer. | |
925 if (isDS(Opc0)) { | |
926 if (isDS(Opc1)) | |
927 return checkInstOffsetsDoNotOverlap(MIa, MIb); | |
928 | |
929 return !isFLAT(Opc1); | |
930 } | |
931 | |
932 if (isMUBUF(Opc0) || isMTBUF(Opc0)) { | |
933 if (isMUBUF(Opc1) || isMTBUF(Opc1)) | |
934 return checkInstOffsetsDoNotOverlap(MIa, MIb); | |
935 | |
936 return !isFLAT(Opc1) && !isSMRD(Opc1); | |
937 } | |
938 | |
939 if (isSMRD(Opc0)) { | |
940 if (isSMRD(Opc1)) | |
941 return checkInstOffsetsDoNotOverlap(MIa, MIb); | |
942 | |
943 return !isFLAT(Opc1) && !isMUBUF(Opc0) && !isMTBUF(Opc0); | |
944 } | |
945 | |
946 if (isFLAT(Opc0)) { | |
947 if (isFLAT(Opc1)) | |
948 return checkInstOffsetsDoNotOverlap(MIa, MIb); | |
949 | |
950 return false; | |
951 } | |
952 | |
953 return false; | |
644 } | 954 } |
645 | 955 |
646 bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { | 956 bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { |
647 int32_t Val = Imm.getSExtValue(); | 957 int64_t SVal = Imm.getSExtValue(); |
648 if (Val >= -16 && Val <= 64) | 958 if (SVal >= -16 && SVal <= 64) |
649 return true; | 959 return true; |
960 | |
961 if (Imm.getBitWidth() == 64) { | |
962 uint64_t Val = Imm.getZExtValue(); | |
963 return (DoubleToBits(0.0) == Val) || | |
964 (DoubleToBits(1.0) == Val) || | |
965 (DoubleToBits(-1.0) == Val) || | |
966 (DoubleToBits(0.5) == Val) || | |
967 (DoubleToBits(-0.5) == Val) || | |
968 (DoubleToBits(2.0) == Val) || | |
969 (DoubleToBits(-2.0) == Val) || | |
970 (DoubleToBits(4.0) == Val) || | |
971 (DoubleToBits(-4.0) == Val); | |
972 } | |
650 | 973 |
651 // The actual type of the operand does not seem to matter as long | 974 // The actual type of the operand does not seem to matter as long |
652 // as the bits match one of the inline immediate values. For example: | 975 // as the bits match one of the inline immediate values. For example: |
653 // | 976 // |
654 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, | 977 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, |
655 // so it is a legal inline immediate. | 978 // so it is a legal inline immediate. |
656 // | 979 // |
657 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in | 980 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in |
658 // floating-point, so it is a legal inline immediate. | 981 // floating-point, so it is a legal inline immediate. |
659 | 982 uint32_t Val = Imm.getZExtValue(); |
660 return (APInt::floatToBits(0.0f) == Imm) || | 983 |
661 (APInt::floatToBits(1.0f) == Imm) || | 984 return (FloatToBits(0.0f) == Val) || |
662 (APInt::floatToBits(-1.0f) == Imm) || | 985 (FloatToBits(1.0f) == Val) || |
663 (APInt::floatToBits(0.5f) == Imm) || | 986 (FloatToBits(-1.0f) == Val) || |
664 (APInt::floatToBits(-0.5f) == Imm) || | 987 (FloatToBits(0.5f) == Val) || |
665 (APInt::floatToBits(2.0f) == Imm) || | 988 (FloatToBits(-0.5f) == Val) || |
666 (APInt::floatToBits(-2.0f) == Imm) || | 989 (FloatToBits(2.0f) == Val) || |
667 (APInt::floatToBits(4.0f) == Imm) || | 990 (FloatToBits(-2.0f) == Val) || |
668 (APInt::floatToBits(-4.0f) == Imm); | 991 (FloatToBits(4.0f) == Val) || |
669 } | 992 (FloatToBits(-4.0f) == Val); |
670 | 993 } |
671 bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const { | 994 |
672 if (MO.isImm()) | 995 bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, |
673 return isInlineConstant(APInt(32, MO.getImm(), true)); | 996 unsigned OpSize) const { |
674 | 997 if (MO.isImm()) { |
675 if (MO.isFPImm()) { | 998 // MachineOperand provides no way to tell the true operand size, since it |
676 APFloat FpImm = MO.getFPImm()->getValueAPF(); | 999 // only records a 64-bit value. We need to know the size to determine if a |
677 return isInlineConstant(FpImm.bitcastToAPInt()); | 1000 // 32-bit floating point immediate bit pattern is legal for an integer |
1001 // immediate. It would be for any 32-bit integer operand, but would not be | |
1002 // for a 64-bit one. | |
1003 | |
1004 unsigned BitSize = 8 * OpSize; | |
1005 return isInlineConstant(APInt(BitSize, MO.getImm(), true)); | |
678 } | 1006 } |
679 | 1007 |
680 return false; | 1008 return false; |
681 } | 1009 } |
682 | 1010 |
683 bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const { | 1011 bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO, |
684 return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO); | 1012 unsigned OpSize) const { |
1013 return MO.isImm() && !isInlineConstant(MO, OpSize); | |
685 } | 1014 } |
686 | 1015 |
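An operand is an inline constant when it is a small integer in [-16, 64] or the exact bit pattern of one of a few floating-point values; anything else is a literal constant that occupies the constant bus. A 32-bit sketch using memcpy for the bit cast (the real code uses FloatToBits/DoubleToBits):

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t floatToBits(float F) {
  uint32_t V;
  std::memcpy(&V, &F, sizeof(V));
  return V;
}

bool isInlineConstant32(int64_t SVal, uint32_t Bits) {
  if (SVal >= -16 && SVal <= 64)
    return true;
  const float FPVals[] = {0.0f, 0.5f, -0.5f, 1.0f, -1.0f,
                          2.0f, -2.0f, 4.0f, -4.0f};
  for (float F : FPVals)
    if (floatToBits(F) == Bits)
      return true;
  return false;
}

int main() {
  assert(isInlineConstant32(1, 1));        // small integer
  assert(isInlineConstant32(0x3f800000,    // same bits as 1.0f
                            floatToBits(1.0f)));
  assert(!isInlineConstant32(100, 100));   // needs a literal encoding
}
```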
687 static bool compareMachineOp(const MachineOperand &Op0, | 1016 static bool compareMachineOp(const MachineOperand &Op0, |
688 const MachineOperand &Op1) { | 1017 const MachineOperand &Op1) { |
689 if (Op0.getType() != Op1.getType()) | 1018 if (Op0.getType() != Op1.getType()) |
692 switch (Op0.getType()) { | 1021 switch (Op0.getType()) { |
693 case MachineOperand::MO_Register: | 1022 case MachineOperand::MO_Register: |
694 return Op0.getReg() == Op1.getReg(); | 1023 return Op0.getReg() == Op1.getReg(); |
695 case MachineOperand::MO_Immediate: | 1024 case MachineOperand::MO_Immediate: |
696 return Op0.getImm() == Op1.getImm(); | 1025 return Op0.getImm() == Op1.getImm(); |
697 case MachineOperand::MO_FPImmediate: | |
698 return Op0.getFPImm() == Op1.getFPImm(); | |
699 default: | 1026 default: |
700 llvm_unreachable("Didn't expect to be comparing these operand types"); | 1027 llvm_unreachable("Didn't expect to be comparing these operand types"); |
701 } | 1028 } |
702 } | 1029 } |
703 | 1030 |
704 bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo, | 1031 bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo, |
705 const MachineOperand &MO) const { | 1032 const MachineOperand &MO) const { |
706 const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo]; | 1033 const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo]; |
707 | 1034 |
708 assert(MO.isImm() || MO.isFPImm()); | 1035 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); |
709 | 1036 |
710 if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE) | 1037 if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE) |
711 return true; | 1038 return true; |
712 | 1039 |
713 if (OpInfo.RegClass < 0) | 1040 if (OpInfo.RegClass < 0) |
714 return false; | 1041 return false; |
715 | 1042 |
716 return RI.regClassCanUseImmediate(OpInfo.RegClass); | 1043 unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize(); |
717 } | 1044 if (isLiteralConstant(MO, OpSize)) |
718 | 1045 return RI.opCanUseLiteralConstant(OpInfo.OperandType); |
719 bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) { | 1046 |
1047 return RI.opCanUseInlineConstant(OpInfo.OperandType); | |
1048 } | |
1049 | |
1050 bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) const { | |
720 switch (AS) { | 1051 switch (AS) { |
721 case AMDGPUAS::GLOBAL_ADDRESS: { | 1052 case AMDGPUAS::GLOBAL_ADDRESS: { |
722 // MUBUF instructions have a 12-bit offset in bytes. | 1053 // MUBUF instructions have a 12-bit offset in bytes. |
723 return isUInt<12>(OffsetSize); | 1054 return isUInt<12>(OffsetSize); |
724 } | 1055 } |
725 case AMDGPUAS::CONSTANT_ADDRESS: { | 1056 case AMDGPUAS::CONSTANT_ADDRESS: { |
726 // SMRD instructions have an 8-bit offset in dwords. | 1057 // SMRD instructions have an 8-bit offset in dwords on SI and |
727 return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4); | 1058 // a 20-bit offset in bytes on VI. |
1059 if (RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) | |
1060 return isUInt<20>(OffsetSize); | |
1061 else | |
1062 return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4); | |
728 } | 1063 } |
729 case AMDGPUAS::LOCAL_ADDRESS: | 1064 case AMDGPUAS::LOCAL_ADDRESS: |
730 case AMDGPUAS::REGION_ADDRESS: { | 1065 case AMDGPUAS::REGION_ADDRESS: { |
731 // The single offset versions have a 16-bit offset in bytes. | 1066 // The single offset versions have a 16-bit offset in bytes. |
732 return isUInt<16>(OffsetSize); | 1067 return isUInt<16>(OffsetSize); |
737 return 0; | 1072 return 0; |
738 } | 1073 } |
739 } | 1074 } |
740 | 1075 |
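canFoldOffset reduces to unsigned-range checks: MUBUF takes 12-bit byte offsets, SMRD on SI takes 8-bit dword offsets, and VI widens SMRD to a 20-bit byte offset. A sketch of the SMRD rule:

```cpp
#include <cassert>
#include <cstdint>

template <unsigned N> bool isUIntN(uint64_t X) { return X < (1ull << N); }

bool canFoldSMRDOffset(uint64_t OffsetSize, bool IsVI) {
  if (IsVI)
    return isUIntN<20>(OffsetSize);                           // bytes on VI
  return (OffsetSize % 4 == 0) && isUIntN<8>(OffsetSize / 4); // dwords on SI
}

int main() {
  assert(canFoldSMRDOffset(1020, false));  // 255 dwords fits on SI
  assert(!canFoldSMRDOffset(1021, false)); // not dword-aligned
  assert(canFoldSMRDOffset(1021, true));   // VI takes raw byte offsets
}
```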
741 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { | 1076 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { |
742 return AMDGPU::getVOPe32(Opcode) != -1; | 1077 int Op32 = AMDGPU::getVOPe32(Opcode); |
1078 if (Op32 == -1) | |
1079 return false; | |
1080 | |
1081 return pseudoToMCOpcode(Op32) != -1; | |
743 } | 1082 } |
744 | 1083 |
745 bool SIInstrInfo::hasModifiers(unsigned Opcode) const { | 1084 bool SIInstrInfo::hasModifiers(unsigned Opcode) const { |
746 // The src0_modifier operand is present on all instructions | 1085 // The src0_modifier operand is present on all instructions |
747 // that have modifiers. | 1086 // that have modifiers. |
748 | 1087 |
749 return AMDGPU::getNamedOperandIdx(Opcode, | 1088 return AMDGPU::getNamedOperandIdx(Opcode, |
750 AMDGPU::OpName::src0_modifiers) != -1; | 1089 AMDGPU::OpName::src0_modifiers) != -1; |
751 } | 1090 } |
752 | 1091 |
1092 bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, | |
1093 unsigned OpName) const { | |
1094 const MachineOperand *Mods = getNamedOperand(MI, OpName); | |
1095 return Mods && Mods->getImm(); | |
1096 } | |
1097 | |
1098 bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, | |
1099 const MachineOperand &MO, | |
1100 unsigned OpSize) const { | |
1101 // Literal constants use the constant bus. | |
1102 if (isLiteralConstant(MO, OpSize)) | |
1103 return true; | |
1104 | |
1105 if (!MO.isReg() || !MO.isUse()) | |
1106 return false; | |
1107 | |
1108 if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) | |
1109 return RI.isSGPRClass(MRI.getRegClass(MO.getReg())); | |
1110 | |
1111 // FLAT_SCR is just an SGPR pair. | |
1112 if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR)) | |
1113 return true; | |
1114 | |
1115 // EXEC register uses the constant bus. | |
1116 if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC) | |
1117 return true; | |
1118 | |
1119 // SGPRs use the constant bus | |
1120 if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC || | |
1121 (!MO.isImplicit() && | |
1122 (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) || | |
1123 AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) { | |
1124 return true; | |
1125 } | |
1126 | |
1127 return false; | |
1128 } | |
1129 | |
753 bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, | 1130 bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, |
754 StringRef &ErrInfo) const { | 1131 StringRef &ErrInfo) const { |
755 uint16_t Opcode = MI->getOpcode(); | 1132 uint16_t Opcode = MI->getOpcode(); |
1133 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); | |
756 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); | 1134 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); |
757 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); | 1135 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); |
758 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); | 1136 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); |
759 | 1137 |
760 // Make sure the number of operands is correct. | 1138 // Make sure the number of operands is correct. |
765 return false; | 1143 return false; |
766 } | 1144 } |
767 | 1145 |
768 // Make sure the register classes are correct | 1146 // Make sure the register classes are correct |
769 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { | 1147 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { |
1148 if (MI->getOperand(i).isFPImm()) { | |
1149 ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast " | |
1150 "all fp values to integers."; | |
1151 return false; | |
1152 } | |
1153 | |
770 switch (Desc.OpInfo[i].OperandType) { | 1154 switch (Desc.OpInfo[i].OperandType) { |
771 case MCOI::OPERAND_REGISTER: { | 1155 case MCOI::OPERAND_REGISTER: |
772 int RegClass = Desc.OpInfo[i].RegClass; | 1156 if (MI->getOperand(i).isImm()) { |
773 if (!RI.regClassCanUseImmediate(RegClass) && | 1157 ErrInfo = "Illegal immediate value for operand."; |
774 (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm())) { | 1158 return false; |
775 // Handle some special cases: | 1159 } |
776 // Src0 can of VOP1, VOP2, VOPC can be an immediate no matter what | 1160 break; |
777 // the register class. | 1161 case AMDGPU::OPERAND_REG_IMM32: |
778 if (i != Src0Idx || (!isVOP1(Opcode) && !isVOP2(Opcode) && | 1162 break; |
779 !isVOPC(Opcode))) { | 1163 case AMDGPU::OPERAND_REG_INLINE_C: |
780 ErrInfo = "Expected register, but got immediate"; | 1164 if (MI->getOperand(i).isImm()) { |
1165 int RegClass = Desc.OpInfo[i].RegClass; | |
1166 const TargetRegisterClass *RC = RI.getRegClass(RegClass); | |
1167 if (!isInlineConstant(MI->getOperand(i), RC->getSize())) { | |
1168 ErrInfo = "Illegal immediate value for operand."; | |
781 return false; | 1169 return false; |
782 } | 1170 } |
783 } | 1171 } |
784 } | |
785 break; | 1172 break; |
786 case MCOI::OPERAND_IMMEDIATE: | 1173 case MCOI::OPERAND_IMMEDIATE: |
787 // Check if this operand is an immediate. | 1174 // Check if this operand is an immediate. |
788 // FrameIndex operands will be replaced by immediates, so they are | 1175 // FrameIndex operands will be replaced by immediates, so they are |
789 // allowed. | 1176 // allowed. |
790 if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm() && | 1177 if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) { |
791 !MI->getOperand(i).isFI()) { | |
792 ErrInfo = "Expected immediate, but got non-immediate"; | 1178 ErrInfo = "Expected immediate, but got non-immediate"; |
793 return false; | 1179 return false; |
794 } | 1180 } |
795 // Fall-through | 1181 // Fall-through |
796 default: | 1182 default: |
815 } | 1201 } |
816 | 1202 |
817 | 1203 |
818 // Verify VOP* | 1204 // Verify VOP* |
819 if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) { | 1205 if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) { |
1206 // Only look at the true operands. Only a real operand can use the constant | |
1207 // bus, and we don't want to check pseudo-operands like the source modifier | |
1208 // flags. | |
1209 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; | |
1210 | |
820 unsigned ConstantBusCount = 0; | 1211 unsigned ConstantBusCount = 0; |
821 unsigned SGPRUsed = AMDGPU::NoRegister; | 1212 unsigned SGPRUsed = AMDGPU::NoRegister; |
822 for (int i = 0, e = MI->getNumOperands(); i != e; ++i) { | 1213 for (int OpIdx : OpIndices) { |
823 const MachineOperand &MO = MI->getOperand(i); | 1214 if (OpIdx == -1) |
824 if (MO.isReg() && MO.isUse() && | 1215 break; |
825 !TargetRegisterInfo::isVirtualRegister(MO.getReg())) { | 1216 const MachineOperand &MO = MI->getOperand(OpIdx); |
826 | 1217 if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) { |
827 // EXEC register uses the constant bus. | 1218 if (MO.isReg()) { |
828 if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC) | 1219 if (MO.getReg() != SGPRUsed) |
1220 ++ConstantBusCount; | |
1221 SGPRUsed = MO.getReg(); | |
1222 } else { | |
829 ++ConstantBusCount; | 1223 ++ConstantBusCount; |
830 | |
831 // SGPRs use the constant bus | |
832 if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC || | |
833 (!MO.isImplicit() && | |
834 (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) || | |
835 AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) { | |
836 if (SGPRUsed != MO.getReg()) { | |
837 ++ConstantBusCount; | |
838 SGPRUsed = MO.getReg(); | |
839 } | |
840 } | 1224 } |
841 } | 1225 } |
842 // Literal constants use the constant bus. | |
843 if (isLiteralConstant(MO)) | |
844 ++ConstantBusCount; | |
845 } | 1226 } |
846 if (ConstantBusCount > 1) { | 1227 if (ConstantBusCount > 1) { |
847 ErrInfo = "VOP* instruction uses the constant bus more than once"; | 1228 ErrInfo = "VOP* instruction uses the constant bus more than once"; |
848 return false; | 1229 return false; |
849 } | 1230 } |
850 } | 1231 } |
851 | 1232 |
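The rewritten constant-bus check walks only the true src0/src1/src2 operands, counts literal constants, and de-duplicates repeated uses of the same SGPR; more than one bus user is illegal. A standalone model of the count:

```cpp
#include <cassert>
#include <vector>

// Hypothetical flattened source operand; UsesBus corresponds to the new
// usesConstantBus() predicate.
struct SrcOp {
  bool UsesBus;
  bool IsReg;
  unsigned Reg;
};

bool constantBusOk(const std::vector<SrcOp> &Srcs) {
  unsigned Count = 0, SGPRUsed = ~0u;
  for (const SrcOp &MO : Srcs) {
    if (!MO.UsesBus)
      continue;
    if (MO.IsReg) {
      if (MO.Reg != SGPRUsed) // the same SGPR may be read more than once
        ++Count;
      SGPRUsed = MO.Reg;
    } else {
      ++Count;                // literal constant
    }
  }
  return Count <= 1;
}

int main() {
  assert(constantBusOk({{true, true, 5}, {true, true, 5}}));  // same SGPR twice
  assert(!constantBusOk({{true, true, 5}, {true, true, 6}})); // two SGPRs
}
```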
852 // Verify SRC1 for VOP2 and VOPC | 1233 // Verify SRC1 for VOP2 and VOPC |
853 if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) { | 1234 if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) { |
854 const MachineOperand &Src1 = MI->getOperand(Src1Idx); | 1235 const MachineOperand &Src1 = MI->getOperand(Src1Idx); |
855 if (Src1.isImm() || Src1.isFPImm()) { | 1236 if (Src1.isImm()) { |
856 ErrInfo = "VOP[2C] src1 cannot be an immediate."; | 1237 ErrInfo = "VOP[2C] src1 cannot be an immediate."; |
857 return false; | 1238 return false; |
858 } | 1239 } |
859 } | 1240 } |
860 | 1241 |
861 // Verify VOP3 | 1242 // Verify VOP3 |
862 if (isVOP3(Opcode)) { | 1243 if (isVOP3(Opcode)) { |
863 if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) { | 1244 if (Src0Idx != -1 && |
1245 isLiteralConstant(MI->getOperand(Src0Idx), getOpSize(Opcode, Src0Idx))) { | |
864 ErrInfo = "VOP3 src0 cannot be a literal constant."; | 1246 ErrInfo = "VOP3 src0 cannot be a literal constant."; |
865 return false; | 1247 return false; |
866 } | 1248 } |
867 if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) { | 1249 if (Src1Idx != -1 && |
1250 isLiteralConstant(MI->getOperand(Src1Idx), getOpSize(Opcode, Src1Idx))) { | |
868 ErrInfo = "VOP3 src1 cannot be a literal constant."; | 1251 ErrInfo = "VOP3 src1 cannot be a literal constant."; |
869 return false; | 1252 return false; |
870 } | 1253 } |
871 if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) { | 1254 if (Src2Idx != -1 && |
1255 isLiteralConstant(MI->getOperand(Src2Idx), getOpSize(Opcode, Src2Idx))) { | |
872 ErrInfo = "VOP3 src2 cannot be a literal constant."; | 1256 ErrInfo = "VOP3 src2 cannot be a literal constant."; |
873 return false; | 1257 return false; |
874 } | 1258 } |
875 } | 1259 } |
876 | 1260 |
877 // Verify misc. restrictions on specific instructions. | 1261 // Verify misc. restrictions on specific instructions. |
878 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || | 1262 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || |
879 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { | 1263 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { |
880 MI->dump(); | 1264 const MachineOperand &Src0 = MI->getOperand(Src0Idx); |
881 | 1265 const MachineOperand &Src1 = MI->getOperand(Src1Idx); |
882 const MachineOperand &Src0 = MI->getOperand(2); | 1266 const MachineOperand &Src2 = MI->getOperand(Src2Idx); |
883 const MachineOperand &Src1 = MI->getOperand(3); | |
884 const MachineOperand &Src2 = MI->getOperand(4); | |
885 if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { | 1267 if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { |
886 if (!compareMachineOp(Src0, Src1) && | 1268 if (!compareMachineOp(Src0, Src1) && |
887 !compareMachineOp(Src0, Src2)) { | 1269 !compareMachineOp(Src0, Src2)) { |
888 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; | 1270 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; |
889 return false; | 1271 return false; |
941 case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64; | 1323 case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64; |
942 case AMDGPU::S_LOAD_DWORDX2_IMM: | 1324 case AMDGPU::S_LOAD_DWORDX2_IMM: |
943 case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; | 1325 case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; |
944 case AMDGPU::S_LOAD_DWORDX4_IMM: | 1326 case AMDGPU::S_LOAD_DWORDX4_IMM: |
945 case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; | 1327 case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; |
946 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32; | 1328 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; |
947 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; | 1329 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; |
948 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; | 1330 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; |
949 } | 1331 } |
950 } | 1332 } |
951 | 1333 |
956 const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, | 1338 const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, |
957 unsigned OpNo) const { | 1339 unsigned OpNo) const { |
958 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); | 1340 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
959 const MCInstrDesc &Desc = get(MI.getOpcode()); | 1341 const MCInstrDesc &Desc = get(MI.getOpcode()); |
960 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || | 1342 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || |
961 Desc.OpInfo[OpNo].RegClass == -1) | 1343 Desc.OpInfo[OpNo].RegClass == -1) { |
962 return MRI.getRegClass(MI.getOperand(OpNo).getReg()); | 1344 unsigned Reg = MI.getOperand(OpNo).getReg(); |
1345 | |
1346 if (TargetRegisterInfo::isVirtualRegister(Reg)) | |
1347 return MRI.getRegClass(Reg); | |
1348 return RI.getPhysRegClass(Reg); | |
1349 } | |
963 | 1350 |
964 unsigned RCID = Desc.OpInfo[OpNo].RegClass; | 1351 unsigned RCID = Desc.OpInfo[OpNo].RegClass; |
965 return RI.getRegClass(RCID); | 1352 return RI.getRegClass(RCID); |
966 } | 1353 } |
967 | 1354 |
977 } | 1364 } |
978 } | 1365 } |
979 | 1366 |
980 void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const { | 1367 void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const { |
981 MachineBasicBlock::iterator I = MI; | 1368 MachineBasicBlock::iterator I = MI; |
1369 MachineBasicBlock *MBB = MI->getParent(); | |
982 MachineOperand &MO = MI->getOperand(OpIdx); | 1370 MachineOperand &MO = MI->getOperand(OpIdx); |
983 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); | 1371 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); |
984 unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass; | 1372 unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass; |
985 const TargetRegisterClass *RC = RI.getRegClass(RCID); | 1373 const TargetRegisterClass *RC = RI.getRegClass(RCID); |
986 unsigned Opcode = AMDGPU::V_MOV_B32_e32; | 1374 unsigned Opcode = AMDGPU::V_MOV_B32_e32; |
987 if (MO.isReg()) { | 1375 if (MO.isReg()) |
988 Opcode = AMDGPU::COPY; | 1376 Opcode = AMDGPU::COPY; |
989 } else if (RI.isSGPRClass(RC)) { | 1377 else if (RI.isSGPRClass(RC)) |
990 Opcode = AMDGPU::S_MOV_B32; | 1378 Opcode = AMDGPU::S_MOV_B32; |
991 } | 1379 |
992 | 1380 |
993 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); | 1381 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); |
994 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC)) { | 1382 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC)) |
995 VRC = &AMDGPU::VReg_64RegClass; | 1383 VRC = &AMDGPU::VReg_64RegClass; |
996 } else { | 1384 else |
997 VRC = &AMDGPU::VReg_32RegClass; | 1385 VRC = &AMDGPU::VGPR_32RegClass; |
998 } | 1386 |
999 unsigned Reg = MRI.createVirtualRegister(VRC); | 1387 unsigned Reg = MRI.createVirtualRegister(VRC); |
1000 BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode), | 1388 DebugLoc DL = MBB->findDebugLoc(I); |
1001 Reg).addOperand(MO); | 1389 BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg) |
1390 .addOperand(MO); | |
1002 MO.ChangeToRegister(Reg, false); | 1391 MO.ChangeToRegister(Reg, false); |
1003 } | 1392 } |
1004 | 1393 |
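legalizeOpWithMove materializes an illegal operand through a move whose opcode depends on what is being moved. A sketch of just that choice, under the same assumptions as the function (MO is the offending operand, RC its required class); the helper name is hypothetical:

    static unsigned pickMoveOpcode(const MachineOperand &MO,
                                   const TargetRegisterClass *RC,
                                   const SIRegisterInfo &RI) {
      if (MO.isReg())
        return AMDGPU::COPY;         // register operands only need a copy
      if (RI.isSGPRClass(RC))
        return AMDGPU::S_MOV_B32;    // immediate headed for an SGPR class
      return AMDGPU::V_MOV_B32_e32;  // immediate headed for a VGPR
    }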
1005 unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, | 1394 unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, |
1006 MachineRegisterInfo &MRI, | 1395 MachineRegisterInfo &MRI, |
1016 | 1405 |
1017 // Just in case the super register is itself a sub-register, copy it to a new | 1406 // Just in case the super register is itself a sub-register, copy it to a new |
1018 // value so we don't need to worry about merging its subreg index with the | 1407 // value so we don't need to worry about merging its subreg index with the |
1019 // SubIdx passed to this function. The register coalescer should be able to | 1408 // SubIdx passed to this function. The register coalescer should be able to |
1020 // eliminate this extra copy. | 1409 // eliminate this extra copy. |
1021 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY), | 1410 MachineBasicBlock *MBB = MI->getParent(); |
1022 NewSuperReg) | 1411 DebugLoc DL = MI->getDebugLoc(); |
1023 .addOperand(SuperReg); | 1412 |
1024 | 1413 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) |
1025 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY), | 1414 .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); |
1026 SubReg) | 1415 |
1027 .addReg(NewSuperReg, 0, SubIdx); | 1416 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) |
1417 .addReg(NewSuperReg, 0, SubIdx); | |
1418 | |
1028 return SubReg; | 1419 return SubReg; |
1029 } | 1420 } |
1030 | 1421 |
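The two copies above follow a fixed idiom: flatten the super-register first so any subreg index it already carries cannot collide with SubIdx, then read the wanted piece; the register coalescer is expected to remove the extra copy. A condensed sketch extracting the low half of a 64-bit value, assuming SuperReg is a plain virtual register and MBB, MI, DL, MRI, TII come from the surrounding code:

    unsigned NewSuper = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
    unsigned LoHalf = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    // Flatten SuperReg into a fresh 64-bit virtual register.
    BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY), NewSuper)
        .addReg(SuperReg);
    // Now a simple sub0 read cannot collide with an existing subreg index.
    BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY), LoHalf)
        .addReg(NewSuper, 0, AMDGPU::sub0);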
1031 MachineOperand SIInstrInfo::buildExtractSubRegOrImm( | 1422 MachineOperand SIInstrInfo::buildExtractSubRegOrImm( |
1032 MachineBasicBlock::iterator MII, | 1423 MachineBasicBlock::iterator MII, |
1078 Worklist.push_back(Hi); | 1469 Worklist.push_back(Hi); |
1079 | 1470 |
1080 return Dst; | 1471 return Dst; |
1081 } | 1472 } |
1082 | 1473 |
1474 // Change the order of operands from (0, 1, 2) to (0, 2, 1) | |
1475 void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const { | |
1476 assert(Inst->getNumExplicitOperands() == 3); | |
1477 MachineOperand Op1 = Inst->getOperand(1); | |
1478 Inst->RemoveOperand(1); | |
1479 Inst->addOperand(Op1); | |
1480 } | |
1481 | |
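swapOperands exists for the VI rewrites further down: the _REV shift encodings take the shift amount as src0, so converting, say, S_LSHL_B32 means both retargeting the opcode and exchanging the two sources. A compressed sketch of that pairing; the real flow records NewOpcode and re-descriptors the instruction later:

    // s_lshl_b32 dst, x, y  ->  v_lshlrev_b32_e64 dst, y, x
    Inst->setDesc(TII->get(AMDGPU::V_LSHLREV_B32_e64));
    TII->swapOperands(Inst); // (dst, x, y) becomes (dst, y, x)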
1083 bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, | 1482 bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, |
1084 const MachineOperand *MO) const { | 1483 const MachineOperand *MO) const { |
1085 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); | 1484 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); |
1086 const MCInstrDesc &InstDesc = get(MI->getOpcode()); | 1485 const MCInstrDesc &InstDesc = get(MI->getOpcode()); |
1087 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx]; | 1486 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx]; |
1088 const TargetRegisterClass *DefinedRC = | 1487 const TargetRegisterClass *DefinedRC = |
1089 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; | 1488 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; |
1090 if (!MO) | 1489 if (!MO) |
1091 MO = &MI->getOperand(OpIdx); | 1490 MO = &MI->getOperand(OpIdx); |
1092 | 1491 |
1492 if (isVALU(InstDesc.Opcode) && | |
1493 usesConstantBus(MRI, *MO, DefinedRC->getSize())) { | |
1494 unsigned SGPRUsed = | |
1495 MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister; | |
1496 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { | |
1497 if (i == OpIdx) | |
1498 continue; | |
1499 const MachineOperand &Op = MI->getOperand(i); | |
1500 if (Op.isReg() && Op.getReg() != SGPRUsed && | |
1501 usesConstantBus(MRI, Op, getOpSize(*MI, i))) { | |
1502 return false; | |
1503 } | |
1504 } | |
1505 } | |
1506 | |
1093 if (MO->isReg()) { | 1507 if (MO->isReg()) { |
1094 assert(DefinedRC); | 1508 assert(DefinedRC); |
1095 const TargetRegisterClass *RC = MRI.getRegClass(MO->getReg()); | 1509 const TargetRegisterClass *RC = MRI.getRegClass(MO->getReg()); |
1096 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)); | 1510 |
1511 // In order to be legal, the common sub-class must be equal to the | |
1512 // class of the current operand. For example: | |
1513 // | |
1514 // v_mov_b32 s0 ; Operand defined as vsrc_32 | |
1515 // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL | |
1516 // | |
1517 // s_sendmsg 0, s0 ; Operand defined as m0reg | |
1518 // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL | |
1519 | |
1520 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC; | |
1097 } | 1521 } |
1098 | 1522 |
1099 | 1523 |
1100 // Handle non-register types that are treated like immediates. | 1524 // Handle non-register types that are treated like immediates. |
1101 assert(MO->isImm() || MO->isFPImm() || MO->isTargetIndex() || MO->isFI()); | 1525 assert(MO->isImm() || MO->isTargetIndex() || MO->isFI()); |
1102 | 1526 |
1103 if (!DefinedRC) | 1527 if (!DefinedRC) { |
1104 // This opperand expects an immediate | 1528 // This operand expects an immediate. |
1105 return true; | 1529 return true; |
1106 | 1530 } |
1107 return RI.regClassCanUseImmediate(DefinedRC); | 1531 |
1532 return isImmOperandLegal(MI, OpIdx, *MO); | |
1108 } | 1533 } |
1109 | 1534 |
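The new block in isOperandLegal enforces the constant-bus limit: a VALU instruction may read at most one SGPR or one literal through the constant bus, though re-reading the same SGPR is free. A self-contained sketch of that counting rule; fitsConstantBus is hypothetical and mirrors, rather than reproduces, the usesConstantBus/getOpSize helpers used above:

    static bool fitsConstantBus(const MachineInstr &MI,
                                const MachineRegisterInfo &MRI,
                                const SIInstrInfo &TII) {
      bool BusUsed = false;
      unsigned SGPRUsed = AMDGPU::NoRegister;
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        const MachineOperand &Op = MI.getOperand(i);
        if (!TII.usesConstantBus(MRI, Op, TII.getOpSize(MI, i)))
          continue;
        if (!BusUsed) {
          // First constant-bus read: remember which SGPR it was, if any.
          BusUsed = true;
          SGPRUsed = Op.isReg() ? Op.getReg() : (unsigned)AMDGPU::NoRegister;
          continue;
        }
        // Re-reading the same SGPR is free; anything else is a second read.
        if (!Op.isReg() || Op.getReg() != SGPRUsed)
          return false;
      }
      return true;
    }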
1110 void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { | 1535 void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { |
1111 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); | 1536 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); |
1112 | 1537 |
1142 } | 1567 } |
1143 | 1568 |
1144 // XXX - Do any VOP3 instructions read VCC? | 1569 // XXX - Do any VOP3 instructions read VCC? |
1145 // Legalize VOP3 | 1570 // Legalize VOP3 |
1146 if (isVOP3(MI->getOpcode())) { | 1571 if (isVOP3(MI->getOpcode())) { |
1147 int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx}; | 1572 int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx }; |
1148 unsigned SGPRReg = AMDGPU::NoRegister; | 1573 |
1574 // Find the one SGPR operand we are allowed to use. | |
1575 unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx); | |
1576 | |
1149 for (unsigned i = 0; i < 3; ++i) { | 1577 for (unsigned i = 0; i < 3; ++i) { |
1150 int Idx = VOP3Idx[i]; | 1578 int Idx = VOP3Idx[i]; |
1151 if (Idx == -1) | 1579 if (Idx == -1) |
1152 continue; | 1580 break; |
1153 MachineOperand &MO = MI->getOperand(Idx); | 1581 MachineOperand &MO = MI->getOperand(Idx); |
1154 | 1582 |
1155 if (MO.isReg()) { | 1583 if (MO.isReg()) { |
1156 if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) | 1584 if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) |
1157 continue; // VGPRs are legal | 1585 continue; // VGPRs are legal |
1161 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) { | 1589 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) { |
1162 SGPRReg = MO.getReg(); | 1590 SGPRReg = MO.getReg(); |
1163 // We can use one SGPR in each VOP3 instruction. | 1591 // We can use one SGPR in each VOP3 instruction. |
1164 continue; | 1592 continue; |
1165 } | 1593 } |
1166 } else if (!isLiteralConstant(MO)) { | 1594 } else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) { |
1167 // If it is not a register and not a literal constant, then it must be | 1595 // If it is not a register and not a literal constant, then it must be |
1168 // an inline constant which is always legal. | 1596 // an inline constant which is always legal. |
1169 continue; | 1597 continue; |
1170 } | 1598 } |
1171 // If we make it this far, then the operand is not legal and we must | 1599 // If we make it this far, then the operand is not legal and we must |
1265 MachineBasicBlock &MBB = *MI->getParent(); | 1693 MachineBasicBlock &MBB = *MI->getParent(); |
1266 // Extract the ptr from the resource descriptor. | 1694 // Extract the ptr from the resource descriptor. |
1267 | 1695 |
1268 // SRsrcPtrLo = srsrc:sub0 | 1696 // SRsrcPtrLo = srsrc:sub0 |
1269 unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc, | 1697 unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc, |
1270 &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass); | 1698 &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VGPR_32RegClass); |
1271 | 1699 |
1272 // SRsrcPtrHi = srsrc:sub1 | 1700 // SRsrcPtrHi = srsrc:sub1 |
1273 unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc, | 1701 unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc, |
1274 &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass); | 1702 &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VGPR_32RegClass); |
1275 | 1703 |
1276 // Create an empty resource descriptor | 1704 // Create an empty resource descriptor |
1277 unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); | 1705 unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); |
1278 unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); | 1706 unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); |
1279 unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); | 1707 unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); |
1280 unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); | 1708 unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); |
1709 uint64_t RsrcDataFormat = getDefaultRsrcDataFormat(); | |
1281 | 1710 |
1282 // Zero64 = 0 | 1711 // Zero64 = 0 |
1283 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64), | 1712 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64), |
1284 Zero64) | 1713 Zero64) |
1285 .addImm(0); | 1714 .addImm(0); |
1286 | 1715 |
1287 // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0} | 1716 // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0} |
1288 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), | 1717 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), |
1289 SRsrcFormatLo) | 1718 SRsrcFormatLo) |
1290 .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF); | 1719 .addImm(RsrcDataFormat & 0xFFFFFFFF); |
1291 | 1720 |
1292 // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32} | 1721 // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32} |
1293 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), | 1722 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), |
1294 SRsrcFormatHi) | 1723 SRsrcFormatHi) |
1295 .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32); | 1724 .addImm(RsrcDataFormat >> 32); |
1296 | 1725 |
1297 // NewSRsrc = {Zero64, SRsrcFormat} | 1726 // NewSRsrc = {Zero64, SRsrcFormat} |
1298 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), | 1727 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), |
1299 NewSRsrc) | 1728 NewSRsrc) |
1300 .addReg(Zero64) | 1729 .addReg(Zero64) |
1309 unsigned NewVAddrLo; | 1738 unsigned NewVAddrLo; |
1310 unsigned NewVAddrHi; | 1739 unsigned NewVAddrHi; |
1311 if (VAddr) { | 1740 if (VAddr) { |
1312 // This is already an ADDR64 instruction so we need to add the pointer | 1741 // This is already an ADDR64 instruction so we need to add the pointer |
1313 // extracted from the resource descriptor to the current value of VAddr. | 1742 // extracted from the resource descriptor to the current value of VAddr. |
1314 NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); | 1743 NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
1315 NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); | 1744 NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
1316 | 1745 |
1317 // NewVaddrLo = SRsrcPtrLo + VAddr:sub0 | 1746 // NewVaddrLo = SRsrcPtrLo + VAddr:sub0 |
1318 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32), | 1747 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32), |
1319 NewVAddrLo) | 1748 NewVAddrLo) |
1320 .addReg(SRsrcPtrLo) | 1749 .addReg(SRsrcPtrLo) |
1333 // This instruction is the _OFFSET variant, so we need to convert it to | 1762 // This instruction is the _OFFSET variant, so we need to convert it to |
1334 // ADDR64. | 1763 // ADDR64. |
1335 MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata); | 1764 MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata); |
1336 MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset); | 1765 MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset); |
1337 MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset); | 1766 MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset); |
1338 assert(SOffset->isImm() && SOffset->getImm() == 0 && "Legalizing MUBUF " | |
1339 "with non-zero soffset is not implemented"); | |
1340 (void)SOffset; | |
1341 | 1767 |
1342 // Create the new instruction. | 1768 // Create the new instruction. |
1343 unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode()); | 1769 unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode()); |
1344 MachineInstr *Addr64 = | 1770 MachineInstr *Addr64 = |
1345 BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode)) | 1771 BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode)) |
1346 .addOperand(*VData) | 1772 .addOperand(*VData) |
1347 .addOperand(*SRsrc) | 1773 .addOperand(*SRsrc) |
1348 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. | 1774 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. |
1349 // This will be replaced later | 1775 // This will be replaced later |
1350 // with the new value of vaddr. | 1776 // with the new value of vaddr. |
1777 .addOperand(*SOffset) | |
1351 .addOperand(*Offset); | 1778 .addOperand(*Offset); |
1352 | 1779 |
1353 MI->removeFromParent(); | 1780 MI->removeFromParent(); |
1354 MI = Addr64; | 1781 MI = Addr64; |
1355 | 1782 |
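The replacement descriptor built above is four dwords: a zeroed 64-bit base pointer followed by the 64-bit default data format, split exactly as the two S_MOV_B32s do. A sketch of the layout, reusing getDefaultRsrcDataFormat from the end of this file:

    uint64_t RsrcDataFormat = TII->getDefaultRsrcDataFormat();
    uint32_t Words[4] = {
        0u, 0u,                                // sub0-sub1: base pointer = 0
        uint32_t(RsrcDataFormat & 0xFFFFFFFF), // sub2: format, low 32 bits
        uint32_t(RsrcDataFormat >> 32)         // sub3: format, high 32 bits
    };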
1388 unsigned HalfSize = HalfRC->getSize(); | 1815 unsigned HalfSize = HalfRC->getSize(); |
1389 const MachineOperand *OffOp = | 1816 const MachineOperand *OffOp = |
1390 getNamedOperand(*MI, AMDGPU::OpName::offset); | 1817 getNamedOperand(*MI, AMDGPU::OpName::offset); |
1391 const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase); | 1818 const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase); |
1392 | 1819 |
1820 // The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes | |
1821 // on VI. | |
1393 if (OffOp) { | 1822 if (OffOp) { |
1823 bool isVI = RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS; | |
1824 unsigned OffScale = isVI ? 1 : 4; | |
1394 // Handle the _IMM variant | 1825 // Handle the _IMM variant |
1395 unsigned LoOffset = OffOp->getImm(); | 1826 unsigned LoOffset = OffOp->getImm() * OffScale; |
1396 unsigned HiOffset = LoOffset + (HalfSize / 4); | 1827 unsigned HiOffset = LoOffset + HalfSize; |
1397 Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo) | 1828 Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo) |
1398 .addOperand(*SBase) | 1829 .addOperand(*SBase) |
1399 .addImm(LoOffset); | 1830 .addImm(LoOffset / OffScale); |
1400 | 1831 |
1401 if (!isUInt<8>(HiOffset)) { | 1832 if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) { |
1402 unsigned OffsetSGPR = | 1833 unsigned OffsetSGPR = |
1403 MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); | 1834 MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); |
1404 BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR) | 1835 BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR) |
1405 .addImm(HiOffset << 2); // The immediate offset is in dwords, | 1836 .addImm(HiOffset); // The offset in register is in bytes. |
1406 // but offset in register is in bytes. | |
1407 Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi) | 1837 Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi) |
1408 .addOperand(*SBase) | 1838 .addOperand(*SBase) |
1409 .addReg(OffsetSGPR); | 1839 .addReg(OffsetSGPR); |
1410 } else { | 1840 } else { |
1411 Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi) | 1841 Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi) |
1412 .addOperand(*SBase) | 1842 .addOperand(*SBase) |
1413 .addImm(HiOffset); | 1843 .addImm(HiOffset / OffScale); |
1414 } | 1844 } |
1415 } else { | 1845 } else { |
1416 // Handle the _SGPR variant | 1846 // Handle the _SGPR variant |
1417 MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff); | 1847 MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff); |
1418 Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo) | 1848 Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo) |
1473 if (MI->getOperand(2).isReg()) { | 1903 if (MI->getOperand(2).isReg()) { |
1474 RegOffset = MI->getOperand(2).getReg(); | 1904 RegOffset = MI->getOperand(2).getReg(); |
1475 ImmOffset = 0; | 1905 ImmOffset = 0; |
1476 } else { | 1906 } else { |
1477 assert(MI->getOperand(2).isImm()); | 1907 assert(MI->getOperand(2).isImm()); |
1478 // SMRD instructions take a dword offset and MUBUF instructions | 1908 // SMRD instructions take a dword offset on SI and a byte offset on VI |
1479 // take a byte offset. | 1909 // and MUBUF instructions always take a byte offset. |
1480 ImmOffset = MI->getOperand(2).getImm() << 2; | 1910 ImmOffset = MI->getOperand(2).getImm(); |
1911 if (RI.ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) | |
1912 ImmOffset <<= 2; | |
1481 RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); | 1913 RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); |
1914 | |
1482 if (isUInt<12>(ImmOffset)) { | 1915 if (isUInt<12>(ImmOffset)) { |
1483 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), | 1916 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), |
1484 RegOffset) | 1917 RegOffset) |
1485 .addImm(0); | 1918 .addImm(0); |
1486 } else { | 1919 } else { |
1494 unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); | 1927 unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); |
1495 unsigned DWord0 = RegOffset; | 1928 unsigned DWord0 = RegOffset; |
1496 unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); | 1929 unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); |
1497 unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); | 1930 unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); |
1498 unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); | 1931 unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); |
1932 uint64_t RsrcDataFormat = getDefaultRsrcDataFormat(); | |
1499 | 1933 |
1500 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1) | 1934 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1) |
1501 .addImm(0); | 1935 .addImm(0); |
1502 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2) | 1936 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2) |
1503 .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF); | 1937 .addImm(RsrcDataFormat & 0xFFFFFFFF); |
1504 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3) | 1938 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3) |
1505 .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32); | 1939 .addImm(RsrcDataFormat >> 32); |
1506 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc) | 1940 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc) |
1507 .addReg(DWord0) | 1941 .addReg(DWord0) |
1508 .addImm(AMDGPU::sub0) | 1942 .addImm(AMDGPU::sub0) |
1509 .addReg(DWord1) | 1943 .addReg(DWord1) |
1510 .addImm(AMDGPU::sub1) | 1944 .addImm(AMDGPU::sub1) |
1517 MI->getOperand(2).setReg(MI->getOperand(1).getReg()); | 1951 MI->getOperand(2).setReg(MI->getOperand(1).getReg()); |
1518 } else { | 1952 } else { |
1519 MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false); | 1953 MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false); |
1520 } | 1954 } |
1521 MI->getOperand(1).setReg(SRsrc); | 1955 MI->getOperand(1).setReg(SRsrc); |
1956 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); | |
1522 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset)); | 1957 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset)); |
1523 | 1958 |
1524 const TargetRegisterClass *NewDstRC = | 1959 const TargetRegisterClass *NewDstRC = |
1525 RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass); | 1960 RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass); |
1526 | 1961 |
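The unit change is the subtle part of this rewrite: SMRD immediates count dwords on SI/CI but bytes on VI, while MUBUF offsets are always bytes, hence the generation-gated shift above. A one-line model of the conversion, with a hypothetical helper name:

    static unsigned smrdToMubufImmOffset(unsigned SmrdImm, bool IsSIorCI) {
      return IsSIorCI ? SmrdImm << 2 : SmrdImm; // dwords -> bytes on SI/CI only
    }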
1619 case AMDGPU::S_BCNT1_I32_B64: | 2054 case AMDGPU::S_BCNT1_I32_B64: |
1620 splitScalar64BitBCNT(Worklist, Inst); | 2055 splitScalar64BitBCNT(Worklist, Inst); |
1621 Inst->eraseFromParent(); | 2056 Inst->eraseFromParent(); |
1622 continue; | 2057 continue; |
1623 | 2058 |
2059 case AMDGPU::S_BFE_I64: { | |
2060 splitScalar64BitBFE(Worklist, Inst); | |
2061 Inst->eraseFromParent(); | |
2062 continue; | |
2063 } | |
2064 | |
2065 case AMDGPU::S_LSHL_B32: | |
2066 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { | |
2067 NewOpcode = AMDGPU::V_LSHLREV_B32_e64; | |
2068 swapOperands(Inst); | |
2069 } | |
2070 break; | |
2071 case AMDGPU::S_ASHR_I32: | |
2072 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { | |
2073 NewOpcode = AMDGPU::V_ASHRREV_I32_e64; | |
2074 swapOperands(Inst); | |
2075 } | |
2076 break; | |
2077 case AMDGPU::S_LSHR_B32: | |
2078 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { | |
2079 NewOpcode = AMDGPU::V_LSHRREV_B32_e64; | |
2080 swapOperands(Inst); | |
2081 } | |
2082 break; | |
2083 case AMDGPU::S_LSHL_B64: | |
2084 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { | |
2085 NewOpcode = AMDGPU::V_LSHLREV_B64; | |
2086 swapOperands(Inst); | |
2087 } | |
2088 break; | |
2089 case AMDGPU::S_ASHR_I64: | |
2090 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { | |
2091 NewOpcode = AMDGPU::V_ASHRREV_I64; | |
2092 swapOperands(Inst); | |
2093 } | |
2094 break; | |
2095 case AMDGPU::S_LSHR_B64: | |
2096 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { | |
2097 NewOpcode = AMDGPU::V_LSHRREV_B64; | |
2098 swapOperands(Inst); | |
2099 } | |
2100 break; | |
2101 | |
1624 case AMDGPU::S_BFE_U64: | 2102 case AMDGPU::S_BFE_U64: |
1625 case AMDGPU::S_BFE_I64: | |
1626 case AMDGPU::S_BFM_B64: | 2103 case AMDGPU::S_BFM_B64: |
1627 llvm_unreachable("Moving this op to VALU not implemented"); | 2104 llvm_unreachable("Moving this op to VALU not implemented"); |
1628 } | 2105 } |
1629 | 2106 |
1630 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { | 2107 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { |
1726 assert(Channel == 0); | 2203 assert(Channel == 0); |
1727 return RegIndex; | 2204 return RegIndex; |
1728 } | 2205 } |
1729 | 2206 |
1730 const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { | 2207 const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { |
1731 return &AMDGPU::VReg_32RegClass; | 2208 return &AMDGPU::VGPR_32RegClass; |
1732 } | 2209 } |
1733 | 2210 |
1734 void SIInstrInfo::splitScalar64BitUnaryOp( | 2211 void SIInstrInfo::splitScalar64BitUnaryOp( |
1735 SmallVectorImpl<MachineInstr *> &Worklist, | 2212 SmallVectorImpl<MachineInstr *> &Worklist, |
1736 MachineInstr *Inst, | 2213 MachineInstr *Inst, |
1856 DebugLoc DL = Inst->getDebugLoc(); | 2333 DebugLoc DL = Inst->getDebugLoc(); |
1857 | 2334 |
1858 MachineOperand &Dest = Inst->getOperand(0); | 2335 MachineOperand &Dest = Inst->getOperand(0); |
1859 MachineOperand &Src = Inst->getOperand(1); | 2336 MachineOperand &Src = Inst->getOperand(1); |
1860 | 2337 |
1861 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32); | 2338 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64); |
1862 const TargetRegisterClass *SrcRC = Src.isReg() ? | 2339 const TargetRegisterClass *SrcRC = Src.isReg() ? |
1863 MRI.getRegClass(Src.getReg()) : | 2340 MRI.getRegClass(Src.getReg()) : |
1864 &AMDGPU::SGPR_32RegClass; | 2341 &AMDGPU::SGPR_32RegClass; |
1865 | 2342 |
1866 unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); | 2343 unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
1883 | 2360 |
1884 MRI.replaceRegWith(Dest.getReg(), ResultReg); | 2361 MRI.replaceRegWith(Dest.getReg(), ResultReg); |
1885 | 2362 |
1886 Worklist.push_back(First); | 2363 Worklist.push_back(First); |
1887 Worklist.push_back(Second); | 2364 Worklist.push_back(Second); |
2365 } | |
2366 | |
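splitScalar64BitBCNT relies on V_BCNT_U32_B32 taking an accumulator: bcnt(src, acc) yields popcount(src) + acc, so two chained 32-bit counts give the 64-bit result. A scalar model of that identity, using the GCC/Clang popcount builtin for illustration:

    static unsigned bcnt64Model(uint64_t Src) {
      unsigned Lo = __builtin_popcount(uint32_t(Src));     // count of low half
      return __builtin_popcount(uint32_t(Src >> 32)) + Lo; // high half chained
    }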
2367 void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist, | |
2368 MachineInstr *Inst) const { | |
2369 MachineBasicBlock &MBB = *Inst->getParent(); | |
2370 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); | |
2371 MachineBasicBlock::iterator MII = Inst; | |
2372 DebugLoc DL = Inst->getDebugLoc(); | |
2373 | |
2374 MachineOperand &Dest = Inst->getOperand(0); | |
2375 uint32_t Imm = Inst->getOperand(2).getImm(); | |
2376 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. | |
2377 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. | |
2378 | |
2379 (void) Offset; | |
2380 | |
2381 // Only sext_inreg cases handled. | |
2382 assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 && | |
2383 BitWidth <= 32 && | |
2384 Offset == 0 && | |
2385 "Not implemented"); | |
2386 | |
2387 if (BitWidth < 32) { | |
2388 unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); | |
2389 unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); | |
2390 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); | |
2391 | |
2392 BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo) | |
2393 .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0) | |
2394 .addImm(0) | |
2395 .addImm(BitWidth); | |
2396 | |
2397 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi) | |
2398 .addImm(31) | |
2399 .addReg(MidRegLo); | |
2400 | |
2401 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) | |
2402 .addReg(MidRegLo) | |
2403 .addImm(AMDGPU::sub0) | |
2404 .addReg(MidRegHi) | |
2405 .addImm(AMDGPU::sub1); | |
2406 | |
2407 MRI.replaceRegWith(Dest.getReg(), ResultReg); | |
2408 return; | |
2409 } | |
2410 | |
2411 MachineOperand &Src = Inst->getOperand(1); | |
2412 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); | |
2413 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); | |
2414 | |
2415 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg) | |
2416 .addImm(31) | |
2417 .addReg(Src.getReg(), 0, AMDGPU::sub0); | |
2418 | |
2419 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) | |
2420 .addReg(Src.getReg(), 0, AMDGPU::sub0) | |
2421 .addImm(AMDGPU::sub0) | |
2422 .addReg(TmpReg) | |
2423 .addImm(AMDGPU::sub1); | |
2424 | |
2425 MRI.replaceRegWith(Dest.getReg(), ResultReg); | |
1888 } | 2426 } |
1889 | 2427 |
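splitScalar64BitBFE decodes the S_BFE immediate as offset in bits [5:0] and width in bits [22:16], and only handles the sext_inreg shape (offset 0, width at most 32). A decoding sketch with a worked value; the helper names are hypothetical:

    static uint32_t bfeOffset(uint32_t Imm) { return Imm & 0x3f; }        // [5:0]
    static uint32_t bfeWidth(uint32_t Imm) { return (Imm >> 16) & 0x7f; } // [22:16]
    // e.g. sign-extending from 16 bits encodes Imm = (16 << 16) | 0 = 0x100000.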
1890 void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc, | 2428 void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc, |
1891 MachineInstr *Inst) const { | 2429 MachineInstr *Inst) const { |
1892 // Add the implicit and explicit register definitions. | 2430 // Add the implicit and explicit register definitions. |
1903 Inst->addOperand(MachineOperand::CreateReg(Reg, true, true)); | 2441 Inst->addOperand(MachineOperand::CreateReg(Reg, true, true)); |
1904 } | 2442 } |
1905 } | 2443 } |
1906 } | 2444 } |
1907 | 2445 |
2446 unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI, | |
2447 int OpIndices[3]) const { | |
2448 const MCInstrDesc &Desc = get(MI->getOpcode()); | |
2449 | |
2450 // Find the one SGPR operand we are allowed to use. | |
2451 unsigned SGPRReg = AMDGPU::NoRegister; | |
2452 | |
2453 // First we need to consider the instruction's operand requirements before | |
2454 // legalizing. Some operands are required to be SGPRs, such as implicit uses | |
2455 // of VCC, but we are still bound by the constant bus requirement to only use | |
2456 // one. | |
2457 // | |
2458 // If the operand's class is an SGPR, we can never move it. | |
2459 | |
2460 for (const MachineOperand &MO : MI->implicit_operands()) { | |
2461 // We only care about reads. | |
2462 if (MO.isDef()) | |
2463 continue; | |
2464 | |
2465 if (MO.getReg() == AMDGPU::VCC) | |
2466 return AMDGPU::VCC; | |
2467 | |
2468 if (MO.getReg() == AMDGPU::FLAT_SCR) | |
2469 return AMDGPU::FLAT_SCR; | |
2470 } | |
2471 | |
2472 unsigned UsedSGPRs[3] = { AMDGPU::NoRegister }; | |
2473 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); | |
2474 | |
2475 for (unsigned i = 0; i < 3; ++i) { | |
2476 int Idx = OpIndices[i]; | |
2477 if (Idx == -1) | |
2478 break; | |
2479 | |
2480 const MachineOperand &MO = MI->getOperand(Idx); | |
2481 if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) | |
2482 SGPRReg = MO.getReg(); | |
2483 | |
2484 if (MO.isReg() && RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) | |
2485 UsedSGPRs[i] = MO.getReg(); | |
2486 } | |
2487 | |
2488 if (SGPRReg != AMDGPU::NoRegister) | |
2489 return SGPRReg; | |
2490 | |
2491 // We don't have a required SGPR operand, so we have a bit more freedom in | |
2492 // selecting operands to move. | |
2493 | |
2494 // Try to select the most used SGPR. If an SGPR is equal to one of the | |
2495 // others, we choose that. | |
2496 // | |
2497 // e.g. | |
2498 // V_FMA_F32 v0, s0, s0, s0 -> No moves | |
2499 // V_FMA_F32 v0, s0, s1, s0 -> Move s1 | |
2500 | |
2501 if (UsedSGPRs[0] != AMDGPU::NoRegister) { | |
2502 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2]) | |
2503 SGPRReg = UsedSGPRs[0]; | |
2504 } | |
2505 | |
2506 if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) { | |
2507 if (UsedSGPRs[1] == UsedSGPRs[2]) | |
2508 SGPRReg = UsedSGPRs[1]; | |
2509 } | |
2510 | |
2511 return SGPRReg; | |
2512 } | |
2513 | |
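When no operand is structurally required to be an SGPR, findUsedSGPR keeps whichever SGPR is duplicated, so only the odd one out has to be copied to a VGPR (the V_FMA_F32 comment above). The tie-preference in isolation, as a hypothetical standalone helper:

    static unsigned pickDuplicatedSGPR(const unsigned Used[3]) {
      if (Used[0] != AMDGPU::NoRegister &&
          (Used[0] == Used[1] || Used[0] == Used[2]))
        return Used[0];            // first source repeats: keep it in place
      if (Used[1] != AMDGPU::NoRegister && Used[1] == Used[2])
        return Used[1];
      return AMDGPU::NoRegister;   // no duplicate; any SGPR may be moved
    }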
1908 MachineInstrBuilder SIInstrInfo::buildIndirectWrite( | 2514 MachineInstrBuilder SIInstrInfo::buildIndirectWrite( |
1909 MachineBasicBlock *MBB, | 2515 MachineBasicBlock *MBB, |
1910 MachineBasicBlock::iterator I, | 2516 MachineBasicBlock::iterator I, |
1911 unsigned ValueReg, | 2517 unsigned ValueReg, |
1912 unsigned Address, unsigned OffsetReg) const { | 2518 unsigned Address, unsigned OffsetReg) const { |
1913 const DebugLoc &DL = MBB->findDebugLoc(I); | 2519 const DebugLoc &DL = MBB->findDebugLoc(I); |
1914 unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister( | 2520 unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister( |
1915 getIndirectIndexBegin(*MBB->getParent())); | 2521 getIndirectIndexBegin(*MBB->getParent())); |
1916 | 2522 |
1917 return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1)) | 2523 return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1)) |
1918 .addReg(IndirectBaseReg, RegState::Define) | 2524 .addReg(IndirectBaseReg, RegState::Define) |
1919 .addOperand(I->getOperand(0)) | 2525 .addOperand(I->getOperand(0)) |
1927 MachineBasicBlock *MBB, | 2533 MachineBasicBlock *MBB, |
1928 MachineBasicBlock::iterator I, | 2534 MachineBasicBlock::iterator I, |
1929 unsigned ValueReg, | 2535 unsigned ValueReg, |
1930 unsigned Address, unsigned OffsetReg) const { | 2536 unsigned Address, unsigned OffsetReg) const { |
1931 const DebugLoc &DL = MBB->findDebugLoc(I); | 2537 const DebugLoc &DL = MBB->findDebugLoc(I); |
1932 unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister( | 2538 unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister( |
1933 getIndirectIndexBegin(*MBB->getParent())); | 2539 getIndirectIndexBegin(*MBB->getParent())); |
1934 | 2540 |
1935 return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC)) | 2541 return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC)) |
1936 .addOperand(I->getOperand(0)) | 2542 .addOperand(I->getOperand(0)) |
1937 .addOperand(I->getOperand(1)) | 2543 .addOperand(I->getOperand(1)) |
1949 if (End == -1) | 2555 if (End == -1) |
1950 return; | 2556 return; |
1951 | 2557 |
1952 | 2558 |
1953 for (int Index = Begin; Index <= End; ++Index) | 2559 for (int Index = Begin; Index <= End; ++Index) |
1954 Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index)); | 2560 Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index)); |
1955 | 2561 |
1956 for (int Index = std::max(0, Begin - 1); Index <= End; ++Index) | 2562 for (int Index = std::max(0, Begin - 1); Index <= End; ++Index) |
1957 Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index)); | 2563 Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index)); |
1958 | 2564 |
1959 for (int Index = std::max(0, Begin - 2); Index <= End; ++Index) | 2565 for (int Index = std::max(0, Begin - 2); Index <= End; ++Index) |
1968 for (int Index = std::max(0, Begin - 15); Index <= End; ++Index) | 2574 for (int Index = std::max(0, Begin - 15); Index <= End; ++Index) |
1969 Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index)); | 2575 Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index)); |
1970 } | 2576 } |
1971 | 2577 |
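The staggered loop bounds above come from tuple overlap: an N-register tuple starting at index I intersects the reserved range [Begin, End] precisely when I >= Begin - (N - 1) and I <= End, so VReg_64 starts one slot early and VReg_512 fifteen. As a formula (assuming <algorithm> for std::max, which the function already uses):

    // First tuple start index that must be reserved for an N-register class.
    static int firstReservedIndex(int Begin, int TupleSize) {
      return std::max(0, Begin - (TupleSize - 1));
    }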
1972 MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, | 2578 MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, |
1973 unsigned OperandName) const { | 2579 unsigned OperandName) const { |
1974 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName); | 2580 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName); |
1975 if (Idx == -1) | 2581 if (Idx == -1) |
1976 return nullptr; | 2582 return nullptr; |
1977 | 2583 |
1978 return &MI.getOperand(Idx); | 2584 return &MI.getOperand(Idx); |
1979 } | 2585 } |
2586 | |
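getNamedOperand resolves operands through the generated name table and returns null when the encoding simply lacks that operand, which is how the _IMM and _SGPR variants are told apart earlier in the file. A usage sketch with a hypothetical helper:

    static int64_t immOffsetOrZero(const SIInstrInfo &TII, MachineInstr &MI) {
      if (MachineOperand *Off = TII.getNamedOperand(MI, AMDGPU::OpName::offset))
        if (Off->isImm())
          return Off->getImm();
      return 0; // this encoding has no immediate offset operand
    }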
2587 uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { | |
2588 uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT; | |
2589 if (ST.isAmdHsaOS()) | |
2590 RsrcDataFormat |= (1ULL << 56); | |
2591 | |
2592 return RsrcDataFormat; | |
2593 } |