comparison: lib/Target/R600/SIInstrInfo.cpp @ 0:95c75e76d11b (LLVM 3.4)

author   | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date     | Thu, 12 Dec 2013 13:56:28 +0900
parents  |
children | e4204d083e25
compared revisions | -1:000000000000 → 0:95c75e76d11b

//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//


#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrDesc.h"

using namespace llvm;

SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm)
    { }

const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const {
  return RI;
}

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

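// copyPhysReg: emit a copy between two physical registers.  32/64-bit scalar
// and 32-bit vector copies use a single S_MOV/V_MOV; wider register tuples
// are copied one 32-bit sub-register at a time using the sub-index lists
// below.  Copies targeting M0 are skipped when M0 already holds the source
// value.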
void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MI, DebugLoc DL,
                         unsigned DestReg, unsigned SrcReg,
                         bool KillSrc) const {

  // If we are trying to copy to or from SCC, there is a bug somewhere else in
  // the backend. While it may be theoretically possible to do this, it should
  // never be necessary.
  assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
  };

  static const int16_t Sub0_7[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
  };

  static const int16_t Sub0_3[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
  };

  static const int16_t Sub0_2[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
  };

  static const int16_t Sub0_1[] = {
    AMDGPU::sub0, AMDGPU::sub1, 0
  };

  unsigned Opcode;
  const int16_t *SubIndices;

  if (AMDGPU::M0 == DestReg) {
    // Check if M0 isn't already set to this value
    for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
         I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {

      if (!I->definesRegister(AMDGPU::M0))
        continue;

      unsigned Opc = I->getOpcode();
      if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
        break;

      if (!I->readsRegister(SrcReg))
        break;

      // The copy isn't necessary
      return;
    }
  }

  if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_15;

  } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
           AMDGPU::SReg_64RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_1;

  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_2;

  } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
           AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
           AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
           AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_15;

  } else {
    llvm_unreachable("Can't copy register!");
  }

  while (unsigned SubIdx = *SubIndices++) {
    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
                                          get(Opcode), RI.getSubReg(DestReg, SubIdx));

    Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));

    if (*SubIndices)
      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
  }
}

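// commuteOpcode: return the opcode to use once the source operands have been
// swapped, using the generated getCommuteRev/getCommuteOrig tables; if no
// mapping exists, the opcode is returned unchanged.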
unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
  int NewOpc;

  // Try to map original to commuted opcode
  if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
    return NewOpc;

  // Try to map commuted to original opcode
  if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
    return NewOpc;

  return Opcode;
}

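// commuteInstruction: swap src0 and src1 of MI.  VOP2 instructions whose
// src0 is an SGPR are not commuted; a register/immediate operand pair is
// exchanged in place, everything else is handed to the generic
// TargetInstrInfo implementation.  The opcode is fixed up afterwards with
// commuteOpcode.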
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
                                              bool NewMI) const {

  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg())
    return 0;

  // Cannot commute VOP2 if src0 is SGPR.
  if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() &&
      RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg())))
    return 0;

  if (!MI->getOperand(2).isReg()) {
    // XXX: Commute instructions with FPImm operands
    if (NewMI || MI->getOperand(2).isFPImm() ||
        (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
      return 0;
    }

    // XXX: Commute VOP3 instructions with abs and neg set.
    if (isVOP3(MI->getOpcode()) &&
        (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                        AMDGPU::OpName::abs)).getImm() ||
         MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                        AMDGPU::OpName::neg)).getImm()))
      return 0;

    unsigned Reg = MI->getOperand(1).getReg();
    MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
    MI->getOperand(2).ChangeToRegister(Reg, false);
  } else {
    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
  }

  if (MI)
    MI->setDesc(get(commuteOpcode(MI->getOpcode())));

  return MI;
}

MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         unsigned SrcReg) const {
  return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
                 DstReg) .addReg(SrcReg);
}

bool SIInstrInfo::isMov(unsigned Opcode) const {
  switch(Opcode) {
  default: return false;
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
    return true;
  }
}

bool
SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
  return RC != &AMDGPU::EXECRegRegClass;
}

int SIInstrInfo::isMIMG(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::MIMG;
}

int SIInstrInfo::isSMRD(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::SMRD;
}

bool SIInstrInfo::isVOP1(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP1;
}

bool SIInstrInfo::isVOP2(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP2;
}

bool SIInstrInfo::isVOP3(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP3;
}

bool SIInstrInfo::isVOPC(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOPC;
}

bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU;
}

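// isInlineConstant: true if MO can be represented as an SI inline constant,
// i.e. an integer immediate in [-16, 64] or one of the small floating-point
// values enumerated below.  Any other immediate is treated as a literal
// constant (see isLiteralConstant).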
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
  if(MO.isImm()) {
    return MO.getImm() >= -16 && MO.getImm() <= 64;
  }
  if (MO.isFPImm()) {
    return MO.getFPImm()->isExactlyValue(0.0) ||
           MO.getFPImm()->isExactlyValue(0.5) ||
           MO.getFPImm()->isExactlyValue(-0.5) ||
           MO.getFPImm()->isExactlyValue(1.0) ||
           MO.getFPImm()->isExactlyValue(-1.0) ||
           MO.getFPImm()->isExactlyValue(2.0) ||
           MO.getFPImm()->isExactlyValue(-2.0) ||
           MO.getFPImm()->isExactlyValue(4.0) ||
           MO.getFPImm()->isExactlyValue(-4.0);
  }
  return false;
}

bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
  return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
}

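// verifyInstruction: machine-verifier hook for SI-specific operand rules:
// a VOP* instruction may use the constant bus (SGPRs, M0, VCC, EXEC and
// literal constants) at most once, VOP2/VOPC src1 must not be an immediate,
// and VOP3 sources must not be literal constants.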
bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
                                    StringRef &ErrInfo) const {
  uint16_t Opcode = MI->getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  // Verify VOP*
  if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
    unsigned ConstantBusCount = 0;
    unsigned SGPRUsed = AMDGPU::NoRegister;
    for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isUse() &&
          !TargetRegisterInfo::isVirtualRegister(MO.getReg())) {

        // EXEC register uses the constant bus.
        if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
          ++ConstantBusCount;

        // SGPRs use the constant bus
        if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
            (!MO.isImplicit() &&
            (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
            AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
          if (SGPRUsed != MO.getReg()) {
            ++ConstantBusCount;
            SGPRUsed = MO.getReg();
          }
        }
      }
      // Literal constants use the constant bus.
      if (isLiteralConstant(MO))
        ++ConstantBusCount;
    }
    if (ConstantBusCount > 1) {
      ErrInfo = "VOP* instruction uses the constant bus more than once";
      return false;
    }
  }

  // Verify SRC1 for VOP2 and VOPC
  if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
    const MachineOperand &Src1 = MI->getOperand(Src1Idx);
    if (Src1.isImm() || Src1.isFPImm()) {
      ErrInfo = "VOP[2C] src1 cannot be an immediate.";
      return false;
    }
  }

  // Verify VOP3
  if (isVOP3(Opcode)) {
    if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
      ErrInfo = "VOP3 src0 cannot be a literal constant.";
      return false;
    }
    if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
      ErrInfo = "VOP3 src1 cannot be a literal constant.";
      return false;
    }
    if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
      ErrInfo = "VOP3 src2 cannot be a literal constant.";
      return false;
    }
  }
  return true;
}

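// getVALUOp: map a scalar (SALU) opcode to its vector (VALU) equivalent, or
// return INSTRUCTION_LIST_END when no equivalent is handled here.  Used by
// moveToVALU when an instruction has to be rewritten to operate on VGPRs.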
unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
  case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
  }
}

bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
  return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
}

const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
                                                      unsigned OpNo) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  const MCInstrDesc &Desc = get(MI.getOpcode());
  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
      Desc.OpInfo[OpNo].RegClass == -1)
    return MRI.getRegClass(MI.getOperand(OpNo).getReg());

  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return RI.getRegClass(RCID);
}

bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::REG_SEQUENCE:
    return RI.hasVGPRs(getOpRegClass(MI, 0));
  default:
    return RI.hasVGPRs(getOpRegClass(MI, OpNo));
  }
}

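// legalizeOpWithMove: materialize operand OpIdx in a new virtual register of
// the equivalent VGPR class by inserting a COPY / V_MOV_B32 / S_MOV_B32 in
// front of MI, then rewrite the operand to use that register.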
void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
  MachineBasicBlock::iterator I = MI;
  MachineOperand &MO = MI->getOperand(OpIdx);
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
  const TargetRegisterClass *RC = RI.getRegClass(RCID);
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (MO.isReg()) {
    Opcode = AMDGPU::COPY;
  } else if (RI.isSGPRClass(RC)) {
    Opcode = AMDGPU::S_MOV_B32;
  }

  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
  unsigned Reg = MRI.createVirtualRegister(VRC);
  BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
          Reg).addOperand(MO);
  MO.ChangeToRegister(Reg, false);
}

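// legalizeOperands: rewrite source operands that are illegal for the
// instruction's encoding, either by commuting or by copying them into VGPRs
// with legalizeOpWithMove.  Covers the VOP2 restrictions on src1 and on SGPR
// operands, the one-SGPR-per-instruction rule for VOP3, and REG_SEQUENCE
// operands whose register bank differs from the result's.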
void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src2);

  // Legalize VOP2
  if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
    MachineOperand &Src0 = MI->getOperand(Src0Idx);
    MachineOperand &Src1 = MI->getOperand(Src1Idx);

    // If the instruction implicitly reads VCC, we can't have any SGPR
    // operands, so move any SGPR operand into a VGPR.
    bool ReadsVCC = MI->readsRegister(AMDGPU::VCC, &RI);
    if (ReadsVCC && Src0.isReg() &&
        RI.isSGPRClass(MRI.getRegClass(Src0.getReg()))) {
      legalizeOpWithMove(MI, Src0Idx);
      return;
    }

    if (ReadsVCC && Src1.isReg() &&
        RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
      legalizeOpWithMove(MI, Src1Idx);
      return;
    }

    // Legalize VOP2 instructions where src1 is not a VGPR. An SGPR input must
    // be the first operand, and there can only be one.
    if (Src1.isImm() || Src1.isFPImm() ||
        (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
      if (MI->isCommutable()) {
        if (commuteInstruction(MI))
          return;
      }
      legalizeOpWithMove(MI, Src1Idx);
    }
  }

  // XXX - Do any VOP3 instructions read VCC?
  // Legalize VOP3
  if (isVOP3(MI->getOpcode())) {
    int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
    unsigned SGPRReg = AMDGPU::NoRegister;
    for (unsigned i = 0; i < 3; ++i) {
      int Idx = VOP3Idx[i];
      if (Idx == -1)
        continue;
      MachineOperand &MO = MI->getOperand(Idx);

      if (MO.isReg()) {
        if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
          continue; // VGPRs are legal

        assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");

        if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
          SGPRReg = MO.getReg();
          // We can use one SGPR in each VOP3 instruction.
          continue;
        }
      } else if (!isLiteralConstant(MO)) {
        // If it is not a register and not a literal constant, then it must be
        // an inline constant which is always legal.
        continue;
      }
      // If we make it this far, then the operand is not legal and we must
      // legalize it.
      legalizeOpWithMove(MI, Idx);
    }
  }

  // Legalize REG_SEQUENCE
  // The register class of the operands must be the same type as the register
  // class of the output.
  if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
    const TargetRegisterClass *RC = NULL, *SRC = NULL, *VRC = NULL;
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      const TargetRegisterClass *OpRC =
              MRI.getRegClass(MI->getOperand(i).getReg());
      if (RI.hasVGPRs(OpRC)) {
        VRC = OpRC;
      } else {
        SRC = OpRC;
      }
    }

    // If any of the operands are VGPR registers, then they all must be;
    // otherwise we will create illegal VGPR->SGPR copies when legalizing
    // them.
    if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
      if (!VRC) {
        assert(SRC);
        VRC = RI.getEquivalentVGPRClass(SRC);
      }
      RC = VRC;
    } else {
      RC = SRC;
    }

    // Update all the operands so they have the same type.
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      unsigned DstReg = MRI.createVirtualRegister(RC);
      BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
              get(AMDGPU::COPY), DstReg)
              .addOperand(MI->getOperand(i));
      MI->getOperand(i).setReg(DstReg);
    }
  }
}

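// moveToVALU: replace TopInst with its VALU equivalent and rewrite its
// result to live in a VGPR; any user that cannot read a VGPR is queued on a
// worklist and converted in turn.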
void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
  SmallVector<MachineInstr *, 128> Worklist;
  Worklist.push_back(&TopInst);

  while (!Worklist.empty()) {
    MachineInstr *Inst = Worklist.pop_back_val();
    unsigned NewOpcode = getVALUOp(*Inst);
    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
      continue;

    MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo();

    // Use the new VALU Opcode.
    const MCInstrDesc &NewDesc = get(NewOpcode);
    Inst->setDesc(NewDesc);

    // Remove any references to SCC. Vector instructions can't read from it,
    // and we're just about to add the implicit use / defs of VCC, and we
    // don't want both.
    for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
      MachineOperand &Op = Inst->getOperand(i);
      if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
        Inst->RemoveOperand(i);
    }

    // Add the implicit and explicit register definitions.
    if (NewDesc.ImplicitUses) {
      for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
        unsigned Reg = NewDesc.ImplicitUses[i];
        Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
      }
    }

    if (NewDesc.ImplicitDefs) {
      for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
        unsigned Reg = NewDesc.ImplicitDefs[i];
        Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    }

    legalizeOperands(Inst);

    // Update the destination register class.
    const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);

    switch (Inst->getOpcode()) {
    // For target instructions, getOpRegClass just returns the virtual
    // register class associated with the operand, so we need to find an
    // equivalent VGPR register class in order to move the instruction to the
    // VALU.
    case AMDGPU::COPY:
    case AMDGPU::PHI:
    case AMDGPU::REG_SEQUENCE:
      if (RI.hasVGPRs(NewDstRC))
        continue;
      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
      if (!NewDstRC)
        continue;
      break;
    default:
      break;
    }

    unsigned DstReg = Inst->getOperand(0).getReg();
    unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);

    for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
           E = MRI.use_end(); I != E; ++I) {
      MachineInstr &UseMI = *I;
      if (!canReadVGPR(UseMI, I.getOperandNo())) {
        Worklist.push_back(&UseMI);
      }
    }
  }
}

//===----------------------------------------------------------------------===//
// Indirect addressing callbacks
//===----------------------------------------------------------------------===//

unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                               unsigned Channel) const {
  assert(Channel == 0);
  return RegIndex;
}

const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::VReg_32RegClass;
}

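// buildIndirectWrite / buildIndirectRead: emit the SI_INDIRECT_DST_V1 and
// SI_INDIRECT_SRC pseudo instructions that implement indirect register
// writes and reads, addressed relative to the first indirectly accessed
// register reported by getIndirectIndexBegin.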
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
                                   MachineBasicBlock *MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned ValueReg,
                                   unsigned Address, unsigned OffsetReg) const {
  const DebugLoc &DL = MBB->findDebugLoc(I);
  unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
                                      getIndirectIndexBegin(*MBB->getParent()));

  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
          .addReg(IndirectBaseReg, RegState::Define)
          .addOperand(I->getOperand(0))
          .addReg(IndirectBaseReg)
          .addReg(OffsetReg)
          .addImm(0)
          .addReg(ValueReg);
}

MachineInstrBuilder SIInstrInfo::buildIndirectRead(
                                   MachineBasicBlock *MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned ValueReg,
                                   unsigned Address, unsigned OffsetReg) const {
  const DebugLoc &DL = MBB->findDebugLoc(I);
  unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
                                      getIndirectIndexBegin(*MBB->getParent()));

  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
          .addOperand(I->getOperand(0))
          .addOperand(I->getOperand(1))
          .addReg(IndirectBaseReg)
          .addReg(OffsetReg)
          .addImm(0);

}

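// reserveIndirectRegisters: reserve every VReg_32 in the function's indirect
// index range, plus the wider register tuples that overlap that range, so
// the register allocator does not assign them to unrelated values.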
void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                           const MachineFunction &MF) const {
  int End = getIndirectIndexEnd(MF);
  int Begin = getIndirectIndexBegin(MF);

  if (End == -1)
    return;


  for (int Index = Begin; Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}