Mercurial > hg > Members > tobaru > cbc > CbC_llvm
comparison lib/Target/R600/SIFixSGPRCopies.cpp @ 0:95c75e76d11b
LLVM 3.4
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 12 Dec 2013 13:56:28 +0900 |
parents | |
children | 54457678186b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:95c75e76d11b |
---|---|
1 //===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===// | |
2 // | |
3 // The LLVM Compiler Infrastructure | |
4 // | |
5 // This file is distributed under the University of Illinois Open Source | |
6 // License. See LICENSE.TXT for details. | |
7 // | |
8 //===----------------------------------------------------------------------===// | |
9 // | |
10 /// \file | |
11 /// Copies from VGPR to SGPR registers are illegal and the register coalescer | |
12 /// will sometimes generate these illegal copies in situations like this: | |
13 /// | |
14 /// Register Class <vsrc> is the union of <vgpr> and <sgpr> | |
15 /// | |
16 /// BB0: | |
17 /// %vreg0 <sgpr> = SCALAR_INST | |
18 /// %vreg1 <vsrc> = COPY %vreg0 <sgpr> | |
19 /// ... | |
20 /// BRANCH %cond BB1, BB2 | |
21 /// BB1: | |
22 /// %vreg2 <vgpr> = VECTOR_INST | |
23 /// %vreg3 <vsrc> = COPY %vreg2 <vgpr> | |
24 /// BB2: | |
25 /// %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1> | |
26 /// %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc> | |
27 /// | |
28 /// | |
29 /// The coalescer will begin at BB0 and eliminate its copy, then the resulting | |
30 /// code will look like this: | |
31 /// | |
32 /// BB0: | |
33 /// %vreg0 <sgpr> = SCALAR_INST | |
34 /// ... | |
35 /// BRANCH %cond BB1, BB2 | |
36 /// BB1: | |
37 /// %vreg2 <vgpr> = VECTOR_INST | |
38 /// %vreg3 <vsrc> = COPY %vreg2 <vgpr> | |
39 /// BB2: | |
40 /// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1> | |
41 /// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr> | |
42 /// | |
43 /// Now that the result of the PHI instruction is an SGPR, the register | |
44 /// allocator is now forced to constrain the register class of %vreg3 to | |
45 /// <sgpr> so we end up with final code like this: | |
46 /// | |
47 /// BB0: | |
48 /// %vreg0 <sgpr> = SCALAR_INST | |
49 /// ... | |
50 /// BRANCH %cond BB1, BB2 | |
51 /// BB1: | |
52 /// %vreg2 <vgpr> = VECTOR_INST | |
53 /// %vreg3 <sgpr> = COPY %vreg2 <vgpr> | |
54 /// BB2: | |
55 /// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1> | |
56 /// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr> | |
57 /// | |
58 /// Now this code contains an illegal copy from a VGPR to an SGPR. | |
59 /// | |
60 /// In order to avoid this problem, this pass searches for PHI instructions | |
61 /// which define a <vsrc> register and constrains its definition class to | |
62 /// <vgpr> if the user of the PHI's definition register is a vector instruction. | |
63 /// If the PHI's definition class is constrained to <vgpr> then the coalescer | |
64 /// will be unable to perform the COPY removal from the above example which | |
65 /// ultimately led to the creation of an illegal COPY. | |
66 //===----------------------------------------------------------------------===// | |
67 | |
68 #define DEBUG_TYPE "sgpr-copies" | |
69 #include "AMDGPU.h" | |
70 #include "SIInstrInfo.h" | |
71 #include "llvm/CodeGen/MachineFunctionPass.h" | |
72 #include "llvm/CodeGen/MachineInstrBuilder.h" | |
73 #include "llvm/CodeGen/MachineRegisterInfo.h" | |
74 #include "llvm/Support/Debug.h" | |
75 #include "llvm/Support/raw_ostream.h" | |
76 #include "llvm/Target/TargetMachine.h" | |
77 | |
78 using namespace llvm; | |
79 | |
80 namespace { | |
81 | |
/// Machine pass that prevents the register coalescer from creating illegal
/// VGPR -> SGPR copies (see the file comment above for the full scenario).
class SIFixSGPRCopies : public MachineFunctionPass {

private:
  static char ID;

  /// Walk the (transitive) COPY uses of (\p Reg, \p SubReg) and intersect
  /// the register classes they imply with Reg's current class.
  /// \return the common subclass of Reg's class and all inferred use classes.
  const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI,
                                                   const MachineRegisterInfo &MRI,
                                                   unsigned Reg,
                                                   unsigned SubReg) const;

  /// Look through chains of COPY instructions back to the original
  /// definition of (\p Reg, \p SubReg).
  /// \return the register class of that original definition.
  const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI,
                                                  const MachineRegisterInfo &MRI,
                                                  unsigned Reg,
                                                  unsigned SubReg) const;

  /// \return true if \p Copy writes an SGPR-class register whose source is
  /// ultimately defined in a class that contains VGPRs.
  bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI,
                        const MachineRegisterInfo &MRI) const;

public:
  // The TargetMachine argument is unused; it mirrors the factory signature.
  SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }

  virtual bool runOnMachineFunction(MachineFunction &MF);

  const char *getPassName() const {
    return "SI Fix SGPR copies";
  }

};
107 | |
108 } // End anonymous namespace | |
109 | |
// Pass identification; the address of ID is what uniquely identifies the pass.
char SIFixSGPRCopies::ID = 0;

/// Factory used by the AMDGPU target to add this pass to the codegen pipeline.
FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) {
  return new SIFixSGPRCopies(tm);
}
115 | |
116 static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) { | |
117 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); | |
118 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { | |
119 if (!MI.getOperand(i).isReg() || | |
120 !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg())) | |
121 continue; | |
122 | |
123 if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg()))) | |
124 return true; | |
125 } | |
126 return false; | |
127 } | |
128 | |
129 /// This functions walks the use list of Reg until it finds an Instruction | |
130 /// that isn't a COPY returns the register class of that instruction. | |
131 /// \return The register defined by the first non-COPY instruction. | |
132 const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses( | |
133 const SIRegisterInfo *TRI, | |
134 const MachineRegisterInfo &MRI, | |
135 unsigned Reg, | |
136 unsigned SubReg) const { | |
137 // The Reg parameter to the function must always be defined by either a PHI | |
138 // or a COPY, therefore it cannot be a physical register. | |
139 assert(TargetRegisterInfo::isVirtualRegister(Reg) && | |
140 "Reg cannot be a physical register"); | |
141 | |
142 const TargetRegisterClass *RC = MRI.getRegClass(Reg); | |
143 RC = TRI->getSubRegClass(RC, SubReg); | |
144 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), | |
145 E = MRI.use_end(); I != E; ++I) { | |
146 switch (I->getOpcode()) { | |
147 case AMDGPU::COPY: | |
148 RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI, | |
149 I->getOperand(0).getReg(), | |
150 I->getOperand(0).getSubReg())); | |
151 break; | |
152 } | |
153 } | |
154 | |
155 return RC; | |
156 } | |
157 | |
158 const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef( | |
159 const SIRegisterInfo *TRI, | |
160 const MachineRegisterInfo &MRI, | |
161 unsigned Reg, | |
162 unsigned SubReg) const { | |
163 if (!TargetRegisterInfo::isVirtualRegister(Reg)) { | |
164 const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg); | |
165 return TRI->getSubRegClass(RC, SubReg); | |
166 } | |
167 MachineInstr *Def = MRI.getVRegDef(Reg); | |
168 if (Def->getOpcode() != AMDGPU::COPY) { | |
169 return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg); | |
170 } | |
171 | |
172 return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(), | |
173 Def->getOperand(1).getSubReg()); | |
174 } | |
175 | |
176 bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy, | |
177 const SIRegisterInfo *TRI, | |
178 const MachineRegisterInfo &MRI) const { | |
179 | |
180 unsigned DstReg = Copy.getOperand(0).getReg(); | |
181 unsigned SrcReg = Copy.getOperand(1).getReg(); | |
182 unsigned SrcSubReg = Copy.getOperand(1).getSubReg(); | |
183 const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg); | |
184 const TargetRegisterClass *SrcRC; | |
185 | |
186 if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || | |
187 DstRC == &AMDGPU::M0RegRegClass) | |
188 return false; | |
189 | |
190 SrcRC = inferRegClassFromDef(TRI, MRI, SrcReg, SrcSubReg); | |
191 return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC); | |
192 } | |
193 | |
/// Scan every instruction in \p MF and legalize the three patterns that can
/// produce illegal VGPR -> SGPR data flow: plain COPYs, PHIs, and
/// REG_SEQUENCEs. Offending instructions are rewritten to VALU form via
/// SIInstrInfo::moveToVALU.
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
      MF.getTarget().getRegisterInfo());
  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
      MF.getTarget().getInstrInfo());
  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
                                                      I != E; ++I) {
      MachineInstr &MI = *I;
      // Case 1: a direct VGPR -> SGPR COPY; move it to the vector ALU.
      if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) {
        DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n");
        DEBUG(MI.print(dbgs()));
        TII->moveToVALU(MI);

      }

      // NOTE(review): execution falls through to this switch even after
      // moveToVALU above may have rewritten MI — presumably moveToVALU never
      // turns a COPY into a PHI/REG_SEQUENCE, but confirm against its
      // implementation.
      switch (MI.getOpcode()) {
      default: continue;
      case AMDGPU::PHI: {
        DEBUG(dbgs() << " Fixing PHI:\n");
        DEBUG(MI.print(dbgs()));

        // PHI operands are (value, block) pairs starting at index 1; visit
        // each incoming value and constrain it to the class of its
        // underlying (looked-through-copies) definition.
        for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
          unsigned Reg = MI.getOperand(i).getReg();
          const TargetRegisterClass *RC = inferRegClassFromDef(TRI, MRI, Reg,
                                                  MI.getOperand(0).getSubReg());
          MRI.constrainRegClass(Reg, RC);
        }
        // Constrain the PHI result by its uses; if the inferred class is
        // compatible with VReg_32, pin the result to VGPRs so the coalescer
        // cannot later fold it into an SGPR (see file comment).
        unsigned Reg = MI.getOperand(0).getReg();
        const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg,
                                                  MI.getOperand(0).getSubReg());
        if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) {
          MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass);
        }

        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
          break;

        // If a PHI node defines an SGPR and any of its operands are VGPRs,
        // then we need to move it to the VALU.
        for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
          unsigned Reg = MI.getOperand(i).getReg();
          if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
            TII->moveToVALU(MI);
            break;
          }
        }

        break;
      }
      case AMDGPU::REG_SEQUENCE: {
        // Only a REG_SEQUENCE that defines an SGPR-side register but reads
        // VGPR operands is illegal; anything else is left untouched.
        if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
            !hasVGPROperands(MI, TRI))
          continue;

        DEBUG(dbgs() << "Fixing REG_SEQUENCE:\n");
        DEBUG(MI.print(dbgs()));

        TII->moveToVALU(MI);
        break;
      }
      }
    }
  }
  // NOTE(review): the pass returns false even when it rewrote instructions;
  // a MachineFunctionPass conventionally returns true when the function was
  // modified — verify whether this matters for downstream pass invalidation.
  return false;
}