Mercurial > hg > CbC > CbC_llvm
comparison lib/Target/PowerPC/PPCVSXFMAMutate.cpp @ 83:60c9769439b8 LLVM3.7
LLVM 3.7
author | Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 18 Feb 2015 14:55:36 +0900 |
parents | |
children | afa8332a0e37 |
comparison
equal
deleted
inserted
replaced
78:af83660cff7b | 83:60c9769439b8 |
---|---|
1 //===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===// | |
2 // | |
3 // The LLVM Compiler Infrastructure | |
4 // | |
5 // This file is distributed under the University of Illinois Open Source | |
6 // License. See LICENSE.TXT for details. | |
7 // | |
8 //===----------------------------------------------------------------------===// | |
9 // | |
10 // This pass mutates the form of VSX FMA instructions to avoid unnecessary | |
11 // copies. | |
12 // | |
13 //===----------------------------------------------------------------------===// | |
14 | |
15 #include "PPCInstrInfo.h" | |
16 #include "MCTargetDesc/PPCPredicates.h" | |
17 #include "PPC.h" | |
18 #include "PPCInstrBuilder.h" | |
19 #include "PPCMachineFunctionInfo.h" | |
20 #include "PPCTargetMachine.h" | |
21 #include "llvm/ADT/STLExtras.h" | |
22 #include "llvm/ADT/Statistic.h" | |
23 #include "llvm/CodeGen/LiveIntervalAnalysis.h" | |
24 #include "llvm/CodeGen/MachineFrameInfo.h" | |
25 #include "llvm/CodeGen/MachineFunctionPass.h" | |
26 #include "llvm/CodeGen/MachineInstrBuilder.h" | |
27 #include "llvm/CodeGen/MachineMemOperand.h" | |
28 #include "llvm/CodeGen/MachineRegisterInfo.h" | |
29 #include "llvm/CodeGen/PseudoSourceValue.h" | |
30 #include "llvm/CodeGen/ScheduleDAG.h" | |
31 #include "llvm/CodeGen/SlotIndexes.h" | |
32 #include "llvm/MC/MCAsmInfo.h" | |
33 #include "llvm/Support/CommandLine.h" | |
34 #include "llvm/Support/Debug.h" | |
35 #include "llvm/Support/ErrorHandling.h" | |
36 #include "llvm/Support/TargetRegistry.h" | |
37 #include "llvm/Support/raw_ostream.h" | |
38 | |
39 using namespace llvm; | |
40 | |
41 static cl::opt<bool> DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation", | |
42 cl::desc("Disable VSX FMA instruction mutation"), cl::Hidden); | |
43 | |
44 #define DEBUG_TYPE "ppc-vsx-fma-mutate" | |
45 | |
46 namespace llvm { namespace PPC { | |
47 int getAltVSXFMAOpcode(uint16_t Opcode); | |
48 } } | |
49 | |
50 namespace { | |
51 // PPCVSXFMAMutate pass - For copies between VSX registers and non-VSX registers | |
52 // (Altivec and scalar floating-point registers), we need to transform the | |
53 // copies into subregister copies with other restrictions. | |
54 struct PPCVSXFMAMutate : public MachineFunctionPass { | |
55 static char ID; | |
56 PPCVSXFMAMutate() : MachineFunctionPass(ID) { | |
57 initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); | |
58 } | |
59 | |
60 LiveIntervals *LIS; | |
61 const PPCInstrInfo *TII; | |
62 | |
63 protected: | |
64 bool processBlock(MachineBasicBlock &MBB) { | |
65 bool Changed = false; | |
66 | |
67 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); | |
68 const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); | |
69 for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); | |
70 I != IE; ++I) { | |
71 MachineInstr *MI = I; | |
72 | |
73 // The default (A-type) VSX FMA form kills the addend (it is taken from | |
74 // the target register, which is then updated to reflect the result of | |
75 // the FMA). If the instruction, however, kills one of the registers | |
76 // used for the product, then we can use the M-form instruction (which | |
77 // will take that value from the to-be-defined register). | |
78 | |
79 int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode()); | |
80 if (AltOpc == -1) | |
81 continue; | |
82 | |
83 // This pass is run after register coalescing, and so we're looking for | |
84 // a situation like this: | |
85 // ... | |
86 // %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9 | |
87 // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16, | |
88 // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16 | |
89 // ... | |
90 // %vreg9<def,tied1> = XSMADDADP %vreg9<tied0>, %vreg17, %vreg19, | |
91 // %RM<imp-use>; VSLRC:%vreg9,%vreg17,%vreg19 | |
92 // ... | |
93 // Where we can eliminate the copy by changing from the A-type to the | |
94 // M-type instruction. Specifically, for this example, this means: | |
95 // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16, | |
96 // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16 | |
97 // is replaced by: | |
98 // %vreg16<def,tied1> = XSMADDMDP %vreg16<tied0>, %vreg18, %vreg9, | |
99 // %RM<imp-use>; VSLRC:%vreg16,%vreg18,%vreg9 | |
100 // and we remove: %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9 | |
101 | |
102 SlotIndex FMAIdx = LIS->getInstructionIndex(MI); | |
103 | |
104 VNInfo *AddendValNo = | |
105 LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn(); | |
106 MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def); | |
107 | |
108 // The addend and this instruction must be in the same block. | |
109 | |
110 if (!AddendMI || AddendMI->getParent() != MI->getParent()) | |
111 continue; | |
112 | |
113 // The addend must be a full copy within the same register class. | |
114 | |
115 if (!AddendMI->isFullCopy()) | |
116 continue; | |
117 | |
118 unsigned AddendSrcReg = AddendMI->getOperand(1).getReg(); | |
119 if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) { | |
120 if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) != | |
121 MRI.getRegClass(AddendSrcReg)) | |
122 continue; | |
123 } else { | |
124 // If AddendSrcReg is a physical register, make sure the destination | |
125 // register class contains it. | |
126 if (!MRI.getRegClass(AddendMI->getOperand(0).getReg()) | |
127 ->contains(AddendSrcReg)) | |
128 continue; | |
129 } | |
130 | |
131 // In theory, there could be other uses of the addend copy before this | |
132 // fma. We could deal with this, but that would require additional | |
133 // logic below and I suspect it will not occur in any relevant | |
134 // situations. Additionally, check whether the copy source is killed | |
135 // prior to the fma. In order to replace the addend here with the | |
136 // source of the copy, it must still be live here. We can't use | |
137 // interval testing for a physical register, so as long as we're | |
138 // walking the MIs we may as well test liveness here. | |
139 bool OtherUsers = false, KillsAddendSrc = false; | |
140 for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI); | |
141 J != JE; --J) { | |
142 if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) { | |
143 OtherUsers = true; | |
144 break; | |
145 } | |
146 if (J->modifiesRegister(AddendSrcReg, TRI) || | |
147 J->killsRegister(AddendSrcReg, TRI)) { | |
148 KillsAddendSrc = true; | |
149 break; | |
150 } | |
151 } | |
152 | |
153 if (OtherUsers || KillsAddendSrc) | |
154 continue; | |
155 | |
156 // Find one of the product operands that is killed by this instruction. | |
157 | |
158 unsigned KilledProdOp = 0, OtherProdOp = 0; | |
159 if (LIS->getInterval(MI->getOperand(2).getReg()) | |
160 .Query(FMAIdx).isKill()) { | |
161 KilledProdOp = 2; | |
162 OtherProdOp = 3; | |
163 } else if (LIS->getInterval(MI->getOperand(3).getReg()) | |
164 .Query(FMAIdx).isKill()) { | |
165 KilledProdOp = 3; | |
166 OtherProdOp = 2; | |
167 } | |
168 | |
169 // If there are no killed product operands, then this transformation is | |
170 // likely not profitable. | |
171 if (!KilledProdOp) | |
172 continue; | |
173 | |
174 // For virtual registers, verify that the addend source register | |
175 // is live here (as should have been assured above). | |
176 assert((!TargetRegisterInfo::isVirtualRegister(AddendSrcReg) || | |
177 LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) && | |
178 "Addend source register is not live!"); | |
179 | |
180 // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3. | |
181 | |
182 unsigned AddReg = AddendMI->getOperand(1).getReg(); | |
183 unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg(); | |
184 unsigned OtherProdReg = MI->getOperand(OtherProdOp).getReg(); | |
185 | |
186 unsigned AddSubReg = AddendMI->getOperand(1).getSubReg(); | |
187 unsigned KilledProdSubReg = MI->getOperand(KilledProdOp).getSubReg(); | |
188 unsigned OtherProdSubReg = MI->getOperand(OtherProdOp).getSubReg(); | |
189 | |
190 bool AddRegKill = AddendMI->getOperand(1).isKill(); | |
191 bool KilledProdRegKill = MI->getOperand(KilledProdOp).isKill(); | |
192 bool OtherProdRegKill = MI->getOperand(OtherProdOp).isKill(); | |
193 | |
194 bool AddRegUndef = AddendMI->getOperand(1).isUndef(); | |
195 bool KilledProdRegUndef = MI->getOperand(KilledProdOp).isUndef(); | |
196 bool OtherProdRegUndef = MI->getOperand(OtherProdOp).isUndef(); | |
197 | |
198 unsigned OldFMAReg = MI->getOperand(0).getReg(); | |
199 | |
200 // The transformation doesn't work well with things like: | |
201 // %vreg5 = A-form-op %vreg5, %vreg11, %vreg5; | |
202 // so leave such things alone. | |
203 if (OldFMAReg == KilledProdReg) | |
204 continue; | |
205 | |
206 assert(OldFMAReg == AddendMI->getOperand(0).getReg() && | |
207 "Addend copy not tied to old FMA output!"); | |
208 | |
209 DEBUG(dbgs() << "VSX FMA Mutation:\n " << *MI;); | |
210 | |
211 MI->getOperand(0).setReg(KilledProdReg); | |
212 MI->getOperand(1).setReg(KilledProdReg); | |
213 MI->getOperand(3).setReg(AddReg); | |
214 MI->getOperand(2).setReg(OtherProdReg); | |
215 | |
216 MI->getOperand(0).setSubReg(KilledProdSubReg); | |
217 MI->getOperand(1).setSubReg(KilledProdSubReg); | |
218 MI->getOperand(3).setSubReg(AddSubReg); | |
219 MI->getOperand(2).setSubReg(OtherProdSubReg); | |
220 | |
221 MI->getOperand(1).setIsKill(KilledProdRegKill); | |
222 MI->getOperand(3).setIsKill(AddRegKill); | |
223 MI->getOperand(2).setIsKill(OtherProdRegKill); | |
224 | |
225 MI->getOperand(1).setIsUndef(KilledProdRegUndef); | |
226 MI->getOperand(3).setIsUndef(AddRegUndef); | |
227 MI->getOperand(2).setIsUndef(OtherProdRegUndef); | |
228 | |
229 MI->setDesc(TII->get(AltOpc)); | |
230 | |
231 DEBUG(dbgs() << " -> " << *MI); | |
232 | |
233 // The killed product operand was killed here, so we can reuse it now | |
234 // for the result of the fma. | |
235 | |
236 LiveInterval &FMAInt = LIS->getInterval(OldFMAReg); | |
237 VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot()); | |
238 for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end(); | |
239 UI != UE;) { | |
240 MachineOperand &UseMO = *UI; | |
241 MachineInstr *UseMI = UseMO.getParent(); | |
242 ++UI; | |
243 | |
244 // Don't replace the result register of the copy we're about to erase. | |
245 if (UseMI == AddendMI) | |
246 continue; | |
247 | |
248 UseMO.setReg(KilledProdReg); | |
249 UseMO.setSubReg(KilledProdSubReg); | |
250 } | |
251 | |
252 // Extend the live intervals of the killed product operand to hold the | |
253 // fma result. | |
254 | |
255 LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg); | |
256 for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end(); | |
257 AI != AE; ++AI) { | |
258 // Don't add the segment that corresponds to the original copy. | |
259 if (AI->valno == AddendValNo) | |
260 continue; | |
261 | |
262 VNInfo *NewFMAValNo = | |
263 NewFMAInt.getNextValue(AI->start, | |
264 LIS->getVNInfoAllocator()); | |
265 | |
266 NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end, | |
267 NewFMAValNo)); | |
268 } | |
269 DEBUG(dbgs() << " extended: " << NewFMAInt << '\n'); | |
270 | |
271 FMAInt.removeValNo(FMAValNo); | |
272 DEBUG(dbgs() << " trimmed: " << FMAInt << '\n'); | |
273 | |
274 // Remove the (now unused) copy. | |
275 | |
276 DEBUG(dbgs() << " removing: " << *AddendMI << '\n'); | |
277 LIS->RemoveMachineInstrFromMaps(AddendMI); | |
278 AddendMI->eraseFromParent(); | |
279 | |
280 Changed = true; | |
281 } | |
282 | |
283 return Changed; | |
284 } | |
285 | |
286 public: | |
287 bool runOnMachineFunction(MachineFunction &MF) override { | |
288 // If we don't have VSX then go ahead and return without doing | |
289 // anything. | |
290 const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>(); | |
291 if (!STI.hasVSX()) | |
292 return false; | |
293 | |
294 LIS = &getAnalysis<LiveIntervals>(); | |
295 | |
296 TII = STI.getInstrInfo(); | |
297 | |
298 bool Changed = false; | |
299 | |
300 if (DisableVSXFMAMutate) | |
301 return Changed; | |
302 | |
303 for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { | |
304 MachineBasicBlock &B = *I++; | |
305 if (processBlock(B)) | |
306 Changed = true; | |
307 } | |
308 | |
309 return Changed; | |
310 } | |
311 | |
312 void getAnalysisUsage(AnalysisUsage &AU) const override { | |
313 AU.addRequired<LiveIntervals>(); | |
314 AU.addPreserved<LiveIntervals>(); | |
315 AU.addRequired<SlotIndexes>(); | |
316 AU.addPreserved<SlotIndexes>(); | |
317 MachineFunctionPass::getAnalysisUsage(AU); | |
318 } | |
319 }; | |
320 } | |
321 | |
322 INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE, | |
323 "PowerPC VSX FMA Mutation", false, false) | |
324 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) | |
325 INITIALIZE_PASS_DEPENDENCY(SlotIndexes) | |
326 INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE, | |
327 "PowerPC VSX FMA Mutation", false, false) | |
328 | |
329 char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID; | |
330 | |
331 char PPCVSXFMAMutate::ID = 0; | |
332 FunctionPass* | |
333 llvm::createPPCVSXFMAMutatePass() { return new PPCVSXFMAMutate(); } | |
334 | |
335 |