Mercurial > hg > CbC > CbC_llvm
comparison lib/Target/R600/SIInsertWaits.cpp @ 0:95c75e76d11b LLVM3.4
LLVM 3.4
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 12 Dec 2013 13:56:28 +0900 |
parents | |
children | 54457678186b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:95c75e76d11b |
---|---|
1 //===-- SIInsertWaits.cpp - Insert wait instructions for memory accesses --===// | |
2 // | |
3 // The LLVM Compiler Infrastructure | |
4 // | |
5 // This file is distributed under the University of Illinois Open Source | |
6 // License. See LICENSE.TXT for details. | |
7 // | |
8 //===----------------------------------------------------------------------===// | |
9 // | |
10 /// \file | |
11 /// \brief Insert wait instructions for memory reads and writes. | |
12 /// | |
13 /// Memory reads and writes are issued asynchronously, so we need to insert | |
14 /// S_WAITCNT instructions when we want to access any of their results or | |
15 /// overwrite any register that's used asynchronously. | |
16 // | |
17 //===----------------------------------------------------------------------===// | |
18 | |
19 #include "AMDGPU.h" | |
20 #include "SIInstrInfo.h" | |
21 #include "SIMachineFunctionInfo.h" | |
22 #include "llvm/CodeGen/MachineFunction.h" | |
23 #include "llvm/CodeGen/MachineFunctionPass.h" | |
24 #include "llvm/CodeGen/MachineInstrBuilder.h" | |
25 #include "llvm/CodeGen/MachineRegisterInfo.h" | |
26 | |
27 using namespace llvm; | |
28 | |
29 namespace { | |
30 | |
/// \brief One variable for each of the hardware counters.
///
/// The three values can be addressed either by name (Named.VM, Named.EXP,
/// Named.LGKM) or by position (Array[0], Array[1], Array[2]); the positional
/// view is what the loops over "all counters" in this file use.
union Counters {
  struct {
    unsigned VM;
    unsigned EXP;
    unsigned LGKM;
  } Named;
  unsigned Array[3];
};

/// \brief Table with one Counters entry per register slot (512 slots),
/// indexed by the values produced by getRegInterval().
typedef Counters RegCounters[512];

/// \brief Range of register slots, iterated as the half-open interval
/// [first, second).
typedef std::pair<unsigned, unsigned> RegInterval;
44 | |
45 class SIInsertWaits : public MachineFunctionPass { | |
46 | |
47 private: | |
48 static char ID; | |
49 const SIInstrInfo *TII; | |
50 const SIRegisterInfo *TRI; | |
51 const MachineRegisterInfo *MRI; | |
52 | |
53 /// \brief Constant hardware limits | |
54 static const Counters WaitCounts; | |
55 | |
56 /// \brief Constant zero value | |
57 static const Counters ZeroCounts; | |
58 | |
59 /// \brief Counter values we have already waited on. | |
60 Counters WaitedOn; | |
61 | |
62 /// \brief Counter values for last instruction issued. | |
63 Counters LastIssued; | |
64 | |
65 /// \brief Registers used by async instructions. | |
66 RegCounters UsedRegs; | |
67 | |
68 /// \brief Registers defined by async instructions. | |
69 RegCounters DefinedRegs; | |
70 | |
71 /// \brief Different export instruction types seen since last wait. | |
72 unsigned ExpInstrTypesSeen; | |
73 | |
74 /// \brief Get increment/decrement amount for this instruction. | |
75 Counters getHwCounts(MachineInstr &MI); | |
76 | |
77 /// \brief Is operand relevant for async execution? | |
78 bool isOpRelevant(MachineOperand &Op); | |
79 | |
80 /// \brief Get register interval an operand affects. | |
81 RegInterval getRegInterval(MachineOperand &Op); | |
82 | |
83 /// \brief Handle instructions async components | |
84 void pushInstruction(MachineInstr &MI); | |
85 | |
86 /// \brief Insert the actual wait instruction | |
87 bool insertWait(MachineBasicBlock &MBB, | |
88 MachineBasicBlock::iterator I, | |
89 const Counters &Counts); | |
90 | |
91 /// \brief Do we need def2def checks? | |
92 bool unorderedDefines(MachineInstr &MI); | |
93 | |
94 /// \brief Resolve all operand dependencies to counter requirements | |
95 Counters handleOperands(MachineInstr &MI); | |
96 | |
97 public: | |
98 SIInsertWaits(TargetMachine &tm) : | |
99 MachineFunctionPass(ID), | |
100 TII(0), | |
101 TRI(0), | |
102 ExpInstrTypesSeen(0) { } | |
103 | |
104 virtual bool runOnMachineFunction(MachineFunction &MF); | |
105 | |
106 const char *getPassName() const { | |
107 return "SI insert wait instructions"; | |
108 } | |
109 | |
110 }; | |
111 | |
112 } // End anonymous namespace | |
113 | |
114 char SIInsertWaits::ID = 0; | |
115 | |
116 const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } }; | |
117 const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } }; | |
118 | |
119 FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) { | |
120 return new SIInsertWaits(tm); | |
121 } | |
122 | |
123 Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { | |
124 | |
125 uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags; | |
126 Counters Result; | |
127 | |
128 Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT); | |
129 | |
130 // Only consider stores or EXP for EXP_CNT | |
131 Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT && | |
132 (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore())); | |
133 | |
134 // LGKM may uses larger values | |
135 if (TSFlags & SIInstrFlags::LGKM_CNT) { | |
136 | |
137 if (TII->isSMRD(MI.getOpcode())) { | |
138 | |
139 MachineOperand &Op = MI.getOperand(0); | |
140 assert(Op.isReg() && "First LGKM operand must be a register!"); | |
141 | |
142 unsigned Reg = Op.getReg(); | |
143 unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); | |
144 Result.Named.LGKM = Size > 4 ? 2 : 1; | |
145 | |
146 } else { | |
147 // DS | |
148 Result.Named.LGKM = 1; | |
149 } | |
150 | |
151 } else { | |
152 Result.Named.LGKM = 0; | |
153 } | |
154 | |
155 return Result; | |
156 } | |
157 | |
158 bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { | |
159 | |
160 // Constants are always irrelevant | |
161 if (!Op.isReg()) | |
162 return false; | |
163 | |
164 // Defines are always relevant | |
165 if (Op.isDef()) | |
166 return true; | |
167 | |
168 // For exports all registers are relevant | |
169 MachineInstr &MI = *Op.getParent(); | |
170 if (MI.getOpcode() == AMDGPU::EXP) | |
171 return true; | |
172 | |
173 // For stores the stored value is also relevant | |
174 if (!MI.getDesc().mayStore()) | |
175 return false; | |
176 | |
177 for (MachineInstr::mop_iterator I = MI.operands_begin(), | |
178 E = MI.operands_end(); I != E; ++I) { | |
179 | |
180 if (I->isReg() && I->isUse()) | |
181 return Op.isIdenticalTo(*I); | |
182 } | |
183 | |
184 return false; | |
185 } | |
186 | |
187 RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) { | |
188 | |
189 if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg())) | |
190 return std::make_pair(0, 0); | |
191 | |
192 unsigned Reg = Op.getReg(); | |
193 unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); | |
194 | |
195 assert(Size >= 4); | |
196 | |
197 RegInterval Result; | |
198 Result.first = TRI->getEncodingValue(Reg); | |
199 Result.second = Result.first + Size / 4; | |
200 | |
201 return Result; | |
202 } | |
203 | |
204 void SIInsertWaits::pushInstruction(MachineInstr &MI) { | |
205 | |
206 // Get the hardware counter increments and sum them up | |
207 Counters Increment = getHwCounts(MI); | |
208 unsigned Sum = 0; | |
209 | |
210 for (unsigned i = 0; i < 3; ++i) { | |
211 LastIssued.Array[i] += Increment.Array[i]; | |
212 Sum += Increment.Array[i]; | |
213 } | |
214 | |
215 // If we don't increase anything then that's it | |
216 if (Sum == 0) | |
217 return; | |
218 | |
219 // Remember which export instructions we have seen | |
220 if (Increment.Named.EXP) { | |
221 ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2; | |
222 } | |
223 | |
224 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { | |
225 | |
226 MachineOperand &Op = MI.getOperand(i); | |
227 if (!isOpRelevant(Op)) | |
228 continue; | |
229 | |
230 RegInterval Interval = getRegInterval(Op); | |
231 for (unsigned j = Interval.first; j < Interval.second; ++j) { | |
232 | |
233 // Remember which registers we define | |
234 if (Op.isDef()) | |
235 DefinedRegs[j] = LastIssued; | |
236 | |
237 // and which one we are using | |
238 if (Op.isUse()) | |
239 UsedRegs[j] = LastIssued; | |
240 } | |
241 } | |
242 } | |
243 | |
244 bool SIInsertWaits::insertWait(MachineBasicBlock &MBB, | |
245 MachineBasicBlock::iterator I, | |
246 const Counters &Required) { | |
247 | |
248 // End of program? No need to wait on anything | |
249 if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM) | |
250 return false; | |
251 | |
252 // Figure out if the async instructions execute in order | |
253 bool Ordered[3]; | |
254 | |
255 // VM_CNT is always ordered | |
256 Ordered[0] = true; | |
257 | |
258 // EXP_CNT is unordered if we have both EXP & VM-writes | |
259 Ordered[1] = ExpInstrTypesSeen == 3; | |
260 | |
261 // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS | |
262 Ordered[2] = false; | |
263 | |
264 // The values we are going to put into the S_WAITCNT instruction | |
265 Counters Counts = WaitCounts; | |
266 | |
267 // Do we really need to wait? | |
268 bool NeedWait = false; | |
269 | |
270 for (unsigned i = 0; i < 3; ++i) { | |
271 | |
272 if (Required.Array[i] <= WaitedOn.Array[i]) | |
273 continue; | |
274 | |
275 NeedWait = true; | |
276 | |
277 if (Ordered[i]) { | |
278 unsigned Value = LastIssued.Array[i] - Required.Array[i]; | |
279 | |
280 // adjust the value to the real hardware posibilities | |
281 Counts.Array[i] = std::min(Value, WaitCounts.Array[i]); | |
282 | |
283 } else | |
284 Counts.Array[i] = 0; | |
285 | |
286 // Remember on what we have waited on | |
287 WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i]; | |
288 } | |
289 | |
290 if (!NeedWait) | |
291 return false; | |
292 | |
293 // Reset EXP_CNT instruction types | |
294 if (Counts.Named.EXP == 0) | |
295 ExpInstrTypesSeen = 0; | |
296 | |
297 // Build the wait instruction | |
298 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) | |
299 .addImm((Counts.Named.VM & 0xF) | | |
300 ((Counts.Named.EXP & 0x7) << 4) | | |
301 ((Counts.Named.LGKM & 0x7) << 8)); | |
302 | |
303 return true; | |
304 } | |
305 | |
306 /// \brief helper function for handleOperands | |
307 static void increaseCounters(Counters &Dst, const Counters &Src) { | |
308 | |
309 for (unsigned i = 0; i < 3; ++i) | |
310 Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]); | |
311 } | |
312 | |
313 Counters SIInsertWaits::handleOperands(MachineInstr &MI) { | |
314 | |
315 Counters Result = ZeroCounts; | |
316 | |
317 // For each register affected by this | |
318 // instruction increase the result sequence | |
319 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { | |
320 | |
321 MachineOperand &Op = MI.getOperand(i); | |
322 RegInterval Interval = getRegInterval(Op); | |
323 for (unsigned j = Interval.first; j < Interval.second; ++j) { | |
324 | |
325 if (Op.isDef()) { | |
326 increaseCounters(Result, UsedRegs[j]); | |
327 increaseCounters(Result, DefinedRegs[j]); | |
328 } | |
329 | |
330 if (Op.isUse()) | |
331 increaseCounters(Result, DefinedRegs[j]); | |
332 } | |
333 } | |
334 | |
335 return Result; | |
336 } | |
337 | |
338 bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { | |
339 bool Changes = false; | |
340 | |
341 TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo()); | |
342 TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo()); | |
343 | |
344 MRI = &MF.getRegInfo(); | |
345 | |
346 WaitedOn = ZeroCounts; | |
347 LastIssued = ZeroCounts; | |
348 | |
349 memset(&UsedRegs, 0, sizeof(UsedRegs)); | |
350 memset(&DefinedRegs, 0, sizeof(DefinedRegs)); | |
351 | |
352 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); | |
353 BI != BE; ++BI) { | |
354 | |
355 MachineBasicBlock &MBB = *BI; | |
356 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); | |
357 I != E; ++I) { | |
358 | |
359 Changes |= insertWait(MBB, I, handleOperands(*I)); | |
360 pushInstruction(*I); | |
361 } | |
362 | |
363 // Wait for everything at the end of the MBB | |
364 Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued); | |
365 } | |
366 | |
367 return Changes; | |
368 } |