annotate llvm/lib/Analysis/DivergenceAnalysis.cpp @ 220:42394fc6a535

Added tag llvm12 for changeset 0572611fdcc8
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Tue, 15 Jun 2021 19:13:43 +0900
parents 1d019706d866
children 2e18cbf3894f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
150
anatofuz
parents:
diff changeset
1 //===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==//
anatofuz
parents:
diff changeset
2 //
anatofuz
parents:
diff changeset
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
anatofuz
parents:
diff changeset
4 // See https://llvm.org/LICENSE.txt for license information.
anatofuz
parents:
diff changeset
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
anatofuz
parents:
diff changeset
6 //
anatofuz
parents:
diff changeset
7 //===----------------------------------------------------------------------===//
anatofuz
parents:
diff changeset
8 //
anatofuz
parents:
diff changeset
9 // This file implements a general divergence analysis for loop vectorization
anatofuz
parents:
diff changeset
10 // and GPU programs. It determines which branches and values in a loop or GPU
anatofuz
parents:
diff changeset
11 // program are divergent. It can help branch optimizations such as jump
anatofuz
parents:
diff changeset
12 // threading and loop unswitching to make better decisions.
anatofuz
parents:
diff changeset
13 //
anatofuz
parents:
diff changeset
14 // GPU programs typically use the SIMD execution model, where multiple threads
anatofuz
parents:
diff changeset
15 // in the same execution group have to execute in lock-step. Therefore, if the
anatofuz
parents:
diff changeset
16 // code contains divergent branches (i.e., threads in a group do not agree on
anatofuz
parents:
diff changeset
17 // which path of the branch to take), the group of threads has to execute all
anatofuz
parents:
diff changeset
18 // the paths from that branch with different subsets of threads enabled until
anatofuz
parents:
diff changeset
19 // they re-converge.
anatofuz
parents:
diff changeset
20 //
anatofuz
parents:
diff changeset
21 // Due to this execution model, some optimizations such as jump
anatofuz
parents:
diff changeset
22 // threading and loop unswitching can interfere with thread re-convergence.
anatofuz
parents:
diff changeset
23 // Therefore, an analysis that computes which branches in a GPU program are
anatofuz
parents:
diff changeset
24 // divergent can help the compiler to selectively run these optimizations.
anatofuz
parents:
diff changeset
25 //
anatofuz
parents:
diff changeset
26 // This implementation is derived from the Vectorization Analysis of the
anatofuz
parents:
diff changeset
27 // Region Vectorizer (RV). That implementation in turn is based on the approach
anatofuz
parents:
diff changeset
28 // described in
anatofuz
parents:
diff changeset
29 //
anatofuz
parents:
diff changeset
30 // Improving Performance of OpenCL on CPUs
anatofuz
parents:
diff changeset
31 // Ralf Karrenberg and Sebastian Hack
anatofuz
parents:
diff changeset
32 // CC '12
anatofuz
parents:
diff changeset
33 //
anatofuz
parents:
diff changeset
34 // This DivergenceAnalysis implementation is generic in the sense that it does
anatofuz
parents:
diff changeset
35 // not itself identify original sources of divergence.
anatofuz
parents:
diff changeset
36 // Instead specialized adapter classes, (LoopDivergenceAnalysis) for loops and
anatofuz
parents:
diff changeset
37 // (GPUDivergenceAnalysis) for GPU programs, identify the sources of divergence
anatofuz
parents:
diff changeset
38 // (e.g., special variables that hold the thread ID or the iteration variable).
anatofuz
parents:
diff changeset
39 //
anatofuz
parents:
diff changeset
40 // The generic implementation propagates divergence to variables that are data
anatofuz
parents:
diff changeset
41 // or sync dependent on a source of divergence.
anatofuz
parents:
diff changeset
42 //
anatofuz
parents:
diff changeset
43 // While data dependency is a well-known concept, the notion of sync dependency
anatofuz
parents:
diff changeset
44 // is worth more explanation. Sync dependence characterizes the control flow
anatofuz
parents:
diff changeset
45 // aspect of the propagation of branch divergence. For example,
anatofuz
parents:
diff changeset
46 //
anatofuz
parents:
diff changeset
47 // %cond = icmp slt i32 %tid, 10
anatofuz
parents:
diff changeset
48 // br i1 %cond, label %then, label %else
anatofuz
parents:
diff changeset
49 // then:
anatofuz
parents:
diff changeset
50 // br label %merge
anatofuz
parents:
diff changeset
51 // else:
anatofuz
parents:
diff changeset
52 // br label %merge
anatofuz
parents:
diff changeset
53 // merge:
anatofuz
parents:
diff changeset
54 // %a = phi i32 [ 0, %then ], [ 1, %else ]
anatofuz
parents:
diff changeset
55 //
anatofuz
parents:
diff changeset
56 // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
anatofuz
parents:
diff changeset
57 // because %tid is not on its use-def chains, %a is sync dependent on %tid
anatofuz
parents:
diff changeset
58 // because the branch "br i1 %cond" depends on %tid and affects which value %a
anatofuz
parents:
diff changeset
59 // is assigned to.
anatofuz
parents:
diff changeset
60 //
anatofuz
parents:
diff changeset
61 // The sync dependence detection (which branch induces divergence in which join
anatofuz
parents:
diff changeset
62 // points) is implemented in the SyncDependenceAnalysis.
anatofuz
parents:
diff changeset
63 //
anatofuz
parents:
diff changeset
64 // The current DivergenceAnalysis implementation has the following limitations:
anatofuz
parents:
diff changeset
65 // 1. intra-procedural. It conservatively considers the arguments of a
anatofuz
parents:
diff changeset
66 // non-kernel-entry function and the return value of a function call as
anatofuz
parents:
diff changeset
67 // divergent.
anatofuz
parents:
diff changeset
68 // 2. memory as black box. It conservatively considers values loaded from
anatofuz
parents:
diff changeset
69 // generic or local address as divergent. This can be improved by leveraging
anatofuz
parents:
diff changeset
70 // pointer analysis and/or by modelling non-escaping memory objects in SSA
anatofuz
parents:
diff changeset
71 // as done in RV.
anatofuz
parents:
diff changeset
72 //
anatofuz
parents:
diff changeset
73 //===----------------------------------------------------------------------===//
anatofuz
parents:
diff changeset
74
anatofuz
parents:
diff changeset
75 #include "llvm/Analysis/DivergenceAnalysis.h"
anatofuz
parents:
diff changeset
76 #include "llvm/Analysis/LoopInfo.h"
anatofuz
parents:
diff changeset
77 #include "llvm/Analysis/Passes.h"
anatofuz
parents:
diff changeset
78 #include "llvm/Analysis/PostDominators.h"
anatofuz
parents:
diff changeset
79 #include "llvm/Analysis/TargetTransformInfo.h"
anatofuz
parents:
diff changeset
80 #include "llvm/IR/Dominators.h"
anatofuz
parents:
diff changeset
81 #include "llvm/IR/InstIterator.h"
anatofuz
parents:
diff changeset
82 #include "llvm/IR/Instructions.h"
anatofuz
parents:
diff changeset
83 #include "llvm/IR/IntrinsicInst.h"
anatofuz
parents:
diff changeset
84 #include "llvm/IR/Value.h"
anatofuz
parents:
diff changeset
85 #include "llvm/Support/Debug.h"
anatofuz
parents:
diff changeset
86 #include "llvm/Support/raw_ostream.h"
anatofuz
parents:
diff changeset
87 #include <vector>
anatofuz
parents:
diff changeset
88
anatofuz
parents:
diff changeset
89 using namespace llvm;
anatofuz
parents:
diff changeset
90
anatofuz
parents:
diff changeset
91 #define DEBUG_TYPE "divergence-analysis"
anatofuz
parents:
diff changeset
92
anatofuz
parents:
diff changeset
93 // class DivergenceAnalysis
anatofuz
parents:
diff changeset
94 DivergenceAnalysis::DivergenceAnalysis(
anatofuz
parents:
diff changeset
95 const Function &F, const Loop *RegionLoop, const DominatorTree &DT,
anatofuz
parents:
diff changeset
96 const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
anatofuz
parents:
diff changeset
97 : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
anatofuz
parents:
diff changeset
98 IsLCSSAForm(IsLCSSAForm) {}
anatofuz
parents:
diff changeset
99
anatofuz
parents:
diff changeset
100 void DivergenceAnalysis::markDivergent(const Value &DivVal) {
anatofuz
parents:
diff changeset
101 assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
anatofuz
parents:
diff changeset
102 assert(!isAlwaysUniform(DivVal) && "cannot be a divergent");
anatofuz
parents:
diff changeset
103 DivergentValues.insert(&DivVal);
anatofuz
parents:
diff changeset
104 }
anatofuz
parents:
diff changeset
105
anatofuz
parents:
diff changeset
106 void DivergenceAnalysis::addUniformOverride(const Value &UniVal) {
anatofuz
parents:
diff changeset
107 UniformOverrides.insert(&UniVal);
anatofuz
parents:
diff changeset
108 }
anatofuz
parents:
diff changeset
109
anatofuz
parents:
diff changeset
110 bool DivergenceAnalysis::updateTerminator(const Instruction &Term) const {
anatofuz
parents:
diff changeset
111 if (Term.getNumSuccessors() <= 1)
anatofuz
parents:
diff changeset
112 return false;
anatofuz
parents:
diff changeset
113 if (auto *BranchTerm = dyn_cast<BranchInst>(&Term)) {
anatofuz
parents:
diff changeset
114 assert(BranchTerm->isConditional());
anatofuz
parents:
diff changeset
115 return isDivergent(*BranchTerm->getCondition());
anatofuz
parents:
diff changeset
116 }
anatofuz
parents:
diff changeset
117 if (auto *SwitchTerm = dyn_cast<SwitchInst>(&Term)) {
anatofuz
parents:
diff changeset
118 return isDivergent(*SwitchTerm->getCondition());
anatofuz
parents:
diff changeset
119 }
anatofuz
parents:
diff changeset
120 if (isa<InvokeInst>(Term)) {
anatofuz
parents:
diff changeset
121 return false; // ignore abnormal executions through landingpad
anatofuz
parents:
diff changeset
122 }
anatofuz
parents:
diff changeset
123
anatofuz
parents:
diff changeset
124 llvm_unreachable("unexpected terminator");
anatofuz
parents:
diff changeset
125 }
anatofuz
parents:
diff changeset
126
anatofuz
parents:
diff changeset
127 bool DivergenceAnalysis::updateNormalInstruction(const Instruction &I) const {
anatofuz
parents:
diff changeset
128 // TODO function calls with side effects, etc
anatofuz
parents:
diff changeset
129 for (const auto &Op : I.operands()) {
anatofuz
parents:
diff changeset
130 if (isDivergent(*Op))
anatofuz
parents:
diff changeset
131 return true;
anatofuz
parents:
diff changeset
132 }
anatofuz
parents:
diff changeset
133 return false;
anatofuz
parents:
diff changeset
134 }
anatofuz
parents:
diff changeset
135
anatofuz
parents:
diff changeset
136 bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock,
anatofuz
parents:
diff changeset
137 const Value &Val) const {
anatofuz
parents:
diff changeset
138 const auto *Inst = dyn_cast<const Instruction>(&Val);
anatofuz
parents:
diff changeset
139 if (!Inst)
anatofuz
parents:
diff changeset
140 return false;
anatofuz
parents:
diff changeset
141 // check whether any divergent loop carrying Val terminates before control
anatofuz
parents:
diff changeset
142 // proceeds to ObservingBlock
anatofuz
parents:
diff changeset
143 for (const auto *Loop = LI.getLoopFor(Inst->getParent());
anatofuz
parents:
diff changeset
144 Loop != RegionLoop && !Loop->contains(&ObservingBlock);
anatofuz
parents:
diff changeset
145 Loop = Loop->getParentLoop()) {
anatofuz
parents:
diff changeset
146 if (DivergentLoops.find(Loop) != DivergentLoops.end())
anatofuz
parents:
diff changeset
147 return true;
anatofuz
parents:
diff changeset
148 }
anatofuz
parents:
diff changeset
149
anatofuz
parents:
diff changeset
150 return false;
anatofuz
parents:
diff changeset
151 }
anatofuz
parents:
diff changeset
152
anatofuz
parents:
diff changeset
153 bool DivergenceAnalysis::updatePHINode(const PHINode &Phi) const {
anatofuz
parents:
diff changeset
154 // joining divergent disjoint path in Phi parent block
anatofuz
parents:
diff changeset
155 if (!Phi.hasConstantOrUndefValue() && isJoinDivergent(*Phi.getParent())) {
anatofuz
parents:
diff changeset
156 return true;
anatofuz
parents:
diff changeset
157 }
anatofuz
parents:
diff changeset
158
anatofuz
parents:
diff changeset
159 // An incoming value could be divergent by itself.
anatofuz
parents:
diff changeset
160 // Otherwise, an incoming value could be uniform within the loop
anatofuz
parents:
diff changeset
161 // that carries its definition but it may appear divergent
anatofuz
parents:
diff changeset
162 // from outside the loop. This happens when divergent loop exits
anatofuz
parents:
diff changeset
163 // drop definitions of that uniform value in different iterations.
anatofuz
parents:
diff changeset
164 //
anatofuz
parents:
diff changeset
165 // for (int i = 0; i < n; ++i) { // 'i' is uniform inside the loop
anatofuz
parents:
diff changeset
166 // if (i % thread_id == 0) break; // divergent loop exit
anatofuz
parents:
diff changeset
167 // }
anatofuz
parents:
diff changeset
168 // int divI = i; // divI is divergent
anatofuz
parents:
diff changeset
169 for (size_t i = 0; i < Phi.getNumIncomingValues(); ++i) {
anatofuz
parents:
diff changeset
170 const auto *InVal = Phi.getIncomingValue(i);
anatofuz
parents:
diff changeset
171 if (isDivergent(*Phi.getIncomingValue(i)) ||
anatofuz
parents:
diff changeset
172 isTemporalDivergent(*Phi.getParent(), *InVal)) {
anatofuz
parents:
diff changeset
173 return true;
anatofuz
parents:
diff changeset
174 }
anatofuz
parents:
diff changeset
175 }
anatofuz
parents:
diff changeset
176 return false;
anatofuz
parents:
diff changeset
177 }
anatofuz
parents:
diff changeset
178
anatofuz
parents:
diff changeset
179 bool DivergenceAnalysis::inRegion(const Instruction &I) const {
anatofuz
parents:
diff changeset
180 return I.getParent() && inRegion(*I.getParent());
anatofuz
parents:
diff changeset
181 }
anatofuz
parents:
diff changeset
182
anatofuz
parents:
diff changeset
183 bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const {
anatofuz
parents:
diff changeset
184 return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB);
anatofuz
parents:
diff changeset
185 }
anatofuz
parents:
diff changeset
186
anatofuz
parents:
diff changeset
187 // marks all users of loop-carried values of the loop headed by LoopHeader as
anatofuz
parents:
diff changeset
188 // divergent
anatofuz
parents:
diff changeset
189 void DivergenceAnalysis::taintLoopLiveOuts(const BasicBlock &LoopHeader) {
anatofuz
parents:
diff changeset
190 auto *DivLoop = LI.getLoopFor(&LoopHeader);
anatofuz
parents:
diff changeset
191 assert(DivLoop && "loopHeader is not actually part of a loop");
anatofuz
parents:
diff changeset
192
anatofuz
parents:
diff changeset
193 SmallVector<BasicBlock *, 8> TaintStack;
anatofuz
parents:
diff changeset
194 DivLoop->getExitBlocks(TaintStack);
anatofuz
parents:
diff changeset
195
anatofuz
parents:
diff changeset
196 // Otherwise potential users of loop-carried values could be anywhere in the
anatofuz
parents:
diff changeset
197 // dominance region of DivLoop (including its fringes for phi nodes)
anatofuz
parents:
diff changeset
198 DenseSet<const BasicBlock *> Visited;
anatofuz
parents:
diff changeset
199 for (auto *Block : TaintStack) {
anatofuz
parents:
diff changeset
200 Visited.insert(Block);
anatofuz
parents:
diff changeset
201 }
anatofuz
parents:
diff changeset
202 Visited.insert(&LoopHeader);
anatofuz
parents:
diff changeset
203
anatofuz
parents:
diff changeset
204 while (!TaintStack.empty()) {
anatofuz
parents:
diff changeset
205 auto *UserBlock = TaintStack.back();
anatofuz
parents:
diff changeset
206 TaintStack.pop_back();
anatofuz
parents:
diff changeset
207
anatofuz
parents:
diff changeset
208 // don't spread divergence beyond the region
anatofuz
parents:
diff changeset
209 if (!inRegion(*UserBlock))
anatofuz
parents:
diff changeset
210 continue;
anatofuz
parents:
diff changeset
211
anatofuz
parents:
diff changeset
212 assert(!DivLoop->contains(UserBlock) &&
anatofuz
parents:
diff changeset
213 "irreducible control flow detected");
anatofuz
parents:
diff changeset
214
anatofuz
parents:
diff changeset
215 // phi nodes at the fringes of the dominance region
anatofuz
parents:
diff changeset
216 if (!DT.dominates(&LoopHeader, UserBlock)) {
anatofuz
parents:
diff changeset
217 // all PHI nodes of UserBlock become divergent
anatofuz
parents:
diff changeset
218 for (auto &Phi : UserBlock->phis()) {
anatofuz
parents:
diff changeset
219 Worklist.push_back(&Phi);
anatofuz
parents:
diff changeset
220 }
anatofuz
parents:
diff changeset
221 continue;
anatofuz
parents:
diff changeset
222 }
anatofuz
parents:
diff changeset
223
anatofuz
parents:
diff changeset
224 // taint outside users of values carried by DivLoop
anatofuz
parents:
diff changeset
225 for (auto &I : *UserBlock) {
anatofuz
parents:
diff changeset
226 if (isAlwaysUniform(I))
anatofuz
parents:
diff changeset
227 continue;
anatofuz
parents:
diff changeset
228 if (isDivergent(I))
anatofuz
parents:
diff changeset
229 continue;
anatofuz
parents:
diff changeset
230
anatofuz
parents:
diff changeset
231 for (auto &Op : I.operands()) {
anatofuz
parents:
diff changeset
232 auto *OpInst = dyn_cast<Instruction>(&Op);
anatofuz
parents:
diff changeset
233 if (!OpInst)
anatofuz
parents:
diff changeset
234 continue;
anatofuz
parents:
diff changeset
235 if (DivLoop->contains(OpInst->getParent())) {
anatofuz
parents:
diff changeset
236 markDivergent(I);
anatofuz
parents:
diff changeset
237 pushUsers(I);
anatofuz
parents:
diff changeset
238 break;
anatofuz
parents:
diff changeset
239 }
anatofuz
parents:
diff changeset
240 }
anatofuz
parents:
diff changeset
241 }
anatofuz
parents:
diff changeset
242
anatofuz
parents:
diff changeset
243 // visit all blocks in the dominance region
anatofuz
parents:
diff changeset
244 for (auto *SuccBlock : successors(UserBlock)) {
anatofuz
parents:
diff changeset
245 if (!Visited.insert(SuccBlock).second) {
anatofuz
parents:
diff changeset
246 continue;
anatofuz
parents:
diff changeset
247 }
anatofuz
parents:
diff changeset
248 TaintStack.push_back(SuccBlock);
anatofuz
parents:
diff changeset
249 }
anatofuz
parents:
diff changeset
250 }
anatofuz
parents:
diff changeset
251 }
anatofuz
parents:
diff changeset
252
anatofuz
parents:
diff changeset
253 void DivergenceAnalysis::pushPHINodes(const BasicBlock &Block) {
anatofuz
parents:
diff changeset
254 for (const auto &Phi : Block.phis()) {
anatofuz
parents:
diff changeset
255 if (isDivergent(Phi))
anatofuz
parents:
diff changeset
256 continue;
anatofuz
parents:
diff changeset
257 Worklist.push_back(&Phi);
anatofuz
parents:
diff changeset
258 }
anatofuz
parents:
diff changeset
259 }
anatofuz
parents:
diff changeset
260
anatofuz
parents:
diff changeset
261 void DivergenceAnalysis::pushUsers(const Value &V) {
anatofuz
parents:
diff changeset
262 for (const auto *User : V.users()) {
anatofuz
parents:
diff changeset
263 const auto *UserInst = dyn_cast<const Instruction>(User);
anatofuz
parents:
diff changeset
264 if (!UserInst)
anatofuz
parents:
diff changeset
265 continue;
anatofuz
parents:
diff changeset
266
anatofuz
parents:
diff changeset
267 if (isDivergent(*UserInst))
anatofuz
parents:
diff changeset
268 continue;
anatofuz
parents:
diff changeset
269
anatofuz
parents:
diff changeset
270 // only compute divergent inside loop
anatofuz
parents:
diff changeset
271 if (!inRegion(*UserInst))
anatofuz
parents:
diff changeset
272 continue;
anatofuz
parents:
diff changeset
273 Worklist.push_back(UserInst);
anatofuz
parents:
diff changeset
274 }
anatofuz
parents:
diff changeset
275 }
anatofuz
parents:
diff changeset
276
anatofuz
parents:
diff changeset
277 bool DivergenceAnalysis::propagateJoinDivergence(const BasicBlock &JoinBlock,
anatofuz
parents:
diff changeset
278 const Loop *BranchLoop) {
anatofuz
parents:
diff changeset
279 LLVM_DEBUG(dbgs() << "\tpropJoinDiv " << JoinBlock.getName() << "\n");
anatofuz
parents:
diff changeset
280
anatofuz
parents:
diff changeset
281 // ignore divergence outside the region
anatofuz
parents:
diff changeset
282 if (!inRegion(JoinBlock)) {
anatofuz
parents:
diff changeset
283 return false;
anatofuz
parents:
diff changeset
284 }
anatofuz
parents:
diff changeset
285
anatofuz
parents:
diff changeset
286 // push non-divergent phi nodes in JoinBlock to the worklist
anatofuz
parents:
diff changeset
287 pushPHINodes(JoinBlock);
anatofuz
parents:
diff changeset
288
anatofuz
parents:
diff changeset
289 // JoinBlock is a divergent loop exit
anatofuz
parents:
diff changeset
290 if (BranchLoop && !BranchLoop->contains(&JoinBlock)) {
anatofuz
parents:
diff changeset
291 return true;
anatofuz
parents:
diff changeset
292 }
anatofuz
parents:
diff changeset
293
anatofuz
parents:
diff changeset
294 // disjoint-paths divergent at JoinBlock
anatofuz
parents:
diff changeset
295 markBlockJoinDivergent(JoinBlock);
anatofuz
parents:
diff changeset
296 return false;
anatofuz
parents:
diff changeset
297 }
anatofuz
parents:
diff changeset
298
anatofuz
parents:
diff changeset
299 void DivergenceAnalysis::propagateBranchDivergence(const Instruction &Term) {
anatofuz
parents:
diff changeset
300 LLVM_DEBUG(dbgs() << "propBranchDiv " << Term.getParent()->getName() << "\n");
anatofuz
parents:
diff changeset
301
anatofuz
parents:
diff changeset
302 markDivergent(Term);
anatofuz
parents:
diff changeset
303
anatofuz
parents:
diff changeset
304 // Don't propagate divergence from unreachable blocks.
anatofuz
parents:
diff changeset
305 if (!DT.isReachableFromEntry(Term.getParent()))
anatofuz
parents:
diff changeset
306 return;
anatofuz
parents:
diff changeset
307
anatofuz
parents:
diff changeset
308 const auto *BranchLoop = LI.getLoopFor(Term.getParent());
anatofuz
parents:
diff changeset
309
anatofuz
parents:
diff changeset
310 // whether there is a divergent loop exit from BranchLoop (if any)
anatofuz
parents:
diff changeset
311 bool IsBranchLoopDivergent = false;
anatofuz
parents:
diff changeset
312
anatofuz
parents:
diff changeset
313 // iterate over all blocks reachable by disjoint from Term within the loop
anatofuz
parents:
diff changeset
314 // also iterates over loop exits that become divergent due to Term.
anatofuz
parents:
diff changeset
315 for (const auto *JoinBlock : SDA.join_blocks(Term)) {
anatofuz
parents:
diff changeset
316 IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
anatofuz
parents:
diff changeset
317 }
anatofuz
parents:
diff changeset
318
anatofuz
parents:
diff changeset
319 // Branch loop is a divergent loop due to the divergent branch in Term
anatofuz
parents:
diff changeset
320 if (IsBranchLoopDivergent) {
anatofuz
parents:
diff changeset
321 assert(BranchLoop);
anatofuz
parents:
diff changeset
322 if (!DivergentLoops.insert(BranchLoop).second) {
anatofuz
parents:
diff changeset
323 return;
anatofuz
parents:
diff changeset
324 }
anatofuz
parents:
diff changeset
325 propagateLoopDivergence(*BranchLoop);
anatofuz
parents:
diff changeset
326 }
anatofuz
parents:
diff changeset
327 }
anatofuz
parents:
diff changeset
328
anatofuz
parents:
diff changeset
329 void DivergenceAnalysis::propagateLoopDivergence(const Loop &ExitingLoop) {
anatofuz
parents:
diff changeset
330 LLVM_DEBUG(dbgs() << "propLoopDiv " << ExitingLoop.getName() << "\n");
anatofuz
parents:
diff changeset
331
anatofuz
parents:
diff changeset
332 // don't propagate beyond region
anatofuz
parents:
diff changeset
333 if (!inRegion(*ExitingLoop.getHeader()))
anatofuz
parents:
diff changeset
334 return;
anatofuz
parents:
diff changeset
335
anatofuz
parents:
diff changeset
336 const auto *BranchLoop = ExitingLoop.getParentLoop();
anatofuz
parents:
diff changeset
337
anatofuz
parents:
diff changeset
338 // Uses of loop-carried values could occur anywhere
anatofuz
parents:
diff changeset
339 // within the dominance region of the definition. All loop-carried
anatofuz
parents:
diff changeset
340 // definitions are dominated by the loop header (reducible control).
anatofuz
parents:
diff changeset
341 // Thus all users have to be in the dominance region of the loop header,
anatofuz
parents:
diff changeset
342 // except PHI nodes that can also live at the fringes of the dom region
anatofuz
parents:
diff changeset
343 // (incoming defining value).
anatofuz
parents:
diff changeset
344 if (!IsLCSSAForm)
anatofuz
parents:
diff changeset
345 taintLoopLiveOuts(*ExitingLoop.getHeader());
anatofuz
parents:
diff changeset
346
anatofuz
parents:
diff changeset
347 // whether there is a divergent loop exit from BranchLoop (if any)
anatofuz
parents:
diff changeset
348 bool IsBranchLoopDivergent = false;
anatofuz
parents:
diff changeset
349
anatofuz
parents:
diff changeset
350 // iterate over all blocks reachable by disjoint paths from exits of
anatofuz
parents:
diff changeset
351 // ExitingLoop also iterates over loop exits (of BranchLoop) that in turn
anatofuz
parents:
diff changeset
352 // become divergent.
anatofuz
parents:
diff changeset
353 for (const auto *JoinBlock : SDA.join_blocks(ExitingLoop)) {
anatofuz
parents:
diff changeset
354 IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
anatofuz
parents:
diff changeset
355 }
anatofuz
parents:
diff changeset
356
anatofuz
parents:
diff changeset
357 // Branch loop is a divergent due to divergent loop exit in ExitingLoop
anatofuz
parents:
diff changeset
358 if (IsBranchLoopDivergent) {
anatofuz
parents:
diff changeset
359 assert(BranchLoop);
anatofuz
parents:
diff changeset
360 if (!DivergentLoops.insert(BranchLoop).second) {
anatofuz
parents:
diff changeset
361 return;
anatofuz
parents:
diff changeset
362 }
anatofuz
parents:
diff changeset
363 propagateLoopDivergence(*BranchLoop);
anatofuz
parents:
diff changeset
364 }
anatofuz
parents:
diff changeset
365 }
anatofuz
parents:
diff changeset
366
anatofuz
parents:
diff changeset
367 void DivergenceAnalysis::compute() {
anatofuz
parents:
diff changeset
368 for (auto *DivVal : DivergentValues) {
anatofuz
parents:
diff changeset
369 pushUsers(*DivVal);
anatofuz
parents:
diff changeset
370 }
anatofuz
parents:
diff changeset
371
anatofuz
parents:
diff changeset
372 // propagate divergence
anatofuz
parents:
diff changeset
373 while (!Worklist.empty()) {
anatofuz
parents:
diff changeset
374 const Instruction &I = *Worklist.back();
anatofuz
parents:
diff changeset
375 Worklist.pop_back();
anatofuz
parents:
diff changeset
376
anatofuz
parents:
diff changeset
377 // maintain uniformity of overrides
anatofuz
parents:
diff changeset
378 if (isAlwaysUniform(I))
anatofuz
parents:
diff changeset
379 continue;
anatofuz
parents:
diff changeset
380
anatofuz
parents:
diff changeset
381 bool WasDivergent = isDivergent(I);
anatofuz
parents:
diff changeset
382 if (WasDivergent)
anatofuz
parents:
diff changeset
383 continue;
anatofuz
parents:
diff changeset
384
anatofuz
parents:
diff changeset
385 // propagate divergence caused by terminator
anatofuz
parents:
diff changeset
386 if (I.isTerminator()) {
anatofuz
parents:
diff changeset
387 if (updateTerminator(I)) {
anatofuz
parents:
diff changeset
388 // propagate control divergence to affected instructions
anatofuz
parents:
diff changeset
389 propagateBranchDivergence(I);
anatofuz
parents:
diff changeset
390 continue;
anatofuz
parents:
diff changeset
391 }
anatofuz
parents:
diff changeset
392 }
anatofuz
parents:
diff changeset
393
anatofuz
parents:
diff changeset
394 // update divergence of I due to divergent operands
anatofuz
parents:
diff changeset
395 bool DivergentUpd = false;
anatofuz
parents:
diff changeset
396 const auto *Phi = dyn_cast<const PHINode>(&I);
anatofuz
parents:
diff changeset
397 if (Phi) {
anatofuz
parents:
diff changeset
398 DivergentUpd = updatePHINode(*Phi);
anatofuz
parents:
diff changeset
399 } else {
anatofuz
parents:
diff changeset
400 DivergentUpd = updateNormalInstruction(I);
anatofuz
parents:
diff changeset
401 }
anatofuz
parents:
diff changeset
402
anatofuz
parents:
diff changeset
403 // propagate value divergence to users
anatofuz
parents:
diff changeset
404 if (DivergentUpd) {
anatofuz
parents:
diff changeset
405 markDivergent(I);
anatofuz
parents:
diff changeset
406 pushUsers(I);
anatofuz
parents:
diff changeset
407 }
anatofuz
parents:
diff changeset
408 }
anatofuz
parents:
diff changeset
409 }
anatofuz
parents:
diff changeset
410
anatofuz
parents:
diff changeset
411 bool DivergenceAnalysis::isAlwaysUniform(const Value &V) const {
anatofuz
parents:
diff changeset
412 return UniformOverrides.find(&V) != UniformOverrides.end();
anatofuz
parents:
diff changeset
413 }
anatofuz
parents:
diff changeset
414
anatofuz
parents:
diff changeset
415 bool DivergenceAnalysis::isDivergent(const Value &V) const {
anatofuz
parents:
diff changeset
416 return DivergentValues.find(&V) != DivergentValues.end();
anatofuz
parents:
diff changeset
417 }
anatofuz
parents:
diff changeset
418
anatofuz
parents:
diff changeset
419 bool DivergenceAnalysis::isDivergentUse(const Use &U) const {
anatofuz
parents:
diff changeset
420 Value &V = *U.get();
anatofuz
parents:
diff changeset
421 Instruction &I = *cast<Instruction>(U.getUser());
anatofuz
parents:
diff changeset
422 return isDivergent(V) || isTemporalDivergent(*I.getParent(), V);
anatofuz
parents:
diff changeset
423 }
anatofuz
parents:
diff changeset
424
anatofuz
parents:
diff changeset
425 void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
anatofuz
parents:
diff changeset
426 if (DivergentValues.empty())
anatofuz
parents:
diff changeset
427 return;
anatofuz
parents:
diff changeset
428 // iterate instructions using instructions() to ensure a deterministic order.
anatofuz
parents:
diff changeset
429 for (auto &I : instructions(F)) {
anatofuz
parents:
diff changeset
430 if (isDivergent(I))
anatofuz
parents:
diff changeset
431 OS << "DIVERGENT:" << I << '\n';
anatofuz
parents:
diff changeset
432 }
anatofuz
parents:
diff changeset
433 }
anatofuz
parents:
diff changeset
434
anatofuz
parents:
diff changeset
435 // class GPUDivergenceAnalysis
anatofuz
parents:
diff changeset
436 GPUDivergenceAnalysis::GPUDivergenceAnalysis(Function &F,
anatofuz
parents:
diff changeset
437 const DominatorTree &DT,
anatofuz
parents:
diff changeset
438 const PostDominatorTree &PDT,
anatofuz
parents:
diff changeset
439 const LoopInfo &LI,
anatofuz
parents:
diff changeset
440 const TargetTransformInfo &TTI)
anatofuz
parents:
diff changeset
441 : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, false) {
anatofuz
parents:
diff changeset
442 for (auto &I : instructions(F)) {
anatofuz
parents:
diff changeset
443 if (TTI.isSourceOfDivergence(&I)) {
anatofuz
parents:
diff changeset
444 DA.markDivergent(I);
anatofuz
parents:
diff changeset
445 } else if (TTI.isAlwaysUniform(&I)) {
anatofuz
parents:
diff changeset
446 DA.addUniformOverride(I);
anatofuz
parents:
diff changeset
447 }
anatofuz
parents:
diff changeset
448 }
anatofuz
parents:
diff changeset
449 for (auto &Arg : F.args()) {
anatofuz
parents:
diff changeset
450 if (TTI.isSourceOfDivergence(&Arg)) {
anatofuz
parents:
diff changeset
451 DA.markDivergent(Arg);
anatofuz
parents:
diff changeset
452 }
anatofuz
parents:
diff changeset
453 }
anatofuz
parents:
diff changeset
454
anatofuz
parents:
diff changeset
455 DA.compute();
anatofuz
parents:
diff changeset
456 }
anatofuz
parents:
diff changeset
457
anatofuz
parents:
diff changeset
458 bool GPUDivergenceAnalysis::isDivergent(const Value &val) const {
anatofuz
parents:
diff changeset
459 return DA.isDivergent(val);
anatofuz
parents:
diff changeset
460 }
anatofuz
parents:
diff changeset
461
anatofuz
parents:
diff changeset
462 bool GPUDivergenceAnalysis::isDivergentUse(const Use &use) const {
anatofuz
parents:
diff changeset
463 return DA.isDivergentUse(use);
anatofuz
parents:
diff changeset
464 }
anatofuz
parents:
diff changeset
465
anatofuz
parents:
diff changeset
466 void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const {
anatofuz
parents:
diff changeset
467 OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n";
anatofuz
parents:
diff changeset
468 DA.print(OS, mod);
anatofuz
parents:
diff changeset
469 OS << "}\n";
anatofuz
parents:
diff changeset
470 }