150
|
1 //===- SyncDependenceAnalysis.cpp - Divergent Branch Dependence Calculation
|
|
2 //--===//
|
|
3 //
|
|
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
5 // See https://llvm.org/LICENSE.txt for license information.
|
|
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
7 //
|
|
8 //===----------------------------------------------------------------------===//
|
|
9 //
|
|
10 // This file implements an algorithm that returns for a divergent branch
|
|
11 // the set of basic blocks whose phi nodes become divergent due to divergent
|
|
12 // control. These are the blocks that are reachable by two disjoint paths from
|
|
13 // the branch or loop exits that have a reaching path that is disjoint from a
|
|
14 // path to the loop latch.
|
|
15 //
|
|
16 // The SyncDependenceAnalysis is used in the DivergenceAnalysis to model
|
|
17 // control-induced divergence in phi nodes.
|
|
18 //
|
|
19 // -- Summary --
|
|
20 // The SyncDependenceAnalysis lazily computes sync dependences [3].
|
|
21 // The analysis evaluates the disjoint path criterion [2] by a reduction
|
|
22 // to SSA construction. The SSA construction algorithm is implemented as
|
|
23 // a simple data-flow analysis [1].
|
|
24 //
|
|
25 // [1] "A Simple, Fast Dominance Algorithm", SPI '01, Cooper, Harvey and Kennedy
|
|
26 // [2] "Efficiently Computing Static Single Assignment Form
|
|
27 // and the Control Dependence Graph", TOPLAS '91,
|
|
28 // Cytron, Ferrante, Rosen, Wegman and Zadeck
|
|
29 // [3] "Improving Performance of OpenCL on CPUs", CC '12, Karrenberg and Hack
|
|
30 // [4] "Divergence Analysis", TOPLAS '13, Sampaio, Souza, Collange and Pereira
|
|
31 //
|
|
32 // -- Sync dependence --
|
|
33 // Sync dependence [4] characterizes the control flow aspect of the
|
|
34 // propagation of branch divergence. For example,
|
|
35 //
|
|
36 // %cond = icmp slt i32 %tid, 10
|
|
37 // br i1 %cond, label %then, label %else
|
|
38 // then:
|
|
39 // br label %merge
|
|
40 // else:
|
|
41 // br label %merge
|
|
42 // merge:
|
|
43 // %a = phi i32 [ 0, %then ], [ 1, %else ]
|
|
44 //
|
|
45 // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
|
|
46 // because %tid is not on its use-def chains, %a is sync dependent on %tid
|
|
47 // because the branch "br i1 %cond" depends on %tid and affects which value %a
|
|
48 // is assigned to.
|
|
49 //
|
|
50 // -- Reduction to SSA construction --
|
|
51 // There are two disjoint paths from A to X, if a certain variant of SSA
|
|
52 // construction places a phi node in X under the following set-up scheme [2].
|
|
53 //
|
|
54 // This variant of SSA construction ignores incoming undef values.
|
|
55 // That is paths from the entry without a definition do not result in
|
|
56 // phi nodes.
|
|
57 //
|
|
58 // entry
|
|
59 // / \
|
|
60 // A \
|
|
61 // / \ Y
|
|
62 // B C /
|
|
63 // \ / \ /
|
|
64 // D E
|
|
65 // \ /
|
|
66 // F
|
|
67 // Assume that A contains a divergent branch. We are interested
|
|
68 // in the set of all blocks where each block is reachable from A
|
|
69 // via two disjoint paths. This would be the set {D, F} in this
|
|
70 // case.
|
|
71 // To generally reduce this query to SSA construction we introduce
|
|
72 // a virtual variable x and assign to x different values in each
|
|
73 // successor block of A.
|
|
74 // entry
|
|
75 // / \
|
|
76 // A \
|
|
77 // / \ Y
|
|
78 // x = 0 x = 1 /
|
|
79 // \ / \ /
|
|
80 // D E
|
|
81 // \ /
|
|
82 // F
|
|
83 // Our flavor of SSA construction for x will construct the following
|
|
84 // entry
|
|
85 // / \
|
|
86 // A \
|
|
87 // / \ Y
|
|
88 // x0 = 0 x1 = 1 /
|
|
89 // \ / \ /
|
|
90 // x2=phi E
|
|
91 // \ /
|
|
92 // x3=phi
|
|
93 // The blocks D and F contain phi nodes and are thus each reachable
|
|
94 // by two disjoins paths from A.
|
|
95 //
|
|
96 // -- Remarks --
|
|
97 // In case of loop exits we need to check the disjoint path criterion for loops
|
|
98 // [2]. To this end, we check whether the definition of x differs between the
|
|
99 // loop exit and the loop header (_after_ SSA construction).
|
|
100 //
|
|
101 //===----------------------------------------------------------------------===//
|
|
102 #include "llvm/ADT/PostOrderIterator.h"
|
|
103 #include "llvm/ADT/SmallPtrSet.h"
|
|
104 #include "llvm/Analysis/PostDominators.h"
|
|
105 #include "llvm/Analysis/SyncDependenceAnalysis.h"
|
|
106 #include "llvm/IR/BasicBlock.h"
|
|
107 #include "llvm/IR/CFG.h"
|
|
108 #include "llvm/IR/Dominators.h"
|
|
109 #include "llvm/IR/Function.h"
|
|
110
|
|
111 #include <stack>
|
|
112 #include <unordered_set>
|
|
113
|
|
114 #define DEBUG_TYPE "sync-dependence"
|
|
115
|
|
116 namespace llvm {
|
|
117
|
|
118 ConstBlockSet SyncDependenceAnalysis::EmptyBlockSet;
|
|
119
|
|
120 SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT,
|
|
121 const PostDominatorTree &PDT,
|
|
122 const LoopInfo &LI)
|
|
123 : FuncRPOT(DT.getRoot()->getParent()), DT(DT), PDT(PDT), LI(LI) {}
|
|
124
|
|
125 SyncDependenceAnalysis::~SyncDependenceAnalysis() {}
|
|
126
|
|
127 using FunctionRPOT = ReversePostOrderTraversal<const Function *>;
|
|
128
|
|
129 // divergence propagator for reducible CFGs
|
|
130 struct DivergencePropagator {
|
|
131 const FunctionRPOT &FuncRPOT;
|
|
132 const DominatorTree &DT;
|
|
133 const PostDominatorTree &PDT;
|
|
134 const LoopInfo &LI;
|
|
135
|
|
136 // identified join points
|
|
137 std::unique_ptr<ConstBlockSet> JoinBlocks;
|
|
138
|
|
139 // reached loop exits (by a path disjoint to a path to the loop header)
|
|
140 SmallPtrSet<const BasicBlock *, 4> ReachedLoopExits;
|
|
141
|
|
142 // if DefMap[B] == C then C is the dominating definition at block B
|
|
143 // if DefMap[B] ~ undef then we haven't seen B yet
|
|
144 // if DefMap[B] == B then B is a join point of disjoint paths from X or B is
|
|
145 // an immediate successor of X (initial value).
|
|
146 using DefiningBlockMap = std::map<const BasicBlock *, const BasicBlock *>;
|
|
147 DefiningBlockMap DefMap;
|
|
148
|
|
149 // all blocks with pending visits
|
|
150 std::unordered_set<const BasicBlock *> PendingUpdates;
|
|
151
|
|
152 DivergencePropagator(const FunctionRPOT &FuncRPOT, const DominatorTree &DT,
|
|
153 const PostDominatorTree &PDT, const LoopInfo &LI)
|
|
154 : FuncRPOT(FuncRPOT), DT(DT), PDT(PDT), LI(LI),
|
|
155 JoinBlocks(new ConstBlockSet) {}
|
|
156
|
|
157 // set the definition at @block and mark @block as pending for a visit
|
|
158 void addPending(const BasicBlock &Block, const BasicBlock &DefBlock) {
|
|
159 bool WasAdded = DefMap.emplace(&Block, &DefBlock).second;
|
|
160 if (WasAdded)
|
|
161 PendingUpdates.insert(&Block);
|
|
162 }
|
|
163
|
|
164 void printDefs(raw_ostream &Out) {
|
|
165 Out << "Propagator::DefMap {\n";
|
|
166 for (const auto *Block : FuncRPOT) {
|
|
167 auto It = DefMap.find(Block);
|
|
168 Out << Block->getName() << " : ";
|
|
169 if (It == DefMap.end()) {
|
|
170 Out << "\n";
|
|
171 } else {
|
|
172 const auto *DefBlock = It->second;
|
|
173 Out << (DefBlock ? DefBlock->getName() : "<null>") << "\n";
|
|
174 }
|
|
175 }
|
|
176 Out << "}\n";
|
|
177 }
|
|
178
|
|
179 // process @succBlock with reaching definition @defBlock
|
|
180 // the original divergent branch was in @parentLoop (if any)
|
|
181 void visitSuccessor(const BasicBlock &SuccBlock, const Loop *ParentLoop,
|
|
182 const BasicBlock &DefBlock) {
|
|
183
|
|
184 // @succBlock is a loop exit
|
|
185 if (ParentLoop && !ParentLoop->contains(&SuccBlock)) {
|
|
186 DefMap.emplace(&SuccBlock, &DefBlock);
|
|
187 ReachedLoopExits.insert(&SuccBlock);
|
|
188 return;
|
|
189 }
|
|
190
|
|
191 // first reaching def?
|
|
192 auto ItLastDef = DefMap.find(&SuccBlock);
|
|
193 if (ItLastDef == DefMap.end()) {
|
|
194 addPending(SuccBlock, DefBlock);
|
|
195 return;
|
|
196 }
|
|
197
|
|
198 // a join of at least two definitions
|
|
199 if (ItLastDef->second != &DefBlock) {
|
|
200 // do we know this join already?
|
|
201 if (!JoinBlocks->insert(&SuccBlock).second)
|
|
202 return;
|
|
203
|
|
204 // update the definition
|
|
205 addPending(SuccBlock, SuccBlock);
|
|
206 }
|
|
207 }
|
|
208
|
|
209 // find all blocks reachable by two disjoint paths from @rootTerm.
|
|
210 // This method works for both divergent terminators and loops with
|
|
211 // divergent exits.
|
|
212 // @rootBlock is either the block containing the branch or the header of the
|
|
213 // divergent loop.
|
|
214 // @nodeSuccessors is the set of successors of the node (Loop or Terminator)
|
|
215 // headed by @rootBlock.
|
|
216 // @parentLoop is the parent loop of the Loop or the loop that contains the
|
|
217 // Terminator.
|
|
218 template <typename SuccessorIterable>
|
|
219 std::unique_ptr<ConstBlockSet>
|
|
220 computeJoinPoints(const BasicBlock &RootBlock,
|
|
221 SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
|
|
222 assert(JoinBlocks);
|
|
223
|
|
224 LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints. Parent loop: " << (ParentLoop ? ParentLoop->getName() : "<null>") << "\n" );
|
|
225
|
|
226 // bootstrap with branch targets
|
|
227 for (const auto *SuccBlock : NodeSuccessors) {
|
|
228 DefMap.emplace(SuccBlock, SuccBlock);
|
|
229
|
|
230 if (ParentLoop && !ParentLoop->contains(SuccBlock)) {
|
|
231 // immediate loop exit from node.
|
|
232 ReachedLoopExits.insert(SuccBlock);
|
|
233 } else {
|
|
234 // regular successor
|
|
235 PendingUpdates.insert(SuccBlock);
|
|
236 }
|
|
237 }
|
|
238
|
|
239 LLVM_DEBUG(
|
|
240 dbgs() << "SDA: rpo order:\n";
|
|
241 for (const auto * RpoBlock : FuncRPOT) {
|
|
242 dbgs() << "- " << RpoBlock->getName() << "\n";
|
|
243 }
|
|
244 );
|
|
245
|
|
246 auto ItBeginRPO = FuncRPOT.begin();
|
|
247 auto ItEndRPO = FuncRPOT.end();
|
|
248
|
|
249 // skip until term (TODO RPOT won't let us start at @term directly)
|
|
250 for (; *ItBeginRPO != &RootBlock; ++ItBeginRPO) {
|
|
251 assert(ItBeginRPO != ItEndRPO && "Unable to find RootBlock");
|
|
252 }
|
|
253
|
|
254 // propagate definitions at the immediate successors of the node in RPO
|
|
255 auto ItBlockRPO = ItBeginRPO;
|
|
256 while ((++ItBlockRPO != ItEndRPO) &&
|
|
257 !PendingUpdates.empty()) {
|
|
258 const auto *Block = *ItBlockRPO;
|
|
259 LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n");
|
|
260
|
|
261 // skip Block if not pending update
|
|
262 auto ItPending = PendingUpdates.find(Block);
|
|
263 if (ItPending == PendingUpdates.end())
|
|
264 continue;
|
|
265 PendingUpdates.erase(ItPending);
|
|
266
|
|
267 // propagate definition at Block to its successors
|
|
268 auto ItDef = DefMap.find(Block);
|
|
269 const auto *DefBlock = ItDef->second;
|
|
270 assert(DefBlock);
|
|
271
|
|
272 auto *BlockLoop = LI.getLoopFor(Block);
|
|
273 if (ParentLoop &&
|
|
274 (ParentLoop != BlockLoop && ParentLoop->contains(BlockLoop))) {
|
|
275 // if the successor is the header of a nested loop pretend its a
|
|
276 // single node with the loop's exits as successors
|
|
277 SmallVector<BasicBlock *, 4> BlockLoopExits;
|
|
278 BlockLoop->getExitBlocks(BlockLoopExits);
|
|
279 for (const auto *BlockLoopExit : BlockLoopExits) {
|
|
280 visitSuccessor(*BlockLoopExit, ParentLoop, *DefBlock);
|
|
281 }
|
|
282
|
|
283 } else {
|
|
284 // the successors are either on the same loop level or loop exits
|
|
285 for (const auto *SuccBlock : successors(Block)) {
|
|
286 visitSuccessor(*SuccBlock, ParentLoop, *DefBlock);
|
|
287 }
|
|
288 }
|
|
289 }
|
|
290
|
|
291 LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs()));
|
|
292
|
|
293 // We need to know the definition at the parent loop header to decide
|
|
294 // whether the definition at the header is different from the definition at
|
|
295 // the loop exits, which would indicate a divergent loop exits.
|
|
296 //
|
|
297 // A // loop header
|
|
298 // |
|
|
299 // B // nested loop header
|
|
300 // |
|
|
301 // C -> X (exit from B loop) -..-> (A latch)
|
|
302 // |
|
|
303 // D -> back to B (B latch)
|
|
304 // |
|
|
305 // proper exit from both loops
|
|
306 //
|
|
307 // analyze reached loop exits
|
|
308 if (!ReachedLoopExits.empty()) {
|
|
309 const BasicBlock *ParentLoopHeader =
|
|
310 ParentLoop ? ParentLoop->getHeader() : nullptr;
|
|
311
|
|
312 assert(ParentLoop);
|
|
313 auto ItHeaderDef = DefMap.find(ParentLoopHeader);
|
|
314 const auto *HeaderDefBlock = (ItHeaderDef == DefMap.end()) ? nullptr : ItHeaderDef->second;
|
|
315
|
|
316 LLVM_DEBUG(printDefs(dbgs()));
|
|
317 assert(HeaderDefBlock && "no definition at header of carrying loop");
|
|
318
|
|
319 for (const auto *ExitBlock : ReachedLoopExits) {
|
|
320 auto ItExitDef = DefMap.find(ExitBlock);
|
|
321 assert((ItExitDef != DefMap.end()) &&
|
|
322 "no reaching def at reachable loop exit");
|
|
323 if (ItExitDef->second != HeaderDefBlock) {
|
|
324 JoinBlocks->insert(ExitBlock);
|
|
325 }
|
|
326 }
|
|
327 }
|
|
328
|
|
329 return std::move(JoinBlocks);
|
|
330 }
|
|
331 };
|
|
332
|
|
333 const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
|
|
334 using LoopExitVec = SmallVector<BasicBlock *, 4>;
|
|
335 LoopExitVec LoopExits;
|
|
336 Loop.getExitBlocks(LoopExits);
|
|
337 if (LoopExits.size() < 1) {
|
|
338 return EmptyBlockSet;
|
|
339 }
|
|
340
|
|
341 // already available in cache?
|
|
342 auto ItCached = CachedLoopExitJoins.find(&Loop);
|
|
343 if (ItCached != CachedLoopExitJoins.end()) {
|
|
344 return *ItCached->second;
|
|
345 }
|
|
346
|
|
347 // compute all join points
|
|
348 DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
|
|
349 auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
|
|
350 *Loop.getHeader(), LoopExits, Loop.getParentLoop());
|
|
351
|
|
352 auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
|
|
353 assert(ItInserted.second);
|
|
354 return *ItInserted.first->second;
|
|
355 }
|
|
356
|
|
357 const ConstBlockSet &
|
|
358 SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
|
|
359 // trivial case
|
|
360 if (Term.getNumSuccessors() < 1) {
|
|
361 return EmptyBlockSet;
|
|
362 }
|
|
363
|
|
364 // already available in cache?
|
|
365 auto ItCached = CachedBranchJoins.find(&Term);
|
|
366 if (ItCached != CachedBranchJoins.end())
|
|
367 return *ItCached->second;
|
|
368
|
|
369 // compute all join points
|
|
370 DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
|
|
371 const auto &TermBlock = *Term.getParent();
|
173
|
372 auto JoinBlocks = Propagator.computeJoinPoints<const_succ_range>(
|
150
|
373 TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
|
|
374
|
|
375 auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
|
|
376 assert(ItInserted.second);
|
|
377 return *ItInserted.first->second;
|
|
378 }
|
|
379
|
|
380 } // namespace llvm
|