121
|
1 //===- DivRemPairs.cpp - Hoist/decompose division and remainder -*- C++ -*-===//
|
|
2 //
|
|
3 // The LLVM Compiler Infrastructure
|
|
4 //
|
|
5 // This file is distributed under the University of Illinois Open Source
|
|
6 // License. See LICENSE.TXT for details.
|
|
7 //
|
|
8 //===----------------------------------------------------------------------===//
|
|
9 //
|
|
10 // This pass hoists and/or decomposes integer division and remainder
|
|
11 // instructions to enable CFG improvements and better codegen.
|
|
12 //
|
|
13 //===----------------------------------------------------------------------===//
|
|
14
|
|
15 #include "llvm/Transforms/Scalar/DivRemPairs.h"
|
|
16 #include "llvm/ADT/Statistic.h"
|
|
17 #include "llvm/Analysis/GlobalsModRef.h"
|
|
18 #include "llvm/Analysis/TargetTransformInfo.h"
|
|
19 #include "llvm/IR/Dominators.h"
|
|
20 #include "llvm/IR/Function.h"
|
|
21 #include "llvm/Pass.h"
|
|
22 #include "llvm/Transforms/Scalar.h"
|
|
23 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
|
|
24 using namespace llvm;
|
|
25
|
|
26 #define DEBUG_TYPE "div-rem-pairs"
|
|
27 STATISTIC(NumPairs, "Number of div/rem pairs");
|
|
28 STATISTIC(NumHoisted, "Number of instructions hoisted");
|
|
29 STATISTIC(NumDecomposed, "Number of instructions decomposed");
|
|
30
|
|
31 /// Find matching pairs of integer div/rem ops (they have the same numerator,
|
|
32 /// denominator, and signedness). If they exist in different basic blocks, bring
|
|
33 /// them together by hoisting or replace the common division operation that is
|
|
34 /// implicit in the remainder:
|
|
35 /// X % Y <--> X - ((X / Y) * Y).
|
|
36 ///
|
|
37 /// We can largely ignore the normal safety and cost constraints on speculation
|
|
38 /// of these ops when we find a matching pair. This is because we are already
|
|
39 /// guaranteed that any exceptions and most cost are already incurred by the
|
|
40 /// first member of the pair.
|
|
41 ///
|
|
42 /// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
|
|
43 /// SimplifyCFG, but it's split off on its own because it's different enough
|
|
44 /// that it doesn't quite match the stated objectives of those passes.
|
|
45 static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
|
|
46 const DominatorTree &DT) {
|
|
47 bool Changed = false;
|
|
48
|
|
49 // Insert all divide and remainder instructions into maps keyed by their
|
|
50 // operands and opcode (signed or unsigned).
|
|
51 DenseMap<DivRemMapKey, Instruction *> DivMap, RemMap;
|
|
52 for (auto &BB : F) {
|
|
53 for (auto &I : BB) {
|
|
54 if (I.getOpcode() == Instruction::SDiv)
|
|
55 DivMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I;
|
|
56 else if (I.getOpcode() == Instruction::UDiv)
|
|
57 DivMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I;
|
|
58 else if (I.getOpcode() == Instruction::SRem)
|
|
59 RemMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I;
|
|
60 else if (I.getOpcode() == Instruction::URem)
|
|
61 RemMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I;
|
|
62 }
|
|
63 }
|
|
64
|
|
65 // We can iterate over either map because we are only looking for matched
|
|
66 // pairs. Choose remainders for efficiency because they are usually even more
|
|
67 // rare than division.
|
|
68 for (auto &RemPair : RemMap) {
|
|
69 // Find the matching division instruction from the division map.
|
|
70 Instruction *DivInst = DivMap[RemPair.getFirst()];
|
|
71 if (!DivInst)
|
|
72 continue;
|
|
73
|
|
74 // We have a matching pair of div/rem instructions. If one dominates the
|
|
75 // other, hoist and/or replace one.
|
|
76 NumPairs++;
|
|
77 Instruction *RemInst = RemPair.getSecond();
|
|
78 bool IsSigned = DivInst->getOpcode() == Instruction::SDiv;
|
|
79 bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned);
|
|
80
|
|
81 // If the target supports div+rem and the instructions are in the same block
|
|
82 // already, there's nothing to do. The backend should handle this. If the
|
|
83 // target does not support div+rem, then we will decompose the rem.
|
|
84 if (HasDivRemOp && RemInst->getParent() == DivInst->getParent())
|
|
85 continue;
|
|
86
|
|
87 bool DivDominates = DT.dominates(DivInst, RemInst);
|
|
88 if (!DivDominates && !DT.dominates(RemInst, DivInst))
|
|
89 continue;
|
|
90
|
|
91 if (HasDivRemOp) {
|
|
92 // The target has a single div/rem operation. Hoist the lower instruction
|
|
93 // to make the matched pair visible to the backend.
|
|
94 if (DivDominates)
|
|
95 RemInst->moveAfter(DivInst);
|
|
96 else
|
|
97 DivInst->moveAfter(RemInst);
|
|
98 NumHoisted++;
|
|
99 } else {
|
|
100 // The target does not have a single div/rem operation. Decompose the
|
|
101 // remainder calculation as:
|
|
102 // X % Y --> X - ((X / Y) * Y).
|
|
103 Value *X = RemInst->getOperand(0);
|
|
104 Value *Y = RemInst->getOperand(1);
|
|
105 Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y);
|
|
106 Instruction *Sub = BinaryOperator::CreateSub(X, Mul);
|
|
107
|
|
108 // If the remainder dominates, then hoist the division up to that block:
|
|
109 //
|
|
110 // bb1:
|
|
111 // %rem = srem %x, %y
|
|
112 // bb2:
|
|
113 // %div = sdiv %x, %y
|
|
114 // -->
|
|
115 // bb1:
|
|
116 // %div = sdiv %x, %y
|
|
117 // %mul = mul %div, %y
|
|
118 // %rem = sub %x, %mul
|
|
119 //
|
|
120 // If the division dominates, it's already in the right place. The mul+sub
|
|
121 // will be in a different block because we don't assume that they are
|
|
122 // cheap to speculatively execute:
|
|
123 //
|
|
124 // bb1:
|
|
125 // %div = sdiv %x, %y
|
|
126 // bb2:
|
|
127 // %rem = srem %x, %y
|
|
128 // -->
|
|
129 // bb1:
|
|
130 // %div = sdiv %x, %y
|
|
131 // bb2:
|
|
132 // %mul = mul %div, %y
|
|
133 // %rem = sub %x, %mul
|
|
134 //
|
|
135 // If the div and rem are in the same block, we do the same transform,
|
|
136 // but any code movement would be within the same block.
|
|
137
|
|
138 if (!DivDominates)
|
|
139 DivInst->moveBefore(RemInst);
|
|
140 Mul->insertAfter(RemInst);
|
|
141 Sub->insertAfter(Mul);
|
|
142
|
|
143 // Now kill the explicit remainder. We have replaced it with:
|
|
144 // (sub X, (mul (div X, Y), Y)
|
|
145 RemInst->replaceAllUsesWith(Sub);
|
|
146 RemInst->eraseFromParent();
|
|
147 NumDecomposed++;
|
|
148 }
|
|
149 Changed = true;
|
|
150 }
|
|
151
|
|
152 return Changed;
|
|
153 }
|
|
154
|
|
155 // Pass manager boilerplate below here.
|
|
156
|
|
157 namespace {
|
|
158 struct DivRemPairsLegacyPass : public FunctionPass {
|
|
159 static char ID;
|
|
160 DivRemPairsLegacyPass() : FunctionPass(ID) {
|
|
161 initializeDivRemPairsLegacyPassPass(*PassRegistry::getPassRegistry());
|
|
162 }
|
|
163
|
|
164 void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
165 AU.addRequired<DominatorTreeWrapperPass>();
|
|
166 AU.addRequired<TargetTransformInfoWrapperPass>();
|
|
167 AU.setPreservesCFG();
|
|
168 AU.addPreserved<DominatorTreeWrapperPass>();
|
|
169 AU.addPreserved<GlobalsAAWrapperPass>();
|
|
170 FunctionPass::getAnalysisUsage(AU);
|
|
171 }
|
|
172
|
|
173 bool runOnFunction(Function &F) override {
|
|
174 if (skipFunction(F))
|
|
175 return false;
|
|
176 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
|
|
177 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
|
178 return optimizeDivRem(F, TTI, DT);
|
|
179 }
|
|
180 };
|
|
181 }
|
|
182
|
|
183 char DivRemPairsLegacyPass::ID = 0;
|
|
184 INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs",
|
|
185 "Hoist/decompose integer division and remainder", false,
|
|
186 false)
|
|
187 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
|
188 INITIALIZE_PASS_END(DivRemPairsLegacyPass, "div-rem-pairs",
|
|
189 "Hoist/decompose integer division and remainder", false,
|
|
190 false)
|
|
191 FunctionPass *llvm::createDivRemPairsPass() {
|
|
192 return new DivRemPairsLegacyPass();
|
|
193 }
|
|
194
|
|
195 PreservedAnalyses DivRemPairsPass::run(Function &F,
|
|
196 FunctionAnalysisManager &FAM) {
|
|
197 TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
|
|
198 DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
|
|
199 if (!optimizeDivRem(F, TTI, DT))
|
|
200 return PreservedAnalyses::all();
|
|
201 // TODO: This pass just hoists/replaces math ops - all analyses are preserved?
|
|
202 PreservedAnalyses PA;
|
|
203 PA.preserveSet<CFGAnalyses>();
|
|
204 PA.preserve<GlobalsAA>();
|
|
205 return PA;
|
|
206 }
|