Mercurial > hg > CbC > CbC_llvm
comparison lib/Target/SystemZ/SystemZTDC.cpp @ 120:1172e4bd9c6f
update 4.0.0
author | mir3636 |
---|---|
date | Fri, 25 Nov 2016 19:14:25 +0900 |
parents | |
children | 803732b1fca8 |
comparison
equal
deleted
inserted
replaced
101:34baf5011add | 120:1172e4bd9c6f |
---|---|
1 //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===// | |
2 // | |
3 // The LLVM Compiler Infrastructure | |
4 // | |
5 // This file is distributed under the University of Illinois Open Source | |
6 // License. See LICENSE.TXT for details. | |
7 // | |
8 //===----------------------------------------------------------------------===// | |
9 // | |
10 // This pass looks for instructions that can be replaced by a Test Data Class | |
11 // instruction, and replaces them when profitable. | |
12 // | |
13 // Roughly, the following rules are recognized: | |
14 // | |
15 // 1: fcmp pred X, 0 -> tdc X, mask | |
16 // 2: fcmp pred X, +-inf -> tdc X, mask | |
17 // 3: fcmp pred X, +-minnorm -> tdc X, mask | |
18 // 4: tdc (fabs X), mask -> tdc X, newmask | |
19 // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit] | |
20 // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask | |
21 // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask | |
22 // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2) | |
23 // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2) | |
24 // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2) | |
25 // | |
26 // The pass works in 4 steps: | |
27 // | |
28 // 1. All fcmp and icmp instructions in a function are checked for a match | |
29 // with rules 1-3 and 5-7. Their TDC equivalents are stored in | |
30 // the ConvertedInsts mapping. If the operand of a fcmp instruction is | |
31 // a fabs, it's also folded according to rule 4. | |
32 // 2. All and/or/xor i1 instructions whose both operands have been already | |
33 // mapped are mapped according to rules 8-10. LogicOpsWorklist is used | |
34 // as a queue of instructions to check. | |
35 // 3. All mapped instructions that are considered worthy of conversion (ie. | |
36 // replacing them will actually simplify the final code) are replaced | |
37 // with a call to the s390.tdc intrinsic. | |
38 // 4. All intermediate results of replaced instructions are removed if unused. | |
39 // | |
40 // Instructions that match rules 1-3 are considered unworthy of conversion | |
41 // on their own (since a comparison instruction is superior), but are mapped | |
42 // in the hopes of folding the result using rules 4 and 8-10 (likely removing | |
43 // the original comparison in the process). | |
44 // | |
45 //===----------------------------------------------------------------------===// | |
46 | |
47 #include "SystemZ.h" | |
48 #include "llvm/ADT/MapVector.h" | |
49 #include "llvm/IR/Constants.h" | |
50 #include "llvm/IR/Instructions.h" | |
51 #include "llvm/IR/InstIterator.h" | |
52 #include "llvm/IR/IntrinsicInst.h" | |
53 #include "llvm/IR/IRBuilder.h" | |
54 #include "llvm/IR/LegacyPassManager.h" | |
55 #include "llvm/IR/Module.h" | |
56 #include <deque> | |
57 #include <set> | |
58 | |
59 using namespace llvm; | |
60 | |
61 namespace llvm { | |
62 void initializeSystemZTDCPassPass(PassRegistry&); | |
63 } | |
64 | |
65 namespace { | |
66 | |
67 class SystemZTDCPass : public FunctionPass { | |
68 public: | |
69 static char ID; | |
70 SystemZTDCPass() : FunctionPass(ID) { | |
71 initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry()); | |
72 } | |
73 | |
74 bool runOnFunction(Function &F) override; | |
75 private: | |
76 // Maps seen instructions that can be mapped to a TDC, values are | |
77 // (TDC operand, TDC mask, worthy flag) triples. | |
78 MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts; | |
79 // The queue of and/or/xor i1 instructions to be potentially folded. | |
80 std::vector<BinaryOperator *> LogicOpsWorklist; | |
81 // Instructions matched while folding, to be removed at the end if unused. | |
82 std::set<Instruction *> PossibleJunk; | |
83 | |
84 // Tries to convert a fcmp instruction. | |
85 void convertFCmp(CmpInst &I); | |
86 | |
87 // Tries to convert an icmp instruction. | |
88 void convertICmp(CmpInst &I); | |
89 | |
90 // Tries to convert an i1 and/or/xor instruction, whose both operands | |
91 // have been already converted. | |
92 void convertLogicOp(BinaryOperator &I); | |
93 | |
94 // Marks an instruction as converted - adds it to ConvertedInsts and adds | |
95 // any and/or/xor i1 users to the queue. | |
96 void converted(Instruction *I, Value *V, int Mask, bool Worthy) { | |
97 ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy); | |
98 auto &M = *I->getFunction()->getParent(); | |
99 auto &Ctx = M.getContext(); | |
100 for (auto *U : I->users()) { | |
101 auto *LI = dyn_cast<BinaryOperator>(U); | |
102 if (LI && LI->getType() == Type::getInt1Ty(Ctx) && | |
103 (LI->getOpcode() == Instruction::And || | |
104 LI->getOpcode() == Instruction::Or || | |
105 LI->getOpcode() == Instruction::Xor)) { | |
106 LogicOpsWorklist.push_back(LI); | |
107 } | |
108 } | |
109 } | |
110 }; | |
111 | |
112 } // end anonymous namespace | |
113 | |
114 char SystemZTDCPass::ID = 0; | |
115 INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc", | |
116 "SystemZ Test Data Class optimization", false, false) | |
117 | |
118 FunctionPass *llvm::createSystemZTDCPass() { | |
119 return new SystemZTDCPass(); | |
120 } | |
121 | |
122 void SystemZTDCPass::convertFCmp(CmpInst &I) { | |
123 Value *Op0 = I.getOperand(0); | |
124 auto *Const = dyn_cast<ConstantFP>(I.getOperand(1)); | |
125 auto Pred = I.getPredicate(); | |
126 // Only comparisons with consts are interesting. | |
127 if (!Const) | |
128 return; | |
129 // Compute the smallest normal number (and its negation). | |
130 auto &Sem = Op0->getType()->getFltSemantics(); | |
131 APFloat Smallest = APFloat::getSmallestNormalized(Sem); | |
132 APFloat NegSmallest = Smallest; | |
133 NegSmallest.changeSign(); | |
134 // Check if Const is one of our recognized consts. | |
135 int WhichConst; | |
136 if (Const->isZero()) { | |
137 // All comparisons with 0 can be converted. | |
138 WhichConst = 0; | |
139 } else if (Const->isInfinity()) { | |
140 // Likewise for infinities. | |
141 WhichConst = Const->isNegative() ? 2 : 1; | |
142 } else if (Const->isExactlyValue(Smallest)) { | |
143 // For Smallest, we cannot do EQ separately from GT. | |
144 if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE && | |
145 (Pred & CmpInst::FCMP_OGE) != 0) | |
146 return; | |
147 WhichConst = 3; | |
148 } else if (Const->isExactlyValue(NegSmallest)) { | |
149 // Likewise for NegSmallest, we cannot do EQ separately from LT. | |
150 if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE && | |
151 (Pred & CmpInst::FCMP_OLE) != 0) | |
152 return; | |
153 WhichConst = 4; | |
154 } else { | |
155 // Not one of our special constants. | |
156 return; | |
157 } | |
158 // Partial masks to use for EQ, GT, LT, UN comparisons, respectively. | |
159 static const int Masks[][4] = { | |
160 { // 0 | |
161 SystemZ::TDCMASK_ZERO, // eq | |
162 SystemZ::TDCMASK_POSITIVE, // gt | |
163 SystemZ::TDCMASK_NEGATIVE, // lt | |
164 SystemZ::TDCMASK_NAN, // un | |
165 }, | |
166 { // inf | |
167 SystemZ::TDCMASK_INFINITY_PLUS, // eq | |
168 0, // gt | |
169 (SystemZ::TDCMASK_ZERO | | |
170 SystemZ::TDCMASK_NEGATIVE | | |
171 SystemZ::TDCMASK_NORMAL_PLUS | | |
172 SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt | |
173 SystemZ::TDCMASK_NAN, // un | |
174 }, | |
175 { // -inf | |
176 SystemZ::TDCMASK_INFINITY_MINUS, // eq | |
177 (SystemZ::TDCMASK_ZERO | | |
178 SystemZ::TDCMASK_POSITIVE | | |
179 SystemZ::TDCMASK_NORMAL_MINUS | | |
180 SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt | |
181 0, // lt | |
182 SystemZ::TDCMASK_NAN, // un | |
183 }, | |
184 { // minnorm | |
185 0, // eq (unsupported) | |
186 (SystemZ::TDCMASK_NORMAL_PLUS | | |
187 SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge) | |
188 (SystemZ::TDCMASK_ZERO | | |
189 SystemZ::TDCMASK_NEGATIVE | | |
190 SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt | |
191 SystemZ::TDCMASK_NAN, // un | |
192 }, | |
193 { // -minnorm | |
194 0, // eq (unsupported) | |
195 (SystemZ::TDCMASK_ZERO | | |
196 SystemZ::TDCMASK_POSITIVE | | |
197 SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt | |
198 (SystemZ::TDCMASK_NORMAL_MINUS | | |
199 SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le) | |
200 SystemZ::TDCMASK_NAN, // un | |
201 } | |
202 }; | |
203 // Construct the mask as a combination of the partial masks. | |
204 int Mask = 0; | |
205 if (Pred & CmpInst::FCMP_OEQ) | |
206 Mask |= Masks[WhichConst][0]; | |
207 if (Pred & CmpInst::FCMP_OGT) | |
208 Mask |= Masks[WhichConst][1]; | |
209 if (Pred & CmpInst::FCMP_OLT) | |
210 Mask |= Masks[WhichConst][2]; | |
211 if (Pred & CmpInst::FCMP_UNO) | |
212 Mask |= Masks[WhichConst][3]; | |
213 // A lone fcmp is unworthy of tdc conversion on its own, but may become | |
214 // worthy if combined with fabs. | |
215 bool Worthy = false; | |
216 if (CallInst *CI = dyn_cast<CallInst>(Op0)) { | |
217 Function *F = CI->getCalledFunction(); | |
218 if (F && F->getIntrinsicID() == Intrinsic::fabs) { | |
219 // Fold with fabs - adjust the mask appropriately. | |
220 Mask &= SystemZ::TDCMASK_PLUS; | |
221 Mask |= Mask >> 1; | |
222 Op0 = CI->getArgOperand(0); | |
223 // A combination of fcmp with fabs is a win, unless the constant | |
224 // involved is 0 (which is handled by later passes). | |
225 Worthy = WhichConst != 0; | |
226 PossibleJunk.insert(CI); | |
227 } | |
228 } | |
229 converted(&I, Op0, Mask, Worthy); | |
230 } | |
231 | |
232 void SystemZTDCPass::convertICmp(CmpInst &I) { | |
233 Value *Op0 = I.getOperand(0); | |
234 auto *Const = dyn_cast<ConstantInt>(I.getOperand(1)); | |
235 auto Pred = I.getPredicate(); | |
236 // All our icmp rules involve comparisons with consts. | |
237 if (!Const) | |
238 return; | |
239 if (auto *Cast = dyn_cast<BitCastInst>(Op0)) { | |
240 // Check for icmp+bitcast used for signbit. | |
241 if (!Cast->getSrcTy()->isFloatTy() && | |
242 !Cast->getSrcTy()->isDoubleTy() && | |
243 !Cast->getSrcTy()->isFP128Ty()) | |
244 return; | |
245 Value *V = Cast->getOperand(0); | |
246 int Mask; | |
247 if (Pred == CmpInst::ICMP_SLT && Const->isZero()) { | |
248 // icmp slt (bitcast X), 0 - set if sign bit true | |
249 Mask = SystemZ::TDCMASK_MINUS; | |
250 } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) { | |
251 // icmp sgt (bitcast X), -1 - set if sign bit false | |
252 Mask = SystemZ::TDCMASK_PLUS; | |
253 } else { | |
254 // Not a sign bit check. | |
255 return; | |
256 } | |
257 PossibleJunk.insert(Cast); | |
258 converted(&I, V, Mask, true); | |
259 } else if (auto *CI = dyn_cast<CallInst>(Op0)) { | |
260 // Check if this is a pre-existing call of our tdc intrinsic. | |
261 Function *F = CI->getCalledFunction(); | |
262 if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc) | |
263 return; | |
264 if (!Const->isZero()) | |
265 return; | |
266 Value *V = CI->getArgOperand(0); | |
267 auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); | |
268 // Bail if the mask is not a constant. | |
269 if (!MaskC) | |
270 return; | |
271 int Mask = MaskC->getZExtValue(); | |
272 Mask &= SystemZ::TDCMASK_ALL; | |
273 if (Pred == CmpInst::ICMP_NE) { | |
274 // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC | |
275 } else if (Pred == CmpInst::ICMP_EQ) { | |
276 // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask | |
277 Mask ^= SystemZ::TDCMASK_ALL; | |
278 } else { | |
279 // An unknown comparison - ignore. | |
280 return; | |
281 } | |
282 PossibleJunk.insert(CI); | |
283 converted(&I, V, Mask, false); | |
284 } | |
285 } | |
286 | |
287 void SystemZTDCPass::convertLogicOp(BinaryOperator &I) { | |
288 Value *Op0, *Op1; | |
289 int Mask0, Mask1; | |
290 bool Worthy0, Worthy1; | |
291 std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))]; | |
292 std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))]; | |
293 if (Op0 != Op1) | |
294 return; | |
295 int Mask; | |
296 switch (I.getOpcode()) { | |
297 case Instruction::And: | |
298 Mask = Mask0 & Mask1; | |
299 break; | |
300 case Instruction::Or: | |
301 Mask = Mask0 | Mask1; | |
302 break; | |
303 case Instruction::Xor: | |
304 Mask = Mask0 ^ Mask1; | |
305 break; | |
306 default: | |
307 llvm_unreachable("Unknown op in convertLogicOp"); | |
308 } | |
309 converted(&I, Op0, Mask, true); | |
310 } | |
311 | |
312 bool SystemZTDCPass::runOnFunction(Function &F) { | |
313 ConvertedInsts.clear(); | |
314 LogicOpsWorklist.clear(); | |
315 PossibleJunk.clear(); | |
316 | |
317 // Look for icmp+fcmp instructions. | |
318 for (auto &I : instructions(F)) { | |
319 if (I.getOpcode() == Instruction::FCmp) | |
320 convertFCmp(cast<CmpInst>(I)); | |
321 else if (I.getOpcode() == Instruction::ICmp) | |
322 convertICmp(cast<CmpInst>(I)); | |
323 } | |
324 | |
325 // If none found, bail already. | |
326 if (ConvertedInsts.empty()) | |
327 return false; | |
328 | |
329 // Process the queue of logic instructions. | |
330 while (!LogicOpsWorklist.empty()) { | |
331 BinaryOperator *Op = LogicOpsWorklist.back(); | |
332 LogicOpsWorklist.pop_back(); | |
333 // If both operands mapped, and the instruction itself not yet mapped, | |
334 // convert it. | |
335 if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) && | |
336 ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) && | |
337 !ConvertedInsts.count(Op)) | |
338 convertLogicOp(*Op); | |
339 } | |
340 | |
341 // Time to actually replace the instructions. Do it in the reverse order | |
342 // of finding them, since there's a good chance the earlier ones will be | |
343 // unused (due to being folded into later ones). | |
344 Module &M = *F.getParent(); | |
345 auto &Ctx = M.getContext(); | |
346 Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0); | |
347 bool MadeChange = false; | |
348 for (auto &It : reverse(ConvertedInsts)) { | |
349 Instruction *I = It.first; | |
350 Value *V; | |
351 int Mask; | |
352 bool Worthy; | |
353 std::tie(V, Mask, Worthy) = It.second; | |
354 if (!I->user_empty()) { | |
355 // If used and unworthy of conversion, skip it. | |
356 if (!Worthy) | |
357 continue; | |
358 // Call the intrinsic, compare result with 0. | |
359 Value *TDCFunc = Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, | |
360 V->getType()); | |
361 IRBuilder<> IRB(I); | |
362 Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask); | |
363 Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal}); | |
364 Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32); | |
365 I->replaceAllUsesWith(ICmp); | |
366 } | |
367 // If unused, or used and converted, remove it. | |
368 I->eraseFromParent(); | |
369 MadeChange = true; | |
370 } | |
371 | |
372 if (!MadeChange) | |
373 return false; | |
374 | |
375 // We've actually done something - now clear misc accumulated junk (fabs, | |
376 // bitcast). | |
377 for (auto *I : PossibleJunk) | |
378 if (I->user_empty()) | |
379 I->eraseFromParent(); | |
380 | |
381 return true; | |
382 } |