CbC_llvm: comparison of include/llvm/CodeGen/BasicTTIImpl.h @ 83:60c9769439b8 (LLVM 3.7)
author:   Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
date:     Wed, 18 Feb 2015 14:55:36 +0900
parents:
children: afa8332a0e37
compared revisions: 78:af83660cff7b and 83:60c9769439b8
//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides a helper that implements much of the TTI interface in
/// terms of the target-independent code generator and TargetLowering
/// interfaces.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
#define LLVM_CODEGEN_BASICTTIIMPL_H

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"

namespace llvm {

extern cl::opt<unsigned> PartialUnrollingThreshold;

/// \brief Base class which can be used to help build a TTI implementation.
///
/// This class provides as much implementation of the TTI interface as is
/// possible using the target independent parts of the code generator.
///
/// In order to subclass it, your class must implement a getST() method to
/// return the subtarget, and a getTLI() method to return the target lowering.
/// We need these methods implemented in the derived class so that this class
/// doesn't have to duplicate storage for them.
template <typename T>
class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
private:
  typedef TargetTransformInfoImplCRTPBase<T> BaseT;
  typedef TargetTransformInfo TTI;

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the result needs to be inserted and/or extracted from vectors.
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
    assert(Ty->isVectorTy() && "Can only scalarize vectors");
    unsigned Cost = 0;

    for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
      if (Insert)
        Cost += static_cast<T *>(this)
                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
      if (Extract)
        Cost += static_cast<T *>(this)
                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
    }

    return Cost;
  }

  /// Estimate the cost overhead of an SK_Alternate shuffle.
  unsigned getAltShuffleOverhead(Type *Ty) {
    assert(Ty->isVectorTy() && "Can only shuffle vectors");
    unsigned Cost = 0;
    // The shuffle cost is equal to the cost of extracting the elements from
    // the arguments plus the cost of inserting them into the result vector.

    // E.g. a <4 x float> shuffle with mask <0,5,2,7> extracts index 0 of the
    // first vector, index 1 of the second vector, index 2 of the first
    // vector, and index 3 of the second vector, and inserts them at indices
    // <0,1,2,3> of the result vector.
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
      Cost += static_cast<T *>(this)
                  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
      Cost += static_cast<T *>(this)
                  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
    }
    return Cost;
  }

  /// \brief Local query method delegates up to T which *must* implement this!
  const TargetSubtargetInfo *getST() const {
    return static_cast<const T *>(this)->getST();
  }

  /// \brief Local query method delegates up to T which *must* implement this!
  const TargetLoweringBase *getTLI() const {
    return static_cast<const T *>(this)->getTLI();
  }

protected:
  explicit BasicTTIImplBase(const TargetMachine *TM)
      : BaseT(TM->getDataLayout()) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  BasicTTIImplBase(const BasicTTIImplBase &Arg)
      : BaseT(static_cast<const BaseT &>(Arg)) {}
  BasicTTIImplBase(BasicTTIImplBase &&Arg)
      : BaseT(std::move(static_cast<BaseT &>(Arg))) {}
  BasicTTIImplBase &operator=(const BasicTTIImplBase &RHS) {
    BaseT::operator=(static_cast<const BaseT &>(RHS));
    return *this;
  }
  BasicTTIImplBase &operator=(BasicTTIImplBase &&RHS) {
    BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
    return *this;
  }

  /// \name Scalar TTI Implementations
  /// @{

  bool hasBranchDivergence() { return false; }

  bool isLegalAddImmediate(int64_t imm) {
    return getTLI()->isLegalAddImmediate(imm);
  }

  bool isLegalICmpImmediate(int64_t imm) {
    return getTLI()->isLegalICmpImmediate(imm);
  }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->isLegalAddressingMode(AM, Ty);
  }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->getScalingFactorCost(AM, Ty);
  }

  bool isTruncateFree(Type *Ty1, Type *Ty2) {
    return getTLI()->isTruncateFree(Ty1, Ty2);
  }

  bool isTypeLegal(Type *Ty) {
    EVT VT = getTLI()->getValueType(Ty);
    return getTLI()->isTypeLegal(VT);
  }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<const Value *> Arguments) {
    return BaseT::getIntrinsicCost(IID, RetTy, Arguments);
  }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Type *> ParamTys) {
    if (IID == Intrinsic::cttz) {
      if (getTLI()->isCheapToSpeculateCttz())
        return TargetTransformInfo::TCC_Basic;
      return TargetTransformInfo::TCC_Expensive;
    }

    if (IID == Intrinsic::ctlz) {
      if (getTLI()->isCheapToSpeculateCtlz())
        return TargetTransformInfo::TCC_Basic;
      return TargetTransformInfo::TCC_Expensive;
    }

    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys);
  }

  unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }

  unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }

  bool shouldBuildLookupTables() {
    const TargetLoweringBase *TLI = getTLI();
    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
           TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
  }

  bool haveFastSqrt(Type *Ty) {
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(Ty);
    return TLI->isTypeLegal(VT) &&
           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
  }

  unsigned getFPOpCost(Type *Ty) {
    // By default, FP instructions are no more expensive since they are
    // implemented in HW. Target specific TTI can override this.
    return TargetTransformInfo::TCC_Basic;
  }

  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
    const TargetLoweringBase *TLI = getTLI();
    switch (Opcode) {
    default: break;
    case Instruction::Trunc: {
      if (TLI->isTruncateFree(OpTy, Ty))
        return TargetTransformInfo::TCC_Free;
      return TargetTransformInfo::TCC_Basic;
    }
    case Instruction::ZExt: {
      if (TLI->isZExtFree(OpTy, Ty))
        return TargetTransformInfo::TCC_Free;
      return TargetTransformInfo::TCC_Basic;
    }
    }

    return BaseT::getOperationCost(Opcode, Ty, OpTy);
  }

  void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) {
    // This unrolling functionality is target independent, but to provide some
    // motivation for its intended use, for x86:

    // According to the Intel 64 and IA-32 Architectures Optimization Reference
    // Manual, Intel Core models and later have a loop stream detector (and
    // associated uop queue) that can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
    //    taken, and none of them may be calls.
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.

    // According to the Software Optimization Guide for AMD Family 15h
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
    // and loop buffer which can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have fewer than 16 branches
    //  - The loop must have less than 40 uops in all executed loop branches

    // The number of taken branches in a loop is hard to estimate here, and
    // benchmarking has revealed that it is better not to be conservative when
    // estimating the branch count. As a result, we'll ignore the branch limits
    // until someone finds a case where it matters in practice.

    unsigned MaxOps;
    const TargetSubtargetInfo *ST = getST();
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
      MaxOps = PartialUnrollingThreshold;
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
    else
      return;

    // Scan the loop: don't unroll loops with calls.
    for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
         ++I) {
      BasicBlock *BB = *I;

      for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
        if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
          ImmutableCallSite CS(J);
          if (const Function *F = CS.getCalledFunction()) {
            if (!static_cast<T *>(this)->isLoweredToCall(F))
              continue;
          }

          return;
        }
    }

    // Enable runtime and partial unrolling up to the specified size.
    UP.Partial = UP.Runtime = true;
    UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
  }

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(bool Vector) { return 1; }

  unsigned getRegisterBitWidth(bool Vector) { return 32; }

  unsigned getMaxInterleaveFactor() { return 1; }

  unsigned getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None) {
    // Check if any of the operands are vector operands.
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);

    bool IsFloat = Ty->getScalarType()->isFloatingPointTy();
    // Assume that floating point arithmetic operations cost twice as much as
    // integer operations.
    unsigned OpCost = (IsFloat ? 2 : 1);

    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1.
      // If the type is split to multiple registers, assume that there is some
      // overhead to this.
      // TODO: Once we have extract/insert subvector cost we need to use them.
      if (LT.first > 1)
        return LT.first * 2 * OpCost;
      return LT.first * 1 * OpCost;
    }

    if (!TLI->isOperationExpand(ISD, LT.second)) {
      // If the operation is custom lowered, then assume that the code is
      // twice as expensive.
      return LT.first * 2 * OpCost;
    }

    // Else, assume that we need to scalarize this op.
    if (Ty->isVectorTy()) {
      unsigned Num = Ty->getVectorNumElements();
      unsigned Cost = static_cast<T *>(this)
                          ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(Ty, true, true) + Num * Cost;
    }

    // We don't know anything about this scalar instruction.
    return OpCost;
  }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp) {
    if (Kind == TTI::SK_Alternate) {
      return getAltShuffleOverhead(Tp);
    }
    return 1;
  }

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src);
    std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst);

    // Check for NOOP conversions.
    if (SrcLT.first == DstLT.first &&
        SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {

      // Bitcasts between types that are legalized to the same type are free.
      if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
        return 0;
    }

    if (Opcode == Instruction::Trunc &&
        TLI->isTruncateFree(SrcLT.second, DstLT.second))
      return 0;

    if (Opcode == Instruction::ZExt &&
        TLI->isZExtFree(SrcLT.second, DstLT.second))
      return 0;

    // If the cast is marked as legal (or promote) then assume low cost.
    if (SrcLT.first == DstLT.first &&
        TLI->isOperationLegalOrPromote(ISD, DstLT.second))
      return 1;

    // Handle scalar conversions.
    if (!Src->isVectorTy() && !Dst->isVectorTy()) {

      // Scalar bitcasts are usually free.
      if (Opcode == Instruction::BitCast)
        return 0;

      // Just check the op cost. If the operation is legal then assume it
      // costs 1.
      if (!TLI->isOperationExpand(ISD, DstLT.second))
        return 1;

      // Assume that illegal scalar instructions are expensive.
      return 4;
    }

    // Check vector-to-vector casts.
    if (Dst->isVectorTy() && Src->isVectorTy()) {

      // If the cast is between same-sized registers, then the check is simple.
      if (SrcLT.first == DstLT.first &&
          SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {

        // Assume that Zext is done using AND.
        if (Opcode == Instruction::ZExt)
          return 1;

        // Assume that sext is done using SHL and SRA.
        if (Opcode == Instruction::SExt)
          return 2;

        // Just check the op cost. If the operation is legal then assume it
        // costs 1 and multiply by the type-legalization overhead.
        if (!TLI->isOperationExpand(ISD, DstLT.second))
          return SrcLT.first * 1;
      }

      // If we are converting vectors and the operation is illegal, or
      // if the vectors are legalized to different types, estimate the
      // scalarization costs.
      unsigned Num = Dst->getVectorNumElements();
      unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
          Opcode, Dst->getScalarType(), Src->getScalarType());

      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(Dst, true, true) + Num * Cost;
    }

    // We already handled vector-to-vector and scalar-to-scalar conversions.
    // This is where we handle bitcasts between vectors and scalars. We need to
    // assume that the conversion is scalarized in one way or another.
    if (Opcode == Instruction::BitCast)
      // Illegal bitcasts are done by storing and loading from a stack slot.
      return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
                                : 0) +
             (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
                                : 0);

    llvm_unreachable("Unhandled cast");
  }

  unsigned getCFInstrCost(unsigned Opcode) {
    // Branches are assumed to be predicted.
    return 0;
  }

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // Selects on vectors are actually vector selects.
    if (ISD == ISD::SELECT) {
      assert(CondTy && "CondTy must exist");
      if (CondTy->isVectorTy())
        ISD = ISD::VSELECT;
    }

    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);

    if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
        !TLI->isOperationExpand(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1. Multiply
      // by the type-legalization overhead.
      return LT.first * 1;
    }

    // Otherwise, assume that the operation is scalarized.
    if (ValTy->isVectorTy()) {
      unsigned Num = ValTy->getVectorNumElements();
      if (CondTy)
        CondTy = CondTy->getScalarType();
      unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
          Opcode, ValTy->getScalarType(), CondTy);

      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
    }

    // Unknown scalar opcode.
    return 1;
  }

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
    std::pair<unsigned, MVT> LT =
        getTLI()->getTypeLegalizationCost(Val->getScalarType());

    return LT.first;
  }

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace) {
    assert(!Src->isVoidTy() && "Invalid type");
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src);

    // Assume that all loads of legal types cost 1.
    unsigned Cost = LT.first;

    if (Src->isVectorTy() &&
        Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
      // This is a vector load that legalizes to a larger type than the vector
      // itself. Unless the corresponding extending load or truncating store is
      // legal, this will scalarize.
      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
      EVT MemVT = getTLI()->getValueType(Src, true);
      if (MemVT.isSimple() && MemVT != MVT::Other) {
        if (Opcode == Instruction::Store)
          LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT());
        else
          LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
      }

      if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
        // This is a vector load/store for some illegal type that is scalarized.
        // We must account for the cost of building or decomposing the vector.
        Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
                                         Opcode == Instruction::Store);
      }
    }

    return Cost;
  }

  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                                 ArrayRef<Type *> Tys) {
    unsigned ISD = 0;
    switch (IID) {
    default: {
      // Assume that we need to scalarize this intrinsic.
      unsigned ScalarizationCost = 0;
      unsigned ScalarCalls = 1;
      if (RetTy->isVectorTy()) {
        ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
        ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
      }
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
        if (Tys[i]->isVectorTy()) {
          ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
          ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
        }
      }

      return ScalarCalls + ScalarizationCost;
    }
    // Look for intrinsics that can be lowered directly or turned into a scalar
    // intrinsic call.
    case Intrinsic::sqrt:
      ISD = ISD::FSQRT;
      break;
    case Intrinsic::sin:
      ISD = ISD::FSIN;
      break;
    case Intrinsic::cos:
      ISD = ISD::FCOS;
      break;
    case Intrinsic::exp:
      ISD = ISD::FEXP;
      break;
    case Intrinsic::exp2:
      ISD = ISD::FEXP2;
      break;
    case Intrinsic::log:
      ISD = ISD::FLOG;
      break;
    case Intrinsic::log10:
      ISD = ISD::FLOG10;
      break;
    case Intrinsic::log2:
      ISD = ISD::FLOG2;
      break;
    case Intrinsic::fabs:
      ISD = ISD::FABS;
      break;
    case Intrinsic::minnum:
      ISD = ISD::FMINNUM;
      break;
    case Intrinsic::maxnum:
      ISD = ISD::FMAXNUM;
      break;
    case Intrinsic::copysign:
      ISD = ISD::FCOPYSIGN;
      break;
    case Intrinsic::floor:
      ISD = ISD::FFLOOR;
      break;
    case Intrinsic::ceil:
      ISD = ISD::FCEIL;
      break;
    case Intrinsic::trunc:
      ISD = ISD::FTRUNC;
      break;
    case Intrinsic::nearbyint:
      ISD = ISD::FNEARBYINT;
      break;
    case Intrinsic::rint:
      ISD = ISD::FRINT;
      break;
    case Intrinsic::round:
      ISD = ISD::FROUND;
      break;
    case Intrinsic::pow:
      ISD = ISD::FPOW;
      break;
    case Intrinsic::fma:
      ISD = ISD::FMA;
      break;
    case Intrinsic::fmuladd:
      ISD = ISD::FMA;
      break;
    // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      return 0;
    case Intrinsic::masked_store:
      return static_cast<T *>(this)
          ->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, 0);
    case Intrinsic::masked_load:
      return static_cast<T *>(this)
          ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
    }

    const TargetLoweringBase *TLI = getTLI();
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);

    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1.
      // If the type is split to multiple registers, assume that there is some
      // overhead to this.
      // TODO: Once we have extract/insert subvector cost we need to use them.
      if (LT.first > 1)
        return LT.first * 2;
      return LT.first * 1;
    }

    if (!TLI->isOperationExpand(ISD, LT.second)) {
      // If the operation is custom lowered, then assume that the code is
      // twice as expensive.
      return LT.first * 2;
    }

    // If we can't lower fmuladd into an FMA, estimate the cost as a floating
    // point mul followed by an add.
    if (IID == Intrinsic::fmuladd)
      return static_cast<T *>(this)
                 ->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
             static_cast<T *>(this)
                 ->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);

    // Else, assume that we need to scalarize this intrinsic. For math builtins
    // this will emit a costly libcall, adding call overhead and spills. Make it
    // very expensive.
    if (RetTy->isVectorTy()) {
      unsigned Num = RetTy->getVectorNumElements();
      unsigned Cost = static_cast<T *>(this)->getIntrinsicInstrCost(
          IID, RetTy->getScalarType(), Tys);
      return 10 * Cost * Num;
    }

    // This is going to be turned into a library call, make it expensive.
    return 10;
  }

  unsigned getNumberOfParts(Type *Tp) {
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Tp);
    return LT.first;
  }

  unsigned getAddressComputationCost(Type *Ty, bool IsComplex) { return 0; }

  unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) {
    assert(Ty->isVectorTy() && "Expect a vector type");
    unsigned NumVecElts = Ty->getVectorNumElements();
    unsigned NumReduxLevels = Log2_32(NumVecElts);
    unsigned ArithCost =
        NumReduxLevels *
        static_cast<T *>(this)->getArithmeticInstrCost(Opcode, Ty);
    // Assume the pairwise shuffles add a cost.
    unsigned ShuffleCost =
        NumReduxLevels * (IsPairwise + 1) *
        static_cast<T *>(this)
            ->getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts / 2, Ty);
    return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
  }

  /// @}
};

/// \brief Concrete BasicTTIImpl that can be used if no further customization
/// is needed.
class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
  typedef BasicTTIImplBase<BasicTTIImpl> BaseT;
  friend class BasicTTIImplBase<BasicTTIImpl>;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  explicit BasicTTIImpl(const TargetMachine *ST, Function &F);

  // Provide value semantics. MSVC requires that we spell all of these out.
  BasicTTIImpl(const BasicTTIImpl &Arg)
      : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
  BasicTTIImpl(BasicTTIImpl &&Arg)
      : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
        TLI(std::move(Arg.TLI)) {}
  BasicTTIImpl &operator=(const BasicTTIImpl &RHS) {
    BaseT::operator=(static_cast<const BaseT &>(RHS));
    ST = RHS.ST;
    TLI = RHS.TLI;
    return *this;
  }
  BasicTTIImpl &operator=(BasicTTIImpl &&RHS) {
    BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
    ST = std::move(RHS.ST);
    TLI = std::move(RHS.TLI);
    return *this;
  }
};

}

#endif
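
As the class comment near the top of the file notes, BasicTTIImplBase is reused through CRTP: a derived class only supplies getST() and getTLI(), and the base calls back into the derived type for every cost hook. Below is a minimal sketch (not part of BasicTTIImpl.h) of what a target-specific subclass could look like; the MyTargetTTIImpl name, its constructor parameters, and the overridden register count are illustrative assumptions, and the sketch simply mirrors the shape of the concrete BasicTTIImpl class defined at the end of the header.

// Hypothetical sketch, not part of this header: a target TTI built on
// BasicTTIImplBase via CRTP.
#include "llvm/CodeGen/BasicTTIImpl.h"

namespace llvm {

class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
  typedef BasicTTIImplBase<MyTargetTTIImpl> BaseT;
  friend class BasicTTIImplBase<MyTargetTTIImpl>;

  // The base class reaches these through getST()/getTLI(), so it does not
  // duplicate their storage.
  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  // Assumed constructor shape: a real target would usually derive the
  // subtarget and lowering info from the TargetMachine and the Function.
  explicit MyTargetTTIImpl(const TargetMachine *TM,
                           const TargetSubtargetInfo *ST,
                           const TargetLoweringBase *TLI)
      : BaseT(TM), ST(ST), TLI(TLI) {}

  // Override only the hooks whose default cost model is wrong for the target;
  // everything else falls through to BasicTTIImplBase. The values here are
  // purely illustrative.
  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 32 : 16; }
};

} // namespace llvm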