//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "tti"

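// Hidden, off-by-default flag: the reduction matchers further down in this
// file only run when it is set (e.g. by passing -costmodel-reduxcost on the
// opt command line).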
static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
                                     cl::Hidden,
                                     cl::desc("Recognize reduction patterns."));

namespace {
/// No-op implementation of the TTI interface using the utility base
/// classes.
///
/// This is used when no target specific information is available.
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
  explicit NoTTIImpl(const DataLayout &DL)
      : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
};
} // namespace

bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
  // If the loop has irreducible control flow, it cannot be converted to a
  // hardware loop.
  LoopBlocksRPO RPOT(L);
  RPOT.perform(&LI);
  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
    return false;
  return true;
}

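// The IntrinsicCostAttributes constructors below capture everything the cost
// model needs to know about an intrinsic call -- return and parameter types,
// arguments, fast-math flags, and optionally a vectorization factor and
// scalarization cost -- so cost queries can be made with or without an
// actual call instruction in hand.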
IntrinsicCostAttributes::IntrinsicCostAttributes(const IntrinsicInst &I) :
    II(&I), RetTy(I.getType()), IID(I.getIntrinsicID()) {

  FunctionType *FTy = I.getCalledFunction()->getFunctionType();
  ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
  Arguments.insert(Arguments.begin(), I.arg_begin(), I.arg_end());
  if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
    FMF = FPMO->getFastMathFlags();
}

IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
                                                 unsigned Factor) :
    RetTy(CI.getType()), IID(Id), VF(Factor) {

  if (auto *FPMO = dyn_cast<FPMathOperator>(&CI))
    FMF = FPMO->getFastMathFlags();

  Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
  FunctionType *FTy =
      CI.getCalledFunction()->getFunctionType();
  ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
}

IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
                                                 unsigned Factor,
                                                 unsigned ScalarCost) :
    RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) {

  if (auto *FPMO = dyn_cast<FPMathOperator>(&CI))
    FMF = FPMO->getFastMathFlags();

  Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
  FunctionType *FTy =
      CI.getCalledFunction()->getFunctionType();
  ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
}

IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                                                 ArrayRef<Type *> Tys,
                                                 FastMathFlags Flags) :
    RetTy(RTy), IID(Id), FMF(Flags) {
  ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
}

IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                                                 ArrayRef<Type *> Tys,
                                                 FastMathFlags Flags,
                                                 unsigned ScalarCost) :
    RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
  ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
}

IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                                                 ArrayRef<Type *> Tys,
                                                 FastMathFlags Flags,
                                                 unsigned ScalarCost,
                                                 const IntrinsicInst *I) :
    II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
  ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
}

IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                                                 ArrayRef<Type *> Tys) :
    RetTy(RTy), IID(Id) {
  ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
}

IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
                                                 ArrayRef<Value *> Args) :
    RetTy(Ty), IID(Id) {

  Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
  ParamTys.reserve(Arguments.size());
  for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
    ParamTys.push_back(Arguments[Idx]->getType());
}

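// A hardware-loop candidate needs an exiting block whose exit count is
// loop-invariant, known non-zero, and narrow enough to fit in CountType,
// that runs on every loop iteration, and that ends in a conditional branch;
// the loop below records the first such block it finds.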
bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
                                               LoopInfo &LI, DominatorTree &DT,
                                               bool ForceNestedLoop,
                                               bool ForceHardwareLoopPHI) {
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  for (BasicBlock *BB : ExitingBlocks) {
    // If we pass the updated counter back through a phi, we need to know
    // which latch the updated value will be coming from.
    if (!L->isLoopLatch(BB)) {
      if (ForceHardwareLoopPHI || CounterInReg)
        continue;
    }

    const SCEV *EC = SE.getExitCount(L, BB);
    if (isa<SCEVCouldNotCompute>(EC))
      continue;
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
      if (ConstEC->getValue()->isZero())
        continue;
    } else if (!SE.isLoopInvariant(EC, L))
      continue;

    if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth())
      continue;

    // If this exiting block is contained in a nested loop, it is not eligible
    // for insertion of the branch-and-decrement since the inner loop would
    // end up messing up the value in the CTR.
    if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop)
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // exiting block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
    bool NotAlways = false;
    for (BasicBlock *Pred : predecessors(L->getHeader())) {
      if (!L->contains(Pred))
        continue;

      if (!DT.dominates(BB, Pred)) {
        NotAlways = true;
        break;
      }
    }

    if (NotAlways)
      continue;

    // Make sure this block ends with a conditional branch.
    Instruction *TI = BB->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (!BI->isConditional())
        continue;

      ExitBranch = BI;
    } else
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    ExitBlock = BB;
    ExitCount = EC;
    break;
  }

  if (!ExitBlock)
    return false;
  return true;
}

TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
    : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}

TargetTransformInfo::~TargetTransformInfo() {}

TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
    : TTIImpl(std::move(Arg.TTIImpl)) {}

TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
  TTIImpl = std::move(RHS.TTIImpl);
  return *this;
}

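// The member functions below mostly just forward to the TTIImpl model. Cost
// queries additionally assert a non-negative result: in this model a cost of
// 0 (TCC_Free) already means "free", so negative costs have no meaning.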
unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
  return TTIImpl->getInliningThresholdMultiplier();
}

int TargetTransformInfo::getInlinerVectorBonusPercent() const {
  return TTIImpl->getInlinerVectorBonusPercent();
}

int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
                                    ArrayRef<const Value *> Operands,
                                    TTI::TargetCostKind CostKind) const {
  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind);
}

int TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                                          ArrayRef<const Value *> Arguments,
                                          const User *U,
                                          TTI::TargetCostKind CostKind) const {
  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(
    const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI,
    BlockFrequencyInfo *BFI) const {
  return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
}

int TargetTransformInfo::getUserCost(const User *U,
                                     ArrayRef<const Value *> Operands,
                                     enum TargetCostKind CostKind) const {
  int Cost = TTIImpl->getUserCost(U, Operands, CostKind);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::hasBranchDivergence() const {
  return TTIImpl->hasBranchDivergence();
}

bool TargetTransformInfo::useGPUDivergenceAnalysis() const {
  return TTIImpl->useGPUDivergenceAnalysis();
}

bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
  return TTIImpl->isSourceOfDivergence(V);
}

bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const {
  return TTIImpl->isAlwaysUniform(V);
}

unsigned TargetTransformInfo::getFlatAddressSpace() const {
  return TTIImpl->getFlatAddressSpace();
}

bool TargetTransformInfo::collectFlatAddressOperands(
    SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const {
  return TTIImpl->collectFlatAddressOperands(OpIndexes, IID);
}

bool TargetTransformInfo::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                                           Value *OldV,
                                                           Value *NewV) const {
  return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
}

bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
  return TTIImpl->isLoweredToCall(F);
}

bool TargetTransformInfo::isHardwareLoopProfitable(
    Loop *L, ScalarEvolution &SE, AssumptionCache &AC,
    TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {
  return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}

bool TargetTransformInfo::preferPredicateOverEpilogue(
    Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
    TargetLibraryInfo *TLI, DominatorTree *DT,
    const LoopAccessInfo *LAI) const {
  return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
}

void TargetTransformInfo::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
  return TTIImpl->getUnrollingPreferences(L, SE, UP);
}

bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
  return TTIImpl->isLegalAddImmediate(Imm);
}

bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
  return TTIImpl->isLegalICmpImmediate(Imm);
}

bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                int64_t BaseOffset,
                                                bool HasBaseReg, int64_t Scale,
                                                unsigned AddrSpace,
                                                Instruction *I) const {
  return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                        Scale, AddrSpace, I);
}

bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
  return TTIImpl->isLSRCostLess(C1, C2);
}

bool TargetTransformInfo::isProfitableLSRChainElement(Instruction *I) const {
  return TTIImpl->isProfitableLSRChainElement(I);
}

bool TargetTransformInfo::canMacroFuseCmp() const {
  return TTIImpl->canMacroFuseCmp();
}

bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI,
                                     ScalarEvolution *SE, LoopInfo *LI,
                                     DominatorTree *DT, AssumptionCache *AC,
                                     TargetLibraryInfo *LibInfo) const {
  return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
}

bool TargetTransformInfo::shouldFavorPostInc() const {
  return TTIImpl->shouldFavorPostInc();
}

bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const {
  return TTIImpl->shouldFavorBackedgeIndex(L);
}

bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
                                             Align Alignment) const {
  return TTIImpl->isLegalMaskedStore(DataType, Alignment);
}

bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
                                            Align Alignment) const {
  return TTIImpl->isLegalMaskedLoad(DataType, Alignment);
}

bool TargetTransformInfo::isLegalNTStore(Type *DataType,
                                         Align Alignment) const {
  return TTIImpl->isLegalNTStore(DataType, Alignment);
}

bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
  return TTIImpl->isLegalNTLoad(DataType, Alignment);
}

bool TargetTransformInfo::isLegalMaskedGather(Type *DataType,
                                              Align Alignment) const {
  return TTIImpl->isLegalMaskedGather(DataType, Alignment);
}

bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType,
                                               Align Alignment) const {
  return TTIImpl->isLegalMaskedScatter(DataType, Alignment);
}

bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
  return TTIImpl->isLegalMaskedCompressStore(DataType);
}

bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
  return TTIImpl->isLegalMaskedExpandLoad(DataType);
}

bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
  return TTIImpl->hasDivRemOp(DataType, IsSigned);
}

bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
                                             unsigned AddrSpace) const {
  return TTIImpl->hasVolatileVariant(I, AddrSpace);
}

bool TargetTransformInfo::prefersVectorizedAddressing() const {
  return TTIImpl->prefersVectorizedAddressing();
}

int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                              int64_t BaseOffset,
                                              bool HasBaseReg, int64_t Scale,
                                              unsigned AddrSpace) const {
  int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                           Scale, AddrSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::LSRWithInstrQueries() const {
  return TTIImpl->LSRWithInstrQueries();
}

bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return TTIImpl->isTruncateFree(Ty1, Ty2);
}

bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
  return TTIImpl->isProfitableToHoist(I);
}

bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }

bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
  return TTIImpl->isTypeLegal(Ty);
}

bool TargetTransformInfo::shouldBuildLookupTables() const {
  return TTIImpl->shouldBuildLookupTables();
}
bool TargetTransformInfo::shouldBuildLookupTablesForConstant(
    Constant *C) const {
  return TTIImpl->shouldBuildLookupTablesForConstant(C);
}

bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
  return TTIImpl->useColdCCForColdCall(F);
}

unsigned
TargetTransformInfo::getScalarizationOverhead(VectorType *Ty,
                                              const APInt &DemandedElts,
                                              bool Insert, bool Extract) const {
  return TTIImpl->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
}

unsigned TargetTransformInfo::getOperandsScalarizationOverhead(
    ArrayRef<const Value *> Args, unsigned VF) const {
  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
}

bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
  return TTIImpl->supportsEfficientVectorElementLoadStore();
}

bool TargetTransformInfo::enableAggressiveInterleaving(
    bool LoopHasReductions) const {
  return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}

TargetTransformInfo::MemCmpExpansionOptions
TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp);
}

bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
  return TTIImpl->enableInterleavedAccessVectorization();
}

bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
  return TTIImpl->enableMaskedInterleavedAccessVectorization();
}

bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
  return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}

bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                                         unsigned BitWidth,
                                                         unsigned AddressSpace,
                                                         unsigned Alignment,
                                                         bool *Fast) const {
  return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth,
                                                 AddressSpace, Alignment, Fast);
}

TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
  return TTIImpl->getPopcntSupport(IntTyWidthInBit);
}

bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
  return TTIImpl->haveFastSqrt(Ty);
}

bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
  return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
}

int TargetTransformInfo::getFPOpCost(Type *Ty) const {
  int Cost = TTIImpl->getFPOpCost(Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                               const APInt &Imm,
                                               Type *Ty) const {
  int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty,
                                       TTI::TargetCostKind CostKind) const {
  int Cost = TTIImpl->getIntImmCost(Imm, Ty, CostKind);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int
TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                       const APInt &Imm, Type *Ty,
                                       TTI::TargetCostKind CostKind) const {
  int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int
TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                         const APInt &Imm, Type *Ty,
                                         TTI::TargetCostKind CostKind) const {
  int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const {
  return TTIImpl->getNumberOfRegisters(ClassID);
}

unsigned TargetTransformInfo::getRegisterClassForType(bool Vector,
                                                      Type *Ty) const {
  return TTIImpl->getRegisterClassForType(Vector, Ty);
}

const char *TargetTransformInfo::getRegisterClassName(unsigned ClassID) const {
  return TTIImpl->getRegisterClassName(ClassID);
}

unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
  return TTIImpl->getRegisterBitWidth(Vector);
}

unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
  return TTIImpl->getMinVectorRegisterBitWidth();
}

bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const {
  return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
}

unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
  return TTIImpl->getMinimumVF(ElemWidth);
}

bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  return TTIImpl->shouldConsiderAddressTypePromotion(
      I, AllowPromotionWithoutCommonHeader);
}

unsigned TargetTransformInfo::getCacheLineSize() const {
  return TTIImpl->getCacheLineSize();
}

llvm::Optional<unsigned>
TargetTransformInfo::getCacheSize(CacheLevel Level) const {
  return TTIImpl->getCacheSize(Level);
}

llvm::Optional<unsigned>
TargetTransformInfo::getCacheAssociativity(CacheLevel Level) const {
  return TTIImpl->getCacheAssociativity(Level);
}

unsigned TargetTransformInfo::getPrefetchDistance() const {
  return TTIImpl->getPrefetchDistance();
}

unsigned TargetTransformInfo::getMinPrefetchStride(
    unsigned NumMemAccesses, unsigned NumStridedMemAccesses,
    unsigned NumPrefetches, bool HasCall) const {
  return TTIImpl->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                       NumPrefetches, HasCall);
}

unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
  return TTIImpl->getMaxPrefetchIterationsAhead();
}

bool TargetTransformInfo::enableWritePrefetching() const {
  return TTIImpl->enableWritePrefetching();
}

unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
  return TTIImpl->getMaxInterleaveFactor(VF);
}

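// For example, a splat constant operand such as
//   <4 x i32> <i32 8, i32 8, i32 8, i32 8>
// is classified as OK_UniformConstantValue with OP_PowerOf2, while a vector
// of mixed constants is classified as OK_NonUniformConstantValue.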
TargetTransformInfo::OperandValueKind
TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
  OperandValueKind OpInfo = OK_AnyValue;
  OpProps = OP_None;

  if (auto *CI = dyn_cast<ConstantInt>(V)) {
    if (CI->getValue().isPowerOf2())
      OpProps = OP_PowerOf2;
    return OK_UniformConstantValue;
  }

  // A broadcast shuffle creates a uniform value.
  // TODO: Add support for non-zero index broadcasts.
  // TODO: Add support for different source vector width.
  if (auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
    if (ShuffleInst->isZeroEltSplat())
      OpInfo = OK_UniformValue;

  const Value *Splat = getSplatValue(V);

  // Check for a splat of a constant or for a non-uniform vector of constants,
  // and check if the constant(s) are all powers of two.
  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
    OpInfo = OK_NonUniformConstantValue;
    if (Splat) {
      OpInfo = OK_UniformConstantValue;
      if (auto *CI = dyn_cast<ConstantInt>(Splat))
        if (CI->getValue().isPowerOf2())
          OpProps = OP_PowerOf2;
    } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
      OpProps = OP_PowerOf2;
      for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
        if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
          if (CI->getValue().isPowerOf2())
            continue;
        OpProps = OP_None;
        break;
      }
    }
  }

  // Check for a splat of a uniform value. This is not loop aware, so return
  // true only for the obviously uniform cases (argument, globalvalue).
  if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
    OpInfo = OK_UniformValue;

  return OpInfo;
}

int TargetTransformInfo::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    OperandValueKind Opd1Info,
    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
    OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
    const Instruction *CxtI) const {
  int Cost = TTIImpl->getArithmeticInstrCost(
      Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo,
      Args, CxtI);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, VectorType *Ty,
                                        int Index, VectorType *SubTp) const {
  int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                          TTI::TargetCostKind CostKind,
                                          const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, CostKind, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                                  VectorType *VecTy,
                                                  unsigned Index) const {
  int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCFInstrCost(unsigned Opcode,
                                        TTI::TargetCostKind CostKind) const {
  int Cost = TTIImpl->getCFInstrCost(Opcode, CostKind);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                            Type *CondTy,
                                            TTI::TargetCostKind CostKind,
                                            const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
                                            unsigned Index) const {
  int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
                                         Align Alignment, unsigned AddressSpace,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                      CostKind, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                               unsigned Alignment,
                                               unsigned AddressSpace,
                                               TTI::TargetCostKind CostKind) const {
  int Cost =
      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                     CostKind);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getGatherScatterOpCost(
    unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
    unsigned Alignment, TTI::TargetCostKind CostKind,
    const Instruction *I) const {
  int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                             Alignment, CostKind, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    unsigned Alignment, unsigned AddressSpace,
    TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) const {
  int Cost = TTIImpl->getInterleavedMemoryOpCost(
      Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind,
      UseMaskForCond, UseMaskForGaps);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int
TargetTransformInfo::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                           TTI::TargetCostKind CostKind) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ICA, CostKind);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
                                          ArrayRef<Type *> Tys,
                                          TTI::TargetCostKind CostKind) const {
  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys, CostKind);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
  return TTIImpl->getNumberOfParts(Tp);
}

int TargetTransformInfo::getAddressComputationCost(Type *Tp,
                                                   ScalarEvolution *SE,
                                                   const SCEV *Ptr) const {
  int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
  int Cost = TTIImpl->getMemcpyCost(I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode,
                                                    VectorType *Ty,
                                                    bool IsPairwiseForm,
                                                    TTI::TargetCostKind CostKind) const {
  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,
                                                 CostKind);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMinMaxReductionCost(
    VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
    TTI::TargetCostKind CostKind) const {
  int Cost =
      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
                                      CostKind);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
  return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
}

bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                             MemIntrinsicInfo &Info) const {
  return TTIImpl->getTgtMemIntrinsic(Inst, Info);
}

unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
  return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
}

Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
    IntrinsicInst *Inst, Type *ExpectedType) const {
  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}

Type *TargetTransformInfo::getMemcpyLoopLoweringType(
    LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
    unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const {
  return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                            DestAddrSpace, SrcAlign, DestAlign);
}

void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
    unsigned SrcAlign, unsigned DestAlign) const {
  TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                             SrcAddrSpace, DestAddrSpace,
                                             SrcAlign, DestAlign);
}

bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
                                              const Function *Callee) const {
  return TTIImpl->areInlineCompatible(Caller, Callee);
}

bool TargetTransformInfo::areFunctionArgsABICompatible(
    const Function *Caller, const Function *Callee,
    SmallPtrSetImpl<Argument *> &Args) const {
  return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
}

bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
                                             Type *Ty) const {
  return TTIImpl->isIndexedLoadLegal(Mode, Ty);
}

bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
                                              Type *Ty) const {
  return TTIImpl->isIndexedStoreLegal(Mode, Ty);
}

unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
  return TTIImpl->getLoadStoreVecRegBitWidth(AS);
}

bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
  return TTIImpl->isLegalToVectorizeLoad(LI);
}

bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
  return TTIImpl->isLegalToVectorizeStore(SI);
}

bool TargetTransformInfo::isLegalToVectorizeLoadChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                              AddrSpace);
}

bool TargetTransformInfo::isLegalToVectorizeStoreChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                               AddrSpace);
}

unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
                                                  unsigned LoadSize,
                                                  unsigned ChainSizeInBytes,
                                                  VectorType *VecTy) const {
  return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}

unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
                                                   unsigned StoreSize,
                                                   unsigned ChainSizeInBytes,
                                                   VectorType *VecTy) const {
  return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}

bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                                ReductionFlags Flags) const {
  return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
}

bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
  return TTIImpl->shouldExpandReduction(II);
}

unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
  return TTIImpl->getGISelRematGlobalCost();
}

int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
  return TTIImpl->getInstructionLatency(I);
}

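// For example, at Level 1 the expected left-hand mask is <0, 2, undef, ...>
// and the right-hand mask is <1, 3, undef, ...>; each level doubles the
// number of significant mask lanes (1 << Level).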
static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
                                     unsigned Level) {
  // We don't need a shuffle if we just want to have element 0 in position 0 of
  // the vector.
  if (!SI && Level == 0 && IsLeft)
    return true;
  else if (!SI)
    return false;

  SmallVector<int, 32> Mask(SI->getType()->getNumElements(), -1);

  // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
  // we look at the left or right side.
  for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
    Mask[i] = val;

  ArrayRef<int> ActualMask = SI->getShuffleMask();
  return Mask == ActualMask;
}

namespace {
/// Kind of the reduction data.
enum ReductionKind {
  RK_None,           /// Not a reduction.
  RK_Arithmetic,     /// Binary reduction data.
  RK_MinMax,         /// Min/max reduction data.
  RK_UnsignedMinMax, /// Unsigned min/max reduction data.
};
/// Contains opcode + LHS/RHS parts of the reduction operations.
struct ReductionData {
  ReductionData() = delete;
  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
    assert(Kind != RK_None && "expected binary or min/max reduction only.");
  }
  unsigned Opcode = 0;
  Value *LHS = nullptr;
  Value *RHS = nullptr;
  ReductionKind Kind = RK_None;
  bool hasSameData(ReductionData &RD) const {
    return Kind == RD.Kind && Opcode == RD.Opcode;
  }
};
} // namespace

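// Matches a single reduction step: any binary operator, or a select whose
// condition is a min/max compare. For example
//   %c = icmp slt i32 %a, %b
//   %m = select i1 %c, i32 %a, i32 %b
// yields RK_MinMax with the compare's opcode recorded as Opcode.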
static Optional<ReductionData> getReductionData(Instruction *I) {
  Value *L, *R;
  if (m_BinOp(m_Value(L), m_Value(R)).match(I))
    return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
  if (auto *SI = dyn_cast<SelectInst>(I)) {
    if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
        m_SMax(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
    }
    if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
    }
  }
  return llvm::None;
}

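// Walks the reduction tree one level per call; the top-level caller passes
// NumLevels = Log2_32(#elements), so e.g. a <4 x float> reduction is checked
// across two levels.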
static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
                                                   unsigned Level,
                                                   unsigned NumLevels) {
  // Match one level of pairwise operations.
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  if (!I)
    return RK_None;

  assert(I->getType()->isVectorTy() && "Expecting a vector type");

  Optional<ReductionData> RD = getReductionData(I);
  if (!RD)
    return RK_None;

  ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS);
  if (!LS && Level)
    return RK_None;
  ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS);
  if (!RS && Level)
    return RK_None;

  // On level 0 we can omit one shufflevector instruction.
  if (!Level && !RS && !LS)
    return RK_None;

  // Shuffle inputs must match.
  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
  Value *NextLevelOp = nullptr;
  if (NextLevelOpR && NextLevelOpL) {
    // If we have two shuffles their operands must match.
    if (NextLevelOpL != NextLevelOpR)
      return RK_None;

    NextLevelOp = NextLevelOpL;
  } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
    // On the first level we can omit the shufflevector <0, undef,...>. So the
    // input to the other shufflevector <1, undef> must match one of the
    // inputs to the current binary operation.
    // Example:
    //  %NextLevelOpL = shufflevector %R, <1, undef ...>
    //  %BinOp        = fadd          %NextLevelOpL, %R
    if (NextLevelOpL && NextLevelOpL != RD->RHS)
      return RK_None;
    else if (NextLevelOpR && NextLevelOpR != RD->LHS)
      return RK_None;

    NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
  } else
    return RK_None;

  // Check that the next level's binary operation exists and matches with the
  // current one.
  if (Level + 1 != NumLevels) {
    Optional<ReductionData> NextLevelRD =
        getReductionData(cast<Instruction>(NextLevelOp));
    if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
      return RK_None;
  }

  // Shuffle mask for pairwise operation must match.
  if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
      return RK_None;
  } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
      return RK_None;
  } else {
    return RK_None;
  }

  if (++Level == NumLevels)
    return RD->Kind;

  // Match next level.
  return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
                                       NumLevels);
}

static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
                                            unsigned &Opcode,
                                            VectorType *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  auto *VecTy = cast<VectorType>(RdxStart->getType());
  unsigned NumVecElems = VecTy->getNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffle, shuffle, add triples like the following
  // that builds a pairwise reduction tree.
  //
  //  (X0, X1, X2, X3)
  //   (X0 + X1, X2 + X3, undef, undef)
  //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
  //
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
  // %r = extractelement <4 x float> %bin.rdx8, i32 0
  if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
      RK_None)
    return RK_None;

  Opcode = RD->Opcode;
  Ty = VecTy;

  return RD->Kind;
}

static std::pair<Value *, ShuffleVectorInst *>
getShuffleAndOtherOprd(Value *L, Value *R) {
  ShuffleVectorInst *S = nullptr;

  if ((S = dyn_cast<ShuffleVectorInst>(L)))
    return std::make_pair(R, S);

  S = dyn_cast<ShuffleVectorInst>(R);
  return std::make_pair(L, S);
}

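// The walk below starts at the final fadd (whose shuffle operand has mask
// <1, undef, ...>) and works outward, doubling MaskStart each level
// (<2, 3, undef, ...>, and so on) until one element remains.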
static ReductionKind
matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
                              unsigned &Opcode, VectorType *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  auto *VecTy = cast<VectorType>(ReduxRoot->getOperand(0)->getType());
  unsigned NumVecElems = VecTy->getNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffles and adds like the following, matching
  // one fadd/shufflevector pair at a time.
  //
  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
  // %r = extractelement <4 x float> %bin.rdx8, i32 0

  unsigned MaskStart = 1;
  Instruction *RdxOp = RdxStart;
  SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
  unsigned NumVecElemsRemain = NumVecElems;
  while (NumVecElemsRemain - 1) {
    // Check for the right reduction operation.
    if (!RdxOp)
      return RK_None;
    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
    if (!RDLevel || !RDLevel->hasSameData(*RD))
      return RK_None;

    Value *NextRdxOp;
    ShuffleVectorInst *Shuffle;
    std::tie(NextRdxOp, Shuffle) =
        getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);

    // Check that the current reduction operation and the shuffle use the same
    // value.
    if (Shuffle == nullptr)
      return RK_None;
    if (Shuffle->getOperand(0) != NextRdxOp)
      return RK_None;

    // Check that the shuffle mask matches.
    for (unsigned j = 0; j != MaskStart; ++j)
      ShuffleMask[j] = MaskStart + j;
    // Fill the rest of the mask with -1 for undef.
    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);

    ArrayRef<int> Mask = Shuffle->getShuffleMask();
    if (ShuffleMask != Mask)
      return RK_None;

    RdxOp = dyn_cast<Instruction>(NextRdxOp);
    NumVecElemsRemain /= 2;
    MaskStart *= 2;
  }

  Opcode = RD->Opcode;
  Ty = VecTy;
  return RD->Kind;
}

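// Dispatches on the instruction's opcode and forwards to the matching cost
// hook, using TCK_RecipThroughput throughout; opcodes with no model here
// return -1.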
int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;

  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
    return getUserCost(I, CostKind);

  case Instruction::Ret:
  case Instruction::PHI:
  case Instruction::Br: {
    return getCFInstrCost(I->getOpcode(), CostKind);
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
    Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind,
                                  Op1VK, Op2VK,
                                  Op1VP, Op2VP, Operands, I);
  }
  case Instruction::FNeg: {
    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
    Op2VK = OK_AnyValue;
    Op2VP = OP_None;
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind,
                                  Op1VK, Op2VK,
                                  Op1VP, Op2VP, Operands, I);
  }
  case Instruction::Select: {
    const SelectInst *SI = cast<SelectInst>(I);
    Type *CondTy = SI->getCondition()->getType();
    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy,
                              CostKind, I);
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    Type *ValTy = I->getOperand(0)->getType();
    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(),
                              CostKind, I);
  }
  case Instruction::Store: {
    const StoreInst *SI = cast<StoreInst>(I);
    Type *ValTy = SI->getValueOperand()->getType();
    return getMemoryOpCost(I->getOpcode(), ValTy, SI->getAlign(),
                           SI->getPointerAddressSpace(), CostKind, I);
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(I);
    return getMemoryOpCost(I->getOpcode(), I->getType(), LI->getAlign(),
                           LI->getPointerAddressSpace(), CostKind, I);
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, CostKind, I);
  }
  case Instruction::ExtractElement: {
    const ExtractElementInst *EEI = cast<ExtractElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();

    // Try to match a reduction sequence (series of shufflevector and vector
    // adds followed by an extractelement).
    unsigned ReduxOpCode;
    VectorType *ReduxType;

    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/false,
                                        CostKind);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/true, CostKind);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    return getVectorInstrCost(I->getOpcode(), EEI->getOperand(0)->getType(),
                              Idx);
  }
  case Instruction::InsertElement: {
    const InsertElementInst *IE = cast<InsertElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();
    return getVectorInstrCost(I->getOpcode(), IE->getType(), Idx);
  }
  case Instruction::ExtractValue:
    return 0; // Model all ExtractValue nodes as free.
  case Instruction::ShuffleVector: {
    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    auto *Ty = cast<VectorType>(Shuffle->getType());
    auto *SrcTy = cast<VectorType>(Shuffle->getOperand(0)->getType());

    // TODO: Identify and add costs for insert subvector, etc.
    int SubIndex;
    if (Shuffle->isExtractSubvectorMask(SubIndex))
      return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty);

    if (Shuffle->changesLength())
      return -1;

    if (Shuffle->isIdentity())
      return 0;

    if (Shuffle->isReverse())
      return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);

    if (Shuffle->isSelect())
      return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr);

    if (Shuffle->isTranspose())
      return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr);

    if (Shuffle->isZeroEltSplat())
      return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr);

    if (Shuffle->isSingleSource())
      return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr);

    return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
  }
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      IntrinsicCostAttributes CostAttrs(*II);
      return getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    return -1;
  default:
    // We don't have any information on this instruction.
    return -1;
  }
}

TargetTransformInfo::Concept::~Concept() {}

TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}

TargetIRAnalysis::TargetIRAnalysis(
    std::function<Result(const Function &)> TTICallback)
    : TTICallback(std::move(TTICallback)) {}

TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
                                               FunctionAnalysisManager &) {
  return TTICallback(F);
}

AnalysisKey TargetIRAnalysis::Key;

TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
  return Result(F.getParent()->getDataLayout());
}

// Register the basic pass.
INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
                "Target Transform Information", false, true)
char TargetTransformInfoWrapperPass::ID = 0;

void TargetTransformInfoWrapperPass::anchor() {}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
    : ImmutablePass(ID) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
    TargetIRAnalysis TIRA)
    : ImmutablePass(ID), TIRA(std::move(TIRA)) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

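// Note that getTTI below re-runs the analysis on every call with a throwaway
// FunctionAnalysisManager, caching only the most recent result.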
TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
  FunctionAnalysisManager DummyFAM;
  TTI = TIRA.run(F, DummyFAM);
  return *TTI;
}

ImmutablePass *
llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
  return new TargetTransformInfoWrapperPass(std::move(TIRA));
}