121
|
1 //===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
|
83
|
2 //
|
147
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 // See https://llvm.org/LICENSE.txt for license information.
|
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
83
|
6 //
|
|
7 //===----------------------------------------------------------------------===//
|
121
|
8 //
|
83
|
9 /// \file
|
|
10 /// This file defines a TargetTransformInfo::Concept conforming object specific to the
|
|
11 /// ARM target machine. It uses the target's detailed information to
|
|
12 /// provide more precise answers to certain TTI queries, while letting the
|
|
13 /// target independent and default TTI implementations handle the rest.
|
121
|
14 //
|
83
|
15 //===----------------------------------------------------------------------===//
|
|
16
|
|
17 #ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
|
|
18 #define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
|
|
19
|
|
20 #include "ARM.h"
|
121
|
21 #include "ARMSubtarget.h"
|
83
|
22 #include "ARMTargetMachine.h"
|
121
|
23 #include "llvm/ADT/ArrayRef.h"
|
83
|
24 #include "llvm/Analysis/TargetTransformInfo.h"
|
|
25 #include "llvm/CodeGen/BasicTTIImpl.h"
|
121
|
26 #include "llvm/IR/Constant.h"
|
|
27 #include "llvm/IR/Function.h"
|
|
28 #include "llvm/MC/SubtargetFeature.h"
|
83
|
29
|
|
30 namespace llvm {
|
|
31
|
121
|
32 class APInt;
|
|
33 class ARMTargetLowering;
|
|
34 class Instruction;
|
|
35 class Loop;
|
|
36 class SCEV;
|
|
37 class ScalarEvolution;
|
|
38 class Type;
|
|
39 class Value;
|
|
40
|
83
|
41 class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
|
121
|
42 using BaseT = BasicTTIImplBase<ARMTTIImpl>;
|
|
43 using TTI = TargetTransformInfo;
|
|
44
|
83
|
45 friend BaseT;
|
|
46
|
|
47 const ARMSubtarget *ST;
|
|
48 const ARMTargetLowering *TLI;
|
|
49
|
121
|
50 // Currently the following features are excluded from InlineFeatureWhitelist.
|
147
|
51 // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
|
121
|
52 // Depending on whether they are set or unset, different
|
|
53 // instructions/registers are available. For example, inlining a callee with
|
|
54 // -thumb-mode in a caller with +thumb-mode, may cause the assembler to
|
|
55 // fail if the callee uses ARM only instructions, e.g. in inline asm.
|
|
56 const FeatureBitset InlineFeatureWhitelist = {
|
|
57 ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
|
|
58 ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
|
147
|
59 ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
|
121
|
60 ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
|
|
61 ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
|
|
62 ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
|
|
63 ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
|
|
64 ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
|
|
65 ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
|
|
66 ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
|
|
67 ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
|
|
68 ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
|
|
69 ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
|
|
70 ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
|
|
71 ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
|
|
72 ARM::FeatureHasSlowFPVMLx, ARM::FeatureVMLxForwarding,
|
|
73 ARM::FeaturePref32BitThumb, ARM::FeatureAvoidPartialCPSR,
|
|
74 ARM::FeatureCheapPredicableCPSR, ARM::FeatureAvoidMOVsShOp,
|
|
75 ARM::FeatureHasRetAddrStack, ARM::FeatureHasNoBranchPredictor,
|
|
76 ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization,
|
|
77 ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass,
|
|
78 ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls,
|
|
79 ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt,
|
|
80 ARM::FeatureNoNegativeImmediates
|
|
81 };
|
83
|
82
|
|
83 const ARMSubtarget *getST() const { return ST; }
|
|
84 const ARMTargetLowering *getTLI() const { return TLI; }
|
|
85
|
|
86 public:
|
95
|
87 explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
|
|
88 : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
|
|
89 TLI(ST->getTargetLowering()) {}
|
83
|
90
|
121
|
91 bool areInlineCompatible(const Function *Caller,
|
|
92 const Function *Callee) const;
|
|
93
|
120
|
94 bool enableInterleavedAccessVectorization() { return true; }
|
95
|
95
|
147
|
96 bool shouldFavorBackedgeIndex(const Loop *L) const {
|
|
97 if (L->getHeader()->getParent()->hasOptSize())
|
|
98 return false;
|
|
99 return ST->isMClass() && ST->isThumb2() && L->getNumBlocks() == 1;
|
|
100 }
|
|
101
|
120
|
102 /// Floating-point computation using ARMv8 AArch32 Advanced
|
|
103 /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
|
147
|
104 /// and Arm MVE are IEEE-754 compliant.
|
120
|
105 bool isFPVectorizationPotentiallyUnsafe() {
|
147
|
106 return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
|
120
|
107 }
|
83
|
108
|
|
109 /// \name Scalar TTI Implementations
|
|
110 /// @{
|
|
111
|
120
|
112 int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
|
|
113 Type *Ty);
|
|
114
|
83
|
115 using BaseT::getIntImmCost;
|
95
|
116 int getIntImmCost(const APInt &Imm, Type *Ty);
|
83
|
117
|
120
|
118 int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
|
|
119
|
83
|
120 /// @}
|
|
121
|
|
122 /// \name Vector TTI Implementations
|
|
123 /// @{
|
|
124
|
|
125 unsigned getNumberOfRegisters(bool Vector) {
|
|
126 if (Vector) {
|
|
127 if (ST->hasNEON())
|
|
128 return 16;
|
147
|
129 if (ST->hasMVEIntegerOps())
|
|
130 return 8;
|
83
|
131 return 0;
|
|
132 }
|
|
133
|
|
134 if (ST->isThumb1Only())
|
|
135 return 8;
|
|
136 return 13;
|
|
137 }
|
|
138
|
121
|
139 unsigned getRegisterBitWidth(bool Vector) const {
|
83
|
140 if (Vector) {
|
|
141 if (ST->hasNEON())
|
|
142 return 128;
|
147
|
143 if (ST->hasMVEIntegerOps())
|
|
144 return 128;
|
83
|
145 return 0;
|
|
146 }
|
|
147
|
|
148 return 32;
|
|
149 }
|
|
150
|
95
|
151 unsigned getMaxInterleaveFactor(unsigned VF) {
|
120
|
152 return ST->getMaxInterleaveFactor();
|
83
|
153 }
|
|
154
|
147
|
155 int getMemcpyCost(const Instruction *I);
|
|
156
|
95
|
157 int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
|
83
|
158
|
121
|
159 int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
|
160 const Instruction *I = nullptr);
|
83
|
161
|
121
|
162 int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
|
163 const Instruction *I = nullptr);
|
83
|
164
|
95
|
165 int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
83
|
166
|
147
|
167 int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
|
121
|
168 const SCEV *Ptr);
|
83
|
169
|
95
|
170 int getArithmeticInstrCost(
|
83
|
171 unsigned Opcode, Type *Ty,
|
|
172 TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
|
|
173 TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
|
|
174 TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
|
121
|
175 TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
|
|
176 ArrayRef<const Value *> Args = ArrayRef<const Value *>());
|
83
|
177
|
95
|
178 int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
121
|
179 unsigned AddressSpace, const Instruction *I = nullptr);
|
83
|
180
|
95
|
181 int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
|
|
182 ArrayRef<unsigned> Indices, unsigned Alignment,
|
147
|
183 unsigned AddressSpace,
|
|
184 bool UseMaskForCond = false,
|
|
185 bool UseMaskForGaps = false);
|
|
186
|
|
187 bool isLoweredToCall(const Function *F);
|
|
188 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
|
|
189 AssumptionCache &AC,
|
|
190 TargetLibraryInfo *LibInfo,
|
|
191 HardwareLoopInfo &HWLoopInfo);
|
120
|
192
|
121
|
193 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
|
194 TTI::UnrollingPreferences &UP);
|
|
195
|
120
|
196 bool shouldBuildLookupTablesForConstant(Constant *C) const {
|
|
197 // In the ROPI and RWPI relocation models we can't have pointers to global
|
|
198 // variables or functions in constant data, so don't convert switches to
|
|
199 // lookup tables if any of the values would need relocation.
|
|
200 if (ST->isROPI() || ST->isRWPI())
|
|
201 return !C->needsRelocation();
|
|
202
|
|
203 return true;
|
|
204 }
|
83
|
205 /// @}
|
|
206 };
|
|
207
|
|
208 } // end namespace llvm
|
|
209
|
121
|
210 #endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
|