comparison lib/Target/X86/X86TargetTransformInfo.h @ 148:63bd29f05246

merged
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Wed, 14 Aug 2019 19:46:37 +0900
parents c2174574ed3a
children
comparison
equal deleted inserted replaced
146:3fc4d5c3e21e 148:63bd29f05246
1 //===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===// 1 //===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===//
2 // 2 //
3 // The LLVM Compiler Infrastructure 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // 4 // See https://llvm.org/LICENSE.txt for license information.
5 // This file is distributed under the University of Illinois Open Source 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 // License. See LICENSE.TXT for details.
7 // 6 //
8 //===----------------------------------------------------------------------===// 7 //===----------------------------------------------------------------------===//
9 /// \file 8 /// \file
10 /// This file defines a TargetTransformInfo::Concept conforming object specific to the 9 /// This file defines a TargetTransformInfo::Concept conforming object specific to the
11 /// X86 target machine. It uses the target's detailed information to 10 /// X86 target machine. It uses the target's detailed information to
33 const X86Subtarget *ST; 32 const X86Subtarget *ST;
34 const X86TargetLowering *TLI; 33 const X86TargetLowering *TLI;
35 34
36 const X86Subtarget *getST() const { return ST; } 35 const X86Subtarget *getST() const { return ST; }
37 const X86TargetLowering *getTLI() const { return TLI; } 36 const X86TargetLowering *getTLI() const { return TLI; }
37
38 const FeatureBitset InlineFeatureIgnoreList = {
39 // This indicates the CPU is 64-bit capable, not that we are in 64-bit
40 // mode.
41 X86::Feature64Bit,
42
43 // These features don't have any intrinsics or ABI effect.
44 X86::FeatureNOPL,
45 X86::FeatureCMPXCHG16B,
46 X86::FeatureLAHFSAHF,
47
48 // Codegen control options.
49 X86::FeatureFast11ByteNOP,
50 X86::FeatureFast15ByteNOP,
51 X86::FeatureFastBEXTR,
52 X86::FeatureFastHorizontalOps,
53 X86::FeatureFastLZCNT,
54 X86::FeatureFastPartialYMMorZMMWrite,
55 X86::FeatureFastScalarFSQRT,
56 X86::FeatureFastSHLDRotate,
57 X86::FeatureFastScalarShiftMasks,
58 X86::FeatureFastVectorShiftMasks,
59 X86::FeatureFastVariableShuffle,
60 X86::FeatureFastVectorFSQRT,
61 X86::FeatureLEAForSP,
62 X86::FeatureLEAUsesAG,
63 X86::FeatureLZCNTFalseDeps,
64 X86::FeatureBranchFusion,
65 X86::FeatureMacroFusion,
66 X86::FeatureMergeToThreeWayBranch,
67 X86::FeaturePadShortFunctions,
68 X86::FeaturePOPCNTFalseDeps,
69 X86::FeatureSSEUnalignedMem,
70 X86::FeatureSlow3OpsLEA,
71 X86::FeatureSlowDivide32,
72 X86::FeatureSlowDivide64,
73 X86::FeatureSlowIncDec,
74 X86::FeatureSlowLEA,
75 X86::FeatureSlowPMADDWD,
76 X86::FeatureSlowPMULLD,
77 X86::FeatureSlowSHLD,
78 X86::FeatureSlowTwoMemOps,
79 X86::FeatureSlowUAMem16,
80
81 // Perf-tuning flags.
82 X86::FeatureHasFastGather,
83 X86::FeatureSlowUAMem32,
84
85 // Based on whether user set the -mprefer-vector-width command line.
86 X86::FeaturePrefer256Bit,
87
88 // CPU name enums. These just follow CPU string.
89 X86::ProcIntelAtom,
90 X86::ProcIntelGLM,
91 X86::ProcIntelGLP,
92 X86::ProcIntelSLM,
93 X86::ProcIntelTRM,
94 };
38 95
39 public: 96 public:
40 explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F) 97 explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
41 : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), 98 : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
42 TLI(ST->getTargetLowering()) {} 99 TLI(ST->getTargetLowering()) {}
99 int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm, 156 int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
100 bool IsUnsigned); 157 bool IsUnsigned);
101 158
102 int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, 159 int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
103 unsigned Factor, ArrayRef<unsigned> Indices, 160 unsigned Factor, ArrayRef<unsigned> Indices,
104 unsigned Alignment, unsigned AddressSpace); 161 unsigned Alignment, unsigned AddressSpace,
162 bool UseMaskForCond = false,
163 bool UseMaskForGaps = false);
105 int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy, 164 int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
106 unsigned Factor, ArrayRef<unsigned> Indices, 165 unsigned Factor, ArrayRef<unsigned> Indices,
107 unsigned Alignment, unsigned AddressSpace); 166 unsigned Alignment, unsigned AddressSpace,
167 bool UseMaskForCond = false,
168 bool UseMaskForGaps = false);
108 int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, 169 int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
109 unsigned Factor, ArrayRef<unsigned> Indices, 170 unsigned Factor, ArrayRef<unsigned> Indices,
110 unsigned Alignment, unsigned AddressSpace); 171 unsigned Alignment, unsigned AddressSpace,
172 bool UseMaskForCond = false,
173 bool UseMaskForGaps = false);
111 174
112 int getIntImmCost(int64_t); 175 int getIntImmCost(int64_t);
113 176
114 int getIntImmCost(const APInt &Imm, Type *Ty); 177 int getIntImmCost(const APInt &Imm, Type *Ty);
115 178
121 bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, 184 bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
122 TargetTransformInfo::LSRCost &C2); 185 TargetTransformInfo::LSRCost &C2);
123 bool canMacroFuseCmp(); 186 bool canMacroFuseCmp();
124 bool isLegalMaskedLoad(Type *DataType); 187 bool isLegalMaskedLoad(Type *DataType);
125 bool isLegalMaskedStore(Type *DataType); 188 bool isLegalMaskedStore(Type *DataType);
189 bool isLegalNTLoad(Type *DataType, unsigned Alignment);
190 bool isLegalNTStore(Type *DataType, unsigned Alignment);
126 bool isLegalMaskedGather(Type *DataType); 191 bool isLegalMaskedGather(Type *DataType);
127 bool isLegalMaskedScatter(Type *DataType); 192 bool isLegalMaskedScatter(Type *DataType);
193 bool isLegalMaskedExpandLoad(Type *DataType);
194 bool isLegalMaskedCompressStore(Type *DataType);
128 bool hasDivRemOp(Type *DataType, bool IsSigned); 195 bool hasDivRemOp(Type *DataType, bool IsSigned);
129 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty); 196 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
130 bool areInlineCompatible(const Function *Caller, 197 bool areInlineCompatible(const Function *Caller,
131 const Function *Callee) const; 198 const Function *Callee) const;
132 const TTI::MemCmpExpansionOptions *enableMemCmpExpansion( 199 bool areFunctionArgsABICompatible(const Function *Caller,
133 bool IsZeroCmp) const; 200 const Function *Callee,
201 SmallPtrSetImpl<Argument *> &Args) const;
202 TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
203 bool IsZeroCmp) const;
134 bool enableInterleavedAccessVectorization(); 204 bool enableInterleavedAccessVectorization();
135 private: 205 private:
136 int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask, 206 int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
137 unsigned Alignment, unsigned AddressSpace); 207 unsigned Alignment, unsigned AddressSpace);
138 int getGSVectorCost(unsigned Opcode, Type *DataTy, Value *Ptr, 208 int getGSVectorCost(unsigned Opcode, Type *DataTy, Value *Ptr,