//===- X86InstrVecCompiler.td - Vector Compiler Patterns ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the various vector pseudo instructions used by the
// compiler, as well as Pat patterns used during instruction selection.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//

let Predicates = [NoAVX512] in {
  // A vector extract of the first f32/f64 position is a subregister copy
  def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
            (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
  def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
            (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
}

let Predicates = [HasAVX512] in {
  // A vector extract of the first f32/f64 position is a subregister copy
  def : Pat<(f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
            (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X)>;
  def : Pat<(f64 (extractelt (v2f64 VR128X:$src), (iPTR 0))),
            (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X)>;
}
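
// Illustrative note (hedged; IR names are hypothetical): an extract such as
//   %lo = extractelement <4 x float> %v, i64 0
// reaches isel as the (extractelt ..., (iPTR 0)) above, and the scalar is
// already in the low lane, so selection is a pure register-class copy.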

let Predicates = [NoVLX] in {
  // Implicitly promote a 32-bit scalar to a vector.
  def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
            (COPY_TO_REGCLASS FR32:$src, VR128)>;
  // Implicitly promote a 64-bit scalar to a vector.
  def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
            (COPY_TO_REGCLASS FR64:$src, VR128)>;
}

let Predicates = [HasVLX] in {
  // Implicitly promote a 32-bit scalar to a vector.
  def : Pat<(v4f32 (scalar_to_vector FR32X:$src)),
            (COPY_TO_REGCLASS FR32X:$src, VR128X)>;
  // Implicitly promote a 64-bit scalar to a vector.
  def : Pat<(v2f64 (scalar_to_vector FR64X:$src)),
            (COPY_TO_REGCLASS FR64X:$src, VR128X)>;
}
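
// Illustrative note (hedged; IR names are hypothetical): scalar_to_vector
// typically arises from IR such as
//   %v = insertelement <4 x float> undef, float %s, i64 0
// The scalar already occupies the low bits of an XMM register, so only the
// register class changes; the upper lanes stay undefined.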

//===----------------------------------------------------------------------===//
// Subvector tricks
//===----------------------------------------------------------------------===//

// Patterns for insert_subvector/extract_subvector to/from index=0
multiclass subvector_subreg_lowering<RegisterClass subRC, ValueType subVT,
                                     RegisterClass RC, ValueType VT,
                                     SubRegIndex subIdx> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (EXTRACT_SUBREG RC:$src, subIdx))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (INSERT_SUBREG (IMPLICIT_DEF), subRC:$src, subIdx))>;
}
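
// As a hedged sketch of what the multiclass produces, an instantiation like
//   defm : subvector_subreg_lowering<VR128, v4i32, VR256, v8i32, sub_xmm>;
// expands (after parameter substitution) to roughly:
//   def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))),
//             (v4i32 (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
//   def : Pat<(v8i32 (insert_subvector undef, VR128:$src, (iPTR 0))),
//             (v8i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128:$src, sub_xmm))>;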

// A 128-bit subvector extract from the first 256-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
// insert to the first 256-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR128, v4i32, VR256, v8i32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v4f32, VR256, v8f32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2i64, VR256, v4i64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR256, v4f64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR256, v16i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR256, v32i8, sub_xmm>;

// A 128-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR128, v4i32, VR512, v16i32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v4f32, VR512, v16f32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2i64, VR512, v8i64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR512, v8f64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR512, v32i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR512, v64i8, sub_xmm>;

// A 256-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 256-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR256, v8i32, VR512, v16i32, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v8f32, VR512, v16f32, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4i64, VR512, v8i64, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4f64, VR512, v8f64, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>;

// If we're inserting into an all zeros vector, just use a plain move which
// will zero the upper bits. A post-isel hook will take care of removing
// any moves that we can prove are unnecessary.
multiclass subvec_zero_lowering<string MoveStr,
                                RegisterClass RC, ValueType DstTy,
                                ValueType SrcTy, ValueType ZeroTy,
                                SubRegIndex SubIdx> {
  def : Pat<(DstTy (insert_subvector immAllZerosV,
                                     (SrcTy RC:$src), (iPTR 0))),
            (SUBREG_TO_REG (i64 0),
             (SrcTy (!cast<Instruction>("VMOV"#MoveStr#"rr") RC:$src)), SubIdx)>;
}
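
// A hedged sketch of one instantiation from the HasAVX group below:
//   defm : subvec_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, sub_xmm>;
// "VMOV"#"APS"#"rr" resolves to VMOVAPSrr, so it expands to roughly:
//   def : Pat<(v8f32 (insert_subvector immAllZerosV,
//                                      (v4f32 VR128:$src), (iPTR 0))),
//             (SUBREG_TO_REG (i64 0),
//              (v4f32 (VMOVAPSrr VR128:$src)), sub_xmm)>;
// A VEX-encoded 128-bit move zeroes bits 255:128 of the ymm register, which
// is exactly the insert-into-zero-vector semantics we need.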

let Predicates = [HasAVX, NoVLX] in {
  defm : subvec_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, sub_xmm>;
}

let Predicates = [HasVLX] in {
  defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32, sub_xmm>;

  defm : subvec_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32, sub_xmm>;

  defm : subvec_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32, sub_ymm>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : subvec_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, sub_xmm>;

  defm : subvec_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32, sub_ymm>;
}

class maskzeroupper<ValueType vt, RegisterClass RC> :
  PatLeaf<(vt RC:$src), [{
    return isMaskZeroExtended(N);
  }]>;
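
// A hedged reading of the PatLeaf above: maskzeroupperv8i1 below, for
// example, matches a (v8i1 VK8:$src) only when isMaskZeroExtended proves
// the producing node already left the mask bits above bit 7 zeroed.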

def maskzeroupperv1i1  : maskzeroupper<v1i1,  VK1>;
def maskzeroupperv2i1  : maskzeroupper<v2i1,  VK2>;
def maskzeroupperv4i1  : maskzeroupper<v4i1,  VK4>;
def maskzeroupperv8i1  : maskzeroupper<v8i1,  VK8>;
def maskzeroupperv16i1 : maskzeroupper<v16i1, VK16>;
def maskzeroupperv32i1 : maskzeroupper<v32i1, VK32>;

// These patterns determine whether we can depend on the upper bits of a mask
// register being zeroed by the previous operation, so that we can skip
// explicit zeroing.
let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv1i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK1:$src, VK32)>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv8i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK32)>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv16i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK16:$src, VK32)>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv1i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK1:$src, VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv8i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv16i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK16:$src, VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv32i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK32:$src, VK64)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     maskzeroupperv1i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK1:$src, VK16)>;
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     maskzeroupperv8i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK16)>;
}

let Predicates = [HasDQI] in {
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    maskzeroupperv1i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK1:$src, VK8)>;
}

let Predicates = [HasVLX, HasDQI] in {
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    maskzeroupperv2i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK2:$src, VK8)>;
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    maskzeroupperv4i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK4:$src, VK8)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     maskzeroupperv2i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK2:$src, VK16)>;
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     maskzeroupperv4i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK4:$src, VK16)>;
}

let Predicates = [HasBWI, HasVLX] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv2i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK2:$src, VK32)>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv4i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK4:$src, VK32)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv2i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK2:$src, VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv4i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK4:$src, VK64)>;
}

// If the bits are not known to be zero, we have to fall back to zeroing them
// explicitly using shifts.
let Predicates = [HasAVX512] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v1i1 VK1:$mask), (iPTR 0))),
            (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK1:$mask, VK16),
                                    (i8 15)), (i8 15))>;

  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v2i1 VK2:$mask), (iPTR 0))),
            (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16),
                                    (i8 14)), (i8 14))>;

  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v4i1 VK4:$mask), (iPTR 0))),
            (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16),
                                    (i8 12)), (i8 12))>;
}
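
// Worked example of the shift amounts above: inserting an N-element mask
// into a 16-bit mask register uses shift counts of 16 - N, hence 15/14/12
// for v1i1/v2i1/v4i1. Shifting left then right by the same count clears
// every bit above bit N-1 while preserving the low N bits.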

let Predicates = [HasAVX512, NoDQI] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK8:$mask, VK16),
                                    (i8 8)), (i8 8))>;
}

let Predicates = [HasDQI] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK16)>;

  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    (v1i1 VK1:$mask), (iPTR 0))),
            (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK1:$mask, VK8),
                                    (i8 7)), (i8 7))>;
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    (v2i1 VK2:$mask), (iPTR 0))),
            (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK2:$mask, VK8),
                                    (i8 6)), (i8 6))>;
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    (v4i1 VK4:$mask), (iPTR 0))),
            (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK4:$mask, VK8),
                                    (i8 4)), (i8 4))>;
}

let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v16i1 VK16:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK32)>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v16i1 VK16:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v32i1 VK32:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVDkk VK32:$mask), VK64)>;
}

let Predicates = [HasBWI, NoDQI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK8:$mask, VK32),
                                    (i8 24)), (i8 24))>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK8:$mask, VK64),
                                    (i8 56)), (i8 56))>;
}

let Predicates = [HasBWI, HasDQI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK32)>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK64)>;
}

let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v1i1 VK1:$mask), (iPTR 0))),
            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK1:$mask, VK32),
                                    (i8 31)), (i8 31))>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v2i1 VK2:$mask), (iPTR 0))),
            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK2:$mask, VK32),
                                    (i8 30)), (i8 30))>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v4i1 VK4:$mask), (iPTR 0))),
            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK4:$mask, VK32),
                                    (i8 28)), (i8 28))>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v1i1 VK1:$mask), (iPTR 0))),
            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK1:$mask, VK64),
                                    (i8 63)), (i8 63))>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v2i1 VK2:$mask), (iPTR 0))),
            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK2:$mask, VK64),
                                    (i8 62)), (i8 62))>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v4i1 VK4:$mask), (iPTR 0))),
            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK4:$mask, VK64),
                                    (i8 60)), (i8 60))>;
}

//===----------------------------------------------------------------------===//
// Extra selection patterns for f128, f128mem

// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
let Predicates = [NoAVX] in {
  def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (f128 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;

  def : Pat<(alignedloadf128 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadf128 addr:$src),
            (MOVUPSrm addr:$src)>;
}

let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (f128 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;

  def : Pat<(alignedloadf128 addr:$src),
            (VMOVAPSrm addr:$src)>;
  def : Pat<(loadf128 addr:$src),
            (VMOVUPSrm addr:$src)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(alignedstore (f128 VR128X:$src), addr:$dst),
            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (f128 VR128X:$src), addr:$dst),
            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(alignedloadf128 addr:$src),
            (VMOVAPSZ128rm addr:$src)>;
  def : Pat<(loadf128 addr:$src),
            (VMOVUPSZ128rm addr:$src)>;
}
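
// Illustrative note (hedged; IR names are hypothetical): these patterns
// cover fp128 memory traffic such as
//   store fp128 %x, ptr %p
// An fp128 value lives in an XMM register, so a plain 16-byte vector move
// (aligned or unaligned, matching the access) suffices.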

let Predicates = [UseSSE1] in {
  // andps is shorter than andpd or pand. andps is in SSE and andpd/pand are
  // in SSE2.
  def : Pat<(f128 (X86fand VR128:$src1, (memopf128 addr:$src2))),
            (ANDPSrm VR128:$src1, f128mem:$src2)>;

  def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
            (ANDPSrr VR128:$src1, VR128:$src2)>;

  def : Pat<(f128 (X86for VR128:$src1, (memopf128 addr:$src2))),
            (ORPSrm VR128:$src1, f128mem:$src2)>;

  def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
            (ORPSrr VR128:$src1, VR128:$src2)>;

  def : Pat<(f128 (X86fxor VR128:$src1, (memopf128 addr:$src2))),
            (XORPSrm VR128:$src1, f128mem:$src2)>;

  def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
            (XORPSrr VR128:$src1, VR128:$src2)>;
}
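
// Hedged note on provenance: f128 logical ops typically appear when
// fabs/fneg/copysign are lowered for fp128, producing X86fand/X86fxor/
// X86for against a sign-bit constant mask; the *ps forms above keep the
// encoding available on plain SSE1 and shorter than the pd/integer forms.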

let Predicates = [HasAVX, NoVLX] in {
  // andps is shorter than andpd or pand. andps is in SSE and andpd/pand are
  // in SSE2.
  def : Pat<(f128 (X86fand VR128:$src1, (loadf128 addr:$src2))),
            (VANDPSrm VR128:$src1, f128mem:$src2)>;

  def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
            (VANDPSrr VR128:$src1, VR128:$src2)>;

  def : Pat<(f128 (X86for VR128:$src1, (loadf128 addr:$src2))),
            (VORPSrm VR128:$src1, f128mem:$src2)>;

  def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
            (VORPSrr VR128:$src1, VR128:$src2)>;

  def : Pat<(f128 (X86fxor VR128:$src1, (loadf128 addr:$src2))),
            (VXORPSrm VR128:$src1, f128mem:$src2)>;

  def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
            (VXORPSrr VR128:$src1, VR128:$src2)>;
}

let Predicates = [HasVLX] in {
  // andps is shorter than andpd or pand. andps is in SSE and andpd/pand are
  // in SSE2.
  def : Pat<(f128 (X86fand VR128X:$src1, (loadf128 addr:$src2))),
            (VANDPSZ128rm VR128X:$src1, f128mem:$src2)>;

  def : Pat<(f128 (X86fand VR128X:$src1, VR128X:$src2)),
            (VANDPSZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(f128 (X86for VR128X:$src1, (loadf128 addr:$src2))),
            (VORPSZ128rm VR128X:$src1, f128mem:$src2)>;

  def : Pat<(f128 (X86for VR128X:$src1, VR128X:$src2)),
            (VORPSZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(f128 (X86fxor VR128X:$src1, (loadf128 addr:$src2))),
            (VXORPSZ128rm VR128X:$src1, f128mem:$src2)>;

  def : Pat<(f128 (X86fxor VR128X:$src1, VR128X:$src2)),
            (VXORPSZ128rr VR128X:$src1, VR128X:$src2)>;
}