CbC_llvm: test/CodeGen/X86/vec_minmax_sint.ll @ 95:afa8332a0e37 (LLVM 3.8)
author:   Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date:     Tue, 13 Oct 2015 17:48:58 +0900
children: 7d135dc70f03
comparison: 84:f3e34b893a5f -> 95:afa8332a0e37
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 | |
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 | |
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE42 | |
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 | |
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 | |
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F | |
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW | |
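; Each RUN line above compiles this file with llc for a different x86 feature
; level (SSE2 up to AVX-512BW) and pipes the assembly into FileCheck. The
; --check-prefix flags select which of the prefixed CHECK blocks below must
; match; shared prefixes (ALL, SSE, AVX, AVX512) let several configurations
; reuse one expected sequence. The file is normally executed through llvm-lit,
; e.g. "llvm-lit test/CodeGen/X86/vec_minmax_sint.ll" from an LLVM build tree.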
8 | |
9 ; | |
10 ; Signed Maximum (GT) | |
11 ; | |
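; These tests check how "icmp sgt + select" is lowered per element width:
; i8/i16/i32 collapse to a single pmaxsb/pmaxsw/pmaxsd (vpmaxs* on AVX) where
; the target provides it, while i64 has no packed signed-max instruction on
; these targets, so a compare-and-blend sequence is expected instead. On
; SSE2/SSE4.1 the 64-bit signed compare itself is emulated with 32-bit ops:
; both operands are XORed with 0x0000000080000000 per lane (so the low dwords
; can be compared as unsigned via the signed pcmpgtd), and the mask is built as
;   (hi_a > hi_b) | ((hi_a == hi_b) & (lo_a >u lo_b))
; before selecting with pand/pandn/por (SSE2) or blendvpd (SSE4.1).
; SSE4.2 and AVX use pcmpgtq/vpcmpgtq directly.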
12 | |
13 define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) { | |
14 ; SSE2-LABEL: max_gt_v2i64: | |
15 ; SSE2: # BB#0: | |
16 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] | |
17 ; SSE2-NEXT: movdqa %xmm1, %xmm3 | |
18 ; SSE2-NEXT: pxor %xmm2, %xmm3 | |
19 ; SSE2-NEXT: pxor %xmm0, %xmm2 | |
20 ; SSE2-NEXT: movdqa %xmm2, %xmm4 | |
21 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 | |
22 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] | |
23 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 | |
24 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] | |
25 ; SSE2-NEXT: pand %xmm5, %xmm2 | |
26 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] | |
27 ; SSE2-NEXT: por %xmm2, %xmm3 | |
28 ; SSE2-NEXT: pand %xmm3, %xmm0 | |
29 ; SSE2-NEXT: pandn %xmm1, %xmm3 | |
30 ; SSE2-NEXT: por %xmm3, %xmm0 | |
31 ; SSE2-NEXT: retq | |
32 ; | |
33 ; SSE41-LABEL: max_gt_v2i64: | |
34 ; SSE41: # BB#0: | |
35 ; SSE41-NEXT: movdqa %xmm0, %xmm2 | |
36 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
37 ; SSE41-NEXT: movdqa %xmm1, %xmm3 | |
38 ; SSE41-NEXT: pxor %xmm0, %xmm3 | |
39 ; SSE41-NEXT: pxor %xmm2, %xmm0 | |
40 ; SSE41-NEXT: movdqa %xmm0, %xmm4 | |
41 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4 | |
42 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] | |
43 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0 | |
44 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] | |
45 ; SSE41-NEXT: pand %xmm5, %xmm3 | |
46 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] | |
47 ; SSE41-NEXT: por %xmm3, %xmm0 | |
48 ; SSE41-NEXT: blendvpd %xmm2, %xmm1 | |
49 ; SSE41-NEXT: movapd %xmm1, %xmm0 | |
50 ; SSE41-NEXT: retq | |
51 ; | |
52 ; SSE42-LABEL: max_gt_v2i64: | |
53 ; SSE42: # BB#0: | |
54 ; SSE42-NEXT: movdqa %xmm0, %xmm2 | |
55 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 | |
56 ; SSE42-NEXT: blendvpd %xmm2, %xmm1 | |
57 ; SSE42-NEXT: movapd %xmm1, %xmm0 | |
58 ; SSE42-NEXT: retq | |
59 ; | |
60 ; AVX-LABEL: max_gt_v2i64: | |
61 ; AVX: # BB#0: | |
62 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 | |
63 ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 | |
64 ; AVX-NEXT: retq | |
65 %1 = icmp sgt <2 x i64> %a, %b | |
66 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b | |
67 ret <2 x i64> %2 | |
68 } | |
69 | |
70 define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) { | |
71 ; SSE2-LABEL: max_gt_v4i64: | |
72 ; SSE2: # BB#0: | |
73 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0] | |
74 ; SSE2-NEXT: movdqa %xmm3, %xmm5 | |
75 ; SSE2-NEXT: pxor %xmm4, %xmm5 | |
76 ; SSE2-NEXT: movdqa %xmm1, %xmm6 | |
77 ; SSE2-NEXT: pxor %xmm4, %xmm6 | |
78 ; SSE2-NEXT: movdqa %xmm6, %xmm7 | |
79 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7 | |
80 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] | |
81 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm6 | |
82 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3] | |
83 ; SSE2-NEXT: pand %xmm8, %xmm5 | |
84 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3] | |
85 ; SSE2-NEXT: por %xmm5, %xmm6 | |
86 ; SSE2-NEXT: movdqa %xmm2, %xmm5 | |
87 ; SSE2-NEXT: pxor %xmm4, %xmm5 | |
88 ; SSE2-NEXT: pxor %xmm0, %xmm4 | |
89 ; SSE2-NEXT: movdqa %xmm4, %xmm7 | |
90 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7 | |
91 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] | |
92 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm4 | |
93 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] | |
94 ; SSE2-NEXT: pand %xmm8, %xmm4 | |
95 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] | |
96 ; SSE2-NEXT: por %xmm4, %xmm5 | |
97 ; SSE2-NEXT: pand %xmm5, %xmm0 | |
98 ; SSE2-NEXT: pandn %xmm2, %xmm5 | |
99 ; SSE2-NEXT: por %xmm5, %xmm0 | |
100 ; SSE2-NEXT: pand %xmm6, %xmm1 | |
101 ; SSE2-NEXT: pandn %xmm3, %xmm6 | |
102 ; SSE2-NEXT: por %xmm6, %xmm1 | |
103 ; SSE2-NEXT: retq | |
104 ; | |
105 ; SSE41-LABEL: max_gt_v4i64: | |
106 ; SSE41: # BB#0: | |
107 ; SSE41-NEXT: movdqa %xmm0, %xmm8 | |
108 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
109 ; SSE41-NEXT: movdqa %xmm3, %xmm5 | |
110 ; SSE41-NEXT: pxor %xmm0, %xmm5 | |
111 ; SSE41-NEXT: movdqa %xmm1, %xmm6 | |
112 ; SSE41-NEXT: pxor %xmm0, %xmm6 | |
113 ; SSE41-NEXT: movdqa %xmm6, %xmm7 | |
114 ; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 | |
115 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2] | |
116 ; SSE41-NEXT: pcmpeqd %xmm5, %xmm6 | |
117 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] | |
118 ; SSE41-NEXT: pand %xmm4, %xmm6 | |
119 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] | |
120 ; SSE41-NEXT: por %xmm6, %xmm5 | |
121 ; SSE41-NEXT: movdqa %xmm2, %xmm4 | |
122 ; SSE41-NEXT: pxor %xmm0, %xmm4 | |
123 ; SSE41-NEXT: pxor %xmm8, %xmm0 | |
124 ; SSE41-NEXT: movdqa %xmm0, %xmm6 | |
125 ; SSE41-NEXT: pcmpgtd %xmm4, %xmm6 | |
126 ; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] | |
127 ; SSE41-NEXT: pcmpeqd %xmm4, %xmm0 | |
128 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] | |
129 ; SSE41-NEXT: pand %xmm7, %xmm4 | |
130 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] | |
131 ; SSE41-NEXT: por %xmm4, %xmm0 | |
132 ; SSE41-NEXT: blendvpd %xmm8, %xmm2 | |
133 ; SSE41-NEXT: movdqa %xmm5, %xmm0 | |
134 ; SSE41-NEXT: blendvpd %xmm1, %xmm3 | |
135 ; SSE41-NEXT: movapd %xmm2, %xmm0 | |
136 ; SSE41-NEXT: movapd %xmm3, %xmm1 | |
137 ; SSE41-NEXT: retq | |
138 ; | |
139 ; SSE42-LABEL: max_gt_v4i64: | |
140 ; SSE42: # BB#0: | |
141 ; SSE42-NEXT: movdqa %xmm0, %xmm4 | |
142 ; SSE42-NEXT: movdqa %xmm1, %xmm5 | |
143 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm5 | |
144 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 | |
145 ; SSE42-NEXT: blendvpd %xmm4, %xmm2 | |
146 ; SSE42-NEXT: movdqa %xmm5, %xmm0 | |
147 ; SSE42-NEXT: blendvpd %xmm1, %xmm3 | |
148 ; SSE42-NEXT: movapd %xmm2, %xmm0 | |
149 ; SSE42-NEXT: movapd %xmm3, %xmm1 | |
150 ; SSE42-NEXT: retq | |
151 ; | |
152 ; AVX1-LABEL: max_gt_v4i64: | |
153 ; AVX1: # BB#0: | |
154 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | |
155 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | |
156 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 | |
157 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 | |
158 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 | |
159 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
160 ; AVX1-NEXT: retq | |
161 ; | |
162 ; AVX2-LABEL: max_gt_v4i64: | |
163 ; AVX2: # BB#0: | |
164 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 | |
165 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
166 ; AVX2-NEXT: retq | |
167 ; | |
168 ; AVX512-LABEL: max_gt_v4i64: | |
169 ; AVX512: # BB#0: | |
170 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 | |
171 ; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
172 ; AVX512-NEXT: retq | |
173 %1 = icmp sgt <4 x i64> %a, %b | |
174 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b | |
175 ret <4 x i64> %2 | |
176 } | |
177 | |
178 define <4 x i32> @max_gt_v4i32(<4 x i32> %a, <4 x i32> %b) { | |
179 ; SSE2-LABEL: max_gt_v4i32: | |
180 ; SSE2: # BB#0: | |
181 ; SSE2-NEXT: movdqa %xmm0, %xmm2 | |
182 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 | |
183 ; SSE2-NEXT: pand %xmm2, %xmm0 | |
184 ; SSE2-NEXT: pandn %xmm1, %xmm2 | |
185 ; SSE2-NEXT: por %xmm0, %xmm2 | |
186 ; SSE2-NEXT: movdqa %xmm2, %xmm0 | |
187 ; SSE2-NEXT: retq | |
188 ; | |
189 ; SSE41-LABEL: max_gt_v4i32: | |
190 ; SSE41: # BB#0: | |
191 ; SSE41-NEXT: pmaxsd %xmm1, %xmm0 | |
192 ; SSE41-NEXT: retq | |
193 ; | |
194 ; SSE42-LABEL: max_gt_v4i32: | |
195 ; SSE42: # BB#0: | |
196 ; SSE42-NEXT: pmaxsd %xmm1, %xmm0 | |
197 ; SSE42-NEXT: retq | |
198 ; | |
199 ; AVX-LABEL: max_gt_v4i32: | |
200 ; AVX: # BB#0: | |
201 ; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 | |
202 ; AVX-NEXT: retq | |
203 %1 = icmp sgt <4 x i32> %a, %b | |
204 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b | |
205 ret <4 x i32> %2 | |
206 } | |
207 | |
208 define <8 x i32> @max_gt_v8i32(<8 x i32> %a, <8 x i32> %b) { | |
209 ; SSE2-LABEL: max_gt_v8i32: | |
210 ; SSE2: # BB#0: | |
211 ; SSE2-NEXT: movdqa %xmm1, %xmm4 | |
212 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 | |
213 ; SSE2-NEXT: movdqa %xmm0, %xmm5 | |
214 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5 | |
215 ; SSE2-NEXT: pand %xmm5, %xmm0 | |
216 ; SSE2-NEXT: pandn %xmm2, %xmm5 | |
217 ; SSE2-NEXT: por %xmm0, %xmm5 | |
218 ; SSE2-NEXT: pand %xmm4, %xmm1 | |
219 ; SSE2-NEXT: pandn %xmm3, %xmm4 | |
220 ; SSE2-NEXT: por %xmm1, %xmm4 | |
221 ; SSE2-NEXT: movdqa %xmm5, %xmm0 | |
222 ; SSE2-NEXT: movdqa %xmm4, %xmm1 | |
223 ; SSE2-NEXT: retq | |
224 ; | |
225 ; SSE41-LABEL: max_gt_v8i32: | |
226 ; SSE41: # BB#0: | |
227 ; SSE41-NEXT: pmaxsd %xmm2, %xmm0 | |
228 ; SSE41-NEXT: pmaxsd %xmm3, %xmm1 | |
229 ; SSE41-NEXT: retq | |
230 ; | |
231 ; SSE42-LABEL: max_gt_v8i32: | |
232 ; SSE42: # BB#0: | |
233 ; SSE42-NEXT: pmaxsd %xmm2, %xmm0 | |
234 ; SSE42-NEXT: pmaxsd %xmm3, %xmm1 | |
235 ; SSE42-NEXT: retq | |
236 ; | |
237 ; AVX1-LABEL: max_gt_v8i32: | |
238 ; AVX1: # BB#0: | |
239 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | |
240 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | |
241 ; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2 | |
242 ; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 | |
243 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 | |
244 ; AVX1-NEXT: retq | |
245 ; | |
246 ; AVX2-LABEL: max_gt_v8i32: | |
247 ; AVX2: # BB#0: | |
248 ; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 | |
249 ; AVX2-NEXT: retq | |
250 ; | |
251 ; AVX512-LABEL: max_gt_v8i32: | |
252 ; AVX512: # BB#0: | |
253 ; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 | |
254 ; AVX512-NEXT: retq | |
255 %1 = icmp sgt <8 x i32> %a, %b | |
256 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b | |
257 ret <8 x i32> %2 | |
258 } | |
259 | |
260 define <8 x i16> @max_gt_v8i16(<8 x i16> %a, <8 x i16> %b) { | |
261 ; SSE-LABEL: max_gt_v8i16: | |
262 ; SSE: # BB#0: | |
263 ; SSE-NEXT: pmaxsw %xmm1, %xmm0 | |
264 ; SSE-NEXT: retq | |
265 ; | |
266 ; AVX-LABEL: max_gt_v8i16: | |
267 ; AVX: # BB#0: | |
268 ; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 | |
269 ; AVX-NEXT: retq | |
270 %1 = icmp sgt <8 x i16> %a, %b | |
271 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b | |
272 ret <8 x i16> %2 | |
273 } | |
274 | |
275 define <16 x i16> @max_gt_v16i16(<16 x i16> %a, <16 x i16> %b) { | |
276 ; SSE-LABEL: max_gt_v16i16: | |
277 ; SSE: # BB#0: | |
278 ; SSE-NEXT: pmaxsw %xmm2, %xmm0 | |
279 ; SSE-NEXT: pmaxsw %xmm3, %xmm1 | |
280 ; SSE-NEXT: retq | |
281 ; | |
282 ; AVX1-LABEL: max_gt_v16i16: | |
283 ; AVX1: # BB#0: | |
284 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | |
285 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | |
286 ; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2 | |
287 ; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 | |
288 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 | |
289 ; AVX1-NEXT: retq | |
290 ; | |
291 ; AVX2-LABEL: max_gt_v16i16: | |
292 ; AVX2: # BB#0: | |
293 ; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 | |
294 ; AVX2-NEXT: retq | |
295 ; | |
296 ; AVX512-LABEL: max_gt_v16i16: | |
297 ; AVX512: # BB#0: | |
298 ; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 | |
299 ; AVX512-NEXT: retq | |
300 %1 = icmp sgt <16 x i16> %a, %b | |
301 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b | |
302 ret <16 x i16> %2 | |
303 } | |
304 | |
305 define <16 x i8> @max_gt_v16i8(<16 x i8> %a, <16 x i8> %b) { | |
306 ; SSE2-LABEL: max_gt_v16i8: | |
307 ; SSE2: # BB#0: | |
308 ; SSE2-NEXT: movdqa %xmm0, %xmm2 | |
309 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 | |
310 ; SSE2-NEXT: pand %xmm2, %xmm0 | |
311 ; SSE2-NEXT: pandn %xmm1, %xmm2 | |
312 ; SSE2-NEXT: por %xmm0, %xmm2 | |
313 ; SSE2-NEXT: movdqa %xmm2, %xmm0 | |
314 ; SSE2-NEXT: retq | |
315 ; | |
316 ; SSE41-LABEL: max_gt_v16i8: | |
317 ; SSE41: # BB#0: | |
318 ; SSE41-NEXT: pmaxsb %xmm1, %xmm0 | |
319 ; SSE41-NEXT: retq | |
320 ; | |
321 ; SSE42-LABEL: max_gt_v16i8: | |
322 ; SSE42: # BB#0: | |
323 ; SSE42-NEXT: pmaxsb %xmm1, %xmm0 | |
324 ; SSE42-NEXT: retq | |
325 ; | |
326 ; AVX-LABEL: max_gt_v16i8: | |
327 ; AVX: # BB#0: | |
328 ; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 | |
329 ; AVX-NEXT: retq | |
330 %1 = icmp sgt <16 x i8> %a, %b | |
331 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b | |
332 ret <16 x i8> %2 | |
333 } | |
334 | |
335 define <32 x i8> @max_gt_v32i8(<32 x i8> %a, <32 x i8> %b) { | |
336 ; SSE2-LABEL: max_gt_v32i8: | |
337 ; SSE2: # BB#0: | |
338 ; SSE2-NEXT: movdqa %xmm1, %xmm4 | |
339 ; SSE2-NEXT: pcmpgtb %xmm3, %xmm4 | |
340 ; SSE2-NEXT: movdqa %xmm0, %xmm5 | |
341 ; SSE2-NEXT: pcmpgtb %xmm2, %xmm5 | |
342 ; SSE2-NEXT: pand %xmm5, %xmm0 | |
343 ; SSE2-NEXT: pandn %xmm2, %xmm5 | |
344 ; SSE2-NEXT: por %xmm0, %xmm5 | |
345 ; SSE2-NEXT: pand %xmm4, %xmm1 | |
346 ; SSE2-NEXT: pandn %xmm3, %xmm4 | |
347 ; SSE2-NEXT: por %xmm1, %xmm4 | |
348 ; SSE2-NEXT: movdqa %xmm5, %xmm0 | |
349 ; SSE2-NEXT: movdqa %xmm4, %xmm1 | |
350 ; SSE2-NEXT: retq | |
351 ; | |
352 ; SSE41-LABEL: max_gt_v32i8: | |
353 ; SSE41: # BB#0: | |
354 ; SSE41-NEXT: pmaxsb %xmm2, %xmm0 | |
355 ; SSE41-NEXT: pmaxsb %xmm3, %xmm1 | |
356 ; SSE41-NEXT: retq | |
357 ; | |
358 ; SSE42-LABEL: max_gt_v32i8: | |
359 ; SSE42: # BB#0: | |
360 ; SSE42-NEXT: pmaxsb %xmm2, %xmm0 | |
361 ; SSE42-NEXT: pmaxsb %xmm3, %xmm1 | |
362 ; SSE42-NEXT: retq | |
363 ; | |
364 ; AVX1-LABEL: max_gt_v32i8: | |
365 ; AVX1: # BB#0: | |
366 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | |
367 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | |
368 ; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2 | |
369 ; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 | |
370 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 | |
371 ; AVX1-NEXT: retq | |
372 ; | |
373 ; AVX2-LABEL: max_gt_v32i8: | |
374 ; AVX2: # BB#0: | |
375 ; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 | |
376 ; AVX2-NEXT: retq | |
377 ; | |
378 ; AVX512-LABEL: max_gt_v32i8: | |
379 ; AVX512: # BB#0: | |
380 ; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 | |
381 ; AVX512-NEXT: retq | |
382 %1 = icmp sgt <32 x i8> %a, %b | |
383 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b | |
384 ret <32 x i8> %2 | |
385 } | |
386 | |
387 ; | |
388 ; Signed Maximum (GE) | |
389 ; | |
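; SSE/AVX have no greater-or-equal compare, so "icmp sge" is expected to be
; lowered as NOT(b > a): the greater-than compare is done with swapped
; operands and the resulting mask is inverted by XORing with an all-ones
; register (materialized with pcmpeqd), before driving the select/blend.
; Widths with a native signed max still fold to pmaxs*, since max(a, b) is
; the same value for sgt and sge.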
390 | |
391 define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) { | |
392 ; SSE2-LABEL: max_ge_v2i64: | |
393 ; SSE2: # BB#0: | |
394 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] | |
395 ; SSE2-NEXT: movdqa %xmm0, %xmm3 | |
396 ; SSE2-NEXT: pxor %xmm2, %xmm3 | |
397 ; SSE2-NEXT: pxor %xmm1, %xmm2 | |
398 ; SSE2-NEXT: movdqa %xmm2, %xmm4 | |
399 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 | |
400 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] | |
401 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 | |
402 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] | |
403 ; SSE2-NEXT: pand %xmm5, %xmm2 | |
404 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] | |
405 ; SSE2-NEXT: por %xmm2, %xmm3 | |
406 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 | |
407 ; SSE2-NEXT: pxor %xmm3, %xmm2 | |
408 ; SSE2-NEXT: pandn %xmm0, %xmm3 | |
409 ; SSE2-NEXT: pandn %xmm1, %xmm2 | |
410 ; SSE2-NEXT: por %xmm3, %xmm2 | |
411 ; SSE2-NEXT: movdqa %xmm2, %xmm0 | |
412 ; SSE2-NEXT: retq | |
413 ; | |
414 ; SSE41-LABEL: max_ge_v2i64: | |
415 ; SSE41: # BB#0: | |
416 ; SSE41-NEXT: movdqa %xmm0, %xmm2 | |
417 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
418 ; SSE41-NEXT: movdqa %xmm2, %xmm3 | |
419 ; SSE41-NEXT: pxor %xmm0, %xmm3 | |
420 ; SSE41-NEXT: pxor %xmm1, %xmm0 | |
421 ; SSE41-NEXT: movdqa %xmm0, %xmm4 | |
422 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4 | |
423 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] | |
424 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0 | |
425 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] | |
426 ; SSE41-NEXT: pand %xmm5, %xmm0 | |
427 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] | |
428 ; SSE41-NEXT: por %xmm0, %xmm3 | |
429 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 | |
430 ; SSE41-NEXT: pxor %xmm3, %xmm0 | |
431 ; SSE41-NEXT: blendvpd %xmm2, %xmm1 | |
432 ; SSE41-NEXT: movapd %xmm1, %xmm0 | |
433 ; SSE41-NEXT: retq | |
434 ; | |
435 ; SSE42-LABEL: max_ge_v2i64: | |
436 ; SSE42: # BB#0: | |
437 ; SSE42-NEXT: movdqa %xmm0, %xmm2 | |
438 ; SSE42-NEXT: movdqa %xmm1, %xmm3 | |
439 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm3 | |
440 ; SSE42-NEXT: pcmpeqd %xmm0, %xmm0 | |
441 ; SSE42-NEXT: pxor %xmm3, %xmm0 | |
442 ; SSE42-NEXT: blendvpd %xmm2, %xmm1 | |
443 ; SSE42-NEXT: movapd %xmm1, %xmm0 | |
444 ; SSE42-NEXT: retq | |
445 ; | |
446 ; AVX-LABEL: max_ge_v2i64: | |
447 ; AVX: # BB#0: | |
448 ; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 | |
449 ; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 | |
450 ; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2 | |
451 ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 | |
452 ; AVX-NEXT: retq | |
453 %1 = icmp sge <2 x i64> %a, %b | |
454 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b | |
455 ret <2 x i64> %2 | |
456 } | |
457 | |
458 define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) { | |
459 ; SSE2-LABEL: max_ge_v4i64: | |
460 ; SSE2: # BB#0: | |
461 ; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,0,2147483648,0] | |
462 ; SSE2-NEXT: movdqa %xmm1, %xmm4 | |
463 ; SSE2-NEXT: pxor %xmm7, %xmm4 | |
464 ; SSE2-NEXT: movdqa %xmm3, %xmm5 | |
465 ; SSE2-NEXT: pxor %xmm7, %xmm5 | |
466 ; SSE2-NEXT: movdqa %xmm5, %xmm6 | |
467 ; SSE2-NEXT: pcmpgtd %xmm4, %xmm6 | |
468 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2] | |
469 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm5 | |
470 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] | |
471 ; SSE2-NEXT: pand %xmm8, %xmm4 | |
472 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3] | |
473 ; SSE2-NEXT: por %xmm4, %xmm8 | |
474 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm4 | |
475 ; SSE2-NEXT: movdqa %xmm8, %xmm9 | |
476 ; SSE2-NEXT: pxor %xmm4, %xmm9 | |
477 ; SSE2-NEXT: movdqa %xmm0, %xmm6 | |
478 ; SSE2-NEXT: pxor %xmm7, %xmm6 | |
479 ; SSE2-NEXT: pxor %xmm2, %xmm7 | |
480 ; SSE2-NEXT: movdqa %xmm7, %xmm5 | |
481 ; SSE2-NEXT: pcmpgtd %xmm6, %xmm5 | |
482 ; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2] | |
483 ; SSE2-NEXT: pcmpeqd %xmm6, %xmm7 | |
484 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3] | |
485 ; SSE2-NEXT: pand %xmm10, %xmm6 | |
486 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] | |
487 ; SSE2-NEXT: por %xmm6, %xmm5 | |
488 ; SSE2-NEXT: pxor %xmm5, %xmm4 | |
489 ; SSE2-NEXT: pandn %xmm0, %xmm5 | |
490 ; SSE2-NEXT: pandn %xmm2, %xmm4 | |
491 ; SSE2-NEXT: por %xmm5, %xmm4 | |
492 ; SSE2-NEXT: pandn %xmm1, %xmm8 | |
493 ; SSE2-NEXT: pandn %xmm3, %xmm9 | |
494 ; SSE2-NEXT: por %xmm8, %xmm9 | |
495 ; SSE2-NEXT: movdqa %xmm4, %xmm0 | |
496 ; SSE2-NEXT: movdqa %xmm9, %xmm1 | |
497 ; SSE2-NEXT: retq | |
498 ; | |
499 ; SSE41-LABEL: max_ge_v4i64: | |
500 ; SSE41: # BB#0: | |
501 ; SSE41-NEXT: movdqa %xmm0, %xmm8 | |
502 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
503 ; SSE41-NEXT: movdqa %xmm1, %xmm5 | |
504 ; SSE41-NEXT: pxor %xmm0, %xmm5 | |
505 ; SSE41-NEXT: movdqa %xmm3, %xmm6 | |
506 ; SSE41-NEXT: pxor %xmm0, %xmm6 | |
507 ; SSE41-NEXT: movdqa %xmm6, %xmm7 | |
508 ; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 | |
509 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2] | |
510 ; SSE41-NEXT: pcmpeqd %xmm5, %xmm6 | |
511 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] | |
512 ; SSE41-NEXT: pand %xmm4, %xmm6 | |
513 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] | |
514 ; SSE41-NEXT: por %xmm6, %xmm5 | |
515 ; SSE41-NEXT: pcmpeqd %xmm9, %xmm9 | |
516 ; SSE41-NEXT: pxor %xmm9, %xmm5 | |
517 ; SSE41-NEXT: movdqa %xmm8, %xmm6 | |
518 ; SSE41-NEXT: pxor %xmm0, %xmm6 | |
519 ; SSE41-NEXT: pxor %xmm2, %xmm0 | |
520 ; SSE41-NEXT: movdqa %xmm0, %xmm7 | |
521 ; SSE41-NEXT: pcmpgtd %xmm6, %xmm7 | |
522 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2] | |
523 ; SSE41-NEXT: pcmpeqd %xmm6, %xmm0 | |
524 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] | |
525 ; SSE41-NEXT: pand %xmm4, %xmm6 | |
526 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3] | |
527 ; SSE41-NEXT: por %xmm6, %xmm0 | |
528 ; SSE41-NEXT: pxor %xmm9, %xmm0 | |
529 ; SSE41-NEXT: blendvpd %xmm8, %xmm2 | |
530 ; SSE41-NEXT: movdqa %xmm5, %xmm0 | |
531 ; SSE41-NEXT: blendvpd %xmm1, %xmm3 | |
532 ; SSE41-NEXT: movapd %xmm2, %xmm0 | |
533 ; SSE41-NEXT: movapd %xmm3, %xmm1 | |
534 ; SSE41-NEXT: retq | |
535 ; | |
536 ; SSE42-LABEL: max_ge_v4i64: | |
537 ; SSE42: # BB#0: | |
538 ; SSE42-NEXT: movdqa %xmm0, %xmm4 | |
539 ; SSE42-NEXT: movdqa %xmm3, %xmm5 | |
540 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm5 | |
541 ; SSE42-NEXT: pcmpeqd %xmm0, %xmm0 | |
542 ; SSE42-NEXT: pxor %xmm0, %xmm5 | |
543 ; SSE42-NEXT: movdqa %xmm2, %xmm6 | |
544 ; SSE42-NEXT: pcmpgtq %xmm4, %xmm6 | |
545 ; SSE42-NEXT: pxor %xmm6, %xmm0 | |
546 ; SSE42-NEXT: blendvpd %xmm4, %xmm2 | |
547 ; SSE42-NEXT: movdqa %xmm5, %xmm0 | |
548 ; SSE42-NEXT: blendvpd %xmm1, %xmm3 | |
549 ; SSE42-NEXT: movapd %xmm2, %xmm0 | |
550 ; SSE42-NEXT: movapd %xmm3, %xmm1 | |
551 ; SSE42-NEXT: retq | |
552 ; | |
553 ; AVX1-LABEL: max_ge_v4i64: | |
554 ; AVX1: # BB#0: | |
555 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 | |
556 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 | |
557 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 | |
558 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 | |
559 ; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2 | |
560 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4 | |
561 ; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3 | |
562 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 | |
563 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
564 ; AVX1-NEXT: retq | |
565 ; | |
566 ; AVX2-LABEL: max_ge_v4i64: | |
567 ; AVX2: # BB#0: | |
568 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 | |
569 ; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 | |
570 ; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2 | |
571 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
572 ; AVX2-NEXT: retq | |
573 ; | |
574 ; AVX512-LABEL: max_ge_v4i64: | |
575 ; AVX512: # BB#0: | |
576 ; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 | |
577 ; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 | |
578 ; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2 | |
579 ; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
580 ; AVX512-NEXT: retq | |
581 %1 = icmp sge <4 x i64> %a, %b | |
582 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b | |
583 ret <4 x i64> %2 | |
584 } | |
585 | |
586 define <4 x i32> @max_ge_v4i32(<4 x i32> %a, <4 x i32> %b) { | |
587 ; SSE2-LABEL: max_ge_v4i32: | |
588 ; SSE2: # BB#0: | |
589 ; SSE2-NEXT: movdqa %xmm1, %xmm3 | |
590 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 | |
591 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 | |
592 ; SSE2-NEXT: pxor %xmm3, %xmm2 | |
593 ; SSE2-NEXT: pandn %xmm0, %xmm3 | |
594 ; SSE2-NEXT: pandn %xmm1, %xmm2 | |
595 ; SSE2-NEXT: por %xmm3, %xmm2 | |
596 ; SSE2-NEXT: movdqa %xmm2, %xmm0 | |
597 ; SSE2-NEXT: retq | |
598 ; | |
599 ; SSE41-LABEL: max_ge_v4i32: | |
600 ; SSE41: # BB#0: | |
601 ; SSE41-NEXT: pmaxsd %xmm1, %xmm0 | |
602 ; SSE41-NEXT: retq | |
603 ; | |
604 ; SSE42-LABEL: max_ge_v4i32: | |
605 ; SSE42: # BB#0: | |
606 ; SSE42-NEXT: pmaxsd %xmm1, %xmm0 | |
607 ; SSE42-NEXT: retq | |
608 ; | |
609 ; AVX-LABEL: max_ge_v4i32: | |
610 ; AVX: # BB#0: | |
611 ; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 | |
612 ; AVX-NEXT: retq | |
613 %1 = icmp sge <4 x i32> %a, %b | |
614 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b | |
615 ret <4 x i32> %2 | |
616 } | |
617 | |
618 define <8 x i32> @max_ge_v8i32(<8 x i32> %a, <8 x i32> %b) { | |
619 ; SSE2-LABEL: max_ge_v8i32: | |
620 ; SSE2: # BB#0: | |
621 ; SSE2-NEXT: movdqa %xmm3, %xmm6 | |
622 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm6 | |
623 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm4 | |
624 ; SSE2-NEXT: movdqa %xmm6, %xmm5 | |
625 ; SSE2-NEXT: pxor %xmm4, %xmm5 | |
626 ; SSE2-NEXT: movdqa %xmm2, %xmm7 | |
627 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm7 | |
628 ; SSE2-NEXT: pxor %xmm7, %xmm4 | |
629 ; SSE2-NEXT: pandn %xmm0, %xmm7 | |
630 ; SSE2-NEXT: pandn %xmm2, %xmm4 | |
631 ; SSE2-NEXT: por %xmm7, %xmm4 | |
632 ; SSE2-NEXT: pandn %xmm1, %xmm6 | |
633 ; SSE2-NEXT: pandn %xmm3, %xmm5 | |
634 ; SSE2-NEXT: por %xmm6, %xmm5 | |
635 ; SSE2-NEXT: movdqa %xmm4, %xmm0 | |
636 ; SSE2-NEXT: movdqa %xmm5, %xmm1 | |
637 ; SSE2-NEXT: retq | |
638 ; | |
639 ; SSE41-LABEL: max_ge_v8i32: | |
640 ; SSE41: # BB#0: | |
641 ; SSE41-NEXT: pmaxsd %xmm2, %xmm0 | |
642 ; SSE41-NEXT: pmaxsd %xmm3, %xmm1 | |
643 ; SSE41-NEXT: retq | |
644 ; | |
645 ; SSE42-LABEL: max_ge_v8i32: | |
646 ; SSE42: # BB#0: | |
647 ; SSE42-NEXT: pmaxsd %xmm2, %xmm0 | |
648 ; SSE42-NEXT: pmaxsd %xmm3, %xmm1 | |
649 ; SSE42-NEXT: retq | |
650 ; | |
651 ; AVX1-LABEL: max_ge_v8i32: | |
652 ; AVX1: # BB#0: | |
653 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | |
654 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | |
655 ; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2 | |
656 ; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 | |
657 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 | |
658 ; AVX1-NEXT: retq | |
659 ; | |
660 ; AVX2-LABEL: max_ge_v8i32: | |
661 ; AVX2: # BB#0: | |
662 ; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 | |
663 ; AVX2-NEXT: retq | |
664 ; | |
665 ; AVX512-LABEL: max_ge_v8i32: | |
666 ; AVX512: # BB#0: | |
667 ; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 | |
668 ; AVX512-NEXT: retq | |
669 %1 = icmp sge <8 x i32> %a, %b | |
670 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b | |
671 ret <8 x i32> %2 | |
672 } | |
673 | |
674 define <8 x i16> @max_ge_v8i16(<8 x i16> %a, <8 x i16> %b) { | |
675 ; SSE-LABEL: max_ge_v8i16: | |
676 ; SSE: # BB#0: | |
677 ; SSE-NEXT: pmaxsw %xmm1, %xmm0 | |
678 ; SSE-NEXT: retq | |
679 ; | |
680 ; AVX-LABEL: max_ge_v8i16: | |
681 ; AVX: # BB#0: | |
682 ; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 | |
683 ; AVX-NEXT: retq | |
684 %1 = icmp sge <8 x i16> %a, %b | |
685 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b | |
686 ret <8 x i16> %2 | |
687 } | |
688 | |
689 define <16 x i16> @max_ge_v16i16(<16 x i16> %a, <16 x i16> %b) { | |
690 ; SSE-LABEL: max_ge_v16i16: | |
691 ; SSE: # BB#0: | |
692 ; SSE-NEXT: pmaxsw %xmm2, %xmm0 | |
693 ; SSE-NEXT: pmaxsw %xmm3, %xmm1 | |
694 ; SSE-NEXT: retq | |
695 ; | |
696 ; AVX1-LABEL: max_ge_v16i16: | |
697 ; AVX1: # BB#0: | |
698 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | |
699 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | |
700 ; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2 | |
701 ; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 | |
702 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 | |
703 ; AVX1-NEXT: retq | |
704 ; | |
705 ; AVX2-LABEL: max_ge_v16i16: | |
706 ; AVX2: # BB#0: | |
707 ; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 | |
708 ; AVX2-NEXT: retq | |
709 ; | |
710 ; AVX512-LABEL: max_ge_v16i16: | |
711 ; AVX512: # BB#0: | |
712 ; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 | |
713 ; AVX512-NEXT: retq | |
714 %1 = icmp sge <16 x i16> %a, %b | |
715 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b | |
716 ret <16 x i16> %2 | |
717 } | |
718 | |
719 define <16 x i8> @max_ge_v16i8(<16 x i8> %a, <16 x i8> %b) { | |
720 ; SSE2-LABEL: max_ge_v16i8: | |
721 ; SSE2: # BB#0: | |
722 ; SSE2-NEXT: movdqa %xmm1, %xmm3 | |
723 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm3 | |
724 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 | |
725 ; SSE2-NEXT: pxor %xmm3, %xmm2 | |
726 ; SSE2-NEXT: pandn %xmm0, %xmm3 | |
727 ; SSE2-NEXT: pandn %xmm1, %xmm2 | |
728 ; SSE2-NEXT: por %xmm3, %xmm2 | |
729 ; SSE2-NEXT: movdqa %xmm2, %xmm0 | |
730 ; SSE2-NEXT: retq | |
731 ; | |
732 ; SSE41-LABEL: max_ge_v16i8: | |
733 ; SSE41: # BB#0: | |
734 ; SSE41-NEXT: pmaxsb %xmm1, %xmm0 | |
735 ; SSE41-NEXT: retq | |
736 ; | |
737 ; SSE42-LABEL: max_ge_v16i8: | |
738 ; SSE42: # BB#0: | |
739 ; SSE42-NEXT: pmaxsb %xmm1, %xmm0 | |
740 ; SSE42-NEXT: retq | |
741 ; | |
742 ; AVX-LABEL: max_ge_v16i8: | |
743 ; AVX: # BB#0: | |
744 ; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 | |
745 ; AVX-NEXT: retq | |
746 %1 = icmp sge <16 x i8> %a, %b | |
747 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b | |
748 ret <16 x i8> %2 | |
749 } | |
750 | |
751 define <32 x i8> @max_ge_v32i8(<32 x i8> %a, <32 x i8> %b) { | |
752 ; SSE2-LABEL: max_ge_v32i8: | |
753 ; SSE2: # BB#0: | |
754 ; SSE2-NEXT: movdqa %xmm3, %xmm6 | |
755 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm6 | |
756 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm4 | |
757 ; SSE2-NEXT: movdqa %xmm6, %xmm5 | |
758 ; SSE2-NEXT: pxor %xmm4, %xmm5 | |
759 ; SSE2-NEXT: movdqa %xmm2, %xmm7 | |
760 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm7 | |
761 ; SSE2-NEXT: pxor %xmm7, %xmm4 | |
762 ; SSE2-NEXT: pandn %xmm0, %xmm7 | |
763 ; SSE2-NEXT: pandn %xmm2, %xmm4 | |
764 ; SSE2-NEXT: por %xmm7, %xmm4 | |
765 ; SSE2-NEXT: pandn %xmm1, %xmm6 | |
766 ; SSE2-NEXT: pandn %xmm3, %xmm5 | |
767 ; SSE2-NEXT: por %xmm6, %xmm5 | |
768 ; SSE2-NEXT: movdqa %xmm4, %xmm0 | |
769 ; SSE2-NEXT: movdqa %xmm5, %xmm1 | |
770 ; SSE2-NEXT: retq | |
771 ; | |
772 ; SSE41-LABEL: max_ge_v32i8: | |
773 ; SSE41: # BB#0: | |
774 ; SSE41-NEXT: pmaxsb %xmm2, %xmm0 | |
775 ; SSE41-NEXT: pmaxsb %xmm3, %xmm1 | |
776 ; SSE41-NEXT: retq | |
777 ; | |
778 ; SSE42-LABEL: max_ge_v32i8: | |
779 ; SSE42: # BB#0: | |
780 ; SSE42-NEXT: pmaxsb %xmm2, %xmm0 | |
781 ; SSE42-NEXT: pmaxsb %xmm3, %xmm1 | |
782 ; SSE42-NEXT: retq | |
783 ; | |
784 ; AVX1-LABEL: max_ge_v32i8: | |
785 ; AVX1: # BB#0: | |
786 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | |
787 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | |
788 ; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2 | |
789 ; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 | |
790 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 | |
791 ; AVX1-NEXT: retq | |
792 ; | |
793 ; AVX2-LABEL: max_ge_v32i8: | |
794 ; AVX2: # BB#0: | |
795 ; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 | |
796 ; AVX2-NEXT: retq | |
797 ; | |
798 ; AVX512-LABEL: max_ge_v32i8: | |
799 ; AVX512: # BB#0: | |
800 ; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 | |
801 ; AVX512-NEXT: retq | |
802 %1 = icmp sge <32 x i8> %a, %b | |
803 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b | |
804 ret <32 x i8> %2 | |
805 } | |
806 | |
807 ; | |
808 ; Signed Minimum (LT) | |
809 ; | |
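; "icmp slt + select" mirrors the GT tests with the compare operands swapped:
; the mask is b > a and selects %a where true, so no inversion is needed.
; pminsw covers i16 already on SSE2, pminsd/pminsb appear with SSE4.1, and
; the i64 cases reuse the same 64-bit compare emulation (or pcmpgtq on
; SSE4.2 and later).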
810 | |
811 define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) { | |
812 ; SSE2-LABEL: min_lt_v2i64: | |
813 ; SSE2: # BB#0: | |
814 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] | |
815 ; SSE2-NEXT: movdqa %xmm0, %xmm3 | |
816 ; SSE2-NEXT: pxor %xmm2, %xmm3 | |
817 ; SSE2-NEXT: pxor %xmm1, %xmm2 | |
818 ; SSE2-NEXT: movdqa %xmm2, %xmm4 | |
819 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 | |
820 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] | |
821 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 | |
822 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] | |
823 ; SSE2-NEXT: pand %xmm5, %xmm2 | |
824 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] | |
825 ; SSE2-NEXT: por %xmm2, %xmm3 | |
826 ; SSE2-NEXT: pand %xmm3, %xmm0 | |
827 ; SSE2-NEXT: pandn %xmm1, %xmm3 | |
828 ; SSE2-NEXT: por %xmm3, %xmm0 | |
829 ; SSE2-NEXT: retq | |
830 ; | |
831 ; SSE41-LABEL: min_lt_v2i64: | |
832 ; SSE41: # BB#0: | |
833 ; SSE41-NEXT: movdqa %xmm0, %xmm2 | |
834 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
835 ; SSE41-NEXT: movdqa %xmm2, %xmm3 | |
836 ; SSE41-NEXT: pxor %xmm0, %xmm3 | |
837 ; SSE41-NEXT: pxor %xmm1, %xmm0 | |
838 ; SSE41-NEXT: movdqa %xmm0, %xmm4 | |
839 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4 | |
840 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] | |
841 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0 | |
842 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] | |
843 ; SSE41-NEXT: pand %xmm5, %xmm3 | |
844 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] | |
845 ; SSE41-NEXT: por %xmm3, %xmm0 | |
846 ; SSE41-NEXT: blendvpd %xmm2, %xmm1 | |
847 ; SSE41-NEXT: movapd %xmm1, %xmm0 | |
848 ; SSE41-NEXT: retq | |
849 ; | |
850 ; SSE42-LABEL: min_lt_v2i64: | |
851 ; SSE42: # BB#0: | |
852 ; SSE42-NEXT: movdqa %xmm0, %xmm2 | |
853 ; SSE42-NEXT: movdqa %xmm1, %xmm0 | |
854 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 | |
855 ; SSE42-NEXT: blendvpd %xmm2, %xmm1 | |
856 ; SSE42-NEXT: movapd %xmm1, %xmm0 | |
857 ; SSE42-NEXT: retq | |
858 ; | |
859 ; AVX-LABEL: min_lt_v2i64: | |
860 ; AVX: # BB#0: | |
861 ; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 | |
862 ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 | |
863 ; AVX-NEXT: retq | |
864 %1 = icmp slt <2 x i64> %a, %b | |
865 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b | |
866 ret <2 x i64> %2 | |
867 } | |
868 | |
869 define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) { | |
870 ; SSE2-LABEL: min_lt_v4i64: | |
871 ; SSE2: # BB#0: | |
872 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0] | |
873 ; SSE2-NEXT: movdqa %xmm1, %xmm5 | |
874 ; SSE2-NEXT: pxor %xmm4, %xmm5 | |
875 ; SSE2-NEXT: movdqa %xmm3, %xmm6 | |
876 ; SSE2-NEXT: pxor %xmm4, %xmm6 | |
877 ; SSE2-NEXT: movdqa %xmm6, %xmm7 | |
878 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7 | |
879 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] | |
880 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm6 | |
881 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3] | |
882 ; SSE2-NEXT: pand %xmm8, %xmm5 | |
883 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3] | |
884 ; SSE2-NEXT: por %xmm5, %xmm6 | |
885 ; SSE2-NEXT: movdqa %xmm0, %xmm5 | |
886 ; SSE2-NEXT: pxor %xmm4, %xmm5 | |
887 ; SSE2-NEXT: pxor %xmm2, %xmm4 | |
888 ; SSE2-NEXT: movdqa %xmm4, %xmm7 | |
889 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7 | |
890 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] | |
891 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm4 | |
892 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] | |
893 ; SSE2-NEXT: pand %xmm8, %xmm4 | |
894 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] | |
895 ; SSE2-NEXT: por %xmm4, %xmm5 | |
896 ; SSE2-NEXT: pand %xmm5, %xmm0 | |
897 ; SSE2-NEXT: pandn %xmm2, %xmm5 | |
898 ; SSE2-NEXT: por %xmm5, %xmm0 | |
899 ; SSE2-NEXT: pand %xmm6, %xmm1 | |
900 ; SSE2-NEXT: pandn %xmm3, %xmm6 | |
901 ; SSE2-NEXT: por %xmm6, %xmm1 | |
902 ; SSE2-NEXT: retq | |
903 ; | |
904 ; SSE41-LABEL: min_lt_v4i64: | |
905 ; SSE41: # BB#0: | |
906 ; SSE41-NEXT: movdqa %xmm0, %xmm8 | |
907 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
908 ; SSE41-NEXT: movdqa %xmm1, %xmm5 | |
909 ; SSE41-NEXT: pxor %xmm0, %xmm5 | |
910 ; SSE41-NEXT: movdqa %xmm3, %xmm6 | |
911 ; SSE41-NEXT: pxor %xmm0, %xmm6 | |
912 ; SSE41-NEXT: movdqa %xmm6, %xmm7 | |
913 ; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 | |
914 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2] | |
915 ; SSE41-NEXT: pcmpeqd %xmm5, %xmm6 | |
916 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] | |
917 ; SSE41-NEXT: pand %xmm4, %xmm6 | |
918 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] | |
919 ; SSE41-NEXT: por %xmm6, %xmm5 | |
920 ; SSE41-NEXT: movdqa %xmm8, %xmm4 | |
921 ; SSE41-NEXT: pxor %xmm0, %xmm4 | |
922 ; SSE41-NEXT: pxor %xmm2, %xmm0 | |
923 ; SSE41-NEXT: movdqa %xmm0, %xmm6 | |
924 ; SSE41-NEXT: pcmpgtd %xmm4, %xmm6 | |
925 ; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] | |
926 ; SSE41-NEXT: pcmpeqd %xmm4, %xmm0 | |
927 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] | |
928 ; SSE41-NEXT: pand %xmm7, %xmm4 | |
929 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] | |
930 ; SSE41-NEXT: por %xmm4, %xmm0 | |
931 ; SSE41-NEXT: blendvpd %xmm8, %xmm2 | |
932 ; SSE41-NEXT: movdqa %xmm5, %xmm0 | |
933 ; SSE41-NEXT: blendvpd %xmm1, %xmm3 | |
934 ; SSE41-NEXT: movapd %xmm2, %xmm0 | |
935 ; SSE41-NEXT: movapd %xmm3, %xmm1 | |
936 ; SSE41-NEXT: retq | |
937 ; | |
938 ; SSE42-LABEL: min_lt_v4i64: | |
939 ; SSE42: # BB#0: | |
940 ; SSE42-NEXT: movdqa %xmm0, %xmm4 | |
941 ; SSE42-NEXT: movdqa %xmm3, %xmm5 | |
942 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm5 | |
943 ; SSE42-NEXT: movdqa %xmm2, %xmm0 | |
944 ; SSE42-NEXT: pcmpgtq %xmm4, %xmm0 | |
945 ; SSE42-NEXT: blendvpd %xmm4, %xmm2 | |
946 ; SSE42-NEXT: movdqa %xmm5, %xmm0 | |
947 ; SSE42-NEXT: blendvpd %xmm1, %xmm3 | |
948 ; SSE42-NEXT: movapd %xmm2, %xmm0 | |
949 ; SSE42-NEXT: movapd %xmm3, %xmm1 | |
950 ; SSE42-NEXT: retq | |
951 ; | |
952 ; AVX1-LABEL: min_lt_v4i64: | |
953 ; AVX1: # BB#0: | |
954 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 | |
955 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 | |
956 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 | |
957 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3 | |
958 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 | |
959 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
960 ; AVX1-NEXT: retq | |
961 ; | |
962 ; AVX2-LABEL: min_lt_v4i64: | |
963 ; AVX2: # BB#0: | |
964 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 | |
965 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
966 ; AVX2-NEXT: retq | |
967 ; | |
968 ; AVX512-LABEL: min_lt_v4i64: | |
969 ; AVX512: # BB#0: | |
970 ; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 | |
971 ; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
972 ; AVX512-NEXT: retq | |
973 %1 = icmp slt <4 x i64> %a, %b | |
974 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b | |
975 ret <4 x i64> %2 | |
976 } | |
977 | |
978 define <4 x i32> @min_lt_v4i32(<4 x i32> %a, <4 x i32> %b) { | |
979 ; SSE2-LABEL: min_lt_v4i32: | |
980 ; SSE2: # BB#0: | |
981 ; SSE2-NEXT: movdqa %xmm1, %xmm2 | |
982 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 | |
983 ; SSE2-NEXT: pand %xmm2, %xmm0 | |
984 ; SSE2-NEXT: pandn %xmm1, %xmm2 | |
985 ; SSE2-NEXT: por %xmm2, %xmm0 | |
986 ; SSE2-NEXT: retq | |
987 ; | |
988 ; SSE41-LABEL: min_lt_v4i32: | |
989 ; SSE41: # BB#0: | |
990 ; SSE41-NEXT: pminsd %xmm1, %xmm0 | |
991 ; SSE41-NEXT: retq | |
992 ; | |
993 ; SSE42-LABEL: min_lt_v4i32: | |
994 ; SSE42: # BB#0: | |
995 ; SSE42-NEXT: pminsd %xmm1, %xmm0 | |
996 ; SSE42-NEXT: retq | |
997 ; | |
998 ; AVX-LABEL: min_lt_v4i32: | |
999 ; AVX: # BB#0: | |
1000 ; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 | |
1001 ; AVX-NEXT: retq | |
1002 %1 = icmp slt <4 x i32> %a, %b | |
1003 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b | |
1004 ret <4 x i32> %2 | |
1005 } | |
1006 | |
1007 define <8 x i32> @min_lt_v8i32(<8 x i32> %a, <8 x i32> %b) { | |
1008 ; SSE2-LABEL: min_lt_v8i32: | |
1009 ; SSE2: # BB#0: | |
1010 ; SSE2-NEXT: movdqa %xmm3, %xmm4 | |
1011 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 | |
1012 ; SSE2-NEXT: movdqa %xmm2, %xmm5 | |
1013 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm5 | |
1014 ; SSE2-NEXT: pand %xmm5, %xmm0 | |
1015 ; SSE2-NEXT: pandn %xmm2, %xmm5 | |
1016 ; SSE2-NEXT: por %xmm5, %xmm0 | |
1017 ; SSE2-NEXT: pand %xmm4, %xmm1 | |
1018 ; SSE2-NEXT: pandn %xmm3, %xmm4 | |
1019 ; SSE2-NEXT: por %xmm4, %xmm1 | |
1020 ; SSE2-NEXT: retq | |
1021 ; | |
1022 ; SSE41-LABEL: min_lt_v8i32: | |
1023 ; SSE41: # BB#0: | |
1024 ; SSE41-NEXT: pminsd %xmm2, %xmm0 | |
1025 ; SSE41-NEXT: pminsd %xmm3, %xmm1 | |
1026 ; SSE41-NEXT: retq | |
1027 ; | |
1028 ; SSE42-LABEL: min_lt_v8i32: | |
1029 ; SSE42: # BB#0: | |
1030 ; SSE42-NEXT: pminsd %xmm2, %xmm0 | |
1031 ; SSE42-NEXT: pminsd %xmm3, %xmm1 | |
1032 ; SSE42-NEXT: retq | |
1033 ; | |
1034 ; AVX1-LABEL: min_lt_v8i32: | |
1035 ; AVX1: # BB#0: | |
1036 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | |
1037 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | |
1038 ; AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2 | |
1039 ; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 | |
1040 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 | |
1041 ; AVX1-NEXT: retq | |
1042 ; | |
1043 ; AVX2-LABEL: min_lt_v8i32: | |
1044 ; AVX2: # BB#0: | |
1045 ; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 | |
1046 ; AVX2-NEXT: retq | |
1047 ; | |
1048 ; AVX512-LABEL: min_lt_v8i32: | |
1049 ; AVX512: # BB#0: | |
1050 ; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0 | |
1051 ; AVX512-NEXT: retq | |
1052 %1 = icmp slt <8 x i32> %a, %b | |
1053 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b | |
1054 ret <8 x i32> %2 | |
1055 } | |
1056 | |
1057 define <8 x i16> @min_lt_v8i16(<8 x i16> %a, <8 x i16> %b) { | |
1058 ; SSE-LABEL: min_lt_v8i16: | |
1059 ; SSE: # BB#0: | |
1060 ; SSE-NEXT: pminsw %xmm1, %xmm0 | |
1061 ; SSE-NEXT: retq | |
1062 ; | |
1063 ; AVX-LABEL: min_lt_v8i16: | |
1064 ; AVX: # BB#0: | |
1065 ; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 | |
1066 ; AVX-NEXT: retq | |
1067 %1 = icmp slt <8 x i16> %a, %b | |
1068 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b | |
1069 ret <8 x i16> %2 | |
1070 } | |
1071 | |
1072 define <16 x i16> @min_lt_v16i16(<16 x i16> %a, <16 x i16> %b) { | |
1073 ; SSE-LABEL: min_lt_v16i16: | |
1074 ; SSE: # BB#0: | |
1075 ; SSE-NEXT: pminsw %xmm2, %xmm0 | |
1076 ; SSE-NEXT: pminsw %xmm3, %xmm1 | |
1077 ; SSE-NEXT: retq | |
1078 ; | |
1079 ; AVX1-LABEL: min_lt_v16i16: | |
1080 ; AVX1: # BB#0: | |
1081 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | |
1082 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | |
1083 ; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2 | |
1084 ; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 | |
1085 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 | |
1086 ; AVX1-NEXT: retq | |
1087 ; | |
1088 ; AVX2-LABEL: min_lt_v16i16: | |
1089 ; AVX2: # BB#0: | |
1090 ; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0 | |
1091 ; AVX2-NEXT: retq | |
1092 ; | |
1093 ; AVX512-LABEL: min_lt_v16i16: | |
1094 ; AVX512: # BB#0: | |
1095 ; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0 | |
1096 ; AVX512-NEXT: retq | |
1097 %1 = icmp slt <16 x i16> %a, %b | |
1098 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b | |
1099 ret <16 x i16> %2 | |
1100 } | |
1101 | |
1102 define <16 x i8> @min_lt_v16i8(<16 x i8> %a, <16 x i8> %b) { | |
1103 ; SSE2-LABEL: min_lt_v16i8: | |
1104 ; SSE2: # BB#0: | |
1105 ; SSE2-NEXT: movdqa %xmm1, %xmm2 | |
1106 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm2 | |
1107 ; SSE2-NEXT: pand %xmm2, %xmm0 | |
1108 ; SSE2-NEXT: pandn %xmm1, %xmm2 | |
1109 ; SSE2-NEXT: por %xmm2, %xmm0 | |
1110 ; SSE2-NEXT: retq | |
1111 ; | |
1112 ; SSE41-LABEL: min_lt_v16i8: | |
1113 ; SSE41: # BB#0: | |
1114 ; SSE41-NEXT: pminsb %xmm1, %xmm0 | |
1115 ; SSE41-NEXT: retq | |
1116 ; | |
1117 ; SSE42-LABEL: min_lt_v16i8: | |
1118 ; SSE42: # BB#0: | |
1119 ; SSE42-NEXT: pminsb %xmm1, %xmm0 | |
1120 ; SSE42-NEXT: retq | |
1121 ; | |
1122 ; AVX-LABEL: min_lt_v16i8: | |
1123 ; AVX: # BB#0: | |
1124 ; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 | |
1125 ; AVX-NEXT: retq | |
1126 %1 = icmp slt <16 x i8> %a, %b | |
1127 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b | |
1128 ret <16 x i8> %2 | |
1129 } | |
1130 | |
1131 define <32 x i8> @min_lt_v32i8(<32 x i8> %a, <32 x i8> %b) { | |
1132 ; SSE2-LABEL: min_lt_v32i8: | |
1133 ; SSE2: # BB#0: | |
1134 ; SSE2-NEXT: movdqa %xmm3, %xmm4 | |
1135 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm4 | |
1136 ; SSE2-NEXT: movdqa %xmm2, %xmm5 | |
1137 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm5 | |
1138 ; SSE2-NEXT: pand %xmm5, %xmm0 | |
1139 ; SSE2-NEXT: pandn %xmm2, %xmm5 | |
1140 ; SSE2-NEXT: por %xmm5, %xmm0 | |
1141 ; SSE2-NEXT: pand %xmm4, %xmm1 | |
1142 ; SSE2-NEXT: pandn %xmm3, %xmm4 | |
1143 ; SSE2-NEXT: por %xmm4, %xmm1 | |
1144 ; SSE2-NEXT: retq | |
1145 ; | |
1146 ; SSE41-LABEL: min_lt_v32i8: | |
1147 ; SSE41: # BB#0: | |
1148 ; SSE41-NEXT: pminsb %xmm2, %xmm0 | |
1149 ; SSE41-NEXT: pminsb %xmm3, %xmm1 | |
1150 ; SSE41-NEXT: retq | |
1151 ; | |
1152 ; SSE42-LABEL: min_lt_v32i8: | |
1153 ; SSE42: # BB#0: | |
1154 ; SSE42-NEXT: pminsb %xmm2, %xmm0 | |
1155 ; SSE42-NEXT: pminsb %xmm3, %xmm1 | |
1156 ; SSE42-NEXT: retq | |
1157 ; | |
1158 ; AVX1-LABEL: min_lt_v32i8: | |
1159 ; AVX1: # BB#0: | |
1160 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | |
1161 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | |
1162 ; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2 | |
1163 ; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 | |
1164 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 | |
1165 ; AVX1-NEXT: retq | |
1166 ; | |
1167 ; AVX2-LABEL: min_lt_v32i8: | |
1168 ; AVX2: # BB#0: | |
1169 ; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 | |
1170 ; AVX2-NEXT: retq | |
1171 ; | |
1172 ; AVX512-LABEL: min_lt_v32i8: | |
1173 ; AVX512: # BB#0: | |
1174 ; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 | |
1175 ; AVX512-NEXT: retq | |
1176 %1 = icmp slt <32 x i8> %a, %b | |
1177 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b | |
1178 ret <32 x i8> %2 | |
1179 } | |
1180 | |
1181 ; | |
1182 ; Signed Minimum (LE) | |
1183 ; | |
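; "icmp sle" is handled like GE but with the operands in the original order:
; compute a > b, invert the mask with an all-ones XOR, and select with the
; inverted result. Widths with a native signed min again fold to pmins*.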
1184 | |
1185 define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) { | |
1186 ; SSE2-LABEL: min_le_v2i64: | |
1187 ; SSE2: # BB#0: | |
1188 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] | |
1189 ; SSE2-NEXT: movdqa %xmm1, %xmm3 | |
1190 ; SSE2-NEXT: pxor %xmm2, %xmm3 | |
1191 ; SSE2-NEXT: pxor %xmm0, %xmm2 | |
1192 ; SSE2-NEXT: movdqa %xmm2, %xmm4 | |
1193 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 | |
1194 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] | |
1195 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 | |
1196 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] | |
1197 ; SSE2-NEXT: pand %xmm5, %xmm2 | |
1198 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] | |
1199 ; SSE2-NEXT: por %xmm2, %xmm3 | |
1200 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 | |
1201 ; SSE2-NEXT: pxor %xmm3, %xmm2 | |
1202 ; SSE2-NEXT: pandn %xmm0, %xmm3 | |
1203 ; SSE2-NEXT: pandn %xmm1, %xmm2 | |
1204 ; SSE2-NEXT: por %xmm3, %xmm2 | |
1205 ; SSE2-NEXT: movdqa %xmm2, %xmm0 | |
1206 ; SSE2-NEXT: retq | |
1207 ; | |
1208 ; SSE41-LABEL: min_le_v2i64: | |
1209 ; SSE41: # BB#0: | |
1210 ; SSE41-NEXT: movdqa %xmm0, %xmm2 | |
1211 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
1212 ; SSE41-NEXT: movdqa %xmm1, %xmm3 | |
1213 ; SSE41-NEXT: pxor %xmm0, %xmm3 | |
1214 ; SSE41-NEXT: pxor %xmm2, %xmm0 | |
1215 ; SSE41-NEXT: movdqa %xmm0, %xmm4 | |
1216 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4 | |
1217 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] | |
1218 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0 | |
1219 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] | |
1220 ; SSE41-NEXT: pand %xmm5, %xmm0 | |
1221 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] | |
1222 ; SSE41-NEXT: por %xmm0, %xmm3 | |
1223 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 | |
1224 ; SSE41-NEXT: pxor %xmm3, %xmm0 | |
1225 ; SSE41-NEXT: blendvpd %xmm2, %xmm1 | |
1226 ; SSE41-NEXT: movapd %xmm1, %xmm0 | |
1227 ; SSE41-NEXT: retq | |
1228 ; | |
1229 ; SSE42-LABEL: min_le_v2i64: | |
1230 ; SSE42: # BB#0: | |
1231 ; SSE42-NEXT: movdqa %xmm0, %xmm2 | |
1232 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 | |
1233 ; SSE42-NEXT: pcmpeqd %xmm3, %xmm3 | |
1234 ; SSE42-NEXT: pxor %xmm3, %xmm0 | |
1235 ; SSE42-NEXT: blendvpd %xmm2, %xmm1 | |
1236 ; SSE42-NEXT: movapd %xmm1, %xmm0 | |
1237 ; SSE42-NEXT: retq | |
1238 ; | |
1239 ; AVX-LABEL: min_le_v2i64: | |
1240 ; AVX: # BB#0: | |
1241 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 | |
1242 ; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 | |
1243 ; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2 | |
1244 ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 | |
1245 ; AVX-NEXT: retq | |
1246 %1 = icmp sle <2 x i64> %a, %b | |
1247 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b | |
1248 ret <2 x i64> %2 | |
1249 } | |
1250 | |
1251 define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) { | |
1252 ; SSE2-LABEL: min_le_v4i64: | |
1253 ; SSE2: # BB#0: | |
1254 ; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,0,2147483648,0] | |
1255 ; SSE2-NEXT: movdqa %xmm3, %xmm4 | |
1256 ; SSE2-NEXT: pxor %xmm7, %xmm4 | |
1257 ; SSE2-NEXT: movdqa %xmm1, %xmm5 | |
1258 ; SSE2-NEXT: pxor %xmm7, %xmm5 | |
1259 ; SSE2-NEXT: movdqa %xmm5, %xmm6 | |
1260 ; SSE2-NEXT: pcmpgtd %xmm4, %xmm6 | |
1261 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2] | |
1262 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm5 | |
1263 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] | |
1264 ; SSE2-NEXT: pand %xmm8, %xmm4 | |
1265 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3] | |
1266 ; SSE2-NEXT: por %xmm4, %xmm8 | |
1267 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm4 | |
1268 ; SSE2-NEXT: movdqa %xmm8, %xmm9 | |
1269 ; SSE2-NEXT: pxor %xmm4, %xmm9 | |
1270 ; SSE2-NEXT: movdqa %xmm2, %xmm6 | |
1271 ; SSE2-NEXT: pxor %xmm7, %xmm6 | |
1272 ; SSE2-NEXT: pxor %xmm0, %xmm7 | |
1273 ; SSE2-NEXT: movdqa %xmm7, %xmm5 | |
1274 ; SSE2-NEXT: pcmpgtd %xmm6, %xmm5 | |
1275 ; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2] | |
1276 ; SSE2-NEXT: pcmpeqd %xmm6, %xmm7 | |
1277 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3] | |
1278 ; SSE2-NEXT: pand %xmm10, %xmm6 | |
1279 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] | |
1280 ; SSE2-NEXT: por %xmm6, %xmm5 | |
1281 ; SSE2-NEXT: pxor %xmm5, %xmm4 | |
1282 ; SSE2-NEXT: pandn %xmm0, %xmm5 | |
1283 ; SSE2-NEXT: pandn %xmm2, %xmm4 | |
1284 ; SSE2-NEXT: por %xmm5, %xmm4 | |
1285 ; SSE2-NEXT: pandn %xmm1, %xmm8 | |
1286 ; SSE2-NEXT: pandn %xmm3, %xmm9 | |
1287 ; SSE2-NEXT: por %xmm8, %xmm9 | |
1288 ; SSE2-NEXT: movdqa %xmm4, %xmm0 | |
1289 ; SSE2-NEXT: movdqa %xmm9, %xmm1 | |
1290 ; SSE2-NEXT: retq | |
1291 ; | |
1292 ; SSE41-LABEL: min_le_v4i64: | |
1293 ; SSE41: # BB#0: | |
1294 ; SSE41-NEXT: movdqa %xmm0, %xmm8 | |
1295 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
1296 ; SSE41-NEXT: movdqa %xmm3, %xmm5 | |
1297 ; SSE41-NEXT: pxor %xmm0, %xmm5 | |
1298 ; SSE41-NEXT: movdqa %xmm1, %xmm6 | |
1299 ; SSE41-NEXT: pxor %xmm0, %xmm6 | |
1300 ; SSE41-NEXT: movdqa %xmm6, %xmm7 | |
1301 ; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 | |
1302 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2] | |
1303 ; SSE41-NEXT: pcmpeqd %xmm5, %xmm6 | |
1304 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] | |
1305 ; SSE41-NEXT: pand %xmm4, %xmm6 | |
1306 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] | |
1307 ; SSE41-NEXT: por %xmm6, %xmm5 | |
1308 ; SSE41-NEXT: pcmpeqd %xmm9, %xmm9 | |
1309 ; SSE41-NEXT: pxor %xmm9, %xmm5 | |
1310 ; SSE41-NEXT: movdqa %xmm2, %xmm6 | |
1311 ; SSE41-NEXT: pxor %xmm0, %xmm6 | |
1312 ; SSE41-NEXT: pxor %xmm8, %xmm0 | |
1313 ; SSE41-NEXT: movdqa %xmm0, %xmm7 | |
1314 ; SSE41-NEXT: pcmpgtd %xmm6, %xmm7 | |
1315 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2] | |
1316 ; SSE41-NEXT: pcmpeqd %xmm6, %xmm0 | |
1317 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] | |
1318 ; SSE41-NEXT: pand %xmm4, %xmm6 | |
1319 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3] | |
1320 ; SSE41-NEXT: por %xmm6, %xmm0 | |
1321 ; SSE41-NEXT: pxor %xmm9, %xmm0 | |
1322 ; SSE41-NEXT: blendvpd %xmm8, %xmm2 | |
1323 ; SSE41-NEXT: movdqa %xmm5, %xmm0 | |
1324 ; SSE41-NEXT: blendvpd %xmm1, %xmm3 | |
1325 ; SSE41-NEXT: movapd %xmm2, %xmm0 | |
1326 ; SSE41-NEXT: movapd %xmm3, %xmm1 | |
1327 ; SSE41-NEXT: retq | |
1328 ; | |
1329 ; SSE42-LABEL: min_le_v4i64: | |
1330 ; SSE42: # BB#0: | |
1331 ; SSE42-NEXT: movdqa %xmm0, %xmm4 | |
1332 ; SSE42-NEXT: movdqa %xmm1, %xmm5 | |
1333 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm5 | |
1334 ; SSE42-NEXT: pcmpeqd %xmm6, %xmm6 | |
1335 ; SSE42-NEXT: pxor %xmm6, %xmm5 | |
1336 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 | |
1337 ; SSE42-NEXT: pxor %xmm6, %xmm0 | |
1338 ; SSE42-NEXT: blendvpd %xmm4, %xmm2 | |
1339 ; SSE42-NEXT: movdqa %xmm5, %xmm0 | |
1340 ; SSE42-NEXT: blendvpd %xmm1, %xmm3 | |
1341 ; SSE42-NEXT: movapd %xmm2, %xmm0 | |
1342 ; SSE42-NEXT: movapd %xmm3, %xmm1 | |
1343 ; SSE42-NEXT: retq | |
1344 ; | |
1345 ; AVX1-LABEL: min_le_v4i64: | |
1346 ; AVX1: # BB#0: | |
1347 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | |
1348 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | |
1349 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 | |
1350 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 | |
1351 ; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2 | |
1352 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm4 | |
1353 ; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3 | |
1354 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 | |
1355 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
1356 ; AVX1-NEXT: retq | |
1357 ; | |
1358 ; AVX2-LABEL: min_le_v4i64: | |
1359 ; AVX2: # BB#0: | |
1360 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 | |
1361 ; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 | |
1362 ; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2 | |
1363 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
1364 ; AVX2-NEXT: retq | |
1365 ; | |
1366 ; AVX512-LABEL: min_le_v4i64: | |
1367 ; AVX512: # BB#0: | |
1368 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 | |
1369 ; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 | |
1370 ; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2 | |
1371 ; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
1372 ; AVX512-NEXT: retq | |
1373 %1 = icmp sle <4 x i64> %a, %b | |
1374 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b | |
1375 ret <4 x i64> %2 | |
1376 } | |
1377 | |
1378 define <4 x i32> @min_le_v4i32(<4 x i32> %a, <4 x i32> %b) { | |
1379 ; SSE2-LABEL: min_le_v4i32: | |
1380 ; SSE2: # BB#0: | |
1381 ; SSE2-NEXT: movdqa %xmm0, %xmm2 | |
1382 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 | |
1383 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 | |
1384 ; SSE2-NEXT: pxor %xmm2, %xmm3 | |
1385 ; SSE2-NEXT: pandn %xmm0, %xmm2 | |
1386 ; SSE2-NEXT: pandn %xmm1, %xmm3 | |
1387 ; SSE2-NEXT: por %xmm3, %xmm2 | |
1388 ; SSE2-NEXT: movdqa %xmm2, %xmm0 | |
1389 ; SSE2-NEXT: retq | |
1390 ; | |
1391 ; SSE41-LABEL: min_le_v4i32: | |
1392 ; SSE41: # BB#0: | |
1393 ; SSE41-NEXT: pminsd %xmm1, %xmm0 | |
1394 ; SSE41-NEXT: retq | |
1395 ; | |
1396 ; SSE42-LABEL: min_le_v4i32: | |
1397 ; SSE42: # BB#0: | |
1398 ; SSE42-NEXT: pminsd %xmm1, %xmm0 | |
1399 ; SSE42-NEXT: retq | |
1400 ; | |
1401 ; AVX-LABEL: min_le_v4i32: | |
1402 ; AVX: # BB#0: | |
1403 ; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 | |
1404 ; AVX-NEXT: retq | |
1405 %1 = icmp sle <4 x i32> %a, %b | |
1406 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b | |
1407 ret <4 x i32> %2 | |
1408 } | |
1409 | |
1410 define <8 x i32> @min_le_v8i32(<8 x i32> %a, <8 x i32> %b) { | |
1411 ; SSE2-LABEL: min_le_v8i32: | |
1412 ; SSE2: # BB#0: | |
1413 ; SSE2-NEXT: movdqa %xmm1, %xmm6 | |
1414 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 | |
1415 ; SSE2-NEXT: pcmpeqd %xmm7, %xmm7 | |
1416 ; SSE2-NEXT: movdqa %xmm6, %xmm4 | |
1417 ; SSE2-NEXT: pxor %xmm7, %xmm4 | |
1418 ; SSE2-NEXT: movdqa %xmm0, %xmm5 | |
1419 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5 | |
1420 ; SSE2-NEXT: pxor %xmm5, %xmm7 | |
1421 ; SSE2-NEXT: pandn %xmm0, %xmm5 | |
1422 ; SSE2-NEXT: pandn %xmm2, %xmm7 | |
1423 ; SSE2-NEXT: por %xmm7, %xmm5 | |
1424 ; SSE2-NEXT: pandn %xmm1, %xmm6 | |
1425 ; SSE2-NEXT: pandn %xmm3, %xmm4 | |
1426 ; SSE2-NEXT: por %xmm6, %xmm4 | |
1427 ; SSE2-NEXT: movdqa %xmm5, %xmm0 | |
1428 ; SSE2-NEXT: movdqa %xmm4, %xmm1 | |
1429 ; SSE2-NEXT: retq | |
1430 ; | |
1431 ; SSE41-LABEL: min_le_v8i32: | |
1432 ; SSE41: # BB#0: | |
1433 ; SSE41-NEXT: pminsd %xmm2, %xmm0 | |
1434 ; SSE41-NEXT: pminsd %xmm3, %xmm1 | |
1435 ; SSE41-NEXT: retq | |
1436 ; | |
1437 ; SSE42-LABEL: min_le_v8i32: | |
1438 ; SSE42: # BB#0: | |
1439 ; SSE42-NEXT: pminsd %xmm2, %xmm0 | |
1440 ; SSE42-NEXT: pminsd %xmm3, %xmm1 | |
1441 ; SSE42-NEXT: retq | |
1442 ; | |
1443 ; AVX1-LABEL: min_le_v8i32: | |
1444 ; AVX1: # BB#0: | |
1445 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | |
1446 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | |
1447 ; AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2 | |
1448 ; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 | |
1449 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 | |
1450 ; AVX1-NEXT: retq | |
1451 ; | |
1452 ; AVX2-LABEL: min_le_v8i32: | |
1453 ; AVX2: # BB#0: | |
1454 ; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 | |
1455 ; AVX2-NEXT: retq | |
1456 ; | |
1457 ; AVX512-LABEL: min_le_v8i32: | |
1458 ; AVX512: # BB#0: | |
1459 ; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0 | |
1460 ; AVX512-NEXT: retq | |
1461 %1 = icmp sle <8 x i32> %a, %b | |
1462 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b | |
1463 ret <8 x i32> %2 | |
1464 } | |
1465 | |
1466 define <8 x i16> @min_le_v8i16(<8 x i16> %a, <8 x i16> %b) { | |
1467 ; SSE-LABEL: min_le_v8i16: | |
1468 ; SSE: # BB#0: | |
1469 ; SSE-NEXT: pminsw %xmm1, %xmm0 | |
1470 ; SSE-NEXT: retq | |
1471 ; | |
1472 ; AVX-LABEL: min_le_v8i16: | |
1473 ; AVX: # BB#0: | |
1474 ; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 | |
1475 ; AVX-NEXT: retq | |
1476 %1 = icmp sle <8 x i16> %a, %b | |
1477 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b | |
1478 ret <8 x i16> %2 | |
1479 } | |
1480 | |
1481 define <16 x i16> @min_le_v16i16(<16 x i16> %a, <16 x i16> %b) { | |
1482 ; SSE-LABEL: min_le_v16i16: | |
1483 ; SSE: # BB#0: | |
1484 ; SSE-NEXT: pminsw %xmm2, %xmm0 | |
1485 ; SSE-NEXT: pminsw %xmm3, %xmm1 | |
1486 ; SSE-NEXT: retq | |
1487 ; | |
1488 ; AVX1-LABEL: min_le_v16i16: | |
1489 ; AVX1: # BB#0: | |
1490 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | |
1491 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | |
1492 ; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2 | |
1493 ; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 | |
1494 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 | |
1495 ; AVX1-NEXT: retq | |
1496 ; | |
1497 ; AVX2-LABEL: min_le_v16i16: | |
1498 ; AVX2: # BB#0: | |
1499 ; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0 | |
1500 ; AVX2-NEXT: retq | |
1501 ; | |
1502 ; AVX512-LABEL: min_le_v16i16: | |
1503 ; AVX512: # BB#0: | |
1504 ; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0 | |
1505 ; AVX512-NEXT: retq | |
1506 %1 = icmp sle <16 x i16> %a, %b | |
1507 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b | |
1508 ret <16 x i16> %2 | |
1509 } | |
1510 | |
1511 define <16 x i8> @min_le_v16i8(<16 x i8> %a, <16 x i8> %b) { | |
1512 ; SSE2-LABEL: min_le_v16i8: | |
1513 ; SSE2: # BB#0: | |
1514 ; SSE2-NEXT: movdqa %xmm0, %xmm2 | |
1515 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 | |
1516 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 | |
1517 ; SSE2-NEXT: pxor %xmm2, %xmm3 | |
1518 ; SSE2-NEXT: pandn %xmm0, %xmm2 | |
1519 ; SSE2-NEXT: pandn %xmm1, %xmm3 | |
1520 ; SSE2-NEXT: por %xmm3, %xmm2 | |
1521 ; SSE2-NEXT: movdqa %xmm2, %xmm0 | |
1522 ; SSE2-NEXT: retq | |
1523 ; | |
1524 ; SSE41-LABEL: min_le_v16i8: | |
1525 ; SSE41: # BB#0: | |
1526 ; SSE41-NEXT: pminsb %xmm1, %xmm0 | |
1527 ; SSE41-NEXT: retq | |
1528 ; | |
1529 ; SSE42-LABEL: min_le_v16i8: | |
1530 ; SSE42: # BB#0: | |
1531 ; SSE42-NEXT: pminsb %xmm1, %xmm0 | |
1532 ; SSE42-NEXT: retq | |
1533 ; | |
1534 ; AVX-LABEL: min_le_v16i8: | |
1535 ; AVX: # BB#0: | |
1536 ; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 | |
1537 ; AVX-NEXT: retq | |
1538 %1 = icmp sle <16 x i8> %a, %b | |
1539 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b | |
1540 ret <16 x i8> %2 | |
1541 } | |
1542 | |
1543 define <32 x i8> @min_le_v32i8(<32 x i8> %a, <32 x i8> %b) { | |
1544 ; SSE2-LABEL: min_le_v32i8: | |
1545 ; SSE2: # BB#0: | |
1546 ; SSE2-NEXT: movdqa %xmm1, %xmm6 | |
1547 ; SSE2-NEXT: pcmpgtb %xmm3, %xmm6 | |
1548 ; SSE2-NEXT: pcmpeqd %xmm7, %xmm7 | |
1549 ; SSE2-NEXT: movdqa %xmm6, %xmm4 | |
1550 ; SSE2-NEXT: pxor %xmm7, %xmm4 | |
1551 ; SSE2-NEXT: movdqa %xmm0, %xmm5 | |
1552 ; SSE2-NEXT: pcmpgtb %xmm2, %xmm5 | |
1553 ; SSE2-NEXT: pxor %xmm5, %xmm7 | |
1554 ; SSE2-NEXT: pandn %xmm0, %xmm5 | |
1555 ; SSE2-NEXT: pandn %xmm2, %xmm7 | |
1556 ; SSE2-NEXT: por %xmm7, %xmm5 | |
1557 ; SSE2-NEXT: pandn %xmm1, %xmm6 | |
1558 ; SSE2-NEXT: pandn %xmm3, %xmm4 | |
1559 ; SSE2-NEXT: por %xmm6, %xmm4 | |
1560 ; SSE2-NEXT: movdqa %xmm5, %xmm0 | |
1561 ; SSE2-NEXT: movdqa %xmm4, %xmm1 | |
1562 ; SSE2-NEXT: retq | |
1563 ; | |
1564 ; SSE41-LABEL: min_le_v32i8: | |
1565 ; SSE41: # BB#0: | |
1566 ; SSE41-NEXT: pminsb %xmm2, %xmm0 | |
1567 ; SSE41-NEXT: pminsb %xmm3, %xmm1 | |
1568 ; SSE41-NEXT: retq | |
1569 ; | |
1570 ; SSE42-LABEL: min_le_v32i8: | |
1571 ; SSE42: # BB#0: | |
1572 ; SSE42-NEXT: pminsb %xmm2, %xmm0 | |
1573 ; SSE42-NEXT: pminsb %xmm3, %xmm1 | |
1574 ; SSE42-NEXT: retq | |
1575 ; | |
1576 ; AVX1-LABEL: min_le_v32i8: | |
1577 ; AVX1: # BB#0: | |
1578 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | |
1579 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | |
1580 ; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2 | |
1581 ; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 | |
1582 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 | |
1583 ; AVX1-NEXT: retq | |
1584 ; | |
1585 ; AVX2-LABEL: min_le_v32i8: | |
1586 ; AVX2: # BB#0: | |
1587 ; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 | |
1588 ; AVX2-NEXT: retq | |
1589 ; | |
1590 ; AVX512-LABEL: min_le_v32i8: | |
1591 ; AVX512: # BB#0: | |
1592 ; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 | |
1593 ; AVX512-NEXT: retq | |
1594 %1 = icmp sle <32 x i8> %a, %b | |
1595 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b | |
1596 ret <32 x i8> %2 | |
1597 } | |
1598 | |
1599 ; | |
1600 ; Constant Folding | |
1601 ; | |
1602 | |
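; Note (editorial summary of the checks below): each operand is built by
; re-inserting a lane value that the vector constant already contains, so both
; inputs are effectively constant vectors. The i16 cases fold to a single
; constant load on every subtarget, the i32 and i8 cases fold on SSE4.1 and
; later, and the i64 cases are not folded by any of the targets checked here;
; plain SSE2 otherwise still emits the full compare-and-select sequence.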
1603 define <2 x i64> @max_gt_v2i64c() { | |
1604 ; SSE2-LABEL: max_gt_v2i64c: | |
1605 ; SSE2: # BB#0: | |
1606 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551609,7] | |
1607 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,1] | |
1608 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
1609 ; SSE2-NEXT: movdqa %xmm0, %xmm3 | |
1610 ; SSE2-NEXT: pxor %xmm2, %xmm3 | |
1611 ; SSE2-NEXT: pxor %xmm1, %xmm0 | |
1612 ; SSE2-NEXT: movdqa %xmm0, %xmm4 | |
1613 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 | |
1614 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] | |
1615 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm0 | |
1616 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] | |
1617 ; SSE2-NEXT: pand %xmm5, %xmm3 | |
1618 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] | |
1619 ; SSE2-NEXT: por %xmm3, %xmm0 | |
1620 ; SSE2-NEXT: movdqa %xmm0, %xmm3 | |
1621 ; SSE2-NEXT: pandn %xmm2, %xmm3 | |
1622 ; SSE2-NEXT: pand %xmm1, %xmm0 | |
1623 ; SSE2-NEXT: por %xmm3, %xmm0 | |
1624 ; SSE2-NEXT: retq | |
1625 ; | |
1626 ; SSE41-LABEL: max_gt_v2i64c: | |
1627 ; SSE41: # BB#0: | |
1628 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7] | |
1629 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1] | |
1630 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
1631 ; SSE41-NEXT: movdqa %xmm0, %xmm3 | |
1632 ; SSE41-NEXT: pxor %xmm1, %xmm3 | |
1633 ; SSE41-NEXT: pxor %xmm2, %xmm0 | |
1634 ; SSE41-NEXT: movdqa %xmm0, %xmm4 | |
1635 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4 | |
1636 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] | |
1637 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0 | |
1638 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] | |
1639 ; SSE41-NEXT: pand %xmm5, %xmm3 | |
1640 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] | |
1641 ; SSE41-NEXT: por %xmm3, %xmm0 | |
1642 ; SSE41-NEXT: blendvpd %xmm2, %xmm1 | |
1643 ; SSE41-NEXT: movapd %xmm1, %xmm0 | |
1644 ; SSE41-NEXT: retq | |
1645 ; | |
1646 ; SSE42-LABEL: max_gt_v2i64c: | |
1647 ; SSE42: # BB#0: | |
1648 ; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7] | |
1649 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1] | |
1650 ; SSE42-NEXT: movdqa %xmm2, %xmm0 | |
1651 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 | |
1652 ; SSE42-NEXT: blendvpd %xmm2, %xmm1 | |
1653 ; SSE42-NEXT: movapd %xmm1, %xmm0 | |
1654 ; SSE42-NEXT: retq | |
1655 ; | |
1656 ; AVX-LABEL: max_gt_v2i64c: | |
1657 ; AVX: # BB#0: | |
1658 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [18446744073709551609,7] | |
1659 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551615,1] | |
1660 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 | |
1661 ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 | |
1662 ; AVX-NEXT: retq | |
1663 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0 | |
1664 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0 | |
1665 %3 = icmp sgt <2 x i64> %1, %2 | |
1666 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 | |
1667 ret <2 x i64> %4 | |
1668 } | |
1669 | |
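; The v4i64 constant test below uses the same expansion as the v2i64 case when
; there is no native 64-bit pcmpgtq: both operands are xor'ed with the
; [2147483648,0,2147483648,0] constant so the low dwords have their sign bit
; flipped, letting the signed pcmpgtd act as an unsigned compare of the low
; halves, and the final mask is (high dwords greater) OR (high dwords equal
; AND low dwords greater), assembled with pcmpeqd, pshufd, pand and por.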
1670 define <4 x i64> @max_gt_v4i64c() { | |
1671 ; SSE2-LABEL: max_gt_v4i64c: | |
1672 ; SSE2: # BB#0: | |
1673 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551609,18446744073709551615] | |
1674 ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [1,7] | |
1675 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551615,18446744073709551609] | |
1676 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [7,1] | |
1677 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
1678 ; SSE2-NEXT: movdqa %xmm0, %xmm1 | |
1679 ; SSE2-NEXT: pxor %xmm3, %xmm1 | |
1680 ; SSE2-NEXT: movdqa %xmm0, %xmm6 | |
1681 ; SSE2-NEXT: pxor %xmm8, %xmm6 | |
1682 ; SSE2-NEXT: movdqa %xmm6, %xmm7 | |
1683 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm7 | |
1684 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[0,0,2,2] | |
1685 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm6 | |
1686 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] | |
1687 ; SSE2-NEXT: pand %xmm2, %xmm6 | |
1688 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm7[1,1,3,3] | |
1689 ; SSE2-NEXT: por %xmm6, %xmm1 | |
1690 ; SSE2-NEXT: movdqa %xmm0, %xmm2 | |
1691 ; SSE2-NEXT: pxor %xmm5, %xmm2 | |
1692 ; SSE2-NEXT: pxor %xmm4, %xmm0 | |
1693 ; SSE2-NEXT: movdqa %xmm0, %xmm6 | |
1694 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm6 | |
1695 ; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] | |
1696 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 | |
1697 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] | |
1698 ; SSE2-NEXT: pand %xmm7, %xmm2 | |
1699 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] | |
1700 ; SSE2-NEXT: por %xmm2, %xmm0 | |
1701 ; SSE2-NEXT: movdqa %xmm0, %xmm2 | |
1702 ; SSE2-NEXT: pandn %xmm5, %xmm2 | |
1703 ; SSE2-NEXT: pand %xmm4, %xmm0 | |
1704 ; SSE2-NEXT: por %xmm2, %xmm0 | |
1705 ; SSE2-NEXT: movdqa %xmm1, %xmm2 | |
1706 ; SSE2-NEXT: pandn %xmm3, %xmm2 | |
1707 ; SSE2-NEXT: pand %xmm8, %xmm1 | |
1708 ; SSE2-NEXT: por %xmm2, %xmm1 | |
1709 ; SSE2-NEXT: retq | |
1710 ; | |
1711 ; SSE41-LABEL: max_gt_v4i64c: | |
1712 ; SSE41: # BB#0: | |
1713 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551609,18446744073709551615] | |
1714 ; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [1,7] | |
1715 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609] | |
1716 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7,1] | |
1717 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
1718 ; SSE41-NEXT: movdqa %xmm0, %xmm3 | |
1719 ; SSE41-NEXT: pxor %xmm1, %xmm3 | |
1720 ; SSE41-NEXT: movdqa %xmm0, %xmm6 | |
1721 ; SSE41-NEXT: pxor %xmm8, %xmm6 | |
1722 ; SSE41-NEXT: movdqa %xmm6, %xmm7 | |
1723 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm7 | |
1724 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2] | |
1725 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm6 | |
1726 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] | |
1727 ; SSE41-NEXT: pand %xmm4, %xmm6 | |
1728 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3] | |
1729 ; SSE41-NEXT: por %xmm6, %xmm3 | |
1730 ; SSE41-NEXT: movdqa %xmm0, %xmm4 | |
1731 ; SSE41-NEXT: pxor %xmm2, %xmm4 | |
1732 ; SSE41-NEXT: pxor %xmm5, %xmm0 | |
1733 ; SSE41-NEXT: movdqa %xmm0, %xmm6 | |
1734 ; SSE41-NEXT: pcmpgtd %xmm4, %xmm6 | |
1735 ; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] | |
1736 ; SSE41-NEXT: pcmpeqd %xmm4, %xmm0 | |
1737 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] | |
1738 ; SSE41-NEXT: pand %xmm7, %xmm4 | |
1739 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] | |
1740 ; SSE41-NEXT: por %xmm4, %xmm0 | |
1741 ; SSE41-NEXT: blendvpd %xmm5, %xmm2 | |
1742 ; SSE41-NEXT: movdqa %xmm3, %xmm0 | |
1743 ; SSE41-NEXT: blendvpd %xmm8, %xmm1 | |
1744 ; SSE41-NEXT: movapd %xmm2, %xmm0 | |
1745 ; SSE41-NEXT: retq | |
1746 ; | |
1747 ; SSE42-LABEL: max_gt_v4i64c: | |
1748 ; SSE42: # BB#0: | |
1749 ; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551609,18446744073709551615] | |
1750 ; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [1,7] | |
1751 ; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609] | |
1752 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [7,1] | |
1753 ; SSE42-NEXT: movdqa %xmm5, %xmm3 | |
1754 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm3 | |
1755 ; SSE42-NEXT: movdqa %xmm4, %xmm0 | |
1756 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 | |
1757 ; SSE42-NEXT: blendvpd %xmm4, %xmm2 | |
1758 ; SSE42-NEXT: movdqa %xmm3, %xmm0 | |
1759 ; SSE42-NEXT: blendvpd %xmm5, %xmm1 | |
1760 ; SSE42-NEXT: movapd %xmm2, %xmm0 | |
1761 ; SSE42-NEXT: retq | |
1762 ; | |
1763 ; AVX1-LABEL: max_gt_v4i64c: | |
1764 ; AVX1: # BB#0: | |
1765 ; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1] | |
1766 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551609,18446744073709551615] | |
1767 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1 | |
1768 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,7] | |
1769 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm2, %xmm2 | |
1770 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 | |
1771 ; AVX1-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0 | |
1772 ; AVX1-NEXT: retq | |
1773 ; | |
1774 ; AVX2-LABEL: max_gt_v4i64c: | |
1775 ; AVX2: # BB#0: | |
1776 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [18446744073709551609,18446744073709551615,1,7] | |
1777 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [18446744073709551615,18446744073709551609,7,1] | |
1778 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 | |
1779 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
1780 ; AVX2-NEXT: retq | |
1781 ; | |
1782 ; AVX512-LABEL: max_gt_v4i64c: | |
1783 ; AVX512: # BB#0: | |
1784 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [18446744073709551609,18446744073709551615,1,7] | |
1785 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [18446744073709551615,18446744073709551609,7,1] | |
1786 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 | |
1787 ; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
1788 ; AVX512-NEXT: retq | |
1789 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0 | |
1790 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0 | |
1791 %3 = icmp sgt <4 x i64> %1, %2 | |
1792 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2 | |
1793 ret <4 x i64> %4 | |
1794 } | |
1795 | |
1796 define <4 x i32> @max_gt_v4i32c() { | |
1797 ; SSE2-LABEL: max_gt_v4i32c: | |
1798 ; SSE2: # BB#0: | |
1799 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4294967289,4294967295,1,7] | |
1800 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967289,7,1] | |
1801 ; SSE2-NEXT: movdqa %xmm1, %xmm0 | |
1802 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 | |
1803 ; SSE2-NEXT: pand %xmm0, %xmm1 | |
1804 ; SSE2-NEXT: pandn %xmm2, %xmm0 | |
1805 ; SSE2-NEXT: por %xmm1, %xmm0 | |
1806 ; SSE2-NEXT: retq | |
1807 ; | |
1808 ; SSE41-LABEL: max_gt_v4i32c: | |
1809 ; SSE41: # BB#0: | |
1810 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] | |
1811 ; SSE41-NEXT: retq | |
1812 ; | |
1813 ; SSE42-LABEL: max_gt_v4i32c: | |
1814 ; SSE42: # BB#0: | |
1815 ; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] | |
1816 ; SSE42-NEXT: retq | |
1817 ; | |
1818 ; AVX-LABEL: max_gt_v4i32c: | |
1819 ; AVX: # BB#0: | |
1820 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] | |
1821 ; AVX-NEXT: retq | |
1822 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0 | |
1823 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0 | |
1824 %3 = icmp sgt <4 x i32> %1, %2 | |
1825 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 | |
1826 ret <4 x i32> %4 | |
1827 } | |
1828 | |
1829 define <8 x i32> @max_gt_v8i32c() { | |
1830 ; SSE2-LABEL: max_gt_v8i32c: | |
1831 ; SSE2: # BB#0: | |
1832 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967289,4294967291,4294967293,4294967295] | |
1833 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,3,5,7] | |
1834 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4294967295,4294967293,4294967291,4294967289] | |
1835 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [7,5,3,1] | |
1836 ; SSE2-NEXT: movdqa %xmm3, %xmm1 | |
1837 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm1 | |
1838 ; SSE2-NEXT: movdqa %xmm2, %xmm0 | |
1839 ; SSE2-NEXT: pcmpgtd %xmm4, %xmm0 | |
1840 ; SSE2-NEXT: pand %xmm0, %xmm2 | |
1841 ; SSE2-NEXT: pandn %xmm4, %xmm0 | |
1842 ; SSE2-NEXT: por %xmm2, %xmm0 | |
1843 ; SSE2-NEXT: pand %xmm1, %xmm3 | |
1844 ; SSE2-NEXT: pandn %xmm5, %xmm1 | |
1845 ; SSE2-NEXT: por %xmm3, %xmm1 | |
1846 ; SSE2-NEXT: retq | |
1847 ; | |
1848 ; SSE41-LABEL: max_gt_v8i32c: | |
1849 ; SSE41: # BB#0: | |
1850 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295] | |
1851 ; SSE41-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7] | |
1852 ; SSE41-NEXT: retq | |
1853 ; | |
1854 ; SSE42-LABEL: max_gt_v8i32c: | |
1855 ; SSE42: # BB#0: | |
1856 ; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295] | |
1857 ; SSE42-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7] | |
1858 ; SSE42-NEXT: retq | |
1859 ; | |
1860 ; AVX-LABEL: max_gt_v8i32c: | |
1861 ; AVX: # BB#0: | |
1862 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7] | |
1863 ; AVX-NEXT: retq | |
1864 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0 | |
1865 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0 | |
1866 %3 = icmp sgt <8 x i32> %1, %2 | |
1867 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 | |
1868 ret <8 x i32> %4 | |
1869 } | |
1870 | |
1871 define <8 x i16> @max_gt_v8i16c() { | |
1872 ; SSE-LABEL: max_gt_v8i16c: | |
1873 ; SSE: # BB#0: | |
1874 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] | |
1875 ; SSE-NEXT: retq | |
1876 ; | |
1877 ; AVX-LABEL: max_gt_v8i16c: | |
1878 ; AVX: # BB#0: | |
1879 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] | |
1880 ; AVX-NEXT: retq | |
1881 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0 | |
1882 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0 | |
1883 %3 = icmp sgt <8 x i16> %1, %2 | |
1884 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 | |
1885 ret <8 x i16> %4 | |
1886 } | |
1887 | |
1888 define <16 x i16> @max_gt_v16i16c() { | |
1889 ; SSE-LABEL: max_gt_v16i16c: | |
1890 ; SSE: # BB#0: | |
1891 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0] | |
1892 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8] | |
1893 ; SSE-NEXT: retq | |
1894 ; | |
1895 ; AVX-LABEL: max_gt_v16i16c: | |
1896 ; AVX: # BB#0: | |
1897 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8] | |
1898 ; AVX-NEXT: retq | |
1899 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0 | |
1900 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0 | |
1901 %3 = icmp sgt <16 x i16> %1, %2 | |
1902 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 | |
1903 ret <16 x i16> %4 | |
1904 } | |
1905 | |
1906 define <16 x i8> @max_gt_v16i8c() { | |
1907 ; SSE2-LABEL: max_gt_v16i8c: | |
1908 ; SSE2: # BB#0: | |
1909 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] | |
1910 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,254,253,252,251,250,249,0,7,6,5,4,3,2,1,0] | |
1911 ; SSE2-NEXT: movdqa %xmm1, %xmm0 | |
1912 ; SSE2-NEXT: pcmpgtb %xmm2, %xmm0 | |
1913 ; SSE2-NEXT: pand %xmm0, %xmm1 | |
1914 ; SSE2-NEXT: pandn %xmm2, %xmm0 | |
1915 ; SSE2-NEXT: por %xmm1, %xmm0 | |
1916 ; SSE2-NEXT: retq | |
1917 ; | |
1918 ; SSE41-LABEL: max_gt_v16i8c: | |
1919 ; SSE41: # BB#0: | |
1920 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] | |
1921 ; SSE41-NEXT: retq | |
1922 ; | |
1923 ; SSE42-LABEL: max_gt_v16i8c: | |
1924 ; SSE42: # BB#0: | |
1925 ; SSE42-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] | |
1926 ; SSE42-NEXT: retq | |
1927 ; | |
1928 ; AVX-LABEL: max_gt_v16i8c: | |
1929 ; AVX: # BB#0: | |
1930 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] | |
1931 ; AVX-NEXT: retq | |
1932 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0 | |
1933 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0 | |
1934 %3 = icmp sgt <16 x i8> %1, %2 | |
1935 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 | |
1936 ret <16 x i8> %4 | |
1937 } | |
1938 | |
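; In the sge and sle constant tests that follow there is no native
; greater-or-equal vector compare, so wherever the compare is not folded away
; the strict greater-than mask is first inverted (pcmpeqd building an all-ones
; value, then pxor) before it selects between the operands.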
1939 define <2 x i64> @max_ge_v2i64c() { | |
1940 ; SSE2-LABEL: max_ge_v2i64c: | |
1941 ; SSE2: # BB#0: | |
1942 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551609,7] | |
1943 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,1] | |
1944 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
1945 ; SSE2-NEXT: movdqa %xmm0, %xmm3 | |
1946 ; SSE2-NEXT: pxor %xmm1, %xmm3 | |
1947 ; SSE2-NEXT: pxor %xmm2, %xmm0 | |
1948 ; SSE2-NEXT: movdqa %xmm0, %xmm4 | |
1949 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 | |
1950 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] | |
1951 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm0 | |
1952 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] | |
1953 ; SSE2-NEXT: pand %xmm5, %xmm0 | |
1954 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] | |
1955 ; SSE2-NEXT: por %xmm0, %xmm3 | |
1956 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 | |
1957 ; SSE2-NEXT: pxor %xmm3, %xmm0 | |
1958 ; SSE2-NEXT: pandn %xmm1, %xmm3 | |
1959 ; SSE2-NEXT: pandn %xmm2, %xmm0 | |
1960 ; SSE2-NEXT: por %xmm3, %xmm0 | |
1961 ; SSE2-NEXT: retq | |
1962 ; | |
1963 ; SSE41-LABEL: max_ge_v2i64c: | |
1964 ; SSE41: # BB#0: | |
1965 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7] | |
1966 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1] | |
1967 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
1968 ; SSE41-NEXT: movdqa %xmm0, %xmm3 | |
1969 ; SSE41-NEXT: pxor %xmm2, %xmm3 | |
1970 ; SSE41-NEXT: pxor %xmm1, %xmm0 | |
1971 ; SSE41-NEXT: movdqa %xmm0, %xmm4 | |
1972 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4 | |
1973 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] | |
1974 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0 | |
1975 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] | |
1976 ; SSE41-NEXT: pand %xmm5, %xmm0 | |
1977 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] | |
1978 ; SSE41-NEXT: por %xmm0, %xmm3 | |
1979 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 | |
1980 ; SSE41-NEXT: pxor %xmm3, %xmm0 | |
1981 ; SSE41-NEXT: blendvpd %xmm2, %xmm1 | |
1982 ; SSE41-NEXT: movapd %xmm1, %xmm0 | |
1983 ; SSE41-NEXT: retq | |
1984 ; | |
1985 ; SSE42-LABEL: max_ge_v2i64c: | |
1986 ; SSE42: # BB#0: | |
1987 ; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7] | |
1988 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1] | |
1989 ; SSE42-NEXT: movdqa %xmm1, %xmm3 | |
1990 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm3 | |
1991 ; SSE42-NEXT: pcmpeqd %xmm0, %xmm0 | |
1992 ; SSE42-NEXT: pxor %xmm3, %xmm0 | |
1993 ; SSE42-NEXT: blendvpd %xmm2, %xmm1 | |
1994 ; SSE42-NEXT: movapd %xmm1, %xmm0 | |
1995 ; SSE42-NEXT: retq | |
1996 ; | |
1997 ; AVX-LABEL: max_ge_v2i64c: | |
1998 ; AVX: # BB#0: | |
1999 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [18446744073709551609,7] | |
2000 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551615,1] | |
2001 ; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 | |
2002 ; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 | |
2003 ; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2 | |
2004 ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 | |
2005 ; AVX-NEXT: retq | |
2006 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0 | |
2007 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0 | |
2008 %3 = icmp sge <2 x i64> %1, %2 | |
2009 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 | |
2010 ret <2 x i64> %4 | |
2011 } | |
2012 | |
2013 define <4 x i64> @max_ge_v4i64c() { | |
2014 ; SSE2-LABEL: max_ge_v4i64c: | |
2015 ; SSE2: # BB#0: | |
2016 ; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [18446744073709551609,18446744073709551615] | |
2017 ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [1,7] | |
2018 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551615,18446744073709551609] | |
2019 ; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [7,1] | |
2020 ; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,0,2147483648,0] | |
2021 ; SSE2-NEXT: movdqa %xmm7, %xmm0 | |
2022 ; SSE2-NEXT: pxor %xmm8, %xmm0 | |
2023 ; SSE2-NEXT: movdqa %xmm7, %xmm1 | |
2024 ; SSE2-NEXT: pxor %xmm9, %xmm1 | |
2025 ; SSE2-NEXT: movdqa %xmm1, %xmm6 | |
2026 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm6 | |
2027 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,0,2,2] | |
2028 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 | |
2029 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] | |
2030 ; SSE2-NEXT: pand %xmm2, %xmm0 | |
2031 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] | |
2032 ; SSE2-NEXT: por %xmm0, %xmm6 | |
2033 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 | |
2034 ; SSE2-NEXT: movdqa %xmm6, %xmm1 | |
2035 ; SSE2-NEXT: pxor %xmm0, %xmm1 | |
2036 ; SSE2-NEXT: movdqa %xmm7, %xmm2 | |
2037 ; SSE2-NEXT: pxor %xmm10, %xmm2 | |
2038 ; SSE2-NEXT: pxor %xmm5, %xmm7 | |
2039 ; SSE2-NEXT: movdqa %xmm7, %xmm3 | |
2040 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm3 | |
2041 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] | |
2042 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm7 | |
2043 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3] | |
2044 ; SSE2-NEXT: pand %xmm4, %xmm2 | |
2045 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] | |
2046 ; SSE2-NEXT: por %xmm2, %xmm3 | |
2047 ; SSE2-NEXT: pxor %xmm3, %xmm0 | |
2048 ; SSE2-NEXT: pandn %xmm10, %xmm3 | |
2049 ; SSE2-NEXT: pandn %xmm5, %xmm0 | |
2050 ; SSE2-NEXT: por %xmm3, %xmm0 | |
2051 ; SSE2-NEXT: pandn %xmm8, %xmm6 | |
2052 ; SSE2-NEXT: pandn %xmm9, %xmm1 | |
2053 ; SSE2-NEXT: por %xmm6, %xmm1 | |
2054 ; SSE2-NEXT: retq | |
2055 ; | |
2056 ; SSE41-LABEL: max_ge_v4i64c: | |
2057 ; SSE41: # BB#0: | |
2058 ; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [18446744073709551609,18446744073709551615] | |
2059 ; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [1,7] | |
2060 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609] | |
2061 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7,1] | |
2062 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
2063 ; SSE41-NEXT: movdqa %xmm0, %xmm3 | |
2064 ; SSE41-NEXT: pxor %xmm8, %xmm3 | |
2065 ; SSE41-NEXT: movdqa %xmm0, %xmm6 | |
2066 ; SSE41-NEXT: pxor %xmm1, %xmm6 | |
2067 ; SSE41-NEXT: movdqa %xmm6, %xmm7 | |
2068 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm7 | |
2069 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2] | |
2070 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm6 | |
2071 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] | |
2072 ; SSE41-NEXT: pand %xmm4, %xmm6 | |
2073 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3] | |
2074 ; SSE41-NEXT: por %xmm6, %xmm3 | |
2075 ; SSE41-NEXT: pcmpeqd %xmm4, %xmm4 | |
2076 ; SSE41-NEXT: pxor %xmm4, %xmm3 | |
2077 ; SSE41-NEXT: movdqa %xmm0, %xmm6 | |
2078 ; SSE41-NEXT: pxor %xmm9, %xmm6 | |
2079 ; SSE41-NEXT: pxor %xmm2, %xmm0 | |
2080 ; SSE41-NEXT: movdqa %xmm0, %xmm7 | |
2081 ; SSE41-NEXT: pcmpgtd %xmm6, %xmm7 | |
2082 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2] | |
2083 ; SSE41-NEXT: pcmpeqd %xmm6, %xmm0 | |
2084 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] | |
2085 ; SSE41-NEXT: pand %xmm5, %xmm6 | |
2086 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3] | |
2087 ; SSE41-NEXT: por %xmm6, %xmm0 | |
2088 ; SSE41-NEXT: pxor %xmm4, %xmm0 | |
2089 ; SSE41-NEXT: blendvpd %xmm9, %xmm2 | |
2090 ; SSE41-NEXT: movdqa %xmm3, %xmm0 | |
2091 ; SSE41-NEXT: blendvpd %xmm8, %xmm1 | |
2092 ; SSE41-NEXT: movapd %xmm2, %xmm0 | |
2093 ; SSE41-NEXT: retq | |
2094 ; | |
2095 ; SSE42-LABEL: max_ge_v4i64c: | |
2096 ; SSE42: # BB#0: | |
2097 ; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551609,18446744073709551615] | |
2098 ; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [1,7] | |
2099 ; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609] | |
2100 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [7,1] | |
2101 ; SSE42-NEXT: movdqa %xmm1, %xmm3 | |
2102 ; SSE42-NEXT: pcmpgtq %xmm5, %xmm3 | |
2103 ; SSE42-NEXT: pcmpeqd %xmm6, %xmm6 | |
2104 ; SSE42-NEXT: pxor %xmm6, %xmm3 | |
2105 ; SSE42-NEXT: movdqa %xmm2, %xmm0 | |
2106 ; SSE42-NEXT: pcmpgtq %xmm4, %xmm0 | |
2107 ; SSE42-NEXT: pxor %xmm6, %xmm0 | |
2108 ; SSE42-NEXT: blendvpd %xmm4, %xmm2 | |
2109 ; SSE42-NEXT: movdqa %xmm3, %xmm0 | |
2110 ; SSE42-NEXT: blendvpd %xmm5, %xmm1 | |
2111 ; SSE42-NEXT: movapd %xmm2, %xmm0 | |
2112 ; SSE42-NEXT: retq | |
2113 ; | |
2114 ; AVX1-LABEL: max_ge_v4i64c: | |
2115 ; AVX1: # BB#0: | |
2116 ; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1] | |
2117 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,1] | |
2118 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1 | |
2119 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 | |
2120 ; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 | |
2121 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [18446744073709551615,18446744073709551609] | |
2122 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm3, %xmm3 | |
2123 ; AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm2 | |
2124 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 | |
2125 ; AVX1-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0 | |
2126 ; AVX1-NEXT: retq | |
2127 ; | |
2128 ; AVX2-LABEL: max_ge_v4i64c: | |
2129 ; AVX2: # BB#0: | |
2130 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [18446744073709551609,18446744073709551615,1,7] | |
2131 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [18446744073709551615,18446744073709551609,7,1] | |
2132 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 | |
2133 ; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 | |
2134 ; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2 | |
2135 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
2136 ; AVX2-NEXT: retq | |
2137 ; | |
2138 ; AVX512-LABEL: max_ge_v4i64c: | |
2139 ; AVX512: # BB#0: | |
2140 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [18446744073709551609,18446744073709551615,1,7] | |
2141 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [18446744073709551615,18446744073709551609,7,1] | |
2142 ; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 | |
2143 ; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 | |
2144 ; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2 | |
2145 ; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
2146 ; AVX512-NEXT: retq | |
2147 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0 | |
2148 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0 | |
2149 %3 = icmp sge <4 x i64> %1, %2 | |
2150 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2 | |
2151 ret <4 x i64> %4 | |
2152 } | |
2153 | |
2154 define <4 x i32> @max_ge_v4i32c() { | |
2155 ; SSE2-LABEL: max_ge_v4i32c: | |
2156 ; SSE2: # BB#0: | |
2157 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4294967289,4294967295,1,7] | |
2158 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967289,7,1] | |
2159 ; SSE2-NEXT: movdqa %xmm2, %xmm3 | |
2160 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 | |
2161 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 | |
2162 ; SSE2-NEXT: pxor %xmm3, %xmm0 | |
2163 ; SSE2-NEXT: pandn %xmm1, %xmm3 | |
2164 ; SSE2-NEXT: pandn %xmm2, %xmm0 | |
2165 ; SSE2-NEXT: por %xmm3, %xmm0 | |
2166 ; SSE2-NEXT: retq | |
2167 ; | |
2168 ; SSE41-LABEL: max_ge_v4i32c: | |
2169 ; SSE41: # BB#0: | |
2170 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] | |
2171 ; SSE41-NEXT: retq | |
2172 ; | |
2173 ; SSE42-LABEL: max_ge_v4i32c: | |
2174 ; SSE42: # BB#0: | |
2175 ; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] | |
2176 ; SSE42-NEXT: retq | |
2177 ; | |
2178 ; AVX-LABEL: max_ge_v4i32c: | |
2179 ; AVX: # BB#0: | |
2180 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] | |
2181 ; AVX-NEXT: retq | |
2182 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0 | |
2183 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0 | |
2184 %3 = icmp sge <4 x i32> %1, %2 | |
2185 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 | |
2186 ret <4 x i32> %4 | |
2187 } | |
2188 | |
2189 define <8 x i32> @max_ge_v8i32c() { | |
2190 ; SSE2-LABEL: max_ge_v8i32c: | |
2191 ; SSE2: # BB#0: | |
2192 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967289,4294967291,4294967293,4294967295] | |
2193 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,3,5,7] | |
2194 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4294967295,4294967293,4294967291,4294967289] | |
2195 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [7,5,3,1] | |
2196 ; SSE2-NEXT: movdqa %xmm5, %xmm6 | |
2197 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 | |
2198 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 | |
2199 ; SSE2-NEXT: movdqa %xmm6, %xmm1 | |
2200 ; SSE2-NEXT: pxor %xmm0, %xmm1 | |
2201 ; SSE2-NEXT: movdqa %xmm4, %xmm7 | |
2202 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm7 | |
2203 ; SSE2-NEXT: pxor %xmm7, %xmm0 | |
2204 ; SSE2-NEXT: pandn %xmm2, %xmm7 | |
2205 ; SSE2-NEXT: pandn %xmm4, %xmm0 | |
2206 ; SSE2-NEXT: por %xmm7, %xmm0 | |
2207 ; SSE2-NEXT: pandn %xmm3, %xmm6 | |
2208 ; SSE2-NEXT: pandn %xmm5, %xmm1 | |
2209 ; SSE2-NEXT: por %xmm6, %xmm1 | |
2210 ; SSE2-NEXT: retq | |
2211 ; | |
2212 ; SSE41-LABEL: max_ge_v8i32c: | |
2213 ; SSE41: # BB#0: | |
2214 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295] | |
2215 ; SSE41-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7] | |
2216 ; SSE41-NEXT: retq | |
2217 ; | |
2218 ; SSE42-LABEL: max_ge_v8i32c: | |
2219 ; SSE42: # BB#0: | |
2220 ; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295] | |
2221 ; SSE42-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7] | |
2222 ; SSE42-NEXT: retq | |
2223 ; | |
2224 ; AVX-LABEL: max_ge_v8i32c: | |
2225 ; AVX: # BB#0: | |
2226 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7] | |
2227 ; AVX-NEXT: retq | |
2228 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0 | |
2229 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0 | |
2230 %3 = icmp sge <8 x i32> %1, %2 | |
2231 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 | |
2232 ret <8 x i32> %4 | |
2233 } | |
2234 | |
2235 define <8 x i16> @max_ge_v8i16c() { | |
2236 ; SSE-LABEL: max_ge_v8i16c: | |
2237 ; SSE: # BB#0: | |
2238 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] | |
2239 ; SSE-NEXT: retq | |
2240 ; | |
2241 ; AVX-LABEL: max_ge_v8i16c: | |
2242 ; AVX: # BB#0: | |
2243 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] | |
2244 ; AVX-NEXT: retq | |
2245 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0 | |
2246 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0 | |
2247 %3 = icmp sge <8 x i16> %1, %2 | |
2248 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 | |
2249 ret <8 x i16> %4 | |
2250 } | |
2251 | |
2252 define <16 x i16> @max_ge_v16i16c() { | |
2253 ; SSE-LABEL: max_ge_v16i16c: | |
2254 ; SSE: # BB#0: | |
2255 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0] | |
2256 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8] | |
2257 ; SSE-NEXT: retq | |
2258 ; | |
2259 ; AVX-LABEL: max_ge_v16i16c: | |
2260 ; AVX: # BB#0: | |
2261 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8] | |
2262 ; AVX-NEXT: retq | |
2263 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0 | |
2264 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0 | |
2265 %3 = icmp sge <16 x i16> %1, %2 | |
2266 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 | |
2267 ret <16 x i16> %4 | |
2268 } | |
2269 | |
2270 define <16 x i8> @max_ge_v16i8c() { | |
2271 ; SSE2-LABEL: max_ge_v16i8c: | |
2272 ; SSE2: # BB#0: | |
2273 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] | |
2274 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,254,253,252,251,250,249,0,7,6,5,4,3,2,1,0] | |
2275 ; SSE2-NEXT: movdqa %xmm2, %xmm3 | |
2276 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3 | |
2277 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 | |
2278 ; SSE2-NEXT: pxor %xmm3, %xmm0 | |
2279 ; SSE2-NEXT: pandn %xmm1, %xmm3 | |
2280 ; SSE2-NEXT: pandn %xmm2, %xmm0 | |
2281 ; SSE2-NEXT: por %xmm3, %xmm0 | |
2282 ; SSE2-NEXT: retq | |
2283 ; | |
2284 ; SSE41-LABEL: max_ge_v16i8c: | |
2285 ; SSE41: # BB#0: | |
2286 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] | |
2287 ; SSE41-NEXT: retq | |
2288 ; | |
2289 ; SSE42-LABEL: max_ge_v16i8c: | |
2290 ; SSE42: # BB#0: | |
2291 ; SSE42-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] | |
2292 ; SSE42-NEXT: retq | |
2293 ; | |
2294 ; AVX-LABEL: max_ge_v16i8c: | |
2295 ; AVX: # BB#0: | |
2296 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] | |
2297 ; AVX-NEXT: retq | |
2298 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0 | |
2299 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0 | |
2300 %3 = icmp sge <16 x i8> %1, %2 | |
2301 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 | |
2302 ret <16 x i8> %4 | |
2303 } | |
2304 | |
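; The slt constant tests below reuse the greater-than compare with the
; operands swapped: the second constant vector is compared as the greater one,
; and the resulting mask picks the smaller element from each lane.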
2305 define <2 x i64> @min_lt_v2i64c() { | |
2306 ; SSE2-LABEL: min_lt_v2i64c: | |
2307 ; SSE2: # BB#0: | |
2308 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551609,7] | |
2309 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,1] | |
2310 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
2311 ; SSE2-NEXT: movdqa %xmm0, %xmm3 | |
2312 ; SSE2-NEXT: pxor %xmm1, %xmm3 | |
2313 ; SSE2-NEXT: pxor %xmm2, %xmm0 | |
2314 ; SSE2-NEXT: movdqa %xmm0, %xmm4 | |
2315 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 | |
2316 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] | |
2317 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm0 | |
2318 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] | |
2319 ; SSE2-NEXT: pand %xmm5, %xmm3 | |
2320 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] | |
2321 ; SSE2-NEXT: por %xmm3, %xmm0 | |
2322 ; SSE2-NEXT: movdqa %xmm0, %xmm3 | |
2323 ; SSE2-NEXT: pandn %xmm2, %xmm3 | |
2324 ; SSE2-NEXT: pand %xmm1, %xmm0 | |
2325 ; SSE2-NEXT: por %xmm3, %xmm0 | |
2326 ; SSE2-NEXT: retq | |
2327 ; | |
2328 ; SSE41-LABEL: min_lt_v2i64c: | |
2329 ; SSE41: # BB#0: | |
2330 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7] | |
2331 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1] | |
2332 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
2333 ; SSE41-NEXT: movdqa %xmm0, %xmm3 | |
2334 ; SSE41-NEXT: pxor %xmm2, %xmm3 | |
2335 ; SSE41-NEXT: pxor %xmm1, %xmm0 | |
2336 ; SSE41-NEXT: movdqa %xmm0, %xmm4 | |
2337 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4 | |
2338 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] | |
2339 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0 | |
2340 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] | |
2341 ; SSE41-NEXT: pand %xmm5, %xmm3 | |
2342 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] | |
2343 ; SSE41-NEXT: por %xmm3, %xmm0 | |
2344 ; SSE41-NEXT: blendvpd %xmm2, %xmm1 | |
2345 ; SSE41-NEXT: movapd %xmm1, %xmm0 | |
2346 ; SSE41-NEXT: retq | |
2347 ; | |
2348 ; SSE42-LABEL: min_lt_v2i64c: | |
2349 ; SSE42: # BB#0: | |
2350 ; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7] | |
2351 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1] | |
2352 ; SSE42-NEXT: movdqa %xmm1, %xmm0 | |
2353 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 | |
2354 ; SSE42-NEXT: blendvpd %xmm2, %xmm1 | |
2355 ; SSE42-NEXT: movapd %xmm1, %xmm0 | |
2356 ; SSE42-NEXT: retq | |
2357 ; | |
2358 ; AVX-LABEL: min_lt_v2i64c: | |
2359 ; AVX: # BB#0: | |
2360 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [18446744073709551609,7] | |
2361 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551615,1] | |
2362 ; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 | |
2363 ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 | |
2364 ; AVX-NEXT: retq | |
2365 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0 | |
2366 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0 | |
2367 %3 = icmp slt <2 x i64> %1, %2 | |
2368 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 | |
2369 ret <2 x i64> %4 | |
2370 } | |
2371 | |
2372 define <4 x i64> @min_lt_v4i64c() { | |
2373 ; SSE2-LABEL: min_lt_v4i64c: | |
2374 ; SSE2: # BB#0: | |
2375 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551609,18446744073709551615] | |
2376 ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [1,7] | |
2377 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551615,18446744073709551609] | |
2378 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [7,1] | |
2379 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
2380 ; SSE2-NEXT: movdqa %xmm0, %xmm1 | |
2381 ; SSE2-NEXT: pxor %xmm8, %xmm1 | |
2382 ; SSE2-NEXT: movdqa %xmm0, %xmm6 | |
2383 ; SSE2-NEXT: pxor %xmm3, %xmm6 | |
2384 ; SSE2-NEXT: movdqa %xmm6, %xmm7 | |
2385 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm7 | |
2386 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[0,0,2,2] | |
2387 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm6 | |
2388 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] | |
2389 ; SSE2-NEXT: pand %xmm2, %xmm6 | |
2390 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm7[1,1,3,3] | |
2391 ; SSE2-NEXT: por %xmm6, %xmm1 | |
2392 ; SSE2-NEXT: movdqa %xmm0, %xmm2 | |
2393 ; SSE2-NEXT: pxor %xmm4, %xmm2 | |
2394 ; SSE2-NEXT: pxor %xmm5, %xmm0 | |
2395 ; SSE2-NEXT: movdqa %xmm0, %xmm6 | |
2396 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm6 | |
2397 ; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] | |
2398 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 | |
2399 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] | |
2400 ; SSE2-NEXT: pand %xmm7, %xmm2 | |
2401 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] | |
2402 ; SSE2-NEXT: por %xmm2, %xmm0 | |
2403 ; SSE2-NEXT: movdqa %xmm0, %xmm2 | |
2404 ; SSE2-NEXT: pandn %xmm5, %xmm2 | |
2405 ; SSE2-NEXT: pand %xmm4, %xmm0 | |
2406 ; SSE2-NEXT: por %xmm2, %xmm0 | |
2407 ; SSE2-NEXT: movdqa %xmm1, %xmm2 | |
2408 ; SSE2-NEXT: pandn %xmm3, %xmm2 | |
2409 ; SSE2-NEXT: pand %xmm8, %xmm1 | |
2410 ; SSE2-NEXT: por %xmm2, %xmm1 | |
2411 ; SSE2-NEXT: retq | |
2412 ; | |
2413 ; SSE41-LABEL: min_lt_v4i64c: | |
2414 ; SSE41: # BB#0: | |
2415 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551609,18446744073709551615] | |
2416 ; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [1,7] | |
2417 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609] | |
2418 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7,1] | |
2419 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
2420 ; SSE41-NEXT: movdqa %xmm0, %xmm3 | |
2421 ; SSE41-NEXT: pxor %xmm8, %xmm3 | |
2422 ; SSE41-NEXT: movdqa %xmm0, %xmm6 | |
2423 ; SSE41-NEXT: pxor %xmm1, %xmm6 | |
2424 ; SSE41-NEXT: movdqa %xmm6, %xmm7 | |
2425 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm7 | |
2426 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2] | |
2427 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm6 | |
2428 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] | |
2429 ; SSE41-NEXT: pand %xmm4, %xmm6 | |
2430 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3] | |
2431 ; SSE41-NEXT: por %xmm6, %xmm3 | |
2432 ; SSE41-NEXT: movdqa %xmm0, %xmm4 | |
2433 ; SSE41-NEXT: pxor %xmm5, %xmm4 | |
2434 ; SSE41-NEXT: pxor %xmm2, %xmm0 | |
2435 ; SSE41-NEXT: movdqa %xmm0, %xmm6 | |
2436 ; SSE41-NEXT: pcmpgtd %xmm4, %xmm6 | |
2437 ; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] | |
2438 ; SSE41-NEXT: pcmpeqd %xmm4, %xmm0 | |
2439 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] | |
2440 ; SSE41-NEXT: pand %xmm7, %xmm4 | |
2441 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] | |
2442 ; SSE41-NEXT: por %xmm4, %xmm0 | |
2443 ; SSE41-NEXT: blendvpd %xmm5, %xmm2 | |
2444 ; SSE41-NEXT: movdqa %xmm3, %xmm0 | |
2445 ; SSE41-NEXT: blendvpd %xmm8, %xmm1 | |
2446 ; SSE41-NEXT: movapd %xmm2, %xmm0 | |
2447 ; SSE41-NEXT: retq | |
2448 ; | |
2449 ; SSE42-LABEL: min_lt_v4i64c: | |
2450 ; SSE42: # BB#0: | |
2451 ; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551609,18446744073709551615] | |
2452 ; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [1,7] | |
2453 ; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609] | |
2454 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [7,1] | |
2455 ; SSE42-NEXT: movdqa %xmm1, %xmm3 | |
2456 ; SSE42-NEXT: pcmpgtq %xmm5, %xmm3 | |
2457 ; SSE42-NEXT: movdqa %xmm2, %xmm0 | |
2458 ; SSE42-NEXT: pcmpgtq %xmm4, %xmm0 | |
2459 ; SSE42-NEXT: blendvpd %xmm4, %xmm2 | |
2460 ; SSE42-NEXT: movdqa %xmm3, %xmm0 | |
2461 ; SSE42-NEXT: blendvpd %xmm5, %xmm1 | |
2462 ; SSE42-NEXT: movapd %xmm2, %xmm0 | |
2463 ; SSE42-NEXT: retq | |
2464 ; | |
2465 ; AVX1-LABEL: min_lt_v4i64c: | |
2466 ; AVX1: # BB#0: | |
2467 ; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1] | |
2468 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551615,18446744073709551609] | |
2469 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1 | |
2470 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [7,1] | |
2471 ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm2, %xmm2 | |
2472 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 | |
2473 ; AVX1-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0 | |
2474 ; AVX1-NEXT: retq | |
2475 ; | |
2476 ; AVX2-LABEL: min_lt_v4i64c: | |
2477 ; AVX2: # BB#0: | |
2478 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [18446744073709551609,18446744073709551615,1,7] | |
2479 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [18446744073709551615,18446744073709551609,7,1] | |
2480 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 | |
2481 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
2482 ; AVX2-NEXT: retq | |
2483 ; | |
2484 ; AVX512-LABEL: min_lt_v4i64c: | |
2485 ; AVX512: # BB#0: | |
2486 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [18446744073709551609,18446744073709551615,1,7] | |
2487 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [18446744073709551615,18446744073709551609,7,1] | |
2488 ; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 | |
2489 ; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
2490 ; AVX512-NEXT: retq | |
2491 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0 | |
2492 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0 | |
2493 %3 = icmp slt <4 x i64> %1, %2 | |
2494 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2 | |
2495 ret <4 x i64> %4 | |
2496 } | |
2497 | |
2498 define <4 x i32> @min_lt_v4i32c() { | |
2499 ; SSE2-LABEL: min_lt_v4i32c: | |
2500 ; SSE2: # BB#0: | |
2501 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4294967289,4294967295,1,7] | |
2502 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967289,7,1] | |
2503 ; SSE2-NEXT: movdqa %xmm2, %xmm0 | |
2504 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 | |
2505 ; SSE2-NEXT: pand %xmm0, %xmm1 | |
2506 ; SSE2-NEXT: pandn %xmm2, %xmm0 | |
2507 ; SSE2-NEXT: por %xmm1, %xmm0 | |
2508 ; SSE2-NEXT: retq | |
2509 ; | |
2510 ; SSE41-LABEL: min_lt_v4i32c: | |
2511 ; SSE41: # BB#0: | |
2512 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1] | |
2513 ; SSE41-NEXT: retq | |
2514 ; | |
2515 ; SSE42-LABEL: min_lt_v4i32c: | |
2516 ; SSE42: # BB#0: | |
2517 ; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1] | |
2518 ; SSE42-NEXT: retq | |
2519 ; | |
2520 ; AVX-LABEL: min_lt_v4i32c: | |
2521 ; AVX: # BB#0: | |
2522 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1] | |
2523 ; AVX-NEXT: retq | |
2524 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0 | |
2525 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0 | |
2526 %3 = icmp slt <4 x i32> %1, %2 | |
2527 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 | |
2528 ret <4 x i32> %4 | |
2529 } | |
2530 | |
2531 define <8 x i32> @min_lt_v8i32c() { | |
2532 ; SSE2-LABEL: min_lt_v8i32c: | |
2533 ; SSE2: # BB#0: | |
2534 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967289,4294967291,4294967293,4294967295] | |
2535 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,3,5,7] | |
2536 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4294967295,4294967293,4294967291,4294967289] | |
2537 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [7,5,3,1] | |
2538 ; SSE2-NEXT: movdqa %xmm5, %xmm1 | |
2539 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm1 | |
2540 ; SSE2-NEXT: movdqa %xmm4, %xmm0 | |
2541 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 | |
2542 ; SSE2-NEXT: pand %xmm0, %xmm2 | |
2543 ; SSE2-NEXT: pandn %xmm4, %xmm0 | |
2544 ; SSE2-NEXT: por %xmm2, %xmm0 | |
2545 ; SSE2-NEXT: pand %xmm1, %xmm3 | |
2546 ; SSE2-NEXT: pandn %xmm5, %xmm1 | |
2547 ; SSE2-NEXT: por %xmm3, %xmm1 | |
2548 ; SSE2-NEXT: retq | |
2549 ; | |
2550 ; SSE41-LABEL: min_lt_v8i32c: | |
2551 ; SSE41: # BB#0: | |
2552 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289] | |
2553 ; SSE41-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1] | |
2554 ; SSE41-NEXT: retq | |
2555 ; | |
2556 ; SSE42-LABEL: min_lt_v8i32c: | |
2557 ; SSE42: # BB#0: | |
2558 ; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289] | |
2559 ; SSE42-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1] | |
2560 ; SSE42-NEXT: retq | |
2561 ; | |
2562 ; AVX-LABEL: min_lt_v8i32c: | |
2563 ; AVX: # BB#0: | |
2564 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1] | |
2565 ; AVX-NEXT: retq | |
2566 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0 | |
2567 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0 | |
2568 %3 = icmp slt <8 x i32> %1, %2 | |
2569 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 | |
2570 ret <8 x i32> %4 | |
2571 } | |
2572 | |
2573 define <8 x i16> @min_lt_v8i16c() { | |
2574 ; SSE-LABEL: min_lt_v8i16c: | |
2575 ; SSE: # BB#0: | |
2576 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1] | |
2577 ; SSE-NEXT: retq | |
2578 ; | |
2579 ; AVX-LABEL: min_lt_v8i16c: | |
2580 ; AVX: # BB#0: | |
2581 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1] | |
2582 ; AVX-NEXT: retq | |
2583 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0 | |
2584 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0 | |
2585 %3 = icmp slt <8 x i16> %1, %2 | |
2586 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 | |
2587 ret <8 x i16> %4 | |
2588 } | |
2589 | |
2590 define <16 x i16> @min_lt_v16i16c() { | |
2591 ; SSE-LABEL: min_lt_v16i16c: | |
2592 ; SSE: # BB#0: | |
2593 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0] | |
2594 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0] | |
2595 ; SSE-NEXT: retq | |
2596 ; | |
2597 ; AVX-LABEL: min_lt_v16i16c: | |
2598 ; AVX: # BB#0: | |
2599 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0] | |
2600 ; AVX-NEXT: retq | |
2601 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0 | |
2602 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0 | |
2603 %3 = icmp slt <16 x i16> %1, %2 | |
2604 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 | |
2605 ret <16 x i16> %4 | |
2606 } | |
2607 | |
2608 define <16 x i8> @min_lt_v16i8c() { | |
2609 ; SSE2-LABEL: min_lt_v16i8c: | |
2610 ; SSE2: # BB#0: | |
2611 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] | |
2612 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,254,253,252,251,250,249,0,7,6,5,4,3,2,1,0] | |
2613 ; SSE2-NEXT: movdqa %xmm2, %xmm0 | |
2614 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm0 | |
2615 ; SSE2-NEXT: pand %xmm0, %xmm1 | |
2616 ; SSE2-NEXT: pandn %xmm2, %xmm0 | |
2617 ; SSE2-NEXT: por %xmm1, %xmm0 | |
2618 ; SSE2-NEXT: retq | |
2619 ; | |
2620 ; SSE41-LABEL: min_lt_v16i8c: | |
2621 ; SSE41: # BB#0: | |
2622 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0] | |
2623 ; SSE41-NEXT: retq | |
2624 ; | |
2625 ; SSE42-LABEL: min_lt_v16i8c: | |
2626 ; SSE42: # BB#0: | |
2627 ; SSE42-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0] | |
2628 ; SSE42-NEXT: retq | |
2629 ; | |
2630 ; AVX-LABEL: min_lt_v16i8c: | |
2631 ; AVX: # BB#0: | |
2632 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0] | |
2633 ; AVX-NEXT: retq | |
2634 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0 | |
2635 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0 | |
2636 %3 = icmp slt <16 x i8> %1, %2 | |
2637 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 | |
2638 ret <16 x i8> %4 | |
2639 } | |
2640 | |
2641 define <2 x i64> @min_le_v2i64c() { | |
2642 ; SSE2-LABEL: min_le_v2i64c: | |
2643 ; SSE2: # BB#0: | |
2644 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551609,7] | |
2645 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,1] | |
2646 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
2647 ; SSE2-NEXT: movdqa %xmm0, %xmm3 | |
2648 ; SSE2-NEXT: pxor %xmm2, %xmm3 | |
2649 ; SSE2-NEXT: pxor %xmm1, %xmm0 | |
2650 ; SSE2-NEXT: movdqa %xmm0, %xmm4 | |
2651 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 | |
2652 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] | |
2653 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm0 | |
2654 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] | |
2655 ; SSE2-NEXT: pand %xmm5, %xmm0 | |
2656 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] | |
2657 ; SSE2-NEXT: por %xmm0, %xmm3 | |
2658 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 | |
2659 ; SSE2-NEXT: pxor %xmm3, %xmm0 | |
2660 ; SSE2-NEXT: pandn %xmm1, %xmm3 | |
2661 ; SSE2-NEXT: pandn %xmm2, %xmm0 | |
2662 ; SSE2-NEXT: por %xmm3, %xmm0 | |
2663 ; SSE2-NEXT: retq | |
2664 ; | |
2665 ; SSE41-LABEL: min_le_v2i64c: | |
2666 ; SSE41: # BB#0: | |
2667 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7] | |
2668 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1] | |
2669 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] | |
2670 ; SSE41-NEXT: movdqa %xmm0, %xmm3 | |
2671 ; SSE41-NEXT: pxor %xmm1, %xmm3 | |
2672 ; SSE41-NEXT: pxor %xmm2, %xmm0 | |
2673 ; SSE41-NEXT: movdqa %xmm0, %xmm4 | |
2674 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4 | |
2675 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] | |
2676 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0 | |
2677 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] | |
2678 ; SSE41-NEXT: pand %xmm5, %xmm0 | |
2679 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] | |
2680 ; SSE41-NEXT: por %xmm0, %xmm3 | |
2681 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 | |
2682 ; SSE41-NEXT: pxor %xmm3, %xmm0 | |
2683 ; SSE41-NEXT: blendvpd %xmm2, %xmm1 | |
2684 ; SSE41-NEXT: movapd %xmm1, %xmm0 | |
2685 ; SSE41-NEXT: retq | |
2686 ; | |
2687 ; SSE42-LABEL: min_le_v2i64c: | |
2688 ; SSE42: # BB#0: | |
2689 ; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7] | |
2690 ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1] | |
2691 ; SSE42-NEXT: movdqa %xmm2, %xmm3 | |
2692 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm3 | |
2693 ; SSE42-NEXT: pcmpeqd %xmm0, %xmm0 | |
2694 ; SSE42-NEXT: pxor %xmm3, %xmm0 | |
2695 ; SSE42-NEXT: blendvpd %xmm2, %xmm1 | |
2696 ; SSE42-NEXT: movapd %xmm1, %xmm0 | |
2697 ; SSE42-NEXT: retq | |
2698 ; | |
2699 ; AVX-LABEL: min_le_v2i64c: | |
2700 ; AVX: # BB#0: | |
2701 ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [18446744073709551609,7] | |
2702 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551615,1] | |
2703 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 | |
2704 ; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 | |
2705 ; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2 | |
2706 ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 | |
2707 ; AVX-NEXT: retq | |
2708 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0 | |
2709 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0 | |
2710 %3 = icmp sle <2 x i64> %1, %2 | |
2711 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 | |
2712 ret <2 x i64> %4 | |
2713 } | |
2714 | |
define <4 x i64> @min_le_v4i64c() {
; SSE2-LABEL: min_le_v4i64c:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [18446744073709551609,18446744073709551615]
; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551615,18446744073709551609]
; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [7,1]
; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,0,2147483648,0]
; SSE2-NEXT: movdqa %xmm7, %xmm0
; SSE2-NEXT: pxor %xmm9, %xmm0
; SSE2-NEXT: movdqa %xmm7, %xmm1
; SSE2-NEXT: pxor %xmm8, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm6
; SSE2-NEXT: pcmpgtd %xmm0, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm6
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: movdqa %xmm6, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm7, %xmm2
; SSE2-NEXT: pxor %xmm5, %xmm2
; SSE2-NEXT: pxor %xmm10, %xmm7
; SSE2-NEXT: movdqa %xmm7, %xmm3
; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
; SSE2-NEXT: pand %xmm4, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT: por %xmm2, %xmm3
; SSE2-NEXT: pxor %xmm3, %xmm0
; SSE2-NEXT: pandn %xmm10, %xmm3
; SSE2-NEXT: pandn %xmm5, %xmm0
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: pandn %xmm8, %xmm6
; SSE2-NEXT: pandn %xmm9, %xmm1
; SSE2-NEXT: por %xmm6, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: min_le_v4i64c:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [18446744073709551609,18446744073709551615]
; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7,1]
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: pxor %xmm1, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm6
; SSE41-NEXT: pxor %xmm8, %xmm6
; SSE41-NEXT: movdqa %xmm6, %xmm7
; SSE41-NEXT: pcmpgtd %xmm3, %xmm7
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm3, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE41-NEXT: pand %xmm4, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
; SSE41-NEXT: por %xmm6, %xmm3
; SSE41-NEXT: pcmpeqd %xmm4, %xmm4
; SSE41-NEXT: pxor %xmm4, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm6
; SSE41-NEXT: pxor %xmm2, %xmm6
; SSE41-NEXT: pxor %xmm9, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm7
; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm5, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
; SSE41-NEXT: por %xmm6, %xmm0
; SSE41-NEXT: pxor %xmm4, %xmm0
; SSE41-NEXT: blendvpd %xmm9, %xmm2
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm8, %xmm1
; SSE41-NEXT: movapd %xmm2, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_le_v4i64c:
; SSE42: # BB#0:
; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551609,18446744073709551615]
; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [1,7]
; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [7,1]
; SSE42-NEXT: movdqa %xmm5, %xmm3
; SSE42-NEXT: pcmpgtq %xmm1, %xmm3
; SSE42-NEXT: pcmpeqd %xmm6, %xmm6
; SSE42-NEXT: pxor %xmm6, %xmm3
; SSE42-NEXT: movdqa %xmm4, %xmm0
; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
; SSE42-NEXT: pxor %xmm6, %xmm0
; SSE42-NEXT: blendvpd %xmm4, %xmm2
; SSE42-NEXT: movdqa %xmm3, %xmm0
; SSE42-NEXT: blendvpd %xmm5, %xmm1
; SSE42-NEXT: movapd %xmm2, %xmm0
; SSE42-NEXT: retq
;
; AVX1-LABEL: min_le_v4i64c:
; AVX1: # BB#0:
; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,7]
; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [18446744073709551609,18446744073709551615]
; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: min_le_v4i64c:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [18446744073709551609,18446744073709551615,1,7]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [18446744073709551615,18446744073709551609,7,1]
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: min_le_v4i64c:
; AVX512: # BB#0:
; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [18446744073709551609,18446744073709551615,1,7]
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [18446744073709551615,18446744073709551609,7,1]
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
  %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
  %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
  %3 = icmp sle <4 x i64> %1, %2
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  ret <4 x i64> %4
}

define <4 x i32> @min_le_v4i32c() {
; SSE2-LABEL: min_le_v4i32c:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4294967289,4294967295,1,7]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967289,7,1]
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm3, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm3
; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: min_le_v4i32c:
; SSE41: # BB#0:
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_le_v4i32c:
; SSE42: # BB#0:
; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE42-NEXT: retq
;
; AVX-LABEL: min_le_v4i32c:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX-NEXT: retq
  %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
  %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
  %3 = icmp sle <4 x i32> %1, %2
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  ret <4 x i32> %4
}

define <8 x i32> @min_le_v8i32c() {
; SSE2-LABEL: min_le_v8i32c:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967289,4294967291,4294967293,4294967295]
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,3,5,7]
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4294967295,4294967293,4294967291,4294967289]
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [7,5,3,1]
; SSE2-NEXT: movdqa %xmm3, %xmm6
; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: movdqa %xmm6, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm7
; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
; SSE2-NEXT: pxor %xmm7, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm7
; SSE2-NEXT: pandn %xmm4, %xmm0
; SSE2-NEXT: por %xmm7, %xmm0
; SSE2-NEXT: pandn %xmm3, %xmm6
; SSE2-NEXT: pandn %xmm5, %xmm1
; SSE2-NEXT: por %xmm6, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: min_le_v8i32c:
; SSE41: # BB#0:
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
; SSE41-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_le_v8i32c:
; SSE42: # BB#0:
; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
; SSE42-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE42-NEXT: retq
;
; AVX-LABEL: min_le_v8i32c:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
; AVX-NEXT: retq
  %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
  %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
  %3 = icmp sle <8 x i32> %1, %2
  %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
  ret <8 x i32> %4
}

define <8 x i16> @min_le_v8i16c() {
; SSE-LABEL: min_le_v8i16c:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; SSE-NEXT: retq
;
; AVX-LABEL: min_le_v8i16c:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; AVX-NEXT: retq
  %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
  %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
  %3 = icmp sle <8 x i16> %1, %2
  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
  ret <8 x i16> %4
}

define <16 x i16> @min_le_v16i16c() {
; SSE-LABEL: min_le_v16i16c:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: min_le_v16i16c:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
; AVX-NEXT: retq
  %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
  %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
  %3 = icmp sle <16 x i16> %1, %2
  %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
  ret <16 x i16> %4
}

define <16 x i8> @min_le_v16i8c() {
; SSE2-LABEL: min_le_v16i8c:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,254,253,252,251,250,249,0,7,6,5,4,3,2,1,0]
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: pcmpgtb %xmm2, %xmm3
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm3, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm3
; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: min_le_v16i8c:
; SSE41: # BB#0:
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_le_v16i8c:
; SSE42: # BB#0:
; SSE42-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; SSE42-NEXT: retq
;
; AVX-LABEL: min_le_v16i8c:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; AVX-NEXT: retq
  %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
  %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
  %3 = icmp sle <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ret <16 x i8> %4
}