test/CodeGen/X86/vector-shift-lshr-256.ll @ 95:afa8332a0e37 (LLVM 3.8)
comparison against 84:f3e34b893a5f
author:   Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date:     Tue, 13 Oct 2015 17:48:58 +0900
children: 7d135dc70f03
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
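; The four RUN lines above compile the same IR for AVX1, AVX2, XOP+AVX and
; XOP+AVX2 targets; the shared ALL/AVX/XOP prefixes let checks that are common
; to several of those configurations be written once.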

;
; Variable Shifts
;

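; A note on the expected lowering (taken from the checks below): AVX1 has no
; 256-bit variable shifts, so each test splits the vector into 128-bit halves;
; AVX2 uses the vpsrlv* instructions directly for i64/i32 and widens i16 to
; i32; XOP negates the shift amounts and uses vpshl*; the i8 cases are built
; from vpsrlw/vpand steps selected with vpblendvb.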
define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: var_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v4i64:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; XOPAVX1-NEXT: vpshlq %xmm2, %xmm4, %xmm2
; XOPAVX1-NEXT: vpsubq %xmm1, %xmm3, %xmm1
; XOPAVX1-NEXT: vpshlq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_shift_v4i64:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %shift = lshr <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: var_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpsrldq {{.*#+}} xmm4 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrld %xmm4, %xmm2, %xmm4
; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm5
; AVX1-NEXT: vpsrld %xmm5, %xmm2, %xmm5
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm6 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
; AVX1-NEXT: vpsrld %xmm6, %xmm2, %xmm6
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
; AVX1-NEXT: vpsrld %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
; AVX1-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrld %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm4
; AVX1-NEXT: vpsrld %xmm4, %xmm0, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm4 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
; AVX1-NEXT: vpsrld %xmm4, %xmm0, %xmm4
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v8i32:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; XOPAVX1-NEXT: vpshld %xmm2, %xmm4, %xmm2
; XOPAVX1-NEXT: vpsubd %xmm1, %xmm3, %xmm1
; XOPAVX1-NEXT: vpshld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_shift_v8i32:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %shift = lshr <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: var_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpsllw $12, %xmm2, %xmm3
; AVX1-NEXT: vpsllw $4, %xmm2, %xmm2
; AVX1-NEXT: vpor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm5
; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm4
; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm4
; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $1, %xmm2, %xmm4
; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsllw $12, %xmm1, %xmm3
; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm4
; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vpsrld $16, %ymm3, %ymm3
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v16i16:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; XOPAVX1-NEXT: vpshlw %xmm2, %xmm4, %xmm2
; XOPAVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; XOPAVX1-NEXT: vpshlw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_shift_v16i16:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX2-NEXT: vpsubw %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
; XOPAVX2-NEXT: vpshlw %xmm2, %xmm4, %xmm2
; XOPAVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; XOPAVX2-NEXT: vpshlw %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %shift = lshr <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: var_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpsllw $5, %xmm5, %xmm5
; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $1, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm3
; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm3
; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v32i8:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; XOPAVX1-NEXT: vpshlb %xmm2, %xmm4, %xmm2
; XOPAVX1-NEXT: vpsubb %xmm1, %xmm3, %xmm1
; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_shift_v32i8:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX2-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
; XOPAVX2-NEXT: vpshlb %xmm2, %xmm4, %xmm2
; XOPAVX2-NEXT: vpsubb %xmm1, %xmm3, %xmm1
; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %shift = lshr <32 x i8> %a, %b
  ret <32 x i8> %shift
}

;
; Uniform Variable Shifts
;

define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_shift_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v4i64:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_shift_v4i64:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
  %shift = lshr <4 x i64> %a, %splat
  ret <4 x i64> %shift
}

define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_shift_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_shift_v8i32:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; XOPAVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
  %shift = lshr <8 x i32> %a, %splat
  ret <8 x i32> %shift
}

define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovd %xmm1, %eax
; AVX1-NEXT: movzwl %ax, %eax
; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_shift_v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %xmm1, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vmovd %xmm1, %eax
; XOPAVX1-NEXT: movzwl %ax, %eax
; XOPAVX1-NEXT: vmovd %eax, %xmm1
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_shift_v16i16:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vmovd %xmm1, %eax
; XOPAVX2-NEXT: movzwl %ax, %eax
; XOPAVX2-NEXT: vmovd %eax, %xmm1
; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
  %shift = lshr <16 x i16> %a, %splat
  ret <16 x i16> %shift
}

define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm8, %xmm3, %xmm3
; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm6
; AVX1-NEXT: vpblendvb %xmm6, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $1, %xmm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT: vpaddb %xmm6, %xmm6, %xmm4
; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
; AVX1-NEXT: vpand %xmm8, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm6, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm7, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_shift_v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v32i8:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpshlb %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_shift_v32i8:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpbroadcastb %xmm1, %ymm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX2-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
; XOPAVX2-NEXT: vpshlb %xmm2, %xmm4, %xmm2
; XOPAVX2-NEXT: vpsubb %xmm1, %xmm3, %xmm1
; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
  %shift = lshr <32 x i8> %a, %splat
  ret <32 x i8> %shift
}

;
; Constant Shifts
;

define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: constant_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsrlq $62, %xmm1, %xmm2
; AVX1-NEXT: vpsrlq $31, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsrlq $7, %xmm0, %xmm2
; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v4i64:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vpshlq %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpshlq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: constant_shift_v4i64:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
  ret <4 x i64> %shift
}

define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: constant_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrld $7, %xmm0, %xmm1
; AVX1-NEXT: vpsrld $5, %xmm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpsrld $6, %xmm0, %xmm2
; AVX1-NEXT: vpsrld $4, %xmm0, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrld $7, %xmm0, %xmm2
; AVX1-NEXT: vpsrld $9, %xmm0, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsrld $8, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v8i32:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: constant_shift_v8i32:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <8 x i32> %shift
}

define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: constant_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32896,37008,41120,45232,49344,53456,57568,61680]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [256,8480,16704,24928,33152,41376,49600,57824]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [512,16960,33408,49856,768,17216,33664,50112]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1024,33920,1280,34176,1536,34432,1792,34688]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,4112,8224,12336,16448,20560,24672,28784]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,8224,16448,24672,32896,41120,49344,57568]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,16448,32896,49344,256,16704,33152,49600]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,32896,256,33152,512,33408,768,33664]
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vpsrld $16, %ymm3, %ymm3
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11]
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v16i16:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vpshlw %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpshlw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: constant_shift_v16i16:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; XOPAVX2-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
; XOPAVX2-NEXT: vpshlw %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpshlw %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <16 x i16> %shift
}

define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: constant_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm8, %xmm2, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; AVX1-NEXT: vpsllw $5, %xmm4, %xmm4
; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX1-NEXT: vpand %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpaddb %xmm4, %xmm4, %xmm6
; AVX1-NEXT: vpblendvb %xmm6, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vpand %xmm7, %xmm2, %xmm2
; AVX1-NEXT: vpaddb %xmm6, %xmm6, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm2
; AVX1-NEXT: vpand %xmm8, %xmm2, %xmm2
; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
; AVX1-NEXT: vpand %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpblendvb %xmm6, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
; AVX1-NEXT: vpand %xmm7, %xmm2, %xmm2
; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v32i8:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT: vpsubb {{.*}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpshlb %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: constant_shift_v32i8:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; XOPAVX2-NEXT: vpsubb {{.*}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; XOPAVX2-NEXT: vpshlb %xmm1, %xmm2, %xmm2
; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <32 x i8> %shift
}

;
; Uniform Constant Shifts
;

define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrlq $7, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrlq $7, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_shift_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlq $7, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v4i64:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpsrlq $7, %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpsrlq $7, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_shift_v4i64:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpsrlq $7, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
  ret <4 x i64> %shift
}

define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrld $5, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrld $5, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_shift_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrld $5, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v8i32:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpsrld $5, %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpsrld $5, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_shift_v8i32:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpsrld $5, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %shift
}

define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrlw $3, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_shift_v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v16i16:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpsrlw $3, %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpsrlw $3, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_shift_v16i16:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsrlw $3, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_shift_v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v32i8:
; XOPAVX1: # BB#0:
; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT: vpsubb {{.*}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpshlb %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_shift_v32i8:
; XOPAVX2: # BB#0:
; XOPAVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %shift
}