CbC_llvm: comparison of test/CodeGen/X86/vec_sdiv_to_shift.ll @ 120:1172e4bd9c6f

description | update 4.0.0
author      | mir3636
date        | Fri, 25 Nov 2016 19:14:25 +0900
parents     | afa8332a0e37
children    | 803732b1fca8
--- test/CodeGen/X86/vec_sdiv_to_shift.ll (101:34baf5011add)
+++ test/CodeGen/X86/vec_sdiv_to_shift.ll (120:1172e4bd9c6f)
@@ -1,93 +1,286 @@
-; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=+avx2 | FileCheck %s
-
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
 
 define <8 x i16> @sdiv_vec8x16(<8 x i16> %var) {
-entry:
-; CHECK: sdiv_vec8x16
-; CHECK: psraw $15
-; CHECK: vpsrlw $11
-; CHECK: vpaddw
-; CHECK: vpsraw $5
-; CHECK: ret
+; SSE-LABEL: sdiv_vec8x16:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psraw $15, %xmm1
+; SSE-NEXT: psrlw $11, %xmm1
+; SSE-NEXT: paddw %xmm0, %xmm1
+; SSE-NEXT: psraw $5, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sdiv_vec8x16:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpsraw $15, %xmm0, %xmm1
+; AVX-NEXT: vpsrlw $11, %xmm1, %xmm1
+; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsraw $5, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
   %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
   ret <8 x i16> %0
 }
 
 define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
-entry:
-; CHECK: sdiv_vec8x16_minsize
-; CHECK: psraw $15
-; CHECK: vpsrlw $11
-; CHECK: vpaddw
-; CHECK: vpsraw $5
-; CHECK: ret
+; SSE-LABEL: sdiv_vec8x16_minsize:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psraw $15, %xmm1
+; SSE-NEXT: psrlw $11, %xmm1
+; SSE-NEXT: paddw %xmm0, %xmm1
+; SSE-NEXT: psraw $5, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sdiv_vec8x16_minsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpsraw $15, %xmm0, %xmm1
+; AVX-NEXT: vpsrlw $11, %xmm1, %xmm1
+; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsraw $5, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
   %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
   ret <8 x i16> %0
 }
 
-
 define <4 x i32> @sdiv_zero(<4 x i32> %var) {
-entry:
-; CHECK: sdiv_zero
-; CHECK-NOT: sra
-; CHECK: ret
+; SSE-LABEL: sdiv_zero:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pextrd $1, %xmm0, %eax
+; SSE-NEXT: xorl %esi, %esi
+; SSE-NEXT: cltd
+; SSE-NEXT: idivl %esi
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: cltd
+; SSE-NEXT: idivl %esi
+; SSE-NEXT: movd %eax, %xmm1
+; SSE-NEXT: pinsrd $1, %ecx, %xmm1
+; SSE-NEXT: pextrd $2, %xmm0, %eax
+; SSE-NEXT: cltd
+; SSE-NEXT: idivl %esi
+; SSE-NEXT: pinsrd $2, %eax, %xmm1
+; SSE-NEXT: pextrd $3, %xmm0, %eax
+; SSE-NEXT: cltd
+; SSE-NEXT: idivl %esi
+; SSE-NEXT: pinsrd $3, %eax, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sdiv_zero:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpextrd $1, %xmm0, %eax
+; AVX-NEXT: xorl %esi, %esi
+; AVX-NEXT: cltd
+; AVX-NEXT: idivl %esi
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: cltd
+; AVX-NEXT: idivl %esi
+; AVX-NEXT: vmovd %eax, %xmm1
+; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrd $2, %xmm0, %eax
+; AVX-NEXT: cltd
+; AVX-NEXT: idivl %esi
+; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrd $3, %xmm0, %eax
+; AVX-NEXT: cltd
+; AVX-NEXT: idivl %esi
+; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; AVX-NEXT: retq
+entry:
   %0 = sdiv <4 x i32> %var, <i32 0, i32 0, i32 0, i32 0>
   ret <4 x i32> %0
 }
 
 define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) {
-entry:
-; CHECK: sdiv_vec4x32
-; CHECK: vpsrad $31
-; CHECK: vpsrld $28
-; CHECK: vpaddd
-; CHECK: vpsrad $4
-; CHECK: ret
+; SSE-LABEL: sdiv_vec4x32:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: psrld $28, %xmm1
+; SSE-NEXT: paddd %xmm0, %xmm1
+; SSE-NEXT: psrad $4, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sdiv_vec4x32:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpsrad $31, %xmm0, %xmm1
+; AVX-NEXT: vpsrld $28, %xmm1, %xmm1
+; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsrad $4, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
   %0 = sdiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16>
   ret <4 x i32> %0
 }
 
 define <4 x i32> @sdiv_negative(<4 x i32> %var) {
-entry:
-; CHECK: sdiv_negative
-; CHECK: vpsrad $31
-; CHECK: vpsrld $28
-; CHECK: vpaddd
-; CHECK: vpsrad $4
-; CHECK: vpsubd
-; CHECK: ret
+; SSE-LABEL: sdiv_negative:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: psrld $28, %xmm1
+; SSE-NEXT: paddd %xmm0, %xmm1
+; SSE-NEXT: psrad $4, %xmm1
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: psubd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sdiv_negative:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpsrad $31, %xmm0, %xmm1
+; AVX-NEXT: vpsrld $28, %xmm1, %xmm1
+; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsrad $4, %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+entry:
   %0 = sdiv <4 x i32> %var, <i32 -16, i32 -16, i32 -16, i32 -16>
   ret <4 x i32> %0
 }
 
 define <8 x i32> @sdiv8x32(<8 x i32> %var) {
-entry:
-; CHECK: sdiv8x32
-; CHECK: vpsrad $31
-; CHECK: vpsrld $26
-; CHECK: vpaddd
-; CHECK: vpsrad $6
-; CHECK: ret
+; SSE-LABEL: sdiv8x32:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: psrad $31, %xmm2
+; SSE-NEXT: psrld $26, %xmm2
+; SSE-NEXT: paddd %xmm0, %xmm2
+; SSE-NEXT: psrad $6, %xmm2
+; SSE-NEXT: movdqa %xmm1, %xmm3
+; SSE-NEXT: psrad $31, %xmm3
+; SSE-NEXT: psrld $26, %xmm3
+; SSE-NEXT: paddd %xmm1, %xmm3
+; SSE-NEXT: psrad $6, %xmm3
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: movdqa %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: sdiv8x32:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
+; AVX1-NEXT: vpsrld $26, %xmm1, %xmm1
+; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpsrad $6, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
+; AVX1-NEXT: vpsrld $26, %xmm2, %xmm2
+; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrad $6, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: sdiv8x32:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1
+; AVX2-NEXT: vpsrld $26, %ymm1, %ymm1
+; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrad $6, %ymm0, %ymm0
+; AVX2-NEXT: retq
+entry:
   %0 = sdiv <8 x i32> %var, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
   ret <8 x i32> %0
 }
 
 define <16 x i16> @sdiv16x16(<16 x i16> %var) {
-entry:
-; CHECK: sdiv16x16
-; CHECK: vpsraw $15
-; CHECK: vpsrlw $14
-; CHECK: vpaddw
-; CHECK: vpsraw $2
-; CHECK: ret
+; SSE-LABEL: sdiv16x16:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: psraw $15, %xmm2
+; SSE-NEXT: psrlw $14, %xmm2
+; SSE-NEXT: paddw %xmm0, %xmm2
+; SSE-NEXT: psraw $2, %xmm2
+; SSE-NEXT: movdqa %xmm1, %xmm3
+; SSE-NEXT: psraw $15, %xmm3
+; SSE-NEXT: psrlw $14, %xmm3
+; SSE-NEXT: paddw %xmm1, %xmm3
+; SSE-NEXT: psraw $2, %xmm3
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: movdqa %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: sdiv16x16:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1
+; AVX1-NEXT: vpsrlw $14, %xmm1, %xmm1
+; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpsraw $2, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2
+; AVX1-NEXT: vpsrlw $14, %xmm2, %xmm2
+; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: sdiv16x16:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpsraw $15, %ymm0, %ymm1
+; AVX2-NEXT: vpsrlw $14, %ymm1, %ymm1
+; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsraw $2, %ymm0, %ymm0
+; AVX2-NEXT: retq
+entry:
   %a0 = sdiv <16 x i16> %var, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
   ret <16 x i16> %a0
 }
 
-; CHECK: sdiv_non_splat
-; CHECK: idivl
-; CHECK: ret
 define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
+; SSE-LABEL: sdiv_non_splat:
+; SSE: # BB#0:
+; SSE-NEXT: pextrd $1, %xmm0, %eax
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: cltd
+; SSE-NEXT: idivl %ecx
+; SSE-NEXT: movd %xmm0, %edx
+; SSE-NEXT: movl %edx, %esi
+; SSE-NEXT: shrl $31, %esi
+; SSE-NEXT: addl %edx, %esi
+; SSE-NEXT: sarl %esi
+; SSE-NEXT: movd %esi, %xmm1
+; SSE-NEXT: pinsrd $1, %eax, %xmm1
+; SSE-NEXT: pextrd $2, %xmm0, %eax
+; SSE-NEXT: cltd
+; SSE-NEXT: idivl %ecx
+; SSE-NEXT: pinsrd $2, %eax, %xmm1
+; SSE-NEXT: pextrd $3, %xmm0, %eax
+; SSE-NEXT: cltd
+; SSE-NEXT: idivl %ecx
+; SSE-NEXT: pinsrd $3, %eax, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sdiv_non_splat:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrd $1, %xmm0, %eax
+; AVX-NEXT: xorl %ecx, %ecx
+; AVX-NEXT: cltd
+; AVX-NEXT: idivl %ecx
+; AVX-NEXT: vmovd %xmm0, %edx
+; AVX-NEXT: movl %edx, %esi
+; AVX-NEXT: shrl $31, %esi
+; AVX-NEXT: addl %edx, %esi
+; AVX-NEXT: sarl %esi
+; AVX-NEXT: vmovd %esi, %xmm1
+; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrd $2, %xmm0, %eax
+; AVX-NEXT: cltd
+; AVX-NEXT: idivl %ecx
+; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrd $3, %xmm0, %eax
+; AVX-NEXT: cltd
+; AVX-NEXT: idivl %ecx
+; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; AVX-NEXT: retq
   %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>
   ret <4 x i32> %y
 }
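
A note for readers tracing the regenerated assertions: the power-of-two splat divisors above are all lowered to the same bias-then-arithmetic-shift pattern (for example psraw $15 / psrlw $11 / paddw / psraw $5 for a divide by 32), the negative splat divisor (-16) uses that pattern followed by a subtraction from zero (psubd / vpsubd), while the zero and non-splat divisors are not turned into shifts and expand to per-element idivl. The following scalar C sketch models one i16 lane of the divide-by-32 sequence checked in @sdiv_vec8x16; the function name and the exhaustive check in main are illustrative only and are not part of the test file.

    #include <assert.h>
    #include <stdint.h>

    /* Scalar model of the per-lane sequence checked for @sdiv_vec8x16:
     *   psraw $15  ->  sign = x >> 15                (0 or -1)
     *   psrlw $11  ->  bias = (uint16_t)sign >> 11   (0 or 31, i.e. divisor - 1)
     *   paddw      ->  x + bias
     *   psraw $5   ->  (x + bias) >> 5               (arithmetic shift, log2(32) = 5)
     * Assumes >> on a negative value is an arithmetic shift, as in the x86
     * code this test checks. */
    static int16_t sdiv32_via_shifts(int16_t x) {
        int16_t sign = (int16_t)(x >> 15);          /* 0 for x >= 0, -1 for x < 0 */
        uint16_t bias = (uint16_t)sign >> 11;       /* 0 or 31 */
        return (int16_t)((int16_t)(x + bias) >> 5); /* truncating divide by 32 */
    }

    int main(void) {
        /* Compare against C's truncating division for every i16 value. */
        for (int32_t i = INT16_MIN; i <= INT16_MAX; ++i) {
            int16_t x = (int16_t)i;
            assert(sdiv32_via_shifts(x) == (int16_t)(x / 32));
        }
        return 0;
    }

Adding divisor-1 only to negative inputs is what makes the final arithmetic shift round toward zero, matching the semantics of LLVM's sdiv, which is why the plain shift alone would be wrong for negative lanes.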