comparison test/CodeGen/X86/vec_sdiv_to_shift.ll @ 120:1172e4bd9c6f

update 4.0.0
author mir3636
date Fri, 25 Nov 2016 19:14:25 +0900
parents afa8332a0e37
children 803732b1fca8
comparison
equal deleted inserted replaced
101:34baf5011add 120:1172e4bd9c6f
1 ; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=+avx2 | FileCheck %s 1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
3 5
4 define <8 x i16> @sdiv_vec8x16(<8 x i16> %var) { 6 define <8 x i16> @sdiv_vec8x16(<8 x i16> %var) {
5 entry: 7 ; SSE-LABEL: sdiv_vec8x16:
6 ; CHECK: sdiv_vec8x16 8 ; SSE: # BB#0: # %entry
7 ; CHECK: psraw $15 9 ; SSE-NEXT: movdqa %xmm0, %xmm1
8 ; CHECK: vpsrlw $11 10 ; SSE-NEXT: psraw $15, %xmm1
9 ; CHECK: vpaddw 11 ; SSE-NEXT: psrlw $11, %xmm1
10 ; CHECK: vpsraw $5 12 ; SSE-NEXT: paddw %xmm0, %xmm1
11 ; CHECK: ret 13 ; SSE-NEXT: psraw $5, %xmm1
14 ; SSE-NEXT: movdqa %xmm1, %xmm0
15 ; SSE-NEXT: retq
16 ;
17 ; AVX-LABEL: sdiv_vec8x16:
18 ; AVX: # BB#0: # %entry
19 ; AVX-NEXT: vpsraw $15, %xmm0, %xmm1
20 ; AVX-NEXT: vpsrlw $11, %xmm1, %xmm1
21 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
22 ; AVX-NEXT: vpsraw $5, %xmm0, %xmm0
23 ; AVX-NEXT: retq
24 entry:
12 %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32> 25 %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
13 ret <8 x i16> %0 26 ret <8 x i16> %0
14 } 27 }
15 28
16 define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize { 29 define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
17 entry: 30 ; SSE-LABEL: sdiv_vec8x16_minsize:
18 ; CHECK: sdiv_vec8x16_minsize 31 ; SSE: # BB#0: # %entry
19 ; CHECK: psraw $15 32 ; SSE-NEXT: movdqa %xmm0, %xmm1
20 ; CHECK: vpsrlw $11 33 ; SSE-NEXT: psraw $15, %xmm1
21 ; CHECK: vpaddw 34 ; SSE-NEXT: psrlw $11, %xmm1
22 ; CHECK: vpsraw $5 35 ; SSE-NEXT: paddw %xmm0, %xmm1
23 ; CHECK: ret 36 ; SSE-NEXT: psraw $5, %xmm1
37 ; SSE-NEXT: movdqa %xmm1, %xmm0
38 ; SSE-NEXT: retq
39 ;
40 ; AVX-LABEL: sdiv_vec8x16_minsize:
41 ; AVX: # BB#0: # %entry
42 ; AVX-NEXT: vpsraw $15, %xmm0, %xmm1
43 ; AVX-NEXT: vpsrlw $11, %xmm1, %xmm1
44 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
45 ; AVX-NEXT: vpsraw $5, %xmm0, %xmm0
46 ; AVX-NEXT: retq
47 entry:
24 %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32> 48 %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
25 ret <8 x i16> %0 49 ret <8 x i16> %0
26 } 50 }
27 51
28
29 define <4 x i32> @sdiv_zero(<4 x i32> %var) { 52 define <4 x i32> @sdiv_zero(<4 x i32> %var) {
30 entry: 53 ; SSE-LABEL: sdiv_zero:
31 ; CHECK: sdiv_zero 54 ; SSE: # BB#0: # %entry
32 ; CHECK-NOT: sra 55 ; SSE-NEXT: pextrd $1, %xmm0, %eax
33 ; CHECK: ret 56 ; SSE-NEXT: xorl %esi, %esi
57 ; SSE-NEXT: cltd
58 ; SSE-NEXT: idivl %esi
59 ; SSE-NEXT: movl %eax, %ecx
60 ; SSE-NEXT: movd %xmm0, %eax
61 ; SSE-NEXT: cltd
62 ; SSE-NEXT: idivl %esi
63 ; SSE-NEXT: movd %eax, %xmm1
64 ; SSE-NEXT: pinsrd $1, %ecx, %xmm1
65 ; SSE-NEXT: pextrd $2, %xmm0, %eax
66 ; SSE-NEXT: cltd
67 ; SSE-NEXT: idivl %esi
68 ; SSE-NEXT: pinsrd $2, %eax, %xmm1
69 ; SSE-NEXT: pextrd $3, %xmm0, %eax
70 ; SSE-NEXT: cltd
71 ; SSE-NEXT: idivl %esi
72 ; SSE-NEXT: pinsrd $3, %eax, %xmm1
73 ; SSE-NEXT: movdqa %xmm1, %xmm0
74 ; SSE-NEXT: retq
75 ;
76 ; AVX-LABEL: sdiv_zero:
77 ; AVX: # BB#0: # %entry
78 ; AVX-NEXT: vpextrd $1, %xmm0, %eax
79 ; AVX-NEXT: xorl %esi, %esi
80 ; AVX-NEXT: cltd
81 ; AVX-NEXT: idivl %esi
82 ; AVX-NEXT: movl %eax, %ecx
83 ; AVX-NEXT: vmovd %xmm0, %eax
84 ; AVX-NEXT: cltd
85 ; AVX-NEXT: idivl %esi
86 ; AVX-NEXT: vmovd %eax, %xmm1
87 ; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
88 ; AVX-NEXT: vpextrd $2, %xmm0, %eax
89 ; AVX-NEXT: cltd
90 ; AVX-NEXT: idivl %esi
91 ; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
92 ; AVX-NEXT: vpextrd $3, %xmm0, %eax
93 ; AVX-NEXT: cltd
94 ; AVX-NEXT: idivl %esi
95 ; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
96 ; AVX-NEXT: retq
97 entry:
34 %0 = sdiv <4 x i32> %var, <i32 0, i32 0, i32 0, i32 0> 98 %0 = sdiv <4 x i32> %var, <i32 0, i32 0, i32 0, i32 0>
35 ret <4 x i32> %0 99 ret <4 x i32> %0
36 } 100 }
37 101
38 define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) { 102 define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) {
39 entry: 103 ; SSE-LABEL: sdiv_vec4x32:
40 ; CHECK: sdiv_vec4x32 104 ; SSE: # BB#0: # %entry
41 ; CHECK: vpsrad $31 105 ; SSE-NEXT: movdqa %xmm0, %xmm1
42 ; CHECK: vpsrld $28 106 ; SSE-NEXT: psrad $31, %xmm1
43 ; CHECK: vpaddd 107 ; SSE-NEXT: psrld $28, %xmm1
44 ; CHECK: vpsrad $4 108 ; SSE-NEXT: paddd %xmm0, %xmm1
45 ; CHECK: ret 109 ; SSE-NEXT: psrad $4, %xmm1
110 ; SSE-NEXT: movdqa %xmm1, %xmm0
111 ; SSE-NEXT: retq
112 ;
113 ; AVX-LABEL: sdiv_vec4x32:
114 ; AVX: # BB#0: # %entry
115 ; AVX-NEXT: vpsrad $31, %xmm0, %xmm1
116 ; AVX-NEXT: vpsrld $28, %xmm1, %xmm1
117 ; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
118 ; AVX-NEXT: vpsrad $4, %xmm0, %xmm0
119 ; AVX-NEXT: retq
120 entry:
46 %0 = sdiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16> 121 %0 = sdiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16>
47 ret <4 x i32> %0 122 ret <4 x i32> %0
48 } 123 }
49 124
50 define <4 x i32> @sdiv_negative(<4 x i32> %var) { 125 define <4 x i32> @sdiv_negative(<4 x i32> %var) {
51 entry: 126 ; SSE-LABEL: sdiv_negative:
52 ; CHECK: sdiv_negative 127 ; SSE: # BB#0: # %entry
53 ; CHECK: vpsrad $31 128 ; SSE-NEXT: movdqa %xmm0, %xmm1
54 ; CHECK: vpsrld $28 129 ; SSE-NEXT: psrad $31, %xmm1
55 ; CHECK: vpaddd 130 ; SSE-NEXT: psrld $28, %xmm1
56 ; CHECK: vpsrad $4 131 ; SSE-NEXT: paddd %xmm0, %xmm1
57 ; CHECK: vpsubd 132 ; SSE-NEXT: psrad $4, %xmm1
58 ; CHECK: ret 133 ; SSE-NEXT: pxor %xmm0, %xmm0
134 ; SSE-NEXT: psubd %xmm1, %xmm0
135 ; SSE-NEXT: retq
136 ;
137 ; AVX-LABEL: sdiv_negative:
138 ; AVX: # BB#0: # %entry
139 ; AVX-NEXT: vpsrad $31, %xmm0, %xmm1
140 ; AVX-NEXT: vpsrld $28, %xmm1, %xmm1
141 ; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
142 ; AVX-NEXT: vpsrad $4, %xmm0, %xmm0
143 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
144 ; AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
145 ; AVX-NEXT: retq
146 entry:
59 %0 = sdiv <4 x i32> %var, <i32 -16, i32 -16, i32 -16, i32 -16> 147 %0 = sdiv <4 x i32> %var, <i32 -16, i32 -16, i32 -16, i32 -16>
60 ret <4 x i32> %0 148 ret <4 x i32> %0
61 } 149 }
62 150
63 define <8 x i32> @sdiv8x32(<8 x i32> %var) { 151 define <8 x i32> @sdiv8x32(<8 x i32> %var) {
64 entry: 152 ; SSE-LABEL: sdiv8x32:
65 ; CHECK: sdiv8x32 153 ; SSE: # BB#0: # %entry
66 ; CHECK: vpsrad $31 154 ; SSE-NEXT: movdqa %xmm0, %xmm2
67 ; CHECK: vpsrld $26 155 ; SSE-NEXT: psrad $31, %xmm2
68 ; CHECK: vpaddd 156 ; SSE-NEXT: psrld $26, %xmm2
69 ; CHECK: vpsrad $6 157 ; SSE-NEXT: paddd %xmm0, %xmm2
70 ; CHECK: ret 158 ; SSE-NEXT: psrad $6, %xmm2
159 ; SSE-NEXT: movdqa %xmm1, %xmm3
160 ; SSE-NEXT: psrad $31, %xmm3
161 ; SSE-NEXT: psrld $26, %xmm3
162 ; SSE-NEXT: paddd %xmm1, %xmm3
163 ; SSE-NEXT: psrad $6, %xmm3
164 ; SSE-NEXT: movdqa %xmm2, %xmm0
165 ; SSE-NEXT: movdqa %xmm3, %xmm1
166 ; SSE-NEXT: retq
167 ;
168 ; AVX1-LABEL: sdiv8x32:
169 ; AVX1: # BB#0: # %entry
170 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
171 ; AVX1-NEXT: vpsrld $26, %xmm1, %xmm1
172 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
173 ; AVX1-NEXT: vpsrad $6, %xmm1, %xmm1
174 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
175 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
176 ; AVX1-NEXT: vpsrld $26, %xmm2, %xmm2
177 ; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
178 ; AVX1-NEXT: vpsrad $6, %xmm0, %xmm0
179 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
180 ; AVX1-NEXT: retq
181 ;
182 ; AVX2-LABEL: sdiv8x32:
183 ; AVX2: # BB#0: # %entry
184 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1
185 ; AVX2-NEXT: vpsrld $26, %ymm1, %ymm1
186 ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
187 ; AVX2-NEXT: vpsrad $6, %ymm0, %ymm0
188 ; AVX2-NEXT: retq
189 entry:
71 %0 = sdiv <8 x i32> %var, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64> 190 %0 = sdiv <8 x i32> %var, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
72 ret <8 x i32> %0 191 ret <8 x i32> %0
73 } 192 }
74 193
75 define <16 x i16> @sdiv16x16(<16 x i16> %var) { 194 define <16 x i16> @sdiv16x16(<16 x i16> %var) {
76 entry: 195 ; SSE-LABEL: sdiv16x16:
77 ; CHECK: sdiv16x16 196 ; SSE: # BB#0: # %entry
78 ; CHECK: vpsraw $15 197 ; SSE-NEXT: movdqa %xmm0, %xmm2
79 ; CHECK: vpsrlw $14 198 ; SSE-NEXT: psraw $15, %xmm2
80 ; CHECK: vpaddw 199 ; SSE-NEXT: psrlw $14, %xmm2
81 ; CHECK: vpsraw $2 200 ; SSE-NEXT: paddw %xmm0, %xmm2
82 ; CHECK: ret 201 ; SSE-NEXT: psraw $2, %xmm2
202 ; SSE-NEXT: movdqa %xmm1, %xmm3
203 ; SSE-NEXT: psraw $15, %xmm3
204 ; SSE-NEXT: psrlw $14, %xmm3
205 ; SSE-NEXT: paddw %xmm1, %xmm3
206 ; SSE-NEXT: psraw $2, %xmm3
207 ; SSE-NEXT: movdqa %xmm2, %xmm0
208 ; SSE-NEXT: movdqa %xmm3, %xmm1
209 ; SSE-NEXT: retq
210 ;
211 ; AVX1-LABEL: sdiv16x16:
212 ; AVX1: # BB#0: # %entry
213 ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1
214 ; AVX1-NEXT: vpsrlw $14, %xmm1, %xmm1
215 ; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm1
216 ; AVX1-NEXT: vpsraw $2, %xmm1, %xmm1
217 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
218 ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2
219 ; AVX1-NEXT: vpsrlw $14, %xmm2, %xmm2
220 ; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm0
221 ; AVX1-NEXT: vpsraw $2, %xmm0, %xmm0
222 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
223 ; AVX1-NEXT: retq
224 ;
225 ; AVX2-LABEL: sdiv16x16:
226 ; AVX2: # BB#0: # %entry
227 ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm1
228 ; AVX2-NEXT: vpsrlw $14, %ymm1, %ymm1
229 ; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
230 ; AVX2-NEXT: vpsraw $2, %ymm0, %ymm0
231 ; AVX2-NEXT: retq
232 entry:
83 %a0 = sdiv <16 x i16> %var, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4> 233 %a0 = sdiv <16 x i16> %var, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
84 ret <16 x i16> %a0 234 ret <16 x i16> %a0
85 } 235 }
86 236
87 ; CHECK: sdiv_non_splat
88 ; CHECK: idivl
89 ; CHECK: ret
90 define <4 x i32> @sdiv_non_splat(<4 x i32> %x) { 237 define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
238 ; SSE-LABEL: sdiv_non_splat:
239 ; SSE: # BB#0:
240 ; SSE-NEXT: pextrd $1, %xmm0, %eax
241 ; SSE-NEXT: xorl %ecx, %ecx
242 ; SSE-NEXT: cltd
243 ; SSE-NEXT: idivl %ecx
244 ; SSE-NEXT: movd %xmm0, %edx
245 ; SSE-NEXT: movl %edx, %esi
246 ; SSE-NEXT: shrl $31, %esi
247 ; SSE-NEXT: addl %edx, %esi
248 ; SSE-NEXT: sarl %esi
249 ; SSE-NEXT: movd %esi, %xmm1
250 ; SSE-NEXT: pinsrd $1, %eax, %xmm1
251 ; SSE-NEXT: pextrd $2, %xmm0, %eax
252 ; SSE-NEXT: cltd
253 ; SSE-NEXT: idivl %ecx
254 ; SSE-NEXT: pinsrd $2, %eax, %xmm1
255 ; SSE-NEXT: pextrd $3, %xmm0, %eax
256 ; SSE-NEXT: cltd
257 ; SSE-NEXT: idivl %ecx
258 ; SSE-NEXT: pinsrd $3, %eax, %xmm1
259 ; SSE-NEXT: movdqa %xmm1, %xmm0
260 ; SSE-NEXT: retq
261 ;
262 ; AVX-LABEL: sdiv_non_splat:
263 ; AVX: # BB#0:
264 ; AVX-NEXT: vpextrd $1, %xmm0, %eax
265 ; AVX-NEXT: xorl %ecx, %ecx
266 ; AVX-NEXT: cltd
267 ; AVX-NEXT: idivl %ecx
268 ; AVX-NEXT: vmovd %xmm0, %edx
269 ; AVX-NEXT: movl %edx, %esi
270 ; AVX-NEXT: shrl $31, %esi
271 ; AVX-NEXT: addl %edx, %esi
272 ; AVX-NEXT: sarl %esi
273 ; AVX-NEXT: vmovd %esi, %xmm1
274 ; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
275 ; AVX-NEXT: vpextrd $2, %xmm0, %eax
276 ; AVX-NEXT: cltd
277 ; AVX-NEXT: idivl %ecx
278 ; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
279 ; AVX-NEXT: vpextrd $3, %xmm0, %eax
280 ; AVX-NEXT: cltd
281 ; AVX-NEXT: idivl %ecx
282 ; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
283 ; AVX-NEXT: retq
91 %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0> 284 %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>
92 ret <4 x i32> %y 285 ret <4 x i32> %y
93 } 286 }