comparison test/CodeGen/X86/scalar_widen_div.ll @ 134:3a76565eade5 LLVM5.0.1

update 5.0.1
author mir3636
date Sat, 17 Feb 2018 09:57:20 +0900
parents 803732b1fca8
children c2174574ed3a
comparison
equal deleted inserted replaced
133:c60214abe0e8 134:3a76565eade5
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s 2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s
2 3
3 ; Verify when widening a divide/remainder operation, we only generate a 4 ; Verify when widening a divide/remainder operation, we only generate a
4 ; divide/rem per element since divide/remainder can trap. 5 ; divide/rem per element since divide/remainder can trap.
5 6
6 ; CHECK: vectorDiv 7 ; CHECK: vectorDiv
7 define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind { 8 define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind {
8 ; CHECK: idivq 9 ; CHECK-LABEL: vectorDiv:
9 ; CHECK: idivq 10 ; CHECK: # %bb.0: # %entry
10 ; CHECK-NOT: idivl 11 ; CHECK-NEXT: movq %rdx, %r8
11 ; CHECK: ret 12 ; CHECK-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
13 ; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
14 ; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
15 ; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rcx
16 ; CHECK-NEXT: pmovsxdq (%rdi,%rcx,8), %xmm0
17 ; CHECK-NEXT: pmovsxdq (%rsi,%rcx,8), %xmm1
18 ; CHECK-NEXT: pextrq $1, %xmm0, %rax
19 ; CHECK-NEXT: pextrq $1, %xmm1, %rsi
20 ; CHECK-NEXT: cqto
21 ; CHECK-NEXT: idivq %rsi
22 ; CHECK-NEXT: movq %rax, %xmm2
23 ; CHECK-NEXT: movq %xmm0, %rax
24 ; CHECK-NEXT: movq %xmm1, %rsi
25 ; CHECK-NEXT: cqto
26 ; CHECK-NEXT: idivq %rsi
27 ; CHECK-NEXT: movq %rax, %xmm0
28 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
29 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
30 ; CHECK-NEXT: movq %xmm0, (%r8,%rcx,8)
31 ; CHECK-NEXT: retq
12 entry: 32 entry:
13 %nsource.addr = alloca <2 x i32> addrspace(1)*, align 4 33 %nsource.addr = alloca <2 x i32> addrspace(1)*, align 4
14 %dsource.addr = alloca <2 x i32> addrspace(1)*, align 4 34 %dsource.addr = alloca <2 x i32> addrspace(1)*, align 4
15 %qdest.addr = alloca <2 x i32> addrspace(1)*, align 4 35 %qdest.addr = alloca <2 x i32> addrspace(1)*, align 4
16 %index = alloca i32, align 4 36 %index = alloca i32, align 4
33 ret void 53 ret void
34 } 54 }
35 55
36 ; CHECK: test_char_div 56 ; CHECK: test_char_div
37 define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) { 57 define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
38 ; CHECK: idivb 58 ; CHECK-LABEL: test_char_div:
39 ; CHECK: idivb 59 ; CHECK: # %bb.0:
40 ; CHECK: idivb 60 ; CHECK-NEXT: movl %edi, %eax
41 ; CHECK-NOT: idivb 61 ; CHECK-NEXT: cbtw
42 ; CHECK: ret 62 ; CHECK-NEXT: idivb %cl
63 ; CHECK-NEXT: movl %eax, %edi
64 ; CHECK-NEXT: movl %esi, %eax
65 ; CHECK-NEXT: cbtw
66 ; CHECK-NEXT: idivb %r8b
67 ; CHECK-NEXT: movl %eax, %esi
68 ; CHECK-NEXT: movl %edx, %eax
69 ; CHECK-NEXT: cbtw
70 ; CHECK-NEXT: idivb %r9b
71 ; CHECK-NEXT: movl %eax, %ecx
72 ; CHECK-NEXT: movl %edi, %eax
73 ; CHECK-NEXT: movl %esi, %edx
74 ; CHECK-NEXT: retq
43 %div.r = sdiv <3 x i8> %num, %div 75 %div.r = sdiv <3 x i8> %num, %div
44 ret <3 x i8> %div.r 76 ret <3 x i8> %div.r
45 } 77 }
46 78
47 ; CHECK: test_uchar_div 79 ; CHECK: test_uchar_div
48 define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) { 80 define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
49 ; CHECK: divb 81 ; CHECK-LABEL: test_uchar_div:
50 ; CHECK: divb 82 ; CHECK: # %bb.0:
51 ; CHECK: divb 83 ; CHECK-NEXT: movzbl %dil, %eax
52 ; CHECK-NOT: divb 84 ; CHECK-NEXT: # kill: def $eax killed $eax def $ax
53 ; CHECK: ret 85 ; CHECK-NEXT: divb %cl
86 ; CHECK-NEXT: movl %eax, %edi
87 ; CHECK-NEXT: movzbl %sil, %eax
88 ; CHECK-NEXT: # kill: def $eax killed $eax def $ax
89 ; CHECK-NEXT: divb %r8b
90 ; CHECK-NEXT: movl %eax, %esi
91 ; CHECK-NEXT: movzbl %dl, %eax
92 ; CHECK-NEXT: # kill: def $eax killed $eax def $ax
93 ; CHECK-NEXT: divb %r9b
94 ; CHECK-NEXT: movl %eax, %ecx
95 ; CHECK-NEXT: movl %edi, %eax
96 ; CHECK-NEXT: movl %esi, %edx
97 ; CHECK-NEXT: retq
54 %div.r = udiv <3 x i8> %num, %div 98 %div.r = udiv <3 x i8> %num, %div
55 ret <3 x i8> %div.r 99 ret <3 x i8> %div.r
56 } 100 }
57 101
58 ; CHECK: test_short_div 102 ; CHECK: test_short_div
59 define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) { 103 define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
60 ; CHECK: idivw 104 ; CHECK-LABEL: test_short_div:
61 ; CHECK: idivw 105 ; CHECK: # %bb.0:
62 ; CHECK: idivw 106 ; CHECK-NEXT: pextrw $4, %xmm0, %eax
63 ; CHECK: idivw 107 ; CHECK-NEXT: pextrw $4, %xmm1, %ecx
64 ; CHECK: idivw 108 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
65 ; CHECK-NOT: idivw 109 ; CHECK-NEXT: cwtd
66 ; CHECK: ret 110 ; CHECK-NEXT: idivw %cx
111 ; CHECK-NEXT: movl %eax, %r8d
112 ; CHECK-NEXT: pextrw $3, %xmm0, %eax
113 ; CHECK-NEXT: pextrw $3, %xmm1, %ecx
114 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
115 ; CHECK-NEXT: cwtd
116 ; CHECK-NEXT: idivw %cx
117 ; CHECK-NEXT: movl %eax, %r9d
118 ; CHECK-NEXT: pextrw $2, %xmm0, %eax
119 ; CHECK-NEXT: pextrw $2, %xmm1, %ecx
120 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
121 ; CHECK-NEXT: cwtd
122 ; CHECK-NEXT: idivw %cx
123 ; CHECK-NEXT: movl %eax, %edi
124 ; CHECK-NEXT: movd %xmm0, %eax
125 ; CHECK-NEXT: movd %xmm1, %ecx
126 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
127 ; CHECK-NEXT: cwtd
128 ; CHECK-NEXT: idivw %cx
129 ; CHECK-NEXT: movl %eax, %ecx
130 ; CHECK-NEXT: pextrw $1, %xmm0, %eax
131 ; CHECK-NEXT: pextrw $1, %xmm1, %esi
132 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
133 ; CHECK-NEXT: cwtd
134 ; CHECK-NEXT: idivw %si
135 ; CHECK-NEXT: # kill: def $ax killed $ax def $eax
136 ; CHECK-NEXT: movd %ecx, %xmm0
137 ; CHECK-NEXT: pinsrw $1, %eax, %xmm0
138 ; CHECK-NEXT: pinsrw $2, %edi, %xmm0
139 ; CHECK-NEXT: pinsrw $3, %r9d, %xmm0
140 ; CHECK-NEXT: pinsrw $4, %r8d, %xmm0
141 ; CHECK-NEXT: retq
67 %div.r = sdiv <5 x i16> %num, %div 142 %div.r = sdiv <5 x i16> %num, %div
68 ret <5 x i16> %div.r 143 ret <5 x i16> %div.r
69 } 144 }
70 145
71 ; CHECK: test_ushort_div 146 ; CHECK: test_ushort_div
72 define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) { 147 define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
73 ; CHECK: divl 148 ; CHECK-LABEL: test_ushort_div:
74 ; CHECK: divl 149 ; CHECK: # %bb.0:
75 ; CHECK: divl 150 ; CHECK-NEXT: pxor %xmm2, %xmm2
76 ; CHECK: divl 151 ; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
77 ; CHECK-NOT: divl 152 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
78 ; CHECK: ret 153 ; CHECK-NEXT: pextrd $1, %xmm0, %eax
154 ; CHECK-NEXT: pextrd $1, %xmm1, %ecx
155 ; CHECK-NEXT: xorl %edx, %edx
156 ; CHECK-NEXT: divl %ecx
157 ; CHECK-NEXT: movl %eax, %ecx
158 ; CHECK-NEXT: movd %xmm0, %eax
159 ; CHECK-NEXT: movd %xmm1, %esi
160 ; CHECK-NEXT: xorl %edx, %edx
161 ; CHECK-NEXT: divl %esi
162 ; CHECK-NEXT: movd %eax, %xmm2
163 ; CHECK-NEXT: pinsrd $1, %ecx, %xmm2
164 ; CHECK-NEXT: pextrd $2, %xmm0, %eax
165 ; CHECK-NEXT: pextrd $2, %xmm1, %ecx
166 ; CHECK-NEXT: xorl %edx, %edx
167 ; CHECK-NEXT: divl %ecx
168 ; CHECK-NEXT: pinsrd $2, %eax, %xmm2
169 ; CHECK-NEXT: pextrd $3, %xmm0, %eax
170 ; CHECK-NEXT: pextrd $3, %xmm1, %ecx
171 ; CHECK-NEXT: xorl %edx, %edx
172 ; CHECK-NEXT: divl %ecx
173 ; CHECK-NEXT: pinsrd $3, %eax, %xmm2
174 ; CHECK-NEXT: movdqa %xmm2, %xmm0
175 ; CHECK-NEXT: retq
79 %div.r = udiv <4 x i16> %num, %div 176 %div.r = udiv <4 x i16> %num, %div
80 ret <4 x i16> %div.r 177 ret <4 x i16> %div.r
81 } 178 }
82 179
83 ; CHECK: test_uint_div 180 ; CHECK: test_uint_div
84 define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) { 181 define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
85 ; CHECK: divl 182 ; CHECK-LABEL: test_uint_div:
86 ; CHECK: divl 183 ; CHECK: # %bb.0:
87 ; CHECK: divl 184 ; CHECK-NEXT: pextrd $2, %xmm0, %eax
88 ; CHECK-NOT: divl 185 ; CHECK-NEXT: pextrd $2, %xmm1, %ecx
89 ; CHECK: ret 186 ; CHECK-NEXT: xorl %edx, %edx
187 ; CHECK-NEXT: divl %ecx
188 ; CHECK-NEXT: movl %eax, %ecx
189 ; CHECK-NEXT: pextrd $1, %xmm0, %eax
190 ; CHECK-NEXT: pextrd $1, %xmm1, %esi
191 ; CHECK-NEXT: xorl %edx, %edx
192 ; CHECK-NEXT: divl %esi
193 ; CHECK-NEXT: movl %eax, %esi
194 ; CHECK-NEXT: movd %xmm0, %eax
195 ; CHECK-NEXT: movd %xmm1, %edi
196 ; CHECK-NEXT: xorl %edx, %edx
197 ; CHECK-NEXT: divl %edi
198 ; CHECK-NEXT: movd %eax, %xmm0
199 ; CHECK-NEXT: pinsrd $1, %esi, %xmm0
200 ; CHECK-NEXT: pinsrd $2, %ecx, %xmm0
201 ; CHECK-NEXT: retq
90 %div.r = udiv <3 x i32> %num, %div 202 %div.r = udiv <3 x i32> %num, %div
91 ret <3 x i32> %div.r 203 ret <3 x i32> %div.r
92 } 204 }
93 205
94 ; CHECK: test_long_div 206 ; CHECK: test_long_div
95 define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) { 207 define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
96 ; CHECK: idivq 208 ; CHECK-LABEL: test_long_div:
97 ; CHECK: idivq 209 ; CHECK: # %bb.0:
98 ; CHECK: idivq 210 ; CHECK-NEXT: movq %rdx, %r10
99 ; CHECK-NOT: idivq 211 ; CHECK-NEXT: movq %rdi, %rax
100 ; CHECK: ret 212 ; CHECK-NEXT: cqto
213 ; CHECK-NEXT: idivq %rcx
214 ; CHECK-NEXT: movq %rax, %rcx
215 ; CHECK-NEXT: movq %rsi, %rax
216 ; CHECK-NEXT: cqto
217 ; CHECK-NEXT: idivq %r8
218 ; CHECK-NEXT: movq %rax, %rsi
219 ; CHECK-NEXT: movq %r10, %rax
220 ; CHECK-NEXT: cqto
221 ; CHECK-NEXT: idivq %r9
222 ; CHECK-NEXT: movq %rax, %rdi
223 ; CHECK-NEXT: movq %rcx, %rax
224 ; CHECK-NEXT: movq %rsi, %rdx
225 ; CHECK-NEXT: movq %rdi, %rcx
226 ; CHECK-NEXT: retq
101 %div.r = sdiv <3 x i64> %num, %div 227 %div.r = sdiv <3 x i64> %num, %div
102 ret <3 x i64> %div.r 228 ret <3 x i64> %div.r
103 } 229 }
104 230
105 ; CHECK: test_ulong_div 231 ; CHECK: test_ulong_div
106 define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) { 232 define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
107 ; CHECK: divq 233 ; CHECK-LABEL: test_ulong_div:
108 ; CHECK: divq 234 ; CHECK: # %bb.0:
109 ; CHECK: divq 235 ; CHECK-NEXT: movq %rdx, %r10
110 ; CHECK-NOT: divq 236 ; CHECK-NEXT: xorl %edx, %edx
111 ; CHECK: ret 237 ; CHECK-NEXT: movq %rdi, %rax
238 ; CHECK-NEXT: divq %rcx
239 ; CHECK-NEXT: movq %rax, %rcx
240 ; CHECK-NEXT: xorl %edx, %edx
241 ; CHECK-NEXT: movq %rsi, %rax
242 ; CHECK-NEXT: divq %r8
243 ; CHECK-NEXT: movq %rax, %rsi
244 ; CHECK-NEXT: xorl %edx, %edx
245 ; CHECK-NEXT: movq %r10, %rax
246 ; CHECK-NEXT: divq %r9
247 ; CHECK-NEXT: movq %rax, %rdi
248 ; CHECK-NEXT: movq %rcx, %rax
249 ; CHECK-NEXT: movq %rsi, %rdx
250 ; CHECK-NEXT: movq %rdi, %rcx
251 ; CHECK-NEXT: retq
112 %div.r = udiv <3 x i64> %num, %div 252 %div.r = udiv <3 x i64> %num, %div
113 ret <3 x i64> %div.r 253 ret <3 x i64> %div.r
114 } 254 }
115 255
116 ; CHECK: test_char_rem 256 ; CHECK: test_char_rem
117 define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) { 257 define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) {
118 ; CHECK: idivl 258 ; CHECK-LABEL: test_char_rem:
119 ; CHECK: idivl 259 ; CHECK: # %bb.0:
120 ; CHECK: idivl 260 ; CHECK-NEXT: pslld $24, %xmm1
121 ; CHECK: idivl 261 ; CHECK-NEXT: psrad $24, %xmm1
122 ; CHECK-NOT: idivl 262 ; CHECK-NEXT: pslld $24, %xmm0
123 ; CHECK: ret 263 ; CHECK-NEXT: psrad $24, %xmm0
264 ; CHECK-NEXT: pextrd $1, %xmm0, %eax
265 ; CHECK-NEXT: pextrd $1, %xmm1, %ecx
266 ; CHECK-NEXT: cltd
267 ; CHECK-NEXT: idivl %ecx
268 ; CHECK-NEXT: movl %edx, %ecx
269 ; CHECK-NEXT: movd %xmm0, %eax
270 ; CHECK-NEXT: movd %xmm1, %esi
271 ; CHECK-NEXT: cltd
272 ; CHECK-NEXT: idivl %esi
273 ; CHECK-NEXT: movd %edx, %xmm2
274 ; CHECK-NEXT: pinsrd $1, %ecx, %xmm2
275 ; CHECK-NEXT: pextrd $2, %xmm0, %eax
276 ; CHECK-NEXT: pextrd $2, %xmm1, %ecx
277 ; CHECK-NEXT: cltd
278 ; CHECK-NEXT: idivl %ecx
279 ; CHECK-NEXT: pinsrd $2, %edx, %xmm2
280 ; CHECK-NEXT: pextrd $3, %xmm0, %eax
281 ; CHECK-NEXT: pextrd $3, %xmm1, %ecx
282 ; CHECK-NEXT: cltd
283 ; CHECK-NEXT: idivl %ecx
284 ; CHECK-NEXT: pinsrd $3, %edx, %xmm2
285 ; CHECK-NEXT: movdqa %xmm2, %xmm0
286 ; CHECK-NEXT: retq
124 %rem.r = srem <4 x i8> %num, %rem 287 %rem.r = srem <4 x i8> %num, %rem
125 ret <4 x i8> %rem.r 288 ret <4 x i8> %rem.r
126 } 289 }
127 290
128 ; CHECK: test_short_rem 291 ; CHECK: test_short_rem
129 define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) { 292 define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
130 ; CHECK: idivw 293 ; CHECK-LABEL: test_short_rem:
131 ; CHECK: idivw 294 ; CHECK: # %bb.0:
132 ; CHECK: idivw 295 ; CHECK-NEXT: pextrw $4, %xmm0, %eax
133 ; CHECK: idivw 296 ; CHECK-NEXT: pextrw $4, %xmm1, %ecx
134 ; CHECK: idivw 297 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
135 ; CHECK-NOT: idivw 298 ; CHECK-NEXT: cwtd
136 ; CHECK: ret 299 ; CHECK-NEXT: idivw %cx
300 ; CHECK-NEXT: movl %edx, %r8d
301 ; CHECK-NEXT: pextrw $3, %xmm0, %eax
302 ; CHECK-NEXT: pextrw $3, %xmm1, %ecx
303 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
304 ; CHECK-NEXT: cwtd
305 ; CHECK-NEXT: idivw %cx
306 ; CHECK-NEXT: movl %edx, %r9d
307 ; CHECK-NEXT: pextrw $2, %xmm0, %eax
308 ; CHECK-NEXT: pextrw $2, %xmm1, %ecx
309 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
310 ; CHECK-NEXT: cwtd
311 ; CHECK-NEXT: idivw %cx
312 ; CHECK-NEXT: movl %edx, %edi
313 ; CHECK-NEXT: movd %xmm0, %eax
314 ; CHECK-NEXT: movd %xmm1, %ecx
315 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
316 ; CHECK-NEXT: cwtd
317 ; CHECK-NEXT: idivw %cx
318 ; CHECK-NEXT: movl %edx, %ecx
319 ; CHECK-NEXT: pextrw $1, %xmm0, %eax
320 ; CHECK-NEXT: pextrw $1, %xmm1, %esi
321 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
322 ; CHECK-NEXT: cwtd
323 ; CHECK-NEXT: idivw %si
324 ; CHECK-NEXT: # kill: def $dx killed $dx def $edx
325 ; CHECK-NEXT: movd %ecx, %xmm0
326 ; CHECK-NEXT: pinsrw $1, %edx, %xmm0
327 ; CHECK-NEXT: pinsrw $2, %edi, %xmm0
328 ; CHECK-NEXT: pinsrw $3, %r9d, %xmm0
329 ; CHECK-NEXT: pinsrw $4, %r8d, %xmm0
330 ; CHECK-NEXT: retq
137 %rem.r = srem <5 x i16> %num, %rem 331 %rem.r = srem <5 x i16> %num, %rem
138 ret <5 x i16> %rem.r 332 ret <5 x i16> %rem.r
139 } 333 }
140 334
141 ; CHECK: test_uint_rem 335 ; CHECK: test_uint_rem
142 define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) { 336 define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
143 ; CHECK: idivl 337 ; CHECK-LABEL: test_uint_rem:
144 ; CHECK: idivl 338 ; CHECK: # %bb.0:
145 ; CHECK: idivl 339 ; CHECK-NEXT: pextrd $1, %xmm0, %eax
146 ; CHECK: idivl 340 ; CHECK-NEXT: pextrd $1, %xmm1, %ecx
147 ; CHECK-NOT: idivl 341 ; CHECK-NEXT: cltd
148 ; CHECK: ret 342 ; CHECK-NEXT: idivl %ecx
343 ; CHECK-NEXT: movl %edx, %ecx
344 ; CHECK-NEXT: movd %xmm0, %eax
345 ; CHECK-NEXT: movd %xmm1, %esi
346 ; CHECK-NEXT: cltd
347 ; CHECK-NEXT: idivl %esi
348 ; CHECK-NEXT: movd %edx, %xmm2
349 ; CHECK-NEXT: pinsrd $1, %ecx, %xmm2
350 ; CHECK-NEXT: pextrd $2, %xmm0, %eax
351 ; CHECK-NEXT: pextrd $2, %xmm1, %ecx
352 ; CHECK-NEXT: cltd
353 ; CHECK-NEXT: idivl %ecx
354 ; CHECK-NEXT: pinsrd $2, %edx, %xmm2
355 ; CHECK-NEXT: pextrd $3, %xmm0, %eax
356 ; CHECK-NEXT: pextrd $3, %xmm1, %ecx
357 ; CHECK-NEXT: cltd
358 ; CHECK-NEXT: idivl %ecx
359 ; CHECK-NEXT: pinsrd $3, %edx, %xmm2
360 ; CHECK-NEXT: movdqa %xmm2, %xmm0
361 ; CHECK-NEXT: retq
149 %rem.r = srem <4 x i32> %num, %rem 362 %rem.r = srem <4 x i32> %num, %rem
150 ret <4 x i32> %rem.r 363 ret <4 x i32> %rem.r
151 } 364 }
152 365
153 366
154 ; CHECK: test_ulong_rem 367 ; CHECK: test_ulong_rem
155 define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) { 368 define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
156 ; CHECK: divq 369 ; CHECK-LABEL: test_ulong_rem:
157 ; CHECK: divq 370 ; CHECK: # %bb.0:
158 ; CHECK: divq 371 ; CHECK-NEXT: movq %rdx, %rax
159 ; CHECK: divq 372 ; CHECK-NEXT: xorl %edx, %edx
160 ; CHECK: divq 373 ; CHECK-NEXT: divq {{[0-9]+}}(%rsp)
161 ; CHECK-NOT: divq 374 ; CHECK-NEXT: movq %rdx, %xmm0
162 ; CHECK: ret 375 ; CHECK-NEXT: xorl %edx, %edx
376 ; CHECK-NEXT: movq %rsi, %rax
377 ; CHECK-NEXT: divq {{[0-9]+}}(%rsp)
378 ; CHECK-NEXT: movq %rdx, %xmm1
379 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
380 ; CHECK-NEXT: xorl %edx, %edx
381 ; CHECK-NEXT: movq %r8, %rax
382 ; CHECK-NEXT: divq {{[0-9]+}}(%rsp)
383 ; CHECK-NEXT: movq %rdx, %xmm0
384 ; CHECK-NEXT: xorl %edx, %edx
385 ; CHECK-NEXT: movq %rcx, %rax
386 ; CHECK-NEXT: divq {{[0-9]+}}(%rsp)
387 ; CHECK-NEXT: movq %rdx, %xmm2
388 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
389 ; CHECK-NEXT: xorl %edx, %edx
390 ; CHECK-NEXT: movq %r9, %rax
391 ; CHECK-NEXT: divq {{[0-9]+}}(%rsp)
392 ; CHECK-NEXT: movq %rdx, 32(%rdi)
393 ; CHECK-NEXT: movdqa %xmm2, 16(%rdi)
394 ; CHECK-NEXT: movdqa %xmm1, (%rdi)
395 ; CHECK-NEXT: movq %rdi, %rax
396 ; CHECK-NEXT: retq
163 %rem.r = urem <5 x i64> %num, %rem 397 %rem.r = urem <5 x i64> %num, %rem
164 ret <5 x i64> %rem.r 398 ret <5 x i64> %rem.r
165 } 399 }
166 400
167 ; CHECK: test_int_div 401 ; CHECK: test_int_div
168 define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) { 402 define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
169 ; CHECK: idivl 403 ; CHECK-LABEL: test_int_div:
170 ; CHECK: idivl 404 ; CHECK: # %bb.0: # %entry
171 ; CHECK: idivl 405 ; CHECK-NEXT: movl %edx, %r9d
172 ; CHECK-NOT: idivl 406 ; CHECK-NEXT: testl %edx, %edx
173 ; CHECK: ret 407 ; CHECK-NEXT: jle .LBB12_3
408 ; CHECK-NEXT: # %bb.1: # %bb.nph
409 ; CHECK-NEXT: xorl %ecx, %ecx
410 ; CHECK-NEXT: .p2align 4, 0x90
411 ; CHECK-NEXT: .LBB12_2: # %for.body
412 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
413 ; CHECK-NEXT: movdqa (%rdi,%rcx), %xmm0
414 ; CHECK-NEXT: movdqa (%rsi,%rcx), %xmm1
415 ; CHECK-NEXT: pextrd $1, %xmm0, %eax
416 ; CHECK-NEXT: pextrd $1, %xmm1, %r8d
417 ; CHECK-NEXT: cltd
418 ; CHECK-NEXT: idivl %r8d
419 ; CHECK-NEXT: movl %eax, %r8d
420 ; CHECK-NEXT: movd %xmm0, %eax
421 ; CHECK-NEXT: movd %xmm1, %r10d
422 ; CHECK-NEXT: cltd
423 ; CHECK-NEXT: idivl %r10d
424 ; CHECK-NEXT: movd %eax, %xmm2
425 ; CHECK-NEXT: pinsrd $1, %r8d, %xmm2
426 ; CHECK-NEXT: pextrd $2, %xmm0, %eax
427 ; CHECK-NEXT: pextrd $2, %xmm1, %r8d
428 ; CHECK-NEXT: cltd
429 ; CHECK-NEXT: idivl %r8d
430 ; CHECK-NEXT: pinsrd $2, %eax, %xmm2
431 ; CHECK-NEXT: movl %eax, 8(%rdi,%rcx)
432 ; CHECK-NEXT: movq %xmm2, (%rdi,%rcx)
433 ; CHECK-NEXT: addq $16, %rcx
434 ; CHECK-NEXT: decl %r9d
435 ; CHECK-NEXT: jne .LBB12_2
436 ; CHECK-NEXT: .LBB12_3: # %for.end
437 ; CHECK-NEXT: retq
174 entry: 438 entry:
175 %cmp13 = icmp sgt i32 %n, 0 439 %cmp13 = icmp sgt i32 %n, 0
176 br i1 %cmp13, label %bb.nph, label %for.end 440 br i1 %cmp13, label %bb.nph, label %for.end
177 441
178 bb.nph: 442 bb.nph:
179 br label %for.body 443 br label %for.body
180 444
181 for.body: 445 for.body:
182 %i.014 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ] 446 %i.014 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ]
183 %arrayidx11 = getelementptr <3 x i32>, <3 x i32>* %dest, i32 %i.014 447 %arrayidx11 = getelementptr <3 x i32>, <3 x i32>* %dest, i32 %i.014
184 %tmp4 = load <3 x i32>, <3 x i32>* %arrayidx11 ; <<3 x i32>> [#uses=1] 448 %tmp4 = load <3 x i32>, <3 x i32>* %arrayidx11 ; <<3 x i32>> [#uses=1]
185 %arrayidx7 = getelementptr inbounds <3 x i32>, <3 x i32>* %old, i32 %i.014 449 %arrayidx7 = getelementptr inbounds <3 x i32>, <3 x i32>* %old, i32 %i.014
186 %tmp8 = load <3 x i32>, <3 x i32>* %arrayidx7 ; <<3 x i32>> [#uses=1] 450 %tmp8 = load <3 x i32>, <3 x i32>* %arrayidx7 ; <<3 x i32>> [#uses=1]
187 %div = sdiv <3 x i32> %tmp4, %tmp8 451 %div = sdiv <3 x i32> %tmp4, %tmp8
188 store <3 x i32> %div, <3 x i32>* %arrayidx11 452 store <3 x i32> %div, <3 x i32>* %arrayidx11
189 %inc = add nsw i32 %i.014, 1 453 %inc = add nsw i32 %i.014, 1
190 %exitcond = icmp eq i32 %inc, %n 454 %exitcond = icmp eq i32 %inc, %n
191 br i1 %exitcond, label %for.end, label %for.body 455 br i1 %exitcond, label %for.end, label %for.body
192 456
193 for.end: ; preds = %for.body, %entry 457 for.end: ; preds = %for.body, %entry
194 ret void 458 ret void
195 } 459 }