Mercurial > hg > CbC > CbC_llvm
comparison test/CodeGen/X86/scalar_widen_div.ll @ 134:3a76565eade5 LLVM5.0.1
update 5.0.1
author | mir3636 |
---|---|
date | Sat, 17 Feb 2018 09:57:20 +0900 |
parents | 803732b1fca8 |
children | c2174574ed3a |
comparison
equal
deleted
inserted
replaced
133:c60214abe0e8 | 134:3a76565eade5 |
---|---|
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | |
1 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s | 2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s |
2 | 3 |
3 ; Verify when widening a divide/remainder operation, we only generate a | 4 ; Verify when widening a divide/remainder operation, we only generate a |
4 ; divide/rem per element since divide/remainder can trap. | 5 ; divide/rem per element since divide/remainder can trap. |
5 | 6 |
6 ; CHECK: vectorDiv | 7 ; CHECK: vectorDiv |
7 define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind { | 8 define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind { |
8 ; CHECK: idivq | 9 ; CHECK-LABEL: vectorDiv: |
9 ; CHECK: idivq | 10 ; CHECK: # %bb.0: # %entry |
10 ; CHECK-NOT: idivl | 11 ; CHECK-NEXT: movq %rdx, %r8 |
11 ; CHECK: ret | 12 ; CHECK-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) |
13 ; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) | |
14 ; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) | |
15 ; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rcx | |
16 ; CHECK-NEXT: pmovsxdq (%rdi,%rcx,8), %xmm0 | |
17 ; CHECK-NEXT: pmovsxdq (%rsi,%rcx,8), %xmm1 | |
18 ; CHECK-NEXT: pextrq $1, %xmm0, %rax | |
19 ; CHECK-NEXT: pextrq $1, %xmm1, %rsi | |
20 ; CHECK-NEXT: cqto | |
21 ; CHECK-NEXT: idivq %rsi | |
22 ; CHECK-NEXT: movq %rax, %xmm2 | |
23 ; CHECK-NEXT: movq %xmm0, %rax | |
24 ; CHECK-NEXT: movq %xmm1, %rsi | |
25 ; CHECK-NEXT: cqto | |
26 ; CHECK-NEXT: idivq %rsi | |
27 ; CHECK-NEXT: movq %rax, %xmm0 | |
28 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] | |
29 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] | |
30 ; CHECK-NEXT: movq %xmm0, (%r8,%rcx,8) | |
31 ; CHECK-NEXT: retq | |
12 entry: | 32 entry: |
13 %nsource.addr = alloca <2 x i32> addrspace(1)*, align 4 | 33 %nsource.addr = alloca <2 x i32> addrspace(1)*, align 4 |
14 %dsource.addr = alloca <2 x i32> addrspace(1)*, align 4 | 34 %dsource.addr = alloca <2 x i32> addrspace(1)*, align 4 |
15 %qdest.addr = alloca <2 x i32> addrspace(1)*, align 4 | 35 %qdest.addr = alloca <2 x i32> addrspace(1)*, align 4 |
16 %index = alloca i32, align 4 | 36 %index = alloca i32, align 4 |
33 ret void | 53 ret void |
34 } | 54 } |
35 | 55 |
36 ; CHECK: test_char_div | 56 ; CHECK: test_char_div |
37 define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) { | 57 define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) { |
38 ; CHECK: idivb | 58 ; CHECK-LABEL: test_char_div: |
39 ; CHECK: idivb | 59 ; CHECK: # %bb.0: |
40 ; CHECK: idivb | 60 ; CHECK-NEXT: movl %edi, %eax |
41 ; CHECK-NOT: idivb | 61 ; CHECK-NEXT: cbtw |
42 ; CHECK: ret | 62 ; CHECK-NEXT: idivb %cl |
63 ; CHECK-NEXT: movl %eax, %edi | |
64 ; CHECK-NEXT: movl %esi, %eax | |
65 ; CHECK-NEXT: cbtw | |
66 ; CHECK-NEXT: idivb %r8b | |
67 ; CHECK-NEXT: movl %eax, %esi | |
68 ; CHECK-NEXT: movl %edx, %eax | |
69 ; CHECK-NEXT: cbtw | |
70 ; CHECK-NEXT: idivb %r9b | |
71 ; CHECK-NEXT: movl %eax, %ecx | |
72 ; CHECK-NEXT: movl %edi, %eax | |
73 ; CHECK-NEXT: movl %esi, %edx | |
74 ; CHECK-NEXT: retq | |
43 %div.r = sdiv <3 x i8> %num, %div | 75 %div.r = sdiv <3 x i8> %num, %div |
44 ret <3 x i8> %div.r | 76 ret <3 x i8> %div.r |
45 } | 77 } |
46 | 78 |
47 ; CHECK: test_uchar_div | 79 ; CHECK: test_uchar_div |
48 define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) { | 80 define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) { |
49 ; CHECK: divb | 81 ; CHECK-LABEL: test_uchar_div: |
50 ; CHECK: divb | 82 ; CHECK: # %bb.0: |
51 ; CHECK: divb | 83 ; CHECK-NEXT: movzbl %dil, %eax |
52 ; CHECK-NOT: divb | 84 ; CHECK-NEXT: # kill: def $eax killed $eax def $ax |
53 ; CHECK: ret | 85 ; CHECK-NEXT: divb %cl |
86 ; CHECK-NEXT: movl %eax, %edi | |
87 ; CHECK-NEXT: movzbl %sil, %eax | |
88 ; CHECK-NEXT: # kill: def $eax killed $eax def $ax | |
89 ; CHECK-NEXT: divb %r8b | |
90 ; CHECK-NEXT: movl %eax, %esi | |
91 ; CHECK-NEXT: movzbl %dl, %eax | |
92 ; CHECK-NEXT: # kill: def $eax killed $eax def $ax | |
93 ; CHECK-NEXT: divb %r9b | |
94 ; CHECK-NEXT: movl %eax, %ecx | |
95 ; CHECK-NEXT: movl %edi, %eax | |
96 ; CHECK-NEXT: movl %esi, %edx | |
97 ; CHECK-NEXT: retq | |
54 %div.r = udiv <3 x i8> %num, %div | 98 %div.r = udiv <3 x i8> %num, %div |
55 ret <3 x i8> %div.r | 99 ret <3 x i8> %div.r |
56 } | 100 } |
57 | 101 |
58 ; CHECK: test_short_div | 102 ; CHECK: test_short_div |
59 define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) { | 103 define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) { |
60 ; CHECK: idivw | 104 ; CHECK-LABEL: test_short_div: |
61 ; CHECK: idivw | 105 ; CHECK: # %bb.0: |
62 ; CHECK: idivw | 106 ; CHECK-NEXT: pextrw $4, %xmm0, %eax |
63 ; CHECK: idivw | 107 ; CHECK-NEXT: pextrw $4, %xmm1, %ecx |
64 ; CHECK: idivw | 108 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax |
65 ; CHECK-NOT: idivw | 109 ; CHECK-NEXT: cwtd |
66 ; CHECK: ret | 110 ; CHECK-NEXT: idivw %cx |
111 ; CHECK-NEXT: movl %eax, %r8d | |
112 ; CHECK-NEXT: pextrw $3, %xmm0, %eax | |
113 ; CHECK-NEXT: pextrw $3, %xmm1, %ecx | |
114 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax | |
115 ; CHECK-NEXT: cwtd | |
116 ; CHECK-NEXT: idivw %cx | |
117 ; CHECK-NEXT: movl %eax, %r9d | |
118 ; CHECK-NEXT: pextrw $2, %xmm0, %eax | |
119 ; CHECK-NEXT: pextrw $2, %xmm1, %ecx | |
120 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax | |
121 ; CHECK-NEXT: cwtd | |
122 ; CHECK-NEXT: idivw %cx | |
123 ; CHECK-NEXT: movl %eax, %edi | |
124 ; CHECK-NEXT: movd %xmm0, %eax | |
125 ; CHECK-NEXT: movd %xmm1, %ecx | |
126 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax | |
127 ; CHECK-NEXT: cwtd | |
128 ; CHECK-NEXT: idivw %cx | |
129 ; CHECK-NEXT: movl %eax, %ecx | |
130 ; CHECK-NEXT: pextrw $1, %xmm0, %eax | |
131 ; CHECK-NEXT: pextrw $1, %xmm1, %esi | |
132 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax | |
133 ; CHECK-NEXT: cwtd | |
134 ; CHECK-NEXT: idivw %si | |
135 ; CHECK-NEXT: # kill: def $ax killed $ax def $eax | |
136 ; CHECK-NEXT: movd %ecx, %xmm0 | |
137 ; CHECK-NEXT: pinsrw $1, %eax, %xmm0 | |
138 ; CHECK-NEXT: pinsrw $2, %edi, %xmm0 | |
139 ; CHECK-NEXT: pinsrw $3, %r9d, %xmm0 | |
140 ; CHECK-NEXT: pinsrw $4, %r8d, %xmm0 | |
141 ; CHECK-NEXT: retq | |
67 %div.r = sdiv <5 x i16> %num, %div | 142 %div.r = sdiv <5 x i16> %num, %div |
68 ret <5 x i16> %div.r | 143 ret <5 x i16> %div.r |
69 } | 144 } |
70 | 145 |
71 ; CHECK: test_ushort_div | 146 ; CHECK: test_ushort_div |
72 define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) { | 147 define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) { |
73 ; CHECK: divl | 148 ; CHECK-LABEL: test_ushort_div: |
74 ; CHECK: divl | 149 ; CHECK: # %bb.0: |
75 ; CHECK: divl | 150 ; CHECK-NEXT: pxor %xmm2, %xmm2 |
76 ; CHECK: divl | 151 ; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] |
77 ; CHECK-NOT: divl | 152 ; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] |
78 ; CHECK: ret | 153 ; CHECK-NEXT: pextrd $1, %xmm0, %eax |
154 ; CHECK-NEXT: pextrd $1, %xmm1, %ecx | |
155 ; CHECK-NEXT: xorl %edx, %edx | |
156 ; CHECK-NEXT: divl %ecx | |
157 ; CHECK-NEXT: movl %eax, %ecx | |
158 ; CHECK-NEXT: movd %xmm0, %eax | |
159 ; CHECK-NEXT: movd %xmm1, %esi | |
160 ; CHECK-NEXT: xorl %edx, %edx | |
161 ; CHECK-NEXT: divl %esi | |
162 ; CHECK-NEXT: movd %eax, %xmm2 | |
163 ; CHECK-NEXT: pinsrd $1, %ecx, %xmm2 | |
164 ; CHECK-NEXT: pextrd $2, %xmm0, %eax | |
165 ; CHECK-NEXT: pextrd $2, %xmm1, %ecx | |
166 ; CHECK-NEXT: xorl %edx, %edx | |
167 ; CHECK-NEXT: divl %ecx | |
168 ; CHECK-NEXT: pinsrd $2, %eax, %xmm2 | |
169 ; CHECK-NEXT: pextrd $3, %xmm0, %eax | |
170 ; CHECK-NEXT: pextrd $3, %xmm1, %ecx | |
171 ; CHECK-NEXT: xorl %edx, %edx | |
172 ; CHECK-NEXT: divl %ecx | |
173 ; CHECK-NEXT: pinsrd $3, %eax, %xmm2 | |
174 ; CHECK-NEXT: movdqa %xmm2, %xmm0 | |
175 ; CHECK-NEXT: retq | |
79 %div.r = udiv <4 x i16> %num, %div | 176 %div.r = udiv <4 x i16> %num, %div |
80 ret <4 x i16> %div.r | 177 ret <4 x i16> %div.r |
81 } | 178 } |
82 | 179 |
83 ; CHECK: test_uint_div | 180 ; CHECK: test_uint_div |
84 define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) { | 181 define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) { |
85 ; CHECK: divl | 182 ; CHECK-LABEL: test_uint_div: |
86 ; CHECK: divl | 183 ; CHECK: # %bb.0: |
87 ; CHECK: divl | 184 ; CHECK-NEXT: pextrd $2, %xmm0, %eax |
88 ; CHECK-NOT: divl | 185 ; CHECK-NEXT: pextrd $2, %xmm1, %ecx |
89 ; CHECK: ret | 186 ; CHECK-NEXT: xorl %edx, %edx |
187 ; CHECK-NEXT: divl %ecx | |
188 ; CHECK-NEXT: movl %eax, %ecx | |
189 ; CHECK-NEXT: pextrd $1, %xmm0, %eax | |
190 ; CHECK-NEXT: pextrd $1, %xmm1, %esi | |
191 ; CHECK-NEXT: xorl %edx, %edx | |
192 ; CHECK-NEXT: divl %esi | |
193 ; CHECK-NEXT: movl %eax, %esi | |
194 ; CHECK-NEXT: movd %xmm0, %eax | |
195 ; CHECK-NEXT: movd %xmm1, %edi | |
196 ; CHECK-NEXT: xorl %edx, %edx | |
197 ; CHECK-NEXT: divl %edi | |
198 ; CHECK-NEXT: movd %eax, %xmm0 | |
199 ; CHECK-NEXT: pinsrd $1, %esi, %xmm0 | |
200 ; CHECK-NEXT: pinsrd $2, %ecx, %xmm0 | |
201 ; CHECK-NEXT: retq | |
90 %div.r = udiv <3 x i32> %num, %div | 202 %div.r = udiv <3 x i32> %num, %div |
91 ret <3 x i32> %div.r | 203 ret <3 x i32> %div.r |
92 } | 204 } |
93 | 205 |
94 ; CHECK: test_long_div | 206 ; CHECK: test_long_div |
95 define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) { | 207 define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) { |
96 ; CHECK: idivq | 208 ; CHECK-LABEL: test_long_div: |
97 ; CHECK: idivq | 209 ; CHECK: # %bb.0: |
98 ; CHECK: idivq | 210 ; CHECK-NEXT: movq %rdx, %r10 |
99 ; CHECK-NOT: idivq | 211 ; CHECK-NEXT: movq %rdi, %rax |
100 ; CHECK: ret | 212 ; CHECK-NEXT: cqto |
213 ; CHECK-NEXT: idivq %rcx | |
214 ; CHECK-NEXT: movq %rax, %rcx | |
215 ; CHECK-NEXT: movq %rsi, %rax | |
216 ; CHECK-NEXT: cqto | |
217 ; CHECK-NEXT: idivq %r8 | |
218 ; CHECK-NEXT: movq %rax, %rsi | |
219 ; CHECK-NEXT: movq %r10, %rax | |
220 ; CHECK-NEXT: cqto | |
221 ; CHECK-NEXT: idivq %r9 | |
222 ; CHECK-NEXT: movq %rax, %rdi | |
223 ; CHECK-NEXT: movq %rcx, %rax | |
224 ; CHECK-NEXT: movq %rsi, %rdx | |
225 ; CHECK-NEXT: movq %rdi, %rcx | |
226 ; CHECK-NEXT: retq | |
101 %div.r = sdiv <3 x i64> %num, %div | 227 %div.r = sdiv <3 x i64> %num, %div |
102 ret <3 x i64> %div.r | 228 ret <3 x i64> %div.r |
103 } | 229 } |
104 | 230 |
105 ; CHECK: test_ulong_div | 231 ; CHECK: test_ulong_div |
106 define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) { | 232 define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) { |
107 ; CHECK: divq | 233 ; CHECK-LABEL: test_ulong_div: |
108 ; CHECK: divq | 234 ; CHECK: # %bb.0: |
109 ; CHECK: divq | 235 ; CHECK-NEXT: movq %rdx, %r10 |
110 ; CHECK-NOT: divq | 236 ; CHECK-NEXT: xorl %edx, %edx |
111 ; CHECK: ret | 237 ; CHECK-NEXT: movq %rdi, %rax |
238 ; CHECK-NEXT: divq %rcx | |
239 ; CHECK-NEXT: movq %rax, %rcx | |
240 ; CHECK-NEXT: xorl %edx, %edx | |
241 ; CHECK-NEXT: movq %rsi, %rax | |
242 ; CHECK-NEXT: divq %r8 | |
243 ; CHECK-NEXT: movq %rax, %rsi | |
244 ; CHECK-NEXT: xorl %edx, %edx | |
245 ; CHECK-NEXT: movq %r10, %rax | |
246 ; CHECK-NEXT: divq %r9 | |
247 ; CHECK-NEXT: movq %rax, %rdi | |
248 ; CHECK-NEXT: movq %rcx, %rax | |
249 ; CHECK-NEXT: movq %rsi, %rdx | |
250 ; CHECK-NEXT: movq %rdi, %rcx | |
251 ; CHECK-NEXT: retq | |
112 %div.r = udiv <3 x i64> %num, %div | 252 %div.r = udiv <3 x i64> %num, %div |
113 ret <3 x i64> %div.r | 253 ret <3 x i64> %div.r |
114 } | 254 } |
115 | 255 |
116 ; CHECK: test_char_rem | 256 ; CHECK: test_char_rem |
117 define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) { | 257 define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) { |
118 ; CHECK: idivl | 258 ; CHECK-LABEL: test_char_rem: |
119 ; CHECK: idivl | 259 ; CHECK: # %bb.0: |
120 ; CHECK: idivl | 260 ; CHECK-NEXT: pslld $24, %xmm1 |
121 ; CHECK: idivl | 261 ; CHECK-NEXT: psrad $24, %xmm1 |
122 ; CHECK-NOT: idivl | 262 ; CHECK-NEXT: pslld $24, %xmm0 |
123 ; CHECK: ret | 263 ; CHECK-NEXT: psrad $24, %xmm0 |
264 ; CHECK-NEXT: pextrd $1, %xmm0, %eax | |
265 ; CHECK-NEXT: pextrd $1, %xmm1, %ecx | |
266 ; CHECK-NEXT: cltd | |
267 ; CHECK-NEXT: idivl %ecx | |
268 ; CHECK-NEXT: movl %edx, %ecx | |
269 ; CHECK-NEXT: movd %xmm0, %eax | |
270 ; CHECK-NEXT: movd %xmm1, %esi | |
271 ; CHECK-NEXT: cltd | |
272 ; CHECK-NEXT: idivl %esi | |
273 ; CHECK-NEXT: movd %edx, %xmm2 | |
274 ; CHECK-NEXT: pinsrd $1, %ecx, %xmm2 | |
275 ; CHECK-NEXT: pextrd $2, %xmm0, %eax | |
276 ; CHECK-NEXT: pextrd $2, %xmm1, %ecx | |
277 ; CHECK-NEXT: cltd | |
278 ; CHECK-NEXT: idivl %ecx | |
279 ; CHECK-NEXT: pinsrd $2, %edx, %xmm2 | |
280 ; CHECK-NEXT: pextrd $3, %xmm0, %eax | |
281 ; CHECK-NEXT: pextrd $3, %xmm1, %ecx | |
282 ; CHECK-NEXT: cltd | |
283 ; CHECK-NEXT: idivl %ecx | |
284 ; CHECK-NEXT: pinsrd $3, %edx, %xmm2 | |
285 ; CHECK-NEXT: movdqa %xmm2, %xmm0 | |
286 ; CHECK-NEXT: retq | |
124 %rem.r = srem <4 x i8> %num, %rem | 287 %rem.r = srem <4 x i8> %num, %rem |
125 ret <4 x i8> %rem.r | 288 ret <4 x i8> %rem.r |
126 } | 289 } |
127 | 290 |
128 ; CHECK: test_short_rem | 291 ; CHECK: test_short_rem |
129 define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) { | 292 define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) { |
130 ; CHECK: idivw | 293 ; CHECK-LABEL: test_short_rem: |
131 ; CHECK: idivw | 294 ; CHECK: # %bb.0: |
132 ; CHECK: idivw | 295 ; CHECK-NEXT: pextrw $4, %xmm0, %eax |
133 ; CHECK: idivw | 296 ; CHECK-NEXT: pextrw $4, %xmm1, %ecx |
134 ; CHECK: idivw | 297 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax |
135 ; CHECK-NOT: idivw | 298 ; CHECK-NEXT: cwtd |
136 ; CHECK: ret | 299 ; CHECK-NEXT: idivw %cx |
300 ; CHECK-NEXT: movl %edx, %r8d | |
301 ; CHECK-NEXT: pextrw $3, %xmm0, %eax | |
302 ; CHECK-NEXT: pextrw $3, %xmm1, %ecx | |
303 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax | |
304 ; CHECK-NEXT: cwtd | |
305 ; CHECK-NEXT: idivw %cx | |
306 ; CHECK-NEXT: movl %edx, %r9d | |
307 ; CHECK-NEXT: pextrw $2, %xmm0, %eax | |
308 ; CHECK-NEXT: pextrw $2, %xmm1, %ecx | |
309 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax | |
310 ; CHECK-NEXT: cwtd | |
311 ; CHECK-NEXT: idivw %cx | |
312 ; CHECK-NEXT: movl %edx, %edi | |
313 ; CHECK-NEXT: movd %xmm0, %eax | |
314 ; CHECK-NEXT: movd %xmm1, %ecx | |
315 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax | |
316 ; CHECK-NEXT: cwtd | |
317 ; CHECK-NEXT: idivw %cx | |
318 ; CHECK-NEXT: movl %edx, %ecx | |
319 ; CHECK-NEXT: pextrw $1, %xmm0, %eax | |
320 ; CHECK-NEXT: pextrw $1, %xmm1, %esi | |
321 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax | |
322 ; CHECK-NEXT: cwtd | |
323 ; CHECK-NEXT: idivw %si | |
324 ; CHECK-NEXT: # kill: def $dx killed $dx def $edx | |
325 ; CHECK-NEXT: movd %ecx, %xmm0 | |
326 ; CHECK-NEXT: pinsrw $1, %edx, %xmm0 | |
327 ; CHECK-NEXT: pinsrw $2, %edi, %xmm0 | |
328 ; CHECK-NEXT: pinsrw $3, %r9d, %xmm0 | |
329 ; CHECK-NEXT: pinsrw $4, %r8d, %xmm0 | |
330 ; CHECK-NEXT: retq | |
137 %rem.r = srem <5 x i16> %num, %rem | 331 %rem.r = srem <5 x i16> %num, %rem |
138 ret <5 x i16> %rem.r | 332 ret <5 x i16> %rem.r |
139 } | 333 } |
140 | 334 |
141 ; CHECK: test_uint_rem | 335 ; CHECK: test_uint_rem |
142 define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) { | 336 define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) { |
143 ; CHECK: idivl | 337 ; CHECK-LABEL: test_uint_rem: |
144 ; CHECK: idivl | 338 ; CHECK: # %bb.0: |
145 ; CHECK: idivl | 339 ; CHECK-NEXT: pextrd $1, %xmm0, %eax |
146 ; CHECK: idivl | 340 ; CHECK-NEXT: pextrd $1, %xmm1, %ecx |
147 ; CHECK-NOT: idivl | 341 ; CHECK-NEXT: cltd |
148 ; CHECK: ret | 342 ; CHECK-NEXT: idivl %ecx |
343 ; CHECK-NEXT: movl %edx, %ecx | |
344 ; CHECK-NEXT: movd %xmm0, %eax | |
345 ; CHECK-NEXT: movd %xmm1, %esi | |
346 ; CHECK-NEXT: cltd | |
347 ; CHECK-NEXT: idivl %esi | |
348 ; CHECK-NEXT: movd %edx, %xmm2 | |
349 ; CHECK-NEXT: pinsrd $1, %ecx, %xmm2 | |
350 ; CHECK-NEXT: pextrd $2, %xmm0, %eax | |
351 ; CHECK-NEXT: pextrd $2, %xmm1, %ecx | |
352 ; CHECK-NEXT: cltd | |
353 ; CHECK-NEXT: idivl %ecx | |
354 ; CHECK-NEXT: pinsrd $2, %edx, %xmm2 | |
355 ; CHECK-NEXT: pextrd $3, %xmm0, %eax | |
356 ; CHECK-NEXT: pextrd $3, %xmm1, %ecx | |
357 ; CHECK-NEXT: cltd | |
358 ; CHECK-NEXT: idivl %ecx | |
359 ; CHECK-NEXT: pinsrd $3, %edx, %xmm2 | |
360 ; CHECK-NEXT: movdqa %xmm2, %xmm0 | |
361 ; CHECK-NEXT: retq | |
149 %rem.r = srem <4 x i32> %num, %rem | 362 %rem.r = srem <4 x i32> %num, %rem |
150 ret <4 x i32> %rem.r | 363 ret <4 x i32> %rem.r |
151 } | 364 } |
152 | 365 |
153 | 366 |
154 ; CHECK: test_ulong_rem | 367 ; CHECK: test_ulong_rem |
155 define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) { | 368 define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) { |
156 ; CHECK: divq | 369 ; CHECK-LABEL: test_ulong_rem: |
157 ; CHECK: divq | 370 ; CHECK: # %bb.0: |
158 ; CHECK: divq | 371 ; CHECK-NEXT: movq %rdx, %rax |
159 ; CHECK: divq | 372 ; CHECK-NEXT: xorl %edx, %edx |
160 ; CHECK: divq | 373 ; CHECK-NEXT: divq {{[0-9]+}}(%rsp) |
161 ; CHECK-NOT: divq | 374 ; CHECK-NEXT: movq %rdx, %xmm0 |
162 ; CHECK: ret | 375 ; CHECK-NEXT: xorl %edx, %edx |
376 ; CHECK-NEXT: movq %rsi, %rax | |
377 ; CHECK-NEXT: divq {{[0-9]+}}(%rsp) | |
378 ; CHECK-NEXT: movq %rdx, %xmm1 | |
379 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] | |
380 ; CHECK-NEXT: xorl %edx, %edx | |
381 ; CHECK-NEXT: movq %r8, %rax | |
382 ; CHECK-NEXT: divq {{[0-9]+}}(%rsp) | |
383 ; CHECK-NEXT: movq %rdx, %xmm0 | |
384 ; CHECK-NEXT: xorl %edx, %edx | |
385 ; CHECK-NEXT: movq %rcx, %rax | |
386 ; CHECK-NEXT: divq {{[0-9]+}}(%rsp) | |
387 ; CHECK-NEXT: movq %rdx, %xmm2 | |
388 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] | |
389 ; CHECK-NEXT: xorl %edx, %edx | |
390 ; CHECK-NEXT: movq %r9, %rax | |
391 ; CHECK-NEXT: divq {{[0-9]+}}(%rsp) | |
392 ; CHECK-NEXT: movq %rdx, 32(%rdi) | |
393 ; CHECK-NEXT: movdqa %xmm2, 16(%rdi) | |
394 ; CHECK-NEXT: movdqa %xmm1, (%rdi) | |
395 ; CHECK-NEXT: movq %rdi, %rax | |
396 ; CHECK-NEXT: retq | |
163 %rem.r = urem <5 x i64> %num, %rem | 397 %rem.r = urem <5 x i64> %num, %rem |
164 ret <5 x i64> %rem.r | 398 ret <5 x i64> %rem.r |
165 } | 399 } |
166 | 400 |
167 ; CHECK: test_int_div | 401 ; CHECK: test_int_div |
168 define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) { | 402 define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) { |
169 ; CHECK: idivl | 403 ; CHECK-LABEL: test_int_div: |
170 ; CHECK: idivl | 404 ; CHECK: # %bb.0: # %entry |
171 ; CHECK: idivl | 405 ; CHECK-NEXT: movl %edx, %r9d |
172 ; CHECK-NOT: idivl | 406 ; CHECK-NEXT: testl %edx, %edx |
173 ; CHECK: ret | 407 ; CHECK-NEXT: jle .LBB12_3 |
408 ; CHECK-NEXT: # %bb.1: # %bb.nph | |
409 ; CHECK-NEXT: xorl %ecx, %ecx | |
410 ; CHECK-NEXT: .p2align 4, 0x90 | |
411 ; CHECK-NEXT: .LBB12_2: # %for.body | |
412 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 | |
413 ; CHECK-NEXT: movdqa (%rdi,%rcx), %xmm0 | |
414 ; CHECK-NEXT: movdqa (%rsi,%rcx), %xmm1 | |
415 ; CHECK-NEXT: pextrd $1, %xmm0, %eax | |
416 ; CHECK-NEXT: pextrd $1, %xmm1, %r8d | |
417 ; CHECK-NEXT: cltd | |
418 ; CHECK-NEXT: idivl %r8d | |
419 ; CHECK-NEXT: movl %eax, %r8d | |
420 ; CHECK-NEXT: movd %xmm0, %eax | |
421 ; CHECK-NEXT: movd %xmm1, %r10d | |
422 ; CHECK-NEXT: cltd | |
423 ; CHECK-NEXT: idivl %r10d | |
424 ; CHECK-NEXT: movd %eax, %xmm2 | |
425 ; CHECK-NEXT: pinsrd $1, %r8d, %xmm2 | |
426 ; CHECK-NEXT: pextrd $2, %xmm0, %eax | |
427 ; CHECK-NEXT: pextrd $2, %xmm1, %r8d | |
428 ; CHECK-NEXT: cltd | |
429 ; CHECK-NEXT: idivl %r8d | |
430 ; CHECK-NEXT: pinsrd $2, %eax, %xmm2 | |
431 ; CHECK-NEXT: movl %eax, 8(%rdi,%rcx) | |
432 ; CHECK-NEXT: movq %xmm2, (%rdi,%rcx) | |
433 ; CHECK-NEXT: addq $16, %rcx | |
434 ; CHECK-NEXT: decl %r9d | |
435 ; CHECK-NEXT: jne .LBB12_2 | |
436 ; CHECK-NEXT: .LBB12_3: # %for.end | |
437 ; CHECK-NEXT: retq | |
174 entry: | 438 entry: |
175 %cmp13 = icmp sgt i32 %n, 0 | 439 %cmp13 = icmp sgt i32 %n, 0 |
176 br i1 %cmp13, label %bb.nph, label %for.end | 440 br i1 %cmp13, label %bb.nph, label %for.end |
177 | 441 |
178 bb.nph: | 442 bb.nph: |
179 br label %for.body | 443 br label %for.body |
180 | 444 |
181 for.body: | 445 for.body: |
182 %i.014 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ] | 446 %i.014 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ] |
183 %arrayidx11 = getelementptr <3 x i32>, <3 x i32>* %dest, i32 %i.014 | 447 %arrayidx11 = getelementptr <3 x i32>, <3 x i32>* %dest, i32 %i.014 |
184 %tmp4 = load <3 x i32>, <3 x i32>* %arrayidx11 ; <<3 x i32>> [#uses=1] | 448 %tmp4 = load <3 x i32>, <3 x i32>* %arrayidx11 ; <<3 x i32>> [#uses=1] |
185 %arrayidx7 = getelementptr inbounds <3 x i32>, <3 x i32>* %old, i32 %i.014 | 449 %arrayidx7 = getelementptr inbounds <3 x i32>, <3 x i32>* %old, i32 %i.014 |
186 %tmp8 = load <3 x i32>, <3 x i32>* %arrayidx7 ; <<3 x i32>> [#uses=1] | 450 %tmp8 = load <3 x i32>, <3 x i32>* %arrayidx7 ; <<3 x i32>> [#uses=1] |
187 %div = sdiv <3 x i32> %tmp4, %tmp8 | 451 %div = sdiv <3 x i32> %tmp4, %tmp8 |
188 store <3 x i32> %div, <3 x i32>* %arrayidx11 | 452 store <3 x i32> %div, <3 x i32>* %arrayidx11 |
189 %inc = add nsw i32 %i.014, 1 | 453 %inc = add nsw i32 %i.014, 1 |
190 %exitcond = icmp eq i32 %inc, %n | 454 %exitcond = icmp eq i32 %inc, %n |
191 br i1 %exitcond, label %for.end, label %for.body | 455 br i1 %exitcond, label %for.end, label %for.body |
192 | 456 |
193 for.end: ; preds = %for.body, %entry | 457 for.end: ; preds = %for.body, %entry |
194 ret void | 458 ret void |
195 } | 459 } |