diff test/CodeGen/X86/sse1.ll @ 134:3a76565eade5 LLVM5.0.1
update 5.0.1
| author | mir3636 |
| --- | --- |
| date | Sat, 17 Feb 2018 09:57:20 +0900 |
| parents | 803732b1fca8 |
| children | c2174574ed3a |
--- a/test/CodeGen/X86/sse1.ll	Fri Feb 16 19:10:49 2018 +0900
+++ b/test/CodeGen/X86/sse1.ll	Sat Feb 17 09:57:20 2018 +0900
@@ -14,9 +14,9 @@
 ; rdar://8368414
 define <2 x float> @test4(<2 x float> %A, <2 x float> %B) nounwind {
 ; X32-LABEL: test4:
-; X32:       # BB#0: # %entry
+; X32:       # %bb.0: # %entry
 ; X32-NEXT:    movaps %xmm0, %xmm2
-; X32-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; X32-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
 ; X32-NEXT:    addss %xmm1, %xmm0
 ; X32-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
 ; X32-NEXT:    subss %xmm1, %xmm2
@@ -24,9 +24,9 @@
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test4:
-; X64:       # BB#0: # %entry
+; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movaps %xmm0, %xmm2
-; X64-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; X64-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
 ; X64-NEXT:    addss %xmm1, %xmm0
 ; X64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
 ; X64-NEXT:    subss %xmm1, %xmm2
@@ -52,11 +52,11 @@
 define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
 ; X32-LABEL: vselect:
-; X32:       # BB#0: # %entry
+; X32:       # %bb.0: # %entry
 ; X32-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
 ; X32-NEXT:    xorps %xmm0, %xmm0
 ; X32-NEXT:    je .LBB1_1
-; X32-NEXT:  # BB#2: # %entry
+; X32-NEXT:  # %bb.2: # %entry
 ; X32-NEXT:    xorps %xmm1, %xmm1
 ; X32-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
 ; X32-NEXT:    jne .LBB1_5
@@ -91,11 +91,11 @@
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vselect:
-; X64:       # BB#0: # %entry
+; X64:       # %bb.0: # %entry
 ; X64-NEXT:    testl %edx, %edx
 ; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    je .LBB1_1
-; X64-NEXT:  # BB#2: # %entry
+; X64-NEXT:  # %bb.2: # %entry
 ; X64-NEXT:    xorps %xmm1, %xmm1
 ; X64-NEXT:    testl %ecx, %ecx
 ; X64-NEXT:    jne .LBB1_5
@@ -138,12 +138,12 @@
 define <4 x float> @PR28044(<4 x float> %a0, <4 x float> %a1) nounwind {
 ; X32-LABEL: PR28044:
-; X32:       # BB#0:
+; X32:       # %bb.0:
 ; X32-NEXT:    cmpeqps %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: PR28044:
-; X64:       # BB#0:
+; X64:       # %bb.0:
 ; X64-NEXT:    cmpeqps %xmm1, %xmm0
 ; X64-NEXT:    retq
   %cmp = fcmp oeq <4 x float> %a0, %a1
@@ -157,57 +157,82 @@
 define <4 x i32> @PR30512(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X32-LABEL: PR30512:
-; X32:       # BB#0:
-; X32-NEXT:    pushl %ebp
+; X32:       # %bb.0:
 ; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    cmpl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    sete %cl
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    cmpl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT:    sete %dl
 ; X32-NEXT:    xorl %ebx, %ebx
 ; X32-NEXT:    cmpl {{[0-9]+}}(%esp), %edi
 ; X32-NEXT:    sete %bl
-; X32-NEXT:    xorl %eax, %eax
+; X32-NEXT:    negl %ebx
+; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X32-NEXT:    xorl %ebx, %ebx
 ; X32-NEXT:    cmpl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    sete %al
-; X32-NEXT:    movl %eax, 12(%ebp)
-; X32-NEXT:    movl %ebx, 8(%ebp)
-; X32-NEXT:    movl %edx, 4(%ebp)
-; X32-NEXT:    movl %ecx, (%ebp)
-; X32-NEXT:    movl %ebp, %eax
+; X32-NEXT:    sete %bl
+; X32-NEXT:    negl %ebx
+; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X32-NEXT:    xorl %ebx, %ebx
+; X32-NEXT:    cmpl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    sete %bl
+; X32-NEXT:    negl %ebx
+; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X32-NEXT:    xorl %edx, %edx
+; X32-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    sete %dl
+; X32-NEXT:    negl %edx
+; X32-NEXT:    movl %edx, (%esp)
+; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X32-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; X32-NEXT:    andps {{\.LCPI.*}}, %xmm2
+; X32-NEXT:    movaps %xmm2, (%eax)
+; X32-NEXT:    addl $16, %esp
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %edi
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    popl %ebp
 ; X32-NEXT:    retl $4
 ;
 ; X64-LABEL: PR30512:
-; X64:       # BB#0:
+; X64:       # %bb.0:
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpl {{[0-9]+}}(%rsp), %r8d
+; X64-NEXT:    sete %al
+; X64-NEXT:    negl %eax
+; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpl {{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    sete %al
+; X64-NEXT:    negl %eax
+; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpl {{[0-9]+}}(%rsp), %edx
+; X64-NEXT:    sete %al
+; X64-NEXT:    negl %eax
+; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpl %r9d, %esi
 ; X64-NEXT:    sete %al
-; X64-NEXT:    xorl %esi, %esi
-; X64-NEXT:    cmpl {{[0-9]+}}(%rsp), %edx
-; X64-NEXT:    sete %sil
-; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    cmpl {{[0-9]+}}(%rsp), %ecx
-; X64-NEXT:    sete %dl
-; X64-NEXT:    xorl %ecx, %ecx
-; X64-NEXT:    cmpl {{[0-9]+}}(%rsp), %r8d
-; X64-NEXT:    sete %cl
-; X64-NEXT:    movl %ecx, 12(%rdi)
-; X64-NEXT:    movl %edx, 8(%rdi)
-; X64-NEXT:    movl %esi, 4(%rdi)
-; X64-NEXT:    movl %eax, (%rdi)
+; X64-NEXT:    negl %eax
+; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X64-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; X64-NEXT:    andps {{.*}}(%rip), %xmm2
+; X64-NEXT:    movaps %xmm2, (%rdi)
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
   %cmp = icmp eq <4 x i32> %x, %y
@@ -219,126 +244,20 @@
 ; post-legalization to cause the crash seen in:
 ; https://llvm.org/bugs/show_bug.cgi?id=31672
 ; Is there a way to do that without an unsafe/fast sqrt intrinsic call?
-; Also, although the goal for adding this test is to prove that we
-; don't crash, I have no idea what this code is doing, so I'm keeping
-; the full codegen checks in case there's motivation to improve this.
+;
+; We now no longer try to lower sqrt using rsqrt with SSE1 only as the
+; v4i32 vselect mentioned above should never have been created. We ended up
+; scalarizing it anyway.
 define <2 x float> @PR31672() #0 {
 ; X32-LABEL: PR31672:
-; X32:       # BB#0:
-; X32-NEXT:    pushl %ebp
-; X32-NEXT:    movl %esp, %ebp
-; X32-NEXT:    andl $-16, %esp
-; X32-NEXT:    subl $80, %esp
-; X32-NEXT:    xorps %xmm0, %xmm0
-; X32-NEXT:    movaps {{.*#+}} xmm1 = <42,3,u,u>
-; X32-NEXT:    movaps %xmm1, %xmm2
-; X32-NEXT:    cmpeqps %xmm0, %xmm2
-; X32-NEXT:    movaps %xmm2, {{[0-9]+}}(%esp)
-; X32-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT:    rsqrtps %xmm1, %xmm0
-; X32-NEXT:    mulps %xmm0, %xmm1
-; X32-NEXT:    mulps %xmm0, %xmm1
-; X32-NEXT:    addps {{\.LCPI.*}}, %xmm1
-; X32-NEXT:    mulps {{\.LCPI.*}}, %xmm0
-; X32-NEXT:    mulps %xmm1, %xmm0
-; X32-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    andl %eax, %ecx
-; X32-NEXT:    notl %eax
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    orl %ecx, %eax
-; X32-NEXT:    movl %eax, (%esp)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    andl %eax, %ecx
-; X32-NEXT:    notl %eax
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    orl %ecx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    andl %ecx, %edx
-; X32-NEXT:    notl %ecx
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    orl %edx, %ecx
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    andl %eax, %ecx
-; X32-NEXT:    notl %eax
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    orl %ecx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X32-NEXT:    movl %ebp, %esp
-; X32-NEXT:    popl %ebp
+; X32:       # %bb.0:
+; X32-NEXT:    sqrtps {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: PR31672:
-; X64:       # BB#0:
-; X64-NEXT:    xorps %xmm0, %xmm0
-; X64-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps {{.*#+}} xmm1 = <42,3,u,u>
-; X64-NEXT:    cmpeqps %xmm1, %xmm0
-; X64-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    rsqrtps %xmm1, %xmm0
-; X64-NEXT:    mulps %xmm0, %xmm1
-; X64-NEXT:    mulps %xmm0, %xmm1
-; X64-NEXT:    addps {{.*}}(%rip), %xmm1
-; X64-NEXT:    mulps {{.*}}(%rip), %xmm0
-; X64-NEXT:    mulps %xmm1, %xmm0
-; X64-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r8
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r9
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    andl %edi, %eax
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    notl %ecx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r10
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    andl %edx, %ecx
-; X64-NEXT:    orl %eax, %ecx
-; X64-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    shrq $32, %rsi
-; X64-NEXT:    shrq $32, %rdi
-; X64-NEXT:    andl %edi, %esi
-; X64-NEXT:    notl %edi
-; X64-NEXT:    shrq $32, %rdx
-; X64-NEXT:    andl %edi, %edx
-; X64-NEXT:    orl %esi, %edx
-; X64-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movl %r8d, %eax
-; X64-NEXT:    andl %r9d, %eax
-; X64-NEXT:    movl %r9d, %ecx
-; X64-NEXT:    notl %ecx
-; X64-NEXT:    andl %r10d, %ecx
-; X64-NEXT:    orl %eax, %ecx
-; X64-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    shrq $32, %r8
-; X64-NEXT:    shrq $32, %r9
-; X64-NEXT:    andl %r9d, %r8d
-; X64-NEXT:    notl %r9d
-; X64-NEXT:    shrq $32, %r10
-; X64-NEXT:    andl %r9d, %r10d
-; X64-NEXT:    orl %r8d, %r10d
-; X64-NEXT:    movl %r10d, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64:       # %bb.0:
+; X64-NEXT:    sqrtps {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> <float 42.0, float 3.0>)
   ret <2 x float> %t0
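
Editorial note (not part of the changeset): the new PR30512 codegen above materializes each lane of the `icmp eq <4 x i32>` result branchlessly. `sete` yields 0 or 1, `negl` stretches that into an all-zeros/all-ones word, the four words are stored to the stack, reloaded and packed with `movss`/`unpcklps`/`movlhps`, and the final `andps` against a constant-pool operand maps the all-ones lanes back to the 0/1 values the returned vector requires. A minimal C sketch of the scalar trick, using the hypothetical helper name `eq_lane`:

```c
#include <stdint.h>

/* Branchless 0/1 compare result, mirroring the sete/negl/andps pattern
 * in the new PR30512 codegen. eq_lane is a hypothetical name. */
static int32_t eq_lane(int32_t a, int32_t b) {
    int32_t mask = -(int32_t)(a == b); /* sete: 0/1; negl: 0 or 0xFFFFFFFF */
    return mask & 1;                   /* andps with a splat of 1: back to 0/1 */
}
```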
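Likewise for PR31672: the removed CHECK lines encode the old SSE1-only lowering of the `fast` vector sqrt, namely `rsqrtps` plus one Newton-Raphson refinement step (the `mulps`/`addps`/`mulps` chain, with the refinement constants folded into the `{{\.LCPI.*}}` constant-pool loads) and a `cmpeqps`-based select that forces lanes where the input is 0 back to 0, since `0 * rsqrt(0)` is `0 * inf = NaN`. The patch replaces all of that with a plain `sqrtps`. A minimal SSE1-intrinsics sketch of the removed approach, under those assumptions and with the hypothetical helper name `sqrt_via_rsqrt`:

```c
#include <xmmintrin.h> /* SSE1 intrinsics */

/* sqrt(x) ~= 0.5 * (x*e) * (3 - (x*e)*e), where e = rsqrtps(x) is a
 * ~12-bit estimate of 1/sqrt(x); one Newton-Raphson step brings the
 * result to roughly full float precision. Hypothetical helper name. */
static __m128 sqrt_via_rsqrt(__m128 x) {
    __m128 e  = _mm_rsqrt_ps(x);  /* e ~= 1/sqrt(x) */
    __m128 xe = _mm_mul_ps(x, e); /* x*e ~= sqrt(x) */
    __m128 nr = _mm_mul_ps(
        _mm_mul_ps(_mm_set1_ps(0.5f), xe),
        _mm_sub_ps(_mm_set1_ps(3.0f), _mm_mul_ps(xe, e)));
    /* Lanes where x == 0 would be NaN (0 * inf); select 0 there instead,
     * as the removed cmpeqps + and/not/or sequence did. */
    __m128 zero = _mm_cmpeq_ps(x, _mm_setzero_ps());
    return _mm_andnot_ps(zero, nr);
}
```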