diff test/CodeGen/X86/sse1.ll @ 134:3a76565eade5 LLVM5.0.1

update 5.0.1
author mir3636
date Sat, 17 Feb 2018 09:57:20 +0900
parents 803732b1fca8
children c2174574ed3a
--- a/test/CodeGen/X86/sse1.ll	Fri Feb 16 19:10:49 2018 +0900
+++ b/test/CodeGen/X86/sse1.ll	Sat Feb 17 09:57:20 2018 +0900
@@ -14,9 +14,9 @@
 ; rdar://8368414
 define <2 x float> @test4(<2 x float> %A, <2 x float> %B) nounwind {
 ; X32-LABEL: test4:
-; X32:       # BB#0: # %entry
+; X32:       # %bb.0: # %entry
 ; X32-NEXT:    movaps %xmm0, %xmm2
-; X32-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; X32-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
 ; X32-NEXT:    addss %xmm1, %xmm0
 ; X32-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
 ; X32-NEXT:    subss %xmm1, %xmm2
@@ -24,9 +24,9 @@
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test4:
-; X64:       # BB#0: # %entry
+; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movaps %xmm0, %xmm2
-; X64-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; X64-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
 ; X64-NEXT:    addss %xmm1, %xmm0
 ; X64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
 ; X64-NEXT:    subss %xmm1, %xmm2
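Note on the two test4 hunks above: the instruction is unchanged; only the asm printer's shuffle comment is sharpened. Since xmm2 was produced by the preceding movaps from xmm0, the comment now names xmm0 as the source of the high two result lanes instead of printing xmm2[1,1,2,3]. A minimal C model of the SHUFPS lane selection these comments encode (shufps_model and its layout are illustrative, not LLVM code):

#include <stdint.h>

/* Model of `shufps $imm, %src, %dst` lane selection, which the {{.*#+}}
 * comments describe: the low two result lanes are selected from dst and
 * the high two from src, using two index bits per lane. */
static void shufps_model(float dst[4], const float src[4], uint8_t imm) {
  float r[4];
  r[0] = dst[(imm >> 0) & 3]; /* e.g. xmm2[1] */
  r[1] = dst[(imm >> 2) & 3]; /* e.g. xmm2[1] */
  r[2] = src[(imm >> 4) & 3]; /* e.g. xmm0[2] */
  r[3] = src[(imm >> 6) & 3]; /* e.g. xmm0[3] */
  for (int i = 0; i < 4; i++) dst[i] = r[i];
}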
@@ -52,11 +52,11 @@
 
 define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
 ; X32-LABEL: vselect:
-; X32:       # BB#0: # %entry
+; X32:       # %bb.0: # %entry
 ; X32-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
 ; X32-NEXT:    xorps %xmm0, %xmm0
 ; X32-NEXT:    je .LBB1_1
-; X32-NEXT:  # BB#2: # %entry
+; X32-NEXT:  # %bb.2: # %entry
 ; X32-NEXT:    xorps %xmm1, %xmm1
 ; X32-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
 ; X32-NEXT:    jne .LBB1_5
@@ -91,11 +91,11 @@
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vselect:
-; X64:       # BB#0: # %entry
+; X64:       # %bb.0: # %entry
 ; X64-NEXT:    testl %edx, %edx
 ; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    je .LBB1_1
-; X64-NEXT:  # BB#2: # %entry
+; X64-NEXT:  # %bb.2: # %entry
 ; X64-NEXT:    xorps %xmm1, %xmm1
 ; X64-NEXT:    testl %ecx, %ecx
 ; X64-NEXT:    jne .LBB1_5
@@ -138,12 +138,12 @@
 
 define <4 x float> @PR28044(<4 x float> %a0, <4 x float> %a1) nounwind {
 ; X32-LABEL: PR28044:
-; X32:       # BB#0:
+; X32:       # %bb.0:
 ; X32-NEXT:    cmpeqps %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: PR28044:
-; X64:       # BB#0:
+; X64:       # %bb.0:
 ; X64-NEXT:    cmpeqps %xmm1, %xmm0
 ; X64-NEXT:    retq
   %cmp = fcmp oeq <4 x float> %a0, %a1
@@ -157,57 +157,82 @@
 
 define <4 x i32> @PR30512(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X32-LABEL: PR30512:
-; X32:       # BB#0:
-; X32-NEXT:    pushl %ebp
+; X32:       # %bb.0:
 ; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    cmpl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    sete %cl
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    cmpl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT:    sete %dl
 ; X32-NEXT:    xorl %ebx, %ebx
 ; X32-NEXT:    cmpl {{[0-9]+}}(%esp), %edi
 ; X32-NEXT:    sete %bl
-; X32-NEXT:    xorl %eax, %eax
+; X32-NEXT:    negl %ebx
+; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X32-NEXT:    xorl %ebx, %ebx
 ; X32-NEXT:    cmpl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    sete %al
-; X32-NEXT:    movl %eax, 12(%ebp)
-; X32-NEXT:    movl %ebx, 8(%ebp)
-; X32-NEXT:    movl %edx, 4(%ebp)
-; X32-NEXT:    movl %ecx, (%ebp)
-; X32-NEXT:    movl %ebp, %eax
+; X32-NEXT:    sete %bl
+; X32-NEXT:    negl %ebx
+; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X32-NEXT:    xorl %ebx, %ebx
+; X32-NEXT:    cmpl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    sete %bl
+; X32-NEXT:    negl %ebx
+; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X32-NEXT:    xorl %edx, %edx
+; X32-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    sete %dl
+; X32-NEXT:    negl %edx
+; X32-NEXT:    movl %edx, (%esp)
+; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X32-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; X32-NEXT:    andps {{\.LCPI.*}}, %xmm2
+; X32-NEXT:    movaps %xmm2, (%eax)
+; X32-NEXT:    addl $16, %esp
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %edi
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    popl %ebp
 ; X32-NEXT:    retl $4
 ;
 ; X64-LABEL: PR30512:
-; X64:       # BB#0:
+; X64:       # %bb.0:
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpl {{[0-9]+}}(%rsp), %r8d
+; X64-NEXT:    sete %al
+; X64-NEXT:    negl %eax
+; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpl {{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    sete %al
+; X64-NEXT:    negl %eax
+; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpl {{[0-9]+}}(%rsp), %edx
+; X64-NEXT:    sete %al
+; X64-NEXT:    negl %eax
+; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpl %r9d, %esi
 ; X64-NEXT:    sete %al
-; X64-NEXT:    xorl %esi, %esi
-; X64-NEXT:    cmpl {{[0-9]+}}(%rsp), %edx
-; X64-NEXT:    sete %sil
-; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    cmpl {{[0-9]+}}(%rsp), %ecx
-; X64-NEXT:    sete %dl
-; X64-NEXT:    xorl %ecx, %ecx
-; X64-NEXT:    cmpl {{[0-9]+}}(%rsp), %r8d
-; X64-NEXT:    sete %cl
-; X64-NEXT:    movl %ecx, 12(%rdi)
-; X64-NEXT:    movl %edx, 8(%rdi)
-; X64-NEXT:    movl %esi, 4(%rdi)
-; X64-NEXT:    movl %eax, (%rdi)
+; X64-NEXT:    negl %eax
+; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X64-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; X64-NEXT:    andps {{.*}}(%rip), %xmm2
+; X64-NEXT:    movaps %xmm2, (%rdi)
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
   %cmp = icmp eq <4 x i32> %x, %y
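The rewritten PR30512 sequence above no longer stores setcc bytes straight to the return slot. Each lane's i1 compare result is widened to a full 0/-1 mask with xorl+cmpl+sete+negl, spilled to the stack, reassembled into an XMM register via movss/unpcklps/movlhps, and finally masked by andps against a constant-pool operand (elided by the {{\.LCPI.*}} and {{.*}} regexes). A per-lane C model of the sete/negl idiom (lane_eq_mask is illustrative):

#include <stdint.h>

/* Per-lane model of the xorl+cmpl+sete+negl pattern in the checks above:
 * compare two 32-bit lanes and produce an all-ones or all-zeros mask, as
 * the scalarized `icmp eq <4 x i32>` now does. */
static uint32_t lane_eq_mask(uint32_t x, uint32_t y) {
  uint32_t m = (x == y); /* cmpl + sete on a zeroed register: 0 or 1 */
  return 0u - m;         /* negl: 0 -> 0x00000000, 1 -> 0xFFFFFFFF */
}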
@@ -219,126 +244,20 @@
 ; post-legalization to cause the crash seen in:
 ; https://llvm.org/bugs/show_bug.cgi?id=31672
 ; Is there a way to do that without an unsafe/fast sqrt intrinsic call?
-; Also, although the goal for adding this test is to prove that we
-; don't crash, I have no idea what this code is doing, so I'm keeping
-; the full codegen checks in case there's motivation to improve this.
+;
+; We no longer try to lower sqrt using rsqrt when targeting SSE1 only, as
+; the v4i32 vselect mentioned above should never have been created; we ended
+; up scalarizing it anyway. (A scalar model of that expansion follows the diff.)
 
 define <2 x float> @PR31672() #0 {
 ; X32-LABEL: PR31672:
-; X32:       # BB#0:
-; X32-NEXT:    pushl %ebp
-; X32-NEXT:    movl %esp, %ebp
-; X32-NEXT:    andl $-16, %esp
-; X32-NEXT:    subl $80, %esp
-; X32-NEXT:    xorps %xmm0, %xmm0
-; X32-NEXT:    movaps {{.*#+}} xmm1 = <42,3,u,u>
-; X32-NEXT:    movaps %xmm1, %xmm2
-; X32-NEXT:    cmpeqps %xmm0, %xmm2
-; X32-NEXT:    movaps %xmm2, {{[0-9]+}}(%esp)
-; X32-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT:    rsqrtps %xmm1, %xmm0
-; X32-NEXT:    mulps %xmm0, %xmm1
-; X32-NEXT:    mulps %xmm0, %xmm1
-; X32-NEXT:    addps {{\.LCPI.*}}, %xmm1
-; X32-NEXT:    mulps {{\.LCPI.*}}, %xmm0
-; X32-NEXT:    mulps %xmm1, %xmm0
-; X32-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    andl %eax, %ecx
-; X32-NEXT:    notl %eax
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    orl %ecx, %eax
-; X32-NEXT:    movl %eax, (%esp)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    andl %eax, %ecx
-; X32-NEXT:    notl %eax
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    orl %ecx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    andl %ecx, %edx
-; X32-NEXT:    notl %ecx
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    orl %edx, %ecx
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    andl %eax, %ecx
-; X32-NEXT:    notl %eax
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    orl %ecx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X32-NEXT:    movl %ebp, %esp
-; X32-NEXT:    popl %ebp
+; X32:       # %bb.0:
+; X32-NEXT:    sqrtps {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: PR31672:
-; X64:       # BB#0:
-; X64-NEXT:    xorps %xmm0, %xmm0
-; X64-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps {{.*#+}} xmm1 = <42,3,u,u>
-; X64-NEXT:    cmpeqps %xmm1, %xmm0
-; X64-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    rsqrtps %xmm1, %xmm0
-; X64-NEXT:    mulps %xmm0, %xmm1
-; X64-NEXT:    mulps %xmm0, %xmm1
-; X64-NEXT:    addps {{.*}}(%rip), %xmm1
-; X64-NEXT:    mulps {{.*}}(%rip), %xmm0
-; X64-NEXT:    mulps %xmm1, %xmm0
-; X64-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r8
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r9
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    andl %edi, %eax
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    notl %ecx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r10
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    andl %edx, %ecx
-; X64-NEXT:    orl %eax, %ecx
-; X64-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    shrq $32, %rsi
-; X64-NEXT:    shrq $32, %rdi
-; X64-NEXT:    andl %edi, %esi
-; X64-NEXT:    notl %edi
-; X64-NEXT:    shrq $32, %rdx
-; X64-NEXT:    andl %edi, %edx
-; X64-NEXT:    orl %esi, %edx
-; X64-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movl %r8d, %eax
-; X64-NEXT:    andl %r9d, %eax
-; X64-NEXT:    movl %r9d, %ecx
-; X64-NEXT:    notl %ecx
-; X64-NEXT:    andl %r10d, %ecx
-; X64-NEXT:    orl %eax, %ecx
-; X64-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    shrq $32, %r8
-; X64-NEXT:    shrq $32, %r9
-; X64-NEXT:    andl %r9d, %r8d
-; X64-NEXT:    notl %r9d
-; X64-NEXT:    shrq $32, %r10
-; X64-NEXT:    andl %r9d, %r10d
-; X64-NEXT:    orl %r8d, %r10d
-; X64-NEXT:    movl %r10d, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64:       # %bb.0:
+; X64-NEXT:    sqrtps {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> <float 42.0, float 3.0>)
   ret <2 x float> %t0
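
For context on the checks deleted from PR31672: the old SSE1-only lowering expanded the fast sqrt through a hardware reciprocal-square-root estimate, one Newton-Raphson refinement step (the mulps/mulps/addps/mulps chain with constant-pool operands), and a compare-against-zero select to fix up the zero lanes; that select is the v4i32 vselect that crashed. A scalar sketch of the technique in C, assuming the conventional refinement constants since the actual pool values are hidden behind regexes (sqrt_via_rsqrt is illustrative, not the exact lowering):

#include <math.h>
#include <stdio.h>

/* Scalar sketch of the removed rsqrt-based sqrt expansion.  1.0f/sqrtf(x)
 * stands in for the rsqrtps hardware estimate; the -0.5/3.0 refinement
 * constants below are the conventional form, assumed rather than quoted
 * from the old check lines. */
static float sqrt_via_rsqrt(float x) {
  float r0 = 1.0f / sqrtf(x);                   /* rsqrtps estimate */
  float r1 = -0.5f * r0 * (x * r0 * r0 - 3.0f); /* one Newton-Raphson step */
  float s = x * r1;                             /* sqrt(x) = x * rsqrt(x) */
  return (x == 0.0f) ? 0.0f : s; /* zero fixup: the vselect from PR31672 */
}

int main(void) {
  /* The test's input vector is <42.0, 3.0>. */
  printf("%f %f\n", sqrt_via_rsqrt(42.0f), sqrt_via_rsqrt(3.0f));
  return 0;
}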