Mercurial > hg > CbC > CbC_llvm
diff test/CodeGen/X86/pseudo_cmov_lower.ll @ 95:afa8332a0e37 LLVM3.8
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Oct 2015 17:48:58 +0900 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/CodeGen/X86/pseudo_cmov_lower.ll Tue Oct 13 17:48:58 2015 +0900 @@ -0,0 +1,267 @@ +; RUN: llc < %s -mtriple=i386-linux-gnu -o - | FileCheck %s + +; This test checks that only a single js gets generated in the final code +; for lowering the CMOV pseudos that get created for this IR. +; CHECK-LABEL: foo1: +; CHECK: js +; CHECK-NOT: js +define i32 @foo1(i32 %v1, i32 %v2, i32 %v3) nounwind { +entry: + %cmp = icmp slt i32 %v1, 0 + %v2.v3 = select i1 %cmp, i32 %v2, i32 %v3 + %v1.v2 = select i1 %cmp, i32 %v1, i32 %v2 + %sub = sub i32 %v1.v2, %v2.v3 + ret i32 %sub +} + +; This test checks that only a single js gets generated in the final code +; for lowering the CMOV pseudos that get created for this IR. This makes +; sure the code for the lowering for opposite conditions gets tested. +; CHECK-LABEL: foo11: +; CHECK: js +; CHECK-NOT: js +; CHECK-NOT: jns +define i32 @foo11(i32 %v1, i32 %v2, i32 %v3) nounwind { +entry: + %cmp1 = icmp slt i32 %v1, 0 + %v2.v3 = select i1 %cmp1, i32 %v2, i32 %v3 + %cmp2 = icmp sge i32 %v1, 0 + %v1.v2 = select i1 %cmp2, i32 %v1, i32 %v2 + %sub = sub i32 %v1.v2, %v2.v3 + ret i32 %sub +} + +; This test checks that only a single js gets generated in the final code +; for lowering the CMOV pseudos that get created for this IR. +; CHECK-LABEL: foo2: +; CHECK: js +; CHECK-NOT: js +define i32 @foo2(i8 %v1, i8 %v2, i8 %v3) nounwind { +entry: + %cmp = icmp slt i8 %v1, 0 + %v2.v3 = select i1 %cmp, i8 %v2, i8 %v3 + %v1.v2 = select i1 %cmp, i8 %v1, i8 %v2 + %t1 = sext i8 %v2.v3 to i32 + %t2 = sext i8 %v1.v2 to i32 + %sub = sub i32 %t1, %t2 + ret i32 %sub +} + +; This test checks that only a single js gets generated in the final code +; for lowering the CMOV pseudos that get created for this IR. +; CHECK-LABEL: foo3: +; CHECK: js +; CHECK-NOT: js +define i32 @foo3(i16 %v1, i16 %v2, i16 %v3) nounwind { +entry: + %cmp = icmp slt i16 %v1, 0 + %v2.v3 = select i1 %cmp, i16 %v2, i16 %v3 + %v1.v2 = select i1 %cmp, i16 %v1, i16 %v2 + %t1 = sext i16 %v2.v3 to i32 + %t2 = sext i16 %v1.v2 to i32 + %sub = sub i32 %t1, %t2 + ret i32 %sub +} + +; This test checks that only a single js gets generated in the final code +; for lowering the CMOV pseudos that get created for this IR. +; CHECK-LABEL: foo4: +; CHECK: js +; CHECK-NOT: js +define float @foo4(i32 %v1, float %v2, float %v3, float %v4) nounwind { +entry: + %cmp = icmp slt i32 %v1, 0 + %t1 = select i1 %cmp, float %v2, float %v3 + %t2 = select i1 %cmp, float %v3, float %v4 + %sub = fsub float %t1, %t2 + ret float %sub +} + +; This test checks that only a single je gets generated in the final code +; for lowering the CMOV pseudos that get created for this IR. +; CHECK-LABEL: foo5: +; CHECK: je +; CHECK-NOT: je +define double @foo5(i32 %v1, double %v2, double %v3, double %v4) nounwind { +entry: + %cmp = icmp eq i32 %v1, 0 + %t1 = select i1 %cmp, double %v2, double %v3 + %t2 = select i1 %cmp, double %v3, double %v4 + %sub = fsub double %t1, %t2 + ret double %sub +} + +; This test checks that only a single je gets generated in the final code +; for lowering the CMOV pseudos that get created for this IR. +; CHECK-LABEL: foo6: +; CHECK: je +; CHECK-NOT: je +define <4 x float> @foo6(i32 %v1, <4 x float> %v2, <4 x float> %v3, <4 x float> %v4) nounwind { +entry: + %cmp = icmp eq i32 %v1, 0 + %t1 = select i1 %cmp, <4 x float> %v2, <4 x float> %v3 + %t2 = select i1 %cmp, <4 x float> %v3, <4 x float> %v4 + %sub = fsub <4 x float> %t1, %t2 + ret <4 x float> %sub +} + +; This test checks that only a single je gets generated in the final code +; for lowering the CMOV pseudos that get created for this IR. +; CHECK-LABEL: foo7: +; CHECK: je +; CHECK-NOT: je +define <2 x double> @foo7(i32 %v1, <2 x double> %v2, <2 x double> %v3, <2 x double> %v4) nounwind { +entry: + %cmp = icmp eq i32 %v1, 0 + %t1 = select i1 %cmp, <2 x double> %v2, <2 x double> %v3 + %t2 = select i1 %cmp, <2 x double> %v3, <2 x double> %v4 + %sub = fsub <2 x double> %t1, %t2 + ret <2 x double> %sub +} + +; This test checks that only a single ja gets generated in the final code +; for lowering the CMOV pseudos that get created for this IR. This combines +; all the supported types together into one long string of selects based +; on the same condition. +; CHECK-LABEL: foo8: +; CHECK: ja +; CHECK-NOT: ja +define void @foo8(i32 %v1, + i8 %v2, i8 %v3, + i16 %v12, i16 %v13, + i32 %v22, i32 %v23, + float %v32, float %v33, + double %v42, double %v43, + <4 x float> %v52, <4 x float> %v53, + <2 x double> %v62, <2 x double> %v63, + <8 x float> %v72, <8 x float> %v73, + <4 x double> %v82, <4 x double> %v83, + <16 x float> %v92, <16 x float> %v93, + <8 x double> %v102, <8 x double> %v103, + i8 * %dst) nounwind { +entry: + %add.ptr11 = getelementptr inbounds i8, i8* %dst, i32 2 + %a11 = bitcast i8* %add.ptr11 to i16* + + %add.ptr21 = getelementptr inbounds i8, i8* %dst, i32 4 + %a21 = bitcast i8* %add.ptr21 to i32* + + %add.ptr31 = getelementptr inbounds i8, i8* %dst, i32 8 + %a31 = bitcast i8* %add.ptr31 to float* + + %add.ptr41 = getelementptr inbounds i8, i8* %dst, i32 16 + %a41 = bitcast i8* %add.ptr41 to double* + + %add.ptr51 = getelementptr inbounds i8, i8* %dst, i32 32 + %a51 = bitcast i8* %add.ptr51 to <4 x float>* + + %add.ptr61 = getelementptr inbounds i8, i8* %dst, i32 48 + %a61 = bitcast i8* %add.ptr61 to <2 x double>* + + %add.ptr71 = getelementptr inbounds i8, i8* %dst, i32 64 + %a71 = bitcast i8* %add.ptr71 to <8 x float>* + + %add.ptr81 = getelementptr inbounds i8, i8* %dst, i32 128 + %a81 = bitcast i8* %add.ptr81 to <4 x double>* + + %add.ptr91 = getelementptr inbounds i8, i8* %dst, i32 64 + %a91 = bitcast i8* %add.ptr91 to <16 x float>* + + %add.ptr101 = getelementptr inbounds i8, i8* %dst, i32 128 + %a101 = bitcast i8* %add.ptr101 to <8 x double>* + + ; These operations are necessary, because select of two single use loads + ; ends up getting optimized into a select of two leas, followed by a + ; single load of the selected address. + %t13 = xor i16 %v13, 11 + %t23 = xor i32 %v23, 1234 + %t33 = fadd float %v33, %v32 + %t43 = fadd double %v43, %v42 + %t53 = fadd <4 x float> %v53, %v52 + %t63 = fadd <2 x double> %v63, %v62 + %t73 = fsub <8 x float> %v73, %v72 + %t83 = fsub <4 x double> %v83, %v82 + %t93 = fsub <16 x float> %v93, %v92 + %t103 = fsub <8 x double> %v103, %v102 + + %cmp = icmp ugt i32 %v1, 31 + %t11 = select i1 %cmp, i16 %v12, i16 %t13 + %t21 = select i1 %cmp, i32 %v22, i32 %t23 + %t31 = select i1 %cmp, float %v32, float %t33 + %t41 = select i1 %cmp, double %v42, double %t43 + %t51 = select i1 %cmp, <4 x float> %v52, <4 x float> %t53 + %t61 = select i1 %cmp, <2 x double> %v62, <2 x double> %t63 + %t71 = select i1 %cmp, <8 x float> %v72, <8 x float> %t73 + %t81 = select i1 %cmp, <4 x double> %v82, <4 x double> %t83 + %t91 = select i1 %cmp, <16 x float> %v92, <16 x float> %t93 + %t101 = select i1 %cmp, <8 x double> %v102, <8 x double> %t103 + + store i16 %t11, i16* %a11, align 2 + store i32 %t21, i32* %a21, align 4 + store float %t31, float* %a31, align 4 + store double %t41, double* %a41, align 8 + store <4 x float> %t51, <4 x float>* %a51, align 16 + store <2 x double> %t61, <2 x double>* %a61, align 16 + store <8 x float> %t71, <8 x float>* %a71, align 32 + store <4 x double> %t81, <4 x double>* %a81, align 32 + store <16 x float> %t91, <16 x float>* %a91, align 32 + store <8 x double> %t101, <8 x double>* %a101, align 32 + + ret void +} + +; This test checks that only a single ja gets generated in the final code +; for lowering the CMOV pseudos that get created for this IR. +; on the same condition. +; Contrary to my expectations, this doesn't exercise the code for +; CMOV_V8I1, CMOV_V16I1, CMOV_V32I1, or CMOV_V64I1. Instead the selects all +; get lowered into vector length number of selects, which all eventually turn +; into a huge number of CMOV_GR8, which are all contiguous, so the optimization +; kicks in as long as CMOV_GR8 is supported. I couldn't find a way to get +; CMOV_V*I1 pseudo-opcodes to get generated. If a way exists to get CMOV_V*1 +; pseudo-opcodes to be generated, this test should be replaced with one that +; tests those opcodes. +; +; CHECK-LABEL: foo9: +; CHECK: ja +; CHECK-NOT: ja +define void @foo9(i32 %v1, + <8 x i1> %v12, <8 x i1> %v13, + <16 x i1> %v22, <16 x i1> %v23, + <32 x i1> %v32, <32 x i1> %v33, + <64 x i1> %v42, <64 x i1> %v43, + i8 * %dst) nounwind { +entry: + %add.ptr11 = getelementptr inbounds i8, i8* %dst, i32 0 + %a11 = bitcast i8* %add.ptr11 to <8 x i1>* + + %add.ptr21 = getelementptr inbounds i8, i8* %dst, i32 4 + %a21 = bitcast i8* %add.ptr21 to <16 x i1>* + + %add.ptr31 = getelementptr inbounds i8, i8* %dst, i32 8 + %a31 = bitcast i8* %add.ptr31 to <32 x i1>* + + %add.ptr41 = getelementptr inbounds i8, i8* %dst, i32 16 + %a41 = bitcast i8* %add.ptr41 to <64 x i1>* + + ; These operations are necessary, because select of two single use loads + ; ends up getting optimized into a select of two leas, followed by a + ; single load of the selected address. + %t13 = xor <8 x i1> %v13, %v12 + %t23 = xor <16 x i1> %v23, %v22 + %t33 = xor <32 x i1> %v33, %v32 + %t43 = xor <64 x i1> %v43, %v42 + + %cmp = icmp ugt i32 %v1, 31 + %t11 = select i1 %cmp, <8 x i1> %v12, <8 x i1> %t13 + %t21 = select i1 %cmp, <16 x i1> %v22, <16 x i1> %t23 + %t31 = select i1 %cmp, <32 x i1> %v32, <32 x i1> %t33 + %t41 = select i1 %cmp, <64 x i1> %v42, <64 x i1> %t43 + + store <8 x i1> %t11, <8 x i1>* %a11, align 16 + store <16 x i1> %t21, <16 x i1>* %a21, align 4 + store <32 x i1> %t31, <32 x i1>* %a31, align 8 + store <64 x i1> %t41, <64 x i1>* %a41, align 16 + + ret void +}