150
|
1 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s
|
|
2 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1064 %s
|
|
3 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s
|
|
4 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1064 %s
|
|
5 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032,GFX10DEFWAVE %s
|
|
6
|
|
; Integer VOPC compare: wave32 writes the condition to the 32-bit vcc_lo,
; wave64 to the full 64-bit vcc register pair.
; GCN-LABEL: {{^}}test_vopc_i32:
; GFX1032: v_cmp_lt_i32_e32 vcc_lo, 0, v{{[0-9]+}}
; GFX1032: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 1, vcc_lo
; GFX1064: v_cmp_lt_i32_e32 vcc, 0, v{{[0-9]+}}
; GFX1064: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 1, vcc{{$}}
define amdgpu_kernel void @test_vopc_i32(i32 addrspace(1)* %arg) {
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %lid
  %load = load i32, i32 addrspace(1)* %gep, align 4
  %cmp = icmp sgt i32 %load, 0
  %sel = select i1 %cmp, i32 1, i32 2
  store i32 %sel, i32 addrspace(1)* %gep, align 4
  ret void
}
|
|
21
|
|
; Float VOPC compare: same as the i32 case but for f32; the condition register
; width (vcc_lo vs. vcc) follows the wave size.
; GCN-LABEL: {{^}}test_vopc_f32:
; GFX1032: v_cmp_nge_f32_e32 vcc_lo, 0, v{{[0-9]+}}
; GFX1032: v_cndmask_b32_e64 v{{[0-9]+}}, 2.0, 1.0, vcc_lo
; GFX1064: v_cmp_nge_f32_e32 vcc, 0, v{{[0-9]+}}
; GFX1064: v_cndmask_b32_e64 v{{[0-9]+}}, 2.0, 1.0, vcc{{$}}
define amdgpu_kernel void @test_vopc_f32(float addrspace(1)* %arg) {
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %lid
  %load = load float, float addrspace(1)* %gep, align 4
  %cmp = fcmp ugt float %load, 0.0
  %sel = select i1 %cmp, float 1.0, float 2.0
  store float %sel, float addrspace(1)* %gep, align 4
  ret void
}
|
|
36
|
|
; A compare feeding llvm.amdgcn.kill should select the v_cmpx form (which
; updates exec directly) for both wave sizes.
; GCN-LABEL: {{^}}test_vopc_vcmpx:
; GFX1032: v_cmpx_le_f32_e32 0, v{{[0-9]+}}
; GFX1064: v_cmpx_le_f32_e32 0, v{{[0-9]+}}
define amdgpu_ps void @test_vopc_vcmpx(float %x) {
  %cmp = fcmp oge float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp)
  ret void
}
|
|
45
|
|
; f16 SDWA compare on the high half of a <2 x half> load; the condition goes to
; a single SGPR on wave32 and an SGPR pair on wave64.
; GCN-LABEL: {{^}}test_vopc_2xf16:
; GFX1032: v_cmp_le_f16_sdwa [[SC:s[0-9]+]], {{[vs][0-9]+}}, v{{[0-9]+}} src0_sel:WORD_1 src1_sel:DWORD
; GFX1032: v_cndmask_b32_e64 v{{[0-9]+}}, 0x3c003c00, v{{[0-9]+}}, [[SC]]
; GFX1064: v_cmp_le_f16_sdwa [[SC:s\[[0-9:]+\]]], {{[vs][0-9]+}}, v{{[0-9]+}} src0_sel:WORD_1 src1_sel:DWORD
; GFX1064: v_cndmask_b32_e64 v{{[0-9]+}}, 0x3c003c00, v{{[0-9]+}}, [[SC]]
define amdgpu_kernel void @test_vopc_2xf16(<2 x half> addrspace(1)* %arg) {
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %lid
  %load = load <2 x half>, <2 x half> addrspace(1)* %gep, align 4
  %elt = extractelement <2 x half> %load, i32 1
  %cmp = fcmp ugt half %elt, 0.0
  %sel = select i1 %cmp, <2 x half> <half 1.0, half 1.0>, <2 x half> %load
  store <2 x half> %sel, <2 x half> addrspace(1)* %gep, align 4
  ret void
}
|
|
61
|
|
; fcmp oeq with +inf on a fabs'ed value selects v_cmp_class_f32 with mask
; 0x204; the destination may be vcc(_lo) or a scalar register of wave width.
; GCN-LABEL: {{^}}test_vopc_class:
; GFX1032: v_cmp_class_f32_e64 [[C:vcc_lo|s[0-9:]+]], s{{[0-9]+}}, 0x204
; GFX1032: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[C]]
; GFX1064: v_cmp_class_f32_e64 [[C:vcc|s\[[0-9:]+\]]], s{{[0-9]+}}, 0x204
; GFX1064: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[C]]{{$}}
define amdgpu_kernel void @test_vopc_class(i32 addrspace(1)* %out, float %x) #0 {
  %fabs = tail call float @llvm.fabs.f32(float %x)
  %cmp = fcmp oeq float %fabs, 0x7FF0000000000000
  %ext = zext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}
|
|
74
|
|
; f16 compare + select with an inline-constant-sized immediate (0x7c00 = f16
; +inf); condition register width follows the wave size.
; GCN-LABEL: {{^}}test_vcmp_vcnd_f16:
; GFX1032: v_cmp_neq_f16_e64 [[C:vcc_lo|s\[[0-9:]+\]]], 0x7c00, s{{[0-9]+}}
; GFX1032: v_cndmask_b32_e32 v{{[0-9]+}}, 0x3c00, v{{[0-9]+}}, [[C]]

; GFX1064: v_cmp_neq_f16_e64 [[C:vcc|s\[[0-9:]+\]]], 0x7c00, s{{[0-9]+}}
; GFX1064: v_cndmask_b32_e32 v{{[0-9]+}}, 0x3c00, v{{[0-9]+}}, [[C]]{{$}}
define amdgpu_kernel void @test_vcmp_vcnd_f16(half addrspace(1)* %out, half %x) #0 {
  %cmp = fcmp oeq half %x, 0x7FF0000000000000
  %sel = select i1 %cmp, half 1.0, half %x
  store half %sel, half addrspace(1)* %out, align 2
  ret void
}
|
|
87
|
|
; Two f32 compares combined with a scalar AND: wave32 uses s_and_b32 on 32-bit
; masks, wave64 uses s_and_b64 on SGPR pairs.
; GCN-LABEL: {{^}}test_vop3_cmp_f32_sop_and:
; GFX1032: v_cmp_nge_f32_e32 vcc_lo, 0, v{{[0-9]+}}
; GFX1032: v_cmp_nle_f32_e64 [[C2:s[0-9]+]], 1.0, v{{[0-9]+}}
; GFX1032: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[C2]]
; GFX1032: v_cndmask_b32_e64 v{{[0-9]+}}, 2.0, 1.0, [[AND]]
; GFX1064: v_cmp_nge_f32_e32 vcc, 0, v{{[0-9]+}}
; GFX1064: v_cmp_nle_f32_e64 [[C2:s\[[0-9:]+\]]], 1.0, v{{[0-9]+}}
; GFX1064: s_and_b64 [[AND:s\[[0-9:]+\]]], vcc, [[C2]]
; GFX1064: v_cndmask_b32_e64 v{{[0-9]+}}, 2.0, 1.0, [[AND]]
define amdgpu_kernel void @test_vop3_cmp_f32_sop_and(float addrspace(1)* %arg) {
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %lid
  %load = load float, float addrspace(1)* %gep, align 4
  %cmp = fcmp ugt float %load, 0.0
  %cmp2 = fcmp ult float %load, 1.0
  %and = and i1 %cmp, %cmp2
  %sel = select i1 %and, float 1.0, float 2.0
  store float %sel, float addrspace(1)* %gep, align 4
  ret void
}
|
|
108
|
|
; Two i32 compares combined with a scalar XOR of the wave-sized condition
; masks (s_xor_b32 for wave32, s_xor_b64 for wave64).
; GCN-LABEL: {{^}}test_vop3_cmp_i32_sop_xor:
; GFX1032: v_cmp_lt_i32_e32 vcc_lo, 0, v{{[0-9]+}}
; GFX1032: v_cmp_gt_i32_e64 [[C2:s[0-9]+]], 1, v{{[0-9]+}}
; GFX1032: s_xor_b32 [[AND:s[0-9]+]], vcc_lo, [[C2]]
; GFX1032: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 1, [[AND]]
; GFX1064: v_cmp_lt_i32_e32 vcc, 0, v{{[0-9]+}}
; GFX1064: v_cmp_gt_i32_e64 [[C2:s\[[0-9:]+\]]], 1, v{{[0-9]+}}
; GFX1064: s_xor_b64 [[AND:s\[[0-9:]+\]]], vcc, [[C2]]
; GFX1064: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 1, [[AND]]
define amdgpu_kernel void @test_vop3_cmp_i32_sop_xor(i32 addrspace(1)* %arg) {
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %lid
  %load = load i32, i32 addrspace(1)* %gep, align 4
  %cmp = icmp sgt i32 %load, 0
  %cmp2 = icmp slt i32 %load, 1
  %xor = xor i1 %cmp, %cmp2
  %sel = select i1 %xor, i32 1, i32 2
  store i32 %sel, i32 addrspace(1)* %gep, align 4
  ret void
}
|
|
129
|
|
; Two unsigned i32 compares combined with a scalar OR of the wave-sized
; condition masks (s_or_b32 for wave32, s_or_b64 for wave64).
; GCN-LABEL: {{^}}test_vop3_cmp_u32_sop_or:
; GFX1032: v_cmp_lt_u32_e32 vcc_lo, 3, v{{[0-9]+}}
; GFX1032: v_cmp_gt_u32_e64 [[C2:s[0-9]+]], 2, v{{[0-9]+}}
; GFX1032: s_or_b32 [[AND:s[0-9]+]], vcc_lo, [[C2]]
; GFX1032: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 1, [[AND]]
; GFX1064: v_cmp_lt_u32_e32 vcc, 3, v{{[0-9]+}}
; GFX1064: v_cmp_gt_u32_e64 [[C2:s\[[0-9:]+\]]], 2, v{{[0-9]+}}
; GFX1064: s_or_b64 [[AND:s\[[0-9:]+\]]], vcc, [[C2]]
; GFX1064: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 1, [[AND]]
define amdgpu_kernel void @test_vop3_cmp_u32_sop_or(i32 addrspace(1)* %arg) {
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %lid
  %load = load i32, i32 addrspace(1)* %gep, align 4
  %cmp = icmp ugt i32 %load, 3
  %cmp2 = icmp ult i32 %load, 2
  %or = or i1 %cmp, %cmp2
  %sel = select i1 %or, i32 1, i32 2
  store i32 %sel, i32 addrspace(1)* %gep, align 4
  ret void
}
|
|
150
|
|
; Divergent branch: exec masking uses s_and_saveexec_b32 (wave32) or
; s_and_saveexec_b64 (wave64), followed by s_cbranch_execz.
; GCN-LABEL: {{^}}test_mask_if:
; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, vcc_lo
; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}}
; GCN: s_cbranch_execz
define amdgpu_kernel void @test_mask_if(i32 addrspace(1)* %arg) #0 {
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %cmp = icmp ugt i32 %lid, 10

  br i1 %cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %arg, align 4
  br label %endif

endif:
  ret void
}
|
|
167
|
|
; Loop containing a divergent if: all exec-mask bookkeeping (s_or, s_andn2,
; s_xor, s_and_saveexec) uses 32-bit scalar ops for wave32 and 64-bit ops for
; wave64.
; GCN-LABEL: {{^}}test_loop_with_if:
; GFX1032: s_or_b32 s{{[0-9]+}}, vcc_lo, s{{[0-9]+}}
; GFX1032: s_andn2_b32 exec_lo, exec_lo, s{{[0-9]+}}
; GFX1064: s_or_b64 s[{{[0-9:]+}}], vcc, s[{{[0-9:]+}}]
; GFX1064: s_andn2_b64 exec, exec, s[{{[0-9:]+}}]
; GCN: s_cbranch_execz
; GCN: BB{{.*}}:
; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, vcc_lo
; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}}
; GCN: s_cbranch_execz
; GCN: ; %bb.{{[0-9]+}}:
; GCN: BB{{.*}}:
; GFX1032: s_xor_b32 s{{[0-9]+}}, exec_lo, s{{[0-9]+}}
; GFX1064: s_xor_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
; GCN: ; %bb.{{[0-9]+}}:
; GCN: ; %bb.{{[0-9]+}}:
; GFX1032: s_or_b32 exec_lo, exec_lo, s{{[0-9]+}}
; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, s{{[0-9]+}}
; GFX1064: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
; GCN: s_cbranch_execz BB
; GCN: ; %bb.{{[0-9]+}}:
; GCN: BB{{.*}}:
; GCN: s_endpgm
define amdgpu_kernel void @test_loop_with_if(i32 addrspace(1)* %arg) #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb2

bb1:
  ret void

bb2:
  %tmp3 = phi i32 [ 0, %bb ], [ %tmp15, %bb13 ]
  %tmp4 = icmp slt i32 %tmp3, %tmp
  br i1 %tmp4, label %bb5, label %bb11

bb5:
  %tmp6 = sext i32 %tmp3 to i64
  %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp6
  %tmp8 = load i32, i32 addrspace(1)* %tmp7, align 4
  %tmp9 = icmp sgt i32 %tmp8, 10
  br i1 %tmp9, label %bb10, label %bb11

bb10:
  store i32 %tmp, i32 addrspace(1)* %tmp7, align 4
  br label %bb13

bb11:
  %tmp12 = sdiv i32 %tmp3, 2
  br label %bb13

bb13:
  %tmp14 = phi i32 [ %tmp3, %bb10 ], [ %tmp12, %bb11 ]
  %tmp15 = add nsw i32 %tmp14, 1
  %tmp16 = icmp slt i32 %tmp14, 255
  br i1 %tmp16, label %bb2, label %bb1
}
|
|
226
|
|
; Loop with an if/else and multiple break conditions: the break-mask
; accumulation (s_or, s_andn2, s_and) is done on wave-sized scalar registers.
; GCN-LABEL: {{^}}test_loop_with_if_else_break:
; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, vcc_lo
; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}}
; GCN: s_cbranch_execz
; GCN: ; %bb.{{[0-9]+}}: ; %.preheader
; GCN: BB{{.*}}:

; GFX1032: s_or_b32 [[MASK0:s[0-9]+]], [[MASK0]], vcc_lo
; GFX1064: s_or_b64 [[MASK0:s\[[0-9:]+\]]], [[MASK0]], vcc
; GFX1032: s_andn2_b32 [[MASK1:s[0-9]+]], [[MASK1]], exec_lo
; GFX1064: s_andn2_b64 [[MASK1:s\[[0-9:]+\]]], [[MASK1]], exec
; GCN: global_store_dword
; GFX1032: s_and_b32 [[MASK0]], [[MASK0]], exec_lo
; GFX1064: s_and_b64 [[MASK0]], [[MASK0]], exec
; GFX1032: s_or_b32 [[MASK1]], [[MASK1]], [[MASK0]]
; GFX1064: s_or_b64 [[MASK1]], [[MASK1]], [[MASK0]]
; GCN: BB{{.*}}: ; %Flow
; GFX1032: s_and_b32 [[TMP0:s[0-9]+]], exec_lo, [[MASK1]]
; GFX1064: s_and_b64 [[TMP0:s\[[0-9:]+\]]], exec, [[MASK1]]
; GFX1032: s_or_b32 [[ACC:s[0-9]+]], [[TMP0]], [[ACC]]
; GFX1064: s_or_b64 [[ACC:s\[[0-9:]+\]]], [[TMP0]], [[ACC]]
; GFX1032: s_andn2_b32 exec_lo, exec_lo, [[ACC]]
; GFX1064: s_andn2_b64 exec, exec, [[ACC]]
; GCN: s_cbranch_execz
; GCN: BB{{.*}}:
; GCN: s_load_dword [[LOAD:s[0-9]+]]
; GFX1032: s_or_b32 [[MASK1]], [[MASK1]], exec_lo
; GFX1064: s_or_b64 [[MASK1]], [[MASK1]], exec
; GCN: s_cmp_lt_i32 [[LOAD]], 11
define amdgpu_kernel void @test_loop_with_if_else_break(i32 addrspace(1)* %arg) #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp eq i32 %tmp, 0
  br i1 %tmp1, label %.loopexit, label %.preheader

.preheader:
  br label %bb2

bb2:
  %tmp3 = phi i32 [ %tmp9, %bb8 ], [ 0, %.preheader ]
  %tmp4 = zext i32 %tmp3 to i64
  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp4
  %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
  %tmp7 = icmp sgt i32 %tmp6, 10
  br i1 %tmp7, label %bb8, label %.loopexit

bb8:
  store i32 %tmp, i32 addrspace(1)* %tmp5, align 4
  %tmp9 = add nuw nsw i32 %tmp3, 1
  %tmp10 = icmp ult i32 %tmp9, 256
  %tmp11 = icmp ult i32 %tmp9, %tmp
  %tmp12 = and i1 %tmp10, %tmp11
  br i1 %tmp12, label %bb2, label %.loopexit

.loopexit:
  ret void
}
|
|
284
|
|
; 64-bit add lowered to add-with-carry: the carry is produced/consumed in
; vcc_lo on wave32 and vcc on wave64.
; GCN-LABEL: {{^}}test_addc_vop2b:
; GFX1032: v_add_co_u32_e64 v{{[0-9]+}}, vcc_lo, v{{[0-9]+}}, s{{[0-9]+}}
; GFX1032: v_add_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}, vcc_lo
; GFX1064: v_add_co_u32_e64 v{{[0-9]+}}, vcc, v{{[0-9]+}}, s{{[0-9]+}}
; GFX1064: v_add_co_ci_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}, vcc{{$}}
define amdgpu_kernel void @test_addc_vop2b(i64 addrspace(1)* %arg, i64 %arg1) #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp3 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %tmp
  %tmp4 = load i64, i64 addrspace(1)* %tmp3, align 8
  %tmp5 = add nsw i64 %tmp4, %arg1
  store i64 %tmp5, i64 addrspace(1)* %tmp3, align 8
  ret void
}
|
|
299
|
|
; 64-bit subtract (memory - scalar): the borrow can live in vcc(_lo) or a
; plain wave-sized SGPR destination.
; GCN-LABEL: {{^}}test_subbrev_vop2b:
; GFX1032: v_sub_co_u32_e64 v{{[0-9]+}}, [[A0:s[0-9]+|vcc_lo]], v{{[0-9]+}}, s{{[0-9]+}}{{$}}
; GFX1032: v_subrev_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[A0]]{{$}}
; GFX1064: v_sub_co_u32_e64 v{{[0-9]+}}, [[A0:s\[[0-9:]+\]|vcc]], v{{[0-9]+}}, s{{[0-9]+}}{{$}}
; GFX1064: v_subrev_co_ci_u32_e32 v{{[0-9]+}}, vcc, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[A0]]{{$}}
define amdgpu_kernel void @test_subbrev_vop2b(i64 addrspace(1)* %arg, i64 %arg1) #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp3 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %tmp
  %tmp4 = load i64, i64 addrspace(1)* %tmp3, align 8
  %tmp5 = sub nsw i64 %tmp4, %arg1
  store i64 %tmp5, i64 addrspace(1)* %tmp3, align 8
  ret void
}
|
|
314
|
|
; 64-bit subtract with reversed operands (scalar - memory); borrow register
; width follows the wave size.
; GCN-LABEL: {{^}}test_subb_vop2b:
; GFX1032: v_sub_co_u32_e64 v{{[0-9]+}}, [[A0:s[0-9]+|vcc_lo]], s{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX1032: v_sub_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, {{[vs][0-9]+}}, v{{[0-9]+}}, [[A0]]{{$}}
; GFX1064: v_sub_co_u32_e64 v{{[0-9]+}}, [[A0:s\[[0-9:]+\]|vcc]], s{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX1064: v_sub_co_ci_u32_e32 v{{[0-9]+}}, vcc, {{[vs][0-9]+}}, v{{[0-9]+}}, [[A0]]{{$}}
define amdgpu_kernel void @test_subb_vop2b(i64 addrspace(1)* %arg, i64 %arg1) #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp3 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %tmp
  %tmp4 = load i64, i64 addrspace(1)* %tmp3, align 8
  %tmp5 = sub nsw i64 %arg1, %tmp4
  store i64 %tmp5, i64 addrspace(1)* %tmp3, align 8
  ret void
}
|
|
329
|
|
; i64 udiv is expanded into a long carry chain; every carry/borrow register in
; the sequence must match the wave size (vcc_lo / 32-bit SGPRs vs. vcc / SGPR
; pairs).
; GCN-LABEL: {{^}}test_udiv64:
; GFX1032: v_add_co_u32_e64 v{{[0-9]+}}, [[SDST:s[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GFX1032: v_add_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, 0, v{{[0-9]+}}, vcc_lo
; GFX1032: v_add_co_ci_u32_e64 v{{[0-9]+}}, vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}, [[SDST]]
; GFX1032: v_add_co_u32_e64 v{{[0-9]+}}, vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
; GFX1032: v_add_co_u32_e64 v{{[0-9]+}}, vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
; GFX1032: v_add_co_u32_e64 v{{[0-9]+}}, vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
; GFX1032: v_add_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, 0, v{{[0-9]+}}, vcc_lo
; GFX1032: v_sub_co_u32_e64 v{{[0-9]+}}, vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
; GFX1032: v_subrev_co_ci_u32_e64 v{{[0-9]+}}, s{{[0-9]+}}, {{[vs][0-9]+}}, v{{[0-9]+}}, vcc_lo
; GFX1032: v_sub_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, {{[vs][0-9]+}}, v{{[0-9]+}}, vcc_lo
; GFX1064: v_add_co_u32_e64 v{{[0-9]+}}, [[SDST:s\[[0-9:]+\]]], v{{[0-9]+}}, v{{[0-9]+}}
; GFX1064: v_add_co_ci_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc{{$}}
; GFX1064: v_add_co_ci_u32_e64 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}, [[SDST]]
; GFX1064: v_add_co_u32_e64 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX1064: v_add_co_u32_e64 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX1064: v_add_co_u32_e64 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX1064: v_add_co_ci_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc{{$}}
; GFX1064: v_sub_co_u32_e64 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX1064: v_subrev_co_ci_u32_e64 v{{[0-9]+}}, s[{{[0-9:]+}}], {{[vs][0-9]+}}, v{{[0-9]+}}, vcc{{$}}
; GFX1064: v_sub_co_ci_u32_e32 v{{[0-9]+}}, vcc, {{[vs][0-9]+}}, v{{[0-9]+}}, vcc{{$}}
define amdgpu_kernel void @test_udiv64(i64 addrspace(1)* %arg) #0 {
bb:
  %tmp = getelementptr inbounds i64, i64 addrspace(1)* %arg, i64 1
  %tmp1 = load i64, i64 addrspace(1)* %tmp, align 8
  %tmp2 = load i64, i64 addrspace(1)* %arg, align 8
  %tmp3 = udiv i64 %tmp1, %tmp2
  %tmp4 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i64 2
  store i64 %tmp3, i64 addrspace(1)* %tmp4, align 8
  ret void
}
|
|
361
|
|
; v_div_scale_f32 writes its i1 condition output to a single SGPR on wave32
; and an SGPR pair on wave64.
; GCN-LABEL: {{^}}test_div_scale_f32:
; GFX1032: v_div_scale_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX1064: v_div_scale_f32 v{{[0-9]+}}, s[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @test_div_scale_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}
|
|
378
|
|
; f64 variant of the div_scale test; the scalar condition output again follows
; the wave size.
; GCN-LABEL: {{^}}test_div_scale_f64:
; GFX1032: v_div_scale_f64 v[{{[0-9:]+}}], s{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
; GFX1064: v_div_scale_f64 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
define amdgpu_kernel void @test_div_scale_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load volatile double, double addrspace(1)* %gep.0, align 8
  %b = load volatile double, double addrspace(1)* %gep.1, align 8

  %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}
|
|
395
|
|
; sext-mul-add pattern selects v_mad_i64_i32; its carry-out SGPR operand is
; wave-sized.
; GCN-LABEL: {{^}}test_mad_i64_i32:
; GFX1032: v_mad_i64_i32 v[{{[0-9:]+}}], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX1064: v_mad_i64_i32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
define i64 @test_mad_i64_i32(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
  %sext0 = sext i32 %arg0 to i64
  %sext1 = sext i32 %arg1 to i64
  %mul = mul i64 %sext0, %sext1
  %mad = add i64 %mul, %arg2
  ret i64 %mad
}
|
|
406
|
|
; zext-mul-add pattern selects v_mad_u64_u32; unsigned counterpart of the
; test above.
; GCN-LABEL: {{^}}test_mad_u64_u32:
; GFX1032: v_mad_u64_u32 v[{{[0-9:]+}}], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX1064: v_mad_u64_u32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
define i64 @test_mad_u64_u32(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
  %sext0 = zext i32 %arg0 to i64
  %sext1 = zext i32 %arg1 to i64
  %mul = mul i64 %sext0, %sext1
  %mad = add i64 %mul, %arg2
  ret i64 %mad
}
|
|
417
|
|
; v_div_fmas reads vcc implicitly, so the i1 operand must be materialized into
; vcc_lo (wave32) or vcc (wave64).
; GCN-LABEL: {{^}}test_div_fmas_f32:
; GFX1032: v_cmp_eq_u32_e64 vcc_lo,
; GFX1064: v_cmp_eq_u32_e64 vcc,
; GCN: v_div_fmas_f32 v{{[0-9]+}}, {{[vs][0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
  %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone
  store float %result, float addrspace(1)* %out, align 4
  ret void
}
|
|
427
|
|
; f64 variant of the div_fmas test; the compare still lands in vcc(_lo).
; GCN-LABEL: {{^}}test_div_fmas_f64:
; GFX1032: v_cmp_eq_u32_e64 vcc_lo,
; GFX1064: v_cmp_eq_u32_e64 vcc,
; GCN-DAG: v_div_fmas_f64 v[{{[0-9:]+}}], {{[vs]}}[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind {
  %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone
  store double %result, double addrspace(1)* %out, align 8
  ret void
}
|
|
437
|
|
; An i1 phi feeding llvm.amdgcn.div.fmas is kept in vcc across the divergent
; region: vcc is zero-initialized, conditionally overwritten by the compare in
; the guarded block, and read by v_div_fmas after exec is restored.
; GCN-LABEL: {{^}}test_div_fmas_f32_i1_phi_vcc:
; GFX1032: s_mov_b32 [[VCC:vcc_lo]], 0{{$}}
; GFX1064: s_mov_b64 [[VCC:vcc]], 0{{$}}
; GFX1032: s_and_saveexec_b32 [[SAVE:s[0-9]+]], s{{[0-9]+}}{{$}}
; GFX1064: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], s[{{[0-9:]+}}]{{$}}

; GCN: load_dword [[LOAD:v[0-9]+]]
; GCN: v_cmp_ne_u32_e32 [[VCC]], 0, [[LOAD]]

; GCN: BB{{[0-9_]+}}:
; GFX1032: s_or_b32 exec_lo, exec_lo, [[SAVE]]
; GFX1064: s_or_b64 exec, exec, [[SAVE]]
; GCN: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 addrspace(1)* %dummy) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 2
  %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1
  %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2

  %a = load float, float addrspace(1)* %gep.a
  %b = load float, float addrspace(1)* %gep.b
  %c = load float, float addrspace(1)* %gep.c

  %cmp0 = icmp eq i32 %tid, 0
  br i1 %cmp0, label %bb, label %exit

bb:
  %val = load volatile i32, i32 addrspace(1)* %dummy
  %cmp1 = icmp ne i32 %val, 0
  br label %exit

exit:
  %cond = phi i1 [false, %entry], [%cmp1, %bb]
  %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cond) nounwind readnone
  store float %result, float addrspace(1)* %gep.out, align 4
  ret void
}
|
|
477
|
|
; Full f32 fdiv expansion: div_scale writes vcc(_lo), and no other vcc use is
; allowed between the rcp and the final div_fmas (GCN-NOT: vcc).
; GCN-LABEL: {{^}}fdiv_f32:
; GFX1032: v_div_scale_f32 v{{[0-9]+}}, vcc_lo, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GFX1064: v_div_scale_f32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: v_rcp_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NOT: vcc
; GCN: v_div_fmas_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %fdiv = fdiv float %a, %b
  store float %fdiv, float addrspace(1)* %out
  ret void
}
|
|
490
|
|
; Divergent f16 compare used directly as a branch condition: the mask is ANDed
; with exec (wave-sized) before s_cbranch_vccnz.
; GCN-LABEL: {{^}}test_br_cc_f16:
; GFX1032: v_cmp_nlt_f16_e32 vcc_lo,
; GFX1032: s_and_b32 vcc_lo, exec_lo, vcc_lo
; GFX1064: v_cmp_nlt_f16_e32 vcc,
; GFX1064: s_and_b64 vcc, exec, vcc{{$}}
; GCN-NEXT: s_cbranch_vccnz
define amdgpu_kernel void @test_br_cc_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %fcmp = fcmp olt half %a.val, %b.val
  br i1 %fcmp, label %one, label %two

one:
  store half %a.val, half addrspace(1)* %r
  ret void

two:
  store half %b.val, half addrspace(1)* %r
  ret void
}
|
|
515
|
|
; Uniform i1 branch: lowered to a scalar compare plus an SCC branch, identical
; for both wave sizes (GCN prefix only).
; GCN-LABEL: {{^}}test_brcc_i1:
; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0
; GCN-NEXT: s_cbranch_scc1
define amdgpu_kernel void @test_brcc_i1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i1 %val) #0 {
  %cmp0 = icmp ne i1 %val, 0
  br i1 %cmp0, label %store, label %end

store:
  store i32 222, i32 addrspace(1)* %out
  ret void

end:
  ret void
}
|
|
530
|
|
531 ; GCN-LABEL: {{^}}test_preserve_condition_undef_flag:
|
173
|
532 ; GFX1032-DAG: v_cmp_nlt_f32_e64 s{{[0-9]+}}, s{{[0-9]+}}, 1.0
|
|
533 ; GFX1032-DAG: v_cmp_ngt_f32_e64 s{{[0-9]+}}, s{{[0-9]+}}, 0
|
150
|
534 ; GFX1032: v_cmp_nlt_f32_e64 s{{[0-9]+}}, s{{[0-9]+}}, 1.0
|
|
535 ; GFX1032: s_or_b32 [[OR1:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}}
|
|
536 ; GFX1032: s_or_b32 [[OR2:s[0-9]+]], [[OR1]], s{{[0-9]+}}
|
|
537 ; GFX1032: s_and_b32 vcc_lo, exec_lo, [[OR2]]
|
173
|
538 ; GFX1064-DAG: v_cmp_nlt_f32_e64 s[{{[0-9:]+}}], s{{[0-9]+}}, 1.0
|
|
539 ; GFX1064-DAG: v_cmp_ngt_f32_e64 s[{{[0-9:]+}}], s{{[0-9]+}}, 0
|
150
|
540 ; GFX1064: v_cmp_nlt_f32_e64 s[{{[0-9:]+}}], s{{[0-9]+}}, 1.0
|
|
541 ; GFX1064: s_or_b64 [[OR1:s\[[0-9:]+\]]], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
|
|
542 ; GFX1064: s_or_b64 [[OR2:s\[[0-9:]+\]]], [[OR1]], s[{{[0-9:]+}}]
|
|
543 ; GFX1064: s_and_b64 vcc, exec, [[OR2]]
|
|
544 ; GCN: s_cbranch_vccnz
|
|
545 define amdgpu_kernel void @test_preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) #0 {
|
|
546 bb0:
|
|
547 %tmp = icmp sgt i32 %arg1, 4
|
|
548 %undef = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef)
|
|
549 %tmp4 = select i1 %undef, float %arg, float 1.000000e+00
|
|
550 %tmp5 = fcmp ogt float %arg2, 0.000000e+00
|
|
551 %tmp6 = fcmp olt float %arg2, 1.000000e+00
|
|
552 %tmp7 = fcmp olt float %arg, %tmp4
|
|
553 %tmp8 = and i1 %tmp5, %tmp6
|
|
554 %tmp9 = and i1 %tmp8, %tmp7
|
|
555 br i1 %tmp9, label %bb1, label %bb2
|
|
556
|
|
557 bb1:
|
|
558 store volatile i32 0, i32 addrspace(1)* undef
|
|
559 br label %bb2
|
|
560
|
|
561 bb2:
|
|
562 ret void
|
|
563 }
|
|
564
|
|
; Inverting an i1 loop-break phi is done with a wave-sized s_xor with -1,
; then OR'ed into the accumulated break mask.
; GCN-LABEL: {{^}}test_invert_true_phi_cond_break_loop:
; GFX1032: s_xor_b32 s{{[0-9]+}}, s{{[0-9]+}}, -1
; GFX1032: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GFX1064: s_xor_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], -1
; GFX1064: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
define amdgpu_kernel void @test_invert_true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}
|
|
596
|
|
; Dynamic extractelement with an out-of-range index lowers to a compare/cndmask
; chain; each compare uses the wave-sized vcc(_lo).
; GCN-LABEL: {{^}}test_movrels_extract_neg_offset_vgpr:
; GFX1032: v_cmp_eq_u32_e32 vcc_lo, 1, v{{[0-9]+}}
; GFX1032: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc_lo
; GFX1032: v_cmp_ne_u32_e32 vcc_lo, 2, v{{[0-9]+}}
; GFX1032: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v{{[0-9]+}}, vcc_lo
; GFX1032: v_cmp_ne_u32_e32 vcc_lo, 3, v{{[0-9]+}}
; GFX1032: v_cndmask_b32_e32 v{{[0-9]+}}, 3, v{{[0-9]+}}, vcc_lo
; GFX1064: v_cmp_eq_u32_e32 vcc, 1, v{{[0-9]+}}
; GFX1064: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
; GFX1064: v_cmp_ne_u32_e32 vcc, 2, v{{[0-9]+}}
; GFX1064: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v{{[0-9]+}}, vcc
; GFX1064: v_cmp_ne_u32_e32 vcc, 3, v{{[0-9]+}}
; GFX1064: v_cndmask_b32_e32 v{{[0-9]+}}, 3, v{{[0-9]+}}, vcc
define amdgpu_kernel void @test_movrels_extract_neg_offset_vgpr(i32 addrspace(1)* %out) #0 {
entry:
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %index = add i32 %id, -512
  %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
  store i32 %value, i32 addrspace(1)* %out
  ret void
}
|
|
618
|
|
; llvm.amdgcn.set.inactive toggles exec with s_not of the wave-sized exec
; register around the write to inactive lanes.
; GCN-LABEL: {{^}}test_set_inactive:
; GFX1032: s_not_b32 exec_lo, exec_lo
; GFX1032: v_mov_b32_e32 {{v[0-9]+}}, 42
; GFX1032: s_not_b32 exec_lo, exec_lo
; GFX1064: s_not_b64 exec, exec{{$}}
; GFX1064: v_mov_b32_e32 {{v[0-9]+}}, 42
; GFX1064: s_not_b64 exec, exec{{$}}
define amdgpu_kernel void @test_set_inactive(i32 addrspace(1)* %out, i32 %in) #0 {
  %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42)
  store i32 %tmp, i32 addrspace(1)* %out
  ret void
}
|
|
631
|
|
; 64-bit set.inactive: two 32-bit moves inside the exec-inverted region.
; GCN-LABEL: {{^}}test_set_inactive_64:
; GFX1032: s_not_b32 exec_lo, exec_lo
; GFX1032: v_mov_b32_e32 {{v[0-9]+}}, 0
; GFX1032: v_mov_b32_e32 {{v[0-9]+}}, 0
; GFX1032: s_not_b32 exec_lo, exec_lo
; GFX1064: s_not_b64 exec, exec{{$}}
; GFX1064: v_mov_b32_e32 {{v[0-9]+}}, 0
; GFX1064: v_mov_b32_e32 {{v[0-9]+}}, 0
; GFX1064: s_not_b64 exec, exec{{$}}
define amdgpu_kernel void @test_set_inactive_64(i64 addrspace(1)* %out, i64 %in) #0 {
  %tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 %in, i64 0)
  store i64 %tmp, i64 addrspace(1)* %out
  ret void
}
|
|
646
|
|
; kill(false) kills the whole wave: exec is cleared with a wave-sized move of 0.
; GCN-LABEL: {{^}}test_kill_i1_terminator_float:
; GFX1032: s_mov_b32 exec_lo, 0
; GFX1064: s_mov_b64 exec, 0
define amdgpu_ps void @test_kill_i1_terminator_float() #0 {
  call void @llvm.amdgcn.kill(i1 false)
  ret void
}
|
|
654
|
|
655 ; GCN-LABEL: {{^}}test_kill_i1_terminator_i1:
|
|
656 ; GFX1032: s_or_b32 [[OR:s[0-9]+]],
|
|
657 ; GFX1032: s_and_b32 exec_lo, exec_lo, [[OR]]
|
|
658 ; GFX1064: s_or_b64 [[OR:s\[[0-9:]+\]]],
|
|
659 ; GFX1064: s_and_b64 exec, exec, [[OR]]
|
|
; kill with a computed i1: the OR of two compares becomes a scalar
; s_or_b32/s_or_b64 whose result is ANDed into exec (see CHECKs above),
; using the wave-size-appropriate mask width.
define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
  %c1 = icmp slt i32 %a, %b
  %c2 = icmp slt i32 %c, %d
  ; Lanes where neither compare holds are killed.
  %x = or i1 %c1, %c2
  call void @llvm.amdgcn.kill(i1 %x)
  ret void
}
|
|
667
|
|
668 ; GCN-LABEL: {{^}}test_loop_vcc:
|
|
669 ; GFX1032: v_cmp_lt_f32_e32 vcc_lo,
|
|
670 ; GFX1064: v_cmp_lt_f32_e32 vcc,
|
|
671 ; GCN: s_cbranch_vccnz
|
|
; Loop whose backedge condition is a float compare: the compare must target
; vcc_lo (wave32) or vcc (wave64) and branch via s_cbranch_vccnz (CHECKs above).
define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) #0 {
entry:
  br label %loop

loop:
  ; Loop counter advances by 2.0 each trip; exits once it exceeds 7.0.
  %ctr.iv = phi float [ 0.0, %entry ], [ %ctr.next, %body ]
  %c.iv = phi <4 x float> [ %in, %entry ], [ %c.next, %body ]
  %cc = fcmp ogt float %ctr.iv, 7.0
  br i1 %cc, label %break, label %body

body:
  ; Feed lane 0 of the accumulator back through an image sample.
  %c.iv0 = extractelement <4 x float> %c.iv, i32 0
  %c.next = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.iv0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
  %ctr.next = fadd float %ctr.iv, 2.0
  br label %loop

break:
  ret <4 x float> %c.iv
}
|
|
691
|
|
692 ; GCN-LABEL: {{^}}test_wwm1:
|
|
693 ; GFX1032: s_or_saveexec_b32 [[SAVE:s[0-9]+]], -1
|
|
694 ; GFX1032: s_mov_b32 exec_lo, [[SAVE]]
|
|
695 ; GFX1064: s_or_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], -1
|
|
696 ; GFX1064: s_mov_b64 exec, [[SAVE]]
|
|
; Whole-wave-mode wrapper around a simple fadd: WWM is entered with
; s_or_saveexec_b32/b64 ..., -1 and exited by restoring the saved exec
; (see CHECK lines above); mask width follows the wave size.
define amdgpu_ps float @test_wwm1(i32 inreg %idx0, i32 inreg %idx1, float %src0, float %src1) {
main_body:
  %out = fadd float %src0, %src1
  %out.0 = call float @llvm.amdgcn.wwm.f32(float %out)
  ret float %out.0
}
|
|
703
|
|
704 ; GCN-LABEL: {{^}}test_wwm2:
|
|
705 ; GFX1032: v_cmp_gt_u32_e32 vcc_lo, 32, v{{[0-9]+}}
|
|
706 ; GFX1032: s_and_saveexec_b32 [[SAVE1:s[0-9]+]], vcc_lo
|
|
707 ; GFX1032: s_or_saveexec_b32 [[SAVE2:s[0-9]+]], -1
|
|
708 ; GFX1032: s_mov_b32 exec_lo, [[SAVE2]]
|
|
709 ; GFX1032: s_or_b32 exec_lo, exec_lo, [[SAVE1]]
|
|
710 ; GFX1064: v_cmp_gt_u32_e32 vcc, 32, v{{[0-9]+}}
|
|
711 ; GFX1064: s_and_saveexec_b64 [[SAVE1:s\[[0-9:]+\]]], vcc{{$}}
|
|
712 ; GFX1064: s_or_saveexec_b64 [[SAVE2:s\[[0-9:]+\]]], -1
|
|
713 ; GFX1064: s_mov_b64 exec, [[SAVE2]]
|
|
714 ; GFX1064: s_or_b64 exec, exec, [[SAVE1]]
|
|
; WWM inside a divergent branch: per the CHECKs above, the divergent
; s_and_saveexec mask (SAVE1) and the WWM s_or_saveexec mask (SAVE2) must
; nest correctly and both be restored, using wave-size-appropriate ops.
define amdgpu_ps float @test_wwm2(i32 inreg %idx) {
main_body:
  ; use mbcnt to make sure the branch is divergent
  %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
  %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)
  %cc = icmp uge i32 %hi, 32
  br i1 %cc, label %endif, label %if

if:
  %src = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i32 0, i32 0)
  %out = fadd float %src, %src
  %out.0 = call float @llvm.amdgcn.wwm.f32(float %out)
  %out.1 = fadd float %src, %out.0
  br label %endif

endif:
  %out.2 = phi float [ %out.1, %if ], [ 0.0, %main_body ]
  ret float %out.2
}
|
|
734
|
|
735 ; GCN-LABEL: {{^}}test_wqm1:
|
|
736 ; GFX1032: s_mov_b32 [[ORIG:s[0-9]+]], exec_lo
|
|
737 ; GFX1032: s_wqm_b32 exec_lo, exec_lo
|
|
738 ; GFX1032: s_and_b32 exec_lo, exec_lo, [[ORIG]]
|
|
739 ; GFX1064: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec{{$}}
|
|
740 ; GFX1064: s_wqm_b64 exec, exec{{$}}
|
|
741 ; GFX1064: s_and_b64 exec, exec, [[ORIG]]
|
|
; Pixel shader using interpolation + image sample: codegen must save exec,
; enter whole-quad mode (s_wqm_b32/b64), and AND the original mask back
; before the exact-mode end of the shader (CHECK lines above).
define amdgpu_ps <4 x float> @test_wqm1(i32 inreg, i32 inreg, i32 inreg, i32 inreg %m0, <8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <2 x float> %pos) #0 {
main_body:
  %inst23 = extractelement <2 x float> %pos, i32 0
  %inst24 = extractelement <2 x float> %pos, i32 1
  ; Interpolate two attribute channels, then sample with the results.
  %inst25 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 0, i32 0, i32 %m0)
  %inst26 = tail call float @llvm.amdgcn.interp.p2(float %inst25, float %inst24, i32 0, i32 0, i32 %m0)
  %inst28 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 1, i32 0, i32 %m0)
  %inst29 = tail call float @llvm.amdgcn.interp.p2(float %inst28, float %inst24, i32 1, i32 0, i32 %m0)
  %tex = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %inst26, float %inst29, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0)
  ret <4 x float> %tex
}
|
|
753
|
|
754 ; GCN-LABEL: {{^}}test_wqm2:
|
|
755 ; GFX1032: s_wqm_b32 exec_lo, exec_lo
|
|
756 ; GFX1032: s_and_b32 exec_lo, exec_lo, s{{[0-9+]}}
|
|
757 ; GFX1064: s_wqm_b64 exec, exec{{$}}
|
|
758 ; GFX1064: s_and_b64 exec, exec, s[{{[0-9:]+}}]
|
|
; Explicit llvm.amdgcn.wqm on a computed value: exec enters WQM for the
; buffer loads and is restored with s_and before the exact-mode result is
; returned (CHECK lines above); mask ops track the wave size.
define amdgpu_ps float @test_wqm2(i32 inreg %idx0, i32 inreg %idx1) #0 {
main_body:
  %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
  %src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
  %out = fadd float %src0, %src1
  ; wqm intrinsic is i32, so round-trip the float through bitcasts.
  %out.0 = bitcast float %out to i32
  %out.1 = call i32 @llvm.amdgcn.wqm.i32(i32 %out.0)
  %out.2 = bitcast i32 %out.1 to float
  ret float %out.2
}
|
|
769
|
|
770 ; GCN-LABEL: {{^}}test_intr_fcmp_i64:
|
|
771 ; GFX1032-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], 0{{$}}
|
|
772 ; GFX1032-DAG: v_cmp_eq_f32_e64 s[[C_LO:[0-9]+]], {{s[0-9]+}}, |{{[vs][0-9]+}}|
|
|
773 ; GFX1032-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]]
|
|
774 ; GFX1064: v_cmp_eq_f32_e64 s{{\[}}[[C_LO:[0-9]+]]:[[C_HI:[0-9]+]]], {{s[0-9]+}}, |{{[vs][0-9]+}}|
|
|
775 ; GFX1064-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]]
|
|
776 ; GFX1064-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[C_HI]]
|
|
777 ; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[V_LO]]:[[V_HI]]],
|
|
; llvm.amdgcn.fcmp returning i64: on wave32 the 32-bit ballot result is
; zero-extended (high dword = 0); on wave64 a full 64-bit SGPR pair is
; produced. The fabs feeds the |...| source modifier matched above.
define amdgpu_kernel void @test_intr_fcmp_i64(i64 addrspace(1)* %out, float %src, float %a) {
  %temp = call float @llvm.fabs.f32(float %a)
  ; Condition code 1 corresponds to the "eq" compare matched by the CHECKs.
  %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float %src, float %temp, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
|
|
784
|
|
785 ; GCN-LABEL: {{^}}test_intr_icmp_i64:
|
|
786 ; GFX1032-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], 0{{$}}
|
|
787 ; GFX1032-DAG: v_cmp_eq_u32_e64 [[C_LO:vcc_lo|s[0-9]+]], 0x64, {{s[0-9]+}}
|
|
788 ; GFX1032-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[C_LO]]
|
|
789 ; GFX1064: v_cmp_eq_u32_e64 s{{\[}}[[C_LO:[0-9]+]]:[[C_HI:[0-9]+]]], 0x64, {{s[0-9]+}}
|
|
790 ; GFX1064-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]]
|
|
791 ; GFX1064-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[C_HI]]
|
|
792 ; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[V_LO]]:[[V_HI]]],
|
|
; llvm.amdgcn.icmp returning i64 (ballot of %src == 100): wave32 pads the
; high dword with 0, wave64 fills an SGPR pair (CHECK lines above).
define amdgpu_kernel void @test_intr_icmp_i64(i64 addrspace(1)* %out, i32 %src) {
  ; 32 is the icmp condition-code operand matched as v_cmp_eq_u32 above;
  ; 100 appears as the 0x64 immediate.
  %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %src, i32 100, i32 32)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
|
|
798
|
|
799 ; GCN-LABEL: {{^}}test_intr_fcmp_i32:
|
|
800 ; GFX1032-DAG: v_cmp_eq_f32_e64 s[[C_LO:[0-9]+]], {{s[0-9]+}}, |{{[vs][0-9]+}}|
|
|
801 ; GFX1032-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]]
|
|
802 ; GFX1064: v_cmp_eq_f32_e64 s{{\[}}[[C_LO:[0-9]+]]:[[C_HI:[0-9]+]]], {{s[0-9]+}}, |{{[vs][0-9]+}}|
|
|
803 ; GFX1064-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]]
|
|
804 ; GCN: store_dword v[{{[0-9:]+}}], v[[V_LO]],
|
|
; llvm.amdgcn.fcmp returning i32: the wave32-native ballot width. A single
; dword is stored; wave64 still computes an SGPR-pair compare but only the
; low half is used (CHECK lines above).
define amdgpu_kernel void @test_intr_fcmp_i32(i32 addrspace(1)* %out, float %src, float %a) {
  %temp = call float @llvm.fabs.f32(float %a)
  ; Condition code 1 = the "eq" compare with |fabs| modifier matched above.
  %result = call i32 @llvm.amdgcn.fcmp.i32.f32(float %src, float %temp, i32 1)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}
|
|
811
|
|
812 ; GCN-LABEL: {{^}}test_intr_icmp_i32:
|
|
813 ; GFX1032-DAG: v_cmp_eq_u32_e64 s[[C_LO:[0-9]+]], 0x64, {{s[0-9]+}}
|
|
814 ; GFX1032-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]]{{$}}
|
|
815 ; GFX1064: v_cmp_eq_u32_e64 s{{\[}}[[C_LO:[0-9]+]]:{{[0-9]+}}], 0x64, {{s[0-9]+}}
|
|
816 ; GFX1064-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]]{{$}}
|
|
817 ; GCN: store_dword v[{{[0-9:]+}}], v[[V_LO]],
|
|
; llvm.amdgcn.icmp returning i32 (ballot of %src == 100), the wave32-native
; width; a single dword is stored (CHECK lines above).
define amdgpu_kernel void @test_intr_icmp_i32(i32 addrspace(1)* %out, i32 %src) {
  ; 32 is the condition-code operand (matched as v_cmp_eq_u32); 100 -> 0x64.
  %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 32)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}
|
|
823
|
|
824 ; GCN-LABEL: {{^}}test_wqm_vote:
|
|
825 ; GFX1032: v_cmp_neq_f32_e32 vcc_lo, 0
|
|
826 ; GFX1032: s_wqm_b32 [[WQM:s[0-9]+]], vcc_lo
|
|
827 ; GFX1032: s_and_b32 exec_lo, exec_lo, [[WQM]]
|
|
828 ; GFX1064: v_cmp_neq_f32_e32 vcc, 0
|
|
829 ; GFX1064: s_wqm_b64 [[WQM:s\[[0-9:]+\]]], vcc{{$}}
|
|
830 ; GFX1064: s_and_b64 exec, exec, [[WQM]]
|
|
; wqm.vote feeding kill: the compare lands in vcc_lo/vcc, is widened to a
; whole-quad mask by s_wqm_b32/b64, and that mask is ANDed into exec
; (CHECK lines above).
define amdgpu_ps void @test_wqm_vote(float %a) {
  %c1 = fcmp une float %a, 0.0
  %c2 = call i1 @llvm.amdgcn.wqm.vote(i1 %c1)
  call void @llvm.amdgcn.kill(i1 %c2)
  ret void
}
|
|
837
|
|
838 ; GCN-LABEL: {{^}}test_branch_true:
|
|
839 ; GFX1032: s_and_b32 vcc_lo, exec_lo, -1
|
|
840 ; GFX1064: s_and_b64 vcc, exec, -1
|
|
; Constant-true branch compiled at -O0 (attribute #2 = optnone): the
; always-taken condition is materialized as s_and_b32/b64 vcc, exec, -1
; (CHECK lines above) rather than being folded away.
define amdgpu_kernel void @test_branch_true() #2 {
entry:
  br i1 true, label %for.end, label %for.body.lr.ph

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body, %for.body.lr.ph
  br i1 undef, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
|
|
854
|
|
855 ; GCN-LABEL: {{^}}test_ps_live:
|
|
856 ; GFX1032: s_mov_b32 [[C:s[0-9]+]], exec_lo
|
|
857 ; GFX1064: s_mov_b64 [[C:s\[[0-9:]+\]]], exec{{$}}
|
|
858 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[C]]
|
|
; llvm.amdgcn.ps.live: the live-lane mask is a copy of exec (wave-size
; width), then selected to 0/1 per lane via v_cndmask (CHECK lines above).
define amdgpu_ps float @test_ps_live() #0 {
  %live = call i1 @llvm.amdgcn.ps.live()
  ; Return the i1 as a float bit-pattern (0 or 1).
  %live.32 = zext i1 %live to i32
  %r = bitcast i32 %live.32 to float
  ret float %r
}
|
|
865
|
|
866 ; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle64:
|
|
867 ; GFX1032: v_cmp_neq_f64_e64 [[C:s[0-9]+]], s[{{[0-9:]+}}], 1.0
|
|
868 ; GFX1032: s_and_b32 vcc_lo, exec_lo, [[C]]
|
|
869 ; GFX1064: v_cmp_neq_f64_e64 [[C:s\[[0-9:]+\]]], s[{{[0-9:]+}}], 1.0
|
|
870 ; GFX1064: s_and_b64 vcc, exec, [[C]]
|
|
; Triangle-shaped diamond eligible for if-conversion (see the
; -amdgpu-early-ifcvt RUN lines): the f64 compare result is ANDed with exec
; into vcc using wave-size-appropriate ops (CHECK lines above).
define amdgpu_kernel void @test_vccnz_ifcvt_triangle64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
entry:
  %v = load double, double addrspace(1)* %in
  %cc = fcmp oeq double %v, 1.000000e+00
  br i1 %cc, label %if, label %endif

if:
  ; Conditionally doubled value; phi below selects it when the branch is taken.
  %u = fadd double %v, %v
  br label %endif

endif:
  %r = phi double [ %v, %entry ], [ %u, %if ]
  store double %r, double addrspace(1)* %out
  ret void
}
|
|
886
|
|
887 ; GCN-LABEL: {{^}}test_vgprblocks_w32_attr:
|
|
888 ; Test that the wave size can be overridden in function attributes and that the block size is correct as a result
|
|
889 ; GFX10DEFWAVE: ; VGPRBlocks: 1
|
|
; Attribute #3 forces +wavefrontsize32 on this function; with 12 float args
; kept live through a chain of fadds, the emitted VGPRBlocks count must be 1
; (GFX10DEFWAVE CHECK above), proving the per-function wave-size override.
define amdgpu_gs float @test_vgprblocks_w32_attr(float %a, float %b, float %c, float %d, float %e,
                                                 float %f, float %g, float %h, float %i, float %j, float %k, float %l) #3 {
main_body:
  ; Serial dependency chain keeps all inputs live -> fixed VGPR usage.
  %s = fadd float %a, %b
  %s.1 = fadd float %s, %c
  %s.2 = fadd float %s.1, %d
  %s.3 = fadd float %s.2, %e
  %s.4 = fadd float %s.3, %f
  %s.5 = fadd float %s.4, %g
  %s.6 = fadd float %s.5, %h
  %s.7 = fadd float %s.6, %i
  %s.8 = fadd float %s.7, %j
  %s.9 = fadd float %s.8, %k
  %s.10 = fadd float %s.9, %l
  ret float %s.10
}
|
|
906
|
|
907 ; GCN-LABEL: {{^}}test_vgprblocks_w64_attr:
|
|
908 ; Test that the wave size can be overridden in function attributes and that the block size is correct as a result
|
|
909 ; GFX10DEFWAVE: ; VGPRBlocks: 2
|
|
; Attribute #4 forces +wavefrontsize64; identical body to the w32 variant,
; but the wave64 VGPR-block granularity yields VGPRBlocks: 2
; (GFX10DEFWAVE CHECK above).
define amdgpu_gs float @test_vgprblocks_w64_attr(float %a, float %b, float %c, float %d, float %e,
                                                 float %f, float %g, float %h, float %i, float %j, float %k, float %l) #4 {
main_body:
  ; Serial dependency chain keeps all inputs live -> fixed VGPR usage.
  %s = fadd float %a, %b
  %s.1 = fadd float %s, %c
  %s.2 = fadd float %s.1, %d
  %s.3 = fadd float %s.2, %e
  %s.4 = fadd float %s.3, %f
  %s.5 = fadd float %s.4, %g
  %s.6 = fadd float %s.5, %h
  %s.7 = fadd float %s.6, %i
  %s.8 = fadd float %s.7, %j
  %s.9 = fadd float %s.8, %k
  %s.10 = fadd float %s.9, %l
  ret float %s.10
}
|
|
926
|
|
927 ; GCN-LABEL: {{^}}icmp64:
|
|
928 ; GFX1032: v_cmp_eq_u32_e32 vcc_lo, 0, v
|
|
929 ; GFX1064: v_cmp_eq_u32_e32 vcc, 0, v
|
|
; amdgcn.icmp.i64 ballot feeding bit manipulation and cttz: verifies the
; comparison itself still selects vcc_lo (wave32) vs vcc (wave64) for the
; unrelated v_cmp_eq_u32 matched above.
define amdgpu_kernel void @icmp64(i32 %n, i32 %s) {
entry:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %mul4 = mul nsw i32 %s, %n
  %cmp = icmp slt i32 0, %mul4
  br label %if.end

if.end:                                           ; preds = %entry
  %rem = urem i32 %id, %s
  ; 64-bit ballot of (%rem == 0); cc operand 32 selects equality.
  %icmp = tail call i64 @llvm.amdgcn.icmp.i64.i32(i32 %rem, i32 0, i32 32)
  %shr = lshr i64 %icmp, 1
  %notmask = shl nsw i64 -1, 0
  %and = and i64 %notmask, %shr
  ; Set the sign bit so cttz always has a bound (input is never zero).
  %or = or i64 %and, -9223372036854775808
  %cttz = tail call i64 @llvm.cttz.i64(i64 %or, i1 true)
  %cast = trunc i64 %cttz to i32
  %cmp3 = icmp ugt i32 10, %cast
  %cmp6 = icmp ne i32 %rem, 0
  %brmerge = or i1 %cmp6, %cmp3
  br i1 %brmerge, label %if.end2, label %if.then

if.then:                                          ; preds = %if.end
  unreachable

if.end2:                                          ; preds = %if.end
  ret void
}
|
|
957
|
|
958 ; GCN-LABEL: {{^}}fcmp64:
|
|
959 ; GFX1032: v_cmp_eq_f32_e32 vcc_lo, 0, v
|
|
960 ; GFX1064: v_cmp_eq_f32_e32 vcc, 0, v
|
|
; Float analogue of @icmp64: amdgcn.fcmp.i64 ballot plus bit manipulation,
; checking the plain v_cmp_eq_f32 matched above targets vcc_lo vs vcc.
define amdgpu_kernel void @fcmp64(float %n, float %s) {
entry:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %id.f = uitofp i32 %id to float
  %mul4 = fmul float %s, %n
  %cmp = fcmp ult float 0.0, %mul4
  br label %if.end

if.end:                                           ; preds = %entry
  %rem.f = frem float %id.f, %s
  ; 64-bit ballot of (%rem.f == 0.0); cc operand 1 selects "eq".
  %fcmp = tail call i64 @llvm.amdgcn.fcmp.i64.f32(float %rem.f, float 0.0, i32 1)
  %shr = lshr i64 %fcmp, 1
  %notmask = shl nsw i64 -1, 0
  %and = and i64 %notmask, %shr
  ; Set the sign bit so cttz always has a bound (input is never zero).
  %or = or i64 %and, -9223372036854775808
  %cttz = tail call i64 @llvm.cttz.i64(i64 %or, i1 true)
  %cast = trunc i64 %cttz to i32
  %cmp3 = icmp ugt i32 10, %cast
  %cmp6 = fcmp one float %rem.f, 0.0
  %brmerge = or i1 %cmp6, %cmp3
  br i1 %brmerge, label %if.end2, label %if.then

if.then:                                          ; preds = %if.end
  unreachable

if.end2:                                          ; preds = %if.end
  ret void
}
|
|
989
|
|
990 ; GCN-LABEL: {{^}}icmp32:
|
|
991 ; GFX1032: v_cmp_eq_u32_e32 vcc_lo, 0, v
|
|
992 ; GFX1064: v_cmp_eq_u32_e32 vcc, 0, v
|
|
; 32-bit variant of @icmp64: amdgcn.icmp.i32 ballot with i32 bit
; manipulation; the v_cmp_eq_u32 matched above still uses vcc_lo vs vcc.
define amdgpu_kernel void @icmp32(i32 %n, i32 %s) {
entry:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %mul4 = mul nsw i32 %s, %n
  %cmp = icmp slt i32 0, %mul4
  br label %if.end

if.end:                                           ; preds = %entry
  %rem = urem i32 %id, %s
  ; 32-bit ballot of (%rem == 0); cc operand 32 selects equality.
  %icmp = tail call i32 @llvm.amdgcn.icmp.i32.i32(i32 %rem, i32 0, i32 32)
  %shr = lshr i32 %icmp, 1
  %notmask = shl nsw i32 -1, 0
  %and = and i32 %notmask, %shr
  ; Set the sign bit so cttz always has a bound (input is never zero).
  %or = or i32 %and, 2147483648
  %cttz = tail call i32 @llvm.cttz.i32(i32 %or, i1 true)
  %cmp3 = icmp ugt i32 10, %cttz
  %cmp6 = icmp ne i32 %rem, 0
  %brmerge = or i1 %cmp6, %cmp3
  br i1 %brmerge, label %if.end2, label %if.then

if.then:                                          ; preds = %if.end
  unreachable

if.end2:                                          ; preds = %if.end
  ret void
}
|
|
1019
|
|
1020 ; GCN-LABEL: {{^}}fcmp32:
|
|
1021 ; GFX1032: v_cmp_eq_f32_e32 vcc_lo, 0, v
|
|
1022 ; GFX1064: v_cmp_eq_f32_e32 vcc, 0, v
|
|
; 32-bit float variant: amdgcn.fcmp.i32 ballot with i32 bit manipulation;
; the v_cmp_eq_f32 matched above must target vcc_lo (wave32) vs vcc (wave64).
define amdgpu_kernel void @fcmp32(float %n, float %s) {
entry:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %id.f = uitofp i32 %id to float
  %mul4 = fmul float %s, %n
  %cmp = fcmp ult float 0.0, %mul4
  br label %if.end

if.end:                                           ; preds = %entry
  %rem.f = frem float %id.f, %s
  ; 32-bit ballot of (%rem.f == 0.0); cc operand 1 selects "eq".
  %fcmp = tail call i32 @llvm.amdgcn.fcmp.i32.f32(float %rem.f, float 0.0, i32 1)
  %shr = lshr i32 %fcmp, 1
  %notmask = shl nsw i32 -1, 0
  %and = and i32 %notmask, %shr
  ; Set the sign bit so cttz always has a bound (input is never zero).
  %or = or i32 %and, 2147483648
  %cttz = tail call i32 @llvm.cttz.i32(i32 %or, i1 true)
  %cmp3 = icmp ugt i32 10, %cttz
  %cmp6 = fcmp one float %rem.f, 0.0
  %brmerge = or i1 %cmp6, %cmp3
  br i1 %brmerge, label %if.end2, label %if.then

if.then:                                          ; preds = %if.end
  unreachable

if.end2:                                          ; preds = %if.end
  ret void
}
|
|
1050
|
|
1051 declare void @external_void_func_void() #1
|
|
1052
|
|
1053 ; Test save/restore of VGPR needed for SGPR spilling.
|
|
1054
|
|
1055 ; GCN-LABEL: {{^}}callee_no_stack_with_call:
|
|
1056 ; GCN: s_waitcnt
|
|
1057 ; GCN-NEXT: s_waitcnt_vscnt
|
|
1058
|
|
1059 ; GFX1064-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
|
1060 ; GFX1032-NEXT: s_or_saveexec_b32 [[COPY_EXEC0:s[0-9]]], -1{{$}}
|
173
|
1061 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
|
150
|
1062 ; GCN-NEXT: v_nop
|
|
1063 ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
|
|
1064 ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC0]]
|
|
1065
|
173
|
1066 ; GCN-NEXT: v_writelane_b32 v40, s33, 2
|
|
1067 ; GCN: s_mov_b32 s33, s32
|
150
|
1068 ; GFX1064: s_add_u32 s32, s32, 0x400
|
|
1069 ; GFX1032: s_add_u32 s32, s32, 0x200
|
|
1070
|
|
1071
|
173
|
1072 ; GCN-DAG: v_writelane_b32 v40, s30, 0
|
|
1073 ; GCN-DAG: v_writelane_b32 v40, s31, 1
|
150
|
1074 ; GCN: s_swappc_b64
|
173
|
1075 ; GCN-DAG: v_readlane_b32 s4, v40, 0
|
|
1076 ; GCN-DAG: v_readlane_b32 s5, v40, 1
|
150
|
1077
|
|
1078
|
|
1079 ; GFX1064: s_sub_u32 s32, s32, 0x400
|
|
1080 ; GFX1032: s_sub_u32 s32, s32, 0x200
|
173
|
1081 ; GCN: v_readlane_b32 s33, v40, 2
|
150
|
1082 ; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
|
1083 ; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}}
|
173
|
1084 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
150
|
1085 ; GCN-NEXT: v_nop
|
|
1086 ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
|
|
1087 ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC1]]
|
|
1088 ; GCN-NEXT: s_waitcnt vmcnt(0)
|
|
1089 ; GCN-NEXT: s_setpc_b64
|
|
; Non-entry function with a call but no other stack use: exercises the
; save/restore of the VGPR (v40) used for SGPR spilling around the call,
; under an all-lanes s_or_saveexec region (see the long CHECK sequence above).
define void @callee_no_stack_with_call() #1 {
  call void @external_void_func_void()
  ret void
}
|
|
1094
|
|
1095
|
|
1096 declare i32 @llvm.amdgcn.workitem.id.x()
|
|
1097 declare float @llvm.fabs.f32(float)
|
|
1098 declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1)
|
|
1099 declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1)
|
|
1100 declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1)
|
|
1101 declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1)
|
|
1102 declare i1 @llvm.amdgcn.class.f32(float, i32)
|
|
1103 declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32)
|
|
1104 declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64)
|
|
1105 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
|
|
1106 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
|
|
1107 declare float @llvm.amdgcn.wwm.f32(float)
|
|
1108 declare i32 @llvm.amdgcn.wqm.i32(i32)
|
|
1109 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32)
|
|
1110 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32)
|
|
1111 declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg)
|
|
1112 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32)
|
|
1113 declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32)
|
|
1114 declare i64 @llvm.amdgcn.fcmp.i64.f32(float, float, i32)
|
|
1115 declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32)
|
|
1116 declare i32 @llvm.amdgcn.fcmp.i32.f32(float, float, i32)
|
|
1117 declare i32 @llvm.amdgcn.icmp.i32.i32(i32, i32, i32)
|
|
1118 declare void @llvm.amdgcn.kill(i1)
|
|
1119 declare i1 @llvm.amdgcn.wqm.vote(i1)
|
|
1120 declare i1 @llvm.amdgcn.ps.live()
|
|
1121 declare i64 @llvm.cttz.i64(i64, i1)
|
|
1122 declare i32 @llvm.cttz.i32(i32, i1)
|
|
1123
|
|
1124 attributes #0 = { nounwind readnone speculatable }
|
|
1125 attributes #1 = { nounwind }
|
|
1126 attributes #2 = { nounwind readnone optnone noinline }
|
|
1127 attributes #3 = { "target-features"="+wavefrontsize32" }
|
|
1128 attributes #4 = { "target-features"="+wavefrontsize64" }
|