CbC/CbC_llvm: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll annotate

annotate llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll @ 223:5f17cb93ff66 llvm-original

LLVM13 (2021/7/18)

author	Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date	Sun, 18 Jul 2021 22:43:00 +0900
parents	79ff65ed7e25
children	c4bab56944e8

rev	line source
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -enable-var-scope -check-prefix=SI %s
1d019706d866 LLVM10 anatofuz parents: diff changeset	2
; Intrinsic declarations shared by the tests below. Each div.scale variant takes two
; floating-point operands plus an i1 flag and returns a { value, i1 } pair. Attribute
; group #1 is defined outside the visible part of this file.
1d019706d866 LLVM10 anatofuz parents: diff changeset	3 declare i32 @llvm.amdgcn.workitem.id.x() #1
1d019706d866 LLVM10 anatofuz parents: diff changeset	4 declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) #1
1d019706d866 LLVM10 anatofuz parents: diff changeset	5 declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) #1
1d019706d866 LLVM10 anatofuz parents: diff changeset	6 declare float @llvm.fabs.f32(float) #1
1d019706d866 LLVM10 anatofuz parents: diff changeset	7
; f32, both operands in VGPRs, flag false: %a and %b are volatile loads of the adjacent
; floats %in[tid] and %in[tid + 1]. The checks expect the v_div_scale_f32 sources in the
; order B, B, A (A/B being the registers loaded for %a/%b), and only the first result
; element is stored to %out.
1d019706d866 LLVM10 anatofuz parents: diff changeset	8 ; SI-LABEL: {{^}}test_div_scale_f32_1:
1d019706d866 LLVM10 anatofuz parents: diff changeset	9 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
1d019706d866 LLVM10 anatofuz parents: diff changeset	10 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
1d019706d866 LLVM10 anatofuz parents: diff changeset	11 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	12 ; SI: buffer_store_dword [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	13 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	14 define amdgpu_kernel void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	15 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	16 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
1d019706d866 LLVM10 anatofuz parents: diff changeset	17 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
1d019706d866 LLVM10 anatofuz parents: diff changeset	18
1d019706d866 LLVM10 anatofuz parents: diff changeset	19 %a = load volatile float, float addrspace(1)* %gep.0, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	20 %b = load volatile float, float addrspace(1)* %gep.1, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	21
1d019706d866 LLVM10 anatofuz parents: diff changeset	22 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	23 %result0 = extractvalue { float, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	24 store float %result0, float addrspace(1)* %out, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	25 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	26 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	27
; f32, both operands in VGPRs, flag true: same setup as the flag-false f32 variant
; (volatile loads of %in[tid] and %in[tid + 1]), but the checks now expect the
; v_div_scale_f32 sources in the order A, B, A.
1d019706d866 LLVM10 anatofuz parents: diff changeset	28 ; SI-LABEL: {{^}}test_div_scale_f32_2:
1d019706d866 LLVM10 anatofuz parents: diff changeset	29 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
1d019706d866 LLVM10 anatofuz parents: diff changeset	30 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
1d019706d866 LLVM10 anatofuz parents: diff changeset	31 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	32 ; SI: buffer_store_dword [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	33 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	34 define amdgpu_kernel void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	35 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	36 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
1d019706d866 LLVM10 anatofuz parents: diff changeset	37 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
1d019706d866 LLVM10 anatofuz parents: diff changeset	38
1d019706d866 LLVM10 anatofuz parents: diff changeset	39 %a = load volatile float, float addrspace(1)* %gep.0, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	40 %b = load volatile float, float addrspace(1)* %gep.1, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	41
1d019706d866 LLVM10 anatofuz parents: diff changeset	42 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	43 %result0 = extractvalue { float, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	44 store float %result0, float addrspace(1)* %out, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	45 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	46 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	47
; f64, both operands in VGPRs, flag false: %a and %b are volatile loads of the adjacent
; doubles %in[tid] and %in[tid + 1] (the second load is expected at offset:8). The checks
; expect the v_div_scale_f64 sources in the order B, B, A. The %aptr parameter is not
; referenced in the body.
1d019706d866 LLVM10 anatofuz parents: diff changeset	48 ; SI-LABEL: {{^}}test_div_scale_f64_1:
1d019706d866 LLVM10 anatofuz parents: diff changeset	49 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
1d019706d866 LLVM10 anatofuz parents: diff changeset	50 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
1d019706d866 LLVM10 anatofuz parents: diff changeset	51 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	52 ; SI: buffer_store_dwordx2 [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	53 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	54 define amdgpu_kernel void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	55 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	56 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
1d019706d866 LLVM10 anatofuz parents: diff changeset	57 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
1d019706d866 LLVM10 anatofuz parents: diff changeset	58
1d019706d866 LLVM10 anatofuz parents: diff changeset	59 %a = load volatile double, double addrspace(1)* %gep.0, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	60 %b = load volatile double, double addrspace(1)* %gep.1, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	61
1d019706d866 LLVM10 anatofuz parents: diff changeset	62 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	63 %result0 = extractvalue { double, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	64 store double %result0, double addrspace(1)* %out, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	65 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	66 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	67
; f64, both operands in VGPRs, flag true: volatile loads of the adjacent doubles
; %in[tid] and %in[tid + 1]; the checks expect the v_div_scale_f64 sources in the
; order A, B, A. The %aptr parameter is not referenced in the body.
1d019706d866 LLVM10 anatofuz parents: diff changeset	68 ; SI-LABEL: {{^}}test_div_scale_f64_2:
1d019706d866 LLVM10 anatofuz parents: diff changeset	69 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
1d019706d866 LLVM10 anatofuz parents: diff changeset	70 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
1d019706d866 LLVM10 anatofuz parents: diff changeset	71 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	72 ; SI: buffer_store_dwordx2 [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	73 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	74 define amdgpu_kernel void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	75 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	76 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
1d019706d866 LLVM10 anatofuz parents: diff changeset	77 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
1d019706d866 LLVM10 anatofuz parents: diff changeset	78
1d019706d866 LLVM10 anatofuz parents: diff changeset	79 %a = load volatile double, double addrspace(1)* %gep.0, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	80 %b = load volatile double, double addrspace(1)* %gep.1, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	81
1d019706d866 LLVM10 anatofuz parents: diff changeset	82 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	83 %result0 = extractvalue { double, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	84 store double %result0, double addrspace(1)* %out, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	85 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	86 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	87
; f32, first operand scalar, flag false: %a is a kernel argument (expected to arrive via
; s_load_dword into an SGPR) and %b is loaded from %in[tid]. The checks expect the
; v_div_scale_f32 sources in the order B, B, A with A used directly as an SGPR.
1d019706d866 LLVM10 anatofuz parents: diff changeset	88 ; SI-LABEL: {{^}}test_div_scale_f32_scalar_num_1:
1d019706d866 LLVM10 anatofuz parents: diff changeset	89 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	90 ; SI-DAG: s_load_dword [[A:s[0-9]+]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	91 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	92 ; SI: buffer_store_dword [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	93 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	94 define amdgpu_kernel void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	95 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	96 %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
1d019706d866 LLVM10 anatofuz parents: diff changeset	97
1d019706d866 LLVM10 anatofuz parents: diff changeset	98 %b = load float, float addrspace(1)* %gep, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	99
1d019706d866 LLVM10 anatofuz parents: diff changeset	100 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	101 %result0 = extractvalue { float, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	102 store float %result0, float addrspace(1)* %out, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	103 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	104 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	105
; f32, first operand scalar, flag true: %a is a kernel argument (expected in an SGPR via
; s_load_dword) and %b is loaded from %in[tid]. The checks expect the v_div_scale_f32
; sources in the order A, B, A, i.e. the same SGPR appears as first and third source.
1d019706d866 LLVM10 anatofuz parents: diff changeset	106 ; SI-LABEL: {{^}}test_div_scale_f32_scalar_num_2:
1d019706d866 LLVM10 anatofuz parents: diff changeset	107 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	108 ; SI-DAG: s_load_dword [[A:s[0-9]+]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	109 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	110 ; SI: buffer_store_dword [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	111 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	112 define amdgpu_kernel void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	113 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	114 %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
1d019706d866 LLVM10 anatofuz parents: diff changeset	115
1d019706d866 LLVM10 anatofuz parents: diff changeset	116 %b = load float, float addrspace(1)* %gep, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	117
1d019706d866 LLVM10 anatofuz parents: diff changeset	118 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	119 %result0 = extractvalue { float, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	120 store float %result0, float addrspace(1)* %out, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	121 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	122 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	123
; f32, second operand scalar, flag false: %b is a kernel argument (expected in an SGPR
; via s_load_dword) and %a is loaded from %in[tid]. The checks expect the
; v_div_scale_f32 sources in the order B, B, A, i.e. the SGPR fills the first two slots.
1d019706d866 LLVM10 anatofuz parents: diff changeset	124 ; SI-LABEL: {{^}}test_div_scale_f32_scalar_den_1:
1d019706d866 LLVM10 anatofuz parents: diff changeset	125 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	126 ; SI-DAG: s_load_dword [[B:s[0-9]+]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	127 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	128 ; SI: buffer_store_dword [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	129 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	130 define amdgpu_kernel void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	131 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	132 %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
1d019706d866 LLVM10 anatofuz parents: diff changeset	133
1d019706d866 LLVM10 anatofuz parents: diff changeset	134 %a = load float, float addrspace(1)* %gep, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	135
1d019706d866 LLVM10 anatofuz parents: diff changeset	136 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	137 %result0 = extractvalue { float, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	138 store float %result0, float addrspace(1)* %out, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	139 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	140 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	141
; f32, second operand scalar, flag true: %b is a kernel argument (expected in an SGPR
; via s_load_dword) and %a is loaded from %in[tid]. The checks expect the
; v_div_scale_f32 sources in the order A, B, A.
1d019706d866 LLVM10 anatofuz parents: diff changeset	142 ; SI-LABEL: {{^}}test_div_scale_f32_scalar_den_2:
1d019706d866 LLVM10 anatofuz parents: diff changeset	143 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	144 ; SI-DAG: s_load_dword [[B:s[0-9]+]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	145 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	146 ; SI: buffer_store_dword [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	147 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	148 define amdgpu_kernel void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	149 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	150 %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
1d019706d866 LLVM10 anatofuz parents: diff changeset	151
1d019706d866 LLVM10 anatofuz parents: diff changeset	152 %a = load float, float addrspace(1)* %gep, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	153
1d019706d866 LLVM10 anatofuz parents: diff changeset	154 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	155 %result0 = extractvalue { float, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	156 store float %result0, float addrspace(1)* %out, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	157 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	158 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	159
; f64, first operand scalar, flag false: %a is a kernel argument (expected via
; s_load_dwordx2 at offset 0xd) and %b is loaded from %in[tid]. The checks expect the
; v_div_scale_f64 sources in the order B, B, A.
1d019706d866 LLVM10 anatofuz parents: diff changeset	160 ; SI-LABEL: {{^}}test_div_scale_f64_scalar_num_1:
1d019706d866 LLVM10 anatofuz parents: diff changeset	161 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	162 ; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
1d019706d866 LLVM10 anatofuz parents: diff changeset	163 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	164 ; SI: buffer_store_dwordx2 [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	165 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	166 define amdgpu_kernel void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	167 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	168 %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
1d019706d866 LLVM10 anatofuz parents: diff changeset	169
1d019706d866 LLVM10 anatofuz parents: diff changeset	170 %b = load double, double addrspace(1)* %gep, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	171
1d019706d866 LLVM10 anatofuz parents: diff changeset	172 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	173 %result0 = extractvalue { double, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	174 store double %result0, double addrspace(1)* %out, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	175 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	176 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	177
; f64, first operand scalar, flag true: %a is a kernel argument (expected via
; s_load_dwordx2 at offset 0xd) and %b is loaded from %in[tid]. The checks expect the
; v_div_scale_f64 sources in the order A, B, A.
1d019706d866 LLVM10 anatofuz parents: diff changeset	178 ; SI-LABEL: {{^}}test_div_scale_f64_scalar_num_2:
1d019706d866 LLVM10 anatofuz parents: diff changeset	179 ; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
1d019706d866 LLVM10 anatofuz parents: diff changeset	180 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	181 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	182 ; SI: buffer_store_dwordx2 [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	183 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	184 define amdgpu_kernel void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	185 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	186 %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
1d019706d866 LLVM10 anatofuz parents: diff changeset	187
1d019706d866 LLVM10 anatofuz parents: diff changeset	188 %b = load double, double addrspace(1)* %gep, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	189
1d019706d866 LLVM10 anatofuz parents: diff changeset	190 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	191 %result0 = extractvalue { double, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	192 store double %result0, double addrspace(1)* %out, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	193 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	194 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	195
; f64, second operand scalar, flag false: %b is a kernel argument (expected via
; s_load_dwordx2 at offset 0xd) and %a is loaded from %in[tid]. The checks expect the
; v_div_scale_f64 sources in the order B, B, A.
1d019706d866 LLVM10 anatofuz parents: diff changeset	196 ; SI-LABEL: {{^}}test_div_scale_f64_scalar_den_1:
1d019706d866 LLVM10 anatofuz parents: diff changeset	197 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	198 ; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
1d019706d866 LLVM10 anatofuz parents: diff changeset	199 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	200 ; SI: buffer_store_dwordx2 [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	201 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	202 define amdgpu_kernel void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	203 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	204 %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
1d019706d866 LLVM10 anatofuz parents: diff changeset	205
1d019706d866 LLVM10 anatofuz parents: diff changeset	206 %a = load double, double addrspace(1)* %gep, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	207
1d019706d866 LLVM10 anatofuz parents: diff changeset	208 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	209 %result0 = extractvalue { double, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	210 store double %result0, double addrspace(1)* %out, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	211 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	212 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	213
; f64, second operand scalar, flag true: %b is a kernel argument (expected via
; s_load_dwordx2 at offset 0xd) and %a is loaded from %in[tid]. The checks expect the
; v_div_scale_f64 sources in the order A, B, A.
1d019706d866 LLVM10 anatofuz parents: diff changeset	214 ; SI-LABEL: {{^}}test_div_scale_f64_scalar_den_2:
1d019706d866 LLVM10 anatofuz parents: diff changeset	215 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	216 ; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
1d019706d866 LLVM10 anatofuz parents: diff changeset	217 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	218 ; SI: buffer_store_dwordx2 [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	219 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	220 define amdgpu_kernel void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	221 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	222 %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
1d019706d866 LLVM10 anatofuz parents: diff changeset	223
1d019706d866 LLVM10 anatofuz parents: diff changeset	224 %a = load double, double addrspace(1)* %gep, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	225
1d019706d866 LLVM10 anatofuz parents: diff changeset	226 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	227 %result0 = extractvalue { double, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	228 store double %result0, double addrspace(1)* %out, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	229 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	230 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	231
; f32, both operands are kernel arguments, flag false: %a and %b are separated by
; [8 x i32] padding arguments, and the checks pin their s_load_dword offsets to 0x13 and
; 0x1c. The checks expect A to be copied into a VGPR (v_mov_b32) before the
; v_div_scale_f32, whose sources are then B, B, VA.
1d019706d866 LLVM10 anatofuz parents: diff changeset	232 ; SI-LABEL: {{^}}test_div_scale_f32_all_scalar_1:
1d019706d866 LLVM10 anatofuz parents: diff changeset	233 ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
1d019706d866 LLVM10 anatofuz parents: diff changeset	234 ; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x1c
1d019706d866 LLVM10 anatofuz parents: diff changeset	235 ; SI: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	236 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[VA]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	237 ; SI: buffer_store_dword [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	238 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	239 define amdgpu_kernel void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	240 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	241 %result0 = extractvalue { float, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	242 store float %result0, float addrspace(1)* %out, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	243 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	244 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	245
; f32, both operands are kernel arguments, flag true: %a and %b are separated by
; [8 x i32] padding arguments, and the checks pin their s_load_dword offsets to 0x13 and
; 0x1c. The checks expect B to be copied into a VGPR (v_mov_b32) before the
; v_div_scale_f32, whose sources are then A, VB, A.
1d019706d866 LLVM10 anatofuz parents: diff changeset	246 ; SI-LABEL: {{^}}test_div_scale_f32_all_scalar_2:
1d019706d866 LLVM10 anatofuz parents: diff changeset	247 ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
1d019706d866 LLVM10 anatofuz parents: diff changeset	248 ; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x1c
1d019706d866 LLVM10 anatofuz parents: diff changeset	249 ; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	250 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[VB]], [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	251 ; SI: buffer_store_dword [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	252 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	253 define amdgpu_kernel void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	254 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	255 %result0 = extractvalue { float, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	256 store float %result0, float addrspace(1)* %out, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	257 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	258 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	259
; f64, both operands are kernel arguments, flag false: the checks pin the s_load_dwordx2
; offsets of %a and %b to 0x13 and 0x1d. The checks expect both halves of A to be copied
; into a VGPR pair (two v_mov_b32) before the v_div_scale_f64, whose sources are then
; B, B and that VGPR pair.
1d019706d866 LLVM10 anatofuz parents: diff changeset	260 ; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_1:
1d019706d866 LLVM10 anatofuz parents: diff changeset	261 ; SI-DAG: s_load_dwordx2 s{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x13
1d019706d866 LLVM10 anatofuz parents: diff changeset	262 ; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x1d
1d019706d866 LLVM10 anatofuz parents: diff changeset	263 ; SI-DAG: v_mov_b32_e32 v[[VA_LO:[0-9]+]], s[[A_LO]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	264 ; SI-DAG: v_mov_b32_e32 v[[VA_HI:[0-9]+]], s[[A_HI]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	265 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], v{{\[}}[[VA_LO]]:[[VA_HI]]{{\]}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	266 ; SI: buffer_store_dwordx2 [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	267 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	268 define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	269 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	270 %result0 = extractvalue { double, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	271 store double %result0, double addrspace(1)* %out, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	272 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	273 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	274
; f64, both operands are kernel arguments, flag true: the checks pin the s_load_dwordx2
; offsets of %a and %b to 0x13 and 0x1d. The checks expect both halves of B to be copied
; into a VGPR pair (two v_mov_b32) before the v_div_scale_f64, whose sources are then
; A, that VGPR pair, and A.
1d019706d866 LLVM10 anatofuz parents: diff changeset	275 ; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_2:
1d019706d866 LLVM10 anatofuz parents: diff changeset	276 ; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
1d019706d866 LLVM10 anatofuz parents: diff changeset	277 ; SI-DAG: s_load_dwordx2 s{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x1d
1d019706d866 LLVM10 anatofuz parents: diff changeset	278 ; SI-DAG: v_mov_b32_e32 v[[VB_LO:[0-9]+]], s[[B_LO]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	279 ; SI-DAG: v_mov_b32_e32 v[[VB_HI:[0-9]+]], s[[B_HI]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	280 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], v{{\[}}[[VB_LO]]:[[VB_HI]]{{\]}}, [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	281 ; SI: buffer_store_dwordx2 [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	282 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	283 define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	284 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	285 %result0 = extractvalue { double, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	286 store double %result0, double addrspace(1)* %out, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	287 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	288 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	289
; f32, constant first operand, flag false: the first intrinsic argument is the literal
; 1.0 and the second is %a, loaded from %in[tid]. The checks expect the v_div_scale_f32
; sources in the order A, A, 1.0, with the constant appearing directly as an operand.
1d019706d866 LLVM10 anatofuz parents: diff changeset	290 ; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_num:
1d019706d866 LLVM10 anatofuz parents: diff changeset	291 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	292 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[A]], 1.0
1d019706d866 LLVM10 anatofuz parents: diff changeset	293 ; SI: buffer_store_dword [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	294 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	295 define amdgpu_kernel void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
1d019706d866 LLVM10 anatofuz parents: diff changeset	296 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	297 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
1d019706d866 LLVM10 anatofuz parents: diff changeset	298 %a = load float, float addrspace(1)* %gep.0, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	299
1d019706d866 LLVM10 anatofuz parents: diff changeset	300 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	301 %result0 = extractvalue { float, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	302 store float %result0, float addrspace(1)* %out, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	303 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	304 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	305
; Mirror of the inline-immediate numerator test: here the second intrinsic
; argument (the denominator, going by the test name) is the constant 2.0 and
; the first is a float loaded from %in[workitem id]; the i1 selector is false.
; The checks below expect the literal 2.0 in the first two source operands of
; v_div_scale_f32 and the loaded register in the last one.
1d019706d866 LLVM10 anatofuz parents: diff changeset	306 ; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_den:
1d019706d866 LLVM10 anatofuz parents: diff changeset	307 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	308 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], 2.0, 2.0, [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	309 ; SI: buffer_store_dword [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	310 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	311 define amdgpu_kernel void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
; Address of this work item's element: %in indexed by the workitem id.
1d019706d866 LLVM10 anatofuz parents: diff changeset	312 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	313 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
1d019706d866 LLVM10 anatofuz parents: diff changeset	314 %a = load float, float addrspace(1)* %gep.0, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	315
; Only element 0 (the float) of the result pair is stored; the i1 is unused.
1d019706d866 LLVM10 anatofuz parents: diff changeset	316 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	317 %result0 = extractvalue { float, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	318 store float %result0, float addrspace(1)* %out, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	319 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	320 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	321
; The first intrinsic argument (the numerator, going by the test name) is
; negated with fneg before the call; the i1 selector is false.
; The checks below expect no separate negate instruction: the negation shows
; up as a '-' prefix on the last source operand of v_div_scale_f32, while the
; second loaded value (offset:4) fills the first two source operands.
221 79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	322 ; SI-LABEL: {{^}}test_div_scale_f32_fneg_num:
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	323 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	324 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	325 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], -[[A]]
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	326 ; SI: buffer_store_dword [[RESULT0]]
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	327 ; SI: s_endpgm
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	328 define amdgpu_kernel void @test_div_scale_f32_fneg_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
; %gep.0 is this work item's element of %in; %gep.1 is the float right after it.
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	329 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	330 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	331 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	332
; NOTE(review): the loads are volatile, presumably so they stay as two separate
; dword loads matching the two load patterns above - confirm.
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	333 %a = load volatile float, float addrspace(1)* %gep.0, align 4
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	334 %b = load volatile float, float addrspace(1)* %gep.1, align 4
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	335
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	336 %a.fneg = fneg float %a
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	337
; Only element 0 (the float) of the result pair is stored; the i1 is unused.
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	338 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fneg, float %b, i1 false) nounwind readnone
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	339 %result0 = extractvalue { float, i1 } %result, 0
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	340 store float %result0, float addrspace(1)* %out, align 4
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	341 ret void
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	342 }
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	343
; The first intrinsic argument (the numerator, going by the test name) is
; passed through llvm.fabs.f32 before the call; the i1 selector is false.
; Unlike the fneg tests, the checks below expect the absolute value to be
; computed by a separate v_and_b32 with the mask 0x7fffffff, and the result of
; that instruction to be the last source operand of v_div_scale_f32.
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	344 ; SI-LABEL: {{^}}test_div_scale_f32_fabs_num:
1d019706d866 LLVM10 anatofuz parents: diff changeset	345 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
1d019706d866 LLVM10 anatofuz parents: diff changeset	346 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
1d019706d866 LLVM10 anatofuz parents: diff changeset	347 ; SI: v_and_b32_e32 [[ABS_A:v[0-9]+]], 0x7fffffff, [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	348 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[ABS_A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	349 ; SI: buffer_store_dword [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	350 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	351 define amdgpu_kernel void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
; %gep.0 is this work item's element of %in; %gep.1 is the float right after it.
1d019706d866 LLVM10 anatofuz parents: diff changeset	352 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	353 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
1d019706d866 LLVM10 anatofuz parents: diff changeset	354 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
1d019706d866 LLVM10 anatofuz parents: diff changeset	355
; NOTE(review): the loads are volatile, presumably so they stay as two separate
; dword loads matching the two load patterns above - confirm.
1d019706d866 LLVM10 anatofuz parents: diff changeset	356 %a = load volatile float, float addrspace(1)* %gep.0, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	357 %b = load volatile float, float addrspace(1)* %gep.1, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	358
1d019706d866 LLVM10 anatofuz parents: diff changeset	359 %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	360
; Only element 0 (the float) of the result pair is stored; the i1 is unused.
1d019706d866 LLVM10 anatofuz parents: diff changeset	361 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	362 %result0 = extractvalue { float, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	363 store float %result0, float addrspace(1)* %out, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	364 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	365 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	366
; The second intrinsic argument (the denominator, going by the test name) is
; negated with fneg before the call; the i1 selector is false.
; The checks below expect the negation as a '-' prefix on both of the first
; two source operands of v_div_scale_f32 (each is the offset:4 load), with the
; un-negated first load as the last operand and no separate negate instruction.
221 79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	367 ; SI-LABEL: {{^}}test_div_scale_f32_fneg_den:
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	368 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	369 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	370 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], -[[B]], -[[B]], [[A]]
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	371 ; SI: buffer_store_dword [[RESULT0]]
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	372 ; SI: s_endpgm
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	373 define amdgpu_kernel void @test_div_scale_f32_fneg_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
; %gep.0 is this work item's element of %in; %gep.1 is the float right after it.
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	374 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	375 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	376 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	377
; NOTE(review): the loads are volatile, presumably so they stay as two separate
; dword loads matching the two load patterns above - confirm.
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	378 %a = load volatile float, float addrspace(1)* %gep.0, align 4
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	379 %b = load volatile float, float addrspace(1)* %gep.1, align 4
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	380
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	381 %b.fneg = fneg float %b
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	382
; Only element 0 (the float) of the result pair is stored; the i1 is unused.
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	383 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fneg, i1 false) nounwind readnone
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	384 %result0 = extractvalue { float, i1 } %result, 0
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	385 store float %result0, float addrspace(1)* %out, align 4
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	386 ret void
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	387 }
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	388
; The second intrinsic argument (the denominator, going by the test name) is
; passed through llvm.fabs.f32 before the call; the i1 selector is false.
; The checks below expect a separate v_and_b32 with the mask 0x7fffffff applied
; to the offset:4 load, and its result used for both of the first two source
; operands of v_div_scale_f32; the first load is the last operand.
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	389 ; SI-LABEL: {{^}}test_div_scale_f32_fabs_den:
1d019706d866 LLVM10 anatofuz parents: diff changeset	390 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
1d019706d866 LLVM10 anatofuz parents: diff changeset	391 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
1d019706d866 LLVM10 anatofuz parents: diff changeset	392 ; SI: v_and_b32_e32 [[ABS_B:v[0-9]+]], 0x7fffffff, [[B]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	393 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[ABS_B]], [[ABS_B]], [[A]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	394 ; SI: buffer_store_dword [[RESULT0]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	395 ; SI: s_endpgm
1d019706d866 LLVM10 anatofuz parents: diff changeset	396 define amdgpu_kernel void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
; %gep.0 is this work item's element of %in; %gep.1 is the float right after it.
1d019706d866 LLVM10 anatofuz parents: diff changeset	397 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	398 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
1d019706d866 LLVM10 anatofuz parents: diff changeset	399 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
1d019706d866 LLVM10 anatofuz parents: diff changeset	400
; NOTE(review): the loads are volatile, presumably so they stay as two separate
; dword loads matching the two load patterns above - confirm.
1d019706d866 LLVM10 anatofuz parents: diff changeset	401 %a = load volatile float, float addrspace(1)* %gep.0, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	402 %b = load volatile float, float addrspace(1)* %gep.1, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	403
1d019706d866 LLVM10 anatofuz parents: diff changeset	404 %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	405
; Only element 0 (the float) of the result pair is stored; the i1 is unused.
1d019706d866 LLVM10 anatofuz parents: diff changeset	406 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone
1d019706d866 LLVM10 anatofuz parents: diff changeset	407 %result0 = extractvalue { float, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	408 store float %result0, float addrspace(1)* %out, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	409 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	410 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	411
; First intrinsic argument is the constant 8.0, the second is undef; the i1
; selector is false.
; 0x41000000 is the IEEE-754 single-precision bit pattern of 8.0. The checks
; below expect it to be moved into an SGPR once and that SGPR to be used as
; the first and last source operands of v_div_scale_f32; the middle source
; operand is matched as any VGPR.
1d019706d866 LLVM10 anatofuz parents: diff changeset	412 ; SI-LABEL: {{^}}test_div_scale_f32_val_undef_val:
1d019706d866 LLVM10 anatofuz parents: diff changeset	413 ; SI: s_mov_b32 [[K:s[0-9]+]], 0x41000000
1d019706d866 LLVM10 anatofuz parents: diff changeset	414 ; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[K]], v{{[0-9]+}}, [[K]]
1d019706d866 LLVM10 anatofuz parents: diff changeset	415 define amdgpu_kernel void @test_div_scale_f32_val_undef_val(float addrspace(1)* %out) #0 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	416 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 false)
1d019706d866 LLVM10 anatofuz parents: diff changeset	417 %result0 = extractvalue { float, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	418 store float %result0, float addrspace(1)* %out, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	419 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	420 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	421
; First intrinsic argument is undef, the second is the constant 8.0; the i1
; selector is false.
; 0x41000000 is the IEEE-754 single-precision bit pattern of 8.0. The checks
; below expect it to be moved into an SGPR once and that SGPR to be used as
; the first two source operands of v_div_scale_f32; the last source operand
; is matched as any VGPR.
1d019706d866 LLVM10 anatofuz parents: diff changeset	422 ; SI-LABEL: {{^}}test_div_scale_f32_undef_val_val:
1d019706d866 LLVM10 anatofuz parents: diff changeset	423 ; SI: s_mov_b32 [[K:s[0-9]+]], 0x41000000
1d019706d866 LLVM10 anatofuz parents: diff changeset	424 ; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[K]], v{{[0-9]+}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	425 define amdgpu_kernel void @test_div_scale_f32_undef_val_val(float addrspace(1)* %out) #0 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	426 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float 8.0, i1 false)
1d019706d866 LLVM10 anatofuz parents: diff changeset	427 %result0 = extractvalue { float, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	428 store float %result0, float addrspace(1)* %out, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	429 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	430 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	431
; Both value arguments of the intrinsic are undef; the i1 selector is false.
; The checks below hard-code the registers: v_div_scale_f32 must read s0, s0
; and v0, and the negative check forbids any mention of v0 between the
; function label and that instruction.
; NOTE(review): this presumably verifies that nothing is emitted to initialise
; the registers standing in for the undef operands - confirm.
1d019706d866 LLVM10 anatofuz parents: diff changeset	432 ; SI-LABEL: {{^}}test_div_scale_f32_undef_undef_val:
1d019706d866 LLVM10 anatofuz parents: diff changeset	433 ; SI-NOT: v0
1d019706d866 LLVM10 anatofuz parents: diff changeset	434 ; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s0, s0, v0
1d019706d866 LLVM10 anatofuz parents: diff changeset	435 define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1)* %out) #0 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	436 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false)
1d019706d866 LLVM10 anatofuz parents: diff changeset	437 %result0 = extractvalue { float, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	438 store float %result0, float addrspace(1)* %out, align 4
1d019706d866 LLVM10 anatofuz parents: diff changeset	439 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	440 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	441
; f64 counterpart of the val/undef test: first intrinsic argument is the
; constant 8.0, the second is undef; the i1 selector is false.
; The double 8.0 is 0x4020000000000000, so the checks below expect its low
; word (0) and high word (0x40200000) to be moved into two SGPRs. The {{$}}
; after the 0 pins the end of the line so that the low-word pattern cannot
; match a longer immediate that merely starts with 0.
; The resulting SGPR pair must be the first and last source operands of
; v_div_scale_f64; the middle source operand is hard-coded as v[0:1].
1d019706d866 LLVM10 anatofuz parents: diff changeset	442 ; SI-LABEL: {{^}}test_div_scale_f64_val_undef_val:
1d019706d866 LLVM10 anatofuz parents: diff changeset	443 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	444 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x40200000
1d019706d866 LLVM10 anatofuz parents: diff changeset	445 ; SI: v_div_scale_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}, v[0:1], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
1d019706d866 LLVM10 anatofuz parents: diff changeset	446 define amdgpu_kernel void @test_div_scale_f64_val_undef_val(double addrspace(1)* %out) #0 {
1d019706d866 LLVM10 anatofuz parents: diff changeset	447 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double 8.0, double undef, i1 false)
1d019706d866 LLVM10 anatofuz parents: diff changeset	448 %result0 = extractvalue { double, i1 } %result, 0
1d019706d866 LLVM10 anatofuz parents: diff changeset	449 store double %result0, double addrspace(1)* %out, align 8
1d019706d866 LLVM10 anatofuz parents: diff changeset	450 ret void
1d019706d866 LLVM10 anatofuz parents: diff changeset	451 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	452
; Attribute groups referenced by number in this file: #0 is attached to the
; kernel definitions written with a trailing #0, and #1 to the intrinsic
; declarations at the top of the file.
1d019706d866 LLVM10 anatofuz parents: diff changeset	453 attributes #0 = { nounwind }
1d019706d866 LLVM10 anatofuz parents: diff changeset	454 attributes #1 = { nounwind readnone speculatable }

Mercurial > hg > CbC > CbC_llvm

annotate llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll @ 223:5f17cb93ff66 llvm-original