annotate llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll @ 223:5f17cb93ff66 llvm-original

LLVM13 (2021/7/18)
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sun, 18 Jul 2021 22:43:00 +0900
parents 79ff65ed7e25
children c4bab56944e8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
150
anatofuz
parents:
diff changeset
; Test driver and shared declarations. The command on the next line compiles
; this file with llc for the amdgcn target (machine verifier enabled) and pipes
; the assembly into FileCheck, using the check prefix named on that command.
; The declarations below are the intrinsics the test kernels call. Attribute
; group #1 is referenced here but is not defined in the visible part of the file.
1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s
anatofuz
parents:
diff changeset
2
anatofuz
parents:
diff changeset
3 declare i32 @llvm.amdgcn.workitem.id.x() #1
anatofuz
parents:
diff changeset
4 declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) #1
anatofuz
parents:
diff changeset
5 declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) #1
anatofuz
parents:
diff changeset
6 declare float @llvm.fabs.f32(float) #1
anatofuz
parents:
diff changeset
7
anatofuz
parents:
diff changeset
; f32, both operands loaded from memory, i1 selector = false.
; %a and %b are volatile loads of %in[tid] and the float immediately after it.
; The FileCheck lines expect v_div_scale_f32 with source operands B, B, A.
; Only element 0 of the returned { float, i1 } struct is stored to %out.
8 ; SI-LABEL: {{^}}test_div_scale_f32_1:
anatofuz
parents:
diff changeset
9 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
anatofuz
parents:
diff changeset
10 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
anatofuz
parents:
diff changeset
11 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
anatofuz
parents:
diff changeset
12 ; SI: buffer_store_dword [[RESULT0]]
anatofuz
parents:
diff changeset
13 ; SI: s_endpgm
anatofuz
parents:
diff changeset
14 define amdgpu_kernel void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
anatofuz
parents:
diff changeset
15 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
anatofuz
parents:
diff changeset
16 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
anatofuz
parents:
diff changeset
17 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
anatofuz
parents:
diff changeset
18
anatofuz
parents:
diff changeset
19 %a = load volatile float, float addrspace(1)* %gep.0, align 4
anatofuz
parents:
diff changeset
20 %b = load volatile float, float addrspace(1)* %gep.1, align 4
anatofuz
parents:
diff changeset
21
anatofuz
parents:
diff changeset
22 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
anatofuz
parents:
diff changeset
23 %result0 = extractvalue { float, i1 } %result, 0
anatofuz
parents:
diff changeset
24 store float %result0, float addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
25 ret void
anatofuz
parents:
diff changeset
26 }
anatofuz
parents:
diff changeset
27
anatofuz
parents:
diff changeset
; f32, both operands loaded from memory, i1 selector = true.
; Same IR shape as the selector-false variant; the only difference in the call
; is the i1 operand. The FileCheck lines expect source operands A, B, A.
; Only element 0 of the returned { float, i1 } struct is stored to %out.
28 ; SI-LABEL: {{^}}test_div_scale_f32_2:
anatofuz
parents:
diff changeset
29 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
anatofuz
parents:
diff changeset
30 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
anatofuz
parents:
diff changeset
31 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
anatofuz
parents:
diff changeset
32 ; SI: buffer_store_dword [[RESULT0]]
anatofuz
parents:
diff changeset
33 ; SI: s_endpgm
anatofuz
parents:
diff changeset
34 define amdgpu_kernel void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
anatofuz
parents:
diff changeset
35 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
anatofuz
parents:
diff changeset
36 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
anatofuz
parents:
diff changeset
37 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
anatofuz
parents:
diff changeset
38
anatofuz
parents:
diff changeset
39 %a = load volatile float, float addrspace(1)* %gep.0, align 4
anatofuz
parents:
diff changeset
40 %b = load volatile float, float addrspace(1)* %gep.1, align 4
anatofuz
parents:
diff changeset
41
anatofuz
parents:
diff changeset
42 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
anatofuz
parents:
diff changeset
43 %result0 = extractvalue { float, i1 } %result, 0
anatofuz
parents:
diff changeset
44 store float %result0, float addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
45 ret void
anatofuz
parents:
diff changeset
46 }
anatofuz
parents:
diff changeset
47
anatofuz
parents:
diff changeset
; f64, both operands loaded from memory, i1 selector = false.
; %a and %b are volatile loads of %in[tid] and the double immediately after it
; (the second load is expected with offset:8). The FileCheck lines expect
; v_div_scale_f64 with source operands B, B, A.
; NOTE(review): the %aptr parameter is never referenced in the body.
48 ; SI-LABEL: {{^}}test_div_scale_f64_1:
anatofuz
parents:
diff changeset
49 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
anatofuz
parents:
diff changeset
50 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
anatofuz
parents:
diff changeset
51 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
anatofuz
parents:
diff changeset
52 ; SI: buffer_store_dwordx2 [[RESULT0]]
anatofuz
parents:
diff changeset
53 ; SI: s_endpgm
anatofuz
parents:
diff changeset
54 define amdgpu_kernel void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
anatofuz
parents:
diff changeset
55 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
anatofuz
parents:
diff changeset
56 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
anatofuz
parents:
diff changeset
57 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
anatofuz
parents:
diff changeset
58
anatofuz
parents:
diff changeset
59 %a = load volatile double, double addrspace(1)* %gep.0, align 8
anatofuz
parents:
diff changeset
60 %b = load volatile double, double addrspace(1)* %gep.1, align 8
anatofuz
parents:
diff changeset
61
anatofuz
parents:
diff changeset
62 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
anatofuz
parents:
diff changeset
63 %result0 = extractvalue { double, i1 } %result, 0
anatofuz
parents:
diff changeset
64 store double %result0, double addrspace(1)* %out, align 8
anatofuz
parents:
diff changeset
65 ret void
anatofuz
parents:
diff changeset
66 }
anatofuz
parents:
diff changeset
67
anatofuz
parents:
diff changeset
; f64, both operands loaded from memory, i1 selector = true.
; Same IR shape as the selector-false f64 variant; the FileCheck lines expect
; v_div_scale_f64 with source operands A, B, A.
; NOTE(review): the %aptr parameter is never referenced in the body.
68 ; SI-LABEL: {{^}}test_div_scale_f64_2:
anatofuz
parents:
diff changeset
69 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
anatofuz
parents:
diff changeset
70 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
anatofuz
parents:
diff changeset
71 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
anatofuz
parents:
diff changeset
72 ; SI: buffer_store_dwordx2 [[RESULT0]]
anatofuz
parents:
diff changeset
73 ; SI: s_endpgm
anatofuz
parents:
diff changeset
74 define amdgpu_kernel void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
anatofuz
parents:
diff changeset
75 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
anatofuz
parents:
diff changeset
76 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
anatofuz
parents:
diff changeset
77 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
anatofuz
parents:
diff changeset
78
anatofuz
parents:
diff changeset
79 %a = load volatile double, double addrspace(1)* %gep.0, align 8
anatofuz
parents:
diff changeset
80 %b = load volatile double, double addrspace(1)* %gep.1, align 8
anatofuz
parents:
diff changeset
81
anatofuz
parents:
diff changeset
82 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
anatofuz
parents:
diff changeset
83 %result0 = extractvalue { double, i1 } %result, 0
anatofuz
parents:
diff changeset
84 store double %result0, double addrspace(1)* %out, align 8
anatofuz
parents:
diff changeset
85 ret void
anatofuz
parents:
diff changeset
86 }
anatofuz
parents:
diff changeset
87
anatofuz
parents:
diff changeset
; f32, first operand (%a) is a kernel argument, second (%b) is loaded from
; %in[tid] with a plain (non-volatile) load; i1 selector = false.
; The FileCheck lines expect A to arrive via s_load_dword (scalar register) and
; B via buffer_load_dword, and v_div_scale_f32 with source operands B, B, A.
88 ; SI-LABEL: {{^}}test_div_scale_f32_scalar_num_1:
anatofuz
parents:
diff changeset
89 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
anatofuz
parents:
diff changeset
90 ; SI-DAG: s_load_dword [[A:s[0-9]+]]
anatofuz
parents:
diff changeset
91 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
anatofuz
parents:
diff changeset
92 ; SI: buffer_store_dword [[RESULT0]]
anatofuz
parents:
diff changeset
93 ; SI: s_endpgm
anatofuz
parents:
diff changeset
94 define amdgpu_kernel void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
anatofuz
parents:
diff changeset
95 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
anatofuz
parents:
diff changeset
96 %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
anatofuz
parents:
diff changeset
97
anatofuz
parents:
diff changeset
98 %b = load float, float addrspace(1)* %gep, align 4
anatofuz
parents:
diff changeset
99
anatofuz
parents:
diff changeset
100 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
anatofuz
parents:
diff changeset
101 %result0 = extractvalue { float, i1 } %result, 0
anatofuz
parents:
diff changeset
102 store float %result0, float addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
103 ret void
anatofuz
parents:
diff changeset
104 }
anatofuz
parents:
diff changeset
105
anatofuz
parents:
diff changeset
; f32, first operand (%a) is a kernel argument, second (%b) is loaded from
; %in[tid]; i1 selector = true.
; The FileCheck lines expect A in a scalar register, B in a vector register,
; and v_div_scale_f32 with source operands A, B, A.
106 ; SI-LABEL: {{^}}test_div_scale_f32_scalar_num_2:
anatofuz
parents:
diff changeset
107 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
anatofuz
parents:
diff changeset
108 ; SI-DAG: s_load_dword [[A:s[0-9]+]]
anatofuz
parents:
diff changeset
109 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
anatofuz
parents:
diff changeset
110 ; SI: buffer_store_dword [[RESULT0]]
anatofuz
parents:
diff changeset
111 ; SI: s_endpgm
anatofuz
parents:
diff changeset
112 define amdgpu_kernel void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
anatofuz
parents:
diff changeset
113 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
anatofuz
parents:
diff changeset
114 %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
anatofuz
parents:
diff changeset
115
anatofuz
parents:
diff changeset
116 %b = load float, float addrspace(1)* %gep, align 4
anatofuz
parents:
diff changeset
117
anatofuz
parents:
diff changeset
118 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
anatofuz
parents:
diff changeset
119 %result0 = extractvalue { float, i1 } %result, 0
anatofuz
parents:
diff changeset
120 store float %result0, float addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
121 ret void
anatofuz
parents:
diff changeset
122 }
anatofuz
parents:
diff changeset
123
anatofuz
parents:
diff changeset
; f32, second operand (%b) is a kernel argument, first (%a) is loaded from
; %in[tid] with a plain (non-volatile) load; i1 selector = false.
; The FileCheck lines expect B in a scalar register, A in a vector register,
; and v_div_scale_f32 with source operands B, B, A.
124 ; SI-LABEL: {{^}}test_div_scale_f32_scalar_den_1:
anatofuz
parents:
diff changeset
125 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
126 ; SI-DAG: s_load_dword [[B:s[0-9]+]]
anatofuz
parents:
diff changeset
127 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
anatofuz
parents:
diff changeset
128 ; SI: buffer_store_dword [[RESULT0]]
anatofuz
parents:
diff changeset
129 ; SI: s_endpgm
anatofuz
parents:
diff changeset
130 define amdgpu_kernel void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
anatofuz
parents:
diff changeset
131 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
anatofuz
parents:
diff changeset
132 %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
anatofuz
parents:
diff changeset
133
anatofuz
parents:
diff changeset
134 %a = load float, float addrspace(1)* %gep, align 4
anatofuz
parents:
diff changeset
135
anatofuz
parents:
diff changeset
136 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
anatofuz
parents:
diff changeset
137 %result0 = extractvalue { float, i1 } %result, 0
anatofuz
parents:
diff changeset
138 store float %result0, float addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
139 ret void
anatofuz
parents:
diff changeset
140 }
anatofuz
parents:
diff changeset
141
anatofuz
parents:
diff changeset
; f32, second operand (%b) is a kernel argument, first (%a) is loaded from
; %in[tid]; i1 selector = true.
; The FileCheck lines expect B in a scalar register, A in a vector register,
; and v_div_scale_f32 with source operands A, B, A.
142 ; SI-LABEL: {{^}}test_div_scale_f32_scalar_den_2:
anatofuz
parents:
diff changeset
143 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
anatofuz
parents:
diff changeset
144 ; SI-DAG: s_load_dword [[B:s[0-9]+]]
anatofuz
parents:
diff changeset
145 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
anatofuz
parents:
diff changeset
146 ; SI: buffer_store_dword [[RESULT0]]
anatofuz
parents:
diff changeset
147 ; SI: s_endpgm
anatofuz
parents:
diff changeset
148 define amdgpu_kernel void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
anatofuz
parents:
diff changeset
149 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
anatofuz
parents:
diff changeset
150 %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
anatofuz
parents:
diff changeset
151
anatofuz
parents:
diff changeset
152 %a = load float, float addrspace(1)* %gep, align 4
anatofuz
parents:
diff changeset
153
anatofuz
parents:
diff changeset
154 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
anatofuz
parents:
diff changeset
155 %result0 = extractvalue { float, i1 } %result, 0
anatofuz
parents:
diff changeset
156 store float %result0, float addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
157 ret void
anatofuz
parents:
diff changeset
158 }
anatofuz
parents:
diff changeset
159
anatofuz
parents:
diff changeset
; f64, first operand (%a) is a kernel argument, second (%b) is loaded from
; %in[tid] with a plain (non-volatile) load; i1 selector = false.
; The FileCheck lines expect A via s_load_dwordx2 at offset 0xd, B via
; buffer_load_dwordx2, and v_div_scale_f64 with source operands B, B, A.
160 ; SI-LABEL: {{^}}test_div_scale_f64_scalar_num_1:
anatofuz
parents:
diff changeset
161 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
anatofuz
parents:
diff changeset
162 ; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
anatofuz
parents:
diff changeset
163 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
anatofuz
parents:
diff changeset
164 ; SI: buffer_store_dwordx2 [[RESULT0]]
anatofuz
parents:
diff changeset
165 ; SI: s_endpgm
anatofuz
parents:
diff changeset
166 define amdgpu_kernel void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
anatofuz
parents:
diff changeset
167 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
anatofuz
parents:
diff changeset
168 %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
anatofuz
parents:
diff changeset
169
anatofuz
parents:
diff changeset
170 %b = load double, double addrspace(1)* %gep, align 8
anatofuz
parents:
diff changeset
171
anatofuz
parents:
diff changeset
172 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
anatofuz
parents:
diff changeset
173 %result0 = extractvalue { double, i1 } %result, 0
anatofuz
parents:
diff changeset
174 store double %result0, double addrspace(1)* %out, align 8
anatofuz
parents:
diff changeset
175 ret void
anatofuz
parents:
diff changeset
176 }
anatofuz
parents:
diff changeset
177
anatofuz
parents:
diff changeset
; f64, first operand (%a) is a kernel argument, second (%b) is loaded from
; %in[tid]; i1 selector = true.
; The FileCheck lines expect A via s_load_dwordx2 at offset 0xd, B via
; buffer_load_dwordx2, and v_div_scale_f64 with source operands A, B, A.
178 ; SI-LABEL: {{^}}test_div_scale_f64_scalar_num_2:
anatofuz
parents:
diff changeset
179 ; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
anatofuz
parents:
diff changeset
180 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
anatofuz
parents:
diff changeset
181 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
anatofuz
parents:
diff changeset
182 ; SI: buffer_store_dwordx2 [[RESULT0]]
anatofuz
parents:
diff changeset
183 ; SI: s_endpgm
anatofuz
parents:
diff changeset
184 define amdgpu_kernel void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
anatofuz
parents:
diff changeset
185 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
anatofuz
parents:
diff changeset
186 %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
anatofuz
parents:
diff changeset
187
anatofuz
parents:
diff changeset
188 %b = load double, double addrspace(1)* %gep, align 8
anatofuz
parents:
diff changeset
189
anatofuz
parents:
diff changeset
190 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
anatofuz
parents:
diff changeset
191 %result0 = extractvalue { double, i1 } %result, 0
anatofuz
parents:
diff changeset
192 store double %result0, double addrspace(1)* %out, align 8
anatofuz
parents:
diff changeset
193 ret void
anatofuz
parents:
diff changeset
194 }
anatofuz
parents:
diff changeset
195
anatofuz
parents:
diff changeset
; f64, second operand (%b) is a kernel argument, first (%a) is loaded from
; %in[tid] with a plain (non-volatile) load; i1 selector = false.
; The FileCheck lines expect B via s_load_dwordx2 at offset 0xd, A via
; buffer_load_dwordx2, and v_div_scale_f64 with source operands B, B, A.
196 ; SI-LABEL: {{^}}test_div_scale_f64_scalar_den_1:
anatofuz
parents:
diff changeset
197 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
anatofuz
parents:
diff changeset
198 ; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
anatofuz
parents:
diff changeset
199 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
anatofuz
parents:
diff changeset
200 ; SI: buffer_store_dwordx2 [[RESULT0]]
anatofuz
parents:
diff changeset
201 ; SI: s_endpgm
anatofuz
parents:
diff changeset
202 define amdgpu_kernel void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
anatofuz
parents:
diff changeset
203 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
anatofuz
parents:
diff changeset
204 %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
anatofuz
parents:
diff changeset
205
anatofuz
parents:
diff changeset
206 %a = load double, double addrspace(1)* %gep, align 8
anatofuz
parents:
diff changeset
207
anatofuz
parents:
diff changeset
208 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
anatofuz
parents:
diff changeset
209 %result0 = extractvalue { double, i1 } %result, 0
anatofuz
parents:
diff changeset
210 store double %result0, double addrspace(1)* %out, align 8
anatofuz
parents:
diff changeset
211 ret void
anatofuz
parents:
diff changeset
212 }
anatofuz
parents:
diff changeset
213
anatofuz
parents:
diff changeset
; f64, second operand (%b) is a kernel argument, first (%a) is loaded from
; %in[tid]; i1 selector = true.
; The FileCheck lines expect B via s_load_dwordx2 at offset 0xd, A via
; buffer_load_dwordx2, and v_div_scale_f64 with source operands A, B, A.
214 ; SI-LABEL: {{^}}test_div_scale_f64_scalar_den_2:
anatofuz
parents:
diff changeset
215 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
anatofuz
parents:
diff changeset
216 ; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
anatofuz
parents:
diff changeset
217 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
anatofuz
parents:
diff changeset
218 ; SI: buffer_store_dwordx2 [[RESULT0]]
anatofuz
parents:
diff changeset
219 ; SI: s_endpgm
anatofuz
parents:
diff changeset
220 define amdgpu_kernel void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
anatofuz
parents:
diff changeset
221 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
anatofuz
parents:
diff changeset
222 %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
anatofuz
parents:
diff changeset
223
anatofuz
parents:
diff changeset
224 %a = load double, double addrspace(1)* %gep, align 8
anatofuz
parents:
diff changeset
225
anatofuz
parents:
diff changeset
226 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
anatofuz
parents:
diff changeset
227 %result0 = extractvalue { double, i1 } %result, 0
anatofuz
parents:
diff changeset
228 store double %result0, double addrspace(1)* %out, align 8
anatofuz
parents:
diff changeset
229 ret void
anatofuz
parents:
diff changeset
230 }
anatofuz
parents:
diff changeset
231
anatofuz
parents:
diff changeset
; f32, both operands are kernel arguments; i1 selector = false.
; The two float arguments are separated by unnamed [8 x i32] arguments, and
; the FileCheck lines expect them to be loaded with s_load_dword at offsets
; 0x13 (A) and 0x1c (B). A is expected to be copied into a vector register
; (VA) first, and the instruction is expected with source operands B, B, VA.
232 ; SI-LABEL: {{^}}test_div_scale_f32_all_scalar_1:
anatofuz
parents:
diff changeset
233 ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
anatofuz
parents:
diff changeset
234 ; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x1c
anatofuz
parents:
diff changeset
235 ; SI: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
anatofuz
parents:
diff changeset
236 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[VA]]
anatofuz
parents:
diff changeset
237 ; SI: buffer_store_dword [[RESULT0]]
anatofuz
parents:
diff changeset
238 ; SI: s_endpgm
anatofuz
parents:
diff changeset
239 define amdgpu_kernel void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b) nounwind {
anatofuz
parents:
diff changeset
240 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
anatofuz
parents:
diff changeset
241 %result0 = extractvalue { float, i1 } %result, 0
anatofuz
parents:
diff changeset
242 store float %result0, float addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
243 ret void
anatofuz
parents:
diff changeset
244 }
anatofuz
parents:
diff changeset
245
anatofuz
parents:
diff changeset
; f32, both operands are kernel arguments; i1 selector = true.
; The FileCheck lines expect s_load_dword at offsets 0x13 (A) and 0x1c (B).
; Here B is the value expected to be copied into a vector register (VB), and
; the instruction is expected with source operands A, VB, A.
246 ; SI-LABEL: {{^}}test_div_scale_f32_all_scalar_2:
anatofuz
parents:
diff changeset
247 ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
anatofuz
parents:
diff changeset
248 ; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x1c
anatofuz
parents:
diff changeset
249 ; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
anatofuz
parents:
diff changeset
250 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[VB]], [[A]]
anatofuz
parents:
diff changeset
251 ; SI: buffer_store_dword [[RESULT0]]
anatofuz
parents:
diff changeset
252 ; SI: s_endpgm
anatofuz
parents:
diff changeset
253 define amdgpu_kernel void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b) nounwind {
anatofuz
parents:
diff changeset
254 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
anatofuz
parents:
diff changeset
255 %result0 = extractvalue { float, i1 } %result, 0
anatofuz
parents:
diff changeset
256 store float %result0, float addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
257 ret void
anatofuz
parents:
diff changeset
258 }
anatofuz
parents:
diff changeset
259
anatofuz
parents:
diff changeset
; f64, both operands are kernel arguments; i1 selector = false.
; The FileCheck lines expect s_load_dwordx2 at offsets 0x13 (A) and 0x1d (B).
; A's low and high halves are each expected to be copied into vector registers
; with v_mov_b32, and the instruction is expected with source operands
; B, B, v[VA_LO:VA_HI].
260 ; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_1:
anatofuz
parents:
diff changeset
261 ; SI-DAG: s_load_dwordx2 s{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x13
anatofuz
parents:
diff changeset
262 ; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x1d
anatofuz
parents:
diff changeset
263 ; SI-DAG: v_mov_b32_e32 v[[VA_LO:[0-9]+]], s[[A_LO]]
anatofuz
parents:
diff changeset
264 ; SI-DAG: v_mov_b32_e32 v[[VA_HI:[0-9]+]], s[[A_HI]]
anatofuz
parents:
diff changeset
265 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], v{{\[}}[[VA_LO]]:[[VA_HI]]{{\]}}
anatofuz
parents:
diff changeset
266 ; SI: buffer_store_dwordx2 [[RESULT0]]
anatofuz
parents:
diff changeset
267 ; SI: s_endpgm
anatofuz
parents:
diff changeset
268 define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) nounwind {
anatofuz
parents:
diff changeset
269 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
anatofuz
parents:
diff changeset
270 %result0 = extractvalue { double, i1 } %result, 0
anatofuz
parents:
diff changeset
271 store double %result0, double addrspace(1)* %out, align 8
anatofuz
parents:
diff changeset
272 ret void
anatofuz
parents:
diff changeset
273 }
anatofuz
parents:
diff changeset
274
anatofuz
parents:
diff changeset
; f64, both operands are kernel arguments; i1 selector = true.
; The FileCheck lines expect s_load_dwordx2 at offsets 0x13 (A) and 0x1d (B).
; Here B's low and high halves are the ones expected to be copied into vector
; registers, and the instruction is expected with source operands
; A, v[VB_LO:VB_HI], A.
275 ; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_2:
anatofuz
parents:
diff changeset
276 ; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
anatofuz
parents:
diff changeset
277 ; SI-DAG: s_load_dwordx2 s{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x1d
anatofuz
parents:
diff changeset
278 ; SI-DAG: v_mov_b32_e32 v[[VB_LO:[0-9]+]], s[[B_LO]]
anatofuz
parents:
diff changeset
279 ; SI-DAG: v_mov_b32_e32 v[[VB_HI:[0-9]+]], s[[B_HI]]
anatofuz
parents:
diff changeset
280 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], v{{\[}}[[VB_LO]]:[[VB_HI]]{{\]}}, [[A]]
anatofuz
parents:
diff changeset
281 ; SI: buffer_store_dwordx2 [[RESULT0]]
anatofuz
parents:
diff changeset
282 ; SI: s_endpgm
anatofuz
parents:
diff changeset
283 define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) nounwind {
anatofuz
parents:
diff changeset
284 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
anatofuz
parents:
diff changeset
285 %result0 = extractvalue { double, i1 } %result, 0
anatofuz
parents:
diff changeset
286 store double %result0, double addrspace(1)* %out, align 8
anatofuz
parents:
diff changeset
287 ret void
anatofuz
parents:
diff changeset
288 }
anatofuz
parents:
diff changeset
289
anatofuz
parents:
diff changeset
; f32, first operand is the constant 1.0, second operand (%a) is loaded from
; %in[tid]; i1 selector = false.
; The load pattern is anchored with {{$}}, so nothing may follow "addr64" on
; that line. The instruction is expected with source operands A, A, 1.0, i.e.
; the constant appears directly as an operand.
290 ; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_num:
anatofuz
parents:
diff changeset
291 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
anatofuz
parents:
diff changeset
292 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[A]], 1.0
anatofuz
parents:
diff changeset
293 ; SI: buffer_store_dword [[RESULT0]]
anatofuz
parents:
diff changeset
294 ; SI: s_endpgm
anatofuz
parents:
diff changeset
295 define amdgpu_kernel void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
anatofuz
parents:
diff changeset
296 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
anatofuz
parents:
diff changeset
297 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
anatofuz
parents:
diff changeset
298 %a = load float, float addrspace(1)* %gep.0, align 4
anatofuz
parents:
diff changeset
299
anatofuz
parents:
diff changeset
300 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
anatofuz
parents:
diff changeset
301 %result0 = extractvalue { float, i1 } %result, 0
anatofuz
parents:
diff changeset
302 store float %result0, float addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
303 ret void
anatofuz
parents:
diff changeset
304 }
anatofuz
parents:
diff changeset
305
anatofuz
parents:
diff changeset
; f32, first operand (%a) is loaded from %in[tid], second operand is the
; constant 2.0; i1 selector = false.
; The load pattern is anchored with {{$}}, so nothing may follow "addr64" on
; that line. The instruction is expected with source operands 2.0, 2.0, A.
306 ; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_den:
anatofuz
parents:
diff changeset
307 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
anatofuz
parents:
diff changeset
308 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], 2.0, 2.0, [[A]]
anatofuz
parents:
diff changeset
309 ; SI: buffer_store_dword [[RESULT0]]
anatofuz
parents:
diff changeset
310 ; SI: s_endpgm
anatofuz
parents:
diff changeset
311 define amdgpu_kernel void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
anatofuz
parents:
diff changeset
312 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
anatofuz
parents:
diff changeset
313 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
anatofuz
parents:
diff changeset
314 %a = load float, float addrspace(1)* %gep.0, align 4
anatofuz
parents:
diff changeset
315
anatofuz
parents:
diff changeset
316 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
anatofuz
parents:
diff changeset
317 %result0 = extractvalue { float, i1 } %result, 0
anatofuz
parents:
diff changeset
318 store float %result0, float addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
319 ret void
anatofuz
parents:
diff changeset
320 }
anatofuz
parents:
diff changeset
321
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
; f32, first operand is the negation of a loaded value; i1 selector = false.
; %a and %b are volatile loads of %in[tid] and the float after it; %a is
; negated with fneg before the call. The FileCheck lines expect no separate
; negate instruction to be matched: the instruction is expected with source
; operands B, B, -A (the minus sign written directly on the A operand).
322 ; SI-LABEL: {{^}}test_div_scale_f32_fneg_num:
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
323 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
324 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
325 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], -[[A]]
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
326 ; SI: buffer_store_dword [[RESULT0]]
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
327 ; SI: s_endpgm
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
328 define amdgpu_kernel void @test_div_scale_f32_fneg_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
329 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
330 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
331 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
332
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
333 %a = load volatile float, float addrspace(1)* %gep.0, align 4
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
334 %b = load volatile float, float addrspace(1)* %gep.1, align 4
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
335
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
336 %a.fneg = fneg float %a
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
337
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
338 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fneg, float %b, i1 false) nounwind readnone
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
339 %result0 = extractvalue { float, i1 } %result, 0
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
340 store float %result0, float addrspace(1)* %out, align 4
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
341 ret void
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
342 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
343
150
anatofuz
parents:
diff changeset
; f32, first operand is the absolute value of a loaded value; i1 selector =
; false. %a and %b are volatile loads of %in[tid] and the float after it; %a
; goes through llvm.fabs.f32 before the call. The FileCheck lines expect the
; absolute value to be computed by a separate v_and_b32 with mask 0x7fffffff
; (ABS_A), and the instruction to use source operands B, B, ABS_A.
344 ; SI-LABEL: {{^}}test_div_scale_f32_fabs_num:
anatofuz
parents:
diff changeset
345 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
anatofuz
parents:
diff changeset
346 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
anatofuz
parents:
diff changeset
347 ; SI: v_and_b32_e32 [[ABS_A:v[0-9]+]], 0x7fffffff, [[A]]
anatofuz
parents:
diff changeset
348 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[ABS_A]]
anatofuz
parents:
diff changeset
349 ; SI: buffer_store_dword [[RESULT0]]
anatofuz
parents:
diff changeset
350 ; SI: s_endpgm
anatofuz
parents:
diff changeset
351 define amdgpu_kernel void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
anatofuz
parents:
diff changeset
352 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
anatofuz
parents:
diff changeset
353 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
anatofuz
parents:
diff changeset
354 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
anatofuz
parents:
diff changeset
355
anatofuz
parents:
diff changeset
356 %a = load volatile float, float addrspace(1)* %gep.0, align 4
anatofuz
parents:
diff changeset
357 %b = load volatile float, float addrspace(1)* %gep.1, align 4
anatofuz
parents:
diff changeset
358
anatofuz
parents:
diff changeset
359 %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
anatofuz
parents:
diff changeset
360
anatofuz
parents:
diff changeset
361 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone
anatofuz
parents:
diff changeset
362 %result0 = extractvalue { float, i1 } %result, 0
anatofuz
parents:
diff changeset
363 store float %result0, float addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
364 ret void
anatofuz
parents:
diff changeset
365 }
anatofuz
parents:
diff changeset
366
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
367 ; SI-LABEL: {{^}}test_div_scale_f32_fneg_den:
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
368 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
369 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
370 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], -[[B]], -[[B]], [[A]]
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
371 ; SI: buffer_store_dword [[RESULT0]]
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
372 ; SI: s_endpgm
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
373 define amdgpu_kernel void @test_div_scale_f32_fneg_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
374 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
375 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
376 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
377
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
378 %a = load volatile float, float addrspace(1)* %gep.0, align 4
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
379 %b = load volatile float, float addrspace(1)* %gep.1, align 4
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
380
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
381 %b.fneg = fneg float %b
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
382
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
383 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fneg, i1 false) nounwind readnone
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
384 %result0 = extractvalue { float, i1 } %result, 0
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
385 store float %result0, float addrspace(1)* %out, align 4
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
386 ret void
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
387 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
388
150
anatofuz
parents:
diff changeset
389 ; SI-LABEL: {{^}}test_div_scale_f32_fabs_den:
anatofuz
parents:
diff changeset
390 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
anatofuz
parents:
diff changeset
391 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
anatofuz
parents:
diff changeset
392 ; SI: v_and_b32_e32 [[ABS_B:v[0-9]+]], 0x7fffffff, [[B]]
anatofuz
parents:
diff changeset
393 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[ABS_B]], [[ABS_B]], [[A]]
anatofuz
parents:
diff changeset
394 ; SI: buffer_store_dword [[RESULT0]]
anatofuz
parents:
diff changeset
395 ; SI: s_endpgm
anatofuz
parents:
diff changeset
396 define amdgpu_kernel void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
anatofuz
parents:
diff changeset
397 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
anatofuz
parents:
diff changeset
398 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
anatofuz
parents:
diff changeset
399 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
anatofuz
parents:
diff changeset
400
anatofuz
parents:
diff changeset
401 %a = load volatile float, float addrspace(1)* %gep.0, align 4
anatofuz
parents:
diff changeset
402 %b = load volatile float, float addrspace(1)* %gep.1, align 4
anatofuz
parents:
diff changeset
403
anatofuz
parents:
diff changeset
404 %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
anatofuz
parents:
diff changeset
405
anatofuz
parents:
diff changeset
406 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone
anatofuz
parents:
diff changeset
407 %result0 = extractvalue { float, i1 } %result, 0
anatofuz
parents:
diff changeset
408 store float %result0, float addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
409 ret void
anatofuz
parents:
diff changeset
410 }
anatofuz
parents:
diff changeset
411
anatofuz
parents:
diff changeset
412 ; SI-LABEL: {{^}}test_div_scale_f32_val_undef_val:
anatofuz
parents:
diff changeset
413 ; SI: s_mov_b32 [[K:s[0-9]+]], 0x41000000
anatofuz
parents:
diff changeset
414 ; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[K]], v{{[0-9]+}}, [[K]]
anatofuz
parents:
diff changeset
415 define amdgpu_kernel void @test_div_scale_f32_val_undef_val(float addrspace(1)* %out) #0 {
anatofuz
parents:
diff changeset
416 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 false)
anatofuz
parents:
diff changeset
417 %result0 = extractvalue { float, i1 } %result, 0
anatofuz
parents:
diff changeset
418 store float %result0, float addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
419 ret void
anatofuz
parents:
diff changeset
420 }
anatofuz
parents:
diff changeset
421
anatofuz
parents:
diff changeset
422 ; SI-LABEL: {{^}}test_div_scale_f32_undef_val_val:
anatofuz
parents:
diff changeset
423 ; SI: s_mov_b32 [[K:s[0-9]+]], 0x41000000
anatofuz
parents:
diff changeset
424 ; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[K]], v{{[0-9]+}}
anatofuz
parents:
diff changeset
425 define amdgpu_kernel void @test_div_scale_f32_undef_val_val(float addrspace(1)* %out) #0 {
anatofuz
parents:
diff changeset
426 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float 8.0, i1 false)
anatofuz
parents:
diff changeset
427 %result0 = extractvalue { float, i1 } %result, 0
anatofuz
parents:
diff changeset
428 store float %result0, float addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
429 ret void
anatofuz
parents:
diff changeset
430 }
anatofuz
parents:
diff changeset
431
anatofuz
parents:
diff changeset
432 ; SI-LABEL: {{^}}test_div_scale_f32_undef_undef_val:
anatofuz
parents:
diff changeset
433 ; SI-NOT: v0
anatofuz
parents:
diff changeset
434 ; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s0, s0, v0
anatofuz
parents:
diff changeset
435 define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1)* %out) #0 {
anatofuz
parents:
diff changeset
436 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false)
anatofuz
parents:
diff changeset
437 %result0 = extractvalue { float, i1 } %result, 0
anatofuz
parents:
diff changeset
438 store float %result0, float addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
439 ret void
anatofuz
parents:
diff changeset
440 }
anatofuz
parents:
diff changeset
441
anatofuz
parents:
diff changeset
442 ; SI-LABEL: {{^}}test_div_scale_f64_val_undef_val:
anatofuz
parents:
diff changeset
443 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
444 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x40200000
anatofuz
parents:
diff changeset
445 ; SI: v_div_scale_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}, v[0:1], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
anatofuz
parents:
diff changeset
446 define amdgpu_kernel void @test_div_scale_f64_val_undef_val(double addrspace(1)* %out) #0 {
anatofuz
parents:
diff changeset
447 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double 8.0, double undef, i1 false)
anatofuz
parents:
diff changeset
448 %result0 = extractvalue { double, i1 } %result, 0
anatofuz
parents:
diff changeset
449 store double %result0, double addrspace(1)* %out, align 8
anatofuz
parents:
diff changeset
450 ret void
anatofuz
parents:
diff changeset
451 }
anatofuz
parents:
diff changeset
452
anatofuz
parents:
diff changeset
453 attributes #0 = { nounwind }
anatofuz
parents:
diff changeset
454 attributes #1 = { nounwind readnone speculatable }