; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s

declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
declare i64 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
declare float @llvm.fabs.f32(float) #0

declare i64 @llvm.amdgcn.fcmp.f16(half, half, i32) #0
declare half @llvm.fabs.f16(half) #0
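; Note: the i32 condition operand follows LLVM's fcmp predicate numbering
; (1 = oeq, 2 = ogt, 3 = oge, 4 = olt, 5 = ole, 6 = one, 9 = ueq,
; 10 = ugt, 11 = uge, 12 = ult, 13 = ule, 14 = une). A condition value
; outside that range (e.g. -1 in @v_fcmp_f32 and @v_fcmp_f16 below) is
; not expected to select any compare instruction, which the GCN-NOT
; checks verify.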

; GCN-LABEL: {{^}}v_fcmp_f32_oeq_with_fabs:
; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}|
define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
  %temp = call float @llvm.fabs.f32(float %a)
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float %temp, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_oeq_both_operands_with_fabs:
; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{s[0-9]+}}|, |{{v[0-9]+}}|
define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
  %temp = call float @llvm.fabs.f32(float %a)
  %src_input = call float @llvm.fabs.f32(float %src)
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src_input, float %temp, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32:
; GCN-NOT: v_cmp_eq_f32_e64
define amdgpu_kernel void @v_fcmp_f32(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 -1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_oeq:
; GCN: v_cmp_eq_f32_e64
define amdgpu_kernel void @v_fcmp_f32_oeq(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_one:
; GCN: v_cmp_neq_f32_e64
define amdgpu_kernel void @v_fcmp_f32_one(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 6)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_ogt:
; GCN: v_cmp_gt_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ogt(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 2)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_oge:
; GCN: v_cmp_ge_f32_e64
define amdgpu_kernel void @v_fcmp_f32_oge(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 3)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_olt:
; GCN: v_cmp_lt_f32_e64
define amdgpu_kernel void @v_fcmp_f32_olt(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 4)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_ole:
; GCN: v_cmp_le_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ole(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 5)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}


; GCN-LABEL: {{^}}v_fcmp_f32_ueq:
; GCN: v_cmp_nlg_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ueq(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 9)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_une:
; GCN: v_cmp_neq_f32_e64
define amdgpu_kernel void @v_fcmp_f32_une(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 14)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_ugt:
; GCN: v_cmp_nle_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ugt(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 10)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_uge:
; GCN: v_cmp_nlt_f32_e64
define amdgpu_kernel void @v_fcmp_f32_uge(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 11)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_ult:
; GCN: v_cmp_nge_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ult(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 12)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_ule:
; GCN: v_cmp_ngt_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ule(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 13)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_oeq:
; GCN: v_cmp_eq_f64_e64
define amdgpu_kernel void @v_fcmp_f64_oeq(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_one:
; GCN: v_cmp_neq_f64_e64
define amdgpu_kernel void @v_fcmp_f64_one(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 6)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ogt:
; GCN: v_cmp_gt_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ogt(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 2)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_oge:
; GCN: v_cmp_ge_f64_e64
define amdgpu_kernel void @v_fcmp_f64_oge(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 3)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_olt:
; GCN: v_cmp_lt_f64_e64
define amdgpu_kernel void @v_fcmp_f64_olt(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 4)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ole:
; GCN: v_cmp_le_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ole(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 5)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ueq:
; GCN: v_cmp_nlg_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ueq(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 9)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_une:
; GCN: v_cmp_neq_f64_e64
define amdgpu_kernel void @v_fcmp_f64_une(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 14)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ugt:
; GCN: v_cmp_nle_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ugt(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 10)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_uge:
; GCN: v_cmp_nlt_f64_e64
define amdgpu_kernel void @v_fcmp_f64_uge(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 11)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ult:
; GCN: v_cmp_nge_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ult(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 12)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ule:
; GCN: v_cmp_ngt_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ule(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 13)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_oeq_with_fabs:
; VI: v_cmp_eq_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}|

; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], s{{[0-9]+}}
; SI: v_cvt_f32_f16_e64 [[CVT1:v[0-9]+]], |s{{[0-9]+}}|
; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT0]], [[CVT1]]
define amdgpu_kernel void @v_fcmp_f16_oeq_with_fabs(i64 addrspace(1)* %out, half %src, half %a) {
  %temp = call half @llvm.fabs.f16(half %a)
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half %temp, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_oeq_both_operands_with_fabs:
; VI: v_cmp_eq_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{s[0-9]+}}|, |{{v[0-9]+}}|

; SI: v_cvt_f32_f16_e64 [[CVT0:v[0-9]+]], |s{{[0-9]+}}|
; SI: v_cvt_f32_f16_e64 [[CVT1:v[0-9]+]], |s{{[0-9]+}}|
; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT0]], [[CVT1]]
define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(i64 addrspace(1)* %out, half %src, half %a) {
  %temp = call half @llvm.fabs.f16(half %a)
  %src_input = call half @llvm.fabs.f16(half %src)
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src_input, half %temp, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16:
; GCN-NOT: v_cmp_eq_
define amdgpu_kernel void @v_fcmp_f16(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 -1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_oeq:
; VI: v_cmp_eq_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_oeq(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_one:
; VI: v_cmp_neq_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_one(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 6)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ogt:
; VI: v_cmp_gt_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_lt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ogt(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 2)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_oge:
; VI: v_cmp_ge_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_le_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_oge(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 3)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_olt:
; VI: v_cmp_lt_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_gt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_olt(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 4)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ole:
; VI: v_cmp_le_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_ge_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ole(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 5)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ueq:
; VI: v_cmp_nlg_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_nlg_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ueq(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 9)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_une:
; VI: v_cmp_neq_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_une(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 14)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ugt:
; VI: v_cmp_nle_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_nge_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ugt(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 10)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_uge:
; VI: v_cmp_nlt_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_ngt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_uge(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 11)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ult:
; VI: v_cmp_nge_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_nle_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ult(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 12)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ule:
; VI: v_cmp_ngt_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_nlt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ule(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 13)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind readnone convergent }