; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN-SAFE,SI-SAFE,GCN,FUNC %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NONAN,GCN-NONAN,GCN,FUNC %s

; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-SAFE,GCN-SAFE,GCN,FUNC %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NONAN,GCN-NONAN,GCN,FUNC %s

; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s

; Workitem-id intrinsic; the vector-memory tests in this file use its result
; as the element index into their %in pointer.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; Both min operands are elements 0 and 1 of the same <4 x float> kernel
; argument, i.e. two different SGPRs that belong to one super register.
; Only one SGPR operand can be folded into the instruction, so every
; configuration below expects one scalar and one vector source operand.

; FUNC-LABEL: {{^}}s_test_fmin_legacy_subreg_inputs_f32:
; EG: MIN *
; SI-SAFE: v_min_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}

; SI-NONAN: v_min_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}

; VI-SAFE: v_cmp_nlt_f32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}

; VI-NONAN: v_min_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_fmin_legacy_subreg_inputs_f32(float addrspace(1)* %out, <4 x float> %reg0) #0 {
  %elt0 = extractelement <4 x float> %reg0, i32 0
  %elt1 = extractelement <4 x float> %reg0, i32 1
  ; uge compare with swapped select arms: picks %elt1 when %elt0 >= %elt1
  ; (or unordered), otherwise %elt0.
  %is.uge = fcmp uge float %elt0, %elt1
  %min = select i1 %is.uge, float %elt1, float %elt0
  store float %min, float addrspace(1)* %out
  ret void
}

; fcmp ule + select on two scalar float kernel arguments (loaded together as
; an SGPR pair). The safe SI run expects v_min_legacy_f32 with the operands
; swapped, the safe VI run expects a compare plus v_cndmask, and the
; no-NaN runs expect a plain v_min_f32.

; FUNC-LABEL: {{^}}s_test_fmin_legacy_ule_f32:
; GCN-DAG: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}

; SI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]

; GCN-NONAN: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]

; VI-SAFE: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]

; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, s[[B]], [[VA]]

; VI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]
; VI-SAFE: v_cmp_ngt_f32_e32 vcc, s[[A]], [[VB]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[VB]], [[VA]]

; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, s[[A]], [[VB]]
define amdgpu_kernel void @s_test_fmin_legacy_ule_f32(float addrspace(1)* %out, float %a, float %b) #0 {
  %a.ule.b = fcmp ule float %a, %b
  %min = select i1 %a.ule.b, float %a, float %b
  store float %min, float addrspace(1)* %out, align 4
  ret void
}

; Same ule/select pattern, but both inputs come from 'fadd nnan'. nnan on the
; sources alone is not sufficient to get v_min_f32; nsz is also needed, so the
; safe runs still expect the legacy min / compare+cndmask forms.
; FIXME: Should separate tests
; GCN-LABEL: {{^}}s_test_fmin_legacy_ule_f32_nnan_src:
; GCN: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}

; GCN-DAG: v_add_f32_e64 [[ADD_A:v[0-9]+]], s[[A]], 1.0
; GCN-DAG: v_add_f32_e64 [[ADD_B:v[0-9]+]], s[[B]], 2.0

; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]]

; VI-SAFE: v_cmp_ngt_f32_e32 vcc, [[ADD_A]], [[ADD_B]]
; VI-SAFE: v_cndmask_b32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]], vcc

; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]
define amdgpu_kernel void @s_test_fmin_legacy_ule_f32_nnan_src(float addrspace(1)* %out, float %a, float %b) #0 {
  ; The nnan flag sits on the producers of the compared values, not on the
  ; compare or the select.
  %a.plus1 = fadd nnan float %a, 1.0
  %b.plus2 = fadd nnan float %b, 2.0
  %is.ule = fcmp ule float %a.plus1, %b.plus2
  %min = select i1 %is.ule, float %a.plus1, float %b.plus2
  store float %min, float addrspace(1)* %out, align 4
  ret void
}

; ule/select min on two adjacent floats loaded from %in at the workitem's
; index. The safe SI run expects the legacy min with swapped operands.

; FUNC-LABEL: {{^}}test_fmin_legacy_ule_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]

; VI-SAFE: v_cmp_ngt_f32_e32 vcc, [[A]], [[B]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]

; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane.id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %a.addr = getelementptr float, float addrspace(1)* %in, i32 %lane.id
  %b.addr = getelementptr float, float addrspace(1)* %a.addr, i32 1

  ; Volatile loads: the A/B captures above rely on two separate dword loads
  ; appearing in this order.
  %a.val = load volatile float, float addrspace(1)* %a.addr, align 4
  %b.val = load volatile float, float addrspace(1)* %b.addr, align 4

  %is.ule = fcmp ule float %a.val, %b.val
  %min = select i1 %is.ule, float %a.val, float %b.val
  store float %min, float addrspace(1)* %out, align 4
  ret void
}

; Ordered variant (fcmp ole): the safe SI run expects the legacy min with the
; operands in source order, and the safe VI run expects v_cmp_le + v_cndmask.

; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]

; VI-SAFE: v_cmp_le_f32_e32 vcc, [[A]], [[B]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]

; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane.id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %a.addr = getelementptr float, float addrspace(1)* %in, i32 %lane.id
  %b.addr = getelementptr float, float addrspace(1)* %a.addr, i32 1

  %a.val = load volatile float, float addrspace(1)* %a.addr, align 4
  %b.val = load volatile float, float addrspace(1)* %b.addr, align 4

  %is.ole = fcmp ole float %a.val, %b.val
  %min = select i1 %is.ole, float %a.val, float %b.val
  store float %min, float addrspace(1)* %out, align 4
  ret void
}

; Ordered strict variant (fcmp olt): same expectations as the ole test except
; that the safe VI run expects v_cmp_lt.

; FUNC-LABEL: {{^}}test_fmin_legacy_olt_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]

; VI-SAFE: v_cmp_lt_f32_e32 vcc, [[A]], [[B]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]

; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane.id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %a.addr = getelementptr float, float addrspace(1)* %in, i32 %lane.id
  %b.addr = getelementptr float, float addrspace(1)* %a.addr, i32 1

  %a.val = load volatile float, float addrspace(1)* %a.addr, align 4
  %b.val = load volatile float, float addrspace(1)* %b.addr, align 4

  %is.olt = fcmp olt float %a.val, %b.val
  %min = select i1 %is.olt, float %a.val, float %b.val
  store float %min, float addrspace(1)* %out, align 4
  ret void
}

; Unordered strict variant (fcmp ult): the safe SI run expects the legacy min
; with swapped operands, and the safe VI run expects v_cmp_nge + v_cndmask.

; FUNC-LABEL: {{^}}test_fmin_legacy_ult_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]

; VI-SAFE: v_cmp_nge_f32_e32 vcc, [[A]], [[B]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]

; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane.id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %a.addr = getelementptr float, float addrspace(1)* %in, i32 %lane.id
  %b.addr = getelementptr float, float addrspace(1)* %a.addr, i32 1

  %a.val = load volatile float, float addrspace(1)* %a.addr, align 4
  %b.val = load volatile float, float addrspace(1)* %b.addr, align 4

  %is.ult = fcmp ult float %a.val, %b.val
  %min = select i1 %is.ult, float %a.val, float %b.val
  store float %min, float addrspace(1)* %out, align 4
  ret void
}

; <1 x float> form of the ult test; the expected instructions are the same as
; for the scalar float version.

; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v1f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]

; VI-SAFE: v_cmp_nge_f32_e32 vcc, [[A]], [[B]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]

; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
  %lane.id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %a.addr = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %lane.id
  %b.addr = getelementptr <1 x float>, <1 x float> addrspace(1)* %a.addr, i32 1

  %a.vec = load <1 x float>, <1 x float> addrspace(1)* %a.addr
  %b.vec = load <1 x float>, <1 x float> addrspace(1)* %b.addr

  %is.ult = fcmp ult <1 x float> %a.vec, %b.vec
  %min = select <1 x i1> %is.ult, <1 x float> %a.vec, <1 x float> %b.vec
  store <1 x float> %min, <1 x float> addrspace(1)* %out
  ret void
}

; <2 x float> form of the ult test: two dwordx2 loads, then one min (or one
; compare + cndmask pair on safe VI) per element.

; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v2f32:
; GCN: {{buffer|flat}}_load_dwordx2
; GCN: {{buffer|flat}}_load_dwordx2
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32

; VI-SAFE: v_cmp_nge_f32_e32
; VI-SAFE: v_cndmask_b32_e32
; VI-SAFE: v_cmp_nge_f32_e32
; VI-SAFE: v_cndmask_b32_e32

; GCN-NONAN: v_min_f32_e32
; GCN-NONAN: v_min_f32_e32
define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %lane.id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %a.addr = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %lane.id
  %b.addr = getelementptr <2 x float>, <2 x float> addrspace(1)* %a.addr, i32 1

  %a.vec = load <2 x float>, <2 x float> addrspace(1)* %a.addr
  %b.vec = load <2 x float>, <2 x float> addrspace(1)* %b.addr

  %is.ult = fcmp ult <2 x float> %a.vec, %b.vec
  %min = select <2 x i1> %is.ult, <2 x float> %a.vec, <2 x float> %b.vec
  store <2 x float> %min, <2 x float> addrspace(1)* %out
  ret void
}

; <3 x float> form of the ult test. Exactly three per-element operations are
; expected: the negative checks after each positive group reject a fourth one
; (i.e. the vector must not be widened to four lanes).
;
; The negative VI checks use the VI-SAFE prefix because no RUN line enables a
; bare "VI" prefix; directives written with it are never evaluated.

; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v3f32:
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE-NOT: v_min_

; VI-SAFE: v_cmp_nge_f32_e32
; VI-SAFE: v_cndmask_b32_e32
; VI-SAFE: v_cmp_nge_f32_e32
; VI-SAFE: v_cndmask_b32_e32
; VI-SAFE: v_cmp_nge_f32_e32
; VI-SAFE: v_cndmask_b32_e32
; VI-SAFE-NOT: v_cmp
; VI-SAFE-NOT: v_cndmask

; GCN-NONAN: v_min_f32_e32
; GCN-NONAN: v_min_f32_e32
; GCN-NONAN: v_min_f32_e32
; GCN-NONAN-NOT: v_min_
define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1

  %a = load <3 x float>, <3 x float> addrspace(1)* %gep.0
  %b = load <3 x float>, <3 x float> addrspace(1)* %gep.1

  %cmp = fcmp ult <3 x float> %a, %b
  %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
  store <3 x float> %val, <3 x float> addrspace(1)* %out
  ret void
}

; The compare result has a second user (it is also stored to %out1), so no
; min instruction is expected in any GCN configuration, only the compare
; followed directly by v_cndmask.

; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32_multi_use:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-NOT: v_min
; GCN: v_cmp_le_f32
; GCN-NEXT: v_cndmask_b32
; GCN-NOT: v_min
; GCN: s_endpgm
define amdgpu_kernel void @test_fmin_legacy_ole_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
  %lane.id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %a.addr = getelementptr float, float addrspace(1)* %in, i32 %lane.id
  %b.addr = getelementptr float, float addrspace(1)* %a.addr, i32 1

  %a.val = load volatile float, float addrspace(1)* %a.addr, align 4
  %b.val = load volatile float, float addrspace(1)* %b.addr, align 4

  %is.ole = fcmp ole float %a.val, %b.val
  %min = select i1 %is.ole, float %a.val, float %b.val
  store float %min, float addrspace(1)* %out0, align 4
  ; Second use of the compare result.
  store i1 %is.ole, i1 addrspace(1)* %out1
  ret void
}

; #0 is attached to every kernel definition in this file.
attributes #0 = { nounwind }
; #1 is attached to the workitem-id intrinsic declaration and its call sites.
attributes #1 = { nounwind readnone }