252
|
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
|
2 ; FIXME: Missing operand promotion for f16; presumably why the tahiti (SI) line below is disabled as XUN.
|
|
3 ; XUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI %s
|
|
4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX8 %s
|
|
5 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX9 %s
|
|
6 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
|
|
7 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
|
|
8
|
|
; Scalar half -> float extension through the constrained fpext intrinsic with
; "fpexcept.strict" in a strictfp function. The shared GCN prefix is used, so
; every enabled target is expected to emit one v_cvt_f32_f16_e32.
9 define float @v_constrained_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 {
|
|
10 ; GCN-LABEL: v_constrained_fpext_f16_to_f32_fpexcept_strict:
|
|
11 ; GCN: ; %bb.0:
|
|
12 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
13 ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
14 ; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
15 %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
|
|
16 ret float %result
|
|
17 }
|
|
18
|
|
; <2 x half> -> <2 x float> strict fpext. Both halves arrive in v0.
; The GFX89 and GFX10 check blocks convert the upper half with an SDWA
; v_cvt_f32_f16 (src0_sel:WORD_1); the GFX11 check block instead shifts the
; upper half down with v_lshrrev_b32 and converts it with a plain e32 v_cvt.
19 define <2 x float> @v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict(<2 x half> %arg) #0 {
|
|
20 ; GFX89-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
|
|
21 ; GFX89: ; %bb.0:
|
|
22 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
23 ; GFX89-NEXT: v_cvt_f32_f16_e32 v2, v0
|
|
24 ; GFX89-NEXT: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
25 ; GFX89-NEXT: v_mov_b32_e32 v0, v2
|
|
26 ; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
27 ;
|
|
28 ; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
|
|
29 ; GFX10: ; %bb.0:
|
|
30 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
31 ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v0
|
|
32 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
33 ; GFX10-NEXT: v_mov_b32_e32 v0, v2
|
|
34 ; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
35 ;
|
|
36 ; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
|
|
37 ; GFX11: ; %bb.0:
|
|
38 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
39 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
40 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
41 ; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
|
|
42 ; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
43 %result = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %arg, metadata !"fpexcept.strict")
|
|
44 ret <2 x float> %result
|
|
45 }
|
|
46
|
|
; <3 x half> -> <3 x float> strict fpext. The checks read the first two
; elements from v0 and the third from v1. As in the two-element test, the
; GFX89 and GFX10 check blocks use an SDWA conversion for the upper half of
; v0, while the GFX11 check block uses a 16-bit right shift followed by an
; e32 conversion. Results are moved into v0..v2 for the return.
47 define <3 x float> @v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict(<3 x half> %arg) #0 {
|
|
48 ; GFX89-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
|
|
49 ; GFX89: ; %bb.0:
|
|
50 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
51 ; GFX89-NEXT: v_cvt_f32_f16_e32 v4, v0
|
|
52 ; GFX89-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
53 ; GFX89-NEXT: v_cvt_f32_f16_e32 v2, v1
|
|
54 ; GFX89-NEXT: v_mov_b32_e32 v0, v4
|
|
55 ; GFX89-NEXT: v_mov_b32_e32 v1, v3
|
|
56 ; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
57 ;
|
|
58 ; GFX10-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
|
|
59 ; GFX10: ; %bb.0:
|
|
60 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
61 ; GFX10-NEXT: v_cvt_f32_f16_e32 v4, v0
|
|
62 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
63 ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v1
|
|
64 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
|
|
65 ; GFX10-NEXT: v_mov_b32_e32 v1, v3
|
|
66 ; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
67 ;
|
|
68 ; GFX11-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
|
|
69 ; GFX11: ; %bb.0:
|
|
70 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
71 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
|
|
72 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
73 ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v2
|
|
74 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1
|
|
75 ; GFX11-NEXT: v_mov_b32_e32 v1, v3
|
|
76 ; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
77 %result = call <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half> %arg, metadata !"fpexcept.strict")
|
|
78 ret <3 x float> %result
|
|
79 }
|
|
80
|
|
; Scalar float -> double strict fpext. All enabled targets share the GCN
; prefix and are expected to emit one v_cvt_f64_f32_e32 writing the v[0:1]
; register pair.
81 define double @v_constrained_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 {
|
|
82 ; GCN-LABEL: v_constrained_fpext_f32_to_f64_fpexcept_strict:
|
|
83 ; GCN: ; %bb.0:
|
|
84 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
85 ; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
|
|
86 ; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
87 %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict")
|
|
88 ret double %result
|
|
89 }
|
|
90
|
|
; <2 x float> -> <2 x double> strict fpext. The checks first copy the second
; input element from v1 to v2, because the first conversion writes v[0:1]
; and would otherwise overwrite it, then convert each element separately.
91 define <2 x double> @v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict(<2 x float> %arg) #0 {
|
|
92 ; GCN-LABEL: v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict:
|
|
93 ; GCN: ; %bb.0:
|
|
94 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
95 ; GCN-NEXT: v_mov_b32_e32 v2, v1
|
|
96 ; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
|
|
97 ; GCN-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
|
|
98 ; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
99 %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float> %arg, metadata !"fpexcept.strict")
|
|
100 ret <2 x double> %result
|
|
101 }
|
|
102
|
|
; <3 x float> -> <3 x double> strict fpext. Inputs in v1 and v2 are copied
; to v2 and v4 before the conversions overwrite the low registers. The
; GFX89 and GFX1011 check blocks contain the same instructions and differ
; only in the order of the last two conversions.
103 define <3 x double> @v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict(<3 x float> %arg) #0 {
|
|
104 ; GFX89-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict:
|
|
105 ; GFX89: ; %bb.0:
|
|
106 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
107 ; GFX89-NEXT: v_mov_b32_e32 v4, v2
|
|
108 ; GFX89-NEXT: v_mov_b32_e32 v2, v1
|
|
109 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
|
|
110 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
|
|
111 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[4:5], v4
|
|
112 ; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
113 ;
|
|
114 ; GFX1011-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict:
|
|
115 ; GFX1011: ; %bb.0:
|
|
116 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
117 ; GFX1011-NEXT: v_mov_b32_e32 v4, v2
|
|
118 ; GFX1011-NEXT: v_mov_b32_e32 v2, v1
|
|
119 ; GFX1011-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
|
|
120 ; GFX1011-NEXT: v_cvt_f64_f32_e32 v[4:5], v4
|
|
121 ; GFX1011-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
|
|
122 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
|
|
123 %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float> %arg, metadata !"fpexcept.strict")
|
|
124 ret <3 x double> %result
|
|
125 }
|
|
126
|
|
; Scalar half -> double strict fpext. The checks expect a two-step
; conversion on every enabled target: half -> float (v_cvt_f32_f16) followed
; by float -> double (v_cvt_f64_f32).
127 define double @v_constrained_fpext_f16_to_f64_fpexcept_strict(half %arg) #0 {
|
|
128 ; GCN-LABEL: v_constrained_fpext_f16_to_f64_fpexcept_strict:
|
|
129 ; GCN: ; %bb.0:
|
|
130 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
131 ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
132 ; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
|
|
133 ; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
134 %result = call double @llvm.experimental.constrained.fpext.f64.f16(half %arg, metadata !"fpexcept.strict")
|
|
135 ret double %result
|
|
136 }
|
|
137
|
|
; <2 x half> -> <2 x double> strict fpext. Each element goes
; half -> float -> double. The GFX89 and GFX10 check blocks extract the
; upper half of v0 with an SDWA conversion (src0_sel:WORD_1); the GFX11
; check block shifts it down with v_lshrrev_b32 first.
138 define <2 x double> @v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict(<2 x half> %arg) #0 {
|
|
139 ; GFX89-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
|
|
140 ; GFX89: ; %bb.0:
|
|
141 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
142 ; GFX89-NEXT: v_cvt_f32_f16_e32 v1, v0
|
|
143 ; GFX89-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
144 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v1
|
|
145 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
|
|
146 ; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
147 ;
|
|
148 ; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
|
|
149 ; GFX10: ; %bb.0:
|
|
150 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
151 ; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v0
|
|
152 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
153 ; GFX10-NEXT: v_cvt_f64_f32_e32 v[0:1], v1
|
|
154 ; GFX10-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
|
|
155 ; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
156 ;
|
|
157 ; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
|
|
158 ; GFX11: ; %bb.0:
|
|
159 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
160 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
161 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
162 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1
|
|
163 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
|
|
164 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
|
|
165 ; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
166 %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half> %arg, metadata !"fpexcept.strict")
|
|
167 ret <2 x double> %result
|
|
168 }
|
|
169
|
|
; <3 x half> -> <3 x double> strict fpext. Each element goes
; half -> float -> double; the first two elements are read from v0 and the
; third from v1. The GFX89 and GFX10 check blocks use an SDWA conversion
; for the upper half of v0; the GFX11 check block uses a right shift by 16.
; NOTE(review): the function name says "v2f64" although the signature and
; the intrinsic are <3 x half> -> <3 x double>; it looks like a naming slip.
; Renaming would also require updating every -LABEL line in this function.
170 define <3 x double> @v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict(<3 x half> %arg) #0 {
|
|
171 ; GFX89-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
|
|
172 ; GFX89: ; %bb.0:
|
|
173 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
174 ; GFX89-NEXT: v_cvt_f32_f16_e32 v2, v0
|
|
175 ; GFX89-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
176 ; GFX89-NEXT: v_cvt_f32_f16_e32 v4, v1
|
|
177 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v2
|
|
178 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[2:3], v3
|
|
179 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[4:5], v4
|
|
180 ; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
181 ;
|
|
182 ; GFX10-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
|
|
183 ; GFX10: ; %bb.0:
|
|
184 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
185 ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v0
|
|
186 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
|
|
187 ; GFX10-NEXT: v_cvt_f32_f16_e32 v4, v1
|
|
188 ; GFX10-NEXT: v_cvt_f64_f32_e32 v[0:1], v2
|
|
189 ; GFX10-NEXT: v_cvt_f64_f32_e32 v[2:3], v3
|
|
190 ; GFX10-NEXT: v_cvt_f64_f32_e32 v[4:5], v4
|
|
191 ; GFX10-NEXT: s_setpc_b64 s[30:31]
|
|
192 ;
|
|
193 ; GFX11-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
|
|
194 ; GFX11: ; %bb.0:
|
|
195 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
196 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
|
|
197 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
198 ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1
|
|
199 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v2
|
|
200 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
|
|
201 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[4:5], v3
|
|
202 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
|
|
203 ; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
204 %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half> %arg, metadata !"fpexcept.strict")
|
|
205 ret <3 x double> %result
|
|
206 }
|
|
207
|
|
; fneg applied to the RESULT of a strict half -> float fpext. The checks
; expect the negation to stay a separate instruction after the conversion:
; a v_xor_b32 of the result with 0x80000000.
208 define float @v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 {
|
|
209 ; GCN-LABEL: v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict:
|
|
210 ; GCN: ; %bb.0:
|
|
211 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
212 ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
213 ; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
|
|
214 ; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
215 %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
|
|
216 %neg.result = fneg float %result
|
|
217 ret float %neg.result
|
|
218 }
|
|
219
|
|
; fneg applied to the INPUT of a strict half -> float fpext. The checks
; expect no separate negate instruction: the conversion is emitted in its
; e64 form with a negated source operand (-v0).
220 define float @v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict(half %arg) #0 {
|
|
221 ; GCN-LABEL: v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict:
|
|
222 ; GCN: ; %bb.0:
|
|
223 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
224 ; GCN-NEXT: v_cvt_f32_f16_e64 v0, -v0
|
|
225 ; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
226 %neg.arg = fneg half %arg
|
|
227 %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %neg.arg, metadata !"fpexcept.strict")
|
|
228 ret float %result
|
|
229 }
|
|
230
|
|
; fneg applied to the INPUT of a strict float -> double fpext. The negated
; value %neg.arg is what gets extended, so the checks expect the negation
; to appear as a negated source operand (-v0) on the e64 form of the
; conversion, mirroring v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict.
; The GCN-NEXT line for the conversion should be confirmed by re-running
; utils/update_llc_test_checks.py on this file.
231 define double @v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict(float %arg) #0 {
|
|
232 ; GCN-LABEL: v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict:
|
|
233 ; GCN: ; %bb.0:
|
|
234 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
235 ; GCN-NEXT: v_cvt_f64_f32_e64 v[0:1], -v0
|
|
236 ; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
237 %neg.arg = fneg float %arg
|
|
238 %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %neg.arg, metadata !"fpexcept.strict")
|
|
239 ret double %result
|
|
240 }
|
|
241
|
|
; fneg applied to the RESULT of a strict float -> double fpext. The checks
; expect the conversion into v[0:1] followed by a separate v_xor_b32 of
; 0x80000000 into v1, the second register of the result pair.
242 define double @v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 {
|
|
243 ; GCN-LABEL: v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict:
|
|
244 ; GCN: ; %bb.0:
|
|
245 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
246 ; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
|
|
247 ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
|
|
248 ; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
249 %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict")
|
|
250 %neg.result = fneg double %result
|
|
251 ret double %neg.result
|
|
252 }
|
|
253
|
|
254 declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) #1
|
|
255 declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) #1
|
|
256 declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata) #1
|
|
257
|
|
258 declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata) #1
|
|
259 declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half>, metadata) #1
|
|
260 declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half>, metadata) #1
|
|
261
|
|
262 declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #1
|
|
263 declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #1
|
|
264 declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #1
|
|
265
|
|
266 attributes #0 = { strictfp }
|
|
267 attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
|
|
268 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
|
269 ; GFX8: {{.*}}
|
|
270 ; GFX9: {{.*}}
|