Mercurial > hg > CbC > CbC_llvm
comparison llvm/test/CodeGen/AMDGPU/strict_fpext.ll @ 252:1f2b6ac9f198 llvm-original
LLVM16-1
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 18 Aug 2023 09:04:13 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
237:c80f45b162ad | 252:1f2b6ac9f198 |
---|---|
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 | |
2 ; FIXME: Missing operand promotion for f16 | |
3 ; XUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI %s | |
4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX8 %s | |
5 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX9 %s | |
6 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s | |
7 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s | |
8 | |
9 define float @v_constrained_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 { | |
10 ; GCN-LABEL: v_constrained_fpext_f16_to_f32_fpexcept_strict: | |
11 ; GCN: ; %bb.0: | |
12 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
13 ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 | |
14 ; GCN-NEXT: s_setpc_b64 s[30:31] | |
15 %ext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict") ; strict-exception extension of a scalar half to float | |
16 ret float %ext | |
17 } | |
18 | |
19 define <2 x float> @v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict(<2 x half> %arg) #0 { | |
20 ; GFX89-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict: | |
21 ; GFX89: ; %bb.0: | |
22 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
23 ; GFX89-NEXT: v_cvt_f32_f16_e32 v2, v0 | |
24 ; GFX89-NEXT: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 | |
25 ; GFX89-NEXT: v_mov_b32_e32 v0, v2 | |
26 ; GFX89-NEXT: s_setpc_b64 s[30:31] | |
27 ; | |
28 ; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict: | |
29 ; GFX10: ; %bb.0: | |
30 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
31 ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v0 | |
32 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 | |
33 ; GFX10-NEXT: v_mov_b32_e32 v0, v2 | |
34 ; GFX10-NEXT: s_setpc_b64 s[30:31] | |
35 ; | |
36 ; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict: | |
37 ; GFX11: ; %bb.0: | |
38 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
39 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 | |
40 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 | |
41 ; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1 | |
42 ; GFX11-NEXT: s_setpc_b64 s[30:31] | |
43 %ext = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %arg, metadata !"fpexcept.strict") ; strict-exception extension of <2 x half> to <2 x float> | |
44 ret <2 x float> %ext | |
45 } | |
46 | |
47 define <3 x float> @v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict(<3 x half> %arg) #0 { | |
48 ; GFX89-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict: | |
49 ; GFX89: ; %bb.0: | |
50 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
51 ; GFX89-NEXT: v_cvt_f32_f16_e32 v4, v0 | |
52 ; GFX89-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 | |
53 ; GFX89-NEXT: v_cvt_f32_f16_e32 v2, v1 | |
54 ; GFX89-NEXT: v_mov_b32_e32 v0, v4 | |
55 ; GFX89-NEXT: v_mov_b32_e32 v1, v3 | |
56 ; GFX89-NEXT: s_setpc_b64 s[30:31] | |
57 ; | |
58 ; GFX10-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict: | |
59 ; GFX10: ; %bb.0: | |
60 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
61 ; GFX10-NEXT: v_cvt_f32_f16_e32 v4, v0 | |
62 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 | |
63 ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v1 | |
64 ; GFX10-NEXT: v_mov_b32_e32 v0, v4 | |
65 ; GFX10-NEXT: v_mov_b32_e32 v1, v3 | |
66 ; GFX10-NEXT: s_setpc_b64 s[30:31] | |
67 ; | |
68 ; GFX11-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict: | |
69 ; GFX11: ; %bb.0: | |
70 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
71 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 | |
72 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 | |
73 ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v2 | |
74 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1 | |
75 ; GFX11-NEXT: v_mov_b32_e32 v1, v3 | |
76 ; GFX11-NEXT: s_setpc_b64 s[30:31] | |
77 %ext = call <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half> %arg, metadata !"fpexcept.strict") ; strict-exception extension of <3 x half> to <3 x float> | |
78 ret <3 x float> %ext | |
79 } | |
80 | |
81 define double @v_constrained_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 { | |
82 ; GCN-LABEL: v_constrained_fpext_f32_to_f64_fpexcept_strict: | |
83 ; GCN: ; %bb.0: | |
84 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
85 ; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 | |
86 ; GCN-NEXT: s_setpc_b64 s[30:31] | |
87 %ext = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict") ; strict-exception extension of a scalar float to double | |
88 ret double %ext | |
89 } | |
90 | |
91 define <2 x double> @v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict(<2 x float> %arg) #0 { | |
92 ; GCN-LABEL: v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict: | |
93 ; GCN: ; %bb.0: | |
94 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
95 ; GCN-NEXT: v_mov_b32_e32 v2, v1 | |
96 ; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 | |
97 ; GCN-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 | |
98 ; GCN-NEXT: s_setpc_b64 s[30:31] | |
99 %ext = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float> %arg, metadata !"fpexcept.strict") ; strict-exception extension of <2 x float> to <2 x double> | |
100 ret <2 x double> %ext | |
101 } | |
102 | |
103 define <3 x double> @v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict(<3 x float> %arg) #0 { | |
104 ; GFX89-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict: | |
105 ; GFX89: ; %bb.0: | |
106 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
107 ; GFX89-NEXT: v_mov_b32_e32 v4, v2 | |
108 ; GFX89-NEXT: v_mov_b32_e32 v2, v1 | |
109 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 | |
110 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 | |
111 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[4:5], v4 | |
112 ; GFX89-NEXT: s_setpc_b64 s[30:31] | |
113 ; | |
114 ; GFX1011-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict: | |
115 ; GFX1011: ; %bb.0: | |
116 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
117 ; GFX1011-NEXT: v_mov_b32_e32 v4, v2 | |
118 ; GFX1011-NEXT: v_mov_b32_e32 v2, v1 | |
119 ; GFX1011-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 | |
120 ; GFX1011-NEXT: v_cvt_f64_f32_e32 v[4:5], v4 | |
121 ; GFX1011-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 | |
122 ; GFX1011-NEXT: s_setpc_b64 s[30:31] | |
123 %ext = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float> %arg, metadata !"fpexcept.strict") ; strict-exception extension of <3 x float> to <3 x double> | |
124 ret <3 x double> %ext | |
125 } | |
126 | |
127 define double @v_constrained_fpext_f16_to_f64_fpexcept_strict(half %arg) #0 { | |
128 ; GCN-LABEL: v_constrained_fpext_f16_to_f64_fpexcept_strict: | |
129 ; GCN: ; %bb.0: | |
130 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
131 ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 | |
132 ; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 | |
133 ; GCN-NEXT: s_setpc_b64 s[30:31] | |
134 %ext = call double @llvm.experimental.constrained.fpext.f64.f16(half %arg, metadata !"fpexcept.strict") ; single half-to-double extension; the checks above expect it as two conversions (f16 to f32, then f32 to f64) | |
135 ret double %ext | |
136 } | |
137 | |
138 define <2 x double> @v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict(<2 x half> %arg) #0 { | |
139 ; GFX89-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict: | |
140 ; GFX89: ; %bb.0: | |
141 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
142 ; GFX89-NEXT: v_cvt_f32_f16_e32 v1, v0 | |
143 ; GFX89-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 | |
144 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v1 | |
145 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 | |
146 ; GFX89-NEXT: s_setpc_b64 s[30:31] | |
147 ; | |
148 ; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict: | |
149 ; GFX10: ; %bb.0: | |
150 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
151 ; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v0 | |
152 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 | |
153 ; GFX10-NEXT: v_cvt_f64_f32_e32 v[0:1], v1 | |
154 ; GFX10-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 | |
155 ; GFX10-NEXT: s_setpc_b64 s[30:31] | |
156 ; | |
157 ; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict: | |
158 ; GFX11: ; %bb.0: | |
159 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
160 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 | |
161 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 | |
162 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1 | |
163 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 | |
164 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 | |
165 ; GFX11-NEXT: s_setpc_b64 s[30:31] | |
166 %ext = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half> %arg, metadata !"fpexcept.strict") ; strict-exception extension of <2 x half> to <2 x double> | |
167 ret <2 x double> %ext | |
168 } | |
169 | |
170 define <3 x double> @v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict(<3 x half> %arg) #0 { | |
171 ; GFX89-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict: | |
172 ; GFX89: ; %bb.0: | |
173 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
174 ; GFX89-NEXT: v_cvt_f32_f16_e32 v2, v0 | |
175 ; GFX89-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 | |
176 ; GFX89-NEXT: v_cvt_f32_f16_e32 v4, v1 | |
177 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v2 | |
178 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[2:3], v3 | |
179 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[4:5], v4 | |
180 ; GFX89-NEXT: s_setpc_b64 s[30:31] | |
181 ; | |
182 ; GFX10-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict: | |
183 ; GFX10: ; %bb.0: | |
184 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
185 ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v0 | |
186 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 | |
187 ; GFX10-NEXT: v_cvt_f32_f16_e32 v4, v1 | |
188 ; GFX10-NEXT: v_cvt_f64_f32_e32 v[0:1], v2 | |
189 ; GFX10-NEXT: v_cvt_f64_f32_e32 v[2:3], v3 | |
190 ; GFX10-NEXT: v_cvt_f64_f32_e32 v[4:5], v4 | |
191 ; GFX10-NEXT: s_setpc_b64 s[30:31] | |
192 ; | |
193 ; GFX11-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict: | |
194 ; GFX11: ; %bb.0: | |
195 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
196 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 | |
197 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 | |
198 ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1 | |
199 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v2 | |
200 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 | |
201 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[4:5], v3 | |
202 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 | |
203 ; GFX11-NEXT: s_setpc_b64 s[30:31] | |
204 %ext = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half> %arg, metadata !"fpexcept.strict") ; strict-exception extension of <3 x half> to <3 x double>. NOTE(review): the function name says v2f64 but the result type is <3 x double> | |
205 ret <3 x double> %ext | |
206 } | |
207 | |
208 define float @v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 { | |
209 ; GCN-LABEL: v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict: | |
210 ; GCN: ; %bb.0: | |
211 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
212 ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 | |
213 ; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 | |
214 ; GCN-NEXT: s_setpc_b64 s[30:31] | |
215 %ext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict") ; strict-exception extension of a scalar half to float | |
216 %ext.neg = fneg float %ext ; negation applied to the extended result; the checks above expect a separate sign-bit xor | |
217 ret float %ext.neg | |
218 } | |
219 | |
220 define float @v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict(half %arg) #0 { | |
221 ; GCN-LABEL: v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict: | |
222 ; GCN: ; %bb.0: | |
223 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
224 ; GCN-NEXT: v_cvt_f32_f16_e64 v0, -v0 | |
225 ; GCN-NEXT: s_setpc_b64 s[30:31] | |
226 %arg.neg = fneg half %arg ; negation applied to the source before extending | |
227 %ext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg.neg, metadata !"fpexcept.strict") ; the checks above expect the negation folded into the conversion as a -v0 source modifier | |
228 ret float %ext | |
229 } | |
230 | |
231 define double @v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict(float %arg) #0 { ; fpext of a negated float source; assertions are autogenerated, re-run utils/update_llc_test_checks.py after editing the IR | |
232 ; GCN-LABEL: v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict: | |
233 ; GCN: ; %bb.0: | |
234 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
235 ; GCN-NEXT: v_cvt_f64_f32_e64 v[0:1], -v0 | |
236 ; GCN-NEXT: s_setpc_b64 s[30:31] | |
237 %neg.arg = fneg float %arg ; negation applied to the source before extending | |
238 %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %neg.arg, metadata !"fpexcept.strict") ; must consume %neg.arg, otherwise the fneg is dead and the test only covers a plain fpext | |
239 ret double %result | |
240 } | |
241 | |
242 define double @v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 { | |
243 ; GCN-LABEL: v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict: | |
244 ; GCN: ; %bb.0: | |
245 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | |
246 ; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 | |
247 ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 | |
248 ; GCN-NEXT: s_setpc_b64 s[30:31] | |
249 %ext = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict") ; strict-exception extension of a scalar float to double | |
250 %ext.neg = fneg double %ext ; negation applied to the extended result; the checks above expect a sign-bit xor on v1 | |
251 ret double %ext.neg | |
252 } | |
253 | |
254 declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) #1 | |
255 declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) #1 | |
256 declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata) #1 | |
257 | |
258 declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata) #1 | |
259 declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half>, metadata) #1 | |
260 declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half>, metadata) #1 | |
261 | |
262 declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #1 | |
263 declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #1 | |
264 declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #1 | |
265 | |
266 attributes #0 = { strictfp } | |
267 attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } | |
268 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: | |
269 ; GFX8: {{.*}} | |
270 ; GFX9: {{.*}} |