221
|
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
2 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s --check-prefixes=SI
|
|
3 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s --check-prefixes=VI
|
|
4 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefixes=EG
|
150
|
5
|
|
; Declaration of the f32 absolute-value intrinsic; @fp_to_sint_i32_fabs calls
; it on its float argument before the fptosi conversion.
6 declare float @llvm.fabs.f32(float) #1
|
|
7
|
|
; Scalar case: converts the float kernel argument %in to a signed i32 with
; fptosi and stores the result through %out.
; The autogenerated checks expect one v_cvt_i32_f32 followed by a dword store
; on the two amdgcn runs, and a TRUNC + FLT_TO_INT pair on the r600 run.
8 define amdgpu_kernel void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) {
|
221
|
9 ; SI-LABEL: fp_to_sint_i32:
|
|
10 ; SI: ; %bb.0:
|
|
11 ; SI-NEXT: s_load_dword s4, s[0:1], 0xb
|
|
12 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
|
|
13 ; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
14 ; SI-NEXT: s_mov_b32 s2, -1
|
|
15 ; SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
16 ; SI-NEXT: v_cvt_i32_f32_e32 v0, s4
|
|
17 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
18 ; SI-NEXT: s_endpgm
|
|
19 ;
|
|
20 ; VI-LABEL: fp_to_sint_i32:
|
|
21 ; VI: ; %bb.0:
|
|
22 ; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
|
|
23 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
|
|
24 ; VI-NEXT: s_mov_b32 s3, 0xf000
|
|
25 ; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
26 ; VI-NEXT: v_cvt_i32_f32_e32 v0, s2
|
|
27 ; VI-NEXT: s_mov_b32 s2, -1
|
|
28 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
29 ; VI-NEXT: s_endpgm
|
|
30 ;
|
|
31 ; EG-LABEL: fp_to_sint_i32:
|
|
32 ; EG: ; %bb.0:
|
|
33 ; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
|
|
34 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
|
|
35 ; EG-NEXT: CF_END
|
|
36 ; EG-NEXT: PAD
|
|
37 ; EG-NEXT: ALU clause starting at 4:
|
|
38 ; EG-NEXT: TRUNC * T0.W, KC0[2].Z,
|
|
39 ; EG-NEXT: FLT_TO_INT T0.X, PV.W,
|
|
40 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
41 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
150
|
42 %conv = fptosi float %in to i32
|
|
43 store i32 %conv, i32 addrspace(1)* %out
|
|
44 ret void
|
|
45 }
|
|
46
|
|
; Same scalar float -> i32 conversion, but applied to llvm.fabs.f32(%in).
; The autogenerated checks expect the absolute value to show up as a |...|
; operand on the convert (amdgcn runs) or on TRUNC (r600 run); the -NEXT
; chains leave no room for a separate abs instruction.
47 define amdgpu_kernel void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) {
|
221
|
48 ; SI-LABEL: fp_to_sint_i32_fabs:
|
|
49 ; SI: ; %bb.0:
|
|
50 ; SI-NEXT: s_load_dword s4, s[0:1], 0xb
|
|
51 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
|
|
52 ; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
53 ; SI-NEXT: s_mov_b32 s2, -1
|
|
54 ; SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
55 ; SI-NEXT: v_cvt_i32_f32_e64 v0, |s4|
|
|
56 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
57 ; SI-NEXT: s_endpgm
|
|
58 ;
|
|
59 ; VI-LABEL: fp_to_sint_i32_fabs:
|
|
60 ; VI: ; %bb.0:
|
|
61 ; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
|
|
62 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
|
|
63 ; VI-NEXT: s_mov_b32 s3, 0xf000
|
|
64 ; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
65 ; VI-NEXT: v_cvt_i32_f32_e64 v0, |s2|
|
|
66 ; VI-NEXT: s_mov_b32 s2, -1
|
|
67 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
68 ; VI-NEXT: s_endpgm
|
|
69 ;
|
|
70 ; EG-LABEL: fp_to_sint_i32_fabs:
|
|
71 ; EG: ; %bb.0:
|
|
72 ; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
|
|
73 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
|
|
74 ; EG-NEXT: CF_END
|
|
75 ; EG-NEXT: PAD
|
|
76 ; EG-NEXT: ALU clause starting at 4:
|
|
77 ; EG-NEXT: TRUNC * T0.W, |KC0[2].Z|,
|
|
78 ; EG-NEXT: FLT_TO_INT T0.X, PV.W,
|
|
79 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
80 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
150
|
81 %in.fabs = call float @llvm.fabs.f32(float %in)
|
|
82 %conv = fptosi float %in.fabs to i32
|
|
83 store i32 %conv, i32 addrspace(1)* %out
|
|
84 ret void
|
|
85 }
|
|
86
|
|
; Two-element vector case: converts the <2 x float> kernel argument %in to
; <2 x i32> with fptosi and stores the vector through %out.
; The autogenerated checks expect one conversion per element and a single
; two-dword store (buffer_store_dwordx2 on amdgcn, STORE_RAW of T0.XY on r600).
87 define amdgpu_kernel void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
|
221
|
88 ; SI-LABEL: fp_to_sint_v2i32:
|
|
89 ; SI: ; %bb.0:
|
|
90 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
|
|
91 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
|
|
92 ; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
93 ; SI-NEXT: s_mov_b32 s2, -1
|
|
94 ; SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
95 ; SI-NEXT: v_cvt_i32_f32_e32 v1, s5
|
|
96 ; SI-NEXT: v_cvt_i32_f32_e32 v0, s4
|
|
97 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
|
98 ; SI-NEXT: s_endpgm
|
|
99 ;
|
|
100 ; VI-LABEL: fp_to_sint_v2i32:
|
|
101 ; VI: ; %bb.0:
|
|
102 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
|
|
103 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
|
|
104 ; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
105 ; VI-NEXT: v_cvt_i32_f32_e32 v1, s3
|
|
106 ; VI-NEXT: v_cvt_i32_f32_e32 v0, s2
|
|
107 ; VI-NEXT: s_mov_b32 s3, 0xf000
|
|
108 ; VI-NEXT: s_mov_b32 s2, -1
|
|
109 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
|
110 ; VI-NEXT: s_endpgm
|
|
111 ;
|
|
112 ; EG-LABEL: fp_to_sint_v2i32:
|
|
113 ; EG: ; %bb.0:
|
|
114 ; EG-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[]
|
|
115 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
|
|
116 ; EG-NEXT: CF_END
|
|
117 ; EG-NEXT: PAD
|
|
118 ; EG-NEXT: ALU clause starting at 4:
|
|
119 ; EG-NEXT: TRUNC * T0.W, KC0[3].X,
|
|
120 ; EG-NEXT: FLT_TO_INT T0.Y, PV.W,
|
|
121 ; EG-NEXT: TRUNC * T0.W, KC0[2].W,
|
|
122 ; EG-NEXT: FLT_TO_INT T0.X, PV.W,
|
|
123 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
124 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
150
|
125 %result = fptosi <2 x float> %in to <2 x i32>
|
|
126 store <2 x i32> %result, <2 x i32> addrspace(1)* %out
|
|
127 ret void
|
|
128 }
|
|
129
|
|
; Four-element vector case. Unlike the tests above, the input is not passed by
; value: %in is an addrspace(1) pointer, so the <4 x float> is loaded from
; memory first, converted to <4 x i32> with fptosi, and stored through %out.
; The autogenerated checks expect a 128-bit load, four conversions and a
; single four-dword store on every run.
130 define amdgpu_kernel void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
|
221
|
131 ; SI-LABEL: fp_to_sint_v4i32:
|
|
132 ; SI: ; %bb.0:
|
|
133 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
|
134 ; SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
135 ; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
|
|
136 ; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
137 ; SI-NEXT: s_mov_b32 s2, -1
|
|
138 ; SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
139 ; SI-NEXT: v_cvt_i32_f32_e32 v3, s7
|
|
140 ; SI-NEXT: v_cvt_i32_f32_e32 v2, s6
|
|
141 ; SI-NEXT: v_cvt_i32_f32_e32 v1, s5
|
|
142 ; SI-NEXT: v_cvt_i32_f32_e32 v0, s4
|
|
143 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
144 ; SI-NEXT: s_endpgm
|
|
145 ;
|
|
146 ; VI-LABEL: fp_to_sint_v4i32:
|
|
147 ; VI: ; %bb.0:
|
|
148 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
|
|
149 ; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
150 ; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
|
|
151 ; VI-NEXT: s_mov_b32 s3, 0xf000
|
|
152 ; VI-NEXT: s_mov_b32 s2, -1
|
|
153 ; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
154 ; VI-NEXT: v_cvt_i32_f32_e32 v3, s7
|
|
155 ; VI-NEXT: v_cvt_i32_f32_e32 v2, s6
|
|
156 ; VI-NEXT: v_cvt_i32_f32_e32 v1, s5
|
|
157 ; VI-NEXT: v_cvt_i32_f32_e32 v0, s4
|
|
158 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
|
159 ; VI-NEXT: s_endpgm
|
|
160 ;
|
|
161 ; EG-LABEL: fp_to_sint_v4i32:
|
|
162 ; EG: ; %bb.0:
|
|
163 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
164 ; EG-NEXT: TEX 0 @6
|
|
165 ; EG-NEXT: ALU 9, @9, KC0[CB0:0-32], KC1[]
|
|
166 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
|
|
167 ; EG-NEXT: CF_END
|
|
168 ; EG-NEXT: PAD
|
|
169 ; EG-NEXT: Fetch clause starting at 6:
|
|
170 ; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
|
|
171 ; EG-NEXT: ALU clause starting at 8:
|
|
172 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
173 ; EG-NEXT: ALU clause starting at 9:
|
|
174 ; EG-NEXT: TRUNC T0.W, T0.W,
|
|
175 ; EG-NEXT: TRUNC * T1.W, T0.Z,
|
|
176 ; EG-NEXT: FLT_TO_INT * T0.W, PV.W,
|
|
177 ; EG-NEXT: FLT_TO_INT T0.Z, T1.W,
|
|
178 ; EG-NEXT: TRUNC * T1.W, T0.Y,
|
|
179 ; EG-NEXT: FLT_TO_INT T0.Y, PV.W,
|
|
180 ; EG-NEXT: TRUNC * T1.W, T0.X,
|
|
181 ; EG-NEXT: FLT_TO_INT T0.X, PV.W,
|
|
182 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
183 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
150
|
184 %value = load <4 x float>, <4 x float> addrspace(1) * %in
|
|
185 %result = fptosi <4 x float> %value to <4 x i32>
|
|
186 store <4 x i32> %result, <4 x i32> addrspace(1)* %out
|
|
187 ret void
|
|
188 }
|
|
189
|
|
190 ; Check that the compiler doesn't crash with a "cannot select" error
; Converts the float kernel argument %in to a signed i64 with fptosi and
; stores it through %out. No convert instruction appears in the autogenerated
; checks; all three runs expect an integer-only sequence instead (bit-field
; extract, 64-bit shifts, xor/sub with the sign, and conditional selects).
; The block label "entry" is echoed in the "%bb.0" check lines, so renaming it
; requires regenerating the assertions.
|
|
191 define amdgpu_kernel void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) {
|
221
|
192 ; SI-LABEL: fp_to_sint_i64:
|
|
193 ; SI: ; %bb.0: ; %entry
|
|
194 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
|
|
195 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
|
|
196 ; SI-NEXT: s_mov_b32 s7, 0xf000
|
|
197 ; SI-NEXT: s_mov_b32 s6, -1
|
|
198 ; SI-NEXT: s_mov_b32 s1, 0
|
|
199 ; SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
200 ; SI-NEXT: s_bfe_u32 s8, s0, 0x80017
|
|
201 ; SI-NEXT: s_and_b32 s2, s0, 0x7fffff
|
|
202 ; SI-NEXT: s_ashr_i32 s9, s0, 31
|
|
203 ; SI-NEXT: s_add_i32 s3, s8, 0xffffff6a
|
|
204 ; SI-NEXT: s_or_b32 s0, s2, 0x800000
|
|
205 ; SI-NEXT: s_sub_i32 s10, 0x96, s8
|
|
206 ; SI-NEXT: s_ashr_i32 s11, s9, 31
|
|
207 ; SI-NEXT: s_lshl_b64 s[2:3], s[0:1], s3
|
|
208 ; SI-NEXT: s_lshr_b64 s[0:1], s[0:1], s10
|
|
209 ; SI-NEXT: s_addk_i32 s8, 0xff81
|
|
210 ; SI-NEXT: v_mov_b32_e32 v0, s11
|
|
211 ; SI-NEXT: v_mov_b32_e32 v1, s1
|
|
212 ; SI-NEXT: v_mov_b32_e32 v2, s3
|
|
213 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s8, 23
|
|
214 ; SI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
|
215 ; SI-NEXT: v_mov_b32_e32 v2, s0
|
|
216 ; SI-NEXT: v_mov_b32_e32 v3, s2
|
|
217 ; SI-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
|
218 ; SI-NEXT: v_xor_b32_e32 v1, s11, v1
|
|
219 ; SI-NEXT: v_xor_b32_e32 v2, s9, v2
|
|
220 ; SI-NEXT: v_subrev_i32_e32 v2, vcc, s9, v2
|
|
221 ; SI-NEXT: v_subb_u32_e32 v0, vcc, v1, v0, vcc
|
|
222 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s8, 0
|
|
223 ; SI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1]
|
|
224 ; SI-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[0:1]
|
|
225 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
226 ; SI-NEXT: s_endpgm
|
|
227 ;
|
|
228 ; VI-LABEL: fp_to_sint_i64:
|
|
229 ; VI: ; %bb.0: ; %entry
|
|
230 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
|
|
231 ; VI-NEXT: s_load_dword s8, s[0:1], 0x2c
|
|
232 ; VI-NEXT: s_mov_b32 s1, 0
|
|
233 ; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
234 ; VI-NEXT: s_mov_b32 s6, -1
|
|
235 ; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
236 ; VI-NEXT: s_bfe_u32 s9, s8, 0x80017
|
|
237 ; VI-NEXT: s_and_b32 s0, s8, 0x7fffff
|
|
238 ; VI-NEXT: s_add_i32 s2, s9, 0xffffff6a
|
|
239 ; VI-NEXT: s_bitset1_b32 s0, 23
|
|
240 ; VI-NEXT: s_sub_i32 s10, 0x96, s9
|
|
241 ; VI-NEXT: s_lshl_b64 s[2:3], s[0:1], s2
|
|
242 ; VI-NEXT: s_lshr_b64 s[0:1], s[0:1], s10
|
|
243 ; VI-NEXT: s_addk_i32 s9, 0xff81
|
|
244 ; VI-NEXT: v_mov_b32_e32 v0, s1
|
|
245 ; VI-NEXT: v_mov_b32_e32 v1, s3
|
|
246 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s9, 23
|
|
247 ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
|
248 ; VI-NEXT: v_mov_b32_e32 v1, s0
|
|
249 ; VI-NEXT: v_mov_b32_e32 v2, s2
|
|
250 ; VI-NEXT: s_ashr_i32 s0, s8, 31
|
|
251 ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
|
252 ; VI-NEXT: s_ashr_i32 s1, s0, 31
|
|
253 ; VI-NEXT: v_xor_b32_e32 v1, s0, v1
|
|
254 ; VI-NEXT: v_xor_b32_e32 v0, s1, v0
|
|
255 ; VI-NEXT: v_mov_b32_e32 v2, s1
|
|
256 ; VI-NEXT: v_subrev_u32_e32 v3, vcc, s0, v1
|
|
257 ; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v2, vcc
|
|
258 ; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], s9, 0
|
|
259 ; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1]
|
|
260 ; VI-NEXT: v_cndmask_b32_e64 v0, v3, 0, s[0:1]
|
|
261 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
262 ; VI-NEXT: s_endpgm
|
|
263 ;
|
|
264 ; EG-LABEL: fp_to_sint_i64:
|
|
265 ; EG: ; %bb.0: ; %entry
|
|
266 ; EG-NEXT: ALU 41, @4, KC0[CB0:0-32], KC1[]
|
|
267 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
|
|
268 ; EG-NEXT: CF_END
|
|
269 ; EG-NEXT: PAD
|
|
270 ; EG-NEXT: ALU clause starting at 4:
|
|
271 ; EG-NEXT: MOV * T0.W, literal.x,
|
|
272 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
|
|
273 ; EG-NEXT: BFE_UINT T0.W, KC0[2].Z, literal.x, PV.W,
|
|
274 ; EG-NEXT: AND_INT * T1.W, KC0[2].Z, literal.y,
|
|
275 ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
|
|
276 ; EG-NEXT: OR_INT T1.W, PS, literal.x,
|
|
277 ; EG-NEXT: ADD_INT * T2.W, PV.W, literal.y,
|
|
278 ; EG-NEXT: 8388608(1.175494e-38), -150(nan)
|
|
279 ; EG-NEXT: ADD_INT T0.X, T0.W, literal.x,
|
|
280 ; EG-NEXT: SUB_INT T0.Y, literal.y, T0.W,
|
|
281 ; EG-NEXT: AND_INT T0.Z, PS, literal.z,
|
|
282 ; EG-NEXT: NOT_INT T0.W, PS,
|
|
283 ; EG-NEXT: LSHR * T3.W, PV.W, 1,
|
|
284 ; EG-NEXT: -127(nan), 150(2.101948e-43)
|
|
285 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
286 ; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
|
|
287 ; EG-NEXT: LSHL T1.Y, T1.W, PV.Z,
|
|
288 ; EG-NEXT: AND_INT T0.Z, T2.W, literal.x, BS:VEC_120/SCL_212
|
|
289 ; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122
|
|
290 ; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
|
|
291 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
|
|
292 ; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0,
|
|
293 ; EG-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, 0.0,
|
|
294 ; EG-NEXT: CNDE_INT T0.W, PV.Z, PV.X, PV.Y,
|
|
295 ; EG-NEXT: SETGT_INT * T1.W, T0.X, literal.x,
|
|
296 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
|
|
297 ; EG-NEXT: CNDE_INT T0.Z, PS, 0.0, PV.W,
|
|
298 ; EG-NEXT: CNDE_INT T0.W, PS, PV.Y, PV.Z,
|
|
299 ; EG-NEXT: ASHR * T1.W, KC0[2].Z, literal.x,
|
|
300 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
301 ; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
302 ; EG-NEXT: XOR_INT * T2.W, PV.Z, PS,
|
|
303 ; EG-NEXT: SUB_INT T2.W, PS, T1.W,
|
|
304 ; EG-NEXT: SUBB_UINT * T3.W, PV.W, T1.W,
|
|
305 ; EG-NEXT: SUB_INT T2.W, PV.W, PS,
|
|
306 ; EG-NEXT: SETGT_INT * T3.W, T0.X, literal.x,
|
|
307 ; EG-NEXT: -1(nan), 0(0.000000e+00)
|
|
308 ; EG-NEXT: CNDE_INT T0.Y, PS, 0.0, PV.W,
|
|
309 ; EG-NEXT: SUB_INT * T0.W, T0.W, T1.W,
|
|
310 ; EG-NEXT: CNDE_INT T0.X, T3.W, 0.0, PV.W,
|
|
311 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
312 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
150
|
313 entry:
|
|
314 %0 = fptosi float %in to i64
|
|
315 store i64 %0, i64 addrspace(1)* %out
|
|
316 ret void
|
|
317 }
|
|
318
|
|
; Two-element 64-bit case: converts the <2 x float> kernel argument %x to
; <2 x i64> with fptosi and stores the vector through %out.
; The autogenerated checks expect the integer-only sequence (bit-field
; extract, 64-bit shifts, xor/sub with the sign, conditional selects) once per
; element, followed by a single four-dword store.
319 define amdgpu_kernel void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
|
221
|
320 ; SI-LABEL: fp_to_sint_v2i64:
|
|
321 ; SI: ; %bb.0:
|
|
322 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
|
|
323 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
|
|
324 ; SI-NEXT: s_mov_b32 s7, 0xf000
|
|
325 ; SI-NEXT: s_mov_b32 s6, -1
|
|
326 ; SI-NEXT: s_movk_i32 s8, 0xff6a
|
|
327 ; SI-NEXT: s_mov_b32 s2, 0x7fffff
|
|
328 ; SI-NEXT: s_mov_b32 s10, 0x800000
|
|
329 ; SI-NEXT: s_mov_b32 s3, 0
|
|
330 ; SI-NEXT: s_movk_i32 s9, 0x96
|
|
331 ; SI-NEXT: s_movk_i32 s11, 0xff81
|
|
332 ; SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
333 ; SI-NEXT: s_bfe_u32 s12, s1, 0x80017
|
|
334 ; SI-NEXT: s_and_b32 s13, s1, s2
|
|
335 ; SI-NEXT: s_ashr_i32 s14, s1, 31
|
|
336 ; SI-NEXT: s_bfe_u32 s1, s0, 0x80017
|
|
337 ; SI-NEXT: s_and_b32 s15, s0, s2
|
|
338 ; SI-NEXT: s_ashr_i32 s16, s0, 31
|
|
339 ; SI-NEXT: s_add_i32 s0, s12, s8
|
|
340 ; SI-NEXT: s_or_b32 s2, s13, s10
|
|
341 ; SI-NEXT: s_sub_i32 s13, s9, s12
|
|
342 ; SI-NEXT: s_add_i32 s12, s12, s11
|
|
343 ; SI-NEXT: s_ashr_i32 s17, s14, 31
|
|
344 ; SI-NEXT: s_add_i32 s18, s1, s8
|
|
345 ; SI-NEXT: s_sub_i32 s19, s9, s1
|
|
346 ; SI-NEXT: s_add_i32 s11, s1, s11
|
|
347 ; SI-NEXT: s_ashr_i32 s20, s16, 31
|
|
348 ; SI-NEXT: s_lshl_b64 s[0:1], s[2:3], s0
|
|
349 ; SI-NEXT: s_lshr_b64 s[8:9], s[2:3], s13
|
|
350 ; SI-NEXT: v_mov_b32_e32 v0, s17
|
|
351 ; SI-NEXT: s_or_b32 s2, s15, s10
|
|
352 ; SI-NEXT: v_mov_b32_e32 v1, s20
|
|
353 ; SI-NEXT: v_mov_b32_e32 v2, s9
|
|
354 ; SI-NEXT: v_mov_b32_e32 v3, s1
|
|
355 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s12, 23
|
|
356 ; SI-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
|
357 ; SI-NEXT: v_mov_b32_e32 v3, s8
|
|
358 ; SI-NEXT: v_mov_b32_e32 v4, s0
|
|
359 ; SI-NEXT: s_lshl_b64 s[0:1], s[2:3], s18
|
|
360 ; SI-NEXT: s_lshr_b64 s[2:3], s[2:3], s19
|
|
361 ; SI-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
|
|
362 ; SI-NEXT: v_xor_b32_e32 v2, s17, v2
|
|
363 ; SI-NEXT: v_mov_b32_e32 v4, s3
|
|
364 ; SI-NEXT: v_mov_b32_e32 v5, s1
|
|
365 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s11, 23
|
|
366 ; SI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
|
|
367 ; SI-NEXT: v_mov_b32_e32 v5, s2
|
|
368 ; SI-NEXT: v_mov_b32_e32 v6, s0
|
|
369 ; SI-NEXT: v_xor_b32_e32 v3, s14, v3
|
|
370 ; SI-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc
|
|
371 ; SI-NEXT: v_xor_b32_e32 v4, s20, v4
|
|
372 ; SI-NEXT: v_subrev_i32_e32 v6, vcc, s14, v3
|
|
373 ; SI-NEXT: v_subb_u32_e32 v0, vcc, v2, v0, vcc
|
|
374 ; SI-NEXT: v_xor_b32_e32 v5, s16, v5
|
|
375 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s12, 0
|
|
376 ; SI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[0:1]
|
|
377 ; SI-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[0:1]
|
|
378 ; SI-NEXT: v_subrev_i32_e32 v0, vcc, s16, v5
|
|
379 ; SI-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc
|
|
380 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s11, 0
|
|
381 ; SI-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1]
|
|
382 ; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1]
|
|
383 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
384 ; SI-NEXT: s_endpgm
|
|
385 ;
|
|
386 ; VI-LABEL: fp_to_sint_v2i64:
|
|
387 ; VI: ; %bb.0:
|
|
388 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
|
|
389 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
|
|
390 ; VI-NEXT: s_mov_b32 s14, 0x7fffff
|
|
391 ; VI-NEXT: s_movk_i32 s12, 0xff6a
|
|
392 ; VI-NEXT: s_mov_b32 s15, 0x800000
|
|
393 ; VI-NEXT: s_movk_i32 s16, 0x96
|
|
394 ; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
395 ; VI-NEXT: s_bfe_u32 s13, s1, 0x80017
|
|
396 ; VI-NEXT: s_and_b32 s2, s1, s14
|
|
397 ; VI-NEXT: s_add_i32 s8, s13, s12
|
|
398 ; VI-NEXT: s_or_b32 s2, s2, s15
|
|
399 ; VI-NEXT: s_mov_b32 s3, 0
|
|
400 ; VI-NEXT: s_sub_i32 s10, s16, s13
|
|
401 ; VI-NEXT: s_movk_i32 s17, 0xff81
|
|
402 ; VI-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
|
|
403 ; VI-NEXT: s_lshr_b64 s[10:11], s[2:3], s10
|
|
404 ; VI-NEXT: s_add_i32 s13, s13, s17
|
|
405 ; VI-NEXT: v_mov_b32_e32 v0, s11
|
|
406 ; VI-NEXT: v_mov_b32_e32 v1, s9
|
|
407 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s13, 23
|
|
408 ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
|
409 ; VI-NEXT: v_mov_b32_e32 v2, s8
|
|
410 ; VI-NEXT: v_mov_b32_e32 v1, s10
|
|
411 ; VI-NEXT: s_ashr_i32 s1, s1, 31
|
|
412 ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
|
413 ; VI-NEXT: s_ashr_i32 s2, s1, 31
|
|
414 ; VI-NEXT: v_xor_b32_e32 v1, s1, v1
|
|
415 ; VI-NEXT: v_subrev_u32_e32 v1, vcc, s1, v1
|
|
416 ; VI-NEXT: v_xor_b32_e32 v0, s2, v0
|
|
417 ; VI-NEXT: v_mov_b32_e32 v2, s2
|
|
418 ; VI-NEXT: s_and_b32 s2, s0, s14
|
|
419 ; VI-NEXT: s_bfe_u32 s1, s0, 0x80017
|
|
420 ; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v2, vcc
|
|
421 ; VI-NEXT: v_cmp_lt_i32_e64 s[8:9], s13, 0
|
|
422 ; VI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[8:9]
|
|
423 ; VI-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[8:9]
|
|
424 ; VI-NEXT: s_add_i32 s8, s1, s12
|
|
425 ; VI-NEXT: s_or_b32 s2, s2, s15
|
|
426 ; VI-NEXT: s_sub_i32 s10, s16, s1
|
|
427 ; VI-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
|
|
428 ; VI-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
|
|
429 ; VI-NEXT: s_add_i32 s1, s1, s17
|
|
430 ; VI-NEXT: v_mov_b32_e32 v0, s3
|
|
431 ; VI-NEXT: v_mov_b32_e32 v1, s9
|
|
432 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s1, 23
|
|
433 ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
|
434 ; VI-NEXT: v_mov_b32_e32 v1, s2
|
|
435 ; VI-NEXT: v_mov_b32_e32 v4, s8
|
|
436 ; VI-NEXT: s_ashr_i32 s0, s0, 31
|
|
437 ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
|
|
438 ; VI-NEXT: s_ashr_i32 s2, s0, 31
|
|
439 ; VI-NEXT: v_xor_b32_e32 v1, s0, v1
|
|
440 ; VI-NEXT: v_subrev_u32_e32 v5, vcc, s0, v1
|
|
441 ; VI-NEXT: v_xor_b32_e32 v0, s2, v0
|
|
442 ; VI-NEXT: v_mov_b32_e32 v4, s2
|
|
443 ; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v4, vcc
|
|
444 ; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], s1, 0
|
|
445 ; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1]
|
|
446 ; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
447 ; VI-NEXT: s_mov_b32 s6, -1
|
|
448 ; VI-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[0:1]
|
|
449 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
450 ; VI-NEXT: s_endpgm
|
|
451 ;
|
|
452 ; EG-LABEL: fp_to_sint_v2i64:
|
|
453 ; EG: ; %bb.0:
|
|
454 ; EG-NEXT: ALU 77, @4, KC0[CB0:0-32], KC1[]
|
|
455 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
|
|
456 ; EG-NEXT: CF_END
|
|
457 ; EG-NEXT: PAD
|
|
458 ; EG-NEXT: ALU clause starting at 4:
|
|
459 ; EG-NEXT: MOV * T0.W, literal.x,
|
|
460 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
|
|
461 ; EG-NEXT: BFE_UINT * T1.W, KC0[2].W, literal.x, PV.W,
|
|
462 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
|
|
463 ; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
|
|
464 ; EG-NEXT: BFE_UINT T0.W, KC0[3].X, literal.y, T0.W,
|
|
465 ; EG-NEXT: ADD_INT * T2.W, PV.W, literal.z,
|
|
466 ; EG-NEXT: 8388607(1.175494e-38), 23(3.222986e-44)
|
|
467 ; EG-NEXT: -150(nan), 0(0.000000e+00)
|
|
468 ; EG-NEXT: SUB_INT T0.X, literal.x, PV.W,
|
|
469 ; EG-NEXT: SUB_INT T0.Y, literal.x, T1.W,
|
|
470 ; EG-NEXT: AND_INT T1.Z, PS, literal.y,
|
|
471 ; EG-NEXT: OR_INT T3.W, PV.Z, literal.z,
|
|
472 ; EG-NEXT: AND_INT * T4.W, KC0[3].X, literal.w,
|
|
473 ; EG-NEXT: 150(2.101948e-43), 31(4.344025e-44)
|
|
474 ; EG-NEXT: 8388608(1.175494e-38), 8388607(1.175494e-38)
|
|
475 ; EG-NEXT: OR_INT T1.X, PS, literal.x,
|
|
476 ; EG-NEXT: LSHL T1.Y, PV.W, PV.Z,
|
|
477 ; EG-NEXT: AND_INT T0.Z, T2.W, literal.y,
|
|
478 ; EG-NEXT: BIT_ALIGN_INT T4.W, 0.0, PV.W, PV.Y,
|
|
479 ; EG-NEXT: AND_INT * T5.W, PV.Y, literal.y,
|
|
480 ; EG-NEXT: 8388608(1.175494e-38), 32(4.484155e-44)
|
|
481 ; EG-NEXT: CNDE_INT T2.X, PS, PV.W, 0.0,
|
|
482 ; EG-NEXT: CNDE_INT T0.Y, PV.Z, PV.Y, 0.0,
|
|
483 ; EG-NEXT: ADD_INT T1.Z, T0.W, literal.x,
|
|
484 ; EG-NEXT: BIT_ALIGN_INT T4.W, 0.0, PV.X, T0.X,
|
|
485 ; EG-NEXT: AND_INT * T5.W, T0.X, literal.y,
|
|
486 ; EG-NEXT: -150(nan), 32(4.484155e-44)
|
|
487 ; EG-NEXT: CNDE_INT T0.X, PS, PV.W, 0.0,
|
|
488 ; EG-NEXT: NOT_INT T2.Y, T2.W,
|
|
489 ; EG-NEXT: AND_INT T2.Z, PV.Z, literal.x,
|
|
490 ; EG-NEXT: NOT_INT T2.W, PV.Z,
|
|
491 ; EG-NEXT: LSHR * T4.W, T1.X, 1,
|
|
492 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
493 ; EG-NEXT: LSHR T3.X, T3.W, 1,
|
|
494 ; EG-NEXT: ADD_INT T3.Y, T0.W, literal.x, BS:VEC_120/SCL_212
|
|
495 ; EG-NEXT: BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W,
|
|
496 ; EG-NEXT: LSHL T0.W, T1.X, PV.Z,
|
|
497 ; EG-NEXT: AND_INT * T2.W, T1.Z, literal.y,
|
|
498 ; EG-NEXT: -127(nan), 32(4.484155e-44)
|
|
499 ; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0,
|
|
500 ; EG-NEXT: CNDE_INT T4.Y, PS, PV.Z, PV.W,
|
|
501 ; EG-NEXT: SETGT_INT T1.Z, PV.Y, literal.x,
|
|
502 ; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, PV.X, T2.Y,
|
|
503 ; EG-NEXT: ADD_INT * T1.W, T1.W, literal.y,
|
|
504 ; EG-NEXT: 23(3.222986e-44), -127(nan)
|
|
505 ; EG-NEXT: CNDE_INT T3.X, T0.Z, PV.W, T1.Y,
|
|
506 ; EG-NEXT: SETGT_INT T1.Y, PS, literal.x,
|
|
507 ; EG-NEXT: CNDE_INT T0.Z, PV.Z, 0.0, PV.Y,
|
|
508 ; EG-NEXT: CNDE_INT T0.W, PV.Z, T0.X, PV.X,
|
|
509 ; EG-NEXT: ASHR * T2.W, KC0[3].X, literal.y,
|
|
510 ; EG-NEXT: 23(3.222986e-44), 31(4.344025e-44)
|
|
511 ; EG-NEXT: XOR_INT T0.X, PV.W, PS,
|
|
512 ; EG-NEXT: XOR_INT T2.Y, PV.Z, PS,
|
|
513 ; EG-NEXT: CNDE_INT T0.Z, PV.Y, 0.0, PV.X,
|
|
514 ; EG-NEXT: CNDE_INT T0.W, PV.Y, T2.X, T0.Y,
|
|
515 ; EG-NEXT: ASHR * T3.W, KC0[2].W, literal.x,
|
|
516 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
517 ; EG-NEXT: XOR_INT T0.Y, PV.W, PS,
|
|
518 ; EG-NEXT: XOR_INT T0.Z, PV.Z, PS,
|
|
519 ; EG-NEXT: SUB_INT T0.W, PV.Y, T2.W,
|
|
520 ; EG-NEXT: SUBB_UINT * T4.W, PV.X, T2.W,
|
|
521 ; EG-NEXT: SUB_INT T1.Y, PV.W, PS,
|
|
522 ; EG-NEXT: SETGT_INT T1.Z, T3.Y, literal.x,
|
|
523 ; EG-NEXT: SUB_INT T0.W, PV.Z, T3.W,
|
|
524 ; EG-NEXT: SUBB_UINT * T4.W, PV.Y, T3.W,
|
|
525 ; EG-NEXT: -1(nan), 0(0.000000e+00)
|
|
526 ; EG-NEXT: SUB_INT T0.Z, PV.W, PS,
|
|
527 ; EG-NEXT: SETGT_INT T0.W, T1.W, literal.x,
|
|
528 ; EG-NEXT: CNDE_INT * T1.W, PV.Z, 0.0, PV.Y, BS:VEC_021/SCL_122
|
|
529 ; EG-NEXT: -1(nan), 0(0.000000e+00)
|
|
530 ; EG-NEXT: CNDE_INT T1.Y, PV.W, 0.0, PV.Z,
|
|
531 ; EG-NEXT: SUB_INT * T2.W, T0.X, T2.W,
|
|
532 ; EG-NEXT: CNDE_INT T1.Z, T1.Z, 0.0, PV.W,
|
|
533 ; EG-NEXT: SUB_INT * T2.W, T0.Y, T3.W,
|
|
534 ; EG-NEXT: CNDE_INT T1.X, T0.W, 0.0, PV.W,
|
|
535 ; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
|
|
536 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
150
|
537 %conv = fptosi <2 x float> %x to <2 x i64>
|
|
538 store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
|
|
539 ret void
|
|
540 }
|
|
541
|
|
542 define amdgpu_kernel void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
|
221
|
543 ; SI-LABEL: fp_to_sint_v4i64:
|
|
544 ; SI: ; %bb.0:
|
|
545 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
|
|
546 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
|
|
547 ; SI-NEXT: s_mov_b32 s7, 0xf000
|
|
548 ; SI-NEXT: s_mov_b32 s6, -1
|
|
549 ; SI-NEXT: s_movk_i32 s10, 0xff6a
|
|
550 ; SI-NEXT: s_mov_b32 s8, 0x7fffff
|
|
551 ; SI-NEXT: s_mov_b32 s11, 0x800000
|
|
552 ; SI-NEXT: s_mov_b32 s9, 0
|
|
553 ; SI-NEXT: s_movk_i32 s12, 0x96
|
|
554 ; SI-NEXT: s_movk_i32 s13, 0xff81
|
|
555 ; SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
556 ; SI-NEXT: s_bfe_u32 s14, s1, 0x80017
|
|
557 ; SI-NEXT: s_and_b32 s15, s1, s8
|
|
558 ; SI-NEXT: s_ashr_i32 s16, s1, 31
|
|
559 ; SI-NEXT: s_bfe_u32 s1, s0, 0x80017
|
|
560 ; SI-NEXT: s_and_b32 s17, s0, s8
|
|
561 ; SI-NEXT: s_ashr_i32 s18, s0, 31
|
|
562 ; SI-NEXT: s_bfe_u32 s0, s3, 0x80017
|
|
563 ; SI-NEXT: s_and_b32 s19, s3, s8
|
|
564 ; SI-NEXT: s_ashr_i32 s20, s3, 31
|
|
565 ; SI-NEXT: s_bfe_u32 s3, s2, 0x80017
|
|
566 ; SI-NEXT: s_and_b32 s21, s2, s8
|
|
567 ; SI-NEXT: s_ashr_i32 s22, s2, 31
|
|
568 ; SI-NEXT: s_add_i32 s2, s14, s10
|
|
569 ; SI-NEXT: s_or_b32 s8, s15, s11
|
|
570 ; SI-NEXT: s_sub_i32 s15, s12, s14
|
|
571 ; SI-NEXT: s_add_i32 s14, s14, s13
|
|
572 ; SI-NEXT: s_ashr_i32 s23, s16, 31
|
|
573 ; SI-NEXT: s_add_i32 s24, s1, s10
|
|
574 ; SI-NEXT: s_sub_i32 s25, s12, s1
|
|
575 ; SI-NEXT: s_add_i32 s26, s1, s13
|
|
576 ; SI-NEXT: s_ashr_i32 s27, s18, 31
|
|
577 ; SI-NEXT: s_add_i32 s28, s0, s10
|
|
578 ; SI-NEXT: s_sub_i32 s29, s12, s0
|
|
579 ; SI-NEXT: s_add_i32 s30, s0, s13
|
|
580 ; SI-NEXT: s_ashr_i32 s31, s20, 31
|
|
581 ; SI-NEXT: s_add_i32 s10, s3, s10
|
|
582 ; SI-NEXT: s_sub_i32 s12, s12, s3
|
|
583 ; SI-NEXT: s_add_i32 s13, s3, s13
|
|
584 ; SI-NEXT: s_ashr_i32 s33, s22, 31
|
|
585 ; SI-NEXT: s_lshl_b64 s[0:1], s[8:9], s2
|
|
586 ; SI-NEXT: s_lshr_b64 s[2:3], s[8:9], s15
|
|
587 ; SI-NEXT: v_mov_b32_e32 v0, s23
|
|
588 ; SI-NEXT: s_or_b32 s8, s17, s11
|
|
589 ; SI-NEXT: v_mov_b32_e32 v1, s27
|
|
590 ; SI-NEXT: v_mov_b32_e32 v4, s31
|
|
591 ; SI-NEXT: v_mov_b32_e32 v5, s33
|
|
592 ; SI-NEXT: v_mov_b32_e32 v2, s3
|
|
593 ; SI-NEXT: v_mov_b32_e32 v3, s1
|
|
594 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s14, 23
|
|
595 ; SI-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
|
596 ; SI-NEXT: v_mov_b32_e32 v3, s2
|
|
597 ; SI-NEXT: v_mov_b32_e32 v6, s0
|
|
598 ; SI-NEXT: s_lshl_b64 s[0:1], s[8:9], s24
|
|
599 ; SI-NEXT: s_lshr_b64 s[2:3], s[8:9], s25
|
|
600 ; SI-NEXT: s_or_b32 s8, s19, s11
|
|
601 ; SI-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
|
|
602 ; SI-NEXT: v_xor_b32_e32 v2, s23, v2
|
|
603 ; SI-NEXT: v_mov_b32_e32 v6, s3
|
|
604 ; SI-NEXT: v_mov_b32_e32 v7, s1
|
|
605 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s26, 23
|
|
606 ; SI-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
|
|
607 ; SI-NEXT: v_mov_b32_e32 v7, s2
|
|
608 ; SI-NEXT: v_mov_b32_e32 v8, s0
|
|
609 ; SI-NEXT: s_lshl_b64 s[0:1], s[8:9], s28
|
|
610 ; SI-NEXT: s_lshr_b64 s[2:3], s[8:9], s29
|
|
611 ; SI-NEXT: s_or_b32 s8, s21, s11
|
|
612 ; SI-NEXT: v_xor_b32_e32 v3, s16, v3
|
|
613 ; SI-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc
|
|
614 ; SI-NEXT: v_xor_b32_e32 v6, s27, v6
|
|
615 ; SI-NEXT: v_mov_b32_e32 v8, s3
|
|
616 ; SI-NEXT: v_mov_b32_e32 v9, s1
|
|
617 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s30, 23
|
|
618 ; SI-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
|
|
619 ; SI-NEXT: v_mov_b32_e32 v9, s2
|
|
620 ; SI-NEXT: v_mov_b32_e32 v10, s0
|
|
621 ; SI-NEXT: s_lshl_b64 s[2:3], s[8:9], s10
|
|
622 ; SI-NEXT: s_lshr_b64 s[8:9], s[8:9], s12
|
|
623 ; SI-NEXT: v_subrev_i32_e64 v11, s[0:1], s16, v3
|
|
624 ; SI-NEXT: v_subb_u32_e64 v0, s[0:1], v2, v0, s[0:1]
|
|
625 ; SI-NEXT: v_xor_b32_e32 v7, s18, v7
|
|
626 ; SI-NEXT: v_cndmask_b32_e32 v9, v9, v10, vcc
|
|
627 ; SI-NEXT: v_xor_b32_e32 v8, s31, v8
|
|
628 ; SI-NEXT: v_mov_b32_e32 v2, s9
|
|
629 ; SI-NEXT: v_mov_b32_e32 v3, s3
|
|
630 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s13, 23
|
|
631 ; SI-NEXT: v_cndmask_b32_e32 v10, v2, v3, vcc
|
|
632 ; SI-NEXT: v_mov_b32_e32 v12, s8
|
|
633 ; SI-NEXT: v_mov_b32_e32 v13, s2
|
|
634 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s14, 0
|
|
635 ; SI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[0:1]
|
|
636 ; SI-NEXT: v_cndmask_b32_e64 v2, v11, 0, s[0:1]
|
|
637 ; SI-NEXT: v_subrev_i32_e64 v0, s[0:1], s18, v7
|
|
638 ; SI-NEXT: v_subb_u32_e64 v1, s[0:1], v6, v1, s[0:1]
|
|
639 ; SI-NEXT: v_xor_b32_e32 v6, s20, v9
|
|
640 ; SI-NEXT: v_cndmask_b32_e32 v7, v12, v13, vcc
|
|
641 ; SI-NEXT: v_xor_b32_e32 v9, s33, v10
|
|
642 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s26, 0
|
|
643 ; SI-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1]
|
|
644 ; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1]
|
|
645 ; SI-NEXT: v_subrev_i32_e32 v6, vcc, s20, v6
|
|
646 ; SI-NEXT: v_subb_u32_e32 v4, vcc, v8, v4, vcc
|
|
647 ; SI-NEXT: v_xor_b32_e32 v8, s22, v7
|
|
648 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s30, 0
|
|
649 ; SI-NEXT: v_cndmask_b32_e64 v7, v4, 0, s[0:1]
|
|
650 ; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[0:1]
|
|
651 ; SI-NEXT: v_subrev_i32_e32 v4, vcc, s22, v8
|
|
652 ; SI-NEXT: v_subb_u32_e32 v5, vcc, v9, v5, vcc
|
|
653 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s13, 0
|
|
654 ; SI-NEXT: v_cndmask_b32_e64 v5, v5, 0, s[0:1]
|
|
655 ; SI-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[0:1]
|
|
656 ; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
|
|
657 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
658 ; SI-NEXT: s_endpgm
|
|
659 ;
|
|
660 ; VI-LABEL: fp_to_sint_v4i64:
|
|
661 ; VI: ; %bb.0:
|
|
662 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
|
|
663 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
|
|
664 ; VI-NEXT: s_mov_b32 s16, 0x7fffff
|
|
665 ; VI-NEXT: s_movk_i32 s14, 0xff6a
|
|
666 ; VI-NEXT: s_mov_b32 s17, 0x800000
|
|
667 ; VI-NEXT: s_movk_i32 s18, 0x96
|
|
668 ; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
669 ; VI-NEXT: s_bfe_u32 s15, s1, 0x80017
|
|
670 ; VI-NEXT: s_and_b32 s8, s1, s16
|
|
671 ; VI-NEXT: s_add_i32 s10, s15, s14
|
|
672 ; VI-NEXT: s_or_b32 s8, s8, s17
|
|
673 ; VI-NEXT: s_mov_b32 s9, 0
|
|
674 ; VI-NEXT: s_sub_i32 s12, s18, s15
|
|
675 ; VI-NEXT: s_movk_i32 s19, 0xff81
|
|
676 ; VI-NEXT: s_lshl_b64 s[10:11], s[8:9], s10
|
|
677 ; VI-NEXT: s_lshr_b64 s[12:13], s[8:9], s12
|
|
678 ; VI-NEXT: s_add_i32 s15, s15, s19
|
|
679 ; VI-NEXT: v_mov_b32_e32 v0, s13
|
|
680 ; VI-NEXT: v_mov_b32_e32 v1, s11
|
|
681 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s15, 23
|
|
682 ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
|
683 ; VI-NEXT: v_mov_b32_e32 v2, s10
|
|
684 ; VI-NEXT: v_mov_b32_e32 v1, s12
|
|
685 ; VI-NEXT: s_ashr_i32 s1, s1, 31
|
|
686 ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
|
687 ; VI-NEXT: s_ashr_i32 s8, s1, 31
|
|
688 ; VI-NEXT: v_xor_b32_e32 v1, s1, v1
|
|
689 ; VI-NEXT: v_subrev_u32_e32 v1, vcc, s1, v1
|
|
690 ; VI-NEXT: v_xor_b32_e32 v0, s8, v0
|
|
691 ; VI-NEXT: v_mov_b32_e32 v2, s8
|
|
692 ; VI-NEXT: s_and_b32 s8, s0, s16
|
|
693 ; VI-NEXT: s_bfe_u32 s1, s0, 0x80017
|
|
694 ; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v2, vcc
|
|
695 ; VI-NEXT: v_cmp_lt_i32_e64 s[10:11], s15, 0
|
|
696 ; VI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[10:11]
|
|
697 ; VI-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[10:11]
|
|
698 ; VI-NEXT: s_add_i32 s10, s1, s14
|
|
699 ; VI-NEXT: s_or_b32 s8, s8, s17
|
|
700 ; VI-NEXT: s_sub_i32 s12, s18, s1
|
|
701 ; VI-NEXT: s_lshl_b64 s[10:11], s[8:9], s10
|
|
702 ; VI-NEXT: s_lshr_b64 s[12:13], s[8:9], s12
|
|
703 ; VI-NEXT: s_add_i32 s1, s1, s19
|
|
704 ; VI-NEXT: v_mov_b32_e32 v0, s13
|
|
705 ; VI-NEXT: v_mov_b32_e32 v1, s11
|
|
706 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s1, 23
|
|
707 ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
|
708 ; VI-NEXT: v_mov_b32_e32 v1, s12
|
|
709 ; VI-NEXT: v_mov_b32_e32 v4, s10
|
|
710 ; VI-NEXT: s_ashr_i32 s0, s0, 31
|
|
711 ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
|
|
712 ; VI-NEXT: s_ashr_i32 s8, s0, 31
|
|
713 ; VI-NEXT: v_xor_b32_e32 v1, s0, v1
|
|
714 ; VI-NEXT: v_subrev_u32_e32 v5, vcc, s0, v1
|
|
715 ; VI-NEXT: v_xor_b32_e32 v0, s8, v0
|
|
716 ; VI-NEXT: v_mov_b32_e32 v4, s8
|
|
717 ; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v4, vcc
|
|
718 ; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], s1, 0
|
|
719 ; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1]
|
|
720 ; VI-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[0:1]
|
|
721 ; VI-NEXT: s_bfe_u32 s12, s3, 0x80017
|
|
722 ; VI-NEXT: s_and_b32 s1, s3, s16
|
|
723 ; VI-NEXT: s_add_i32 s0, s12, s14
|
|
724 ; VI-NEXT: s_or_b32 s8, s1, s17
|
|
725 ; VI-NEXT: s_sub_i32 s10, s18, s12
|
|
726 ; VI-NEXT: s_lshl_b64 s[0:1], s[8:9], s0
|
|
727 ; VI-NEXT: s_lshr_b64 s[10:11], s[8:9], s10
|
|
728 ; VI-NEXT: s_add_i32 s12, s12, s19
|
|
729 ; VI-NEXT: v_mov_b32_e32 v4, s11
|
|
730 ; VI-NEXT: v_mov_b32_e32 v5, s1
|
|
731 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s12, 23
|
|
732 ; VI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
|
|
733 ; VI-NEXT: v_mov_b32_e32 v6, s0
|
|
734 ; VI-NEXT: v_mov_b32_e32 v5, s10
|
|
735 ; VI-NEXT: s_ashr_i32 s0, s3, 31
|
|
736 ; VI-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc
|
|
737 ; VI-NEXT: s_ashr_i32 s1, s0, 31
|
|
738 ; VI-NEXT: v_xor_b32_e32 v5, s0, v5
|
|
739 ; VI-NEXT: v_xor_b32_e32 v4, s1, v4
|
|
740 ; VI-NEXT: v_mov_b32_e32 v6, s1
|
|
741 ; VI-NEXT: v_subrev_u32_e32 v5, vcc, s0, v5
|
|
742 ; VI-NEXT: v_subb_u32_e32 v4, vcc, v4, v6, vcc
|
|
743 ; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], s12, 0
|
|
744 ; VI-NEXT: s_bfe_u32 s3, s2, 0x80017
|
|
745 ; VI-NEXT: v_cndmask_b32_e64 v7, v4, 0, s[0:1]
|
|
746 ; VI-NEXT: v_cndmask_b32_e64 v6, v5, 0, s[0:1]
|
|
747 ; VI-NEXT: s_and_b32 s1, s2, s16
|
|
748 ; VI-NEXT: s_add_i32 s0, s3, s14
|
|
749 ; VI-NEXT: s_or_b32 s8, s1, s17
|
|
750 ; VI-NEXT: s_sub_i32 s10, s18, s3
|
|
751 ; VI-NEXT: s_lshl_b64 s[0:1], s[8:9], s0
|
|
752 ; VI-NEXT: s_lshr_b64 s[8:9], s[8:9], s10
|
|
753 ; VI-NEXT: s_add_i32 s3, s3, s19
|
|
754 ; VI-NEXT: v_mov_b32_e32 v4, s9
|
|
755 ; VI-NEXT: v_mov_b32_e32 v5, s1
|
|
756 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s3, 23
|
|
757 ; VI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
|
|
758 ; VI-NEXT: v_mov_b32_e32 v8, s0
|
|
759 ; VI-NEXT: v_mov_b32_e32 v5, s8
|
|
760 ; VI-NEXT: s_ashr_i32 s0, s2, 31
|
|
761 ; VI-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc
|
|
762 ; VI-NEXT: s_ashr_i32 s1, s0, 31
|
|
763 ; VI-NEXT: v_xor_b32_e32 v5, s0, v5
|
|
764 ; VI-NEXT: v_xor_b32_e32 v4, s1, v4
|
|
765 ; VI-NEXT: v_mov_b32_e32 v8, s1
|
|
766 ; VI-NEXT: v_subrev_u32_e32 v9, vcc, s0, v5
|
|
767 ; VI-NEXT: v_subb_u32_e32 v4, vcc, v4, v8, vcc
|
|
768 ; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], s3, 0
|
|
769 ; VI-NEXT: v_cndmask_b32_e64 v5, v4, 0, s[0:1]
|
|
770 ; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
771 ; VI-NEXT: s_mov_b32 s6, -1
|
|
772 ; VI-NEXT: v_cndmask_b32_e64 v4, v9, 0, s[0:1]
|
|
773 ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
|
|
774 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
775 ; VI-NEXT: s_endpgm
|
|
776 ;
|
|
777 ; EG-LABEL: fp_to_sint_v4i64:
|
|
778 ; EG: ; %bb.0:
|
|
779 ; EG-NEXT: ALU 101, @6, KC0[CB0:0-32], KC1[]
|
|
780 ; EG-NEXT: ALU 58, @108, KC0[CB0:0-32], KC1[]
|
|
781 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T0.X, 0
|
|
782 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T2.X, 1
|
|
783 ; EG-NEXT: CF_END
|
|
784 ; EG-NEXT: PAD
|
|
785 ; EG-NEXT: ALU clause starting at 6:
|
|
786 ; EG-NEXT: MOV * T0.W, literal.x,
|
|
787 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
|
|
788 ; EG-NEXT: BFE_UINT T1.W, KC0[4].X, literal.x, PV.W,
|
|
789 ; EG-NEXT: AND_INT * T2.W, KC0[4].X, literal.y,
|
|
790 ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
|
|
791 ; EG-NEXT: OR_INT T0.Z, PS, literal.x,
|
|
792 ; EG-NEXT: BFE_UINT T2.W, KC0[3].Z, literal.y, T0.W,
|
|
793 ; EG-NEXT: ADD_INT * T3.W, PV.W, literal.z,
|
|
794 ; EG-NEXT: 8388608(1.175494e-38), 23(3.222986e-44)
|
|
795 ; EG-NEXT: -150(nan), 0(0.000000e+00)
|
|
796 ; EG-NEXT: ADD_INT T0.Y, PV.W, literal.x,
|
|
797 ; EG-NEXT: AND_INT T1.Z, PS, literal.y,
|
|
798 ; EG-NEXT: NOT_INT T4.W, PS,
|
|
799 ; EG-NEXT: LSHR * T5.W, PV.Z, 1,
|
|
800 ; EG-NEXT: -127(nan), 31(4.344025e-44)
|
|
801 ; EG-NEXT: ADD_INT T0.X, T1.W, literal.x,
|
|
802 ; EG-NEXT: BIT_ALIGN_INT T1.Y, 0.0, PS, PV.W,
|
|
803 ; EG-NEXT: AND_INT T2.Z, T3.W, literal.y, BS:VEC_201
|
|
804 ; EG-NEXT: LSHL T3.W, T0.Z, PV.Z,
|
|
805 ; EG-NEXT: SUB_INT * T1.W, literal.z, T1.W,
|
|
806 ; EG-NEXT: -127(nan), 32(4.484155e-44)
|
|
807 ; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
|
|
808 ; EG-NEXT: AND_INT T1.X, PS, literal.x,
|
|
809 ; EG-NEXT: BIT_ALIGN_INT T2.Y, 0.0, T0.Z, PS,
|
|
810 ; EG-NEXT: AND_INT T0.Z, KC0[3].Z, literal.y,
|
|
811 ; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.Y, PV.W,
|
|
812 ; EG-NEXT: SETGT_INT * T4.W, PV.X, literal.z,
|
|
813 ; EG-NEXT: 32(4.484155e-44), 8388607(1.175494e-38)
|
|
814 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
|
|
815 ; EG-NEXT: CNDE_INT T2.X, PS, 0.0, PV.W,
|
|
816 ; EG-NEXT: OR_INT T1.Y, PV.Z, literal.x,
|
|
817 ; EG-NEXT: ADD_INT T0.Z, T2.W, literal.y,
|
|
818 ; EG-NEXT: CNDE_INT T1.W, PV.X, PV.Y, 0.0,
|
|
819 ; EG-NEXT: CNDE_INT * T3.W, T2.Z, T3.W, 0.0,
|
|
820 ; EG-NEXT: 8388608(1.175494e-38), -150(nan)
|
|
821 ; EG-NEXT: CNDE_INT T1.X, T4.W, PV.W, PS,
|
|
822 ; EG-NEXT: ASHR T2.Y, KC0[4].X, literal.x,
|
|
823 ; EG-NEXT: AND_INT T1.Z, PV.Z, literal.x,
|
|
824 ; EG-NEXT: NOT_INT T1.W, PV.Z,
|
|
825 ; EG-NEXT: LSHR * T3.W, PV.Y, 1,
|
|
826 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
827 ; EG-NEXT: BIT_ALIGN_INT T3.X, 0.0, PS, PV.W,
|
|
828 ; EG-NEXT: LSHL T3.Y, T1.Y, PV.Z,
|
|
829 ; EG-NEXT: XOR_INT T1.Z, PV.X, PV.Y,
|
|
830 ; EG-NEXT: XOR_INT T1.W, T2.X, PV.Y,
|
|
831 ; EG-NEXT: SUB_INT * T2.W, literal.x, T2.W,
|
|
832 ; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
|
|
833 ; EG-NEXT: AND_INT T1.X, T0.Z, literal.x,
|
|
834 ; EG-NEXT: AND_INT T4.Y, PS, literal.x,
|
|
835 ; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, T1.Y, PS, BS:VEC_021/SCL_122
|
|
836 ; EG-NEXT: SUB_INT T1.W, PV.W, T2.Y,
|
|
837 ; EG-NEXT: SUBB_UINT * T2.W, PV.Z, T2.Y,
|
|
838 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
|
|
839 ; EG-NEXT: SUB_INT T2.X, PV.W, PS,
|
|
840 ; EG-NEXT: CNDE_INT T1.Y, PV.Y, PV.Z, 0.0,
|
|
841 ; EG-NEXT: CNDE_INT T0.Z, PV.X, T3.Y, 0.0,
|
|
842 ; EG-NEXT: CNDE_INT T1.W, PV.X, T3.X, T3.Y, BS:VEC_021/SCL_122
|
|
843 ; EG-NEXT: SETGT_INT * T2.W, T0.Y, literal.x,
|
|
844 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
|
|
845 ; EG-NEXT: BFE_UINT T1.X, KC0[3].W, literal.x, T0.W,
|
|
846 ; EG-NEXT: AND_INT T3.Y, KC0[3].W, literal.y,
|
|
847 ; EG-NEXT: CNDE_INT T2.Z, PS, 0.0, PV.W,
|
|
848 ; EG-NEXT: CNDE_INT T1.W, PS, PV.Y, PV.Z,
|
|
849 ; EG-NEXT: ASHR * T2.W, KC0[3].Z, literal.z,
|
|
850 ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
|
|
851 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
852 ; EG-NEXT: BFE_UINT T3.X, KC0[3].Y, literal.x, T0.W,
|
|
853 ; EG-NEXT: XOR_INT T1.Y, PV.W, PS,
|
|
854 ; EG-NEXT: XOR_INT T0.Z, PV.Z, PS,
|
|
855 ; EG-NEXT: OR_INT T0.W, PV.Y, literal.y,
|
|
856 ; EG-NEXT: SUB_INT * T1.W, literal.z, PV.X,
|
|
857 ; EG-NEXT: 23(3.222986e-44), 8388608(1.175494e-38)
|
|
858 ; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
|
|
859 ; EG-NEXT: AND_INT T4.X, KC0[3].Y, literal.x,
|
|
860 ; EG-NEXT: AND_INT T3.Y, PS, literal.y,
|
|
861 ; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PV.W, PS,
|
|
862 ; EG-NEXT: SUB_INT T1.W, PV.Z, T2.W,
|
|
863 ; EG-NEXT: SUBB_UINT * T3.W, PV.Y, T2.W,
|
|
864 ; EG-NEXT: 8388607(1.175494e-38), 32(4.484155e-44)
|
|
865 ; EG-NEXT: SUB_INT T5.X, PV.W, PS,
|
|
866 ; EG-NEXT: SETGT_INT T0.Y, T0.Y, literal.x,
|
|
867 ; EG-NEXT: CNDE_INT T0.Z, PV.Y, PV.Z, 0.0,
|
|
868 ; EG-NEXT: OR_INT T1.W, PV.X, literal.y,
|
|
869 ; EG-NEXT: ADD_INT * T3.W, T3.X, literal.z,
|
|
870 ; EG-NEXT: -1(nan), 8388608(1.175494e-38)
|
|
871 ; EG-NEXT: -150(nan), 0(0.000000e+00)
|
|
872 ; EG-NEXT: ADD_INT T4.X, T3.X, literal.x,
|
|
873 ; EG-NEXT: SUB_INT T3.Y, literal.y, T3.X,
|
|
874 ; EG-NEXT: AND_INT T2.Z, PS, literal.z,
|
|
875 ; EG-NEXT: NOT_INT T4.W, PS,
|
|
876 ; EG-NEXT: LSHR * T5.W, PV.W, 1,
|
|
877 ; EG-NEXT: -127(nan), 150(2.101948e-43)
|
|
878 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
879 ; EG-NEXT: BIT_ALIGN_INT T3.X, 0.0, PS, PV.W,
|
|
880 ; EG-NEXT: LSHL T4.Y, T1.W, PV.Z,
|
|
881 ; EG-NEXT: AND_INT T2.Z, T3.W, literal.x, BS:VEC_120/SCL_212
|
|
882 ; EG-NEXT: BIT_ALIGN_INT T1.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122
|
|
883 ; EG-NEXT: AND_INT * T3.W, PV.Y, literal.x,
|
|
884 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
|
|
885 ; EG-NEXT: ADD_INT T6.X, T1.X, literal.x,
|
|
886 ; EG-NEXT: CNDE_INT * T3.Y, PS, PV.W, 0.0,
|
|
887 ; EG-NEXT: -150(nan), 0(0.000000e+00)
|
|
888 ; EG-NEXT: ALU clause starting at 108:
|
|
889 ; EG-NEXT: CNDE_INT T3.Z, T2.Z, T4.Y, 0.0,
|
|
890 ; EG-NEXT: CNDE_INT T1.W, T2.Z, T3.X, T4.Y,
|
|
891 ; EG-NEXT: SETGT_INT * T3.W, T4.X, literal.x,
|
|
892 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
|
|
893 ; EG-NEXT: CNDE_INT T3.X, PS, 0.0, PV.W,
|
|
894 ; EG-NEXT: CNDE_INT T3.Y, PS, T3.Y, PV.Z,
|
|
895 ; EG-NEXT: AND_INT T2.Z, T6.X, literal.x,
|
|
896 ; EG-NEXT: NOT_INT T1.W, T6.X,
|
|
897 ; EG-NEXT: LSHR * T3.W, T0.W, 1,
|
|
898 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
899 ; EG-NEXT: ASHR T7.X, KC0[3].Y, literal.x,
|
|
900 ; EG-NEXT: ADD_INT T4.Y, T1.X, literal.y,
|
|
901 ; EG-NEXT: BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W,
|
|
902 ; EG-NEXT: LSHL T0.W, T0.W, PV.Z,
|
|
903 ; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
|
|
904 ; EG-NEXT: 31(4.344025e-44), -127(nan)
|
|
905 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
|
|
906 ; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0,
|
|
907 ; EG-NEXT: CNDE_INT T5.Y, PS, PV.Z, PV.W,
|
|
908 ; EG-NEXT: SETGT_INT T2.Z, PV.Y, literal.x,
|
|
909 ; EG-NEXT: XOR_INT T0.W, T3.Y, PV.X,
|
|
910 ; EG-NEXT: XOR_INT * T1.W, T3.X, PV.X,
|
|
911 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
|
|
912 ; EG-NEXT: SUB_INT T3.X, PS, T7.X,
|
|
913 ; EG-NEXT: SUBB_UINT T3.Y, PV.W, T7.X,
|
|
914 ; EG-NEXT: CNDE_INT T3.Z, PV.Z, 0.0, PV.Y,
|
|
915 ; EG-NEXT: CNDE_INT T1.W, PV.Z, T0.Z, PV.X,
|
|
916 ; EG-NEXT: ASHR * T3.W, KC0[3].W, literal.x,
|
|
917 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
918 ; EG-NEXT: XOR_INT T1.X, PV.W, PS,
|
|
919 ; EG-NEXT: XOR_INT T5.Y, PV.Z, PS,
|
|
920 ; EG-NEXT: SUB_INT T0.Z, PV.X, PV.Y,
|
|
921 ; EG-NEXT: SETGT_INT T1.W, T4.X, literal.x,
|
|
922 ; EG-NEXT: CNDE_INT * T6.W, T0.Y, 0.0, T5.X, BS:VEC_021/SCL_122
|
|
923 ; EG-NEXT: -1(nan), 0(0.000000e+00)
|
|
924 ; EG-NEXT: SETGT_INT T0.X, T0.X, literal.x,
|
|
925 ; EG-NEXT: CNDE_INT T6.Y, PV.W, 0.0, PV.Z,
|
|
926 ; EG-NEXT: SUB_INT T0.Z, T1.Y, T2.W, BS:VEC_021/SCL_122
|
|
927 ; EG-NEXT: SUB_INT T2.W, PV.Y, T3.W,
|
|
928 ; EG-NEXT: SUBB_UINT * T4.W, PV.X, T3.W,
|
|
929 ; EG-NEXT: -1(nan), 0(0.000000e+00)
|
|
930 ; EG-NEXT: SUB_INT T3.X, PV.W, PS,
|
|
931 ; EG-NEXT: SETGT_INT T1.Y, T4.Y, literal.x,
|
|
932 ; EG-NEXT: CNDE_INT T6.Z, T0.Y, 0.0, PV.Z, BS:VEC_120/SCL_212
|
|
933 ; EG-NEXT: SUB_INT T0.W, T0.W, T7.X,
|
|
934 ; EG-NEXT: CNDE_INT * T4.W, PV.X, 0.0, T2.X, BS:VEC_021/SCL_122
|
|
935 ; EG-NEXT: -1(nan), 0(0.000000e+00)
|
|
936 ; EG-NEXT: CNDE_INT T6.X, T1.W, 0.0, PV.W,
|
|
937 ; EG-NEXT: CNDE_INT T4.Y, PV.Y, 0.0, PV.X,
|
|
938 ; EG-NEXT: SUB_INT T0.W, T1.Z, T2.Y,
|
|
939 ; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
|
|
940 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
941 ; EG-NEXT: CNDE_INT T4.Z, T0.X, 0.0, PV.W,
|
|
942 ; EG-NEXT: SUB_INT * T0.W, T1.X, T3.W, BS:VEC_120/SCL_212
|
|
943 ; EG-NEXT: CNDE_INT T4.X, T1.Y, 0.0, PV.W,
|
|
944 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
|
|
945 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
946 ; EG-NEXT: LSHR * T0.X, PV.W, literal.x,
|
|
947 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
150
|
948 %conv = fptosi <4 x float> %x to <4 x i64>
|
|
949 store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
|
|
950 ret void
|
|
951 }
|
|
952
|
|
; Kernel under test: converts the float argument %in to i1 with fptosi and
; stores the result through %out (addrspace(1)).
; The autogenerated assertions below expect the conversion to be lowered to a
; floating-point equality compare of the input against -1.0; the selected 0/1
; value is then written with buffer_store_byte on amdgcn and with
; MEM_RAT MSKOR on r600.
; NOTE(review): the function name says "fp_to_uint" but the body uses the
; signed conversion fptosi, which is what the -1.0 compare reflects. Renaming
; the function or changing the instruction would require regenerating the
; assertions, so confirm which one is intended before touching either.
953 define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
|
221
|
954 ; SI-LABEL: fp_to_uint_f32_to_i1:
|
|
955 ; SI: ; %bb.0:
|
|
956 ; SI-NEXT: s_load_dword s4, s[0:1], 0xb
|
|
957 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
|
|
958 ; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
959 ; SI-NEXT: s_mov_b32 s2, -1
|
|
960 ; SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
961 ; SI-NEXT: v_cmp_eq_f32_e64 s[4:5], -1.0, s4
|
|
962 ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
|
|
963 ; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
|
|
964 ; SI-NEXT: s_endpgm
|
|
965 ;
|
|
966 ; VI-LABEL: fp_to_uint_f32_to_i1:
|
|
967 ; VI: ; %bb.0:
|
|
968 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
|
|
969 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
|
|
970 ; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
971 ; VI-NEXT: s_mov_b32 s6, -1
|
|
972 ; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
973 ; VI-NEXT: v_cmp_eq_f32_e64 s[0:1], -1.0, s0
|
|
974 ; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
|
|
975 ; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
976 ; VI-NEXT: s_endpgm
|
|
977 ;
|
|
978 ; EG-LABEL: fp_to_uint_f32_to_i1:
|
|
979 ; EG: ; %bb.0:
|
|
980 ; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
|
|
981 ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
|
|
982 ; EG-NEXT: CF_END
|
|
983 ; EG-NEXT: PAD
|
|
984 ; EG-NEXT: ALU clause starting at 4:
|
|
985 ; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
|
|
986 ; EG-NEXT: SETE_DX10 * T1.W, KC0[2].Z, literal.y,
|
|
987 ; EG-NEXT: 3(4.203895e-45), -1082130432(-1.000000e+00)
|
|
988 ; EG-NEXT: AND_INT T1.W, PS, 1,
|
|
989 ; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
|
990 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
|
|
991 ; EG-NEXT: LSHL T0.X, PV.W, PS,
|
|
992 ; EG-NEXT: LSHL * T0.W, literal.x, PS,
|
|
993 ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
|
|
994 ; EG-NEXT: MOV T0.Y, 0.0,
|
|
995 ; EG-NEXT: MOV * T0.Z, 0.0,
|
|
996 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
997 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
150
|
998 %conv = fptosi float %in to i1
|
|
999 store i1 %conv, i1 addrspace(1)* %out
|
|
1000 ret void
|
|
1001 }
|
|
1002
|
|
; Kernel under test: takes llvm.fabs.f32 of the float argument %in, converts
; that value to i1 with fptosi, and stores the result through %out
; (addrspace(1)).
; The autogenerated assertions below expect the fabs to show up as an |...|
; source modifier on the equality compare against -1.0 (|s4|, |s0|,
; |KC0[2].Z|) rather than as a separate instruction.
; NOTE(review): the function name says "fp_to_uint" but the body uses the
; signed conversion fptosi - confirm which one is intended; changing either
; requires regenerating the assertions.
1003 define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
|
221
|
1004 ; SI-LABEL: fp_to_uint_fabs_f32_to_i1:
|
|
1005 ; SI: ; %bb.0:
|
|
1006 ; SI-NEXT: s_load_dword s4, s[0:1], 0xb
|
|
1007 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
|
|
1008 ; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
1009 ; SI-NEXT: s_mov_b32 s2, -1
|
|
1010 ; SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
1011 ; SI-NEXT: v_cmp_eq_f32_e64 s[4:5], -1.0, |s4|
|
|
1012 ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
|
|
1013 ; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
|
|
1014 ; SI-NEXT: s_endpgm
|
|
1015 ;
|
|
1016 ; VI-LABEL: fp_to_uint_fabs_f32_to_i1:
|
|
1017 ; VI: ; %bb.0:
|
|
1018 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
|
|
1019 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
|
|
1020 ; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
1021 ; VI-NEXT: s_mov_b32 s6, -1
|
|
1022 ; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
1023 ; VI-NEXT: v_cmp_eq_f32_e64 s[0:1], -1.0, |s0|
|
|
1024 ; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
|
|
1025 ; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
1026 ; VI-NEXT: s_endpgm
|
|
1027 ;
|
|
1028 ; EG-LABEL: fp_to_uint_fabs_f32_to_i1:
|
|
1029 ; EG: ; %bb.0:
|
|
1030 ; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
|
|
1031 ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
|
|
1032 ; EG-NEXT: CF_END
|
|
1033 ; EG-NEXT: PAD
|
|
1034 ; EG-NEXT: ALU clause starting at 4:
|
|
1035 ; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
|
|
1036 ; EG-NEXT: SETE_DX10 * T1.W, |KC0[2].Z|, literal.y,
|
|
1037 ; EG-NEXT: 3(4.203895e-45), -1082130432(-1.000000e+00)
|
|
1038 ; EG-NEXT: AND_INT T1.W, PS, 1,
|
|
1039 ; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
|
1040 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
|
|
1041 ; EG-NEXT: LSHL T0.X, PV.W, PS,
|
|
1042 ; EG-NEXT: LSHL * T0.W, literal.x, PS,
|
|
1043 ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
|
|
1044 ; EG-NEXT: MOV T0.Y, 0.0,
|
|
1045 ; EG-NEXT: MOV * T0.Z, 0.0,
|
|
1046 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
1047 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
150
|
1048 %in.fabs = call float @llvm.fabs.f32(float %in)
|
|
1049 %conv = fptosi float %in.fabs to i1
|
|
1050 store i1 %conv, i1 addrspace(1)* %out
|
|
1051 ret void
|
|
1052 }
|
|
1053
|
|
; Kernel under test: converts the float argument %in to i16 with fptosi and
; stores the result through %out (addrspace(1)).
; The autogenerated assertions below expect a float-to-i32 conversion
; (v_cvt_i32_f32 on amdgcn; TRUNC followed by FLT_TO_INT on r600) whose result
; is then written as 16 bits: buffer_store_short on amdgcn, and on r600 an
; AND with 65535 followed by a MEM_RAT MSKOR write.
1054 define amdgpu_kernel void @fp_to_sint_f32_i16(i16 addrspace(1)* %out, float %in) #0 {
|
221
|
1055 ; SI-LABEL: fp_to_sint_f32_i16:
|
|
1056 ; SI: ; %bb.0:
|
|
1057 ; SI-NEXT: s_load_dword s4, s[0:1], 0xb
|
|
1058 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
|
|
1059 ; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
1060 ; SI-NEXT: s_mov_b32 s2, -1
|
|
1061 ; SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
1062 ; SI-NEXT: v_cvt_i32_f32_e32 v0, s4
|
|
1063 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
|
|
1064 ; SI-NEXT: s_endpgm
|
|
1065 ;
|
|
1066 ; VI-LABEL: fp_to_sint_f32_i16:
|
|
1067 ; VI: ; %bb.0:
|
|
1068 ; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
|
|
1069 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
|
|
1070 ; VI-NEXT: s_mov_b32 s3, 0xf000
|
|
1071 ; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
1072 ; VI-NEXT: v_cvt_i32_f32_e32 v0, s2
|
|
1073 ; VI-NEXT: s_mov_b32 s2, -1
|
|
1074 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
|
|
1075 ; VI-NEXT: s_endpgm
|
|
1076 ;
|
|
1077 ; EG-LABEL: fp_to_sint_f32_i16:
|
|
1078 ; EG: ; %bb.0:
|
|
1079 ; EG-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[]
|
|
1080 ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
|
|
1081 ; EG-NEXT: CF_END
|
|
1082 ; EG-NEXT: PAD
|
|
1083 ; EG-NEXT: ALU clause starting at 4:
|
|
1084 ; EG-NEXT: TRUNC T0.W, KC0[2].Z,
|
|
1085 ; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.x,
|
|
1086 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
|
|
1087 ; EG-NEXT: FLT_TO_INT * T0.W, PV.W,
|
|
1088 ; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
|
|
1089 ; EG-NEXT: LSHL * T1.W, T1.W, literal.y,
|
|
1090 ; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
|
|
1091 ; EG-NEXT: LSHL T0.X, PV.W, PS,
|
|
1092 ; EG-NEXT: LSHL * T0.W, literal.x, PS,
|
|
1093 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
|
1094 ; EG-NEXT: MOV T0.Y, 0.0,
|
|
1095 ; EG-NEXT: MOV * T0.Z, 0.0,
|
|
1096 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
1097 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
150
|
1098 %sint = fptosi float %in to i16
|
|
1099 store i16 %sint, i16 addrspace(1)* %out
|
|
1100 ret void
|
|
1101 }
|
|
1102
|
|
; Attribute groups used by this file: #0 (nounwind) is attached to the kernel
; definitions that name it, and #1 (nounwind readnone) to the llvm.fabs.f32
; declaration near the top of the file.
1103 attributes #0 = { nounwind }
|
|
1104 attributes #1 = { nounwind readnone }
|