comparison llvm/test/CodeGen/AMDGPU/fp_to_sint.ll @ 221:79ff65ed7e25

LLVM12 Original
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Tue, 15 Jun 2021 19:15:29 +0900 (2021-06-15)
parents 1d019706d866
children 5f17cb93ff66
comparison
equal deleted inserted replaced
220:42394fc6a535 221:79ff65ed7e25
1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=SI --check-prefix=FUNC --check-prefix=GCN 1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN 2 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s --check-prefixes=SI
3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=EG --check-prefix=FUNC 3 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s --check-prefixes=VI
4 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefixes=EG
4 5
5 declare float @llvm.fabs.f32(float) #1 6 declare float @llvm.fabs.f32(float) #1
6 7
7 ; FUNC-LABEL: {{^}}fp_to_sint_i32:
8 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
9 ; SI: v_cvt_i32_f32_e32
10 ; SI: s_endpgm
11 define amdgpu_kernel void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) { 8 define amdgpu_kernel void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) {
9 ; SI-LABEL: fp_to_sint_i32:
10 ; SI: ; %bb.0:
11 ; SI-NEXT: s_load_dword s4, s[0:1], 0xb
12 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
13 ; SI-NEXT: s_mov_b32 s3, 0xf000
14 ; SI-NEXT: s_mov_b32 s2, -1
15 ; SI-NEXT: s_waitcnt lgkmcnt(0)
16 ; SI-NEXT: v_cvt_i32_f32_e32 v0, s4
17 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
18 ; SI-NEXT: s_endpgm
19 ;
20 ; VI-LABEL: fp_to_sint_i32:
21 ; VI: ; %bb.0:
22 ; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
23 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
24 ; VI-NEXT: s_mov_b32 s3, 0xf000
25 ; VI-NEXT: s_waitcnt lgkmcnt(0)
26 ; VI-NEXT: v_cvt_i32_f32_e32 v0, s2
27 ; VI-NEXT: s_mov_b32 s2, -1
28 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
29 ; VI-NEXT: s_endpgm
30 ;
31 ; EG-LABEL: fp_to_sint_i32:
32 ; EG: ; %bb.0:
33 ; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
34 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
35 ; EG-NEXT: CF_END
36 ; EG-NEXT: PAD
37 ; EG-NEXT: ALU clause starting at 4:
38 ; EG-NEXT: TRUNC * T0.W, KC0[2].Z,
39 ; EG-NEXT: FLT_TO_INT T0.X, PV.W,
40 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
41 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
12 %conv = fptosi float %in to i32 42 %conv = fptosi float %in to i32
13 store i32 %conv, i32 addrspace(1)* %out 43 store i32 %conv, i32 addrspace(1)* %out
14 ret void 44 ret void
15 } 45 }
16 46
17 ; FUNC-LABEL: {{^}}fp_to_sint_i32_fabs:
18 ; SI: v_cvt_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}}
19 define amdgpu_kernel void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) { 47 define amdgpu_kernel void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) {
48 ; SI-LABEL: fp_to_sint_i32_fabs:
49 ; SI: ; %bb.0:
50 ; SI-NEXT: s_load_dword s4, s[0:1], 0xb
51 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
52 ; SI-NEXT: s_mov_b32 s3, 0xf000
53 ; SI-NEXT: s_mov_b32 s2, -1
54 ; SI-NEXT: s_waitcnt lgkmcnt(0)
55 ; SI-NEXT: v_cvt_i32_f32_e64 v0, |s4|
56 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
57 ; SI-NEXT: s_endpgm
58 ;
59 ; VI-LABEL: fp_to_sint_i32_fabs:
60 ; VI: ; %bb.0:
61 ; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
62 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
63 ; VI-NEXT: s_mov_b32 s3, 0xf000
64 ; VI-NEXT: s_waitcnt lgkmcnt(0)
65 ; VI-NEXT: v_cvt_i32_f32_e64 v0, |s2|
66 ; VI-NEXT: s_mov_b32 s2, -1
67 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
68 ; VI-NEXT: s_endpgm
69 ;
70 ; EG-LABEL: fp_to_sint_i32_fabs:
71 ; EG: ; %bb.0:
72 ; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
73 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
74 ; EG-NEXT: CF_END
75 ; EG-NEXT: PAD
76 ; EG-NEXT: ALU clause starting at 4:
77 ; EG-NEXT: TRUNC * T0.W, |KC0[2].Z|,
78 ; EG-NEXT: FLT_TO_INT T0.X, PV.W,
79 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
80 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
20 %in.fabs = call float @llvm.fabs.f32(float %in) 81 %in.fabs = call float @llvm.fabs.f32(float %in)
21 %conv = fptosi float %in.fabs to i32 82 %conv = fptosi float %in.fabs to i32
22 store i32 %conv, i32 addrspace(1)* %out 83 store i32 %conv, i32 addrspace(1)* %out
23 ret void 84 ret void
24 } 85 }
25 86
26 ; FUNC-LABEL: {{^}}fp_to_sint_v2i32:
27 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
28 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
29 ; SI: v_cvt_i32_f32_e32
30 ; SI: v_cvt_i32_f32_e32
31 define amdgpu_kernel void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { 87 define amdgpu_kernel void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
88 ; SI-LABEL: fp_to_sint_v2i32:
89 ; SI: ; %bb.0:
90 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
91 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
92 ; SI-NEXT: s_mov_b32 s3, 0xf000
93 ; SI-NEXT: s_mov_b32 s2, -1
94 ; SI-NEXT: s_waitcnt lgkmcnt(0)
95 ; SI-NEXT: v_cvt_i32_f32_e32 v1, s5
96 ; SI-NEXT: v_cvt_i32_f32_e32 v0, s4
97 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
98 ; SI-NEXT: s_endpgm
99 ;
100 ; VI-LABEL: fp_to_sint_v2i32:
101 ; VI: ; %bb.0:
102 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
103 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
104 ; VI-NEXT: s_waitcnt lgkmcnt(0)
105 ; VI-NEXT: v_cvt_i32_f32_e32 v1, s3
106 ; VI-NEXT: v_cvt_i32_f32_e32 v0, s2
107 ; VI-NEXT: s_mov_b32 s3, 0xf000
108 ; VI-NEXT: s_mov_b32 s2, -1
109 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
110 ; VI-NEXT: s_endpgm
111 ;
112 ; EG-LABEL: fp_to_sint_v2i32:
113 ; EG: ; %bb.0:
114 ; EG-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[]
115 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
116 ; EG-NEXT: CF_END
117 ; EG-NEXT: PAD
118 ; EG-NEXT: ALU clause starting at 4:
119 ; EG-NEXT: TRUNC * T0.W, KC0[3].X,
120 ; EG-NEXT: FLT_TO_INT T0.Y, PV.W,
121 ; EG-NEXT: TRUNC * T0.W, KC0[2].W,
122 ; EG-NEXT: FLT_TO_INT T0.X, PV.W,
123 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
124 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
32 %result = fptosi <2 x float> %in to <2 x i32> 125 %result = fptosi <2 x float> %in to <2 x i32>
33 store <2 x i32> %result, <2 x i32> addrspace(1)* %out 126 store <2 x i32> %result, <2 x i32> addrspace(1)* %out
34 ret void 127 ret void
35 } 128 }
36 129
37 ; FUNC-LABEL: {{^}}fp_to_sint_v4i32:
38 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
39 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW]}}
40 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
41 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
42 ; SI: v_cvt_i32_f32_e32
43 ; SI: v_cvt_i32_f32_e32
44 ; SI: v_cvt_i32_f32_e32
45 ; SI: v_cvt_i32_f32_e32
46 define amdgpu_kernel void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { 130 define amdgpu_kernel void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
131 ; SI-LABEL: fp_to_sint_v4i32:
132 ; SI: ; %bb.0:
133 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
134 ; SI-NEXT: s_waitcnt lgkmcnt(0)
135 ; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
136 ; SI-NEXT: s_mov_b32 s3, 0xf000
137 ; SI-NEXT: s_mov_b32 s2, -1
138 ; SI-NEXT: s_waitcnt lgkmcnt(0)
139 ; SI-NEXT: v_cvt_i32_f32_e32 v3, s7
140 ; SI-NEXT: v_cvt_i32_f32_e32 v2, s6
141 ; SI-NEXT: v_cvt_i32_f32_e32 v1, s5
142 ; SI-NEXT: v_cvt_i32_f32_e32 v0, s4
143 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
144 ; SI-NEXT: s_endpgm
145 ;
146 ; VI-LABEL: fp_to_sint_v4i32:
147 ; VI: ; %bb.0:
148 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
149 ; VI-NEXT: s_waitcnt lgkmcnt(0)
150 ; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
151 ; VI-NEXT: s_mov_b32 s3, 0xf000
152 ; VI-NEXT: s_mov_b32 s2, -1
153 ; VI-NEXT: s_waitcnt lgkmcnt(0)
154 ; VI-NEXT: v_cvt_i32_f32_e32 v3, s7
155 ; VI-NEXT: v_cvt_i32_f32_e32 v2, s6
156 ; VI-NEXT: v_cvt_i32_f32_e32 v1, s5
157 ; VI-NEXT: v_cvt_i32_f32_e32 v0, s4
158 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
159 ; VI-NEXT: s_endpgm
160 ;
161 ; EG-LABEL: fp_to_sint_v4i32:
162 ; EG: ; %bb.0:
163 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
164 ; EG-NEXT: TEX 0 @6
165 ; EG-NEXT: ALU 9, @9, KC0[CB0:0-32], KC1[]
166 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
167 ; EG-NEXT: CF_END
168 ; EG-NEXT: PAD
169 ; EG-NEXT: Fetch clause starting at 6:
170 ; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
171 ; EG-NEXT: ALU clause starting at 8:
172 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
173 ; EG-NEXT: ALU clause starting at 9:
174 ; EG-NEXT: TRUNC T0.W, T0.W,
175 ; EG-NEXT: TRUNC * T1.W, T0.Z,
176 ; EG-NEXT: FLT_TO_INT * T0.W, PV.W,
177 ; EG-NEXT: FLT_TO_INT T0.Z, T1.W,
178 ; EG-NEXT: TRUNC * T1.W, T0.Y,
179 ; EG-NEXT: FLT_TO_INT T0.Y, PV.W,
180 ; EG-NEXT: TRUNC * T1.W, T0.X,
181 ; EG-NEXT: FLT_TO_INT T0.X, PV.W,
182 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
183 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
47 %value = load <4 x float>, <4 x float> addrspace(1) * %in 184 %value = load <4 x float>, <4 x float> addrspace(1) * %in
48 %result = fptosi <4 x float> %value to <4 x i32> 185 %result = fptosi <4 x float> %value to <4 x i32>
49 store <4 x i32> %result, <4 x i32> addrspace(1)* %out 186 store <4 x i32> %result, <4 x i32> addrspace(1)* %out
50 ret void 187 ret void
51 } 188 }
52 189
53 ; FUNC-LABEL: {{^}}fp_to_sint_i64:
54
55 ; EG-DAG: AND_INT
56 ; EG-DAG: LSHR
57 ; EG-DAG: SUB_INT
58 ; EG-DAG: AND_INT
59 ; EG-DAG: ASHR
60 ; EG-DAG: AND_INT
61 ; EG-DAG: OR_INT
62 ; EG-DAG: SUB_INT
63 ; EG-DAG: LSHL
64 ; EG-DAG: LSHL
65 ; EG-DAG: SUB_INT
66 ; EG-DAG: LSHR
67 ; EG-DAG: LSHR
68 ; EG-DAG: SETGT_UINT
69 ; EG-DAG: SETGT_INT
70 ; EG-DAG: XOR_INT
71 ; EG-DAG: XOR_INT
72 ; EG: SUB_INT
73 ; EG-DAG: SUB_INT
74 ; EG-DAG: CNDE_INT
75 ; EG-DAG: CNDE_INT
76
77 ; Check that the compiler doesn't crash with a "cannot select" error 190 ; Check that the compiler doesn't crash with a "cannot select" error
78 ; SI: s_endpgm
79 define amdgpu_kernel void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) { 191 define amdgpu_kernel void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) {
192 ; SI-LABEL: fp_to_sint_i64:
193 ; SI: ; %bb.0: ; %entry
194 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
195 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
196 ; SI-NEXT: s_mov_b32 s7, 0xf000
197 ; SI-NEXT: s_mov_b32 s6, -1
198 ; SI-NEXT: s_mov_b32 s1, 0
199 ; SI-NEXT: s_waitcnt lgkmcnt(0)
200 ; SI-NEXT: s_bfe_u32 s8, s0, 0x80017
201 ; SI-NEXT: s_and_b32 s2, s0, 0x7fffff
202 ; SI-NEXT: s_ashr_i32 s9, s0, 31
203 ; SI-NEXT: s_add_i32 s3, s8, 0xffffff6a
204 ; SI-NEXT: s_or_b32 s0, s2, 0x800000
205 ; SI-NEXT: s_sub_i32 s10, 0x96, s8
206 ; SI-NEXT: s_ashr_i32 s11, s9, 31
207 ; SI-NEXT: s_lshl_b64 s[2:3], s[0:1], s3
208 ; SI-NEXT: s_lshr_b64 s[0:1], s[0:1], s10
209 ; SI-NEXT: s_addk_i32 s8, 0xff81
210 ; SI-NEXT: v_mov_b32_e32 v0, s11
211 ; SI-NEXT: v_mov_b32_e32 v1, s1
212 ; SI-NEXT: v_mov_b32_e32 v2, s3
213 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s8, 23
214 ; SI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
215 ; SI-NEXT: v_mov_b32_e32 v2, s0
216 ; SI-NEXT: v_mov_b32_e32 v3, s2
217 ; SI-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
218 ; SI-NEXT: v_xor_b32_e32 v1, s11, v1
219 ; SI-NEXT: v_xor_b32_e32 v2, s9, v2
220 ; SI-NEXT: v_subrev_i32_e32 v2, vcc, s9, v2
221 ; SI-NEXT: v_subb_u32_e32 v0, vcc, v1, v0, vcc
222 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s8, 0
223 ; SI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1]
224 ; SI-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[0:1]
225 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
226 ; SI-NEXT: s_endpgm
227 ;
228 ; VI-LABEL: fp_to_sint_i64:
229 ; VI: ; %bb.0: ; %entry
230 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
231 ; VI-NEXT: s_load_dword s8, s[0:1], 0x2c
232 ; VI-NEXT: s_mov_b32 s1, 0
233 ; VI-NEXT: s_mov_b32 s7, 0xf000
234 ; VI-NEXT: s_mov_b32 s6, -1
235 ; VI-NEXT: s_waitcnt lgkmcnt(0)
236 ; VI-NEXT: s_bfe_u32 s9, s8, 0x80017
237 ; VI-NEXT: s_and_b32 s0, s8, 0x7fffff
238 ; VI-NEXT: s_add_i32 s2, s9, 0xffffff6a
239 ; VI-NEXT: s_bitset1_b32 s0, 23
240 ; VI-NEXT: s_sub_i32 s10, 0x96, s9
241 ; VI-NEXT: s_lshl_b64 s[2:3], s[0:1], s2
242 ; VI-NEXT: s_lshr_b64 s[0:1], s[0:1], s10
243 ; VI-NEXT: s_addk_i32 s9, 0xff81
244 ; VI-NEXT: v_mov_b32_e32 v0, s1
245 ; VI-NEXT: v_mov_b32_e32 v1, s3
246 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s9, 23
247 ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
248 ; VI-NEXT: v_mov_b32_e32 v1, s0
249 ; VI-NEXT: v_mov_b32_e32 v2, s2
250 ; VI-NEXT: s_ashr_i32 s0, s8, 31
251 ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
252 ; VI-NEXT: s_ashr_i32 s1, s0, 31
253 ; VI-NEXT: v_xor_b32_e32 v1, s0, v1
254 ; VI-NEXT: v_xor_b32_e32 v0, s1, v0
255 ; VI-NEXT: v_mov_b32_e32 v2, s1
256 ; VI-NEXT: v_subrev_u32_e32 v3, vcc, s0, v1
257 ; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v2, vcc
258 ; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], s9, 0
259 ; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1]
260 ; VI-NEXT: v_cndmask_b32_e64 v0, v3, 0, s[0:1]
261 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
262 ; VI-NEXT: s_endpgm
263 ;
264 ; EG-LABEL: fp_to_sint_i64:
265 ; EG: ; %bb.0: ; %entry
266 ; EG-NEXT: ALU 41, @4, KC0[CB0:0-32], KC1[]
267 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
268 ; EG-NEXT: CF_END
269 ; EG-NEXT: PAD
270 ; EG-NEXT: ALU clause starting at 4:
271 ; EG-NEXT: MOV * T0.W, literal.x,
272 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
273 ; EG-NEXT: BFE_UINT T0.W, KC0[2].Z, literal.x, PV.W,
274 ; EG-NEXT: AND_INT * T1.W, KC0[2].Z, literal.y,
275 ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
276 ; EG-NEXT: OR_INT T1.W, PS, literal.x,
277 ; EG-NEXT: ADD_INT * T2.W, PV.W, literal.y,
278 ; EG-NEXT: 8388608(1.175494e-38), -150(nan)
279 ; EG-NEXT: ADD_INT T0.X, T0.W, literal.x,
280 ; EG-NEXT: SUB_INT T0.Y, literal.y, T0.W,
281 ; EG-NEXT: AND_INT T0.Z, PS, literal.z,
282 ; EG-NEXT: NOT_INT T0.W, PS,
283 ; EG-NEXT: LSHR * T3.W, PV.W, 1,
284 ; EG-NEXT: -127(nan), 150(2.101948e-43)
285 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
286 ; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
287 ; EG-NEXT: LSHL T1.Y, T1.W, PV.Z,
288 ; EG-NEXT: AND_INT T0.Z, T2.W, literal.x, BS:VEC_120/SCL_212
289 ; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122
290 ; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
291 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
292 ; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0,
293 ; EG-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, 0.0,
294 ; EG-NEXT: CNDE_INT T0.W, PV.Z, PV.X, PV.Y,
295 ; EG-NEXT: SETGT_INT * T1.W, T0.X, literal.x,
296 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
297 ; EG-NEXT: CNDE_INT T0.Z, PS, 0.0, PV.W,
298 ; EG-NEXT: CNDE_INT T0.W, PS, PV.Y, PV.Z,
299 ; EG-NEXT: ASHR * T1.W, KC0[2].Z, literal.x,
300 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
301 ; EG-NEXT: XOR_INT T0.W, PV.W, PS,
302 ; EG-NEXT: XOR_INT * T2.W, PV.Z, PS,
303 ; EG-NEXT: SUB_INT T2.W, PS, T1.W,
304 ; EG-NEXT: SUBB_UINT * T3.W, PV.W, T1.W,
305 ; EG-NEXT: SUB_INT T2.W, PV.W, PS,
306 ; EG-NEXT: SETGT_INT * T3.W, T0.X, literal.x,
307 ; EG-NEXT: -1(nan), 0(0.000000e+00)
308 ; EG-NEXT: CNDE_INT T0.Y, PS, 0.0, PV.W,
309 ; EG-NEXT: SUB_INT * T0.W, T0.W, T1.W,
310 ; EG-NEXT: CNDE_INT T0.X, T3.W, 0.0, PV.W,
311 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
312 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
80 entry: 313 entry:
81 %0 = fptosi float %in to i64 314 %0 = fptosi float %in to i64
82 store i64 %0, i64 addrspace(1)* %out 315 store i64 %0, i64 addrspace(1)* %out
83 ret void 316 ret void
84 } 317 }
85 318
86 ; FUNC: {{^}}fp_to_sint_v2i64:
87 ; EG-DAG: AND_INT
88 ; EG-DAG: LSHR
89 ; EG-DAG: SUB_INT
90 ; EG-DAG: AND_INT
91 ; EG-DAG: ASHR
92 ; EG-DAG: AND_INT
93 ; EG-DAG: OR_INT
94 ; EG-DAG: SUB_INT
95 ; EG-DAG: LSHL
96 ; EG-DAG: LSHL
97 ; EG-DAG: SUB_INT
98 ; EG-DAG: LSHR
99 ; EG-DAG: LSHR
100 ; EG-DAG: SETGT_UINT
101 ; EG-DAG: SETGT_INT
102 ; EG-DAG: XOR_INT
103 ; EG-DAG: XOR_INT
104 ; EG-DAG: SUB_INT
105 ; EG-DAG: SUB_INT
106 ; EG-DAG: CNDE_INT
107 ; EG-DAG: CNDE_INT
108 ; EG-DAG: AND_INT
109 ; EG-DAG: LSHR
110 ; EG-DAG: SUB_INT
111 ; EG-DAG: AND_INT
112 ; EG-DAG: ASHR
113 ; EG-DAG: AND_INT
114 ; EG-DAG: OR_INT
115 ; EG-DAG: SUB_INT
116 ; EG-DAG: LSHL
117 ; EG-DAG: LSHL
118 ; EG-DAG: SUB_INT
119 ; EG-DAG: LSHR
120 ; EG-DAG: LSHR
121 ; EG-DAG: SETGT_UINT
122 ; EG-DAG: SETGT_INT
123 ; EG-DAG: XOR_INT
124 ; EG-DAG: XOR_INT
125 ; EG-DAG: SUB_INT
126 ; EG-DAG: SUB_INT
127 ; EG-DAG: CNDE_INT
128 ; EG-DAG: CNDE_INT
129
130 ; SI: s_endpgm
131 define amdgpu_kernel void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { 319 define amdgpu_kernel void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
320 ; SI-LABEL: fp_to_sint_v2i64:
321 ; SI: ; %bb.0:
322 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
323 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
324 ; SI-NEXT: s_mov_b32 s7, 0xf000
325 ; SI-NEXT: s_mov_b32 s6, -1
326 ; SI-NEXT: s_movk_i32 s8, 0xff6a
327 ; SI-NEXT: s_mov_b32 s2, 0x7fffff
328 ; SI-NEXT: s_mov_b32 s10, 0x800000
329 ; SI-NEXT: s_mov_b32 s3, 0
330 ; SI-NEXT: s_movk_i32 s9, 0x96
331 ; SI-NEXT: s_movk_i32 s11, 0xff81
332 ; SI-NEXT: s_waitcnt lgkmcnt(0)
333 ; SI-NEXT: s_bfe_u32 s12, s1, 0x80017
334 ; SI-NEXT: s_and_b32 s13, s1, s2
335 ; SI-NEXT: s_ashr_i32 s14, s1, 31
336 ; SI-NEXT: s_bfe_u32 s1, s0, 0x80017
337 ; SI-NEXT: s_and_b32 s15, s0, s2
338 ; SI-NEXT: s_ashr_i32 s16, s0, 31
339 ; SI-NEXT: s_add_i32 s0, s12, s8
340 ; SI-NEXT: s_or_b32 s2, s13, s10
341 ; SI-NEXT: s_sub_i32 s13, s9, s12
342 ; SI-NEXT: s_add_i32 s12, s12, s11
343 ; SI-NEXT: s_ashr_i32 s17, s14, 31
344 ; SI-NEXT: s_add_i32 s18, s1, s8
345 ; SI-NEXT: s_sub_i32 s19, s9, s1
346 ; SI-NEXT: s_add_i32 s11, s1, s11
347 ; SI-NEXT: s_ashr_i32 s20, s16, 31
348 ; SI-NEXT: s_lshl_b64 s[0:1], s[2:3], s0
349 ; SI-NEXT: s_lshr_b64 s[8:9], s[2:3], s13
350 ; SI-NEXT: v_mov_b32_e32 v0, s17
351 ; SI-NEXT: s_or_b32 s2, s15, s10
352 ; SI-NEXT: v_mov_b32_e32 v1, s20
353 ; SI-NEXT: v_mov_b32_e32 v2, s9
354 ; SI-NEXT: v_mov_b32_e32 v3, s1
355 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s12, 23
356 ; SI-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
357 ; SI-NEXT: v_mov_b32_e32 v3, s8
358 ; SI-NEXT: v_mov_b32_e32 v4, s0
359 ; SI-NEXT: s_lshl_b64 s[0:1], s[2:3], s18
360 ; SI-NEXT: s_lshr_b64 s[2:3], s[2:3], s19
361 ; SI-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
362 ; SI-NEXT: v_xor_b32_e32 v2, s17, v2
363 ; SI-NEXT: v_mov_b32_e32 v4, s3
364 ; SI-NEXT: v_mov_b32_e32 v5, s1
365 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s11, 23
366 ; SI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
367 ; SI-NEXT: v_mov_b32_e32 v5, s2
368 ; SI-NEXT: v_mov_b32_e32 v6, s0
369 ; SI-NEXT: v_xor_b32_e32 v3, s14, v3
370 ; SI-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc
371 ; SI-NEXT: v_xor_b32_e32 v4, s20, v4
372 ; SI-NEXT: v_subrev_i32_e32 v6, vcc, s14, v3
373 ; SI-NEXT: v_subb_u32_e32 v0, vcc, v2, v0, vcc
374 ; SI-NEXT: v_xor_b32_e32 v5, s16, v5
375 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s12, 0
376 ; SI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[0:1]
377 ; SI-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[0:1]
378 ; SI-NEXT: v_subrev_i32_e32 v0, vcc, s16, v5
379 ; SI-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc
380 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s11, 0
381 ; SI-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1]
382 ; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1]
383 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
384 ; SI-NEXT: s_endpgm
385 ;
386 ; VI-LABEL: fp_to_sint_v2i64:
387 ; VI: ; %bb.0:
388 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
389 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
390 ; VI-NEXT: s_mov_b32 s14, 0x7fffff
391 ; VI-NEXT: s_movk_i32 s12, 0xff6a
392 ; VI-NEXT: s_mov_b32 s15, 0x800000
393 ; VI-NEXT: s_movk_i32 s16, 0x96
394 ; VI-NEXT: s_waitcnt lgkmcnt(0)
395 ; VI-NEXT: s_bfe_u32 s13, s1, 0x80017
396 ; VI-NEXT: s_and_b32 s2, s1, s14
397 ; VI-NEXT: s_add_i32 s8, s13, s12
398 ; VI-NEXT: s_or_b32 s2, s2, s15
399 ; VI-NEXT: s_mov_b32 s3, 0
400 ; VI-NEXT: s_sub_i32 s10, s16, s13
401 ; VI-NEXT: s_movk_i32 s17, 0xff81
402 ; VI-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
403 ; VI-NEXT: s_lshr_b64 s[10:11], s[2:3], s10
404 ; VI-NEXT: s_add_i32 s13, s13, s17
405 ; VI-NEXT: v_mov_b32_e32 v0, s11
406 ; VI-NEXT: v_mov_b32_e32 v1, s9
407 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s13, 23
408 ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
409 ; VI-NEXT: v_mov_b32_e32 v2, s8
410 ; VI-NEXT: v_mov_b32_e32 v1, s10
411 ; VI-NEXT: s_ashr_i32 s1, s1, 31
412 ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
413 ; VI-NEXT: s_ashr_i32 s2, s1, 31
414 ; VI-NEXT: v_xor_b32_e32 v1, s1, v1
415 ; VI-NEXT: v_subrev_u32_e32 v1, vcc, s1, v1
416 ; VI-NEXT: v_xor_b32_e32 v0, s2, v0
417 ; VI-NEXT: v_mov_b32_e32 v2, s2
418 ; VI-NEXT: s_and_b32 s2, s0, s14
419 ; VI-NEXT: s_bfe_u32 s1, s0, 0x80017
420 ; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v2, vcc
421 ; VI-NEXT: v_cmp_lt_i32_e64 s[8:9], s13, 0
422 ; VI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[8:9]
423 ; VI-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[8:9]
424 ; VI-NEXT: s_add_i32 s8, s1, s12
425 ; VI-NEXT: s_or_b32 s2, s2, s15
426 ; VI-NEXT: s_sub_i32 s10, s16, s1
427 ; VI-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
428 ; VI-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
429 ; VI-NEXT: s_add_i32 s1, s1, s17
430 ; VI-NEXT: v_mov_b32_e32 v0, s3
431 ; VI-NEXT: v_mov_b32_e32 v1, s9
432 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s1, 23
433 ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
434 ; VI-NEXT: v_mov_b32_e32 v1, s2
435 ; VI-NEXT: v_mov_b32_e32 v4, s8
436 ; VI-NEXT: s_ashr_i32 s0, s0, 31
437 ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
438 ; VI-NEXT: s_ashr_i32 s2, s0, 31
439 ; VI-NEXT: v_xor_b32_e32 v1, s0, v1
440 ; VI-NEXT: v_subrev_u32_e32 v5, vcc, s0, v1
441 ; VI-NEXT: v_xor_b32_e32 v0, s2, v0
442 ; VI-NEXT: v_mov_b32_e32 v4, s2
443 ; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v4, vcc
444 ; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], s1, 0
445 ; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1]
446 ; VI-NEXT: s_mov_b32 s7, 0xf000
447 ; VI-NEXT: s_mov_b32 s6, -1
448 ; VI-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[0:1]
449 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
450 ; VI-NEXT: s_endpgm
451 ;
452 ; EG-LABEL: fp_to_sint_v2i64:
453 ; EG: ; %bb.0:
454 ; EG-NEXT: ALU 77, @4, KC0[CB0:0-32], KC1[]
455 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
456 ; EG-NEXT: CF_END
457 ; EG-NEXT: PAD
458 ; EG-NEXT: ALU clause starting at 4:
459 ; EG-NEXT: MOV * T0.W, literal.x,
460 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
461 ; EG-NEXT: BFE_UINT * T1.W, KC0[2].W, literal.x, PV.W,
462 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
463 ; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
464 ; EG-NEXT: BFE_UINT T0.W, KC0[3].X, literal.y, T0.W,
465 ; EG-NEXT: ADD_INT * T2.W, PV.W, literal.z,
466 ; EG-NEXT: 8388607(1.175494e-38), 23(3.222986e-44)
467 ; EG-NEXT: -150(nan), 0(0.000000e+00)
468 ; EG-NEXT: SUB_INT T0.X, literal.x, PV.W,
469 ; EG-NEXT: SUB_INT T0.Y, literal.x, T1.W,
470 ; EG-NEXT: AND_INT T1.Z, PS, literal.y,
471 ; EG-NEXT: OR_INT T3.W, PV.Z, literal.z,
472 ; EG-NEXT: AND_INT * T4.W, KC0[3].X, literal.w,
473 ; EG-NEXT: 150(2.101948e-43), 31(4.344025e-44)
474 ; EG-NEXT: 8388608(1.175494e-38), 8388607(1.175494e-38)
475 ; EG-NEXT: OR_INT T1.X, PS, literal.x,
476 ; EG-NEXT: LSHL T1.Y, PV.W, PV.Z,
477 ; EG-NEXT: AND_INT T0.Z, T2.W, literal.y,
478 ; EG-NEXT: BIT_ALIGN_INT T4.W, 0.0, PV.W, PV.Y,
479 ; EG-NEXT: AND_INT * T5.W, PV.Y, literal.y,
480 ; EG-NEXT: 8388608(1.175494e-38), 32(4.484155e-44)
481 ; EG-NEXT: CNDE_INT T2.X, PS, PV.W, 0.0,
482 ; EG-NEXT: CNDE_INT T0.Y, PV.Z, PV.Y, 0.0,
483 ; EG-NEXT: ADD_INT T1.Z, T0.W, literal.x,
484 ; EG-NEXT: BIT_ALIGN_INT T4.W, 0.0, PV.X, T0.X,
485 ; EG-NEXT: AND_INT * T5.W, T0.X, literal.y,
486 ; EG-NEXT: -150(nan), 32(4.484155e-44)
487 ; EG-NEXT: CNDE_INT T0.X, PS, PV.W, 0.0,
488 ; EG-NEXT: NOT_INT T2.Y, T2.W,
489 ; EG-NEXT: AND_INT T2.Z, PV.Z, literal.x,
490 ; EG-NEXT: NOT_INT T2.W, PV.Z,
491 ; EG-NEXT: LSHR * T4.W, T1.X, 1,
492 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
493 ; EG-NEXT: LSHR T3.X, T3.W, 1,
494 ; EG-NEXT: ADD_INT T3.Y, T0.W, literal.x, BS:VEC_120/SCL_212
495 ; EG-NEXT: BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W,
496 ; EG-NEXT: LSHL T0.W, T1.X, PV.Z,
497 ; EG-NEXT: AND_INT * T2.W, T1.Z, literal.y,
498 ; EG-NEXT: -127(nan), 32(4.484155e-44)
499 ; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0,
500 ; EG-NEXT: CNDE_INT T4.Y, PS, PV.Z, PV.W,
501 ; EG-NEXT: SETGT_INT T1.Z, PV.Y, literal.x,
502 ; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, PV.X, T2.Y,
503 ; EG-NEXT: ADD_INT * T1.W, T1.W, literal.y,
504 ; EG-NEXT: 23(3.222986e-44), -127(nan)
505 ; EG-NEXT: CNDE_INT T3.X, T0.Z, PV.W, T1.Y,
506 ; EG-NEXT: SETGT_INT T1.Y, PS, literal.x,
507 ; EG-NEXT: CNDE_INT T0.Z, PV.Z, 0.0, PV.Y,
508 ; EG-NEXT: CNDE_INT T0.W, PV.Z, T0.X, PV.X,
509 ; EG-NEXT: ASHR * T2.W, KC0[3].X, literal.y,
510 ; EG-NEXT: 23(3.222986e-44), 31(4.344025e-44)
511 ; EG-NEXT: XOR_INT T0.X, PV.W, PS,
512 ; EG-NEXT: XOR_INT T2.Y, PV.Z, PS,
513 ; EG-NEXT: CNDE_INT T0.Z, PV.Y, 0.0, PV.X,
514 ; EG-NEXT: CNDE_INT T0.W, PV.Y, T2.X, T0.Y,
515 ; EG-NEXT: ASHR * T3.W, KC0[2].W, literal.x,
516 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
517 ; EG-NEXT: XOR_INT T0.Y, PV.W, PS,
518 ; EG-NEXT: XOR_INT T0.Z, PV.Z, PS,
519 ; EG-NEXT: SUB_INT T0.W, PV.Y, T2.W,
520 ; EG-NEXT: SUBB_UINT * T4.W, PV.X, T2.W,
521 ; EG-NEXT: SUB_INT T1.Y, PV.W, PS,
522 ; EG-NEXT: SETGT_INT T1.Z, T3.Y, literal.x,
523 ; EG-NEXT: SUB_INT T0.W, PV.Z, T3.W,
524 ; EG-NEXT: SUBB_UINT * T4.W, PV.Y, T3.W,
525 ; EG-NEXT: -1(nan), 0(0.000000e+00)
526 ; EG-NEXT: SUB_INT T0.Z, PV.W, PS,
527 ; EG-NEXT: SETGT_INT T0.W, T1.W, literal.x,
528 ; EG-NEXT: CNDE_INT * T1.W, PV.Z, 0.0, PV.Y, BS:VEC_021/SCL_122
529 ; EG-NEXT: -1(nan), 0(0.000000e+00)
530 ; EG-NEXT: CNDE_INT T1.Y, PV.W, 0.0, PV.Z,
531 ; EG-NEXT: SUB_INT * T2.W, T0.X, T2.W,
532 ; EG-NEXT: CNDE_INT T1.Z, T1.Z, 0.0, PV.W,
533 ; EG-NEXT: SUB_INT * T2.W, T0.Y, T3.W,
534 ; EG-NEXT: CNDE_INT T1.X, T0.W, 0.0, PV.W,
535 ; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
536 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
132 %conv = fptosi <2 x float> %x to <2 x i64> 537 %conv = fptosi <2 x float> %x to <2 x i64>
133 store <2 x i64> %conv, <2 x i64> addrspace(1)* %out 538 store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
134 ret void 539 ret void
135 } 540 }
136 541
137 ; FUNC: {{^}}fp_to_sint_v4i64:
138 ; EG-DAG: AND_INT
139 ; EG-DAG: LSHR
140 ; EG-DAG: SUB_INT
141 ; EG-DAG: AND_INT
142 ; EG-DAG: ASHR
143 ; EG-DAG: AND_INT
144 ; EG-DAG: OR_INT
145 ; EG-DAG: SUB_INT
146 ; EG-DAG: LSHL
147 ; EG-DAG: LSHL
148 ; EG-DAG: SUB_INT
149 ; EG-DAG: LSHR
150 ; EG-DAG: LSHR
151 ; EG-DAG: SETGT_UINT
152 ; EG-DAG: SETGT_INT
153 ; EG-DAG: XOR_INT
154 ; EG-DAG: XOR_INT
155 ; EG-DAG: SUB_INT
156 ; EG-DAG: SUB_INT
157 ; EG-DAG: CNDE_INT
158 ; EG-DAG: CNDE_INT
159 ; EG-DAG: AND_INT
160 ; EG-DAG: LSHR
161 ; EG-DAG: SUB_INT
162 ; EG-DAG: AND_INT
163 ; EG-DAG: ASHR
164 ; EG-DAG: AND_INT
165 ; EG-DAG: OR_INT
166 ; EG-DAG: SUB_INT
167 ; EG-DAG: LSHL
168 ; EG-DAG: LSHL
169 ; EG-DAG: SUB_INT
170 ; EG-DAG: LSHR
171 ; EG-DAG: LSHR
172 ; EG-DAG: SETGT_UINT
173 ; EG-DAG: SETGT_INT
174 ; EG-DAG: XOR_INT
175 ; EG-DAG: XOR_INT
176 ; EG-DAG: SUB_INT
177 ; EG-DAG: SUB_INT
178 ; EG-DAG: CNDE_INT
179 ; EG-DAG: CNDE_INT
180 ; EG-DAG: AND_INT
181 ; EG-DAG: LSHR
182 ; EG-DAG: SUB_INT
183 ; EG-DAG: AND_INT
184 ; EG-DAG: ASHR
185 ; EG-DAG: AND_INT
186 ; EG-DAG: OR_INT
187 ; EG-DAG: SUB_INT
188 ; EG-DAG: LSHL
189 ; EG-DAG: LSHL
190 ; EG-DAG: SUB_INT
191 ; EG-DAG: LSHR
192 ; EG-DAG: LSHR
193 ; EG-DAG: SETGT_UINT
194 ; EG-DAG: SETGT_INT
195 ; EG-DAG: XOR_INT
196 ; EG-DAG: XOR_INT
197 ; EG-DAG: SUB_INT
198 ; EG-DAG: SUB_INT
199 ; EG-DAG: CNDE_INT
200 ; EG-DAG: CNDE_INT
201 ; EG-DAG: AND_INT
202 ; EG-DAG: LSHR
203 ; EG-DAG: SUB_INT
204 ; EG-DAG: AND_INT
205 ; EG-DAG: ASHR
206 ; EG-DAG: AND_INT
207 ; EG-DAG: OR_INT
208 ; EG-DAG: SUB_INT
209 ; EG-DAG: LSHL
210 ; EG-DAG: LSHL
211 ; EG-DAG: SUB_INT
212 ; EG-DAG: LSHR
213 ; EG-DAG: LSHR
214 ; EG-DAG: SETGT_UINT
215 ; EG-DAG: SETGT_INT
216 ; EG-DAG: XOR_INT
217 ; EG-DAG: XOR_INT
218 ; EG-DAG: SUB_INT
219 ; EG-DAG: SUB_INT
220 ; EG-DAG: CNDE_INT
221 ; EG-DAG: CNDE_INT
222
223 ; SI: s_endpgm
224 define amdgpu_kernel void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { 542 define amdgpu_kernel void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
543 ; SI-LABEL: fp_to_sint_v4i64:
544 ; SI: ; %bb.0:
545 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
546 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
547 ; SI-NEXT: s_mov_b32 s7, 0xf000
548 ; SI-NEXT: s_mov_b32 s6, -1
549 ; SI-NEXT: s_movk_i32 s10, 0xff6a
550 ; SI-NEXT: s_mov_b32 s8, 0x7fffff
551 ; SI-NEXT: s_mov_b32 s11, 0x800000
552 ; SI-NEXT: s_mov_b32 s9, 0
553 ; SI-NEXT: s_movk_i32 s12, 0x96
554 ; SI-NEXT: s_movk_i32 s13, 0xff81
555 ; SI-NEXT: s_waitcnt lgkmcnt(0)
556 ; SI-NEXT: s_bfe_u32 s14, s1, 0x80017
557 ; SI-NEXT: s_and_b32 s15, s1, s8
558 ; SI-NEXT: s_ashr_i32 s16, s1, 31
559 ; SI-NEXT: s_bfe_u32 s1, s0, 0x80017
560 ; SI-NEXT: s_and_b32 s17, s0, s8
561 ; SI-NEXT: s_ashr_i32 s18, s0, 31
562 ; SI-NEXT: s_bfe_u32 s0, s3, 0x80017
563 ; SI-NEXT: s_and_b32 s19, s3, s8
564 ; SI-NEXT: s_ashr_i32 s20, s3, 31
565 ; SI-NEXT: s_bfe_u32 s3, s2, 0x80017
566 ; SI-NEXT: s_and_b32 s21, s2, s8
567 ; SI-NEXT: s_ashr_i32 s22, s2, 31
568 ; SI-NEXT: s_add_i32 s2, s14, s10
569 ; SI-NEXT: s_or_b32 s8, s15, s11
570 ; SI-NEXT: s_sub_i32 s15, s12, s14
571 ; SI-NEXT: s_add_i32 s14, s14, s13
572 ; SI-NEXT: s_ashr_i32 s23, s16, 31
573 ; SI-NEXT: s_add_i32 s24, s1, s10
574 ; SI-NEXT: s_sub_i32 s25, s12, s1
575 ; SI-NEXT: s_add_i32 s26, s1, s13
576 ; SI-NEXT: s_ashr_i32 s27, s18, 31
577 ; SI-NEXT: s_add_i32 s28, s0, s10
578 ; SI-NEXT: s_sub_i32 s29, s12, s0
579 ; SI-NEXT: s_add_i32 s30, s0, s13
580 ; SI-NEXT: s_ashr_i32 s31, s20, 31
581 ; SI-NEXT: s_add_i32 s10, s3, s10
582 ; SI-NEXT: s_sub_i32 s12, s12, s3
583 ; SI-NEXT: s_add_i32 s13, s3, s13
584 ; SI-NEXT: s_ashr_i32 s33, s22, 31
585 ; SI-NEXT: s_lshl_b64 s[0:1], s[8:9], s2
586 ; SI-NEXT: s_lshr_b64 s[2:3], s[8:9], s15
587 ; SI-NEXT: v_mov_b32_e32 v0, s23
588 ; SI-NEXT: s_or_b32 s8, s17, s11
589 ; SI-NEXT: v_mov_b32_e32 v1, s27
590 ; SI-NEXT: v_mov_b32_e32 v4, s31
591 ; SI-NEXT: v_mov_b32_e32 v5, s33
592 ; SI-NEXT: v_mov_b32_e32 v2, s3
593 ; SI-NEXT: v_mov_b32_e32 v3, s1
594 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s14, 23
595 ; SI-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
596 ; SI-NEXT: v_mov_b32_e32 v3, s2
597 ; SI-NEXT: v_mov_b32_e32 v6, s0
598 ; SI-NEXT: s_lshl_b64 s[0:1], s[8:9], s24
599 ; SI-NEXT: s_lshr_b64 s[2:3], s[8:9], s25
600 ; SI-NEXT: s_or_b32 s8, s19, s11
601 ; SI-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
602 ; SI-NEXT: v_xor_b32_e32 v2, s23, v2
603 ; SI-NEXT: v_mov_b32_e32 v6, s3
604 ; SI-NEXT: v_mov_b32_e32 v7, s1
605 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s26, 23
606 ; SI-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
607 ; SI-NEXT: v_mov_b32_e32 v7, s2
608 ; SI-NEXT: v_mov_b32_e32 v8, s0
609 ; SI-NEXT: s_lshl_b64 s[0:1], s[8:9], s28
610 ; SI-NEXT: s_lshr_b64 s[2:3], s[8:9], s29
611 ; SI-NEXT: s_or_b32 s8, s21, s11
612 ; SI-NEXT: v_xor_b32_e32 v3, s16, v3
613 ; SI-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc
614 ; SI-NEXT: v_xor_b32_e32 v6, s27, v6
615 ; SI-NEXT: v_mov_b32_e32 v8, s3
616 ; SI-NEXT: v_mov_b32_e32 v9, s1
617 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s30, 23
618 ; SI-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
619 ; SI-NEXT: v_mov_b32_e32 v9, s2
620 ; SI-NEXT: v_mov_b32_e32 v10, s0
621 ; SI-NEXT: s_lshl_b64 s[2:3], s[8:9], s10
622 ; SI-NEXT: s_lshr_b64 s[8:9], s[8:9], s12
623 ; SI-NEXT: v_subrev_i32_e64 v11, s[0:1], s16, v3
624 ; SI-NEXT: v_subb_u32_e64 v0, s[0:1], v2, v0, s[0:1]
625 ; SI-NEXT: v_xor_b32_e32 v7, s18, v7
626 ; SI-NEXT: v_cndmask_b32_e32 v9, v9, v10, vcc
627 ; SI-NEXT: v_xor_b32_e32 v8, s31, v8
628 ; SI-NEXT: v_mov_b32_e32 v2, s9
629 ; SI-NEXT: v_mov_b32_e32 v3, s3
630 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s13, 23
631 ; SI-NEXT: v_cndmask_b32_e32 v10, v2, v3, vcc
632 ; SI-NEXT: v_mov_b32_e32 v12, s8
633 ; SI-NEXT: v_mov_b32_e32 v13, s2
634 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s14, 0
635 ; SI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[0:1]
636 ; SI-NEXT: v_cndmask_b32_e64 v2, v11, 0, s[0:1]
637 ; SI-NEXT: v_subrev_i32_e64 v0, s[0:1], s18, v7
638 ; SI-NEXT: v_subb_u32_e64 v1, s[0:1], v6, v1, s[0:1]
639 ; SI-NEXT: v_xor_b32_e32 v6, s20, v9
640 ; SI-NEXT: v_cndmask_b32_e32 v7, v12, v13, vcc
641 ; SI-NEXT: v_xor_b32_e32 v9, s33, v10
642 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s26, 0
643 ; SI-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1]
644 ; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1]
645 ; SI-NEXT: v_subrev_i32_e32 v6, vcc, s20, v6
646 ; SI-NEXT: v_subb_u32_e32 v4, vcc, v8, v4, vcc
647 ; SI-NEXT: v_xor_b32_e32 v8, s22, v7
648 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s30, 0
649 ; SI-NEXT: v_cndmask_b32_e64 v7, v4, 0, s[0:1]
650 ; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[0:1]
651 ; SI-NEXT: v_subrev_i32_e32 v4, vcc, s22, v8
652 ; SI-NEXT: v_subb_u32_e32 v5, vcc, v9, v5, vcc
653 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s13, 0
654 ; SI-NEXT: v_cndmask_b32_e64 v5, v5, 0, s[0:1]
655 ; SI-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[0:1]
656 ; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
657 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
658 ; SI-NEXT: s_endpgm
659 ;
660 ; VI-LABEL: fp_to_sint_v4i64:
661 ; VI: ; %bb.0:
662 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
663 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
664 ; VI-NEXT: s_mov_b32 s16, 0x7fffff
665 ; VI-NEXT: s_movk_i32 s14, 0xff6a
666 ; VI-NEXT: s_mov_b32 s17, 0x800000
667 ; VI-NEXT: s_movk_i32 s18, 0x96
668 ; VI-NEXT: s_waitcnt lgkmcnt(0)
669 ; VI-NEXT: s_bfe_u32 s15, s1, 0x80017
670 ; VI-NEXT: s_and_b32 s8, s1, s16
671 ; VI-NEXT: s_add_i32 s10, s15, s14
672 ; VI-NEXT: s_or_b32 s8, s8, s17
673 ; VI-NEXT: s_mov_b32 s9, 0
674 ; VI-NEXT: s_sub_i32 s12, s18, s15
675 ; VI-NEXT: s_movk_i32 s19, 0xff81
676 ; VI-NEXT: s_lshl_b64 s[10:11], s[8:9], s10
677 ; VI-NEXT: s_lshr_b64 s[12:13], s[8:9], s12
678 ; VI-NEXT: s_add_i32 s15, s15, s19
679 ; VI-NEXT: v_mov_b32_e32 v0, s13
680 ; VI-NEXT: v_mov_b32_e32 v1, s11
681 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s15, 23
682 ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
683 ; VI-NEXT: v_mov_b32_e32 v2, s10
684 ; VI-NEXT: v_mov_b32_e32 v1, s12
685 ; VI-NEXT: s_ashr_i32 s1, s1, 31
686 ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
687 ; VI-NEXT: s_ashr_i32 s8, s1, 31
688 ; VI-NEXT: v_xor_b32_e32 v1, s1, v1
689 ; VI-NEXT: v_subrev_u32_e32 v1, vcc, s1, v1
690 ; VI-NEXT: v_xor_b32_e32 v0, s8, v0
691 ; VI-NEXT: v_mov_b32_e32 v2, s8
692 ; VI-NEXT: s_and_b32 s8, s0, s16
693 ; VI-NEXT: s_bfe_u32 s1, s0, 0x80017
694 ; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v2, vcc
695 ; VI-NEXT: v_cmp_lt_i32_e64 s[10:11], s15, 0
696 ; VI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[10:11]
697 ; VI-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[10:11]
698 ; VI-NEXT: s_add_i32 s10, s1, s14
699 ; VI-NEXT: s_or_b32 s8, s8, s17
700 ; VI-NEXT: s_sub_i32 s12, s18, s1
701 ; VI-NEXT: s_lshl_b64 s[10:11], s[8:9], s10
702 ; VI-NEXT: s_lshr_b64 s[12:13], s[8:9], s12
703 ; VI-NEXT: s_add_i32 s1, s1, s19
704 ; VI-NEXT: v_mov_b32_e32 v0, s13
705 ; VI-NEXT: v_mov_b32_e32 v1, s11
706 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s1, 23
707 ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
708 ; VI-NEXT: v_mov_b32_e32 v1, s12
709 ; VI-NEXT: v_mov_b32_e32 v4, s10
710 ; VI-NEXT: s_ashr_i32 s0, s0, 31
711 ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
712 ; VI-NEXT: s_ashr_i32 s8, s0, 31
713 ; VI-NEXT: v_xor_b32_e32 v1, s0, v1
714 ; VI-NEXT: v_subrev_u32_e32 v5, vcc, s0, v1
715 ; VI-NEXT: v_xor_b32_e32 v0, s8, v0
716 ; VI-NEXT: v_mov_b32_e32 v4, s8
717 ; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v4, vcc
718 ; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], s1, 0
719 ; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1]
720 ; VI-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[0:1]
721 ; VI-NEXT: s_bfe_u32 s12, s3, 0x80017
722 ; VI-NEXT: s_and_b32 s1, s3, s16
723 ; VI-NEXT: s_add_i32 s0, s12, s14
724 ; VI-NEXT: s_or_b32 s8, s1, s17
725 ; VI-NEXT: s_sub_i32 s10, s18, s12
726 ; VI-NEXT: s_lshl_b64 s[0:1], s[8:9], s0
727 ; VI-NEXT: s_lshr_b64 s[10:11], s[8:9], s10
728 ; VI-NEXT: s_add_i32 s12, s12, s19
729 ; VI-NEXT: v_mov_b32_e32 v4, s11
730 ; VI-NEXT: v_mov_b32_e32 v5, s1
731 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s12, 23
732 ; VI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
733 ; VI-NEXT: v_mov_b32_e32 v6, s0
734 ; VI-NEXT: v_mov_b32_e32 v5, s10
735 ; VI-NEXT: s_ashr_i32 s0, s3, 31
736 ; VI-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc
737 ; VI-NEXT: s_ashr_i32 s1, s0, 31
738 ; VI-NEXT: v_xor_b32_e32 v5, s0, v5
739 ; VI-NEXT: v_xor_b32_e32 v4, s1, v4
740 ; VI-NEXT: v_mov_b32_e32 v6, s1
741 ; VI-NEXT: v_subrev_u32_e32 v5, vcc, s0, v5
742 ; VI-NEXT: v_subb_u32_e32 v4, vcc, v4, v6, vcc
743 ; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], s12, 0
744 ; VI-NEXT: s_bfe_u32 s3, s2, 0x80017
745 ; VI-NEXT: v_cndmask_b32_e64 v7, v4, 0, s[0:1]
746 ; VI-NEXT: v_cndmask_b32_e64 v6, v5, 0, s[0:1]
747 ; VI-NEXT: s_and_b32 s1, s2, s16
748 ; VI-NEXT: s_add_i32 s0, s3, s14
749 ; VI-NEXT: s_or_b32 s8, s1, s17
750 ; VI-NEXT: s_sub_i32 s10, s18, s3
751 ; VI-NEXT: s_lshl_b64 s[0:1], s[8:9], s0
752 ; VI-NEXT: s_lshr_b64 s[8:9], s[8:9], s10
753 ; VI-NEXT: s_add_i32 s3, s3, s19
754 ; VI-NEXT: v_mov_b32_e32 v4, s9
755 ; VI-NEXT: v_mov_b32_e32 v5, s1
756 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s3, 23
757 ; VI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
758 ; VI-NEXT: v_mov_b32_e32 v8, s0
759 ; VI-NEXT: v_mov_b32_e32 v5, s8
760 ; VI-NEXT: s_ashr_i32 s0, s2, 31
761 ; VI-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc
762 ; VI-NEXT: s_ashr_i32 s1, s0, 31
763 ; VI-NEXT: v_xor_b32_e32 v5, s0, v5
764 ; VI-NEXT: v_xor_b32_e32 v4, s1, v4
765 ; VI-NEXT: v_mov_b32_e32 v8, s1
766 ; VI-NEXT: v_subrev_u32_e32 v9, vcc, s0, v5
767 ; VI-NEXT: v_subb_u32_e32 v4, vcc, v4, v8, vcc
768 ; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], s3, 0
769 ; VI-NEXT: v_cndmask_b32_e64 v5, v4, 0, s[0:1]
770 ; VI-NEXT: s_mov_b32 s7, 0xf000
771 ; VI-NEXT: s_mov_b32 s6, -1
772 ; VI-NEXT: v_cndmask_b32_e64 v4, v9, 0, s[0:1]
773 ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
774 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
775 ; VI-NEXT: s_endpgm
776 ;
777 ; EG-LABEL: fp_to_sint_v4i64:
778 ; EG: ; %bb.0:
779 ; EG-NEXT: ALU 101, @6, KC0[CB0:0-32], KC1[]
780 ; EG-NEXT: ALU 58, @108, KC0[CB0:0-32], KC1[]
781 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T0.X, 0
782 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T2.X, 1
783 ; EG-NEXT: CF_END
784 ; EG-NEXT: PAD
785 ; EG-NEXT: ALU clause starting at 6:
786 ; EG-NEXT: MOV * T0.W, literal.x,
787 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
788 ; EG-NEXT: BFE_UINT T1.W, KC0[4].X, literal.x, PV.W,
789 ; EG-NEXT: AND_INT * T2.W, KC0[4].X, literal.y,
790 ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
791 ; EG-NEXT: OR_INT T0.Z, PS, literal.x,
792 ; EG-NEXT: BFE_UINT T2.W, KC0[3].Z, literal.y, T0.W,
793 ; EG-NEXT: ADD_INT * T3.W, PV.W, literal.z,
794 ; EG-NEXT: 8388608(1.175494e-38), 23(3.222986e-44)
795 ; EG-NEXT: -150(nan), 0(0.000000e+00)
796 ; EG-NEXT: ADD_INT T0.Y, PV.W, literal.x,
797 ; EG-NEXT: AND_INT T1.Z, PS, literal.y,
798 ; EG-NEXT: NOT_INT T4.W, PS,
799 ; EG-NEXT: LSHR * T5.W, PV.Z, 1,
800 ; EG-NEXT: -127(nan), 31(4.344025e-44)
801 ; EG-NEXT: ADD_INT T0.X, T1.W, literal.x,
802 ; EG-NEXT: BIT_ALIGN_INT T1.Y, 0.0, PS, PV.W,
803 ; EG-NEXT: AND_INT T2.Z, T3.W, literal.y, BS:VEC_201
804 ; EG-NEXT: LSHL T3.W, T0.Z, PV.Z,
805 ; EG-NEXT: SUB_INT * T1.W, literal.z, T1.W,
806 ; EG-NEXT: -127(nan), 32(4.484155e-44)
807 ; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
808 ; EG-NEXT: AND_INT T1.X, PS, literal.x,
809 ; EG-NEXT: BIT_ALIGN_INT T2.Y, 0.0, T0.Z, PS,
810 ; EG-NEXT: AND_INT T0.Z, KC0[3].Z, literal.y,
811 ; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.Y, PV.W,
812 ; EG-NEXT: SETGT_INT * T4.W, PV.X, literal.z,
813 ; EG-NEXT: 32(4.484155e-44), 8388607(1.175494e-38)
814 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
815 ; EG-NEXT: CNDE_INT T2.X, PS, 0.0, PV.W,
816 ; EG-NEXT: OR_INT T1.Y, PV.Z, literal.x,
817 ; EG-NEXT: ADD_INT T0.Z, T2.W, literal.y,
818 ; EG-NEXT: CNDE_INT T1.W, PV.X, PV.Y, 0.0,
819 ; EG-NEXT: CNDE_INT * T3.W, T2.Z, T3.W, 0.0,
820 ; EG-NEXT: 8388608(1.175494e-38), -150(nan)
821 ; EG-NEXT: CNDE_INT T1.X, T4.W, PV.W, PS,
822 ; EG-NEXT: ASHR T2.Y, KC0[4].X, literal.x,
823 ; EG-NEXT: AND_INT T1.Z, PV.Z, literal.x,
824 ; EG-NEXT: NOT_INT T1.W, PV.Z,
825 ; EG-NEXT: LSHR * T3.W, PV.Y, 1,
826 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
827 ; EG-NEXT: BIT_ALIGN_INT T3.X, 0.0, PS, PV.W,
828 ; EG-NEXT: LSHL T3.Y, T1.Y, PV.Z,
829 ; EG-NEXT: XOR_INT T1.Z, PV.X, PV.Y,
830 ; EG-NEXT: XOR_INT T1.W, T2.X, PV.Y,
831 ; EG-NEXT: SUB_INT * T2.W, literal.x, T2.W,
832 ; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
833 ; EG-NEXT: AND_INT T1.X, T0.Z, literal.x,
834 ; EG-NEXT: AND_INT T4.Y, PS, literal.x,
835 ; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, T1.Y, PS, BS:VEC_021/SCL_122
836 ; EG-NEXT: SUB_INT T1.W, PV.W, T2.Y,
837 ; EG-NEXT: SUBB_UINT * T2.W, PV.Z, T2.Y,
838 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
839 ; EG-NEXT: SUB_INT T2.X, PV.W, PS,
840 ; EG-NEXT: CNDE_INT T1.Y, PV.Y, PV.Z, 0.0,
841 ; EG-NEXT: CNDE_INT T0.Z, PV.X, T3.Y, 0.0,
842 ; EG-NEXT: CNDE_INT T1.W, PV.X, T3.X, T3.Y, BS:VEC_021/SCL_122
843 ; EG-NEXT: SETGT_INT * T2.W, T0.Y, literal.x,
844 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
845 ; EG-NEXT: BFE_UINT T1.X, KC0[3].W, literal.x, T0.W,
846 ; EG-NEXT: AND_INT T3.Y, KC0[3].W, literal.y,
847 ; EG-NEXT: CNDE_INT T2.Z, PS, 0.0, PV.W,
848 ; EG-NEXT: CNDE_INT T1.W, PS, PV.Y, PV.Z,
849 ; EG-NEXT: ASHR * T2.W, KC0[3].Z, literal.z,
850 ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
851 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
852 ; EG-NEXT: BFE_UINT T3.X, KC0[3].Y, literal.x, T0.W,
853 ; EG-NEXT: XOR_INT T1.Y, PV.W, PS,
854 ; EG-NEXT: XOR_INT T0.Z, PV.Z, PS,
855 ; EG-NEXT: OR_INT T0.W, PV.Y, literal.y,
856 ; EG-NEXT: SUB_INT * T1.W, literal.z, PV.X,
857 ; EG-NEXT: 23(3.222986e-44), 8388608(1.175494e-38)
858 ; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
859 ; EG-NEXT: AND_INT T4.X, KC0[3].Y, literal.x,
860 ; EG-NEXT: AND_INT T3.Y, PS, literal.y,
861 ; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PV.W, PS,
862 ; EG-NEXT: SUB_INT T1.W, PV.Z, T2.W,
863 ; EG-NEXT: SUBB_UINT * T3.W, PV.Y, T2.W,
864 ; EG-NEXT: 8388607(1.175494e-38), 32(4.484155e-44)
865 ; EG-NEXT: SUB_INT T5.X, PV.W, PS,
866 ; EG-NEXT: SETGT_INT T0.Y, T0.Y, literal.x,
867 ; EG-NEXT: CNDE_INT T0.Z, PV.Y, PV.Z, 0.0,
868 ; EG-NEXT: OR_INT T1.W, PV.X, literal.y,
869 ; EG-NEXT: ADD_INT * T3.W, T3.X, literal.z,
870 ; EG-NEXT: -1(nan), 8388608(1.175494e-38)
871 ; EG-NEXT: -150(nan), 0(0.000000e+00)
872 ; EG-NEXT: ADD_INT T4.X, T3.X, literal.x,
873 ; EG-NEXT: SUB_INT T3.Y, literal.y, T3.X,
874 ; EG-NEXT: AND_INT T2.Z, PS, literal.z,
875 ; EG-NEXT: NOT_INT T4.W, PS,
876 ; EG-NEXT: LSHR * T5.W, PV.W, 1,
877 ; EG-NEXT: -127(nan), 150(2.101948e-43)
878 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
879 ; EG-NEXT: BIT_ALIGN_INT T3.X, 0.0, PS, PV.W,
880 ; EG-NEXT: LSHL T4.Y, T1.W, PV.Z,
881 ; EG-NEXT: AND_INT T2.Z, T3.W, literal.x, BS:VEC_120/SCL_212
882 ; EG-NEXT: BIT_ALIGN_INT T1.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122
883 ; EG-NEXT: AND_INT * T3.W, PV.Y, literal.x,
884 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
885 ; EG-NEXT: ADD_INT T6.X, T1.X, literal.x,
886 ; EG-NEXT: CNDE_INT * T3.Y, PS, PV.W, 0.0,
887 ; EG-NEXT: -150(nan), 0(0.000000e+00)
888 ; EG-NEXT: ALU clause starting at 108:
889 ; EG-NEXT: CNDE_INT T3.Z, T2.Z, T4.Y, 0.0,
890 ; EG-NEXT: CNDE_INT T1.W, T2.Z, T3.X, T4.Y,
891 ; EG-NEXT: SETGT_INT * T3.W, T4.X, literal.x,
892 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
893 ; EG-NEXT: CNDE_INT T3.X, PS, 0.0, PV.W,
894 ; EG-NEXT: CNDE_INT T3.Y, PS, T3.Y, PV.Z,
895 ; EG-NEXT: AND_INT T2.Z, T6.X, literal.x,
896 ; EG-NEXT: NOT_INT T1.W, T6.X,
897 ; EG-NEXT: LSHR * T3.W, T0.W, 1,
898 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
899 ; EG-NEXT: ASHR T7.X, KC0[3].Y, literal.x,
900 ; EG-NEXT: ADD_INT T4.Y, T1.X, literal.y,
901 ; EG-NEXT: BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W,
902 ; EG-NEXT: LSHL T0.W, T0.W, PV.Z,
903 ; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
904 ; EG-NEXT: 31(4.344025e-44), -127(nan)
905 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
906 ; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0,
907 ; EG-NEXT: CNDE_INT T5.Y, PS, PV.Z, PV.W,
908 ; EG-NEXT: SETGT_INT T2.Z, PV.Y, literal.x,
909 ; EG-NEXT: XOR_INT T0.W, T3.Y, PV.X,
910 ; EG-NEXT: XOR_INT * T1.W, T3.X, PV.X,
911 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
912 ; EG-NEXT: SUB_INT T3.X, PS, T7.X,
913 ; EG-NEXT: SUBB_UINT T3.Y, PV.W, T7.X,
914 ; EG-NEXT: CNDE_INT T3.Z, PV.Z, 0.0, PV.Y,
915 ; EG-NEXT: CNDE_INT T1.W, PV.Z, T0.Z, PV.X,
916 ; EG-NEXT: ASHR * T3.W, KC0[3].W, literal.x,
917 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
918 ; EG-NEXT: XOR_INT T1.X, PV.W, PS,
919 ; EG-NEXT: XOR_INT T5.Y, PV.Z, PS,
920 ; EG-NEXT: SUB_INT T0.Z, PV.X, PV.Y,
921 ; EG-NEXT: SETGT_INT T1.W, T4.X, literal.x,
922 ; EG-NEXT: CNDE_INT * T6.W, T0.Y, 0.0, T5.X, BS:VEC_021/SCL_122
923 ; EG-NEXT: -1(nan), 0(0.000000e+00)
924 ; EG-NEXT: SETGT_INT T0.X, T0.X, literal.x,
925 ; EG-NEXT: CNDE_INT T6.Y, PV.W, 0.0, PV.Z,
926 ; EG-NEXT: SUB_INT T0.Z, T1.Y, T2.W, BS:VEC_021/SCL_122
927 ; EG-NEXT: SUB_INT T2.W, PV.Y, T3.W,
928 ; EG-NEXT: SUBB_UINT * T4.W, PV.X, T3.W,
929 ; EG-NEXT: -1(nan), 0(0.000000e+00)
930 ; EG-NEXT: SUB_INT T3.X, PV.W, PS,
931 ; EG-NEXT: SETGT_INT T1.Y, T4.Y, literal.x,
932 ; EG-NEXT: CNDE_INT T6.Z, T0.Y, 0.0, PV.Z, BS:VEC_120/SCL_212
933 ; EG-NEXT: SUB_INT T0.W, T0.W, T7.X,
934 ; EG-NEXT: CNDE_INT * T4.W, PV.X, 0.0, T2.X, BS:VEC_021/SCL_122
935 ; EG-NEXT: -1(nan), 0(0.000000e+00)
936 ; EG-NEXT: CNDE_INT T6.X, T1.W, 0.0, PV.W,
937 ; EG-NEXT: CNDE_INT T4.Y, PV.Y, 0.0, PV.X,
938 ; EG-NEXT: SUB_INT T0.W, T1.Z, T2.Y,
939 ; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
940 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
941 ; EG-NEXT: CNDE_INT T4.Z, T0.X, 0.0, PV.W,
942 ; EG-NEXT: SUB_INT * T0.W, T1.X, T3.W, BS:VEC_120/SCL_212
943 ; EG-NEXT: CNDE_INT T4.X, T1.Y, 0.0, PV.W,
944 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
945 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
946 ; EG-NEXT: LSHR * T0.X, PV.W, literal.x,
947 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
225 %conv = fptosi <4 x float> %x to <4 x i64> 948 %conv = fptosi <4 x float> %x to <4 x i64>
226 store <4 x i64> %conv, <4 x i64> addrspace(1)* %out 949 store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
227 ret void 950 ret void
228 } 951 }
229 952
230 ; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i1:
231 ; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{[0-9]+}}
232
233 ; EG: AND_INT
234 ; EG: SETE_DX10 {{[*]?}} T{{[0-9]+}}.{{[XYZW]}}, KC0[2].Z, literal.y,
235 ; EG-NEXT: -1082130432(-1.000000e+00)
236 define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { 953 define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
954 ; SI-LABEL: fp_to_uint_f32_to_i1:
955 ; SI: ; %bb.0:
956 ; SI-NEXT: s_load_dword s4, s[0:1], 0xb
957 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
958 ; SI-NEXT: s_mov_b32 s3, 0xf000
959 ; SI-NEXT: s_mov_b32 s2, -1
960 ; SI-NEXT: s_waitcnt lgkmcnt(0)
961 ; SI-NEXT: v_cmp_eq_f32_e64 s[4:5], -1.0, s4
962 ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
963 ; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
964 ; SI-NEXT: s_endpgm
965 ;
966 ; VI-LABEL: fp_to_uint_f32_to_i1:
967 ; VI: ; %bb.0:
968 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
969 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
970 ; VI-NEXT: s_mov_b32 s7, 0xf000
971 ; VI-NEXT: s_mov_b32 s6, -1
972 ; VI-NEXT: s_waitcnt lgkmcnt(0)
973 ; VI-NEXT: v_cmp_eq_f32_e64 s[0:1], -1.0, s0
974 ; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
975 ; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
976 ; VI-NEXT: s_endpgm
977 ;
978 ; EG-LABEL: fp_to_uint_f32_to_i1:
979 ; EG: ; %bb.0:
980 ; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
981 ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
982 ; EG-NEXT: CF_END
983 ; EG-NEXT: PAD
984 ; EG-NEXT: ALU clause starting at 4:
985 ; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
986 ; EG-NEXT: SETE_DX10 * T1.W, KC0[2].Z, literal.y,
987 ; EG-NEXT: 3(4.203895e-45), -1082130432(-1.000000e+00)
988 ; EG-NEXT: AND_INT T1.W, PS, 1,
989 ; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
990 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
991 ; EG-NEXT: LSHL T0.X, PV.W, PS,
992 ; EG-NEXT: LSHL * T0.W, literal.x, PS,
993 ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
994 ; EG-NEXT: MOV T0.Y, 0.0,
995 ; EG-NEXT: MOV * T0.Z, 0.0,
996 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
997 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
237 %conv = fptosi float %in to i1 998 %conv = fptosi float %in to i1
238 store i1 %conv, i1 addrspace(1)* %out 999 store i1 %conv, i1 addrspace(1)* %out
239 ret void 1000 ret void
240 } 1001 }
241 1002
242 ; FUNC-LABEL: {{^}}fp_to_uint_fabs_f32_to_i1:
243 ; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{[0-9]+}}|
244 define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { 1003 define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
1004 ; SI-LABEL: fp_to_uint_fabs_f32_to_i1:
1005 ; SI: ; %bb.0:
1006 ; SI-NEXT: s_load_dword s4, s[0:1], 0xb
1007 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1008 ; SI-NEXT: s_mov_b32 s3, 0xf000
1009 ; SI-NEXT: s_mov_b32 s2, -1
1010 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1011 ; SI-NEXT: v_cmp_eq_f32_e64 s[4:5], -1.0, |s4|
1012 ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1013 ; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
1014 ; SI-NEXT: s_endpgm
1015 ;
1016 ; VI-LABEL: fp_to_uint_fabs_f32_to_i1:
1017 ; VI: ; %bb.0:
1018 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
1019 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
1020 ; VI-NEXT: s_mov_b32 s7, 0xf000
1021 ; VI-NEXT: s_mov_b32 s6, -1
1022 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1023 ; VI-NEXT: v_cmp_eq_f32_e64 s[0:1], -1.0, |s0|
1024 ; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
1025 ; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
1026 ; VI-NEXT: s_endpgm
1027 ;
1028 ; EG-LABEL: fp_to_uint_fabs_f32_to_i1:
1029 ; EG: ; %bb.0:
1030 ; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
1031 ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
1032 ; EG-NEXT: CF_END
1033 ; EG-NEXT: PAD
1034 ; EG-NEXT: ALU clause starting at 4:
1035 ; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
1036 ; EG-NEXT: SETE_DX10 * T1.W, |KC0[2].Z|, literal.y,
1037 ; EG-NEXT: 3(4.203895e-45), -1082130432(-1.000000e+00)
1038 ; EG-NEXT: AND_INT T1.W, PS, 1,
1039 ; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
1040 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
1041 ; EG-NEXT: LSHL T0.X, PV.W, PS,
1042 ; EG-NEXT: LSHL * T0.W, literal.x, PS,
1043 ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
1044 ; EG-NEXT: MOV T0.Y, 0.0,
1045 ; EG-NEXT: MOV * T0.Z, 0.0,
1046 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
1047 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
245 %in.fabs = call float @llvm.fabs.f32(float %in) 1048 %in.fabs = call float @llvm.fabs.f32(float %in)
246 %conv = fptosi float %in.fabs to i1 1049 %conv = fptosi float %in.fabs to i1
247 store i1 %conv, i1 addrspace(1)* %out 1050 store i1 %conv, i1 addrspace(1)* %out
248 ret void 1051 ret void
249 } 1052 }
250 1053
251 ; FUNC-LABEL: {{^}}fp_to_sint_f32_i16:
252 ; GCN: v_cvt_i32_f32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
253 ; GCN: buffer_store_short [[VAL]]
254 define amdgpu_kernel void @fp_to_sint_f32_i16(i16 addrspace(1)* %out, float %in) #0 { 1054 define amdgpu_kernel void @fp_to_sint_f32_i16(i16 addrspace(1)* %out, float %in) #0 {
1055 ; SI-LABEL: fp_to_sint_f32_i16:
1056 ; SI: ; %bb.0:
1057 ; SI-NEXT: s_load_dword s4, s[0:1], 0xb
1058 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1059 ; SI-NEXT: s_mov_b32 s3, 0xf000
1060 ; SI-NEXT: s_mov_b32 s2, -1
1061 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1062 ; SI-NEXT: v_cvt_i32_f32_e32 v0, s4
1063 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
1064 ; SI-NEXT: s_endpgm
1065 ;
1066 ; VI-LABEL: fp_to_sint_f32_i16:
1067 ; VI: ; %bb.0:
1068 ; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
1069 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1070 ; VI-NEXT: s_mov_b32 s3, 0xf000
1071 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1072 ; VI-NEXT: v_cvt_i32_f32_e32 v0, s2
1073 ; VI-NEXT: s_mov_b32 s2, -1
1074 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
1075 ; VI-NEXT: s_endpgm
1076 ;
1077 ; EG-LABEL: fp_to_sint_f32_i16:
1078 ; EG: ; %bb.0:
1079 ; EG-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[]
1080 ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
1081 ; EG-NEXT: CF_END
1082 ; EG-NEXT: PAD
1083 ; EG-NEXT: ALU clause starting at 4:
1084 ; EG-NEXT: TRUNC T0.W, KC0[2].Z,
1085 ; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.x,
1086 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
1087 ; EG-NEXT: FLT_TO_INT * T0.W, PV.W,
1088 ; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
1089 ; EG-NEXT: LSHL * T1.W, T1.W, literal.y,
1090 ; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
1091 ; EG-NEXT: LSHL T0.X, PV.W, PS,
1092 ; EG-NEXT: LSHL * T0.W, literal.x, PS,
1093 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
1094 ; EG-NEXT: MOV T0.Y, 0.0,
1095 ; EG-NEXT: MOV * T0.Z, 0.0,
1096 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
1097 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
255 %sint = fptosi float %in to i16 1098 %sint = fptosi float %in to i16
256 store i16 %sint, i16 addrspace(1)* %out 1099 store i16 %sint, i16 addrspace(1)* %out
257 ret void 1100 ret void
258 } 1101 }
259 1102