Mercurial > hg > CbC > CbC_llvm
comparison llvm/test/CodeGen/AMDGPU/fp_to_sint.ll @ 221:79ff65ed7e25
LLVM12 Original
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 15 Jun 2021 19:15:29 +0900 (2021-06-15) |
parents | 1d019706d866 |
children | 5f17cb93ff66 |
comparison
equal
deleted
inserted
replaced
220:42394fc6a535 | 221:79ff65ed7e25 |
---|---|
1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=SI --check-prefix=FUNC --check-prefix=GCN | 1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN | 2 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s --check-prefixes=SI |
3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=EG --check-prefix=FUNC | 3 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s --check-prefixes=VI |
4 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefixes=EG | |
4 | 5 |
5 declare float @llvm.fabs.f32(float) #1 | 6 declare float @llvm.fabs.f32(float) #1 |
6 | 7 |
7 ; FUNC-LABEL: {{^}}fp_to_sint_i32: | |
8 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} | |
9 ; SI: v_cvt_i32_f32_e32 | |
10 ; SI: s_endpgm | |
11 define amdgpu_kernel void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) { | 8 define amdgpu_kernel void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) { |
9 ; SI-LABEL: fp_to_sint_i32: | |
10 ; SI: ; %bb.0: | |
11 ; SI-NEXT: s_load_dword s4, s[0:1], 0xb | |
12 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 | |
13 ; SI-NEXT: s_mov_b32 s3, 0xf000 | |
14 ; SI-NEXT: s_mov_b32 s2, -1 | |
15 ; SI-NEXT: s_waitcnt lgkmcnt(0) | |
16 ; SI-NEXT: v_cvt_i32_f32_e32 v0, s4 | |
17 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 | |
18 ; SI-NEXT: s_endpgm | |
19 ; | |
20 ; VI-LABEL: fp_to_sint_i32: | |
21 ; VI: ; %bb.0: | |
22 ; VI-NEXT: s_load_dword s2, s[0:1], 0x2c | |
23 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 | |
24 ; VI-NEXT: s_mov_b32 s3, 0xf000 | |
25 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
26 ; VI-NEXT: v_cvt_i32_f32_e32 v0, s2 | |
27 ; VI-NEXT: s_mov_b32 s2, -1 | |
28 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 | |
29 ; VI-NEXT: s_endpgm | |
30 ; | |
31 ; EG-LABEL: fp_to_sint_i32: | |
32 ; EG: ; %bb.0: | |
33 ; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] | |
34 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 | |
35 ; EG-NEXT: CF_END | |
36 ; EG-NEXT: PAD | |
37 ; EG-NEXT: ALU clause starting at 4: | |
38 ; EG-NEXT: TRUNC * T0.W, KC0[2].Z, | |
39 ; EG-NEXT: FLT_TO_INT T0.X, PV.W, | |
40 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | |
41 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) | |
12 %conv = fptosi float %in to i32 | 42 %conv = fptosi float %in to i32 |
13 store i32 %conv, i32 addrspace(1)* %out | 43 store i32 %conv, i32 addrspace(1)* %out |
14 ret void | 44 ret void |
15 } | 45 } |
16 | 46 |
17 ; FUNC-LABEL: {{^}}fp_to_sint_i32_fabs: | |
18 ; SI: v_cvt_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}} | |
19 define amdgpu_kernel void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) { | 47 define amdgpu_kernel void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) { |
48 ; SI-LABEL: fp_to_sint_i32_fabs: | |
49 ; SI: ; %bb.0: | |
50 ; SI-NEXT: s_load_dword s4, s[0:1], 0xb | |
51 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 | |
52 ; SI-NEXT: s_mov_b32 s3, 0xf000 | |
53 ; SI-NEXT: s_mov_b32 s2, -1 | |
54 ; SI-NEXT: s_waitcnt lgkmcnt(0) | |
55 ; SI-NEXT: v_cvt_i32_f32_e64 v0, |s4| | |
56 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 | |
57 ; SI-NEXT: s_endpgm | |
58 ; | |
59 ; VI-LABEL: fp_to_sint_i32_fabs: | |
60 ; VI: ; %bb.0: | |
61 ; VI-NEXT: s_load_dword s2, s[0:1], 0x2c | |
62 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 | |
63 ; VI-NEXT: s_mov_b32 s3, 0xf000 | |
64 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
65 ; VI-NEXT: v_cvt_i32_f32_e64 v0, |s2| | |
66 ; VI-NEXT: s_mov_b32 s2, -1 | |
67 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 | |
68 ; VI-NEXT: s_endpgm | |
69 ; | |
70 ; EG-LABEL: fp_to_sint_i32_fabs: | |
71 ; EG: ; %bb.0: | |
72 ; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] | |
73 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 | |
74 ; EG-NEXT: CF_END | |
75 ; EG-NEXT: PAD | |
76 ; EG-NEXT: ALU clause starting at 4: | |
77 ; EG-NEXT: TRUNC * T0.W, |KC0[2].Z|, | |
78 ; EG-NEXT: FLT_TO_INT T0.X, PV.W, | |
79 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | |
80 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) | |
20 %in.fabs = call float @llvm.fabs.f32(float %in) | 81 %in.fabs = call float @llvm.fabs.f32(float %in) |
21 %conv = fptosi float %in.fabs to i32 | 82 %conv = fptosi float %in.fabs to i32 |
22 store i32 %conv, i32 addrspace(1)* %out | 83 store i32 %conv, i32 addrspace(1)* %out |
23 ret void | 84 ret void |
24 } | 85 } |
25 | 86 |
26 ; FUNC-LABEL: {{^}}fp_to_sint_v2i32: | |
27 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} | |
28 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} | |
29 ; SI: v_cvt_i32_f32_e32 | |
30 ; SI: v_cvt_i32_f32_e32 | |
31 define amdgpu_kernel void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { | 87 define amdgpu_kernel void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { |
88 ; SI-LABEL: fp_to_sint_v2i32: | |
89 ; SI: ; %bb.0: | |
90 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb | |
91 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 | |
92 ; SI-NEXT: s_mov_b32 s3, 0xf000 | |
93 ; SI-NEXT: s_mov_b32 s2, -1 | |
94 ; SI-NEXT: s_waitcnt lgkmcnt(0) | |
95 ; SI-NEXT: v_cvt_i32_f32_e32 v1, s5 | |
96 ; SI-NEXT: v_cvt_i32_f32_e32 v0, s4 | |
97 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 | |
98 ; SI-NEXT: s_endpgm | |
99 ; | |
100 ; VI-LABEL: fp_to_sint_v2i32: | |
101 ; VI: ; %bb.0: | |
102 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c | |
103 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 | |
104 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
105 ; VI-NEXT: v_cvt_i32_f32_e32 v1, s3 | |
106 ; VI-NEXT: v_cvt_i32_f32_e32 v0, s2 | |
107 ; VI-NEXT: s_mov_b32 s3, 0xf000 | |
108 ; VI-NEXT: s_mov_b32 s2, -1 | |
109 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 | |
110 ; VI-NEXT: s_endpgm | |
111 ; | |
112 ; EG-LABEL: fp_to_sint_v2i32: | |
113 ; EG: ; %bb.0: | |
114 ; EG-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] | |
115 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 | |
116 ; EG-NEXT: CF_END | |
117 ; EG-NEXT: PAD | |
118 ; EG-NEXT: ALU clause starting at 4: | |
119 ; EG-NEXT: TRUNC * T0.W, KC0[3].X, | |
120 ; EG-NEXT: FLT_TO_INT T0.Y, PV.W, | |
121 ; EG-NEXT: TRUNC * T0.W, KC0[2].W, | |
122 ; EG-NEXT: FLT_TO_INT T0.X, PV.W, | |
123 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | |
124 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) | |
32 %result = fptosi <2 x float> %in to <2 x i32> | 125 %result = fptosi <2 x float> %in to <2 x i32> |
33 store <2 x i32> %result, <2 x i32> addrspace(1)* %out | 126 store <2 x i32> %result, <2 x i32> addrspace(1)* %out |
34 ret void | 127 ret void |
35 } | 128 } |
36 | 129 |
37 ; FUNC-LABEL: {{^}}fp_to_sint_v4i32: | |
38 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} | |
39 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW]}} | |
40 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} | |
41 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} | |
42 ; SI: v_cvt_i32_f32_e32 | |
43 ; SI: v_cvt_i32_f32_e32 | |
44 ; SI: v_cvt_i32_f32_e32 | |
45 ; SI: v_cvt_i32_f32_e32 | |
46 define amdgpu_kernel void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { | 130 define amdgpu_kernel void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { |
131 ; SI-LABEL: fp_to_sint_v4i32: | |
132 ; SI: ; %bb.0: | |
133 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 | |
134 ; SI-NEXT: s_waitcnt lgkmcnt(0) | |
135 ; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 | |
136 ; SI-NEXT: s_mov_b32 s3, 0xf000 | |
137 ; SI-NEXT: s_mov_b32 s2, -1 | |
138 ; SI-NEXT: s_waitcnt lgkmcnt(0) | |
139 ; SI-NEXT: v_cvt_i32_f32_e32 v3, s7 | |
140 ; SI-NEXT: v_cvt_i32_f32_e32 v2, s6 | |
141 ; SI-NEXT: v_cvt_i32_f32_e32 v1, s5 | |
142 ; SI-NEXT: v_cvt_i32_f32_e32 v0, s4 | |
143 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 | |
144 ; SI-NEXT: s_endpgm | |
145 ; | |
146 ; VI-LABEL: fp_to_sint_v4i32: | |
147 ; VI: ; %bb.0: | |
148 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 | |
149 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
150 ; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 | |
151 ; VI-NEXT: s_mov_b32 s3, 0xf000 | |
152 ; VI-NEXT: s_mov_b32 s2, -1 | |
153 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
154 ; VI-NEXT: v_cvt_i32_f32_e32 v3, s7 | |
155 ; VI-NEXT: v_cvt_i32_f32_e32 v2, s6 | |
156 ; VI-NEXT: v_cvt_i32_f32_e32 v1, s5 | |
157 ; VI-NEXT: v_cvt_i32_f32_e32 v0, s4 | |
158 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 | |
159 ; VI-NEXT: s_endpgm | |
160 ; | |
161 ; EG-LABEL: fp_to_sint_v4i32: | |
162 ; EG: ; %bb.0: | |
163 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] | |
164 ; EG-NEXT: TEX 0 @6 | |
165 ; EG-NEXT: ALU 9, @9, KC0[CB0:0-32], KC1[] | |
166 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 | |
167 ; EG-NEXT: CF_END | |
168 ; EG-NEXT: PAD | |
169 ; EG-NEXT: Fetch clause starting at 6: | |
170 ; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 | |
171 ; EG-NEXT: ALU clause starting at 8: | |
172 ; EG-NEXT: MOV * T0.X, KC0[2].Z, | |
173 ; EG-NEXT: ALU clause starting at 9: | |
174 ; EG-NEXT: TRUNC T0.W, T0.W, | |
175 ; EG-NEXT: TRUNC * T1.W, T0.Z, | |
176 ; EG-NEXT: FLT_TO_INT * T0.W, PV.W, | |
177 ; EG-NEXT: FLT_TO_INT T0.Z, T1.W, | |
178 ; EG-NEXT: TRUNC * T1.W, T0.Y, | |
179 ; EG-NEXT: FLT_TO_INT T0.Y, PV.W, | |
180 ; EG-NEXT: TRUNC * T1.W, T0.X, | |
181 ; EG-NEXT: FLT_TO_INT T0.X, PV.W, | |
182 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | |
183 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) | |
47 %value = load <4 x float>, <4 x float> addrspace(1) * %in | 184 %value = load <4 x float>, <4 x float> addrspace(1) * %in |
48 %result = fptosi <4 x float> %value to <4 x i32> | 185 %result = fptosi <4 x float> %value to <4 x i32> |
49 store <4 x i32> %result, <4 x i32> addrspace(1)* %out | 186 store <4 x i32> %result, <4 x i32> addrspace(1)* %out |
50 ret void | 187 ret void |
51 } | 188 } |
52 | 189 |
53 ; FUNC-LABEL: {{^}}fp_to_sint_i64: | |
54 | |
55 ; EG-DAG: AND_INT | |
56 ; EG-DAG: LSHR | |
57 ; EG-DAG: SUB_INT | |
58 ; EG-DAG: AND_INT | |
59 ; EG-DAG: ASHR | |
60 ; EG-DAG: AND_INT | |
61 ; EG-DAG: OR_INT | |
62 ; EG-DAG: SUB_INT | |
63 ; EG-DAG: LSHL | |
64 ; EG-DAG: LSHL | |
65 ; EG-DAG: SUB_INT | |
66 ; EG-DAG: LSHR | |
67 ; EG-DAG: LSHR | |
68 ; EG-DAG: SETGT_UINT | |
69 ; EG-DAG: SETGT_INT | |
70 ; EG-DAG: XOR_INT | |
71 ; EG-DAG: XOR_INT | |
72 ; EG: SUB_INT | |
73 ; EG-DAG: SUB_INT | |
74 ; EG-DAG: CNDE_INT | |
75 ; EG-DAG: CNDE_INT | |
76 | |
77 ; Check that the compiler doesn't crash with a "cannot select" error | 190 ; Check that the compiler doesn't crash with a "cannot select" error |
78 ; SI: s_endpgm | |
79 define amdgpu_kernel void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) { | 191 define amdgpu_kernel void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) { |
192 ; SI-LABEL: fp_to_sint_i64: | |
193 ; SI: ; %bb.0: ; %entry | |
194 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 | |
195 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb | |
196 ; SI-NEXT: s_mov_b32 s7, 0xf000 | |
197 ; SI-NEXT: s_mov_b32 s6, -1 | |
198 ; SI-NEXT: s_mov_b32 s1, 0 | |
199 ; SI-NEXT: s_waitcnt lgkmcnt(0) | |
200 ; SI-NEXT: s_bfe_u32 s8, s0, 0x80017 | |
201 ; SI-NEXT: s_and_b32 s2, s0, 0x7fffff | |
202 ; SI-NEXT: s_ashr_i32 s9, s0, 31 | |
203 ; SI-NEXT: s_add_i32 s3, s8, 0xffffff6a | |
204 ; SI-NEXT: s_or_b32 s0, s2, 0x800000 | |
205 ; SI-NEXT: s_sub_i32 s10, 0x96, s8 | |
206 ; SI-NEXT: s_ashr_i32 s11, s9, 31 | |
207 ; SI-NEXT: s_lshl_b64 s[2:3], s[0:1], s3 | |
208 ; SI-NEXT: s_lshr_b64 s[0:1], s[0:1], s10 | |
209 ; SI-NEXT: s_addk_i32 s8, 0xff81 | |
210 ; SI-NEXT: v_mov_b32_e32 v0, s11 | |
211 ; SI-NEXT: v_mov_b32_e32 v1, s1 | |
212 ; SI-NEXT: v_mov_b32_e32 v2, s3 | |
213 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s8, 23 | |
214 ; SI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc | |
215 ; SI-NEXT: v_mov_b32_e32 v2, s0 | |
216 ; SI-NEXT: v_mov_b32_e32 v3, s2 | |
217 ; SI-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc | |
218 ; SI-NEXT: v_xor_b32_e32 v1, s11, v1 | |
219 ; SI-NEXT: v_xor_b32_e32 v2, s9, v2 | |
220 ; SI-NEXT: v_subrev_i32_e32 v2, vcc, s9, v2 | |
221 ; SI-NEXT: v_subb_u32_e32 v0, vcc, v1, v0, vcc | |
222 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s8, 0 | |
223 ; SI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1] | |
224 ; SI-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[0:1] | |
225 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 | |
226 ; SI-NEXT: s_endpgm | |
227 ; | |
228 ; VI-LABEL: fp_to_sint_i64: | |
229 ; VI: ; %bb.0: ; %entry | |
230 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 | |
231 ; VI-NEXT: s_load_dword s8, s[0:1], 0x2c | |
232 ; VI-NEXT: s_mov_b32 s1, 0 | |
233 ; VI-NEXT: s_mov_b32 s7, 0xf000 | |
234 ; VI-NEXT: s_mov_b32 s6, -1 | |
235 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
236 ; VI-NEXT: s_bfe_u32 s9, s8, 0x80017 | |
237 ; VI-NEXT: s_and_b32 s0, s8, 0x7fffff | |
238 ; VI-NEXT: s_add_i32 s2, s9, 0xffffff6a | |
239 ; VI-NEXT: s_bitset1_b32 s0, 23 | |
240 ; VI-NEXT: s_sub_i32 s10, 0x96, s9 | |
241 ; VI-NEXT: s_lshl_b64 s[2:3], s[0:1], s2 | |
242 ; VI-NEXT: s_lshr_b64 s[0:1], s[0:1], s10 | |
243 ; VI-NEXT: s_addk_i32 s9, 0xff81 | |
244 ; VI-NEXT: v_mov_b32_e32 v0, s1 | |
245 ; VI-NEXT: v_mov_b32_e32 v1, s3 | |
246 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s9, 23 | |
247 ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc | |
248 ; VI-NEXT: v_mov_b32_e32 v1, s0 | |
249 ; VI-NEXT: v_mov_b32_e32 v2, s2 | |
250 ; VI-NEXT: s_ashr_i32 s0, s8, 31 | |
251 ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc | |
252 ; VI-NEXT: s_ashr_i32 s1, s0, 31 | |
253 ; VI-NEXT: v_xor_b32_e32 v1, s0, v1 | |
254 ; VI-NEXT: v_xor_b32_e32 v0, s1, v0 | |
255 ; VI-NEXT: v_mov_b32_e32 v2, s1 | |
256 ; VI-NEXT: v_subrev_u32_e32 v3, vcc, s0, v1 | |
257 ; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v2, vcc | |
258 ; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], s9, 0 | |
259 ; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1] | |
260 ; VI-NEXT: v_cndmask_b32_e64 v0, v3, 0, s[0:1] | |
261 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 | |
262 ; VI-NEXT: s_endpgm | |
263 ; | |
264 ; EG-LABEL: fp_to_sint_i64: | |
265 ; EG: ; %bb.0: ; %entry | |
266 ; EG-NEXT: ALU 41, @4, KC0[CB0:0-32], KC1[] | |
267 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 | |
268 ; EG-NEXT: CF_END | |
269 ; EG-NEXT: PAD | |
270 ; EG-NEXT: ALU clause starting at 4: | |
271 ; EG-NEXT: MOV * T0.W, literal.x, | |
272 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) | |
273 ; EG-NEXT: BFE_UINT T0.W, KC0[2].Z, literal.x, PV.W, | |
274 ; EG-NEXT: AND_INT * T1.W, KC0[2].Z, literal.y, | |
275 ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38) | |
276 ; EG-NEXT: OR_INT T1.W, PS, literal.x, | |
277 ; EG-NEXT: ADD_INT * T2.W, PV.W, literal.y, | |
278 ; EG-NEXT: 8388608(1.175494e-38), -150(nan) | |
279 ; EG-NEXT: ADD_INT T0.X, T0.W, literal.x, | |
280 ; EG-NEXT: SUB_INT T0.Y, literal.y, T0.W, | |
281 ; EG-NEXT: AND_INT T0.Z, PS, literal.z, | |
282 ; EG-NEXT: NOT_INT T0.W, PS, | |
283 ; EG-NEXT: LSHR * T3.W, PV.W, 1, | |
284 ; EG-NEXT: -127(nan), 150(2.101948e-43) | |
285 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) | |
286 ; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, PS, PV.W, | |
287 ; EG-NEXT: LSHL T1.Y, T1.W, PV.Z, | |
288 ; EG-NEXT: AND_INT T0.Z, T2.W, literal.x, BS:VEC_120/SCL_212 | |
289 ; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122 | |
290 ; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x, | |
291 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) | |
292 ; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0, | |
293 ; EG-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, 0.0, | |
294 ; EG-NEXT: CNDE_INT T0.W, PV.Z, PV.X, PV.Y, | |
295 ; EG-NEXT: SETGT_INT * T1.W, T0.X, literal.x, | |
296 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) | |
297 ; EG-NEXT: CNDE_INT T0.Z, PS, 0.0, PV.W, | |
298 ; EG-NEXT: CNDE_INT T0.W, PS, PV.Y, PV.Z, | |
299 ; EG-NEXT: ASHR * T1.W, KC0[2].Z, literal.x, | |
300 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) | |
301 ; EG-NEXT: XOR_INT T0.W, PV.W, PS, | |
302 ; EG-NEXT: XOR_INT * T2.W, PV.Z, PS, | |
303 ; EG-NEXT: SUB_INT T2.W, PS, T1.W, | |
304 ; EG-NEXT: SUBB_UINT * T3.W, PV.W, T1.W, | |
305 ; EG-NEXT: SUB_INT T2.W, PV.W, PS, | |
306 ; EG-NEXT: SETGT_INT * T3.W, T0.X, literal.x, | |
307 ; EG-NEXT: -1(nan), 0(0.000000e+00) | |
308 ; EG-NEXT: CNDE_INT T0.Y, PS, 0.0, PV.W, | |
309 ; EG-NEXT: SUB_INT * T0.W, T0.W, T1.W, | |
310 ; EG-NEXT: CNDE_INT T0.X, T3.W, 0.0, PV.W, | |
311 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | |
312 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) | |
80 entry: | 313 entry: |
81 %0 = fptosi float %in to i64 | 314 %0 = fptosi float %in to i64 |
82 store i64 %0, i64 addrspace(1)* %out | 315 store i64 %0, i64 addrspace(1)* %out |
83 ret void | 316 ret void |
84 } | 317 } |
85 | 318 |
86 ; FUNC: {{^}}fp_to_sint_v2i64: | |
87 ; EG-DAG: AND_INT | |
88 ; EG-DAG: LSHR | |
89 ; EG-DAG: SUB_INT | |
90 ; EG-DAG: AND_INT | |
91 ; EG-DAG: ASHR | |
92 ; EG-DAG: AND_INT | |
93 ; EG-DAG: OR_INT | |
94 ; EG-DAG: SUB_INT | |
95 ; EG-DAG: LSHL | |
96 ; EG-DAG: LSHL | |
97 ; EG-DAG: SUB_INT | |
98 ; EG-DAG: LSHR | |
99 ; EG-DAG: LSHR | |
100 ; EG-DAG: SETGT_UINT | |
101 ; EG-DAG: SETGT_INT | |
102 ; EG-DAG: XOR_INT | |
103 ; EG-DAG: XOR_INT | |
104 ; EG-DAG: SUB_INT | |
105 ; EG-DAG: SUB_INT | |
106 ; EG-DAG: CNDE_INT | |
107 ; EG-DAG: CNDE_INT | |
108 ; EG-DAG: AND_INT | |
109 ; EG-DAG: LSHR | |
110 ; EG-DAG: SUB_INT | |
111 ; EG-DAG: AND_INT | |
112 ; EG-DAG: ASHR | |
113 ; EG-DAG: AND_INT | |
114 ; EG-DAG: OR_INT | |
115 ; EG-DAG: SUB_INT | |
116 ; EG-DAG: LSHL | |
117 ; EG-DAG: LSHL | |
118 ; EG-DAG: SUB_INT | |
119 ; EG-DAG: LSHR | |
120 ; EG-DAG: LSHR | |
121 ; EG-DAG: SETGT_UINT | |
122 ; EG-DAG: SETGT_INT | |
123 ; EG-DAG: XOR_INT | |
124 ; EG-DAG: XOR_INT | |
125 ; EG-DAG: SUB_INT | |
126 ; EG-DAG: SUB_INT | |
127 ; EG-DAG: CNDE_INT | |
128 ; EG-DAG: CNDE_INT | |
129 | |
130 ; SI: s_endpgm | |
131 define amdgpu_kernel void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { | 319 define amdgpu_kernel void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { |
320 ; SI-LABEL: fp_to_sint_v2i64: | |
321 ; SI: ; %bb.0: | |
322 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 | |
323 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb | |
324 ; SI-NEXT: s_mov_b32 s7, 0xf000 | |
325 ; SI-NEXT: s_mov_b32 s6, -1 | |
326 ; SI-NEXT: s_movk_i32 s8, 0xff6a | |
327 ; SI-NEXT: s_mov_b32 s2, 0x7fffff | |
328 ; SI-NEXT: s_mov_b32 s10, 0x800000 | |
329 ; SI-NEXT: s_mov_b32 s3, 0 | |
330 ; SI-NEXT: s_movk_i32 s9, 0x96 | |
331 ; SI-NEXT: s_movk_i32 s11, 0xff81 | |
332 ; SI-NEXT: s_waitcnt lgkmcnt(0) | |
333 ; SI-NEXT: s_bfe_u32 s12, s1, 0x80017 | |
334 ; SI-NEXT: s_and_b32 s13, s1, s2 | |
335 ; SI-NEXT: s_ashr_i32 s14, s1, 31 | |
336 ; SI-NEXT: s_bfe_u32 s1, s0, 0x80017 | |
337 ; SI-NEXT: s_and_b32 s15, s0, s2 | |
338 ; SI-NEXT: s_ashr_i32 s16, s0, 31 | |
339 ; SI-NEXT: s_add_i32 s0, s12, s8 | |
340 ; SI-NEXT: s_or_b32 s2, s13, s10 | |
341 ; SI-NEXT: s_sub_i32 s13, s9, s12 | |
342 ; SI-NEXT: s_add_i32 s12, s12, s11 | |
343 ; SI-NEXT: s_ashr_i32 s17, s14, 31 | |
344 ; SI-NEXT: s_add_i32 s18, s1, s8 | |
345 ; SI-NEXT: s_sub_i32 s19, s9, s1 | |
346 ; SI-NEXT: s_add_i32 s11, s1, s11 | |
347 ; SI-NEXT: s_ashr_i32 s20, s16, 31 | |
348 ; SI-NEXT: s_lshl_b64 s[0:1], s[2:3], s0 | |
349 ; SI-NEXT: s_lshr_b64 s[8:9], s[2:3], s13 | |
350 ; SI-NEXT: v_mov_b32_e32 v0, s17 | |
351 ; SI-NEXT: s_or_b32 s2, s15, s10 | |
352 ; SI-NEXT: v_mov_b32_e32 v1, s20 | |
353 ; SI-NEXT: v_mov_b32_e32 v2, s9 | |
354 ; SI-NEXT: v_mov_b32_e32 v3, s1 | |
355 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s12, 23 | |
356 ; SI-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc | |
357 ; SI-NEXT: v_mov_b32_e32 v3, s8 | |
358 ; SI-NEXT: v_mov_b32_e32 v4, s0 | |
359 ; SI-NEXT: s_lshl_b64 s[0:1], s[2:3], s18 | |
360 ; SI-NEXT: s_lshr_b64 s[2:3], s[2:3], s19 | |
361 ; SI-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc | |
362 ; SI-NEXT: v_xor_b32_e32 v2, s17, v2 | |
363 ; SI-NEXT: v_mov_b32_e32 v4, s3 | |
364 ; SI-NEXT: v_mov_b32_e32 v5, s1 | |
365 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s11, 23 | |
366 ; SI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc | |
367 ; SI-NEXT: v_mov_b32_e32 v5, s2 | |
368 ; SI-NEXT: v_mov_b32_e32 v6, s0 | |
369 ; SI-NEXT: v_xor_b32_e32 v3, s14, v3 | |
370 ; SI-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc | |
371 ; SI-NEXT: v_xor_b32_e32 v4, s20, v4 | |
372 ; SI-NEXT: v_subrev_i32_e32 v6, vcc, s14, v3 | |
373 ; SI-NEXT: v_subb_u32_e32 v0, vcc, v2, v0, vcc | |
374 ; SI-NEXT: v_xor_b32_e32 v5, s16, v5 | |
375 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s12, 0 | |
376 ; SI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[0:1] | |
377 ; SI-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[0:1] | |
378 ; SI-NEXT: v_subrev_i32_e32 v0, vcc, s16, v5 | |
379 ; SI-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc | |
380 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s11, 0 | |
381 ; SI-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1] | |
382 ; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1] | |
383 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 | |
384 ; SI-NEXT: s_endpgm | |
385 ; | |
386 ; VI-LABEL: fp_to_sint_v2i64: | |
387 ; VI: ; %bb.0: | |
388 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 | |
389 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c | |
390 ; VI-NEXT: s_mov_b32 s14, 0x7fffff | |
391 ; VI-NEXT: s_movk_i32 s12, 0xff6a | |
392 ; VI-NEXT: s_mov_b32 s15, 0x800000 | |
393 ; VI-NEXT: s_movk_i32 s16, 0x96 | |
394 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
395 ; VI-NEXT: s_bfe_u32 s13, s1, 0x80017 | |
396 ; VI-NEXT: s_and_b32 s2, s1, s14 | |
397 ; VI-NEXT: s_add_i32 s8, s13, s12 | |
398 ; VI-NEXT: s_or_b32 s2, s2, s15 | |
399 ; VI-NEXT: s_mov_b32 s3, 0 | |
400 ; VI-NEXT: s_sub_i32 s10, s16, s13 | |
401 ; VI-NEXT: s_movk_i32 s17, 0xff81 | |
402 ; VI-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 | |
403 ; VI-NEXT: s_lshr_b64 s[10:11], s[2:3], s10 | |
404 ; VI-NEXT: s_add_i32 s13, s13, s17 | |
405 ; VI-NEXT: v_mov_b32_e32 v0, s11 | |
406 ; VI-NEXT: v_mov_b32_e32 v1, s9 | |
407 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s13, 23 | |
408 ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc | |
409 ; VI-NEXT: v_mov_b32_e32 v2, s8 | |
410 ; VI-NEXT: v_mov_b32_e32 v1, s10 | |
411 ; VI-NEXT: s_ashr_i32 s1, s1, 31 | |
412 ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc | |
413 ; VI-NEXT: s_ashr_i32 s2, s1, 31 | |
414 ; VI-NEXT: v_xor_b32_e32 v1, s1, v1 | |
415 ; VI-NEXT: v_subrev_u32_e32 v1, vcc, s1, v1 | |
416 ; VI-NEXT: v_xor_b32_e32 v0, s2, v0 | |
417 ; VI-NEXT: v_mov_b32_e32 v2, s2 | |
418 ; VI-NEXT: s_and_b32 s2, s0, s14 | |
419 ; VI-NEXT: s_bfe_u32 s1, s0, 0x80017 | |
420 ; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v2, vcc | |
421 ; VI-NEXT: v_cmp_lt_i32_e64 s[8:9], s13, 0 | |
422 ; VI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[8:9] | |
423 ; VI-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[8:9] | |
424 ; VI-NEXT: s_add_i32 s8, s1, s12 | |
425 ; VI-NEXT: s_or_b32 s2, s2, s15 | |
426 ; VI-NEXT: s_sub_i32 s10, s16, s1 | |
427 ; VI-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 | |
428 ; VI-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 | |
429 ; VI-NEXT: s_add_i32 s1, s1, s17 | |
430 ; VI-NEXT: v_mov_b32_e32 v0, s3 | |
431 ; VI-NEXT: v_mov_b32_e32 v1, s9 | |
432 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s1, 23 | |
433 ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc | |
434 ; VI-NEXT: v_mov_b32_e32 v1, s2 | |
435 ; VI-NEXT: v_mov_b32_e32 v4, s8 | |
436 ; VI-NEXT: s_ashr_i32 s0, s0, 31 | |
437 ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc | |
438 ; VI-NEXT: s_ashr_i32 s2, s0, 31 | |
439 ; VI-NEXT: v_xor_b32_e32 v1, s0, v1 | |
440 ; VI-NEXT: v_subrev_u32_e32 v5, vcc, s0, v1 | |
441 ; VI-NEXT: v_xor_b32_e32 v0, s2, v0 | |
442 ; VI-NEXT: v_mov_b32_e32 v4, s2 | |
443 ; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v4, vcc | |
444 ; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], s1, 0 | |
445 ; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1] | |
446 ; VI-NEXT: s_mov_b32 s7, 0xf000 | |
447 ; VI-NEXT: s_mov_b32 s6, -1 | |
448 ; VI-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[0:1] | |
449 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 | |
450 ; VI-NEXT: s_endpgm | |
451 ; | |
452 ; EG-LABEL: fp_to_sint_v2i64: | |
453 ; EG: ; %bb.0: | |
454 ; EG-NEXT: ALU 77, @4, KC0[CB0:0-32], KC1[] | |
455 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1 | |
456 ; EG-NEXT: CF_END | |
457 ; EG-NEXT: PAD | |
458 ; EG-NEXT: ALU clause starting at 4: | |
459 ; EG-NEXT: MOV * T0.W, literal.x, | |
460 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) | |
461 ; EG-NEXT: BFE_UINT * T1.W, KC0[2].W, literal.x, PV.W, | |
462 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) | |
463 ; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x, | |
464 ; EG-NEXT: BFE_UINT T0.W, KC0[3].X, literal.y, T0.W, | |
465 ; EG-NEXT: ADD_INT * T2.W, PV.W, literal.z, | |
466 ; EG-NEXT: 8388607(1.175494e-38), 23(3.222986e-44) | |
467 ; EG-NEXT: -150(nan), 0(0.000000e+00) | |
468 ; EG-NEXT: SUB_INT T0.X, literal.x, PV.W, | |
469 ; EG-NEXT: SUB_INT T0.Y, literal.x, T1.W, | |
470 ; EG-NEXT: AND_INT T1.Z, PS, literal.y, | |
471 ; EG-NEXT: OR_INT T3.W, PV.Z, literal.z, | |
472 ; EG-NEXT: AND_INT * T4.W, KC0[3].X, literal.w, | |
473 ; EG-NEXT: 150(2.101948e-43), 31(4.344025e-44) | |
474 ; EG-NEXT: 8388608(1.175494e-38), 8388607(1.175494e-38) | |
475 ; EG-NEXT: OR_INT T1.X, PS, literal.x, | |
476 ; EG-NEXT: LSHL T1.Y, PV.W, PV.Z, | |
477 ; EG-NEXT: AND_INT T0.Z, T2.W, literal.y, | |
478 ; EG-NEXT: BIT_ALIGN_INT T4.W, 0.0, PV.W, PV.Y, | |
479 ; EG-NEXT: AND_INT * T5.W, PV.Y, literal.y, | |
480 ; EG-NEXT: 8388608(1.175494e-38), 32(4.484155e-44) | |
481 ; EG-NEXT: CNDE_INT T2.X, PS, PV.W, 0.0, | |
482 ; EG-NEXT: CNDE_INT T0.Y, PV.Z, PV.Y, 0.0, | |
483 ; EG-NEXT: ADD_INT T1.Z, T0.W, literal.x, | |
484 ; EG-NEXT: BIT_ALIGN_INT T4.W, 0.0, PV.X, T0.X, | |
485 ; EG-NEXT: AND_INT * T5.W, T0.X, literal.y, | |
486 ; EG-NEXT: -150(nan), 32(4.484155e-44) | |
487 ; EG-NEXT: CNDE_INT T0.X, PS, PV.W, 0.0, | |
488 ; EG-NEXT: NOT_INT T2.Y, T2.W, | |
489 ; EG-NEXT: AND_INT T2.Z, PV.Z, literal.x, | |
490 ; EG-NEXT: NOT_INT T2.W, PV.Z, | |
491 ; EG-NEXT: LSHR * T4.W, T1.X, 1, | |
492 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) | |
493 ; EG-NEXT: LSHR T3.X, T3.W, 1, | |
494 ; EG-NEXT: ADD_INT T3.Y, T0.W, literal.x, BS:VEC_120/SCL_212 | |
495 ; EG-NEXT: BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W, | |
496 ; EG-NEXT: LSHL T0.W, T1.X, PV.Z, | |
497 ; EG-NEXT: AND_INT * T2.W, T1.Z, literal.y, | |
498 ; EG-NEXT: -127(nan), 32(4.484155e-44) | |
499 ; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0, | |
500 ; EG-NEXT: CNDE_INT T4.Y, PS, PV.Z, PV.W, | |
501 ; EG-NEXT: SETGT_INT T1.Z, PV.Y, literal.x, | |
502 ; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, PV.X, T2.Y, | |
503 ; EG-NEXT: ADD_INT * T1.W, T1.W, literal.y, | |
504 ; EG-NEXT: 23(3.222986e-44), -127(nan) | |
505 ; EG-NEXT: CNDE_INT T3.X, T0.Z, PV.W, T1.Y, | |
506 ; EG-NEXT: SETGT_INT T1.Y, PS, literal.x, | |
507 ; EG-NEXT: CNDE_INT T0.Z, PV.Z, 0.0, PV.Y, | |
508 ; EG-NEXT: CNDE_INT T0.W, PV.Z, T0.X, PV.X, | |
509 ; EG-NEXT: ASHR * T2.W, KC0[3].X, literal.y, | |
510 ; EG-NEXT: 23(3.222986e-44), 31(4.344025e-44) | |
511 ; EG-NEXT: XOR_INT T0.X, PV.W, PS, | |
512 ; EG-NEXT: XOR_INT T2.Y, PV.Z, PS, | |
513 ; EG-NEXT: CNDE_INT T0.Z, PV.Y, 0.0, PV.X, | |
514 ; EG-NEXT: CNDE_INT T0.W, PV.Y, T2.X, T0.Y, | |
515 ; EG-NEXT: ASHR * T3.W, KC0[2].W, literal.x, | |
516 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) | |
517 ; EG-NEXT: XOR_INT T0.Y, PV.W, PS, | |
518 ; EG-NEXT: XOR_INT T0.Z, PV.Z, PS, | |
519 ; EG-NEXT: SUB_INT T0.W, PV.Y, T2.W, | |
520 ; EG-NEXT: SUBB_UINT * T4.W, PV.X, T2.W, | |
521 ; EG-NEXT: SUB_INT T1.Y, PV.W, PS, | |
522 ; EG-NEXT: SETGT_INT T1.Z, T3.Y, literal.x, | |
523 ; EG-NEXT: SUB_INT T0.W, PV.Z, T3.W, | |
524 ; EG-NEXT: SUBB_UINT * T4.W, PV.Y, T3.W, | |
525 ; EG-NEXT: -1(nan), 0(0.000000e+00) | |
526 ; EG-NEXT: SUB_INT T0.Z, PV.W, PS, | |
527 ; EG-NEXT: SETGT_INT T0.W, T1.W, literal.x, | |
528 ; EG-NEXT: CNDE_INT * T1.W, PV.Z, 0.0, PV.Y, BS:VEC_021/SCL_122 | |
529 ; EG-NEXT: -1(nan), 0(0.000000e+00) | |
530 ; EG-NEXT: CNDE_INT T1.Y, PV.W, 0.0, PV.Z, | |
531 ; EG-NEXT: SUB_INT * T2.W, T0.X, T2.W, | |
532 ; EG-NEXT: CNDE_INT T1.Z, T1.Z, 0.0, PV.W, | |
533 ; EG-NEXT: SUB_INT * T2.W, T0.Y, T3.W, | |
534 ; EG-NEXT: CNDE_INT T1.X, T0.W, 0.0, PV.W, | |
535 ; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, | |
536 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) | |
132 %conv = fptosi <2 x float> %x to <2 x i64> | 537 %conv = fptosi <2 x float> %x to <2 x i64> |
133 store <2 x i64> %conv, <2 x i64> addrspace(1)* %out | 538 store <2 x i64> %conv, <2 x i64> addrspace(1)* %out |
134 ret void | 539 ret void |
135 } | 540 } |
136 | 541 |
137 ; FUNC: {{^}}fp_to_sint_v4i64: | |
138 ; EG-DAG: AND_INT | |
139 ; EG-DAG: LSHR | |
140 ; EG-DAG: SUB_INT | |
141 ; EG-DAG: AND_INT | |
142 ; EG-DAG: ASHR | |
143 ; EG-DAG: AND_INT | |
144 ; EG-DAG: OR_INT | |
145 ; EG-DAG: SUB_INT | |
146 ; EG-DAG: LSHL | |
147 ; EG-DAG: LSHL | |
148 ; EG-DAG: SUB_INT | |
149 ; EG-DAG: LSHR | |
150 ; EG-DAG: LSHR | |
151 ; EG-DAG: SETGT_UINT | |
152 ; EG-DAG: SETGT_INT | |
153 ; EG-DAG: XOR_INT | |
154 ; EG-DAG: XOR_INT | |
155 ; EG-DAG: SUB_INT | |
156 ; EG-DAG: SUB_INT | |
157 ; EG-DAG: CNDE_INT | |
158 ; EG-DAG: CNDE_INT | |
159 ; EG-DAG: AND_INT | |
160 ; EG-DAG: LSHR | |
161 ; EG-DAG: SUB_INT | |
162 ; EG-DAG: AND_INT | |
163 ; EG-DAG: ASHR | |
164 ; EG-DAG: AND_INT | |
165 ; EG-DAG: OR_INT | |
166 ; EG-DAG: SUB_INT | |
167 ; EG-DAG: LSHL | |
168 ; EG-DAG: LSHL | |
169 ; EG-DAG: SUB_INT | |
170 ; EG-DAG: LSHR | |
171 ; EG-DAG: LSHR | |
172 ; EG-DAG: SETGT_UINT | |
173 ; EG-DAG: SETGT_INT | |
174 ; EG-DAG: XOR_INT | |
175 ; EG-DAG: XOR_INT | |
176 ; EG-DAG: SUB_INT | |
177 ; EG-DAG: SUB_INT | |
178 ; EG-DAG: CNDE_INT | |
179 ; EG-DAG: CNDE_INT | |
180 ; EG-DAG: AND_INT | |
181 ; EG-DAG: LSHR | |
182 ; EG-DAG: SUB_INT | |
183 ; EG-DAG: AND_INT | |
184 ; EG-DAG: ASHR | |
185 ; EG-DAG: AND_INT | |
186 ; EG-DAG: OR_INT | |
187 ; EG-DAG: SUB_INT | |
188 ; EG-DAG: LSHL | |
189 ; EG-DAG: LSHL | |
190 ; EG-DAG: SUB_INT | |
191 ; EG-DAG: LSHR | |
192 ; EG-DAG: LSHR | |
193 ; EG-DAG: SETGT_UINT | |
194 ; EG-DAG: SETGT_INT | |
195 ; EG-DAG: XOR_INT | |
196 ; EG-DAG: XOR_INT | |
197 ; EG-DAG: SUB_INT | |
198 ; EG-DAG: SUB_INT | |
199 ; EG-DAG: CNDE_INT | |
200 ; EG-DAG: CNDE_INT | |
201 ; EG-DAG: AND_INT | |
202 ; EG-DAG: LSHR | |
203 ; EG-DAG: SUB_INT | |
204 ; EG-DAG: AND_INT | |
205 ; EG-DAG: ASHR | |
206 ; EG-DAG: AND_INT | |
207 ; EG-DAG: OR_INT | |
208 ; EG-DAG: SUB_INT | |
209 ; EG-DAG: LSHL | |
210 ; EG-DAG: LSHL | |
211 ; EG-DAG: SUB_INT | |
212 ; EG-DAG: LSHR | |
213 ; EG-DAG: LSHR | |
214 ; EG-DAG: SETGT_UINT | |
215 ; EG-DAG: SETGT_INT | |
216 ; EG-DAG: XOR_INT | |
217 ; EG-DAG: XOR_INT | |
218 ; EG-DAG: SUB_INT | |
219 ; EG-DAG: SUB_INT | |
220 ; EG-DAG: CNDE_INT | |
221 ; EG-DAG: CNDE_INT | |
222 | |
223 ; SI: s_endpgm | |
224 define amdgpu_kernel void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { | 542 define amdgpu_kernel void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { |
543 ; SI-LABEL: fp_to_sint_v4i64: | |
544 ; SI: ; %bb.0: | |
545 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 | |
546 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd | |
547 ; SI-NEXT: s_mov_b32 s7, 0xf000 | |
548 ; SI-NEXT: s_mov_b32 s6, -1 | |
549 ; SI-NEXT: s_movk_i32 s10, 0xff6a | |
550 ; SI-NEXT: s_mov_b32 s8, 0x7fffff | |
551 ; SI-NEXT: s_mov_b32 s11, 0x800000 | |
552 ; SI-NEXT: s_mov_b32 s9, 0 | |
553 ; SI-NEXT: s_movk_i32 s12, 0x96 | |
554 ; SI-NEXT: s_movk_i32 s13, 0xff81 | |
555 ; SI-NEXT: s_waitcnt lgkmcnt(0) | |
556 ; SI-NEXT: s_bfe_u32 s14, s1, 0x80017 | |
557 ; SI-NEXT: s_and_b32 s15, s1, s8 | |
558 ; SI-NEXT: s_ashr_i32 s16, s1, 31 | |
559 ; SI-NEXT: s_bfe_u32 s1, s0, 0x80017 | |
560 ; SI-NEXT: s_and_b32 s17, s0, s8 | |
561 ; SI-NEXT: s_ashr_i32 s18, s0, 31 | |
562 ; SI-NEXT: s_bfe_u32 s0, s3, 0x80017 | |
563 ; SI-NEXT: s_and_b32 s19, s3, s8 | |
564 ; SI-NEXT: s_ashr_i32 s20, s3, 31 | |
565 ; SI-NEXT: s_bfe_u32 s3, s2, 0x80017 | |
566 ; SI-NEXT: s_and_b32 s21, s2, s8 | |
567 ; SI-NEXT: s_ashr_i32 s22, s2, 31 | |
568 ; SI-NEXT: s_add_i32 s2, s14, s10 | |
569 ; SI-NEXT: s_or_b32 s8, s15, s11 | |
570 ; SI-NEXT: s_sub_i32 s15, s12, s14 | |
571 ; SI-NEXT: s_add_i32 s14, s14, s13 | |
572 ; SI-NEXT: s_ashr_i32 s23, s16, 31 | |
573 ; SI-NEXT: s_add_i32 s24, s1, s10 | |
574 ; SI-NEXT: s_sub_i32 s25, s12, s1 | |
575 ; SI-NEXT: s_add_i32 s26, s1, s13 | |
576 ; SI-NEXT: s_ashr_i32 s27, s18, 31 | |
577 ; SI-NEXT: s_add_i32 s28, s0, s10 | |
578 ; SI-NEXT: s_sub_i32 s29, s12, s0 | |
579 ; SI-NEXT: s_add_i32 s30, s0, s13 | |
580 ; SI-NEXT: s_ashr_i32 s31, s20, 31 | |
581 ; SI-NEXT: s_add_i32 s10, s3, s10 | |
582 ; SI-NEXT: s_sub_i32 s12, s12, s3 | |
583 ; SI-NEXT: s_add_i32 s13, s3, s13 | |
584 ; SI-NEXT: s_ashr_i32 s33, s22, 31 | |
585 ; SI-NEXT: s_lshl_b64 s[0:1], s[8:9], s2 | |
586 ; SI-NEXT: s_lshr_b64 s[2:3], s[8:9], s15 | |
587 ; SI-NEXT: v_mov_b32_e32 v0, s23 | |
588 ; SI-NEXT: s_or_b32 s8, s17, s11 | |
589 ; SI-NEXT: v_mov_b32_e32 v1, s27 | |
590 ; SI-NEXT: v_mov_b32_e32 v4, s31 | |
591 ; SI-NEXT: v_mov_b32_e32 v5, s33 | |
592 ; SI-NEXT: v_mov_b32_e32 v2, s3 | |
593 ; SI-NEXT: v_mov_b32_e32 v3, s1 | |
594 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s14, 23 | |
595 ; SI-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc | |
596 ; SI-NEXT: v_mov_b32_e32 v3, s2 | |
597 ; SI-NEXT: v_mov_b32_e32 v6, s0 | |
598 ; SI-NEXT: s_lshl_b64 s[0:1], s[8:9], s24 | |
599 ; SI-NEXT: s_lshr_b64 s[2:3], s[8:9], s25 | |
600 ; SI-NEXT: s_or_b32 s8, s19, s11 | |
601 ; SI-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc | |
602 ; SI-NEXT: v_xor_b32_e32 v2, s23, v2 | |
603 ; SI-NEXT: v_mov_b32_e32 v6, s3 | |
604 ; SI-NEXT: v_mov_b32_e32 v7, s1 | |
605 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s26, 23 | |
606 ; SI-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc | |
607 ; SI-NEXT: v_mov_b32_e32 v7, s2 | |
608 ; SI-NEXT: v_mov_b32_e32 v8, s0 | |
609 ; SI-NEXT: s_lshl_b64 s[0:1], s[8:9], s28 | |
610 ; SI-NEXT: s_lshr_b64 s[2:3], s[8:9], s29 | |
611 ; SI-NEXT: s_or_b32 s8, s21, s11 | |
612 ; SI-NEXT: v_xor_b32_e32 v3, s16, v3 | |
613 ; SI-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc | |
614 ; SI-NEXT: v_xor_b32_e32 v6, s27, v6 | |
615 ; SI-NEXT: v_mov_b32_e32 v8, s3 | |
616 ; SI-NEXT: v_mov_b32_e32 v9, s1 | |
617 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s30, 23 | |
618 ; SI-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc | |
619 ; SI-NEXT: v_mov_b32_e32 v9, s2 | |
620 ; SI-NEXT: v_mov_b32_e32 v10, s0 | |
621 ; SI-NEXT: s_lshl_b64 s[2:3], s[8:9], s10 | |
622 ; SI-NEXT: s_lshr_b64 s[8:9], s[8:9], s12 | |
623 ; SI-NEXT: v_subrev_i32_e64 v11, s[0:1], s16, v3 | |
624 ; SI-NEXT: v_subb_u32_e64 v0, s[0:1], v2, v0, s[0:1] | |
625 ; SI-NEXT: v_xor_b32_e32 v7, s18, v7 | |
626 ; SI-NEXT: v_cndmask_b32_e32 v9, v9, v10, vcc | |
627 ; SI-NEXT: v_xor_b32_e32 v8, s31, v8 | |
628 ; SI-NEXT: v_mov_b32_e32 v2, s9 | |
629 ; SI-NEXT: v_mov_b32_e32 v3, s3 | |
630 ; SI-NEXT: v_cmp_gt_i32_e64 vcc, s13, 23 | |
631 ; SI-NEXT: v_cndmask_b32_e32 v10, v2, v3, vcc | |
632 ; SI-NEXT: v_mov_b32_e32 v12, s8 | |
633 ; SI-NEXT: v_mov_b32_e32 v13, s2 | |
634 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s14, 0 | |
635 ; SI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[0:1] | |
636 ; SI-NEXT: v_cndmask_b32_e64 v2, v11, 0, s[0:1] | |
637 ; SI-NEXT: v_subrev_i32_e64 v0, s[0:1], s18, v7 | |
638 ; SI-NEXT: v_subb_u32_e64 v1, s[0:1], v6, v1, s[0:1] | |
639 ; SI-NEXT: v_xor_b32_e32 v6, s20, v9 | |
640 ; SI-NEXT: v_cndmask_b32_e32 v7, v12, v13, vcc | |
641 ; SI-NEXT: v_xor_b32_e32 v9, s33, v10 | |
642 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s26, 0 | |
643 ; SI-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1] | |
644 ; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1] | |
645 ; SI-NEXT: v_subrev_i32_e32 v6, vcc, s20, v6 | |
646 ; SI-NEXT: v_subb_u32_e32 v4, vcc, v8, v4, vcc | |
647 ; SI-NEXT: v_xor_b32_e32 v8, s22, v7 | |
648 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s30, 0 | |
649 ; SI-NEXT: v_cndmask_b32_e64 v7, v4, 0, s[0:1] | |
650 ; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[0:1] | |
651 ; SI-NEXT: v_subrev_i32_e32 v4, vcc, s22, v8 | |
652 ; SI-NEXT: v_subb_u32_e32 v5, vcc, v9, v5, vcc | |
653 ; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s13, 0 | |
654 ; SI-NEXT: v_cndmask_b32_e64 v5, v5, 0, s[0:1] | |
655 ; SI-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[0:1] | |
656 ; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 | |
657 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 | |
658 ; SI-NEXT: s_endpgm | |
659 ; | |
660 ; VI-LABEL: fp_to_sint_v4i64: | |
661 ; VI: ; %bb.0: | |
662 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 | |
663 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34 | |
664 ; VI-NEXT: s_mov_b32 s16, 0x7fffff | |
665 ; VI-NEXT: s_movk_i32 s14, 0xff6a | |
666 ; VI-NEXT: s_mov_b32 s17, 0x800000 | |
667 ; VI-NEXT: s_movk_i32 s18, 0x96 | |
668 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
669 ; VI-NEXT: s_bfe_u32 s15, s1, 0x80017 | |
670 ; VI-NEXT: s_and_b32 s8, s1, s16 | |
671 ; VI-NEXT: s_add_i32 s10, s15, s14 | |
672 ; VI-NEXT: s_or_b32 s8, s8, s17 | |
673 ; VI-NEXT: s_mov_b32 s9, 0 | |
674 ; VI-NEXT: s_sub_i32 s12, s18, s15 | |
675 ; VI-NEXT: s_movk_i32 s19, 0xff81 | |
676 ; VI-NEXT: s_lshl_b64 s[10:11], s[8:9], s10 | |
677 ; VI-NEXT: s_lshr_b64 s[12:13], s[8:9], s12 | |
678 ; VI-NEXT: s_add_i32 s15, s15, s19 | |
679 ; VI-NEXT: v_mov_b32_e32 v0, s13 | |
680 ; VI-NEXT: v_mov_b32_e32 v1, s11 | |
681 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s15, 23 | |
682 ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc | |
683 ; VI-NEXT: v_mov_b32_e32 v2, s10 | |
684 ; VI-NEXT: v_mov_b32_e32 v1, s12 | |
685 ; VI-NEXT: s_ashr_i32 s1, s1, 31 | |
686 ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc | |
687 ; VI-NEXT: s_ashr_i32 s8, s1, 31 | |
688 ; VI-NEXT: v_xor_b32_e32 v1, s1, v1 | |
689 ; VI-NEXT: v_subrev_u32_e32 v1, vcc, s1, v1 | |
690 ; VI-NEXT: v_xor_b32_e32 v0, s8, v0 | |
691 ; VI-NEXT: v_mov_b32_e32 v2, s8 | |
692 ; VI-NEXT: s_and_b32 s8, s0, s16 | |
693 ; VI-NEXT: s_bfe_u32 s1, s0, 0x80017 | |
694 ; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v2, vcc | |
695 ; VI-NEXT: v_cmp_lt_i32_e64 s[10:11], s15, 0 | |
696 ; VI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[10:11] | |
697 ; VI-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[10:11] | |
698 ; VI-NEXT: s_add_i32 s10, s1, s14 | |
699 ; VI-NEXT: s_or_b32 s8, s8, s17 | |
700 ; VI-NEXT: s_sub_i32 s12, s18, s1 | |
701 ; VI-NEXT: s_lshl_b64 s[10:11], s[8:9], s10 | |
702 ; VI-NEXT: s_lshr_b64 s[12:13], s[8:9], s12 | |
703 ; VI-NEXT: s_add_i32 s1, s1, s19 | |
704 ; VI-NEXT: v_mov_b32_e32 v0, s13 | |
705 ; VI-NEXT: v_mov_b32_e32 v1, s11 | |
706 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s1, 23 | |
707 ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc | |
708 ; VI-NEXT: v_mov_b32_e32 v1, s12 | |
709 ; VI-NEXT: v_mov_b32_e32 v4, s10 | |
710 ; VI-NEXT: s_ashr_i32 s0, s0, 31 | |
711 ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc | |
712 ; VI-NEXT: s_ashr_i32 s8, s0, 31 | |
713 ; VI-NEXT: v_xor_b32_e32 v1, s0, v1 | |
714 ; VI-NEXT: v_subrev_u32_e32 v5, vcc, s0, v1 | |
715 ; VI-NEXT: v_xor_b32_e32 v0, s8, v0 | |
716 ; VI-NEXT: v_mov_b32_e32 v4, s8 | |
717 ; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v4, vcc | |
718 ; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], s1, 0 | |
719 ; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1] | |
720 ; VI-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[0:1] | |
721 ; VI-NEXT: s_bfe_u32 s12, s3, 0x80017 | |
722 ; VI-NEXT: s_and_b32 s1, s3, s16 | |
723 ; VI-NEXT: s_add_i32 s0, s12, s14 | |
724 ; VI-NEXT: s_or_b32 s8, s1, s17 | |
725 ; VI-NEXT: s_sub_i32 s10, s18, s12 | |
726 ; VI-NEXT: s_lshl_b64 s[0:1], s[8:9], s0 | |
727 ; VI-NEXT: s_lshr_b64 s[10:11], s[8:9], s10 | |
728 ; VI-NEXT: s_add_i32 s12, s12, s19 | |
729 ; VI-NEXT: v_mov_b32_e32 v4, s11 | |
730 ; VI-NEXT: v_mov_b32_e32 v5, s1 | |
731 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s12, 23 | |
732 ; VI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc | |
733 ; VI-NEXT: v_mov_b32_e32 v6, s0 | |
734 ; VI-NEXT: v_mov_b32_e32 v5, s10 | |
735 ; VI-NEXT: s_ashr_i32 s0, s3, 31 | |
736 ; VI-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc | |
737 ; VI-NEXT: s_ashr_i32 s1, s0, 31 | |
738 ; VI-NEXT: v_xor_b32_e32 v5, s0, v5 | |
739 ; VI-NEXT: v_xor_b32_e32 v4, s1, v4 | |
740 ; VI-NEXT: v_mov_b32_e32 v6, s1 | |
741 ; VI-NEXT: v_subrev_u32_e32 v5, vcc, s0, v5 | |
742 ; VI-NEXT: v_subb_u32_e32 v4, vcc, v4, v6, vcc | |
743 ; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], s12, 0 | |
744 ; VI-NEXT: s_bfe_u32 s3, s2, 0x80017 | |
745 ; VI-NEXT: v_cndmask_b32_e64 v7, v4, 0, s[0:1] | |
746 ; VI-NEXT: v_cndmask_b32_e64 v6, v5, 0, s[0:1] | |
747 ; VI-NEXT: s_and_b32 s1, s2, s16 | |
748 ; VI-NEXT: s_add_i32 s0, s3, s14 | |
749 ; VI-NEXT: s_or_b32 s8, s1, s17 | |
750 ; VI-NEXT: s_sub_i32 s10, s18, s3 | |
751 ; VI-NEXT: s_lshl_b64 s[0:1], s[8:9], s0 | |
752 ; VI-NEXT: s_lshr_b64 s[8:9], s[8:9], s10 | |
753 ; VI-NEXT: s_add_i32 s3, s3, s19 | |
754 ; VI-NEXT: v_mov_b32_e32 v4, s9 | |
755 ; VI-NEXT: v_mov_b32_e32 v5, s1 | |
756 ; VI-NEXT: v_cmp_gt_i32_e64 vcc, s3, 23 | |
757 ; VI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc | |
758 ; VI-NEXT: v_mov_b32_e32 v8, s0 | |
759 ; VI-NEXT: v_mov_b32_e32 v5, s8 | |
760 ; VI-NEXT: s_ashr_i32 s0, s2, 31 | |
761 ; VI-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc | |
762 ; VI-NEXT: s_ashr_i32 s1, s0, 31 | |
763 ; VI-NEXT: v_xor_b32_e32 v5, s0, v5 | |
764 ; VI-NEXT: v_xor_b32_e32 v4, s1, v4 | |
765 ; VI-NEXT: v_mov_b32_e32 v8, s1 | |
766 ; VI-NEXT: v_subrev_u32_e32 v9, vcc, s0, v5 | |
767 ; VI-NEXT: v_subb_u32_e32 v4, vcc, v4, v8, vcc | |
768 ; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], s3, 0 | |
769 ; VI-NEXT: v_cndmask_b32_e64 v5, v4, 0, s[0:1] | |
770 ; VI-NEXT: s_mov_b32 s7, 0xf000 | |
771 ; VI-NEXT: s_mov_b32 s6, -1 | |
772 ; VI-NEXT: v_cndmask_b32_e64 v4, v9, 0, s[0:1] | |
773 ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 | |
774 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 | |
775 ; VI-NEXT: s_endpgm | |
776 ; | |
777 ; EG-LABEL: fp_to_sint_v4i64: | |
778 ; EG: ; %bb.0: | |
779 ; EG-NEXT: ALU 101, @6, KC0[CB0:0-32], KC1[] | |
780 ; EG-NEXT: ALU 58, @108, KC0[CB0:0-32], KC1[] | |
781 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T0.X, 0 | |
782 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T2.X, 1 | |
783 ; EG-NEXT: CF_END | |
784 ; EG-NEXT: PAD | |
785 ; EG-NEXT: ALU clause starting at 6: | |
786 ; EG-NEXT: MOV * T0.W, literal.x, | |
787 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) | |
788 ; EG-NEXT: BFE_UINT T1.W, KC0[4].X, literal.x, PV.W, | |
789 ; EG-NEXT: AND_INT * T2.W, KC0[4].X, literal.y, | |
790 ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38) | |
791 ; EG-NEXT: OR_INT T0.Z, PS, literal.x, | |
792 ; EG-NEXT: BFE_UINT T2.W, KC0[3].Z, literal.y, T0.W, | |
793 ; EG-NEXT: ADD_INT * T3.W, PV.W, literal.z, | |
794 ; EG-NEXT: 8388608(1.175494e-38), 23(3.222986e-44) | |
795 ; EG-NEXT: -150(nan), 0(0.000000e+00) | |
796 ; EG-NEXT: ADD_INT T0.Y, PV.W, literal.x, | |
797 ; EG-NEXT: AND_INT T1.Z, PS, literal.y, | |
798 ; EG-NEXT: NOT_INT T4.W, PS, | |
799 ; EG-NEXT: LSHR * T5.W, PV.Z, 1, | |
800 ; EG-NEXT: -127(nan), 31(4.344025e-44) | |
801 ; EG-NEXT: ADD_INT T0.X, T1.W, literal.x, | |
802 ; EG-NEXT: BIT_ALIGN_INT T1.Y, 0.0, PS, PV.W, | |
803 ; EG-NEXT: AND_INT T2.Z, T3.W, literal.y, BS:VEC_201 | |
804 ; EG-NEXT: LSHL T3.W, T0.Z, PV.Z, | |
805 ; EG-NEXT: SUB_INT * T1.W, literal.z, T1.W, | |
806 ; EG-NEXT: -127(nan), 32(4.484155e-44) | |
807 ; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00) | |
808 ; EG-NEXT: AND_INT T1.X, PS, literal.x, | |
809 ; EG-NEXT: BIT_ALIGN_INT T2.Y, 0.0, T0.Z, PS, | |
810 ; EG-NEXT: AND_INT T0.Z, KC0[3].Z, literal.y, | |
811 ; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.Y, PV.W, | |
812 ; EG-NEXT: SETGT_INT * T4.W, PV.X, literal.z, | |
813 ; EG-NEXT: 32(4.484155e-44), 8388607(1.175494e-38) | |
814 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) | |
815 ; EG-NEXT: CNDE_INT T2.X, PS, 0.0, PV.W, | |
816 ; EG-NEXT: OR_INT T1.Y, PV.Z, literal.x, | |
817 ; EG-NEXT: ADD_INT T0.Z, T2.W, literal.y, | |
818 ; EG-NEXT: CNDE_INT T1.W, PV.X, PV.Y, 0.0, | |
819 ; EG-NEXT: CNDE_INT * T3.W, T2.Z, T3.W, 0.0, | |
820 ; EG-NEXT: 8388608(1.175494e-38), -150(nan) | |
821 ; EG-NEXT: CNDE_INT T1.X, T4.W, PV.W, PS, | |
822 ; EG-NEXT: ASHR T2.Y, KC0[4].X, literal.x, | |
823 ; EG-NEXT: AND_INT T1.Z, PV.Z, literal.x, | |
824 ; EG-NEXT: NOT_INT T1.W, PV.Z, | |
825 ; EG-NEXT: LSHR * T3.W, PV.Y, 1, | |
826 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) | |
827 ; EG-NEXT: BIT_ALIGN_INT T3.X, 0.0, PS, PV.W, | |
828 ; EG-NEXT: LSHL T3.Y, T1.Y, PV.Z, | |
829 ; EG-NEXT: XOR_INT T1.Z, PV.X, PV.Y, | |
830 ; EG-NEXT: XOR_INT T1.W, T2.X, PV.Y, | |
831 ; EG-NEXT: SUB_INT * T2.W, literal.x, T2.W, | |
832 ; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00) | |
833 ; EG-NEXT: AND_INT T1.X, T0.Z, literal.x, | |
834 ; EG-NEXT: AND_INT T4.Y, PS, literal.x, | |
835 ; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, T1.Y, PS, BS:VEC_021/SCL_122 | |
836 ; EG-NEXT: SUB_INT T1.W, PV.W, T2.Y, | |
837 ; EG-NEXT: SUBB_UINT * T2.W, PV.Z, T2.Y, | |
838 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) | |
839 ; EG-NEXT: SUB_INT T2.X, PV.W, PS, | |
840 ; EG-NEXT: CNDE_INT T1.Y, PV.Y, PV.Z, 0.0, | |
841 ; EG-NEXT: CNDE_INT T0.Z, PV.X, T3.Y, 0.0, | |
842 ; EG-NEXT: CNDE_INT T1.W, PV.X, T3.X, T3.Y, BS:VEC_021/SCL_122 | |
843 ; EG-NEXT: SETGT_INT * T2.W, T0.Y, literal.x, | |
844 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) | |
845 ; EG-NEXT: BFE_UINT T1.X, KC0[3].W, literal.x, T0.W, | |
846 ; EG-NEXT: AND_INT T3.Y, KC0[3].W, literal.y, | |
847 ; EG-NEXT: CNDE_INT T2.Z, PS, 0.0, PV.W, | |
848 ; EG-NEXT: CNDE_INT T1.W, PS, PV.Y, PV.Z, | |
849 ; EG-NEXT: ASHR * T2.W, KC0[3].Z, literal.z, | |
850 ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38) | |
851 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) | |
852 ; EG-NEXT: BFE_UINT T3.X, KC0[3].Y, literal.x, T0.W, | |
853 ; EG-NEXT: XOR_INT T1.Y, PV.W, PS, | |
854 ; EG-NEXT: XOR_INT T0.Z, PV.Z, PS, | |
855 ; EG-NEXT: OR_INT T0.W, PV.Y, literal.y, | |
856 ; EG-NEXT: SUB_INT * T1.W, literal.z, PV.X, | |
857 ; EG-NEXT: 23(3.222986e-44), 8388608(1.175494e-38) | |
858 ; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00) | |
859 ; EG-NEXT: AND_INT T4.X, KC0[3].Y, literal.x, | |
860 ; EG-NEXT: AND_INT T3.Y, PS, literal.y, | |
861 ; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PV.W, PS, | |
862 ; EG-NEXT: SUB_INT T1.W, PV.Z, T2.W, | |
863 ; EG-NEXT: SUBB_UINT * T3.W, PV.Y, T2.W, | |
864 ; EG-NEXT: 8388607(1.175494e-38), 32(4.484155e-44) | |
865 ; EG-NEXT: SUB_INT T5.X, PV.W, PS, | |
866 ; EG-NEXT: SETGT_INT T0.Y, T0.Y, literal.x, | |
867 ; EG-NEXT: CNDE_INT T0.Z, PV.Y, PV.Z, 0.0, | |
868 ; EG-NEXT: OR_INT T1.W, PV.X, literal.y, | |
869 ; EG-NEXT: ADD_INT * T3.W, T3.X, literal.z, | |
870 ; EG-NEXT: -1(nan), 8388608(1.175494e-38) | |
871 ; EG-NEXT: -150(nan), 0(0.000000e+00) | |
872 ; EG-NEXT: ADD_INT T4.X, T3.X, literal.x, | |
873 ; EG-NEXT: SUB_INT T3.Y, literal.y, T3.X, | |
874 ; EG-NEXT: AND_INT T2.Z, PS, literal.z, | |
875 ; EG-NEXT: NOT_INT T4.W, PS, | |
876 ; EG-NEXT: LSHR * T5.W, PV.W, 1, | |
877 ; EG-NEXT: -127(nan), 150(2.101948e-43) | |
878 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) | |
879 ; EG-NEXT: BIT_ALIGN_INT T3.X, 0.0, PS, PV.W, | |
880 ; EG-NEXT: LSHL T4.Y, T1.W, PV.Z, | |
881 ; EG-NEXT: AND_INT T2.Z, T3.W, literal.x, BS:VEC_120/SCL_212 | |
882 ; EG-NEXT: BIT_ALIGN_INT T1.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122 | |
883 ; EG-NEXT: AND_INT * T3.W, PV.Y, literal.x, | |
884 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) | |
885 ; EG-NEXT: ADD_INT T6.X, T1.X, literal.x, | |
886 ; EG-NEXT: CNDE_INT * T3.Y, PS, PV.W, 0.0, | |
887 ; EG-NEXT: -150(nan), 0(0.000000e+00) | |
888 ; EG-NEXT: ALU clause starting at 108: | |
889 ; EG-NEXT: CNDE_INT T3.Z, T2.Z, T4.Y, 0.0, | |
890 ; EG-NEXT: CNDE_INT T1.W, T2.Z, T3.X, T4.Y, | |
891 ; EG-NEXT: SETGT_INT * T3.W, T4.X, literal.x, | |
892 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) | |
893 ; EG-NEXT: CNDE_INT T3.X, PS, 0.0, PV.W, | |
894 ; EG-NEXT: CNDE_INT T3.Y, PS, T3.Y, PV.Z, | |
895 ; EG-NEXT: AND_INT T2.Z, T6.X, literal.x, | |
896 ; EG-NEXT: NOT_INT T1.W, T6.X, | |
897 ; EG-NEXT: LSHR * T3.W, T0.W, 1, | |
898 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) | |
899 ; EG-NEXT: ASHR T7.X, KC0[3].Y, literal.x, | |
900 ; EG-NEXT: ADD_INT T4.Y, T1.X, literal.y, | |
901 ; EG-NEXT: BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W, | |
902 ; EG-NEXT: LSHL T0.W, T0.W, PV.Z, | |
903 ; EG-NEXT: AND_INT * T1.W, T6.X, literal.z, | |
904 ; EG-NEXT: 31(4.344025e-44), -127(nan) | |
905 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) | |
906 ; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0, | |
907 ; EG-NEXT: CNDE_INT T5.Y, PS, PV.Z, PV.W, | |
908 ; EG-NEXT: SETGT_INT T2.Z, PV.Y, literal.x, | |
909 ; EG-NEXT: XOR_INT T0.W, T3.Y, PV.X, | |
910 ; EG-NEXT: XOR_INT * T1.W, T3.X, PV.X, | |
911 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) | |
912 ; EG-NEXT: SUB_INT T3.X, PS, T7.X, | |
913 ; EG-NEXT: SUBB_UINT T3.Y, PV.W, T7.X, | |
914 ; EG-NEXT: CNDE_INT T3.Z, PV.Z, 0.0, PV.Y, | |
915 ; EG-NEXT: CNDE_INT T1.W, PV.Z, T0.Z, PV.X, | |
916 ; EG-NEXT: ASHR * T3.W, KC0[3].W, literal.x, | |
917 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) | |
918 ; EG-NEXT: XOR_INT T1.X, PV.W, PS, | |
919 ; EG-NEXT: XOR_INT T5.Y, PV.Z, PS, | |
920 ; EG-NEXT: SUB_INT T0.Z, PV.X, PV.Y, | |
921 ; EG-NEXT: SETGT_INT T1.W, T4.X, literal.x, | |
922 ; EG-NEXT: CNDE_INT * T6.W, T0.Y, 0.0, T5.X, BS:VEC_021/SCL_122 | |
923 ; EG-NEXT: -1(nan), 0(0.000000e+00) | |
924 ; EG-NEXT: SETGT_INT T0.X, T0.X, literal.x, | |
925 ; EG-NEXT: CNDE_INT T6.Y, PV.W, 0.0, PV.Z, | |
926 ; EG-NEXT: SUB_INT T0.Z, T1.Y, T2.W, BS:VEC_021/SCL_122 | |
927 ; EG-NEXT: SUB_INT T2.W, PV.Y, T3.W, | |
928 ; EG-NEXT: SUBB_UINT * T4.W, PV.X, T3.W, | |
929 ; EG-NEXT: -1(nan), 0(0.000000e+00) | |
930 ; EG-NEXT: SUB_INT T3.X, PV.W, PS, | |
931 ; EG-NEXT: SETGT_INT T1.Y, T4.Y, literal.x, | |
932 ; EG-NEXT: CNDE_INT T6.Z, T0.Y, 0.0, PV.Z, BS:VEC_120/SCL_212 | |
933 ; EG-NEXT: SUB_INT T0.W, T0.W, T7.X, | |
934 ; EG-NEXT: CNDE_INT * T4.W, PV.X, 0.0, T2.X, BS:VEC_021/SCL_122 | |
935 ; EG-NEXT: -1(nan), 0(0.000000e+00) | |
936 ; EG-NEXT: CNDE_INT T6.X, T1.W, 0.0, PV.W, | |
937 ; EG-NEXT: CNDE_INT T4.Y, PV.Y, 0.0, PV.X, | |
938 ; EG-NEXT: SUB_INT T0.W, T1.Z, T2.Y, | |
939 ; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, | |
940 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) | |
941 ; EG-NEXT: CNDE_INT T4.Z, T0.X, 0.0, PV.W, | |
942 ; EG-NEXT: SUB_INT * T0.W, T1.X, T3.W, BS:VEC_120/SCL_212 | |
943 ; EG-NEXT: CNDE_INT T4.X, T1.Y, 0.0, PV.W, | |
944 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, | |
945 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) | |
946 ; EG-NEXT: LSHR * T0.X, PV.W, literal.x, | |
947 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) | |
225 %conv = fptosi <4 x float> %x to <4 x i64> | 948 %conv = fptosi <4 x float> %x to <4 x i64> |
226 store <4 x i64> %conv, <4 x i64> addrspace(1)* %out | 949 store <4 x i64> %conv, <4 x i64> addrspace(1)* %out |
227 ret void | 950 ret void |
228 } | 951 } |
229 | 952 |
230 ; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i1: | |
231 ; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{[0-9]+}} | |
232 | |
233 ; EG: AND_INT | |
234 ; EG: SETE_DX10 {{[*]?}} T{{[0-9]+}}.{{[XYZW]}}, KC0[2].Z, literal.y, | |
235 ; EG-NEXT: -1082130432(-1.000000e+00) | |
236 define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { | 953 define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { |
954 ; SI-LABEL: fp_to_uint_f32_to_i1: | |
955 ; SI: ; %bb.0: | |
956 ; SI-NEXT: s_load_dword s4, s[0:1], 0xb | |
957 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 | |
958 ; SI-NEXT: s_mov_b32 s3, 0xf000 | |
959 ; SI-NEXT: s_mov_b32 s2, -1 | |
960 ; SI-NEXT: s_waitcnt lgkmcnt(0) | |
961 ; SI-NEXT: v_cmp_eq_f32_e64 s[4:5], -1.0, s4 | |
962 ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] | |
963 ; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0 | |
964 ; SI-NEXT: s_endpgm | |
965 ; | |
966 ; VI-LABEL: fp_to_uint_f32_to_i1: | |
967 ; VI: ; %bb.0: | |
968 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 | |
969 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c | |
970 ; VI-NEXT: s_mov_b32 s7, 0xf000 | |
971 ; VI-NEXT: s_mov_b32 s6, -1 | |
972 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
973 ; VI-NEXT: v_cmp_eq_f32_e64 s[0:1], -1.0, s0 | |
974 ; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] | |
975 ; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 | |
976 ; VI-NEXT: s_endpgm | |
977 ; | |
978 ; EG-LABEL: fp_to_uint_f32_to_i1: | |
979 ; EG: ; %bb.0: | |
980 ; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] | |
981 ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X | |
982 ; EG-NEXT: CF_END | |
983 ; EG-NEXT: PAD | |
984 ; EG-NEXT: ALU clause starting at 4: | |
985 ; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, | |
986 ; EG-NEXT: SETE_DX10 * T1.W, KC0[2].Z, literal.y, | |
987 ; EG-NEXT: 3(4.203895e-45), -1082130432(-1.000000e+00) | |
988 ; EG-NEXT: AND_INT T1.W, PS, 1, | |
989 ; EG-NEXT: LSHL * T0.W, PV.W, literal.x, | |
990 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) | |
991 ; EG-NEXT: LSHL T0.X, PV.W, PS, | |
992 ; EG-NEXT: LSHL * T0.W, literal.x, PS, | |
993 ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) | |
994 ; EG-NEXT: MOV T0.Y, 0.0, | |
995 ; EG-NEXT: MOV * T0.Z, 0.0, | |
996 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | |
997 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) | |
237 %conv = fptosi float %in to i1 | 998 %conv = fptosi float %in to i1 |
238 store i1 %conv, i1 addrspace(1)* %out | 999 store i1 %conv, i1 addrspace(1)* %out |
239 ret void | 1000 ret void |
240 } | 1001 } |
241 | 1002 |
242 ; FUNC-LABEL: {{^}}fp_to_uint_fabs_f32_to_i1: | |
243 ; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{[0-9]+}}| | |
244 define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { | 1003 define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { |
1004 ; SI-LABEL: fp_to_uint_fabs_f32_to_i1: | |
1005 ; SI: ; %bb.0: | |
1006 ; SI-NEXT: s_load_dword s4, s[0:1], 0xb | |
1007 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 | |
1008 ; SI-NEXT: s_mov_b32 s3, 0xf000 | |
1009 ; SI-NEXT: s_mov_b32 s2, -1 | |
1010 ; SI-NEXT: s_waitcnt lgkmcnt(0) | |
1011 ; SI-NEXT: v_cmp_eq_f32_e64 s[4:5], -1.0, |s4| | |
1012 ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] | |
1013 ; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0 | |
1014 ; SI-NEXT: s_endpgm | |
1015 ; | |
1016 ; VI-LABEL: fp_to_uint_fabs_f32_to_i1: | |
1017 ; VI: ; %bb.0: | |
1018 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 | |
1019 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c | |
1020 ; VI-NEXT: s_mov_b32 s7, 0xf000 | |
1021 ; VI-NEXT: s_mov_b32 s6, -1 | |
1022 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
1023 ; VI-NEXT: v_cmp_eq_f32_e64 s[0:1], -1.0, |s0| | |
1024 ; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] | |
1025 ; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 | |
1026 ; VI-NEXT: s_endpgm | |
1027 ; | |
1028 ; EG-LABEL: fp_to_uint_fabs_f32_to_i1: | |
1029 ; EG: ; %bb.0: | |
1030 ; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] | |
1031 ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X | |
1032 ; EG-NEXT: CF_END | |
1033 ; EG-NEXT: PAD | |
1034 ; EG-NEXT: ALU clause starting at 4: | |
1035 ; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, | |
1036 ; EG-NEXT: SETE_DX10 * T1.W, |KC0[2].Z|, literal.y, | |
1037 ; EG-NEXT: 3(4.203895e-45), -1082130432(-1.000000e+00) | |
1038 ; EG-NEXT: AND_INT T1.W, PS, 1, | |
1039 ; EG-NEXT: LSHL * T0.W, PV.W, literal.x, | |
1040 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) | |
1041 ; EG-NEXT: LSHL T0.X, PV.W, PS, | |
1042 ; EG-NEXT: LSHL * T0.W, literal.x, PS, | |
1043 ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) | |
1044 ; EG-NEXT: MOV T0.Y, 0.0, | |
1045 ; EG-NEXT: MOV * T0.Z, 0.0, | |
1046 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | |
1047 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) | |
245 %in.fabs = call float @llvm.fabs.f32(float %in) | 1048 %in.fabs = call float @llvm.fabs.f32(float %in) |
246 %conv = fptosi float %in.fabs to i1 | 1049 %conv = fptosi float %in.fabs to i1 |
247 store i1 %conv, i1 addrspace(1)* %out | 1050 store i1 %conv, i1 addrspace(1)* %out |
248 ret void | 1051 ret void |
249 } | 1052 } |
250 | 1053 |
251 ; FUNC-LABEL: {{^}}fp_to_sint_f32_i16: | |
252 ; GCN: v_cvt_i32_f32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}} | |
253 ; GCN: buffer_store_short [[VAL]] | |
254 define amdgpu_kernel void @fp_to_sint_f32_i16(i16 addrspace(1)* %out, float %in) #0 { | 1054 define amdgpu_kernel void @fp_to_sint_f32_i16(i16 addrspace(1)* %out, float %in) #0 { |
1055 ; SI-LABEL: fp_to_sint_f32_i16: | |
1056 ; SI: ; %bb.0: | |
1057 ; SI-NEXT: s_load_dword s4, s[0:1], 0xb | |
1058 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 | |
1059 ; SI-NEXT: s_mov_b32 s3, 0xf000 | |
1060 ; SI-NEXT: s_mov_b32 s2, -1 | |
1061 ; SI-NEXT: s_waitcnt lgkmcnt(0) | |
1062 ; SI-NEXT: v_cvt_i32_f32_e32 v0, s4 | |
1063 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 | |
1064 ; SI-NEXT: s_endpgm | |
1065 ; | |
1066 ; VI-LABEL: fp_to_sint_f32_i16: | |
1067 ; VI: ; %bb.0: | |
1068 ; VI-NEXT: s_load_dword s2, s[0:1], 0x2c | |
1069 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 | |
1070 ; VI-NEXT: s_mov_b32 s3, 0xf000 | |
1071 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
1072 ; VI-NEXT: v_cvt_i32_f32_e32 v0, s2 | |
1073 ; VI-NEXT: s_mov_b32 s2, -1 | |
1074 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 | |
1075 ; VI-NEXT: s_endpgm | |
1076 ; | |
1077 ; EG-LABEL: fp_to_sint_f32_i16: | |
1078 ; EG: ; %bb.0: | |
1079 ; EG-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[] | |
1080 ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X | |
1081 ; EG-NEXT: CF_END | |
1082 ; EG-NEXT: PAD | |
1083 ; EG-NEXT: ALU clause starting at 4: | |
1084 ; EG-NEXT: TRUNC T0.W, KC0[2].Z, | |
1085 ; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.x, | |
1086 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) | |
1087 ; EG-NEXT: FLT_TO_INT * T0.W, PV.W, | |
1088 ; EG-NEXT: AND_INT T0.W, PV.W, literal.x, | |
1089 ; EG-NEXT: LSHL * T1.W, T1.W, literal.y, | |
1090 ; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45) | |
1091 ; EG-NEXT: LSHL T0.X, PV.W, PS, | |
1092 ; EG-NEXT: LSHL * T0.W, literal.x, PS, | |
1093 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) | |
1094 ; EG-NEXT: MOV T0.Y, 0.0, | |
1095 ; EG-NEXT: MOV * T0.Z, 0.0, | |
1096 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | |
1097 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) | |
255 %sint = fptosi float %in to i16 | 1098 %sint = fptosi float %in to i16 |
256 store i16 %sint, i16 addrspace(1)* %out | 1099 store i16 %sint, i16 addrspace(1)* %out |
257 ret void | 1100 ret void |
258 } | 1101 } |
259 | 1102 |