; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

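; The tests below rely on the usual signed bitfield-extract semantics (an
; informal sketch; offset + width <= 32 holds for every case in this file):
;   sbfe(src, offset, width) = ashr(shl(src, 32 - offset - width), 32 - width)  when width != 0
;   sbfe(src, offset, 0)     = 0
; For example, sbfe(128, 0, 8) sign-extends the low byte 0x80 to 0xffffff80.
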
; GCN-LABEL: {{^}}bfe_i32_arg_arg_arg:
; GCN: v_bfe_i32
define amdgpu_kernel void @bfe_i32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src1)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_arg_arg_imm:
; GCN: v_bfe_i32
define amdgpu_kernel void @bfe_i32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 123)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_arg_imm_arg:
; GCN: v_bfe_i32
define amdgpu_kernel void @bfe_i32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 123, i32 %src2)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_imm_arg_arg:
; GCN: v_bfe_i32
define amdgpu_kernel void @bfe_i32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 123, i32 %src1, i32 %src2)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}v_bfe_print_arg:
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
define amdgpu_kernel void @v_bfe_print_arg(ptr addrspace(1) %out, ptr addrspace(1) %src0) #0 {
  %load = load i32, ptr addrspace(1) %src0, align 4
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 2, i32 8)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_arg_0_width_reg_offset:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 0)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_arg_0_width_imm_offset:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 8, i32 0)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_6:
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_7:
; GCN-NOT: shl
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_8:
; GCN: buffer_load_dword
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_9:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_10:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_11:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_12:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_13:
; GCN: v_ashrrev_i32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_14:
; GCN-NOT: lshr
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_0:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_0(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 0)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_1:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_1(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 12334, i32 0, i32 0)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_2:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_2(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 1)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_3:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_3(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 1, i32 0, i32 1)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_4:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_4(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 0, i32 1)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_5:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_5(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 7, i32 1)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_6:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0xffffff80
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_6(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 0, i32 8)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_7:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_7(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 0, i32 8)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_8:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_8(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 6, i32 8)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_9:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_9(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65536, i32 16, i32 8)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_10:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_10(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65535, i32 16, i32 16)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_11:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -6
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_11(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 4)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_12:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_12(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 31, i32 1)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_13:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_13(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 131070, i32 16, i32 16)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_14:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_14(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 2, i32 30)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_15:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_15(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 28)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_16:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_16(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 1, i32 7)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_17:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_17(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 1, i32 31)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_18:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_18(ptr addrspace(1) %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 31, i32 1)
  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_sext_in_reg_i24:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]],
; GCN-NOT: v_lshl
; GCN-NOT: v_ashr
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 24
; GCN: buffer_store_dword [[BFE]],
define amdgpu_kernel void @bfe_sext_in_reg_i24(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 0, i32 24)
  %shl = shl i32 %bfe, 8
  %ashr = ashr i32 %shl, 8
  store i32 %ashr, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: @simplify_demanded_bfe_sdiv
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
; GCN: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
; GCN: v_add_{{[iu]}}32_e32 [[TMP1:v[0-9]+]], vcc, [[BFE]], [[TMP0]]
; GCN: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
; GCN: buffer_store_dword [[TMP2]]
define amdgpu_kernel void @simplify_demanded_bfe_sdiv(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %src = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16)
  %div = sdiv i32 %bfe, 2
  store i32 %div, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_0_width:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_0_width(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
  %load = load i32, ptr addrspace(1) %ptr, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 8, i32 0)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_8_bfe_8:
; GCN: v_bfe_i32
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_8_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
  %load = load i32, ptr addrspace(1) %ptr, align 4
  %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8)
  %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
  store i32 %bfe1, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_8_bfe_16:
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; GCN: s_endpgm
define amdgpu_kernel void @bfe_8_bfe_16(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
  %load = load i32, ptr addrspace(1) %ptr, align 4
  %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8)
  %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 16)
  store i32 %bfe1, ptr addrspace(1) %out, align 4
  ret void
}

; This really should be folded into 1
; GCN-LABEL: {{^}}bfe_16_bfe_8:
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_16_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
  %load = load i32, ptr addrspace(1) %ptr, align 4
  %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 16)
  %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
  store i32 %bfe1, ptr addrspace(1) %out, align 4
  ret void
}

; Make sure there isn't a redundant BFE
; GCN-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe:
; GCN: s_sext_i32_i8 s{{[0-9]+}}, s{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 0, i32 8)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe_wrong:
define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 8, i32 0)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}sextload_i8_to_i32_bfe:
; GCN: buffer_load_sbyte
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @sextload_i8_to_i32_bfe(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
  %load = load i8, ptr addrspace(1) %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 0, i32 8)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, ptr addrspace(1) %out, align 4
  ret void
}

; GCN: .text
; GCN-LABEL: {{^}}sextload_i8_to_i32_bfe_0:{{.*$}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
  %load = load i8, ptr addrspace(1) %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 8, i32 0)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}sext_in_reg_i1_bfe_offset_0:
; GCN-NOT: shr
; GCN-NOT: shl
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; GCN: s_endpgm
define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}sext_in_reg_i1_bfe_offset_1:
; GCN: buffer_load_dword
; GCN-NOT: shl
; GCN-NOT: shr
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
; GCN: s_endpgm
define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}sext_in_reg_i2_bfe_offset_1:
; GCN: buffer_load_dword
; GCN-NOT: v_lshl
; GCN-NOT: v_ashr
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 2
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
; GCN: s_endpgm
define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 2)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }