comparison llvm/test/CodeGen/AMDGPU/bfe_uint.ll @ 252:1f2b6ac9f198 llvm-original

LLVM16-1
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Fri, 18 Aug 2023 09:04:13 +0900
parents 1d019706d866
children
comparison
equal deleted inserted replaced
237:c80f45b162ad 252:1f2b6ac9f198
1 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s 1 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
2 2
3 ; CHECK: {{^}}bfe_def: 3 ; CHECK: {{^}}bfe_def:
4 ; CHECK: BFE_UINT 4 ; CHECK: BFE_UINT
5 define amdgpu_kernel void @bfe_def(i32 addrspace(1)* %out, i32 %x) { 5 define amdgpu_kernel void @bfe_def(ptr addrspace(1) %out, i32 %x) {
6 entry: 6 entry:
7 %0 = lshr i32 %x, 5 7 %0 = lshr i32 %x, 5
8 %1 = and i32 %0, 15 ; 0xf 8 %1 = and i32 %0, 15 ; 0xf
9 store i32 %1, i32 addrspace(1)* %out 9 store i32 %1, ptr addrspace(1) %out
10 ret void 10 ret void
11 } 11 }
12 12
13 ; This program could be implemented using a BFE_UINT instruction, however 13 ; This program could be implemented using a BFE_UINT instruction, however
14 ; since the lshr constant + number of bits in the mask is >= 32, it can also be 14 ; since the lshr constant + number of bits in the mask is >= 32, it can also be
15 ; implemented with a LSHR instruction, which is better, because LSHR has fewer 15 ; implemented with a LSHR instruction, which is better, because LSHR has fewer
16 ; operands and requires fewer constants. 16 ; operands and requires fewer constants.
17 17
18 ; CHECK: {{^}}bfe_shift: 18 ; CHECK: {{^}}bfe_shift:
19 ; CHECK-NOT: BFE_UINT 19 ; CHECK-NOT: BFE_UINT
20 define amdgpu_kernel void @bfe_shift(i32 addrspace(1)* %out, i32 %x) { 20 define amdgpu_kernel void @bfe_shift(ptr addrspace(1) %out, i32 %x) {
21 entry: 21 entry:
22 %0 = lshr i32 %x, 16 22 %0 = lshr i32 %x, 16
23 %1 = and i32 %0, 65535 ; 0xffff 23 %1 = and i32 %0, 65535 ; 0xffff
24 store i32 %1, i32 addrspace(1)* %out 24 store i32 %1, ptr addrspace(1) %out
25 ret void 25 ret void
26 } 26 }