; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s

; (x << (32 - width)) >> (32 - width) with VGPR operands should fold to a
; single unsigned bitfield extract (v_bfe_u32) with offset 0.
; GCN-LABEL: {{^}}v_ubfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Fix: load the width from %in1.gep. Previously both loads read %in0.gep,
  ; leaving %in1.gep dead and making src and width the same value.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}
|
|
21
|
|
; The shl result has a second (volatile-stored) use, so the shl+lshr pair must
; NOT be folded into a BFE; expect the explicit sub/shl/shr sequence instead.
; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
; SI-NEXT: v_lshr_b32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]

; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; VI-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Fix: load the width from %in1.gep (was %in0.gep, leaving %in1.gep dead).
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}
|
|
49
|
|
; Scalar-operand variant: src and width arrive as SGPR kernel arguments.
; The pattern should still become a single v_bfe_u32, with the width copied
; into a VGPR first (v_bfe_u32 cannot take two scalar operands).
; GCN-LABEL: {{^}}s_ubfe_sub_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], s[[WIDTH]]
; GCN: v_bfe_u32 v{{[0-9]+}}, s[[SRC]], 0, [[VWIDTH]]
define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x   ; per-lane output slot
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub   ; == src & ((1 << width) - 1) for width in [1,32]
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}
|
|
63
|
|
; Scalar variant with a second use of the shl: the fold to BFE must not
; happen, so the scalar sub/lshl/lshr sequence is expected to survive.
; GCN-LABEL: {{^}}s_ubfe_sub_multi_use_shl_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Volatile store keeps %shl live, blocking the BFE combine.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}
|
|
79
|
|
; Signed variant: (x << (32 - width)) arithmetic-shifted right by the same
; amount should fold to a signed bitfield extract (v_bfe_i32) with offset 0.
; GCN-LABEL: {{^}}v_sbfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Fix: load the width from %in1.gep (was %in0.gep, leaving %in1.gep dead).
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}
|
|
97
|
|
; Signed variant with a second use of the shl: the fold to v_bfe_i32 must not
; happen; expect the explicit sub/shl/ashr sequence instead.
; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
; SI-NEXT: v_ashr_i32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]

; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; VI-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Fix: load the width from %in1.gep (was %in0.gep, leaving %in1.gep dead).
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}
|
|
125
|
|
; Scalar-operand signed variant: src and width arrive as SGPR kernel
; arguments; the pattern should become a single v_bfe_i32 with the width
; copied into a VGPR first.
; GCN-LABEL: {{^}}s_sbfe_sub_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], s[[WIDTH]]
; GCN: v_bfe_i32 v{{[0-9]+}}, s[[SRC]], 0, [[VWIDTH]]
define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x   ; per-lane output slot
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub   ; sign-extends the low 'width' bits of src
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}
|
|
139
|
|
; Scalar signed variant with a second use of the shl: the fold to BFE must
; not happen, so the scalar sub/lshl/ashr sequence is expected to survive.
; GCN-LABEL: {{^}}s_sbfe_sub_multi_use_shl_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Volatile store keeps %shl live, blocking the BFE combine.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}
|
|
155
; Work-item index intrinsic used by all kernels above to address per-lane slots.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; #0: side-effect-free intrinsic; #1: kernel functions (may write memory).
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
|