Mercurial > hg > CbC > CbC_llvm
comparison llvm/test/CodeGen/AMDGPU/shl.v2i16.ll @ 150:1d019706d866
LLVM10
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 15:10:13 +0900 |
parents | |
children | 0572611fdcc8 |
comparison
equal
deleted
inserted
replaced
147:c2174574ed3a | 150:1d019706d866 |
---|---|
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | |
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s | |
3 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s | |
4 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s | |
5 | |
; Scalar case: both <2 x i16> operands are kernel arguments (%lhs << %rhs) and
; the result is stored to %out.
; The gfx900 checks expect a single packed v_pk_lshlrev_b16. The tonga and
; bonaire checks expect each 16-bit half to be shifted separately with 32-bit
; scalar ops (s_lshr/s_and/s_lshl) and recombined with s_or_b32.
6 define amdgpu_kernel void @s_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 { | |
7 ; GFX9-LABEL: s_shl_v2i16: | |
8 ; GFX9: ; %bb.0: | |
9 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 | |
10 ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c | |
11 ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x30 | |
12 ; GFX9-NEXT: s_mov_b32 s7, 0xf000 | |
13 ; GFX9-NEXT: s_mov_b32 s6, -1 | |
14 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) | |
15 ; GFX9-NEXT: v_mov_b32_e32 v0, s2 | |
16 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, s0, v0 | |
17 ; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0 | |
18 ; GFX9-NEXT: s_endpgm | |
19 ; | |
20 ; VI-LABEL: s_shl_v2i16: | |
21 ; VI: ; %bb.0: | |
22 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 | |
23 ; VI-NEXT: s_load_dword s2, s[0:1], 0x2c | |
24 ; VI-NEXT: s_load_dword s0, s[0:1], 0x30 | |
25 ; VI-NEXT: s_mov_b32 s3, 0xffff | |
26 ; VI-NEXT: s_mov_b32 s7, 0xf000 | |
27 ; VI-NEXT: s_mov_b32 s6, -1 | |
28 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
29 ; VI-NEXT: s_lshr_b32 s1, s2, 16 | |
30 ; VI-NEXT: s_lshr_b32 s8, s0, 16 | |
31 ; VI-NEXT: s_and_b32 s2, s2, s3 | |
32 ; VI-NEXT: s_and_b32 s0, s0, s3 | |
33 ; VI-NEXT: s_lshl_b32 s0, s2, s0 | |
34 ; VI-NEXT: s_lshl_b32 s1, s1, s8 | |
35 ; VI-NEXT: s_lshl_b32 s1, s1, 16 | |
36 ; VI-NEXT: s_and_b32 s0, s0, s3 | |
37 ; VI-NEXT: s_or_b32 s0, s0, s1 | |
38 ; VI-NEXT: v_mov_b32_e32 v0, s0 | |
39 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 | |
40 ; VI-NEXT: s_endpgm | |
41 ; | |
42 ; CI-LABEL: s_shl_v2i16: | |
43 ; CI: ; %bb.0: | |
44 ; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 | |
45 ; CI-NEXT: s_load_dword s2, s[0:1], 0xb | |
46 ; CI-NEXT: s_load_dword s0, s[0:1], 0xc | |
47 ; CI-NEXT: s_mov_b32 s3, 0xffff | |
48 ; CI-NEXT: s_mov_b32 s7, 0xf000 | |
49 ; CI-NEXT: s_mov_b32 s6, -1 | |
50 ; CI-NEXT: s_waitcnt lgkmcnt(0) | |
51 ; CI-NEXT: s_lshr_b32 s1, s2, 16 | |
52 ; CI-NEXT: s_and_b32 s8, s0, s3 | |
53 ; CI-NEXT: s_lshr_b32 s0, s0, 16 | |
54 ; CI-NEXT: s_lshl_b32 s0, s1, s0 | |
55 ; CI-NEXT: s_lshl_b32 s1, s2, s8 | |
56 ; CI-NEXT: s_lshl_b32 s0, s0, 16 | |
57 ; CI-NEXT: s_and_b32 s1, s1, s3 | |
58 ; CI-NEXT: s_or_b32 s0, s1, s0 | |
59 ; CI-NEXT: v_mov_b32_e32 v0, s0 | |
60 ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 | |
61 ; CI-NEXT: s_endpgm | |
62 %result = shl <2 x i16> %lhs, %rhs | |
63 store <2 x i16> %result, <2 x i16> addrspace(1)* %out | |
64 ret void | |
65 } | |
66 | |
; Vector case: each work-item loads %a from %in[tid] and the shift amounts %b
; from the following <2 x i16> element (%in[tid + 1]), then stores %a << %b to
; %out[tid].
; Expected lowering per the checks below is v_pk_lshlrev_b16 for gfx900, a
; 16-bit shift plus an SDWA shift on the high words for tonga, and 32-bit
; shifts with explicit masking and repacking for bonaire.
67 define amdgpu_kernel void @v_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { | |
68 ; GFX9-LABEL: v_shl_v2i16: | |
69 ; GFX9: ; %bb.0: | |
70 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 | |
71 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 | |
72 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) | |
73 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 | |
74 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 | |
75 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc | |
76 ; GFX9-NEXT: global_load_dword v3, v[0:1], off | |
77 ; GFX9-NEXT: global_load_dword v4, v[0:1], off offset:4 | |
78 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v2 | |
79 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 | |
80 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc | |
81 ; GFX9-NEXT: s_waitcnt vmcnt(0) | |
82 ; GFX9-NEXT: v_pk_lshlrev_b16 v2, v4, v3 | |
83 ; GFX9-NEXT: global_store_dword v[0:1], v2, off | |
84 ; GFX9-NEXT: s_endpgm | |
85 ; | |
86 ; VI-LABEL: v_shl_v2i16: | |
87 ; VI: ; %bb.0: | |
88 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 | |
89 ; VI-NEXT: v_lshlrev_b32_e32 v4, 2, v0 | |
90 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
91 ; VI-NEXT: v_mov_b32_e32 v1, s3 | |
92 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v4 | |
93 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc | |
94 ; VI-NEXT: v_add_u32_e32 v2, vcc, 4, v0 | |
95 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc | |
96 ; VI-NEXT: flat_load_dword v5, v[0:1] | |
97 ; VI-NEXT: flat_load_dword v2, v[2:3] | |
98 ; VI-NEXT: v_mov_b32_e32 v1, s1 | |
99 ; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v4 | |
100 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc | |
101 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | |
102 ; VI-NEXT: v_lshlrev_b16_e32 v3, v2, v5 | |
103 ; VI-NEXT: v_lshlrev_b16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 | |
104 ; VI-NEXT: v_or_b32_e32 v2, v3, v2 | |
105 ; VI-NEXT: flat_store_dword v[0:1], v2 | |
106 ; VI-NEXT: s_endpgm | |
107 ; | |
108 ; CI-LABEL: v_shl_v2i16: | |
109 ; CI: ; %bb.0: | |
110 ; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 | |
111 ; CI-NEXT: s_mov_b32 s7, 0xf000 | |
112 ; CI-NEXT: s_mov_b32 s6, 0 | |
113 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 | |
114 ; CI-NEXT: v_mov_b32_e32 v1, 0 | |
115 ; CI-NEXT: s_waitcnt lgkmcnt(0) | |
116 ; CI-NEXT: s_mov_b64 s[4:5], s[2:3] | |
117 ; CI-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 | |
118 ; CI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 | |
119 ; CI-NEXT: s_mov_b32 s8, 0xffff | |
120 ; CI-NEXT: s_mov_b64 s[2:3], s[6:7] | |
121 ; CI-NEXT: s_waitcnt vmcnt(1) | |
122 ; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v2 | |
123 ; CI-NEXT: s_waitcnt vmcnt(0) | |
124 ; CI-NEXT: v_and_b32_e32 v5, s8, v3 | |
125 ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 | |
126 ; CI-NEXT: v_lshl_b32_e32 v3, v4, v3 | |
127 ; CI-NEXT: v_lshl_b32_e32 v2, v2, v5 | |
128 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | |
129 ; CI-NEXT: v_and_b32_e32 v2, s8, v2 | |
130 ; CI-NEXT: v_or_b32_e32 v2, v2, v3 | |
131 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 | |
132 ; CI-NEXT: s_endpgm | |
133 %tid = call i32 @llvm.amdgcn.workitem.id.x() | |
134 %tid.ext = sext i32 %tid to i64 | |
135 %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext | |
136 %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext | |
137 %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in.gep, i32 1 | |
138 %a = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep | |
139 %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr | |
140 %result = shl <2 x i16> %a, %b | |
141 store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep | |
142 ret void | |
143 } | |
144 | |
; Mixed operands: the value being shifted is loaded per work-item (%vgpr) and
; the shift amounts come from the kernel argument %sgpr (%vgpr << %sgpr).
; In the gfx900 check the scalar register s0 is the first source operand of
; v_pk_lshlrev_b16 and the loaded value v3 is the second.
145 define amdgpu_kernel void @shl_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 { | |
146 ; GFX9-LABEL: shl_v_s_v2i16: | |
147 ; GFX9: ; %bb.0: | |
148 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 | |
149 ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x34 | |
150 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 | |
151 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) | |
152 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 | |
153 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v2 | |
154 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc | |
155 ; GFX9-NEXT: global_load_dword v3, v[0:1], off | |
156 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2 | |
157 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 | |
158 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc | |
159 ; GFX9-NEXT: s_waitcnt vmcnt(0) | |
160 ; GFX9-NEXT: v_pk_lshlrev_b16 v2, s0, v3 | |
161 ; GFX9-NEXT: global_store_dword v[0:1], v2, off | |
162 ; GFX9-NEXT: s_endpgm | |
163 ; | |
164 ; VI-LABEL: shl_v_s_v2i16: | |
165 ; VI: ; %bb.0: | |
166 ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 | |
167 ; VI-NEXT: s_load_dword s0, s[0:1], 0x34 | |
168 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 | |
169 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
170 ; VI-NEXT: v_mov_b32_e32 v1, s7 | |
171 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2 | |
172 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc | |
173 ; VI-NEXT: flat_load_dword v3, v[0:1] | |
174 ; VI-NEXT: s_lshr_b32 s1, s0, 16 | |
175 ; VI-NEXT: v_mov_b32_e32 v4, s1 | |
176 ; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v2 | |
177 ; VI-NEXT: v_mov_b32_e32 v1, s5 | |
178 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc | |
179 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | |
180 ; VI-NEXT: v_lshlrev_b16_e32 v2, s0, v3 | |
181 ; VI-NEXT: v_lshlrev_b16_sdwa v3, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 | |
182 ; VI-NEXT: v_or_b32_e32 v2, v2, v3 | |
183 ; VI-NEXT: flat_store_dword v[0:1], v2 | |
184 ; VI-NEXT: s_endpgm | |
185 ; | |
186 ; CI-LABEL: shl_v_s_v2i16: | |
187 ; CI: ; %bb.0: | |
188 ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 | |
189 ; CI-NEXT: s_load_dword s8, s[0:1], 0xd | |
190 ; CI-NEXT: s_mov_b32 s3, 0xf000 | |
191 ; CI-NEXT: s_mov_b32 s2, 0 | |
192 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 | |
193 ; CI-NEXT: s_waitcnt lgkmcnt(0) | |
194 ; CI-NEXT: s_mov_b64 s[0:1], s[6:7] | |
195 ; CI-NEXT: v_mov_b32_e32 v1, 0 | |
196 ; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 | |
197 ; CI-NEXT: s_mov_b32 s9, 0xffff | |
198 ; CI-NEXT: s_lshr_b32 s10, s8, 16 | |
199 ; CI-NEXT: s_and_b32 s8, s8, s9 | |
200 ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] | |
201 ; CI-NEXT: s_waitcnt vmcnt(0) | |
202 ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 | |
203 ; CI-NEXT: v_lshlrev_b32_e32 v2, s8, v2 | |
204 ; CI-NEXT: v_lshlrev_b32_e32 v3, s10, v3 | |
205 ; CI-NEXT: v_and_b32_e32 v2, s9, v2 | |
206 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | |
207 ; CI-NEXT: v_or_b32_e32 v2, v2, v3 | |
208 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 | |
209 ; CI-NEXT: s_endpgm | |
210 %tid = call i32 @llvm.amdgcn.workitem.id.x() | |
211 %tid.ext = sext i32 %tid to i64 | |
212 %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext | |
213 %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext | |
214 %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep | |
215 %result = shl <2 x i16> %vgpr, %sgpr | |
216 store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep | |
217 ret void | |
218 } | |
219 | |
; Mixed operands, reversed roles: the kernel argument %sgpr is the value being
; shifted and the per-work-item load %vgpr supplies the shift amounts
; (%sgpr << %vgpr).
; In the gfx900 check the loaded value v3 is the first source operand of
; v_pk_lshlrev_b16 and the scalar register s0 is the second.
220 define amdgpu_kernel void @shl_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 { | |
221 ; GFX9-LABEL: shl_s_v_v2i16: | |
222 ; GFX9: ; %bb.0: | |
223 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 | |
224 ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x34 | |
225 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 | |
226 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) | |
227 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 | |
228 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v2 | |
229 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc | |
230 ; GFX9-NEXT: global_load_dword v3, v[0:1], off | |
231 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2 | |
232 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 | |
233 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc | |
234 ; GFX9-NEXT: s_waitcnt vmcnt(0) | |
235 ; GFX9-NEXT: v_pk_lshlrev_b16 v2, v3, s0 | |
236 ; GFX9-NEXT: global_store_dword v[0:1], v2, off | |
237 ; GFX9-NEXT: s_endpgm | |
238 ; | |
239 ; VI-LABEL: shl_s_v_v2i16: | |
240 ; VI: ; %bb.0: | |
241 ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 | |
242 ; VI-NEXT: s_load_dword s0, s[0:1], 0x34 | |
243 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 | |
244 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
245 ; VI-NEXT: v_mov_b32_e32 v1, s7 | |
246 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2 | |
247 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc | |
248 ; VI-NEXT: flat_load_dword v3, v[0:1] | |
249 ; VI-NEXT: s_lshr_b32 s1, s0, 16 | |
250 ; VI-NEXT: v_mov_b32_e32 v4, s1 | |
251 ; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v2 | |
252 ; VI-NEXT: v_mov_b32_e32 v1, s5 | |
253 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc | |
254 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | |
255 ; VI-NEXT: v_lshlrev_b16_e64 v2, v3, s0 | |
256 ; VI-NEXT: v_lshlrev_b16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD | |
257 ; VI-NEXT: v_or_b32_e32 v2, v2, v3 | |
258 ; VI-NEXT: flat_store_dword v[0:1], v2 | |
259 ; VI-NEXT: s_endpgm | |
260 ; | |
261 ; CI-LABEL: shl_s_v_v2i16: | |
262 ; CI: ; %bb.0: | |
263 ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 | |
264 ; CI-NEXT: s_load_dword s8, s[0:1], 0xd | |
265 ; CI-NEXT: s_mov_b32 s3, 0xf000 | |
266 ; CI-NEXT: s_mov_b32 s2, 0 | |
267 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 | |
268 ; CI-NEXT: s_waitcnt lgkmcnt(0) | |
269 ; CI-NEXT: s_mov_b64 s[0:1], s[6:7] | |
270 ; CI-NEXT: v_mov_b32_e32 v1, 0 | |
271 ; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 | |
272 ; CI-NEXT: s_mov_b32 s0, 0xffff | |
273 ; CI-NEXT: s_lshr_b32 s1, s8, 16 | |
274 ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] | |
275 ; CI-NEXT: s_waitcnt vmcnt(0) | |
276 ; CI-NEXT: v_and_b32_e32 v3, s0, v2 | |
277 ; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 | |
278 ; CI-NEXT: v_lshl_b32_e32 v2, s1, v2 | |
279 ; CI-NEXT: v_lshl_b32_e32 v3, s8, v3 | |
280 ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 | |
281 ; CI-NEXT: v_and_b32_e32 v3, s0, v3 | |
282 ; CI-NEXT: v_or_b32_e32 v2, v3, v2 | |
283 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 | |
284 ; CI-NEXT: s_endpgm | |
285 %tid = call i32 @llvm.amdgcn.workitem.id.x() | |
286 %tid.ext = sext i32 %tid to i64 | |
287 %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext | |
288 %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext | |
289 %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep | |
290 %result = shl <2 x i16> %sgpr, %vgpr | |
291 store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep | |
292 ret void | |
293 } | |
294 | |
; Constant shifted by a variable amount: <i16 8, i16 8> << %vgpr, where %vgpr
; is loaded from %in[tid] and the result is stored to %out[tid].
; The gfx900 check expects the constant 8 as an inline operand of
; v_pk_lshlrev_b16 with op_sel_hi:[1,0]. The tonga checks materialize 8 in a
; VGPR for the SDWA high-word shift.
295 define amdgpu_kernel void @shl_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { | |
296 ; GFX9-LABEL: shl_imm_v_v2i16: | |
297 ; GFX9: ; %bb.0: | |
298 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 | |
299 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 | |
300 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) | |
301 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 | |
302 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 | |
303 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc | |
304 ; GFX9-NEXT: global_load_dword v3, v[0:1], off | |
305 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v2 | |
306 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 | |
307 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc | |
308 ; GFX9-NEXT: s_waitcnt vmcnt(0) | |
309 ; GFX9-NEXT: v_pk_lshlrev_b16 v2, v3, 8 op_sel_hi:[1,0] | |
310 ; GFX9-NEXT: global_store_dword v[0:1], v2, off | |
311 ; GFX9-NEXT: s_endpgm | |
312 ; | |
313 ; VI-LABEL: shl_imm_v_v2i16: | |
314 ; VI: ; %bb.0: | |
315 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 | |
316 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 | |
317 ; VI-NEXT: v_mov_b32_e32 v3, 8 | |
318 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
319 ; VI-NEXT: v_mov_b32_e32 v1, s3 | |
320 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 | |
321 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc | |
322 ; VI-NEXT: flat_load_dword v4, v[0:1] | |
323 ; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 | |
324 ; VI-NEXT: v_mov_b32_e32 v1, s1 | |
325 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc | |
326 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | |
327 ; VI-NEXT: v_lshlrev_b16_e64 v2, v4, 8 | |
328 ; VI-NEXT: v_lshlrev_b16_sdwa v3, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD | |
329 ; VI-NEXT: v_or_b32_e32 v2, v2, v3 | |
330 ; VI-NEXT: flat_store_dword v[0:1], v2 | |
331 ; VI-NEXT: s_endpgm | |
332 ; | |
333 ; CI-LABEL: shl_imm_v_v2i16: | |
334 ; CI: ; %bb.0: | |
335 ; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 | |
336 ; CI-NEXT: s_mov_b32 s7, 0xf000 | |
337 ; CI-NEXT: s_mov_b32 s6, 0 | |
338 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 | |
339 ; CI-NEXT: v_mov_b32_e32 v1, 0 | |
340 ; CI-NEXT: s_waitcnt lgkmcnt(0) | |
341 ; CI-NEXT: s_mov_b64 s[4:5], s[2:3] | |
342 ; CI-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 | |
343 ; CI-NEXT: s_mov_b64 s[2:3], s[6:7] | |
344 ; CI-NEXT: s_waitcnt vmcnt(0) | |
345 ; CI-NEXT: v_and_b32_e32 v3, 0xffff, v2 | |
346 ; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 | |
347 ; CI-NEXT: v_lshl_b32_e32 v2, 8, v2 | |
348 ; CI-NEXT: v_lshl_b32_e32 v3, 8, v3 | |
349 ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 | |
350 ; CI-NEXT: v_and_b32_e32 v3, 0xfff8, v3 | |
351 ; CI-NEXT: v_or_b32_e32 v2, v3, v2 | |
352 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 | |
353 ; CI-NEXT: s_endpgm | |
354 %tid = call i32 @llvm.amdgcn.workitem.id.x() | |
355 %tid.ext = sext i32 %tid to i64 | |
356 %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext | |
357 %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext | |
358 %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep | |
359 %result = shl <2 x i16> <i16 8, i16 8>, %vgpr | |
360 store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep | |
361 ret void | |
362 } | |
363 | |
; Variable shifted by a constant amount: %vgpr << <i16 8, i16 8>, where %vgpr
; is loaded from %in[tid] and the result is stored to %out[tid].
; The bonaire checks expect one 32-bit shift followed by a 0xff00ff00 mask.
; The tonga checks expect a 32-bit shift masked with 0xff000000 for the high
; half, OR'd with a 16-bit shift of the low half. The gfx900 check expects
; v_pk_lshlrev_b16 with an inline 8.
364 define amdgpu_kernel void @shl_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { | |
365 ; GFX9-LABEL: shl_v_imm_v2i16: | |
366 ; GFX9: ; %bb.0: | |
367 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 | |
368 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 | |
369 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) | |
370 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 | |
371 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 | |
372 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc | |
373 ; GFX9-NEXT: global_load_dword v3, v[0:1], off | |
374 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v2 | |
375 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 | |
376 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc | |
377 ; GFX9-NEXT: s_waitcnt vmcnt(0) | |
378 ; GFX9-NEXT: v_pk_lshlrev_b16 v2, 8, v3 op_sel_hi:[0,1] | |
379 ; GFX9-NEXT: global_store_dword v[0:1], v2, off | |
380 ; GFX9-NEXT: s_endpgm | |
381 ; | |
382 ; VI-LABEL: shl_v_imm_v2i16: | |
383 ; VI: ; %bb.0: | |
384 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 | |
385 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 | |
386 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
387 ; VI-NEXT: v_mov_b32_e32 v1, s3 | |
388 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 | |
389 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc | |
390 ; VI-NEXT: flat_load_dword v3, v[0:1] | |
391 ; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 | |
392 ; VI-NEXT: v_mov_b32_e32 v1, s1 | |
393 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc | |
394 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | |
395 ; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v3 | |
396 ; VI-NEXT: v_and_b32_e32 v2, 0xff000000, v2 | |
397 ; VI-NEXT: v_lshlrev_b16_e32 v3, 8, v3 | |
398 ; VI-NEXT: v_or_b32_e32 v2, v3, v2 | |
399 ; VI-NEXT: flat_store_dword v[0:1], v2 | |
400 ; VI-NEXT: s_endpgm | |
401 ; | |
402 ; CI-LABEL: shl_v_imm_v2i16: | |
403 ; CI: ; %bb.0: | |
404 ; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 | |
405 ; CI-NEXT: s_mov_b32 s7, 0xf000 | |
406 ; CI-NEXT: s_mov_b32 s6, 0 | |
407 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 | |
408 ; CI-NEXT: v_mov_b32_e32 v1, 0 | |
409 ; CI-NEXT: s_waitcnt lgkmcnt(0) | |
410 ; CI-NEXT: s_mov_b64 s[4:5], s[2:3] | |
411 ; CI-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 | |
412 ; CI-NEXT: s_mov_b64 s[2:3], s[6:7] | |
413 ; CI-NEXT: s_waitcnt vmcnt(0) | |
414 ; CI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 | |
415 ; CI-NEXT: v_and_b32_e32 v2, 0xff00ff00, v2 | |
416 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 | |
417 ; CI-NEXT: s_endpgm | |
418 %tid = call i32 @llvm.amdgcn.workitem.id.x() | |
419 %tid.ext = sext i32 %tid to i64 | |
420 %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext | |
421 %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext | |
422 %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep | |
423 %result = shl <2 x i16> %vgpr, <i16 8, i16 8> | |
424 store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep | |
425 ret void | |
426 } | |
427 | |
; <4 x i16> vector case: each work-item loads %a from %in[tid] and the shift
; amounts %b from the following <4 x i16> element (%in[tid + 1]), then stores
; %a << %b to %out[tid].
; The gfx900 checks expect two v_pk_lshlrev_b16 instructions, one per 32-bit
; register of the 64-bit vector. The tonga and bonaire checks expect the
; per-half expansion applied to both registers.
428 define amdgpu_kernel void @v_shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { | |
429 ; GFX9-LABEL: v_shl_v4i16: | |
430 ; GFX9: ; %bb.0: | |
431 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 | |
432 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 | |
433 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) | |
434 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 | |
435 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v4 | |
436 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc | |
437 ; GFX9-NEXT: global_load_dwordx2 v[2:3], v[0:1], off | |
438 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off offset:8 | |
439 ; GFX9-NEXT: v_mov_b32_e32 v5, s1 | |
440 ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, s0, v4 | |
441 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc | |
442 ; GFX9-NEXT: s_waitcnt vmcnt(0) | |
443 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, v1, v3 | |
444 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v0, v2 | |
445 ; GFX9-NEXT: global_store_dwordx2 v[4:5], v[0:1], off | |
446 ; GFX9-NEXT: s_endpgm | |
447 ; | |
448 ; VI-LABEL: v_shl_v4i16: | |
449 ; VI: ; %bb.0: | |
450 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 | |
451 ; VI-NEXT: v_lshlrev_b32_e32 v4, 3, v0 | |
452 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
453 ; VI-NEXT: v_mov_b32_e32 v1, s3 | |
454 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v4 | |
455 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc | |
456 ; VI-NEXT: v_add_u32_e32 v2, vcc, 8, v0 | |
457 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc | |
458 ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] | |
459 ; VI-NEXT: flat_load_dwordx2 v[2:3], v[2:3] | |
460 ; VI-NEXT: v_mov_b32_e32 v5, s1 | |
461 ; VI-NEXT: v_add_u32_e32 v4, vcc, s0, v4 | |
462 ; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc | |
463 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | |
464 ; VI-NEXT: v_lshlrev_b16_e32 v6, v3, v1 | |
465 ; VI-NEXT: v_lshlrev_b16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 | |
466 ; VI-NEXT: v_lshlrev_b16_e32 v3, v2, v0 | |
467 ; VI-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 | |
468 ; VI-NEXT: v_or_b32_e32 v1, v6, v1 | |
469 ; VI-NEXT: v_or_b32_e32 v0, v3, v0 | |
470 ; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1] | |
471 ; VI-NEXT: s_endpgm | |
472 ; | |
473 ; CI-LABEL: v_shl_v4i16: | |
474 ; CI: ; %bb.0: | |
475 ; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 | |
476 ; CI-NEXT: s_mov_b32 s7, 0xf000 | |
477 ; CI-NEXT: s_mov_b32 s6, 0 | |
478 ; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 | |
479 ; CI-NEXT: v_mov_b32_e32 v1, 0 | |
480 ; CI-NEXT: s_waitcnt lgkmcnt(0) | |
481 ; CI-NEXT: s_mov_b64 s[4:5], s[2:3] | |
482 ; CI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64 | |
483 ; CI-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64 offset:8 | |
484 ; CI-NEXT: s_mov_b32 s8, 0xffff | |
485 ; CI-NEXT: s_mov_b64 s[2:3], s[6:7] | |
486 ; CI-NEXT: s_waitcnt vmcnt(1) | |
487 ; CI-NEXT: v_lshrrev_b32_e32 v6, 16, v2 | |
488 ; CI-NEXT: s_waitcnt vmcnt(0) | |
489 ; CI-NEXT: v_and_b32_e32 v8, s8, v4 | |
490 ; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v4 | |
491 ; CI-NEXT: v_and_b32_e32 v9, s8, v5 | |
492 ; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v3 | |
493 ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5 | |
494 ; CI-NEXT: v_lshl_b32_e32 v5, v7, v5 | |
495 ; CI-NEXT: v_lshl_b32_e32 v3, v3, v9 | |
496 ; CI-NEXT: v_lshl_b32_e32 v4, v6, v4 | |
497 ; CI-NEXT: v_lshl_b32_e32 v2, v2, v8 | |
498 ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 | |
499 ; CI-NEXT: v_and_b32_e32 v3, s8, v3 | |
500 ; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4 | |
501 ; CI-NEXT: v_and_b32_e32 v2, s8, v2 | |
502 ; CI-NEXT: v_or_b32_e32 v3, v3, v5 | |
503 ; CI-NEXT: v_or_b32_e32 v2, v2, v4 | |
504 ; CI-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 | |
505 ; CI-NEXT: s_endpgm | |
506 %tid = call i32 @llvm.amdgcn.workitem.id.x() | |
507 %tid.ext = sext i32 %tid to i64 | |
508 %in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext | |
509 %out.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i64 %tid.ext | |
510 %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in.gep, i32 1 | |
511 %a = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep | |
512 %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr | |
513 %result = shl <4 x i16> %a, %b | |
514 store <4 x i16> %result, <4 x i16> addrspace(1)* %out.gep | |
515 ret void | |
516 } | |
517 | |
; <4 x i16> variable shifted by a constant amount: every element of the value
; loaded from %in[tid] is shifted left by 8 and the result is stored to
; %out[tid].
; The gfx900 checks expect two v_pk_lshlrev_b16 instructions with an inline 8.
; The tonga and bonaire checks expect 32-bit shifts combined with byte masks
; (0xff000000, 0xff00 and 0xff00ff00) instead of per-element shifts.
518 define amdgpu_kernel void @shl_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { | |
519 ; GFX9-LABEL: shl_v_imm_v4i16: | |
520 ; GFX9: ; %bb.0: | |
521 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 | |
522 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0 | |
523 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) | |
524 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 | |
525 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 | |
526 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc | |
527 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off | |
528 ; GFX9-NEXT: v_mov_b32_e32 v3, s1 | |
529 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 | |
530 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc | |
531 ; GFX9-NEXT: s_waitcnt vmcnt(0) | |
532 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] | |
533 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] | |
534 ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off | |
535 ; GFX9-NEXT: s_endpgm | |
536 ; | |
537 ; VI-LABEL: shl_v_imm_v4i16: | |
538 ; VI: ; %bb.0: | |
539 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 | |
540 ; VI-NEXT: v_lshlrev_b32_e32 v2, 3, v0 | |
541 ; VI-NEXT: s_mov_b32 s4, 0xff000000 | |
542 ; VI-NEXT: s_waitcnt lgkmcnt(0) | |
543 ; VI-NEXT: v_mov_b32_e32 v1, s3 | |
544 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 | |
545 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc | |
546 ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] | |
547 ; VI-NEXT: v_mov_b32_e32 v3, s1 | |
548 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 | |
549 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc | |
550 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | |
551 ; VI-NEXT: v_lshlrev_b32_e32 v4, 8, v1 | |
552 ; VI-NEXT: v_lshlrev_b16_e32 v5, 8, v0 | |
553 ; VI-NEXT: v_lshlrev_b32_e32 v0, 8, v0 | |
554 ; VI-NEXT: v_and_b32_e32 v0, s4, v0 | |
555 ; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 | |
556 ; VI-NEXT: v_and_b32_e32 v4, s4, v4 | |
557 ; VI-NEXT: v_or_b32_e32 v1, v1, v4 | |
558 ; VI-NEXT: v_or_b32_e32 v0, v5, v0 | |
559 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] | |
560 ; VI-NEXT: s_endpgm | |
561 ; | |
562 ; CI-LABEL: shl_v_imm_v4i16: | |
563 ; CI: ; %bb.0: | |
564 ; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 | |
565 ; CI-NEXT: s_mov_b32 s7, 0xf000 | |
566 ; CI-NEXT: s_mov_b32 s6, 0 | |
567 ; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 | |
568 ; CI-NEXT: v_mov_b32_e32 v1, 0 | |
569 ; CI-NEXT: s_waitcnt lgkmcnt(0) | |
570 ; CI-NEXT: s_mov_b64 s[4:5], s[2:3] | |
571 ; CI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64 | |
572 ; CI-NEXT: s_mov_b32 s8, 0xff00 | |
573 ; CI-NEXT: s_mov_b64 s[2:3], s[6:7] | |
574 ; CI-NEXT: s_waitcnt vmcnt(0) | |
575 ; CI-NEXT: v_lshrrev_b32_e32 v4, 8, v3 | |
576 ; CI-NEXT: v_lshlrev_b32_e32 v3, 8, v3 | |
577 ; CI-NEXT: v_and_b32_e32 v4, s8, v4 | |
578 ; CI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 | |
579 ; CI-NEXT: v_and_b32_e32 v3, s8, v3 | |
580 ; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4 | |
581 ; CI-NEXT: v_or_b32_e32 v3, v3, v4 | |
582 ; CI-NEXT: v_and_b32_e32 v2, 0xff00ff00, v2 | |
583 ; CI-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 | |
584 ; CI-NEXT: s_endpgm | |
585 %tid = call i32 @llvm.amdgcn.workitem.id.x() | |
586 %tid.ext = sext i32 %tid to i64 | |
587 %in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext | |
588 %out.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i64 %tid.ext | |
589 %vgpr = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep | |
590 %result = shl <4 x i16> %vgpr, <i16 8, i16 8, i16 8, i16 8> | |
591 store <4 x i16> %result, <4 x i16> addrspace(1)* %out.gep | |
592 ret void | |
593 } | |
594 | |
; Intrinsic called by the per-work-item kernels in this file; its i32 result
; (%tid) is sign-extended and used to index %in and %out.
595 declare i32 @llvm.amdgcn.workitem.id.x() #1 | |
596 | |
; Attribute group #0 is attached to every test kernel, and #1 to the
; workitem-id intrinsic declaration.
597 attributes #0 = { nounwind } | |
598 attributes #1 = { nounwind readnone } |