120
|
1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s
|
|
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
|
121
|
3 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
120
|
4
|
|
5 ; FUNC-LABEL: {{^}}local_load_i16:
|
|
6 ; GCN: ds_read_u16 v{{[0-9]+}}
|
|
7
|
|
8 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
|
|
9 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
|
|
10 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
|
|
11 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
|
|
12 ; EG: LDS_SHORT_WRITE {{\*?}} [[TO]], [[DATA]]
|
121
|
; Plain scalar case. Reads one i16 through %in and writes the same value
; through %out; both pointers are addrspace(3).
; NOTE(review) - the leading numbers and the bare "|" / "120" lines look like
; residue from a rendered listing rather than IR; confirm against the real file.
13 define amdgpu_kernel void @local_load_i16(i16 addrspace(3)* %out, i16 addrspace(3)* %in) {
|
120
|
14 entry:
|
|
15 %ld = load i16, i16 addrspace(3)* %in
|
|
16 store i16 %ld, i16 addrspace(3)* %out
|
|
17 ret void
|
|
18 }
|
|
19
|
|
20 ; FUNC-LABEL: {{^}}local_load_v2i16:
|
|
21 ; GCN: ds_read_b32
|
|
22
|
|
23 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
|
|
24 ; EG: LDS_READ_RET {{.*}} [[FROM]]
|
|
25 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
|
|
26 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
|
|
27 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
|
121
|
; Two-element vector case. Reads a <2 x i16> through %in and writes it back
; unchanged through %out; both pointers are addrspace(3).
28 define amdgpu_kernel void @local_load_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) {
|
120
|
29 entry:
|
|
30 %ld = load <2 x i16>, <2 x i16> addrspace(3)* %in
|
|
31 store <2 x i16> %ld, <2 x i16> addrspace(3)* %out
|
|
32 ret void
|
|
33 }
|
|
34
|
|
35 ; FUNC-LABEL: {{^}}local_load_v3i16:
|
|
36 ; GCN: ds_read_b64
|
|
37 ; GCN-DAG: ds_write_b32
|
|
38 ; GCN-DAG: ds_write_b16
|
|
39
|
|
40 ; EG-DAG: LDS_USHORT_READ_RET
|
|
41 ; EG-DAG: LDS_READ_RET
|
121
|
; Three-element (non power-of-two) vector case. Reads a <3 x i16> through %in
; and writes it back unchanged through %out; both pointers are addrspace(3).
42 define amdgpu_kernel void @local_load_v3i16(<3 x i16> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
|
120
|
43 entry:
|
|
44 %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
|
|
45 store <3 x i16> %ld, <3 x i16> addrspace(3)* %out
|
|
46 ret void
|
|
47 }
|
|
48
|
|
49 ; FUNC-LABEL: {{^}}local_load_v4i16:
|
|
50 ; GCN: ds_read_b64
|
|
51
|
|
52 ; EG: LDS_READ_RET
|
|
53 ; EG: LDS_READ_RET
|
121
|
; Four-element vector case. Reads a <4 x i16> through %in and writes it back
; unchanged through %out; both pointers are addrspace(3).
54 define amdgpu_kernel void @local_load_v4i16(<4 x i16> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) {
|
120
|
55 entry:
|
|
56 %ld = load <4 x i16>, <4 x i16> addrspace(3)* %in
|
|
57 store <4 x i16> %ld, <4 x i16> addrspace(3)* %out
|
|
58 ret void
|
|
59 }
|
|
60
|
|
61 ; FUNC-LABEL: {{^}}local_load_v8i16:
|
|
62 ; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
|
|
63
|
|
64 ; EG: LDS_READ_RET
|
|
65 ; EG: LDS_READ_RET
|
|
66 ; EG: LDS_READ_RET
|
|
67 ; EG: LDS_READ_RET
|
121
|
; Eight-element vector case. Reads a <8 x i16> through %in and writes it back
; unchanged through %out; both pointers are addrspace(3).
68 define amdgpu_kernel void @local_load_v8i16(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) {
|
120
|
69 entry:
|
|
70 %ld = load <8 x i16>, <8 x i16> addrspace(3)* %in
|
|
71 store <8 x i16> %ld, <8 x i16> addrspace(3)* %out
|
|
72 ret void
|
|
73 }
|
|
74
|
|
75 ; FUNC-LABEL: {{^}}local_load_v16i16:
|
|
76 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:3{{$}}
|
|
77 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}}
|
|
78
|
|
79
|
|
80 ; EG: LDS_READ_RET
|
|
81 ; EG: LDS_READ_RET
|
|
82 ; EG: LDS_READ_RET
|
|
83 ; EG: LDS_READ_RET
|
|
84
|
|
85 ; EG: LDS_READ_RET
|
|
86 ; EG: LDS_READ_RET
|
|
87 ; EG: LDS_READ_RET
|
|
88 ; EG: LDS_READ_RET
|
121
|
; Sixteen-element vector case. Reads a <16 x i16> through %in and writes it
; back unchanged through %out; both pointers are addrspace(3).
89 define amdgpu_kernel void @local_load_v16i16(<16 x i16> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) {
|
120
|
90 entry:
|
|
91 %ld = load <16 x i16>, <16 x i16> addrspace(3)* %in
|
|
92 store <16 x i16> %ld, <16 x i16> addrspace(3)* %out
|
|
93 ret void
|
|
94 }
|
|
95
|
|
96 ; FUNC-LABEL: {{^}}local_zextload_i16_to_i32:
|
|
97 ; GCN: ds_read_u16
|
|
98 ; GCN: ds_write_b32
|
|
99
|
|
100 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
|
|
101 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
|
|
102 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
|
|
103 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
|
|
104 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
|
121
|
; Scalar zero-extending load. Reads an i16 through %in, zero-extends it to i32
; and stores the widened value through %out; both pointers are addrspace(3).
105 define amdgpu_kernel void @local_zextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
|
120
|
106 %a = load i16, i16 addrspace(3)* %in
|
|
107 %ext = zext i16 %a to i32
|
|
108 store i32 %ext, i32 addrspace(3)* %out
|
|
109 ret void
|
|
110 }
|
|
111
|
|
112 ; FUNC-LABEL: {{^}}local_sextload_i16_to_i32:
|
|
113 ; GCN-NOT: s_wqm_b64
|
|
114 ; GCN: s_mov_b32 m0
|
|
115 ; GCN: ds_read_i16
|
|
116
|
|
117 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
|
|
118 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
|
|
119 ; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
|
|
120 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
|
|
121 ; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
|
|
122 ; EG: 16
|
|
123 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
|
121
|
; Scalar sign-extending load. Reads an i16 through %in, sign-extends it to i32
; and stores the widened value through %out; both pointers are addrspace(3).
124 define amdgpu_kernel void @local_sextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
|
120
|
125 %a = load i16, i16 addrspace(3)* %in
|
|
126 %ext = sext i16 %a to i32
|
|
127 store i32 %ext, i32 addrspace(3)* %out
|
|
128 ret void
|
|
129 }
|
|
130
|
|
131 ; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i32:
|
|
132 ; GCN: ds_read_u16
|
|
133
|
|
134 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
|
|
135 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
|
|
136 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
|
|
137 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
|
|
138 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
|
121
|
; Single-element vector variant of the zero-extending load. Reads a <1 x i16>
; through %in, zero-extends it to <1 x i32> and stores it through %out; both
; pointers are addrspace(3).
139 define amdgpu_kernel void @local_zextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
|
120
|
140 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
|
|
141 %ext = zext <1 x i16> %load to <1 x i32>
|
|
142 store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
|
|
143 ret void
|
|
144 }
|
|
145
|
|
146 ; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i32:
|
|
147 ; GCN: ds_read_i16
|
|
148
|
|
149 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
|
|
150 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
|
|
151 ; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
|
|
152 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
|
|
153 ; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
|
|
154 ; EG: 16
|
|
155 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
|
121
|
; Single-element vector variant of the sign-extending load. Reads a <1 x i16>
; through %in, sign-extends it to <1 x i32> and stores it through %out; both
; pointers are addrspace(3).
156 define amdgpu_kernel void @local_sextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
|
120
|
157 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
|
|
158 %ext = sext <1 x i16> %load to <1 x i32>
|
|
159 store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
|
|
160 ret void
|
|
161 }
|
|
162
|
|
163 ; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i32:
|
|
164 ; GCN-NOT: s_wqm_b64
|
|
165 ; GCN: s_mov_b32 m0
|
|
166 ; GCN: ds_read_b32
|
|
167
|
|
168 ; EG: LDS_READ_RET
|
121
|
; Reads a <2 x i16> through %in, zero-extends each lane to i32 and stores the
; resulting <2 x i32> through %out; both pointers are addrspace(3).
169 define amdgpu_kernel void @local_zextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
|
120
|
170 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
|
|
171 %ext = zext <2 x i16> %load to <2 x i32>
|
|
172 store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
|
|
173 ret void
|
|
174 }
|
|
175
|
|
176 ; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i32:
|
|
177 ; GCN-NOT: s_wqm_b64
|
|
178 ; GCN: s_mov_b32 m0
|
|
179 ; GCN: ds_read_b32
|
|
180
|
|
181 ; EG: LDS_READ_RET
|
|
182 ; EG: BFE_INT
|
|
183 ; EG: BFE_INT
|
121
|
; Reads a <2 x i16> through %in, sign-extends each lane to i32 and stores the
; resulting <2 x i32> through %out; both pointers are addrspace(3).
184 define amdgpu_kernel void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
|
120
|
185 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
|
|
186 %ext = sext <2 x i16> %load to <2 x i32>
|
|
187 store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
|
|
188 ret void
|
|
189 }
|
|
190
|
|
191 ; FUNC-LABEL: {{^}}local_local_zextload_v3i16_to_v3i32:
|
|
192 ; GCN: ds_read_b64
|
|
193 ; GCN-DAG: ds_write_b32
|
|
194 ; GCN-DAG: ds_write_b64
|
|
195
|
|
196 ; EG: LDS_READ_RET
|
121
|
; Reads a <3 x i16> through %in, zero-extends each lane to i32 and stores the
; resulting <3 x i32> through %out; both pointers are addrspace(3).
; NOTE(review) - the doubled "local_local_" prefix differs from most sibling
; kernels; a rename would also have to update the label check that matches it.
197 define amdgpu_kernel void @local_local_zextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
|
120
|
198 entry:
|
|
199 %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
|
|
200 %ext = zext <3 x i16> %ld to <3 x i32>
|
|
201 store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
|
|
202 ret void
|
|
203 }
|
|
204
|
|
205 ; FUNC-LABEL: {{^}}local_local_sextload_v3i16_to_v3i32:
|
|
206 ; GCN: ds_read_b64
|
|
207 ; GCN-DAG: ds_write_b32
|
|
208 ; GCN-DAG: ds_write_b64
|
|
209
|
|
210 ; EG: LDS_READ_RET
|
|
211 ; EG-DAG: BFE_INT
|
|
212 ; EG-DAG: BFE_INT
|
|
213 ; EG-DAG: BFE_INT
|
121
|
; Reads a <3 x i16> through %in, sign-extends each lane to i32 and stores the
; resulting <3 x i32> through %out; both pointers are addrspace(3).
; NOTE(review) - the doubled "local_local_" prefix differs from most sibling
; kernels; a rename would also have to update the label check that matches it.
214 define amdgpu_kernel void @local_local_sextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
|
120
|
215 entry:
|
|
216 %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
|
|
217 %ext = sext <3 x i16> %ld to <3 x i32>
|
|
218 store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
|
|
219 ret void
|
|
220 }
|
|
221
|
|
222 ; FUNC-LABEL: {{^}}local_local_zextload_v4i16_to_v4i32:
|
|
223 ; GCN-NOT: s_wqm_b64
|
|
224 ; GCN: s_mov_b32 m0
|
|
225 ; GCN: ds_read_b64
|
|
226
|
|
227 ; EG: LDS_READ_RET
|
|
228 ; EG: LDS_READ_RET
|
121
|
; Reads a <4 x i16> through %in, zero-extends each lane to i32 and stores the
; resulting <4 x i32> through %out; both pointers are addrspace(3).
; NOTE(review) - the doubled "local_local_" prefix differs from the matching
; sign-extending kernel; a rename would also have to update the label check.
229 define amdgpu_kernel void @local_local_zextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
|
120
|
230 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
|
|
231 %ext = zext <4 x i16> %load to <4 x i32>
|
|
232 store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
|
|
233 ret void
|
|
234 }
|
|
235
|
|
236 ; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i32:
|
|
237 ; GCN-NOT: s_wqm_b64
|
|
238 ; GCN: s_mov_b32 m0
|
|
239 ; GCN: ds_read_b64
|
|
240
|
|
241 ; EG: LDS_READ_RET
|
|
242 ; EG: LDS_READ_RET
|
|
243 ; EG-DAG: BFE_INT
|
|
244 ; EG-DAG: BFE_INT
|
|
245 ; EG-DAG: BFE_INT
|
|
246 ; EG-DAG: BFE_INT
|
121
|
; Reads a <4 x i16> through %in, sign-extends each lane to i32 and stores the
; resulting <4 x i32> through %out; both pointers are addrspace(3).
247 define amdgpu_kernel void @local_sextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
|
120
|
248 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
|
|
249 %ext = sext <4 x i16> %load to <4 x i32>
|
|
250 store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
|
|
251 ret void
|
|
252 }
|
|
253
|
|
254 ; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i32:
|
|
255 ; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
|
|
256
|
|
257 ; EG: LDS_READ_RET
|
|
258 ; EG: LDS_READ_RET
|
|
259 ; EG: LDS_READ_RET
|
|
260 ; EG: LDS_READ_RET
|
121
|
; Reads a <8 x i16> through %in, zero-extends each lane to i32 and stores the
; resulting <8 x i32> through %out; both pointers are addrspace(3).
261 define amdgpu_kernel void @local_zextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
|
120
|
262 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
|
|
263 %ext = zext <8 x i16> %load to <8 x i32>
|
|
264 store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
|
|
265 ret void
|
|
266 }
|
|
267
|
|
268 ; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i32:
|
|
269 ; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
|
|
270
|
|
271 ; EG: LDS_READ_RET
|
|
272 ; EG: LDS_READ_RET
|
|
273 ; EG: LDS_READ_RET
|
|
274 ; EG: LDS_READ_RET
|
|
275 ; EG-DAG: BFE_INT
|
|
276 ; EG-DAG: BFE_INT
|
|
277 ; EG-DAG: BFE_INT
|
|
278 ; EG-DAG: BFE_INT
|
|
279 ; EG-DAG: BFE_INT
|
|
280 ; EG-DAG: BFE_INT
|
|
281 ; EG-DAG: BFE_INT
|
|
282 ; EG-DAG: BFE_INT
|
121
|
; Reads a <8 x i16> through %in, sign-extends each lane to i32 and stores the
; resulting <8 x i32> through %out; both pointers are addrspace(3).
283 define amdgpu_kernel void @local_sextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
|
120
|
284 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
|
|
285 %ext = sext <8 x i16> %load to <8 x i32>
|
|
286 store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
|
|
287 ret void
|
|
288 }
|
|
289
|
|
290 ; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i32:
|
|
291 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
|
|
292 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
|
|
293
|
|
294 ; GCN: ds_write2_b64
|
|
295 ; GCN: ds_write2_b64
|
|
296 ; GCN: ds_write2_b64
|
|
297 ; GCN: ds_write2_b64
|
|
298
|
|
299 ; EG: LDS_READ_RET
|
|
300 ; EG: LDS_READ_RET
|
|
301 ; EG: LDS_READ_RET
|
|
302 ; EG: LDS_READ_RET
|
|
303 ; EG: LDS_READ_RET
|
|
304 ; EG: LDS_READ_RET
|
|
305 ; EG: LDS_READ_RET
|
|
306 ; EG: LDS_READ_RET
|
121
|
; Reads a <16 x i16> through %in, zero-extends each lane to i32 and stores the
; resulting <16 x i32> through %out; both pointers are addrspace(3).
307 define amdgpu_kernel void @local_zextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
|
120
|
308 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
|
|
309 %ext = zext <16 x i16> %load to <16 x i32>
|
|
310 store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
|
|
311 ret void
|
|
312 }
|
|
313
|
|
314 ; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i32:
|
|
315
|
|
316 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
|
|
317 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
|
|
318
|
|
319 ; EG: LDS_READ_RET
|
|
320 ; EG: LDS_READ_RET
|
|
321 ; EG: LDS_READ_RET
|
|
322 ; EG: LDS_READ_RET
|
|
323 ; EG: LDS_READ_RET
|
|
324 ; EG: LDS_READ_RET
|
|
325 ; EG: LDS_READ_RET
|
|
326 ; EG: LDS_READ_RET
|
|
327 ; EG-DAG: BFE_INT
|
|
328 ; EG-DAG: BFE_INT
|
|
329 ; EG-DAG: BFE_INT
|
|
330 ; EG-DAG: BFE_INT
|
|
331 ; EG-DAG: BFE_INT
|
|
332 ; EG-DAG: BFE_INT
|
|
333 ; EG-DAG: BFE_INT
|
|
334 ; EG-DAG: BFE_INT
|
|
335 ; EG-DAG: BFE_INT
|
|
336 ; EG-DAG: BFE_INT
|
|
337 ; EG-DAG: BFE_INT
|
|
338 ; EG-DAG: BFE_INT
|
|
339 ; EG-DAG: BFE_INT
|
|
340 ; EG-DAG: BFE_INT
|
|
341 ; EG-DAG: BFE_INT
|
|
342 ; EG-DAG: BFE_INT
|
121
|
; Reads a <16 x i16> through %in, sign-extends each lane to i32 and stores the
; resulting <16 x i32> through %out; both pointers are addrspace(3).
343 define amdgpu_kernel void @local_sextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
|
120
|
344 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
|
|
345 %ext = sext <16 x i16> %load to <16 x i32>
|
|
346 store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
|
|
347 ret void
|
|
348 }
|
|
349
|
|
350 ; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i32:
|
|
351 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
|
|
352 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3
|
|
353 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
|
|
354 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
|
|
355
|
|
356 ; EG: LDS_READ_RET
|
|
357 ; EG: LDS_READ_RET
|
|
358 ; EG: LDS_READ_RET
|
|
359 ; EG: LDS_READ_RET
|
|
360 ; EG: LDS_READ_RET
|
|
361 ; EG: LDS_READ_RET
|
|
362 ; EG: LDS_READ_RET
|
|
363 ; EG: LDS_READ_RET
|
|
364 ; EG: LDS_READ_RET
|
|
365 ; EG: LDS_READ_RET
|
|
366 ; EG: LDS_READ_RET
|
|
367 ; EG: LDS_READ_RET
|
|
368 ; EG: LDS_READ_RET
|
|
369 ; EG: LDS_READ_RET
|
|
370 ; EG: LDS_READ_RET
|
|
371 ; EG: LDS_READ_RET
|
121
|
; Reads a <32 x i16> through %in, zero-extends each lane to i32 and stores the
; resulting <32 x i32> through %out; both pointers are addrspace(3).
372 define amdgpu_kernel void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
|
120
|
373 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
|
|
374 %ext = zext <32 x i16> %load to <32 x i32>
|
|
375 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
|
|
376 ret void
|
|
377 }
|
|
378
|
|
379 ; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i32:
|
|
380 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
|
|
381 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
|
|
382 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
|
|
383 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
|
|
384 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
|
|
385 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13
|
|
386 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11
|
|
387 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9
|
|
388 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
|
|
389 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
|
|
390 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
|
|
391 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
|
|
392
|
|
393 ; EG: LDS_READ_RET
|
|
394 ; EG: LDS_READ_RET
|
|
395 ; EG: LDS_READ_RET
|
|
396 ; EG: LDS_READ_RET
|
|
397 ; EG: LDS_READ_RET
|
|
398 ; EG: LDS_READ_RET
|
|
399 ; EG: LDS_READ_RET
|
|
400 ; EG: LDS_READ_RET
|
|
401 ; EG: LDS_READ_RET
|
|
402 ; EG: LDS_READ_RET
|
|
403 ; EG: LDS_READ_RET
|
|
404 ; EG: LDS_READ_RET
|
|
405 ; EG: LDS_READ_RET
|
|
406 ; EG: LDS_READ_RET
|
|
407 ; EG: LDS_READ_RET
|
|
408 ; EG: LDS_READ_RET
|
121
|
; Reads a <32 x i16> through %in, sign-extends each lane to i32 and stores the
; resulting <32 x i32> through %out; both pointers are addrspace(3).
409 define amdgpu_kernel void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
|
120
|
410 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
|
|
411 %ext = sext <32 x i16> %load to <32 x i32>
|
|
412 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
|
|
413 ret void
|
|
414 }
|
|
415
|
|
416 ; FUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i32:
|
|
417 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:14 offset1:15
|
|
418 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
|
|
419 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3
|
|
420 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
|
|
421 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
|
|
422 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:8 offset1:9
|
|
423 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:12 offset1:13
|
|
424 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:10 offset1:11
|
|
425 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31
|
|
426 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29
|
|
427 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:26 offset1:27
|
|
428 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:24 offset1:25
|
|
429 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:22 offset1:23
|
|
430 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:20 offset1:21
|
|
431 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:18 offset1:19
|
|
432 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:16 offset1:17
|
|
433 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
|
|
434 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13
|
|
435 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11
|
|
436 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9
|
|
437 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
|
|
438 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
|
|
439 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
|
|
440 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
|
|
441
|
|
442 ; EG: LDS_READ_RET
|
|
443 ; EG: LDS_READ_RET
|
|
444 ; EG: LDS_READ_RET
|
|
445 ; EG: LDS_READ_RET
|
|
446 ; EG: LDS_READ_RET
|
|
447 ; EG: LDS_READ_RET
|
|
448 ; EG: LDS_READ_RET
|
|
449 ; EG: LDS_READ_RET
|
|
450 ; EG: LDS_READ_RET
|
|
451 ; EG: LDS_READ_RET
|
|
452 ; EG: LDS_READ_RET
|
|
453 ; EG: LDS_READ_RET
|
|
454 ; EG: LDS_READ_RET
|
|
455 ; EG: LDS_READ_RET
|
|
456 ; EG: LDS_READ_RET
|
|
457 ; EG: LDS_READ_RET
|
|
458 ; EG: LDS_READ_RET
|
|
459 ; EG: LDS_READ_RET
|
|
460 ; EG: LDS_READ_RET
|
|
461 ; EG: LDS_READ_RET
|
|
462 ; EG: LDS_READ_RET
|
|
463 ; EG: LDS_READ_RET
|
|
464 ; EG: LDS_READ_RET
|
|
465 ; EG: LDS_READ_RET
|
|
466 ; EG: LDS_READ_RET
|
|
467 ; EG: LDS_READ_RET
|
|
468 ; EG: LDS_READ_RET
|
|
469 ; EG: LDS_READ_RET
|
|
470 ; EG: LDS_READ_RET
|
|
471 ; EG: LDS_READ_RET
|
|
472 ; EG: LDS_READ_RET
|
|
473 ; EG: LDS_READ_RET
|
121
|
; Reads a <64 x i16> through %in, zero-extends each lane to i32 and stores the
; resulting <64 x i32> through %out; both pointers are addrspace(3).
474 define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
|
120
|
475 %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
|
|
476 %ext = zext <64 x i16> %load to <64 x i32>
|
|
477 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
|
|
478 ret void
|
|
479 }
|
|
480
|
|
481 ; FUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i32:
|
|
482
|
|
483 ; EG: LDS_READ_RET
|
|
484 ; EG: LDS_READ_RET
|
|
485 ; EG: LDS_READ_RET
|
|
486 ; EG: LDS_READ_RET
|
|
487 ; EG: LDS_READ_RET
|
|
488 ; EG: LDS_READ_RET
|
|
489 ; EG: LDS_READ_RET
|
|
490 ; EG: LDS_READ_RET
|
|
491 ; EG: LDS_READ_RET
|
|
492 ; EG: LDS_READ_RET
|
|
493 ; EG: LDS_READ_RET
|
|
494 ; EG: LDS_READ_RET
|
|
495 ; EG: LDS_READ_RET
|
|
496 ; EG: LDS_READ_RET
|
|
497 ; EG: LDS_READ_RET
|
|
498 ; EG: LDS_READ_RET
|
|
499 ; EG: LDS_READ_RET
|
|
500 ; EG: LDS_READ_RET
|
|
501 ; EG: LDS_READ_RET
|
|
502 ; EG: LDS_READ_RET
|
|
503 ; EG: LDS_READ_RET
|
|
504 ; EG: LDS_READ_RET
|
|
505 ; EG: LDS_READ_RET
|
|
506 ; EG: LDS_READ_RET
|
|
507 ; EG: LDS_READ_RET
|
|
508 ; EG: LDS_READ_RET
|
|
509 ; EG: LDS_READ_RET
|
|
510 ; EG: LDS_READ_RET
|
|
511 ; EG: LDS_READ_RET
|
|
512 ; EG: LDS_READ_RET
|
|
513 ; EG: LDS_READ_RET
|
|
514 ; EG: LDS_READ_RET
|
121
|
; Reads a <64 x i16> through %in, sign-extends each lane to i32 and stores the
; resulting <64 x i32> through %out; both pointers are addrspace(3).
515 define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
|
120
|
516 %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
|
|
517 %ext = sext <64 x i16> %load to <64 x i32>
|
|
518 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
|
|
519 ret void
|
|
520 }
|
|
521
|
|
522 ; FUNC-LABEL: {{^}}local_zextload_i16_to_i64:
|
|
523 ; GCN-DAG: ds_read_u16 v[[LO:[0-9]+]],
|
|
524 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
|
|
525
|
|
526 ; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
|
|
527
|
|
528 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
|
|
529 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
|
|
530 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
|
|
531 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
|
|
532 ; EG-DAG: LDS_WRITE
|
121
|
; Scalar zero-extending load to 64 bits. Reads an i16 through %in, zero-extends
; it to i64 and stores the result through %out; both pointers are addrspace(3).
533 define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
|
120
|
534 %a = load i16, i16 addrspace(3)* %in
|
|
535 %ext = zext i16 %a to i64
|
|
536 store i64 %ext, i64 addrspace(3)* %out
|
|
537 ret void
|
|
538 }
|
|
539
|
|
540 ; FUNC-LABEL: {{^}}local_sextload_i16_to_i64:
|
|
541 ; FIXME: Need to optimize this sequence to avoid an extra shift.
|
|
542 ; t25: i32,ch = load<LD2[%in(addrspace=3)], anyext from i16> t12, t10, undef:i32
|
|
543 ; t28: i64 = any_extend t25
|
|
544 ; t30: i64 = sign_extend_inreg t28, ValueType:ch:i16
|
|
545 ; SI: ds_read_i16 v[[LO:[0-9]+]],
|
|
546 ; VI: ds_read_u16 v[[ULO:[0-9]+]]
|
|
547 ; VI: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16
|
|
548 ; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
|
|
549
|
|
550 ; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
|
|
551
|
|
552 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
|
|
553 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
|
|
554 ; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
|
|
555 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
|
|
556 ; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
|
|
557 ; EG-DAG: LDS_WRITE
|
|
558 ; EG-DAG: 16
|
|
559 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
|
121
|
; Scalar sign-extending load to 64 bits. Reads an i16 through %in, sign-extends
; it to i64 and stores the result through %out; both pointers are addrspace(3).
560 define amdgpu_kernel void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
|
120
|
561 %a = load i16, i16 addrspace(3)* %in
|
|
562 %ext = sext i16 %a to i64
|
|
563 store i64 %ext, i64 addrspace(3)* %out
|
|
564 ret void
|
|
565 }
|
|
566
|
|
567 ; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i64:
|
|
568
|
|
569 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
|
|
570 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
|
|
571 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
|
|
572 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
|
|
573 ; EG-DAG: LDS_WRITE
|
121
|
; Single-element vector variant. Reads a <1 x i16> through %in, zero-extends
; it to <1 x i64> and stores it through %out; both pointers are addrspace(3).
574 define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
|
120
|
575 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
|
|
576 %ext = zext <1 x i16> %load to <1 x i64>
|
|
577 store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
|
|
578 ret void
|
|
579 }
|
|
580
|
|
581 ; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i64:
|
|
582
|
|
583 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
|
|
584 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
|
|
585 ; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
|
|
586 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
|
|
587 ; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
|
|
588 ; EG-DAG: LDS_WRITE
|
|
589 ; EG-DAG: 16
|
|
590 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
|
121
|
; Single-element vector variant. Reads a <1 x i16> through %in, sign-extends
; it to <1 x i64> and stores it through %out; both pointers are addrspace(3).
591 define amdgpu_kernel void @local_sextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
|
120
|
592 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
|
|
593 %ext = sext <1 x i16> %load to <1 x i64>
|
|
594 store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
|
|
595 ret void
|
|
596 }
|
|
597
|
|
598 ; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i64:
|
|
599
|
|
600 ; EG: LDS_READ_RET
|
121
|
; Reads a <2 x i16> through %in, zero-extends each lane to i64 and stores the
; resulting <2 x i64> through %out; both pointers are addrspace(3).
601 define amdgpu_kernel void @local_zextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
|
120
|
602 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
|
|
603 %ext = zext <2 x i16> %load to <2 x i64>
|
|
604 store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
|
|
605 ret void
|
|
606 }
|
|
607
|
|
608 ; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i64:
|
|
609
|
|
610 ; EG: LDS_READ_RET
|
|
611 ; EG-DAG: BFE_INT
|
|
612 ; EG-DAG: ASHR
|
121
|
; Reads a <2 x i16> through %in, sign-extends each lane to i64 and stores the
; resulting <2 x i64> through %out; both pointers are addrspace(3).
613 define amdgpu_kernel void @local_sextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
|
120
|
614 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
|
|
615 %ext = sext <2 x i16> %load to <2 x i64>
|
|
616 store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
|
|
617 ret void
|
|
618 }
|
|
619
|
|
620 ; FUNC-LABEL: {{^}}local_zextload_v4i16_to_v4i64:
|
|
621
|
|
622 ; EG: LDS_READ_RET
|
|
623 ; EG: LDS_READ_RET
|
121
|
; Reads a <4 x i16> through %in, zero-extends each lane to i64 and stores the
; resulting <4 x i64> through %out; both pointers are addrspace(3).
624 define amdgpu_kernel void @local_zextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
|
120
|
625 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
|
|
626 %ext = zext <4 x i16> %load to <4 x i64>
|
|
627 store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
|
|
628 ret void
|
|
629 }
|
|
630
|
|
631 ; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i64:
|
|
632
|
|
633 ; EG: LDS_READ_RET
|
|
634 ; EG: LDS_READ_RET
|
|
635 ; EG-DAG: BFE_INT
|
|
636 ; EG-DAG: BFE_INT
|
|
637 ; EG-DAG: ASHR
|
|
638 ; EG-DAG: ASHR
|
121
|
; Reads a <4 x i16> through %in, sign-extends each lane to i64 and stores the
; resulting <4 x i64> through %out; both pointers are addrspace(3).
639 define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
|
120
|
640 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
|
|
641 %ext = sext <4 x i16> %load to <4 x i64>
|
|
642 store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
|
|
643 ret void
|
|
644 }
|
|
645
|
|
646 ; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i64:
|
|
647
|
|
648 ; EG: LDS_READ_RET
|
|
649 ; EG: LDS_READ_RET
|
|
650 ; EG: LDS_READ_RET
|
|
651 ; EG: LDS_READ_RET
|
121
|
; Reads a <8 x i16> through %in, zero-extends each lane to i64 and stores the
; resulting <8 x i64> through %out; both pointers are addrspace(3).
652 define amdgpu_kernel void @local_zextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
|
120
|
653 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
|
|
654 %ext = zext <8 x i16> %load to <8 x i64>
|
|
655 store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
|
|
656 ret void
|
|
657 }
|
|
658
|
|
659 ; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i64:
|
|
660
|
|
661 ; EG: LDS_READ_RET
|
|
662 ; EG: LDS_READ_RET
|
|
663 ; EG: LDS_READ_RET
|
|
664 ; EG: LDS_READ_RET
|
|
665 ; EG-DAG: BFE_INT
|
|
666 ; EG-DAG: BFE_INT
|
|
667 ; EG-DAG: ASHR
|
|
668 ; EG-DAG: ASHR
|
|
669 ; EG-DAG: BFE_INT
|
|
670 ; EG-DAG: BFE_INT
|
|
671 ; EG-DAG: ASHR
|
|
672 ; EG-DAG: ASHR
|
121
|
; Reads a <8 x i16> through %in, sign-extends each lane to i64 and stores the
; resulting <8 x i64> through %out; both pointers are addrspace(3).
673 define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
|
120
|
674 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
|
|
675 %ext = sext <8 x i16> %load to <8 x i64>
|
|
676 store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
|
|
677 ret void
|
|
678 }
|
|
679
|
|
680 ; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i64:
|
|
681
|
|
682 ; EG: LDS_READ_RET
|
|
683 ; EG: LDS_READ_RET
|
|
684 ; EG: LDS_READ_RET
|
|
685 ; EG: LDS_READ_RET
|
|
686 ; EG: LDS_READ_RET
|
|
687 ; EG: LDS_READ_RET
|
|
688 ; EG: LDS_READ_RET
|
|
689 ; EG: LDS_READ_RET
|
121
|
; Reads a <16 x i16> through %in, zero-extends each lane to i64 and stores the
; resulting <16 x i64> through %out; both pointers are addrspace(3).
690 define amdgpu_kernel void @local_zextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
|
120
|
691 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
|
|
692 %ext = zext <16 x i16> %load to <16 x i64>
|
|
693 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
|
|
694 ret void
|
|
695 }
|
|
696
|
|
697 ; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i64:
|
|
698
|
|
699 ; EG: LDS_READ_RET
|
|
700 ; EG: LDS_READ_RET
|
|
701 ; EG: LDS_READ_RET
|
|
702 ; EG: LDS_READ_RET
|
|
703 ; EG: LDS_READ_RET
|
|
704 ; EG: LDS_READ_RET
|
|
705 ; EG: LDS_READ_RET
|
|
706 ; EG: LDS_READ_RET
|
|
707 ; EG-DAG: BFE_INT
|
|
708 ; EG-DAG: BFE_INT
|
|
709 ; EG-DAG: ASHR
|
|
710 ; EG-DAG: ASHR
|
|
711 ; EG-DAG: BFE_INT
|
|
712 ; EG-DAG: BFE_INT
|
|
713 ; EG-DAG: ASHR
|
|
714 ; EG-DAG: ASHR
|
|
715 ; EG-DAG: BFE_INT
|
|
716 ; EG-DAG: BFE_INT
|
|
717 ; EG-DAG: ASHR
|
|
718 ; EG-DAG: ASHR
|
|
719 ; EG-DAG: BFE_INT
|
|
720 ; EG-DAG: BFE_INT
|
|
721 ; EG-DAG: ASHR
|
|
722 ; EG-DAG: ASHR
|
121
|
; Reads a <16 x i16> through %in, sign-extends each lane to i64 and stores the
; resulting <16 x i64> through %out; both pointers are addrspace(3).
723 define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
|
120
|
724 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
|
|
725 %ext = sext <16 x i16> %load to <16 x i64>
|
|
726 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
|
|
727 ret void
|
|
728 }
|
|
729
|
|
730 ; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i64:
|
|
731
|
|
732 ; EG: LDS_READ_RET
|
|
733 ; EG: LDS_READ_RET
|
|
734 ; EG: LDS_READ_RET
|
|
735 ; EG: LDS_READ_RET
|
|
736 ; EG: LDS_READ_RET
|
|
737 ; EG: LDS_READ_RET
|
|
738 ; EG: LDS_READ_RET
|
|
739 ; EG: LDS_READ_RET
|
|
740 ; EG: LDS_READ_RET
|
|
741 ; EG: LDS_READ_RET
|
|
742 ; EG: LDS_READ_RET
|
|
743 ; EG: LDS_READ_RET
|
|
744 ; EG: LDS_READ_RET
|
|
745 ; EG: LDS_READ_RET
|
|
746 ; EG: LDS_READ_RET
|
|
747 ; EG: LDS_READ_RET
|
121
|
; Reads a <32 x i16> through %in, zero-extends each lane to i64 and stores the
; resulting <32 x i64> through %out; both pointers are addrspace(3).
748 define amdgpu_kernel void @local_zextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
|
120
|
749 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
|
|
750 %ext = zext <32 x i16> %load to <32 x i64>
|
|
751 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
|
|
752 ret void
|
|
753 }
|
|
754
|
|
755 ; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i64:
|
|
756
|
|
757 ; EG: LDS_READ_RET
|
|
758 ; EG: LDS_READ_RET
|
|
759 ; EG: LDS_READ_RET
|
|
760 ; EG: LDS_READ_RET
|
|
761 ; EG: LDS_READ_RET
|
|
762 ; EG: LDS_READ_RET
|
|
763 ; EG: LDS_READ_RET
|
|
764 ; EG: LDS_READ_RET
|
|
765 ; EG: LDS_READ_RET
|
|
766 ; EG: LDS_READ_RET
|
|
767 ; EG: LDS_READ_RET
|
|
768 ; EG: LDS_READ_RET
|
|
769 ; EG: LDS_READ_RET
|
|
770 ; EG: LDS_READ_RET
|
|
771 ; EG: LDS_READ_RET
|
|
772 ; EG: LDS_READ_RET
|
|
773 ; EG-DAG: BFE_INT
|
|
774 ; EG-DAG: BFE_INT
|
|
775 ; EG-DAG: ASHR
|
|
776 ; EG-DAG: ASHR
|
|
777 ; EG-DAG: BFE_INT
|
|
778 ; EG-DAG: BFE_INT
|
|
779 ; EG-DAG: ASHR
|
|
780 ; EG-DAG: ASHR
|
|
781 ; EG-DAG: BFE_INT
|
|
782 ; EG-DAG: BFE_INT
|
|
783 ; EG-DAG: ASHR
|
|
784 ; EG-DAG: ASHR
|
|
785 ; EG-DAG: BFE_INT
|
|
786 ; EG-DAG: BFE_INT
|
|
787 ; EG-DAG: ASHR
|
|
788 ; EG-DAG: ASHR
|
|
789 ; EG-DAG: BFE_INT
|
|
790 ; EG-DAG: BFE_INT
|
|
791 ; EG-DAG: ASHR
|
|
792 ; EG-DAG: ASHR
|
|
793 ; EG-DAG: BFE_INT
|
|
794 ; EG-DAG: BFE_INT
|
|
795 ; EG-DAG: ASHR
|
|
796 ; EG-DAG: ASHR
|
|
797 ; EG-DAG: BFE_INT
|
|
798 ; EG-DAG: BFE_INT
|
|
799 ; EG-DAG: ASHR
|
|
800 ; EG-DAG: ASHR
|
|
801 ; EG-DAG: BFE_INT
|
|
802 ; EG-DAG: BFE_INT
|
|
803 ; EG-DAG: ASHR
|
|
804 ; EG-DAG: ASHR
|
121
|
; Reads a <32 x i16> through %in, sign-extends each lane to i64 and stores the
; resulting <32 x i64> through %out; both pointers are addrspace(3).
805 define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
|
120
|
806 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
|
|
807 %ext = sext <32 x i16> %load to <32 x i64>
|
|
808 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
|
|
809 ret void
|
|
810 }
|
|
811
|
|
812 ; ; XFUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i64:
|
121
|
813 ; define amdgpu_kernel void @local_zextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
|
120
|
814 ; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
|
|
815 ; %ext = zext <64 x i16> %load to <64 x i64>
|
|
816 ; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
|
|
817 ; ret void
|
|
818 ; }
|
|
819
|
|
820 ; ; XFUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i64:
|
121
|
821 ; define amdgpu_kernel void @local_sextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
|
120
|
822 ; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
|
|
823 ; %ext = sext <64 x i16> %load to <64 x i64>
|
|
824 ; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
|
|
825 ; ret void
|
|
826 ; }
|
|
827
|
|
; Attribute group #0, referenced by the kernels in this file that are declared
; with a trailing #0; it marks them nounwind.
828 attributes #0 = { nounwind }
|