150
|
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
|
236
|
3 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
|
|
4 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
|
150
|
5
|
|
; 1D image load, all four channels enabled (dmask 15), addressed by a single
; 16-bit coordinate. Only element 0 of %coords is used; the checks expect one
; a16 image_load whose address operand is v0.
define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: load_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %x = extractelement <2 x i16> %coords, i32 0
  %texel = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
|
|
23
|
|
; 2D image load with two 16-bit coordinates, both taken from the one
; <2 x i16> argument. The checks expect the address to stay in a single
; register (v0) with the a16 modifier set.
define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: load_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %x = extractelement <2 x i16> %coords, i32 0
  %y = extractelement <2 x i16> %coords, i32 1
  %texel = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
|
|
42
|
|
; 3D image load with three 16-bit coordinates: two from %coords_lo and one
; from element 0 of %coords_hi. The checks expect the address in the register
; pair v[0:1].
define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: load_3d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_3d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %z = extractelement <2 x i16> %coords_hi, i32 0
  %texel = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 15, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
|
|
62
|
|
; Cube image load with three 16-bit address components (two from %coords_lo,
; one from element 0 of %coords_hi). The gfx9 checks expect the da flag; the
; gfx10+ checks expect dim:SQ_RSRC_IMG_CUBE instead.
define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: load_cube:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_cube:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %face = extractelement <2 x i16> %coords_hi, i32 0
  %texel = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 15, i16 %x, i16 %y, i16 %face, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
|
|
82
|
|
; 1D-array image load: a 16-bit coordinate plus a 16-bit slice index, both
; from %coords. The checks expect a single address register (v0); gfx9 also
; expects the da flag.
define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: load_1darray:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_1darray:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %x = extractelement <2 x i16> %coords, i32 0
  %layer = extractelement <2 x i16> %coords, i32 1
  %texel = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 15, i16 %x, i16 %layer, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
|
|
101
|
|
; 2D-array image load: two 16-bit coordinates from %coords_lo and a 16-bit
; slice index from element 0 of %coords_hi. The checks expect the address in
; v[0:1]; gfx9 also expects the da flag.
define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: load_2darray:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_2darray:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %layer = extractelement <2 x i16> %coords_hi, i32 0
  %texel = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 15, i16 %x, i16 %y, i16 %layer, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
|
|
121
|
|
; 2D multisample image load: two 16-bit coordinates from %coords_lo and a
; 16-bit fragment id from element 0 of %coords_hi. The checks expect the
; address in v[0:1] and, on gfx9, no da flag.
define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: load_2dmsaa:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_2dmsaa:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %sample = extractelement <2 x i16> %coords_hi, i32 0
  %texel = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 15, i16 %x, i16 %y, i16 %sample, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
|
|
141
|
|
; 2D-array multisample image load: four 16-bit address components, two from
; each <2 x i16> argument. The checks expect the address in v[0:1]; gfx9 also
; expects the da flag.
define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: load_2darraymsaa:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_2darraymsaa:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %layer = extractelement <2 x i16> %coords_hi, i32 0
  %sample = extractelement <2 x i16> %coords_hi, i32 1
  %texel = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 15, i16 %x, i16 %y, i16 %layer, i16 %sample, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
|
|
162
|
|
; 1D image load with an explicit mip level: a 16-bit coordinate and a 16-bit
; mip index, both from %coords. The checks expect image_load_mip with a single
; address register (v0).
define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: load_mip_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_load_mip v[0:3], v0, s[0:7] dmask:0xf unorm a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_mip_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_load_mip v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %x = extractelement <2 x i16> %coords, i32 0
  %lod = extractelement <2 x i16> %coords, i32 1
  %texel = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 %x, i16 %lod, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
|
|
181
|
|
; 2D image load with an explicit mip level: two 16-bit coordinates from
; %coords_lo and the mip index from element 0 of %coords_hi. The checks expect
; image_load_mip addressed by v[0:1].
define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: load_mip_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_mip_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %lod = extractelement <2 x i16> %coords_hi, i32 0
  %texel = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 15, i16 %x, i16 %y, i16 %lod, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
|
|
201
|
|
; 3D image load with an explicit mip level: four 16-bit address components,
; two from each <2 x i16> argument. The checks expect image_load_mip addressed
; by v[0:1].
define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: load_mip_3d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_mip_3d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %z = extractelement <2 x i16> %coords_hi, i32 0
  %lod = extractelement <2 x i16> %coords_hi, i32 1
  %texel = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 15, i16 %x, i16 %y, i16 %z, i16 %lod, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
|
|
222
|
|
; Cube image load with an explicit mip level: four 16-bit address components,
; two from each <2 x i16> argument. The checks expect image_load_mip addressed
; by v[0:1]; gfx9 also expects the da flag.
define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: load_mip_cube:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_mip_cube:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %face = extractelement <2 x i16> %coords_hi, i32 0
  %lod = extractelement <2 x i16> %coords_hi, i32 1
  %texel = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 15, i16 %x, i16 %y, i16 %face, i16 %lod, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
|
|
243
|
|
; 1D-array image load with an explicit mip level: coordinate and slice index
; from %coords_lo, mip index from element 0 of %coords_hi. The checks expect
; image_load_mip addressed by v[0:1]; gfx9 also expects the da flag.
define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: load_mip_1darray:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_mip_1darray:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %layer = extractelement <2 x i16> %coords_lo, i32 1
  %lod = extractelement <2 x i16> %coords_hi, i32 0
  %texel = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 15, i16 %x, i16 %layer, i16 %lod, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
|
|
263
|
|
; 2D-array image load with an explicit mip level: four 16-bit address
; components, two from each <2 x i16> argument. The checks expect
; image_load_mip addressed by v[0:1]; gfx9 also expects the da flag.
define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: load_mip_2darray:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_mip_2darray:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %layer = extractelement <2 x i16> %coords_hi, i32 0
  %lod = extractelement <2 x i16> %coords_hi, i32 1
  %texel = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 15, i16 %x, i16 %y, i16 %layer, i16 %lod, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
|
|
284
|
|
; 1D image store of a four-channel value (dmask 15) addressed by one 16-bit
; coordinate (element 0 of %coords). The checks expect the data in v[0:3] and
; the address in v4; gfx11 additionally expects s_nop plus a
; MSG_DEALLOC_VGPRS message before s_endpgm.
define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: store_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: store_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: store_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
|
|
307
|
|
; 2D image store addressed by two 16-bit coordinates, both from %coords. The
; checks expect the data in v[0:3] and the address in the single register v4.
define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: store_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: store_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: store_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords, i32 0
  %y = extractelement <2 x i16> %coords, i32 1
  call void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
|
|
331
|
|
; 3D image store addressed by three 16-bit coordinates (two from %coords_lo,
; one from element 0 of %coords_hi). The checks expect the data in v[0:3] and
; the address in v[4:5].
define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: store_3d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: store_3d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: store_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %z = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
|
|
356
|
|
; Cube image store addressed by three 16-bit components (two from %coords_lo,
; one from element 0 of %coords_hi). The checks expect the address in v[4:5];
; gfx9 also expects the da flag.
define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: store_cube:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: store_cube:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: store_cube:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %face = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %x, i16 %y, i16 %face, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
|
|
381
|
|
; 1D-array image store addressed by a 16-bit coordinate and a 16-bit slice
; index, both from %coords. The checks expect the address in the single
; register v4; gfx9 also expects the da flag.
define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: store_1darray:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: store_1darray:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: store_1darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords, i32 0
  %layer = extractelement <2 x i16> %coords, i32 1
  call void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %x, i16 %layer, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
|
|
405
|
|
; 2D-array image store addressed by two 16-bit coordinates from %coords_lo and
; a slice index from element 0 of %coords_hi. The checks expect the address in
; v[4:5]; gfx9 also expects the da flag.
define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: store_2darray:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: store_2darray:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: store_2darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %layer = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %x, i16 %y, i16 %layer, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
|
|
430
|
|
; 2D multisample image store addressed by two 16-bit coordinates from
; %coords_lo and a fragment id from element 0 of %coords_hi. The checks expect
; the address in v[4:5] and, on gfx9, no da flag.
define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: store_2dmsaa:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: store_2dmsaa:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: store_2dmsaa:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %sample = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %x, i16 %y, i16 %sample, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
|
|
455
|
|
; 2D-array multisample image store addressed by four 16-bit components, two
; from each <2 x i16> argument. The checks expect the address in v[4:5]; gfx9
; also expects the da flag.
define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: store_2darraymsaa:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: store_2darraymsaa:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: store_2darraymsaa:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %layer = extractelement <2 x i16> %coords_hi, i32 0
  %sample = extractelement <2 x i16> %coords_hi, i32 1
  call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %x, i16 %y, i16 %layer, i16 %sample, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
|
|
481
|
|
; 1D image store to an explicit mip level: a 16-bit coordinate and a 16-bit
; mip index, both from %coords. The checks expect image_store_mip with the
; address in the single register v4.
define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: store_mip_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_store_mip v[0:3], v4, s[0:7] dmask:0xf unorm a16
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: store_mip_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_store_mip v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: store_mip_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords, i32 0
  %lod = extractelement <2 x i16> %coords, i32 1
  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %x, i16 %lod, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
|
|
505
|
|
; 2D image store to an explicit mip level: two 16-bit coordinates from
; %coords_lo and the mip index from element 0 of %coords_hi. The checks expect
; image_store_mip with the address in v[4:5].
define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: store_mip_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: store_mip_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: store_mip_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %lod = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %x, i16 %y, i16 %lod, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
|
|
530
|
|
; 3D image store to an explicit mip level: four 16-bit address components, two
; from each <2 x i16> argument. The checks expect image_store_mip with the
; address in v[4:5].
define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: store_mip_3d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: store_mip_3d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: store_mip_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %z = extractelement <2 x i16> %coords_hi, i32 0
  %lod = extractelement <2 x i16> %coords_hi, i32 1
  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %x, i16 %y, i16 %z, i16 %lod, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
|
|
556
|
|
; Cube image store to an explicit mip level: four 16-bit address components,
; two from each <2 x i16> argument. The checks expect image_store_mip with the
; address in v[4:5]; gfx9 also expects the da flag.
define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: store_mip_cube:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: store_mip_cube:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: store_mip_cube:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %face = extractelement <2 x i16> %coords_hi, i32 0
  %lod = extractelement <2 x i16> %coords_hi, i32 1
  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %x, i16 %y, i16 %face, i16 %lod, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
|
|
582
|
|
; 1D-array image store to an explicit mip level: coordinate and slice index
; from %coords_lo, mip index from element 0 of %coords_hi. The checks expect
; image_store_mip with the address in v[4:5]; gfx9 also expects the da flag.
define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: store_mip_1darray:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: store_mip_1darray:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: store_mip_1darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %layer = extractelement <2 x i16> %coords_lo, i32 1
  %lod = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %x, i16 %layer, i16 %lod, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
|
|
607
|
|
; 2D-array image store to an explicit mip level: four 16-bit address
; components, two from each <2 x i16> argument. The checks expect
; image_store_mip with the address in v[4:5]; gfx9 also expects the da flag.
define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: store_mip_2darray:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: store_mip_2darray:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: store_mip_2darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %layer = extractelement <2 x i16> %coords_hi, i32 0
  %lod = extractelement <2 x i16> %coords_hi, i32 1
  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %x, i16 %y, i16 %layer, i16 %lod, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
|
|
633
|
|
; Resource-info query on a 1D image with a 16-bit mip argument (element 0 of
; %coords). The checks expect a single a16 image_get_resinfo reading its
; operand from v0 and returning four channels in v[0:3].
define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: getresinfo_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: getresinfo_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %lod = extractelement <2 x i16> %coords, i32 0
  %info = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 15, i16 %lod, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %info
}
|
|
651
|
|
; Resource-info query on a 2D image. The mip argument is the i16 in element 0
; of %coords and all four result channels are requested (dmask 15); the
; expected instruction carries the a16 modifier.
define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: getresinfo_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: getresinfo_2d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
669
|
|
; Resource-info query on a 3D image. The mip argument is the i16 in element 0
; of %coords and all four result channels are requested (dmask 15); the
; expected instruction carries the a16 modifier.
define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: getresinfo_3d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: getresinfo_3d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
687
|
|
; Resource-info query on a cube image. The mip argument is the i16 in element 0
; of %coords and all four result channels are requested (dmask 15). The
; expected instruction carries a16; the gfx900 expectation additionally has da.
define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: getresinfo_cube:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: getresinfo_cube:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
705
|
|
; Resource-info query on a 1D array image. The mip argument is the i16 in
; element 0 of %coords and all four result channels are requested (dmask 15).
; The expected instruction carries a16; the gfx900 expectation additionally
; has da.
define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: getresinfo_1darray:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: getresinfo_1darray:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
723
|
|
; Resource-info query on a 2D array image. The mip argument is the i16 in
; element 0 of %coords and all four result channels are requested (dmask 15).
; The expected instruction carries a16; the gfx900 expectation additionally
; has da.
define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: getresinfo_2darray:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: getresinfo_2darray:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
741
|
|
; Resource-info query on a 2D MSAA image. The mip argument is the i16 in
; element 0 of %coords and all four result channels are requested (dmask 15);
; the expected instruction carries the a16 modifier.
define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: getresinfo_2dmsaa:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: getresinfo_2dmsaa:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
759
|
|
; Resource-info query on a 2D MSAA array image. The mip argument is the i16 in
; element 0 of %coords and all four result channels are requested (dmask 15).
; The expected instruction carries a16; the gfx900 expectation additionally
; has da.
define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: getresinfo_2darraymsaa:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: getresinfo_2darraymsaa:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
777
|
|
; 1D image load returning a single float. dmask 8 is passed, and the expected
; instruction writes only the single register v0. The 16-bit coordinate is
; element 0 of %coords.
define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: load_1d_V1:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_1d_V1:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm a16
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call float @llvm.amdgcn.image.load.1d.f32.i16(i32 8, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}
|
|
795
|
|
; 1D image load returning <2 x float>. dmask 9 is passed, and the expected
; instruction writes the register pair v[0:1]. The 16-bit coordinate is
; element 0 of %coords.
define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: load_1d_V2:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_1d_V2:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm a16
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 9, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}
|
|
813
|
|
; 1D image store of a single float with dmask 2. In the expected instruction
; the data operand is v0 and the 16-bit coordinate (element 0 of %coords) is
; addressed through v1.
define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16> %coords) {
; GFX9-LABEL: store_1d_V1:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v0, v1, s[0:7] dmask:0x2 unorm a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_1d_V1:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v0, v1, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_1d_V1:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v0, v1, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.f32.i16(float %vdata, i32 2, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
|
|
836
|
|
; 1D image store of <2 x float> with dmask 12. In the expected instruction the
; data operand is v[0:1] and the 16-bit coordinate (element 0 of %coords) is
; addressed through v2.
define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: store_1d_V2:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v[0:1], v2, s[0:7] dmask:0xc unorm a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_1d_V2:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v[0:1], v2, s[0:7] dmask:0xc dim:SQ_RSRC_IMG_1D unorm a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_1d_V2:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v[0:1], v2, s[0:7] dmask:0xc dim:SQ_RSRC_IMG_1D unorm a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float> %vdata, i32 12, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
|
|
859
|
|
; 1D image load (dmask 15, 16-bit coordinate) whose final i32 operand is 1;
; the expected instruction carries the glc modifier.
define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: load_1d_glc:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_1d_glc:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret <4 x float> %v
}
|
|
877
|
|
; 1D image load (dmask 15, 16-bit coordinate) whose final i32 operand is 2;
; the expected instruction carries the slc modifier.
define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: load_1d_slc:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_1d_slc:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc a16
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret <4 x float> %v
}
|
|
895
|
|
; 1D image load (dmask 15, 16-bit coordinate) whose final i32 operand is 3;
; the expected instruction carries both the glc and slc modifiers.
define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: load_1d_glc_slc:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_1d_glc_slc:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc a16
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret <4 x float> %v
}
|
|
913
|
|
; 1D image store (dmask 15, 16-bit coordinate) whose final i32 operand is 1;
; the expected instruction carries the glc modifier.
define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: store_1d_glc:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_1d_glc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_1d_glc:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret void
}
|
|
936
|
|
; 1D image store (dmask 15, 16-bit coordinate) whose final i32 operand is 2;
; the expected instruction carries the slc modifier.
define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: store_1d_slc:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_1d_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_1d_slc:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret void
}
|
|
959
|
|
; 1D image store (dmask 15, 16-bit coordinate) whose final i32 operand is 3;
; the expected instruction carries both the glc and slc modifiers.
define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: store_1d_glc_slc:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_1d_glc_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_1d_glc_slc:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret void
}
|
|
982
|
|
; Resource-info query on a 1D image with dmask 0. No image instruction is
; expected in the output: both expectation blocks contain only the shader
; epilog comment. The %vdata parameter is not referenced by the body.
define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 {
; GFX9-LABEL: getresinfo_dmask0:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: getresinfo_dmask0:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 0, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %r
}
|
|
996
|
|
; Image load intrinsics with i16 coordinates, one per image dimension.
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1

; Mip-level load variants: one more i16 operand than the matching plain load.
declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1

; Image store intrinsics with i16 coordinates; the <4 x float> data comes first.
declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32, i16, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0

; Mip-level store variants: one more i16 operand than the matching plain store.
declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0

; Resource-info queries: a single i16 operand regardless of image dimension.
declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2

; Narrower data types: scalar float and <2 x float> loads and stores.
declare float @llvm.amdgcn.image.load.1d.f32.i16(i32, i16, <8 x i32>, i32, i32) #1
declare float @llvm.amdgcn.image.load.2d.f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32, i16, <8 x i32>, i32, i32) #1
declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32, i16, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32, i16, <8 x i32>, i32, i32) #0

; Attribute groups: #0 is attached to the store declarations, #1 to the load
; declarations and #2 to the getresinfo declarations above.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }
|