comparison llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll @ 221:79ff65ed7e25

LLVM12 Original
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Tue, 15 Jun 2021 19:15:29 +0900
parents
children 5f17cb93ff66
comparison
equal deleted inserted replaced
220:42394fc6a535 221:79ff65ed7e25
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -global-isel -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10GISEL %s
4 ; TODO: global-isel produces more code - there will need to be some more combines in the postregbankselectcombine phase
5 ; Depends on some other changes to pass this test - those are in review separately
6
; sample_d_1d: calls llvm.amdgcn.image.sample.d.1d with float derivatives
; (%dsdh, %dsdv) and a half coordinate (%s); dmask 15, <4 x float> result.
; Both llc runs expect image_sample_d over v[0:2] with the a16 modifier.
; The -global-isel run additionally expects s_lshl_b32 s12, s0, 16 followed by
; a v_and_or_b32 that rewrites v2 before the sample.
; NOTE(review): s0 is also the first register of the s[0:7] operand, so the
; shifted value is presumably a stand-in for an undefined upper half -- confirm.
7 define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s) {
8 ; GFX10-LABEL: sample_d_1d:
9 ; GFX10: ; %bb.0: ; %main_body
10 ; GFX10-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
11 ; GFX10-NEXT: s_waitcnt vmcnt(0)
12 ; GFX10-NEXT: ; return to shader part epilog
13 ;
14 ; GFX10GISEL-LABEL: sample_d_1d:
15 ; GFX10GISEL: ; %bb.0: ; %main_body
16 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
17 ; GFX10GISEL-NEXT: v_and_or_b32 v2, 0xffff, v2, s12
18 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
19 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
20 ; GFX10GISEL-NEXT: ; return to shader part epilog
21 main_body:
22 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
23 ret <4 x float> %v
24 }
25
; sample_d_2d: calls llvm.amdgcn.image.sample.d.2d with four float derivatives
; and two half coordinates (%s, %t); dmask 15, <4 x float> result.
; Both runs expect the low 16 bits of v4 to be combined with v5 shifted left by
; 16 (presumably packing %s and %t into one register -- confirm), then
; image_sample_d over v[0:7] with the a16 modifier.
26 define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
27 ; GFX10-LABEL: sample_d_2d:
28 ; GFX10: ; %bb.0: ; %main_body
29 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4
30 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
31 ; GFX10-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
32 ; GFX10-NEXT: s_waitcnt vmcnt(0)
33 ; GFX10-NEXT: ; return to shader part epilog
34 ;
35 ; GFX10GISEL-LABEL: sample_d_2d:
36 ; GFX10GISEL: ; %bb.0: ; %main_body
37 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
38 ; GFX10GISEL-NEXT: v_and_or_b32 v4, 0xffff, v4, v5
39 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
40 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
41 ; GFX10GISEL-NEXT: ; return to shader part epilog
42 main_body:
43 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
44 ret <4 x float> %v
45 }
46
; sample_d_3d: calls llvm.amdgcn.image.sample.d.3d with six float derivatives
; and three half coordinates (%s, %t, %r); dmask 15, <4 x float> result.
; The default llc run expects v6/v7 to be merged into v6 and the address to be
; given as an explicit register list that skips v7 ([v0..v6, v8]).
; The -global-isel run expects 0xffff materialized in v9, two v_and_or_b32
; results in v6 and v7, and a contiguous v[0:7] address operand.
; Both expected sample instructions carry the a16 modifier.
47 define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r) {
48 ; GFX10-LABEL: sample_d_3d:
49 ; GFX10: ; %bb.0: ; %main_body
50 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6
51 ; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6
52 ; GFX10-NEXT: image_sample_d v[0:3], [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
53 ; GFX10-NEXT: s_waitcnt vmcnt(0)
54 ; GFX10-NEXT: ; return to shader part epilog
55 ;
56 ; GFX10GISEL-LABEL: sample_d_3d:
57 ; GFX10GISEL: ; %bb.0: ; %main_body
58 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff
59 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7
60 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
61 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v6, v9, v7
62 ; GFX10GISEL-NEXT: v_and_or_b32 v7, v8, v9, s12
63 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
64 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
65 ; GFX10GISEL-NEXT: ; return to shader part epilog
66 main_body:
67 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
68 ret <4 x float> %v
69 }
70
; sample_c_d_1d: calls llvm.amdgcn.image.sample.c.d.1d, which takes a leading
; float %zcompare ahead of the float derivatives and the half coordinate %s;
; dmask 15, <4 x float> result.
; Both runs expect image_sample_c_d over v[0:3] with the a16 modifier; the
; -global-isel run first expects s_lshl_b32 s12, s0, 16 and a v_and_or_b32
; that rewrites v3.
71 define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s) {
72 ; GFX10-LABEL: sample_c_d_1d:
73 ; GFX10: ; %bb.0: ; %main_body
74 ; GFX10-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
75 ; GFX10-NEXT: s_waitcnt vmcnt(0)
76 ; GFX10-NEXT: ; return to shader part epilog
77 ;
78 ; GFX10GISEL-LABEL: sample_c_d_1d:
79 ; GFX10GISEL: ; %bb.0: ; %main_body
80 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
81 ; GFX10GISEL-NEXT: v_and_or_b32 v3, 0xffff, v3, s12
82 ; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
83 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
84 ; GFX10GISEL-NEXT: ; return to shader part epilog
85 main_body:
86 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
87 ret <4 x float> %v
88 }
89
; sample_c_d_2d: calls llvm.amdgcn.image.sample.c.d.2d with float %zcompare,
; four float derivatives and two half coordinates (%s, %t); dmask 15,
; <4 x float> result.
; Both runs expect v5 (low 16 bits) and v6 (shifted left by 16) to be merged
; into v5, then image_sample_c_d over v[0:7] with the a16 modifier.
90 define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
91 ; GFX10-LABEL: sample_c_d_2d:
92 ; GFX10: ; %bb.0: ; %main_body
93 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5
94 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5
95 ; GFX10-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
96 ; GFX10-NEXT: s_waitcnt vmcnt(0)
97 ; GFX10-NEXT: ; return to shader part epilog
98 ;
99 ; GFX10GISEL-LABEL: sample_c_d_2d:
100 ; GFX10GISEL: ; %bb.0: ; %main_body
101 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6
102 ; GFX10GISEL-NEXT: v_and_or_b32 v5, 0xffff, v5, v6
103 ; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
104 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
105 ; GFX10GISEL-NEXT: ; return to shader part epilog
106 main_body:
107 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
108 ret <4 x float> %v
109 }
110
; sample_d_cl_1d: calls llvm.amdgcn.image.sample.d.cl.1d with two float
; derivatives and two half operands (%s, %clamp); dmask 15, <4 x float> result.
; Both runs expect v2 (low 16 bits) and v3 (shifted left by 16) to be merged
; into v2, then image_sample_d_cl over v[0:2] with the a16 modifier.
111 define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) {
112 ; GFX10-LABEL: sample_d_cl_1d:
113 ; GFX10: ; %bb.0: ; %main_body
114 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
115 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
116 ; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
117 ; GFX10-NEXT: s_waitcnt vmcnt(0)
118 ; GFX10-NEXT: ; return to shader part epilog
119 ;
120 ; GFX10GISEL-LABEL: sample_d_cl_1d:
121 ; GFX10GISEL: ; %bb.0: ; %main_body
122 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
123 ; GFX10GISEL-NEXT: v_and_or_b32 v2, 0xffff, v2, v3
124 ; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
125 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
126 ; GFX10GISEL-NEXT: ; return to shader part epilog
127 main_body:
128 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
129 ret <4 x float> %v
130 }
131
; sample_d_cl_2d: calls llvm.amdgcn.image.sample.d.cl.2d with four float
; derivatives and three half operands (%s, %t, %clamp); dmask 15,
; <4 x float> result.
; The default llc run expects v4/v5 merged into v4 and an explicit address
; list that skips v5 ([v0..v4, v6]).
; The -global-isel run expects 0xffff in v7, v_and_or_b32 results in v4 and v5,
; and a contiguous v[0:7] address operand.
; Both expected sample instructions carry the a16 modifier.
132 define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
133 ; GFX10-LABEL: sample_d_cl_2d:
134 ; GFX10: ; %bb.0: ; %main_body
135 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4
136 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
137 ; GFX10-NEXT: image_sample_d_cl v[0:3], [v0, v1, v2, v3, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
138 ; GFX10-NEXT: s_waitcnt vmcnt(0)
139 ; GFX10-NEXT: ; return to shader part epilog
140 ;
141 ; GFX10GISEL-LABEL: sample_d_cl_2d:
142 ; GFX10GISEL: ; %bb.0: ; %main_body
143 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff
144 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
145 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
146 ; GFX10GISEL-NEXT: v_and_or_b32 v4, v4, v7, v5
147 ; GFX10GISEL-NEXT: v_and_or_b32 v5, v6, v7, s12
148 ; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
149 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
150 ; GFX10GISEL-NEXT: ; return to shader part epilog
151 main_body:
152 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
153 ret <4 x float> %v
154 }
155
; sample_c_d_cl_1d: calls llvm.amdgcn.image.sample.c.d.cl.1d with float
; %zcompare, two float derivatives and two half operands (%s, %clamp);
; dmask 15, <4 x float> result.
; Both runs expect v3 (low 16 bits) and v4 (shifted left by 16) to be merged
; into v3, then image_sample_c_d_cl over v[0:3] with the a16 modifier.
156 define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) {
157 ; GFX10-LABEL: sample_c_d_cl_1d:
158 ; GFX10: ; %bb.0: ; %main_body
159 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3
160 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
161 ; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
162 ; GFX10-NEXT: s_waitcnt vmcnt(0)
163 ; GFX10-NEXT: ; return to shader part epilog
164 ;
165 ; GFX10GISEL-LABEL: sample_c_d_cl_1d:
166 ; GFX10GISEL: ; %bb.0: ; %main_body
167 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
168 ; GFX10GISEL-NEXT: v_and_or_b32 v3, 0xffff, v3, v4
169 ; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
170 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
171 ; GFX10GISEL-NEXT: ; return to shader part epilog
172 main_body:
173 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
174 ret <4 x float> %v
175 }
176
; sample_c_d_cl_2d: calls llvm.amdgcn.image.sample.c.d.cl.2d with float
; %zcompare, four float derivatives and three half operands (%s, %t, %clamp);
; dmask 15, <4 x float> result.
; The default llc run expects v5/v6 merged into v5 and an explicit address
; list that skips v6 ([v0..v5, v7]).
; The -global-isel run expects 0xffff in v8, v_and_or_b32 results in v5 and v6,
; and a contiguous v[0:7] address operand.
; Both expected sample instructions carry the a16 modifier.
177 define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
178 ; GFX10-LABEL: sample_c_d_cl_2d:
179 ; GFX10: ; %bb.0: ; %main_body
180 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5
181 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5
182 ; GFX10-NEXT: image_sample_c_d_cl v[0:3], [v0, v1, v2, v3, v4, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
183 ; GFX10-NEXT: s_waitcnt vmcnt(0)
184 ; GFX10-NEXT: ; return to shader part epilog
185 ;
186 ; GFX10GISEL-LABEL: sample_c_d_cl_2d:
187 ; GFX10GISEL: ; %bb.0: ; %main_body
188 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff
189 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6
190 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
191 ; GFX10GISEL-NEXT: v_and_or_b32 v5, v5, v8, v6
192 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v7, v8, s12
193 ; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
194 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
195 ; GFX10GISEL-NEXT: ; return to shader part epilog
196 main_body:
197 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
198 ret <4 x float> %v
199 }
200
; sample_cd_1d: calls llvm.amdgcn.image.sample.cd.1d with two float derivatives
; and a half coordinate (%s); dmask 15, <4 x float> result.
; Both runs expect image_sample_cd over v[0:2] with the a16 modifier; the
; -global-isel run first expects s_lshl_b32 s12, s0, 16 and a v_and_or_b32
; that rewrites v2.
201 define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s) {
202 ; GFX10-LABEL: sample_cd_1d:
203 ; GFX10: ; %bb.0: ; %main_body
204 ; GFX10-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
205 ; GFX10-NEXT: s_waitcnt vmcnt(0)
206 ; GFX10-NEXT: ; return to shader part epilog
207 ;
208 ; GFX10GISEL-LABEL: sample_cd_1d:
209 ; GFX10GISEL: ; %bb.0: ; %main_body
210 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
211 ; GFX10GISEL-NEXT: v_and_or_b32 v2, 0xffff, v2, s12
212 ; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
213 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
214 ; GFX10GISEL-NEXT: ; return to shader part epilog
215 main_body:
216 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
217 ret <4 x float> %v
218 }
219
; sample_cd_2d: calls llvm.amdgcn.image.sample.cd.2d with four float
; derivatives and two half coordinates (%s, %t); dmask 15, <4 x float> result.
; Both runs expect v4 (low 16 bits) and v5 (shifted left by 16) to be merged
; into v4, then image_sample_cd over v[0:7] with the a16 modifier.
220 define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
221 ; GFX10-LABEL: sample_cd_2d:
222 ; GFX10: ; %bb.0: ; %main_body
223 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4
224 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
225 ; GFX10-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
226 ; GFX10-NEXT: s_waitcnt vmcnt(0)
227 ; GFX10-NEXT: ; return to shader part epilog
228 ;
229 ; GFX10GISEL-LABEL: sample_cd_2d:
230 ; GFX10GISEL: ; %bb.0: ; %main_body
231 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
232 ; GFX10GISEL-NEXT: v_and_or_b32 v4, 0xffff, v4, v5
233 ; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
234 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
235 ; GFX10GISEL-NEXT: ; return to shader part epilog
236 main_body:
237 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
238 ret <4 x float> %v
239 }
240
; sample_c_cd_1d: calls llvm.amdgcn.image.sample.c.cd.1d with float %zcompare,
; two float derivatives and a half coordinate (%s); dmask 15,
; <4 x float> result.
; Both runs expect image_sample_c_cd over v[0:3] with the a16 modifier; the
; -global-isel run first expects s_lshl_b32 s12, s0, 16 and a v_and_or_b32
; that rewrites v3.
241 define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s) {
242 ; GFX10-LABEL: sample_c_cd_1d:
243 ; GFX10: ; %bb.0: ; %main_body
244 ; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
245 ; GFX10-NEXT: s_waitcnt vmcnt(0)
246 ; GFX10-NEXT: ; return to shader part epilog
247 ;
248 ; GFX10GISEL-LABEL: sample_c_cd_1d:
249 ; GFX10GISEL: ; %bb.0: ; %main_body
250 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
251 ; GFX10GISEL-NEXT: v_and_or_b32 v3, 0xffff, v3, s12
252 ; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
253 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
254 ; GFX10GISEL-NEXT: ; return to shader part epilog
255 main_body:
256 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
257 ret <4 x float> %v
258 }
259
; sample_c_cd_2d: calls llvm.amdgcn.image.sample.c.cd.2d with float %zcompare,
; four float derivatives and two half coordinates (%s, %t); dmask 15,
; <4 x float> result.
; Both runs expect v5 (low 16 bits) and v6 (shifted left by 16) to be merged
; into v5, then image_sample_c_cd over v[0:7] with the a16 modifier.
260 define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
261 ; GFX10-LABEL: sample_c_cd_2d:
262 ; GFX10: ; %bb.0: ; %main_body
263 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5
264 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5
265 ; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
266 ; GFX10-NEXT: s_waitcnt vmcnt(0)
267 ; GFX10-NEXT: ; return to shader part epilog
268 ;
269 ; GFX10GISEL-LABEL: sample_c_cd_2d:
270 ; GFX10GISEL: ; %bb.0: ; %main_body
271 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6
272 ; GFX10GISEL-NEXT: v_and_or_b32 v5, 0xffff, v5, v6
273 ; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
274 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
275 ; GFX10GISEL-NEXT: ; return to shader part epilog
276 main_body:
277 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
278 ret <4 x float> %v
279 }
280
; sample_cd_cl_1d: calls llvm.amdgcn.image.sample.cd.cl.1d with two float
; derivatives and two half operands (%s, %clamp); dmask 15, <4 x float> result.
; Both runs expect v2 (low 16 bits) and v3 (shifted left by 16) to be merged
; into v2, then image_sample_cd_cl over v[0:2] with the a16 modifier.
281 define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) {
282 ; GFX10-LABEL: sample_cd_cl_1d:
283 ; GFX10: ; %bb.0: ; %main_body
284 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
285 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
286 ; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
287 ; GFX10-NEXT: s_waitcnt vmcnt(0)
288 ; GFX10-NEXT: ; return to shader part epilog
289 ;
290 ; GFX10GISEL-LABEL: sample_cd_cl_1d:
291 ; GFX10GISEL: ; %bb.0: ; %main_body
292 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
293 ; GFX10GISEL-NEXT: v_and_or_b32 v2, 0xffff, v2, v3
294 ; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
295 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
296 ; GFX10GISEL-NEXT: ; return to shader part epilog
297 main_body:
298 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
299 ret <4 x float> %v
300 }
301
; sample_cd_cl_2d: calls llvm.amdgcn.image.sample.cd.cl.2d with four float
; derivatives and three half operands (%s, %t, %clamp); dmask 15,
; <4 x float> result.
; The default llc run expects v4/v5 merged into v4 and an explicit address
; list that skips v5 ([v0..v4, v6]).
; The -global-isel run expects 0xffff in v7, v_and_or_b32 results in v4 and v5,
; and a contiguous v[0:7] address operand.
; Both expected sample instructions carry the a16 modifier.
302 define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
303 ; GFX10-LABEL: sample_cd_cl_2d:
304 ; GFX10: ; %bb.0: ; %main_body
305 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4
306 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
307 ; GFX10-NEXT: image_sample_cd_cl v[0:3], [v0, v1, v2, v3, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
308 ; GFX10-NEXT: s_waitcnt vmcnt(0)
309 ; GFX10-NEXT: ; return to shader part epilog
310 ;
311 ; GFX10GISEL-LABEL: sample_cd_cl_2d:
312 ; GFX10GISEL: ; %bb.0: ; %main_body
313 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff
314 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
315 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
316 ; GFX10GISEL-NEXT: v_and_or_b32 v4, v4, v7, v5
317 ; GFX10GISEL-NEXT: v_and_or_b32 v5, v6, v7, s12
318 ; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
319 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
320 ; GFX10GISEL-NEXT: ; return to shader part epilog
321 main_body:
322 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
323 ret <4 x float> %v
324 }
325
; sample_c_cd_cl_1d: calls llvm.amdgcn.image.sample.c.cd.cl.1d with float
; %zcompare, two float derivatives and two half operands (%s, %clamp);
; dmask 15, <4 x float> result.
; Both runs expect v3 (low 16 bits) and v4 (shifted left by 16) to be merged
; into v3, then image_sample_c_cd_cl over v[0:3] with the a16 modifier.
326 define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) {
327 ; GFX10-LABEL: sample_c_cd_cl_1d:
328 ; GFX10: ; %bb.0: ; %main_body
329 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3
330 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
331 ; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
332 ; GFX10-NEXT: s_waitcnt vmcnt(0)
333 ; GFX10-NEXT: ; return to shader part epilog
334 ;
335 ; GFX10GISEL-LABEL: sample_c_cd_cl_1d:
336 ; GFX10GISEL: ; %bb.0: ; %main_body
337 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
338 ; GFX10GISEL-NEXT: v_and_or_b32 v3, 0xffff, v3, v4
339 ; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
340 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
341 ; GFX10GISEL-NEXT: ; return to shader part epilog
342 main_body:
343 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
344 ret <4 x float> %v
345 }
346
; sample_c_cd_cl_2d: calls llvm.amdgcn.image.sample.c.cd.cl.2d with float
; %zcompare, four float derivatives and three half operands (%s, %t, %clamp);
; dmask 15, <4 x float> result.
; The default llc run expects v5/v6 merged into v5 and an explicit address
; list that skips v6 ([v0..v5, v7]).
; The -global-isel run expects 0xffff in v8, v_and_or_b32 results in v5 and v6,
; and a contiguous v[0:7] address operand.
; Both expected sample instructions carry the a16 modifier.
347 define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
348 ; GFX10-LABEL: sample_c_cd_cl_2d:
349 ; GFX10: ; %bb.0: ; %main_body
350 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5
351 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5
352 ; GFX10-NEXT: image_sample_c_cd_cl v[0:3], [v0, v1, v2, v3, v4, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
353 ; GFX10-NEXT: s_waitcnt vmcnt(0)
354 ; GFX10-NEXT: ; return to shader part epilog
355 ;
356 ; GFX10GISEL-LABEL: sample_c_cd_cl_2d:
357 ; GFX10GISEL: ; %bb.0: ; %main_body
358 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff
359 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6
360 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
361 ; GFX10GISEL-NEXT: v_and_or_b32 v5, v5, v8, v6
362 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v7, v8, s12
363 ; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
364 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
365 ; GFX10GISEL-NEXT: ; return to shader part epilog
366 main_body:
367 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
368 ret <4 x float> %v
369 }
370
; sample_c_d_o_2darray_V1: calls llvm.amdgcn.image.sample.c.d.o.2darray with a
; scalar float result and dmask 4 (0x4 in the expected output). Operands are
; i32 %offset, float %zcompare, four float derivatives and three half values
; (%s, %t, %slice).
; The default llc run expects v6/v7 merged into v6 and an explicit address
; list that skips v7 ([v0..v6, v8]); the -global-isel run expects 0xffff in v9,
; v_and_or_b32 results in v6 and v7, and a contiguous v[0:7] operand.
; Both expected image_sample_c_d_o instructions write v0 and carry a16.
371 define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
372 ; GFX10-LABEL: sample_c_d_o_2darray_V1:
373 ; GFX10: ; %bb.0: ; %main_body
374 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6
375 ; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6
376 ; GFX10-NEXT: image_sample_c_d_o v0, [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
377 ; GFX10-NEXT: s_waitcnt vmcnt(0)
378 ; GFX10-NEXT: ; return to shader part epilog
379 ;
380 ; GFX10GISEL-LABEL: sample_c_d_o_2darray_V1:
381 ; GFX10GISEL: ; %bb.0: ; %main_body
382 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff
383 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7
384 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
385 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v6, v9, v7
386 ; GFX10GISEL-NEXT: v_and_or_b32 v7, v8, v9, s12
387 ; GFX10GISEL-NEXT: image_sample_c_d_o v0, v[0:7], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
388 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
389 ; GFX10GISEL-NEXT: ; return to shader part epilog
390 main_body:
391 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
392 ret float %v
393 }
394
; sample_c_d_o_2darray_V2: same operands as the scalar-result 2darray test in
; this file's naming scheme, but with a <2 x float> result and dmask 6 (0x6 in
; the expected output). Operands are i32 %offset, float %zcompare, four float
; derivatives and three half values (%s, %t, %slice).
; The default llc run expects v6/v7 merged into v6 and an explicit address
; list that skips v7 ([v0..v6, v8]); the -global-isel run expects 0xffff in v9,
; v_and_or_b32 results in v6 and v7, and a contiguous v[0:7] operand.
; Both expected image_sample_c_d_o instructions write v[0:1] and carry a16.
395 define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
396 ; GFX10-LABEL: sample_c_d_o_2darray_V2:
397 ; GFX10: ; %bb.0: ; %main_body
398 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6
399 ; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6
400 ; GFX10-NEXT: image_sample_c_d_o v[0:1], [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
401 ; GFX10-NEXT: s_waitcnt vmcnt(0)
402 ; GFX10-NEXT: ; return to shader part epilog
403 ;
404 ; GFX10GISEL-LABEL: sample_c_d_o_2darray_V2:
405 ; GFX10GISEL: ; %bb.0: ; %main_body
406 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff
407 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7
408 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
409 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v6, v9, v7
410 ; GFX10GISEL-NEXT: v_and_or_b32 v7, v8, v9, s12
411 ; GFX10GISEL-NEXT: image_sample_c_d_o v[0:1], v[0:7], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
412 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
413 ; GFX10GISEL-NEXT: ; return to shader part epilog
414 main_body:
415 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
416 ret <2 x float> %v
417 }
418
; Declarations for the intrinsic overloads whose names end in .f32.f16
; (float derivative operands, half coordinate operands).
; Attribute group #1 is referenced here but not defined in this part of the
; file. NOTE(review): the .f16.f32 overloads called by the sample_g16_noa16_*
; functions are not declared in this block -- presumably declared further
; down; confirm.
;
; sample.d / sample.c.d variants, with and without .cl:
419 declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
420 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
421 declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32, float, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
422 declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
423 declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
424 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
425 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
426 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
427 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
428
; sample.cd / sample.c.cd variants, with and without .cl:
429 declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
430 declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
431 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
432 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
433 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
434 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
435 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
436 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
437
; sample.c.d.o on 2darray, scalar and <2 x float> result overloads:
438 declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
439 declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
440
; sample_g16_noa16_d_1d: calls the .f16.f32 overload of
; llvm.amdgcn.image.sample.d.1d, i.e. half derivatives (%dsdh, %dsdv) and a
; float coordinate (%s); dmask 15, <4 x float> result.
; Both runs expect image_sample_d_g16 over v[0:2] with no a16 modifier.
; The -global-isel run additionally expects 0xffff in v3, s_lshl_b32 s12, s0,
; 16, and two v_and_or_b32 instructions that rewrite v0 and v1.
441 define amdgpu_ps <4 x float> @sample_g16_noa16_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
442 ; GFX10-LABEL: sample_g16_noa16_d_1d:
443 ; GFX10: ; %bb.0: ; %main_body
444 ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
445 ; GFX10-NEXT: s_waitcnt vmcnt(0)
446 ; GFX10-NEXT: ; return to shader part epilog
447 ;
448 ; GFX10GISEL-LABEL: sample_g16_noa16_d_1d:
449 ; GFX10GISEL: ; %bb.0: ; %main_body
450 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, 0xffff
451 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
452 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v3, s12
453 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v3, s12
454 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
455 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
456 ; GFX10GISEL-NEXT: ; return to shader part epilog
457 main_body:
458 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
459 ret <4 x float> %v
460 }
461
; sample_g16_noa16_d_2d: calls the .f16.f32 overload of
; llvm.amdgcn.image.sample.d.2d, i.e. four half derivatives and two float
; coordinates (%s, %t); dmask 15, <4 x float> result.
; The default llc run expects v0/v1 merged into v0 and v2/v3 merged into v2,
; with the address list [v0, v2, v4, v5].
; The -global-isel run expects the merged values in v0 and v1, with the
; address list [v0, v1, v4, v5].
; Both expected image_sample_d_g16 instructions have no a16 modifier.
462 define amdgpu_ps <4 x float> @sample_g16_noa16_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
463 ; GFX10-LABEL: sample_g16_noa16_d_2d:
464 ; GFX10: ; %bb.0: ; %main_body
465 ; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff
466 ; GFX10-NEXT: v_and_b32_e32 v2, v6, v2
467 ; GFX10-NEXT: v_and_b32_e32 v0, v6, v0
468 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
469 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
470 ; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
471 ; GFX10-NEXT: s_waitcnt vmcnt(0)
472 ; GFX10-NEXT: ; return to shader part epilog
473 ;
474 ; GFX10GISEL-LABEL: sample_g16_noa16_d_2d:
475 ; GFX10GISEL: ; %bb.0: ; %main_body
476 ; GFX10GISEL-NEXT: v_mov_b32_e32 v6, 0xffff
477 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
478 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
479 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v6, v1
480 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v6, v3
481 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
482 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
483 ; GFX10GISEL-NEXT: ; return to shader part epilog
484 main_body:
485 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
486 ret <4 x float> %v
487 }
488
; sample_g16_noa16_d_3d: calls the .f16.f32 overload of
; llvm.amdgcn.image.sample.d.3d, i.e. six half derivatives and three float
; coordinates (%s, %t, %r); dmask 15, <4 x float> result.
; The default llc run expects v0/v1 merged into v0 and v3/v4 merged into v3,
; with the address list [v0, v2, v3, v5, v6, v7, v8].
; The -global-isel run expects four v_and_or_b32 results in v0..v3 (two of
; them combined with s12 = s0 << 16), with the address list
; [v0, v1, v2, v3, v6, v7, v8].
; Both expected image_sample_d_g16 instructions have no a16 modifier.
489 define amdgpu_ps <4 x float> @sample_g16_noa16_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
490 ; GFX10-LABEL: sample_g16_noa16_d_3d:
491 ; GFX10: ; %bb.0: ; %main_body
492 ; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
493 ; GFX10-NEXT: v_and_b32_e32 v3, v9, v3
494 ; GFX10-NEXT: v_and_b32_e32 v0, v9, v0
495 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
496 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
497 ; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
498 ; GFX10-NEXT: s_waitcnt vmcnt(0)
499 ; GFX10-NEXT: ; return to shader part epilog
500 ;
501 ; GFX10GISEL-LABEL: sample_g16_noa16_d_3d:
502 ; GFX10GISEL: ; %bb.0: ; %main_body
503 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff
504 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
505 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
506 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
507 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v9, v1
508 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v9, s12
509 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v9, v4
510 ; GFX10GISEL-NEXT: v_and_or_b32 v3, v5, v9, s12
511 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
512 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
513 ; GFX10GISEL-NEXT: ; return to shader part epilog
514 main_body:
515 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
516 ret <4 x float> %v
517 }
518
; sample_g16_noa16_c_d_1d: calls the .f16.f32 overload of
; llvm.amdgcn.image.sample.c.d.1d, i.e. float %zcompare, two half derivatives
; and a float coordinate (%s); dmask 15, <4 x float> result.
; Both runs expect image_sample_c_d_g16 over v[0:3] with no a16 modifier.
; The -global-isel run additionally expects 0xffff in v4, s_lshl_b32 s12, s0,
; 16, and two v_and_or_b32 instructions that rewrite v1 and v2.
519 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
520 ; GFX10-LABEL: sample_g16_noa16_c_d_1d:
521 ; GFX10: ; %bb.0: ; %main_body
522 ; GFX10-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
523 ; GFX10-NEXT: s_waitcnt vmcnt(0)
524 ; GFX10-NEXT: ; return to shader part epilog
525 ;
526 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_1d:
527 ; GFX10GISEL: ; %bb.0: ; %main_body
528 ; GFX10GISEL-NEXT: v_mov_b32_e32 v4, 0xffff
529 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
530 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v4, s12
531 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v4, s12
532 ; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
533 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
534 ; GFX10GISEL-NEXT: ; return to shader part epilog
535 main_body:
536 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
537 ret <4 x float> %v
538 }
539
; g16 without a16, 2D sample.c.d: float %zcompare, four half derivatives and
; float coordinates %s/%t.
; Both check prefixes expect 0xffff mask / 16-bit shift / or instructions that
; combine registers pairwise, followed by image_sample_c_d_g16 taking a
; bracketed five-register address list with dim:SQ_RSRC_IMG_2D.
540 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
541 ; GFX10-LABEL: sample_g16_noa16_c_d_2d:
542 ; GFX10: ; %bb.0: ; %main_body
543 ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
544 ; GFX10-NEXT: v_and_b32_e32 v3, v7, v3
545 ; GFX10-NEXT: v_and_b32_e32 v1, v7, v1
546 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
547 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
548 ; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
549 ; GFX10-NEXT: s_waitcnt vmcnt(0)
550 ; GFX10-NEXT: ; return to shader part epilog
551 ;
552 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_2d:
553 ; GFX10GISEL: ; %bb.0: ; %main_body
554 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff
555 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
556 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
557 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v7, v2
558 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v7, v4
559 ; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
560 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
561 ; GFX10GISEL-NEXT: ; return to shader part epilog
562 main_body:
563 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
564 ret <4 x float> %v
565 }
566
; g16 without a16, 1D sample.d.cl: half derivatives (%dsdh, %dsdv), float
; coordinate %s and float %clamp.
; Both check prefixes expect image_sample_d_cl_g16 with dim:SQ_RSRC_IMG_1D; the
; GFX10GISEL (-global-isel) checks additionally expect v_and_or_b32 on v0 and
; v1 ahead of the sample.
567 define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
568 ; GFX10-LABEL: sample_g16_noa16_d_cl_1d:
569 ; GFX10: ; %bb.0: ; %main_body
570 ; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
571 ; GFX10-NEXT: s_waitcnt vmcnt(0)
572 ; GFX10-NEXT: ; return to shader part epilog
573 ;
574 ; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_1d:
575 ; GFX10GISEL: ; %bb.0: ; %main_body
576 ; GFX10GISEL-NEXT: v_mov_b32_e32 v4, 0xffff
577 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
578 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v4, s12
579 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v4, s12
580 ; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
581 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
582 ; GFX10GISEL-NEXT: ; return to shader part epilog
583 main_body:
584 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
585 ret <4 x float> %v
586 }
587
; g16 without a16, 2D sample.d.cl: four half derivatives, float coordinates
; %s/%t and float %clamp.
; Both check prefixes expect 0xffff mask / 16-bit shift / or instructions that
; combine registers pairwise, followed by image_sample_d_cl_g16 taking a
; bracketed five-register address list with dim:SQ_RSRC_IMG_2D.
588 define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
589 ; GFX10-LABEL: sample_g16_noa16_d_cl_2d:
590 ; GFX10: ; %bb.0: ; %main_body
591 ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
592 ; GFX10-NEXT: v_and_b32_e32 v2, v7, v2
593 ; GFX10-NEXT: v_and_b32_e32 v0, v7, v0
594 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
595 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
596 ; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
597 ; GFX10-NEXT: s_waitcnt vmcnt(0)
598 ; GFX10-NEXT: ; return to shader part epilog
599 ;
600 ; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_2d:
601 ; GFX10GISEL: ; %bb.0: ; %main_body
602 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff
603 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
604 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
605 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v7, v1
606 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v7, v3
607 ; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
608 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
609 ; GFX10GISEL-NEXT: ; return to shader part epilog
610 main_body:
611 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
612 ret <4 x float> %v
613 }
614
; g16 without a16, 1D sample.c.d.cl: float %zcompare, half derivatives
; (%dsdh, %dsdv), float coordinate %s and float %clamp.
; Both check prefixes expect image_sample_c_d_cl_g16 with the address operand
; written as the range v[0:7] and dim:SQ_RSRC_IMG_1D; the GFX10GISEL
; (-global-isel) checks additionally expect v_and_or_b32 on v1 and v2 first.
615 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
616 ; GFX10-LABEL: sample_g16_noa16_c_d_cl_1d:
617 ; GFX10: ; %bb.0: ; %main_body
618 ; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
619 ; GFX10-NEXT: s_waitcnt vmcnt(0)
620 ; GFX10-NEXT: ; return to shader part epilog
621 ;
622 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_1d:
623 ; GFX10GISEL: ; %bb.0: ; %main_body
624 ; GFX10GISEL-NEXT: v_mov_b32_e32 v5, 0xffff
625 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
626 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v5, s12
627 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v5, s12
628 ; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
629 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
630 ; GFX10GISEL-NEXT: ; return to shader part epilog
631 main_body:
632 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
633 ret <4 x float> %v
634 }
635
; g16 without a16, 2D sample.c.d.cl: float %zcompare, four half derivatives,
; float coordinates %s/%t and float %clamp.
; Both check prefixes expect 0xffff mask / 16-bit shift / or instructions that
; combine registers pairwise, followed by image_sample_c_d_cl_g16 taking a
; bracketed six-register address list with dim:SQ_RSRC_IMG_2D.
636 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
637 ; GFX10-LABEL: sample_g16_noa16_c_d_cl_2d:
638 ; GFX10: ; %bb.0: ; %main_body
639 ; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff
640 ; GFX10-NEXT: v_and_b32_e32 v3, v8, v3
641 ; GFX10-NEXT: v_and_b32_e32 v1, v8, v1
642 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
643 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
644 ; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
645 ; GFX10-NEXT: s_waitcnt vmcnt(0)
646 ; GFX10-NEXT: ; return to shader part epilog
647 ;
648 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_2d:
649 ; GFX10GISEL: ; %bb.0: ; %main_body
650 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff
651 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
652 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
653 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v8, v2
654 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v8, v4
655 ; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
656 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
657 ; GFX10GISEL-NEXT: ; return to shader part epilog
658 main_body:
659 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
660 ret <4 x float> %v
661 }
662
; g16 without a16, 1D sample.cd: half derivatives (%dsdh, %dsdv) and a float
; coordinate %s.
; Both check prefixes expect image_sample_cd_g16 with address range v[0:2] and
; dim:SQ_RSRC_IMG_1D; the GFX10GISEL (-global-isel) checks additionally expect
; v_and_or_b32 on v0 and v1 ahead of the sample.
663 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
664 ; GFX10-LABEL: sample_g16_noa16_cd_1d:
665 ; GFX10: ; %bb.0: ; %main_body
666 ; GFX10-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
667 ; GFX10-NEXT: s_waitcnt vmcnt(0)
668 ; GFX10-NEXT: ; return to shader part epilog
669 ;
670 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_1d:
671 ; GFX10GISEL: ; %bb.0: ; %main_body
672 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, 0xffff
673 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
674 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v3, s12
675 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v3, s12
676 ; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
677 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
678 ; GFX10GISEL-NEXT: ; return to shader part epilog
679 main_body:
680 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
681 ret <4 x float> %v
682 }
683
; g16 without a16, 2D sample.cd: four half derivatives and float coordinates
; %s/%t.
; Both check prefixes expect 0xffff mask / 16-bit shift / or instructions that
; combine registers pairwise, followed by image_sample_cd_g16 taking a
; bracketed four-register address list with dim:SQ_RSRC_IMG_2D.
684 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
685 ; GFX10-LABEL: sample_g16_noa16_cd_2d:
686 ; GFX10: ; %bb.0: ; %main_body
687 ; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff
688 ; GFX10-NEXT: v_and_b32_e32 v2, v6, v2
689 ; GFX10-NEXT: v_and_b32_e32 v0, v6, v0
690 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
691 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
692 ; GFX10-NEXT: image_sample_cd_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
693 ; GFX10-NEXT: s_waitcnt vmcnt(0)
694 ; GFX10-NEXT: ; return to shader part epilog
695 ;
696 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_2d:
697 ; GFX10GISEL: ; %bb.0: ; %main_body
698 ; GFX10GISEL-NEXT: v_mov_b32_e32 v6, 0xffff
699 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
700 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
701 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v6, v1
702 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v6, v3
703 ; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
704 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
705 ; GFX10GISEL-NEXT: ; return to shader part epilog
706 main_body:
707 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
708 ret <4 x float> %v
709 }
710
; g16 without a16, 1D sample.c.cd: float %zcompare, half derivatives
; (%dsdh, %dsdv) and a float coordinate %s.
; Both check prefixes expect image_sample_c_cd_g16 with dim:SQ_RSRC_IMG_1D; the
; GFX10GISEL (-global-isel) checks additionally expect v_and_or_b32 on v1 and
; v2 ahead of the sample.
711 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
712 ; GFX10-LABEL: sample_g16_noa16_c_cd_1d:
713 ; GFX10: ; %bb.0: ; %main_body
714 ; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
715 ; GFX10-NEXT: s_waitcnt vmcnt(0)
716 ; GFX10-NEXT: ; return to shader part epilog
717 ;
718 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_1d:
719 ; GFX10GISEL: ; %bb.0: ; %main_body
720 ; GFX10GISEL-NEXT: v_mov_b32_e32 v4, 0xffff
721 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
722 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v4, s12
723 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v4, s12
724 ; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
725 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
726 ; GFX10GISEL-NEXT: ; return to shader part epilog
727 main_body:
728 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
729 ret <4 x float> %v
730 }
731
; g16 without a16, 2D sample.c.cd: float %zcompare, four half derivatives and
; float coordinates %s/%t.
; Both check prefixes expect 0xffff mask / 16-bit shift / or instructions that
; combine registers pairwise, followed by image_sample_c_cd_g16 taking a
; bracketed five-register address list with dim:SQ_RSRC_IMG_2D.
732 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
733 ; GFX10-LABEL: sample_g16_noa16_c_cd_2d:
734 ; GFX10: ; %bb.0: ; %main_body
735 ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
736 ; GFX10-NEXT: v_and_b32_e32 v3, v7, v3
737 ; GFX10-NEXT: v_and_b32_e32 v1, v7, v1
738 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
739 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
740 ; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
741 ; GFX10-NEXT: s_waitcnt vmcnt(0)
742 ; GFX10-NEXT: ; return to shader part epilog
743 ;
744 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_2d:
745 ; GFX10GISEL: ; %bb.0: ; %main_body
746 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff
747 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
748 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
749 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v7, v2
750 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v7, v4
751 ; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
752 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
753 ; GFX10GISEL-NEXT: ; return to shader part epilog
754 main_body:
755 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
756 ret <4 x float> %v
757 }
758
; g16 without a16, 1D sample.cd.cl: half derivatives (%dsdh, %dsdv), float
; coordinate %s and float %clamp.
; Both check prefixes expect image_sample_cd_cl_g16 with dim:SQ_RSRC_IMG_1D;
; the GFX10GISEL (-global-isel) checks additionally expect v_and_or_b32 on v0
; and v1 ahead of the sample.
759 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
760 ; GFX10-LABEL: sample_g16_noa16_cd_cl_1d:
761 ; GFX10: ; %bb.0: ; %main_body
762 ; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
763 ; GFX10-NEXT: s_waitcnt vmcnt(0)
764 ; GFX10-NEXT: ; return to shader part epilog
765 ;
766 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_1d:
767 ; GFX10GISEL: ; %bb.0: ; %main_body
768 ; GFX10GISEL-NEXT: v_mov_b32_e32 v4, 0xffff
769 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
770 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v4, s12
771 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v4, s12
772 ; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
773 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
774 ; GFX10GISEL-NEXT: ; return to shader part epilog
775 main_body:
776 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
777 ret <4 x float> %v
778 }
779
; g16 without a16, 2D sample.cd.cl: four half derivatives, float coordinates
; %s/%t and float %clamp.
; Both check prefixes expect 0xffff mask / 16-bit shift / or instructions that
; combine registers pairwise, followed by image_sample_cd_cl_g16 taking a
; bracketed five-register address list with dim:SQ_RSRC_IMG_2D.
780 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
781 ; GFX10-LABEL: sample_g16_noa16_cd_cl_2d:
782 ; GFX10: ; %bb.0: ; %main_body
783 ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
784 ; GFX10-NEXT: v_and_b32_e32 v2, v7, v2
785 ; GFX10-NEXT: v_and_b32_e32 v0, v7, v0
786 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
787 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
788 ; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
789 ; GFX10-NEXT: s_waitcnt vmcnt(0)
790 ; GFX10-NEXT: ; return to shader part epilog
791 ;
792 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_2d:
793 ; GFX10GISEL: ; %bb.0: ; %main_body
794 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff
795 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
796 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
797 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v7, v1
798 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v7, v3
799 ; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
800 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
801 ; GFX10GISEL-NEXT: ; return to shader part epilog
802 main_body:
803 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
804 ret <4 x float> %v
805 }
806
; g16 without a16, 1D sample.c.cd.cl: float %zcompare, half derivatives
; (%dsdh, %dsdv), float coordinate %s and float %clamp.
; Both check prefixes expect image_sample_c_cd_cl_g16 with the address operand
; written as the range v[0:7] and dim:SQ_RSRC_IMG_1D; the GFX10GISEL
; (-global-isel) checks additionally expect v_and_or_b32 on v1 and v2 first.
807 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
808 ; GFX10-LABEL: sample_g16_noa16_c_cd_cl_1d:
809 ; GFX10: ; %bb.0: ; %main_body
810 ; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
811 ; GFX10-NEXT: s_waitcnt vmcnt(0)
812 ; GFX10-NEXT: ; return to shader part epilog
813 ;
814 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_1d:
815 ; GFX10GISEL: ; %bb.0: ; %main_body
816 ; GFX10GISEL-NEXT: v_mov_b32_e32 v5, 0xffff
817 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
818 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v5, s12
819 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v5, s12
820 ; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
821 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
822 ; GFX10GISEL-NEXT: ; return to shader part epilog
823 main_body:
824 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
825 ret <4 x float> %v
826 }
827
; g16 without a16, 2D sample.c.cd.cl: float %zcompare, four half derivatives,
; float coordinates %s/%t and float %clamp.
; Both check prefixes expect 0xffff mask / 16-bit shift / or instructions that
; combine registers pairwise, followed by image_sample_c_cd_cl_g16 taking a
; bracketed six-register address list with dim:SQ_RSRC_IMG_2D.
828 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
829 ; GFX10-LABEL: sample_g16_noa16_c_cd_cl_2d:
830 ; GFX10: ; %bb.0: ; %main_body
831 ; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff
832 ; GFX10-NEXT: v_and_b32_e32 v3, v8, v3
833 ; GFX10-NEXT: v_and_b32_e32 v1, v8, v1
834 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
835 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
836 ; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
837 ; GFX10-NEXT: s_waitcnt vmcnt(0)
838 ; GFX10-NEXT: ; return to shader part epilog
839 ;
840 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_2d:
841 ; GFX10GISEL: ; %bb.0: ; %main_body
842 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff
843 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
844 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
845 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v8, v2
846 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v8, v4
847 ; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
848 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
849 ; GFX10GISEL-NEXT: ; return to shader part epilog
850 main_body:
851 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
852 ret <4 x float> %v
853 }
854
; g16 without a16, 2D-array sample.c.d.o returning a scalar float: i32 %offset,
; float %zcompare, four half derivatives and float %s/%t/%slice, called with a
; dmask argument of 4.
; Both check prefixes expect 0xffff mask / 16-bit shift / or instructions that
; combine registers pairwise, then image_sample_c_d_o_g16 writing the single
; register v0 with dmask:0x4 and dim:SQ_RSRC_IMG_2D_ARRAY.
855 define amdgpu_ps float @sample_g16_noa16_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
856 ; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V1:
857 ; GFX10: ; %bb.0: ; %main_body
858 ; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
859 ; GFX10-NEXT: v_and_b32_e32 v4, v9, v4
860 ; GFX10-NEXT: v_and_b32_e32 v2, v9, v2
861 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
862 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
863 ; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
864 ; GFX10-NEXT: s_waitcnt vmcnt(0)
865 ; GFX10-NEXT: ; return to shader part epilog
866 ;
867 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V1:
868 ; GFX10GISEL: ; %bb.0: ; %main_body
869 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff
870 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
871 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
872 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v9, v3
873 ; GFX10GISEL-NEXT: v_and_or_b32 v3, v4, v9, v5
874 ; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
875 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
876 ; GFX10GISEL-NEXT: ; return to shader part epilog
877 main_body:
878 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
879 ret float %v
880 }
881
; g16 without a16, 2D-array sample.c.d.o returning <2 x float>: same operands
; as the _V1 variant's signature (i32 %offset, float %zcompare, four half
; derivatives, float %s/%t/%slice) but called with a dmask argument of 6.
; Both check prefixes expect 0xffff mask / 16-bit shift / or instructions that
; combine registers pairwise, then image_sample_c_d_o_g16 writing v[0:1] with
; dmask:0x6 and dim:SQ_RSRC_IMG_2D_ARRAY.
882 define amdgpu_ps <2 x float> @sample_g16_noa16_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
883 ; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V2:
884 ; GFX10: ; %bb.0: ; %main_body
885 ; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
886 ; GFX10-NEXT: v_and_b32_e32 v4, v9, v4
887 ; GFX10-NEXT: v_and_b32_e32 v2, v9, v2
888 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
889 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
890 ; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
891 ; GFX10-NEXT: s_waitcnt vmcnt(0)
892 ; GFX10-NEXT: ; return to shader part epilog
893 ;
894 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V2:
895 ; GFX10GISEL: ; %bb.0: ; %main_body
896 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff
897 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
898 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
899 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v9, v3
900 ; GFX10GISEL-NEXT: v_and_or_b32 v3, v4, v9, v5
901 ; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
902 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
903 ; GFX10GISEL-NEXT: ; return to shader part epilog
904 main_body:
905 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
906 ret <2 x float> %v
907 }
908
; Declarations of the image-sample intrinsics in their .f16.f32 form: the
; derivative operands are half, the remaining floating-point operands are
; float. These are the callees of the sample_g16_noa16_* test functions.
; First group: sample.d variants (plain, .c, .cl, .c.cl).
909 declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
910 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
911 declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
912 declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
913 declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
914 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
915 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
916 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
917 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
918
; Second group: sample.cd variants (plain, .c, .cl, .c.cl).
919 declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
920 declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
921 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
922 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
923 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
924 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
925 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
926 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
927
; Third group: sample.c.d.o on 2D arrays, with scalar float and <2 x float>
; return types (the extra leading i32 operand is the offset).
928 declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
929 declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
930
; Attribute groups. Every declaration above uses #1 (nounwind readonly).
; NOTE(review): #0 and #2 are not referenced by the declarations above; confirm
; whether anything else in the file uses them before removing.
931 attributes #0 = { nounwind }
932 attributes #1 = { nounwind readonly }
933 attributes #2 = { nounwind readnone }