Mercurial > hg > CbC > CbC_llvm
comparison llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll @ 221:79ff65ed7e25
LLVM12 Original
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 15 Jun 2021 19:15:29 +0900 |
parents | |
children | 5f17cb93ff66 |
comparison
equal
deleted
inserted
replaced
220:42394fc6a535 | 221:79ff65ed7e25 |
---|---|
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | |
2 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s | |
3 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -global-isel -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10GISEL %s | |
4 ; TODO: global-isel currently produces more code than the default selector for these cases; more combines will be needed in the postregbankselectcombine phase to close the gap | |
5 ; This test depends on some other changes in order to pass - those changes are in review separately | |
6 | |
; Test: image.sample.d on a 1D image with dmask 15. The derivatives %dsdh/%dsdv are float, the coordinate %s is half.
; Both sets of checks expect one image_sample_d with dim:SQ_RSRC_IMG_1D and the a16 modifier.
7 define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s) { | |
8 ; GFX10-LABEL: sample_d_1d: | |
9 ; GFX10: ; %bb.0: ; %main_body | |
10 ; GFX10-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | |
11 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
12 ; GFX10-NEXT: ; return to shader part epilog | |
13 ; | |
14 ; GFX10GISEL-LABEL: sample_d_1d: | |
15 ; GFX10GISEL: ; %bb.0: ; %main_body | |
16 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
17 ; GFX10GISEL-NEXT: v_and_or_b32 v2, 0xffff, v2, s12 | |
18 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | |
19 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
20 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
21 main_body: | |
22 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
23 ret <4 x float> %v | |
24 } | |
25 | |
; Test: image.sample.d on a 2D image with dmask 15. Four float derivatives; the coordinates %s/%t are half.
; Both sets of checks expect v4 and v5 merged into v4 (0xffff mask plus 16-bit shift) ahead of an a16 image_sample_d.
26 define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { | |
27 ; GFX10-LABEL: sample_d_2d: | |
28 ; GFX10: ; %bb.0: ; %main_body | |
29 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 | |
30 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 | |
31 ; GFX10-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
32 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
33 ; GFX10-NEXT: ; return to shader part epilog | |
34 ; | |
35 ; GFX10GISEL-LABEL: sample_d_2d: | |
36 ; GFX10GISEL: ; %bb.0: ; %main_body | |
37 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 | |
38 ; GFX10GISEL-NEXT: v_and_or_b32 v4, 0xffff, v4, v5 | |
39 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
40 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
41 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
42 main_body: | |
43 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
44 ret <4 x float> %v | |
45 } | |
46 | |
; Test: image.sample.d on a 3D image with dmask 15. Six float derivatives; the coordinates %s/%t/%r are half.
; Expected: an a16 image_sample_d. The GFX10 checks pass the address as a bracketed register list,
; the GFX10GISEL checks pass it as the v[0:7] range after extra mask/shift instructions.
47 define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r) { | |
48 ; GFX10-LABEL: sample_d_3d: | |
49 ; GFX10: ; %bb.0: ; %main_body | |
50 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6 | |
51 ; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 | |
52 ; GFX10-NEXT: image_sample_d v[0:3], [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 | |
53 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
54 ; GFX10-NEXT: ; return to shader part epilog | |
55 ; | |
56 ; GFX10GISEL-LABEL: sample_d_3d: | |
57 ; GFX10GISEL: ; %bb.0: ; %main_body | |
58 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff | |
59 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7 | |
60 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
61 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v6, v9, v7 | |
62 ; GFX10GISEL-NEXT: v_and_or_b32 v7, v8, v9, s12 | |
63 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 | |
64 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
65 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
66 main_body: | |
67 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
68 ret <4 x float> %v | |
69 } | |
70 | |
; Test: image.sample.c.d on a 1D image with dmask 15. A float %zcompare precedes the float derivatives; the coordinate %s is half.
; Both sets of checks expect image_sample_c_d with the a16 modifier.
71 define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s) { | |
72 ; GFX10-LABEL: sample_c_d_1d: | |
73 ; GFX10: ; %bb.0: ; %main_body | |
74 ; GFX10-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | |
75 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
76 ; GFX10-NEXT: ; return to shader part epilog | |
77 ; | |
78 ; GFX10GISEL-LABEL: sample_c_d_1d: | |
79 ; GFX10GISEL: ; %bb.0: ; %main_body | |
80 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
81 ; GFX10GISEL-NEXT: v_and_or_b32 v3, 0xffff, v3, s12 | |
82 ; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | |
83 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
84 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
85 main_body: | |
86 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
87 ret <4 x float> %v | |
88 } | |
89 | |
; Test: image.sample.c.d on a 2D image with dmask 15. Float %zcompare and four float derivatives; the coordinates %s/%t are half.
; Both sets of checks expect v5 and v6 merged into v5 ahead of an a16 image_sample_c_d.
90 define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { | |
91 ; GFX10-LABEL: sample_c_d_2d: | |
92 ; GFX10: ; %bb.0: ; %main_body | |
93 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 | |
94 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 | |
95 ; GFX10-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
96 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
97 ; GFX10-NEXT: ; return to shader part epilog | |
98 ; | |
99 ; GFX10GISEL-LABEL: sample_c_d_2d: | |
100 ; GFX10GISEL: ; %bb.0: ; %main_body | |
101 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6 | |
102 ; GFX10GISEL-NEXT: v_and_or_b32 v5, 0xffff, v5, v6 | |
103 ; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
104 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
105 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
106 main_body: | |
107 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
108 ret <4 x float> %v | |
109 } | |
110 | |
; Test: image.sample.d.cl on a 1D image with dmask 15. Float derivatives; the coordinate %s and the %clamp value are both half.
; Both sets of checks expect v2 and v3 merged into v2 ahead of an a16 image_sample_d_cl.
111 define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) { | |
112 ; GFX10-LABEL: sample_d_cl_1d: | |
113 ; GFX10: ; %bb.0: ; %main_body | |
114 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 | |
115 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 | |
116 ; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | |
117 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
118 ; GFX10-NEXT: ; return to shader part epilog | |
119 ; | |
120 ; GFX10GISEL-LABEL: sample_d_cl_1d: | |
121 ; GFX10GISEL: ; %bb.0: ; %main_body | |
122 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | |
123 ; GFX10GISEL-NEXT: v_and_or_b32 v2, 0xffff, v2, v3 | |
124 ; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | |
125 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
126 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
127 main_body: | |
128 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
129 ret <4 x float> %v | |
130 } | |
131 | |
; Test: image.sample.d.cl on a 2D image with dmask 15. Four float derivatives; %s, %t and %clamp are half.
; Expected: an a16 image_sample_d_cl. The GFX10 checks pass the address as a bracketed register list,
; the GFX10GISEL checks pass it as the v[0:7] range after extra mask/shift instructions.
132 define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { | |
133 ; GFX10-LABEL: sample_d_cl_2d: | |
134 ; GFX10: ; %bb.0: ; %main_body | |
135 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 | |
136 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 | |
137 ; GFX10-NEXT: image_sample_d_cl v[0:3], [v0, v1, v2, v3, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
138 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
139 ; GFX10-NEXT: ; return to shader part epilog | |
140 ; | |
141 ; GFX10GISEL-LABEL: sample_d_cl_2d: | |
142 ; GFX10GISEL: ; %bb.0: ; %main_body | |
143 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff | |
144 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 | |
145 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
146 ; GFX10GISEL-NEXT: v_and_or_b32 v4, v4, v7, v5 | |
147 ; GFX10GISEL-NEXT: v_and_or_b32 v5, v6, v7, s12 | |
148 ; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
149 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
150 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
151 main_body: | |
152 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
153 ret <4 x float> %v | |
154 } | |
155 | |
; Test: image.sample.c.d.cl on a 1D image with dmask 15. Float %zcompare and float derivatives; %s and %clamp are half.
; Both sets of checks expect v3 and v4 merged into v3 ahead of an a16 image_sample_c_d_cl.
156 define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) { | |
157 ; GFX10-LABEL: sample_c_d_cl_1d: | |
158 ; GFX10: ; %bb.0: ; %main_body | |
159 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3 | |
160 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 | |
161 ; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | |
162 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
163 ; GFX10-NEXT: ; return to shader part epilog | |
164 ; | |
165 ; GFX10GISEL-LABEL: sample_c_d_cl_1d: | |
166 ; GFX10GISEL: ; %bb.0: ; %main_body | |
167 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 | |
168 ; GFX10GISEL-NEXT: v_and_or_b32 v3, 0xffff, v3, v4 | |
169 ; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | |
170 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
171 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
172 main_body: | |
173 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
174 ret <4 x float> %v | |
175 } | |
176 | |
; Test: image.sample.c.d.cl on a 2D image with dmask 15. Float %zcompare and four float derivatives; %s, %t and %clamp are half.
; Expected: an a16 image_sample_c_d_cl. The GFX10 checks pass the address as a bracketed register list,
; the GFX10GISEL checks pass it as the v[0:7] range after extra mask/shift instructions.
177 define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { | |
178 ; GFX10-LABEL: sample_c_d_cl_2d: | |
179 ; GFX10: ; %bb.0: ; %main_body | |
180 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 | |
181 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 | |
182 ; GFX10-NEXT: image_sample_c_d_cl v[0:3], [v0, v1, v2, v3, v4, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
183 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
184 ; GFX10-NEXT: ; return to shader part epilog | |
185 ; | |
186 ; GFX10GISEL-LABEL: sample_c_d_cl_2d: | |
187 ; GFX10GISEL: ; %bb.0: ; %main_body | |
188 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff | |
189 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6 | |
190 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
191 ; GFX10GISEL-NEXT: v_and_or_b32 v5, v5, v8, v6 | |
192 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v7, v8, s12 | |
193 ; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
194 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
195 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
196 main_body: | |
197 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
198 ret <4 x float> %v | |
199 } | |
200 | |
; Test: image.sample.cd on a 1D image with dmask 15. The derivatives %dsdh/%dsdv are float, the coordinate %s is half.
; Both sets of checks expect image_sample_cd with the a16 modifier.
201 define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s) { | |
202 ; GFX10-LABEL: sample_cd_1d: | |
203 ; GFX10: ; %bb.0: ; %main_body | |
204 ; GFX10-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | |
205 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
206 ; GFX10-NEXT: ; return to shader part epilog | |
207 ; | |
208 ; GFX10GISEL-LABEL: sample_cd_1d: | |
209 ; GFX10GISEL: ; %bb.0: ; %main_body | |
210 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
211 ; GFX10GISEL-NEXT: v_and_or_b32 v2, 0xffff, v2, s12 | |
212 ; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | |
213 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
214 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
215 main_body: | |
216 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
217 ret <4 x float> %v | |
218 } | |
219 | |
; Test: image.sample.cd on a 2D image with dmask 15. Four float derivatives; the coordinates %s/%t are half.
; Both sets of checks expect v4 and v5 merged into v4 ahead of an a16 image_sample_cd.
220 define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { | |
221 ; GFX10-LABEL: sample_cd_2d: | |
222 ; GFX10: ; %bb.0: ; %main_body | |
223 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 | |
224 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 | |
225 ; GFX10-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
226 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
227 ; GFX10-NEXT: ; return to shader part epilog | |
228 ; | |
229 ; GFX10GISEL-LABEL: sample_cd_2d: | |
230 ; GFX10GISEL: ; %bb.0: ; %main_body | |
231 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 | |
232 ; GFX10GISEL-NEXT: v_and_or_b32 v4, 0xffff, v4, v5 | |
233 ; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
234 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
235 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
236 main_body: | |
237 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
238 ret <4 x float> %v | |
239 } | |
240 | |
; Test: image.sample.c.cd on a 1D image with dmask 15. Float %zcompare and float derivatives; the coordinate %s is half.
; Both sets of checks expect image_sample_c_cd with the a16 modifier.
241 define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s) { | |
242 ; GFX10-LABEL: sample_c_cd_1d: | |
243 ; GFX10: ; %bb.0: ; %main_body | |
244 ; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | |
245 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
246 ; GFX10-NEXT: ; return to shader part epilog | |
247 ; | |
248 ; GFX10GISEL-LABEL: sample_c_cd_1d: | |
249 ; GFX10GISEL: ; %bb.0: ; %main_body | |
250 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
251 ; GFX10GISEL-NEXT: v_and_or_b32 v3, 0xffff, v3, s12 | |
252 ; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | |
253 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
254 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
255 main_body: | |
256 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
257 ret <4 x float> %v | |
258 } | |
259 | |
; Test: image.sample.c.cd on a 2D image with dmask 15. Float %zcompare and four float derivatives; the coordinates %s/%t are half.
; Both sets of checks expect v5 and v6 merged into v5 ahead of an a16 image_sample_c_cd.
260 define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { | |
261 ; GFX10-LABEL: sample_c_cd_2d: | |
262 ; GFX10: ; %bb.0: ; %main_body | |
263 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 | |
264 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 | |
265 ; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
266 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
267 ; GFX10-NEXT: ; return to shader part epilog | |
268 ; | |
269 ; GFX10GISEL-LABEL: sample_c_cd_2d: | |
270 ; GFX10GISEL: ; %bb.0: ; %main_body | |
271 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6 | |
272 ; GFX10GISEL-NEXT: v_and_or_b32 v5, 0xffff, v5, v6 | |
273 ; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
274 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
275 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
276 main_body: | |
277 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
278 ret <4 x float> %v | |
279 } | |
280 | |
; Test: image.sample.cd.cl on a 1D image with dmask 15. Float derivatives; the coordinate %s and the %clamp value are both half.
; Both sets of checks expect v2 and v3 merged into v2 ahead of an a16 image_sample_cd_cl.
281 define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) { | |
282 ; GFX10-LABEL: sample_cd_cl_1d: | |
283 ; GFX10: ; %bb.0: ; %main_body | |
284 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 | |
285 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 | |
286 ; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | |
287 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
288 ; GFX10-NEXT: ; return to shader part epilog | |
289 ; | |
290 ; GFX10GISEL-LABEL: sample_cd_cl_1d: | |
291 ; GFX10GISEL: ; %bb.0: ; %main_body | |
292 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | |
293 ; GFX10GISEL-NEXT: v_and_or_b32 v2, 0xffff, v2, v3 | |
294 ; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | |
295 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
296 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
297 main_body: | |
298 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
299 ret <4 x float> %v | |
300 } | |
301 | |
; Test: image.sample.cd.cl on a 2D image with dmask 15. Four float derivatives; %s, %t and %clamp are half.
; Expected: an a16 image_sample_cd_cl. The GFX10 checks pass the address as a bracketed register list,
; the GFX10GISEL checks pass it as the v[0:7] range after extra mask/shift instructions.
302 define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { | |
303 ; GFX10-LABEL: sample_cd_cl_2d: | |
304 ; GFX10: ; %bb.0: ; %main_body | |
305 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 | |
306 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 | |
307 ; GFX10-NEXT: image_sample_cd_cl v[0:3], [v0, v1, v2, v3, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
308 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
309 ; GFX10-NEXT: ; return to shader part epilog | |
310 ; | |
311 ; GFX10GISEL-LABEL: sample_cd_cl_2d: | |
312 ; GFX10GISEL: ; %bb.0: ; %main_body | |
313 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff | |
314 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 | |
315 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
316 ; GFX10GISEL-NEXT: v_and_or_b32 v4, v4, v7, v5 | |
317 ; GFX10GISEL-NEXT: v_and_or_b32 v5, v6, v7, s12 | |
318 ; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
319 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
320 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
321 main_body: | |
322 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
323 ret <4 x float> %v | |
324 } | |
325 | |
; Test: image.sample.c.cd.cl on a 1D image with dmask 15. Float %zcompare and float derivatives; %s and %clamp are half.
; Both sets of checks expect v3 and v4 merged into v3 ahead of an a16 image_sample_c_cd_cl.
326 define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) { | |
327 ; GFX10-LABEL: sample_c_cd_cl_1d: | |
328 ; GFX10: ; %bb.0: ; %main_body | |
329 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3 | |
330 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 | |
331 ; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | |
332 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
333 ; GFX10-NEXT: ; return to shader part epilog | |
334 ; | |
335 ; GFX10GISEL-LABEL: sample_c_cd_cl_1d: | |
336 ; GFX10GISEL: ; %bb.0: ; %main_body | |
337 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 | |
338 ; GFX10GISEL-NEXT: v_and_or_b32 v3, 0xffff, v3, v4 | |
339 ; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | |
340 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
341 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
342 main_body: | |
343 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
344 ret <4 x float> %v | |
345 } | |
346 | |
; Test: image.sample.c.cd.cl on a 2D image with dmask 15. Float %zcompare and four float derivatives; %s, %t and %clamp are half.
; Expected: an a16 image_sample_c_cd_cl. The GFX10 checks pass the address as a bracketed register list,
; the GFX10GISEL checks pass it as the v[0:7] range after extra mask/shift instructions.
347 define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { | |
348 ; GFX10-LABEL: sample_c_cd_cl_2d: | |
349 ; GFX10: ; %bb.0: ; %main_body | |
350 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 | |
351 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 | |
352 ; GFX10-NEXT: image_sample_c_cd_cl v[0:3], [v0, v1, v2, v3, v4, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
353 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
354 ; GFX10-NEXT: ; return to shader part epilog | |
355 ; | |
356 ; GFX10GISEL-LABEL: sample_c_cd_cl_2d: | |
357 ; GFX10GISEL: ; %bb.0: ; %main_body | |
358 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff | |
359 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6 | |
360 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
361 ; GFX10GISEL-NEXT: v_and_or_b32 v5, v5, v8, v6 | |
362 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v7, v8, s12 | |
363 ; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
364 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
365 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
366 main_body: | |
367 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
368 ret <4 x float> %v | |
369 } | |
370 | |
; Test: image.sample.c.d.o on a 2D array image, returning a single float with dmask 4.
; Arguments: i32 %offset, float %zcompare, four float derivatives, then half %s, %t and %slice.
; Both sets of checks expect an a16 image_sample_c_d_o writing only v0 with dmask:0x4.
371 define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) { | |
372 ; GFX10-LABEL: sample_c_d_o_2darray_V1: | |
373 ; GFX10: ; %bb.0: ; %main_body | |
374 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6 | |
375 ; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 | |
376 ; GFX10-NEXT: image_sample_c_d_o v0, [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 | |
377 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
378 ; GFX10-NEXT: ; return to shader part epilog | |
379 ; | |
380 ; GFX10GISEL-LABEL: sample_c_d_o_2darray_V1: | |
381 ; GFX10GISEL: ; %bb.0: ; %main_body | |
382 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff | |
383 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7 | |
384 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
385 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v6, v9, v7 | |
386 ; GFX10GISEL-NEXT: v_and_or_b32 v7, v8, v9, s12 | |
387 ; GFX10GISEL-NEXT: image_sample_c_d_o v0, v[0:7], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 | |
388 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
389 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
390 main_body: | |
391 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
392 ret float %v | |
393 } | |
394 | |
; Test: image.sample.c.d.o on a 2D array image, returning <2 x float> with dmask 6.
; Arguments: i32 %offset, float %zcompare, four float derivatives, then half %s, %t and %slice.
; Both sets of checks expect an a16 image_sample_c_d_o writing v[0:1] with dmask:0x6.
395 define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) { | |
396 ; GFX10-LABEL: sample_c_d_o_2darray_V2: | |
397 ; GFX10: ; %bb.0: ; %main_body | |
398 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6 | |
399 ; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 | |
400 ; GFX10-NEXT: image_sample_c_d_o v[0:1], [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 | |
401 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
402 ; GFX10-NEXT: ; return to shader part epilog | |
403 ; | |
404 ; GFX10GISEL-LABEL: sample_c_d_o_2darray_V2: | |
405 ; GFX10GISEL: ; %bb.0: ; %main_body | |
406 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff | |
407 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7 | |
408 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
409 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v6, v9, v7 | |
410 ; GFX10GISEL-NEXT: v_and_or_b32 v7, v8, v9, s12 | |
411 ; GFX10GISEL-NEXT: image_sample_c_d_o v[0:1], v[0:7], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 | |
412 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
413 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
414 main_body: | |
415 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
416 ret <2 x float> %v | |
417 } | |
418 | |
419 declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
420 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
421 declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32, float, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
422 declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
423 declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
424 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
425 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
426 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
427 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
428 | |
429 declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
430 declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
431 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
432 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
433 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
434 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
435 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
436 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
437 | |
438 declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
439 declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
440 | |
; Test: image.sample.d on a 1D image with dmask 15, with the operand widths swapped relative to the a16 tests in this file:
; the derivatives %dsdh/%dsdv are half and the coordinate %s is float (intrinsic suffix .f16.f32).
; Both sets of checks expect image_sample_d_g16 with no a16 modifier.
441 define amdgpu_ps <4 x float> @sample_g16_noa16_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { | |
442 ; GFX10-LABEL: sample_g16_noa16_d_1d: | |
443 ; GFX10: ; %bb.0: ; %main_body | |
444 ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | |
445 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
446 ; GFX10-NEXT: ; return to shader part epilog | |
447 ; | |
448 ; GFX10GISEL-LABEL: sample_g16_noa16_d_1d: | |
449 ; GFX10GISEL: ; %bb.0: ; %main_body | |
450 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, 0xffff | |
451 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
452 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v3, s12 | |
453 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v3, s12 | |
454 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | |
455 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
456 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
457 main_body: | |
458 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
459 ret <4 x float> %v | |
460 } | |
461 | |
; Test: image.sample.d on a 2D image with dmask 15. The four derivatives are half; the coordinates %s/%t are float.
; Both sets of checks expect two mask/shift merges (v0 with v1, v2 with v3) and then image_sample_d_g16
; taking a four-entry bracketed register list, with no a16 modifier.
462 define amdgpu_ps <4 x float> @sample_g16_noa16_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { | |
463 ; GFX10-LABEL: sample_g16_noa16_d_2d: | |
464 ; GFX10: ; %bb.0: ; %main_body | |
465 ; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff | |
466 ; GFX10-NEXT: v_and_b32_e32 v2, v6, v2 | |
467 ; GFX10-NEXT: v_and_b32_e32 v0, v6, v0 | |
468 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 | |
469 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 | |
470 ; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
471 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
472 ; GFX10-NEXT: ; return to shader part epilog | |
473 ; | |
474 ; GFX10GISEL-LABEL: sample_g16_noa16_d_2d: | |
475 ; GFX10GISEL: ; %bb.0: ; %main_body | |
476 ; GFX10GISEL-NEXT: v_mov_b32_e32 v6, 0xffff | |
477 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 | |
478 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | |
479 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v6, v1 | |
480 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v6, v3 | |
481 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
482 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
483 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
484 main_body: | |
485 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
486 ret <4 x float> %v | |
487 } | |
488 | |
; Test: image.sample.d on a 3D image with dmask 15. The six derivatives are half; the coordinates %s/%t/%r are float.
; Both sets of checks expect image_sample_d_g16 taking a seven-entry bracketed register list, with no a16 modifier.
; The GFX10GISEL checks expect four v_and_or_b32 merges before it, against two mask/shift pairs for GFX10.
489 define amdgpu_ps <4 x float> @sample_g16_noa16_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) { | |
490 ; GFX10-LABEL: sample_g16_noa16_d_3d: | |
491 ; GFX10: ; %bb.0: ; %main_body | |
492 ; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff | |
493 ; GFX10-NEXT: v_and_b32_e32 v3, v9, v3 | |
494 ; GFX10-NEXT: v_and_b32_e32 v0, v9, v0 | |
495 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 | |
496 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 | |
497 ; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D | |
498 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
499 ; GFX10-NEXT: ; return to shader part epilog | |
500 ; | |
501 ; GFX10GISEL-LABEL: sample_g16_noa16_d_3d: | |
502 ; GFX10GISEL: ; %bb.0: ; %main_body | |
503 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff | |
504 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 | |
505 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 | |
506 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
507 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v9, v1 | |
508 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v9, s12 | |
509 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v9, v4 | |
510 ; GFX10GISEL-NEXT: v_and_or_b32 v3, v5, v9, s12 | |
511 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D | |
512 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
513 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
514 main_body: | |
515 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
516 ret <4 x float> %v | |
517 } | |
518 | |
; sample.c.d on a 1D image: float zcompare, half derivatives, float coordinate, dmask 15.
; The default run expects only image_sample_c_d_g16 on v[0:3]; the -global-isel run expects
; a 0xffff mask, an s_lshl_b32 and two v_and_or_b32 ahead of the same instruction.
519 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { | |
520 ; GFX10-LABEL: sample_g16_noa16_c_d_1d: | |
521 ; GFX10: ; %bb.0: ; %main_body | |
522 ; GFX10-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | |
523 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
524 ; GFX10-NEXT: ; return to shader part epilog | |
525 ; | |
526 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_1d: | |
527 ; GFX10GISEL: ; %bb.0: ; %main_body | |
528 ; GFX10GISEL-NEXT: v_mov_b32_e32 v4, 0xffff | |
529 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
530 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v4, s12 | |
531 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v4, s12 | |
532 ; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | |
533 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
534 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
535 main_body: | |
536 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
537 ret <4 x float> %v | |
538 } | |
539 | |
; sample.c.d on a 2D image: float zcompare, four half derivatives, float s/t, dmask 15.
; Both runs expect image_sample_c_d_g16 with a five-register address list and
; dim:SQ_RSRC_IMG_2D; they differ in the instructions that build the two combined registers.
540 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { | |
541 ; GFX10-LABEL: sample_g16_noa16_c_d_2d: | |
542 ; GFX10: ; %bb.0: ; %main_body | |
543 ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff | |
544 ; GFX10-NEXT: v_and_b32_e32 v3, v7, v3 | |
545 ; GFX10-NEXT: v_and_b32_e32 v1, v7, v1 | |
546 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 | |
547 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 | |
548 ; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
549 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
550 ; GFX10-NEXT: ; return to shader part epilog | |
551 ; | |
552 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_2d: | |
553 ; GFX10GISEL: ; %bb.0: ; %main_body | |
554 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff | |
555 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 | |
556 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 | |
557 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v7, v2 | |
558 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v7, v4 | |
559 ; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
560 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
561 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
562 main_body: | |
563 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
564 ret <4 x float> %v | |
565 } | |
566 | |
; sample.d.cl on a 1D image: half derivatives, float coordinate and float clamp, dmask 15.
; The default run expects only image_sample_d_cl_g16 on v[0:3]; the -global-isel run
; expects extra mask/or instructions on v0 and v1 ahead of the same instruction.
567 define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { | |
568 ; GFX10-LABEL: sample_g16_noa16_d_cl_1d: | |
569 ; GFX10: ; %bb.0: ; %main_body | |
570 ; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | |
571 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
572 ; GFX10-NEXT: ; return to shader part epilog | |
573 ; | |
574 ; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_1d: | |
575 ; GFX10GISEL: ; %bb.0: ; %main_body | |
576 ; GFX10GISEL-NEXT: v_mov_b32_e32 v4, 0xffff | |
577 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
578 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v4, s12 | |
579 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v4, s12 | |
580 ; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | |
581 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
582 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
583 main_body: | |
584 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
585 ret <4 x float> %v | |
586 } | |
587 | |
; sample.d.cl on a 2D image: four half derivatives, float s/t and float clamp, dmask 15.
; Both runs expect image_sample_d_cl_g16 with a five-register address list and
; dim:SQ_RSRC_IMG_2D, preceded by a run-specific mask/shift/or sequence.
588 define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { | |
589 ; GFX10-LABEL: sample_g16_noa16_d_cl_2d: | |
590 ; GFX10: ; %bb.0: ; %main_body | |
591 ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff | |
592 ; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 | |
593 ; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 | |
594 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 | |
595 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 | |
596 ; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
597 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
598 ; GFX10-NEXT: ; return to shader part epilog | |
599 ; | |
600 ; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_2d: | |
601 ; GFX10GISEL: ; %bb.0: ; %main_body | |
602 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff | |
603 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 | |
604 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | |
605 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v7, v1 | |
606 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v7, v3 | |
607 ; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
608 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
609 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
610 main_body: | |
611 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
612 ret <4 x float> %v | |
613 } | |
614 | |
; sample.c.d.cl on a 1D image: float zcompare, half derivatives, float coordinate, float clamp.
; Both runs expect image_sample_c_d_cl_g16 with the address given as v[0:7], although the
; call passes only five address operands.
; NOTE(review): presumably the range is rounded up to a supported register-tuple size - confirm.
615 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { | |
616 ; GFX10-LABEL: sample_g16_noa16_c_d_cl_1d: | |
617 ; GFX10: ; %bb.0: ; %main_body | |
618 ; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | |
619 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
620 ; GFX10-NEXT: ; return to shader part epilog | |
621 ; | |
622 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_1d: | |
623 ; GFX10GISEL: ; %bb.0: ; %main_body | |
624 ; GFX10GISEL-NEXT: v_mov_b32_e32 v5, 0xffff | |
625 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
626 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v5, s12 | |
627 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v5, s12 | |
628 ; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | |
629 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
630 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
631 main_body: | |
632 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
633 ret <4 x float> %v | |
634 } | |
635 | |
; sample.c.d.cl on a 2D image: float zcompare, four half derivatives, float s/t, float clamp.
; Both runs expect image_sample_c_d_cl_g16 with a six-register address list and
; dim:SQ_RSRC_IMG_2D, preceded by a run-specific mask/shift/or sequence.
636 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { | |
637 ; GFX10-LABEL: sample_g16_noa16_c_d_cl_2d: | |
638 ; GFX10: ; %bb.0: ; %main_body | |
639 ; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff | |
640 ; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 | |
641 ; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 | |
642 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 | |
643 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 | |
644 ; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
645 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
646 ; GFX10-NEXT: ; return to shader part epilog | |
647 ; | |
648 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_2d: | |
649 ; GFX10GISEL: ; %bb.0: ; %main_body | |
650 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff | |
651 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 | |
652 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 | |
653 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v8, v2 | |
654 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v8, v4 | |
655 ; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
656 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
657 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
658 main_body: | |
659 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
660 ret <4 x float> %v | |
661 } | |
662 | |
; sample.cd on a 1D image: half derivatives and a float coordinate, dmask 15.
; The default run expects only image_sample_cd_g16 on v[0:2]; the -global-isel run
; expects extra mask/or instructions on v0 and v1 ahead of the same instruction.
663 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { | |
664 ; GFX10-LABEL: sample_g16_noa16_cd_1d: | |
665 ; GFX10: ; %bb.0: ; %main_body | |
666 ; GFX10-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | |
667 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
668 ; GFX10-NEXT: ; return to shader part epilog | |
669 ; | |
670 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_1d: | |
671 ; GFX10GISEL: ; %bb.0: ; %main_body | |
672 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, 0xffff | |
673 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
674 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v3, s12 | |
675 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v3, s12 | |
676 ; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | |
677 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
678 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
679 main_body: | |
680 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
681 ret <4 x float> %v | |
682 } | |
683 | |
; sample.cd on a 2D image: four half derivatives and float s/t, dmask 15.
; Both runs expect image_sample_cd_g16 with a four-register address list and
; dim:SQ_RSRC_IMG_2D, preceded by a run-specific mask/shift/or sequence.
684 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { | |
685 ; GFX10-LABEL: sample_g16_noa16_cd_2d: | |
686 ; GFX10: ; %bb.0: ; %main_body | |
687 ; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff | |
688 ; GFX10-NEXT: v_and_b32_e32 v2, v6, v2 | |
689 ; GFX10-NEXT: v_and_b32_e32 v0, v6, v0 | |
690 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 | |
691 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 | |
692 ; GFX10-NEXT: image_sample_cd_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
693 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
694 ; GFX10-NEXT: ; return to shader part epilog | |
695 ; | |
696 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_2d: | |
697 ; GFX10GISEL: ; %bb.0: ; %main_body | |
698 ; GFX10GISEL-NEXT: v_mov_b32_e32 v6, 0xffff | |
699 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 | |
700 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | |
701 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v6, v1 | |
702 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v6, v3 | |
703 ; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
704 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
705 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
706 main_body: | |
707 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
708 ret <4 x float> %v | |
709 } | |
710 | |
; sample.c.cd on a 1D image: float zcompare, half derivatives, float coordinate, dmask 15.
; The default run expects only image_sample_c_cd_g16 on v[0:3]; the -global-isel run
; expects extra mask/or instructions on v1 and v2 ahead of the same instruction.
711 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { | |
712 ; GFX10-LABEL: sample_g16_noa16_c_cd_1d: | |
713 ; GFX10: ; %bb.0: ; %main_body | |
714 ; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | |
715 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
716 ; GFX10-NEXT: ; return to shader part epilog | |
717 ; | |
718 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_1d: | |
719 ; GFX10GISEL: ; %bb.0: ; %main_body | |
720 ; GFX10GISEL-NEXT: v_mov_b32_e32 v4, 0xffff | |
721 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
722 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v4, s12 | |
723 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v4, s12 | |
724 ; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | |
725 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
726 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
727 main_body: | |
728 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
729 ret <4 x float> %v | |
730 } | |
731 | |
; sample.c.cd on a 2D image: float zcompare, four half derivatives, float s/t, dmask 15.
; Both runs expect image_sample_c_cd_g16 with a five-register address list and
; dim:SQ_RSRC_IMG_2D, preceded by a run-specific mask/shift/or sequence.
732 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { | |
733 ; GFX10-LABEL: sample_g16_noa16_c_cd_2d: | |
734 ; GFX10: ; %bb.0: ; %main_body | |
735 ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff | |
736 ; GFX10-NEXT: v_and_b32_e32 v3, v7, v3 | |
737 ; GFX10-NEXT: v_and_b32_e32 v1, v7, v1 | |
738 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 | |
739 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 | |
740 ; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
741 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
742 ; GFX10-NEXT: ; return to shader part epilog | |
743 ; | |
744 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_2d: | |
745 ; GFX10GISEL: ; %bb.0: ; %main_body | |
746 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff | |
747 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 | |
748 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 | |
749 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v7, v2 | |
750 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v7, v4 | |
751 ; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
752 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
753 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
754 main_body: | |
755 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
756 ret <4 x float> %v | |
757 } | |
758 | |
; sample.cd.cl on a 1D image: half derivatives, float coordinate and float clamp, dmask 15.
; The default run expects only image_sample_cd_cl_g16 on v[0:3]; the -global-isel run
; expects extra mask/or instructions on v0 and v1 ahead of the same instruction.
759 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { | |
760 ; GFX10-LABEL: sample_g16_noa16_cd_cl_1d: | |
761 ; GFX10: ; %bb.0: ; %main_body | |
762 ; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | |
763 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
764 ; GFX10-NEXT: ; return to shader part epilog | |
765 ; | |
766 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_1d: | |
767 ; GFX10GISEL: ; %bb.0: ; %main_body | |
768 ; GFX10GISEL-NEXT: v_mov_b32_e32 v4, 0xffff | |
769 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
770 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v4, s12 | |
771 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v4, s12 | |
772 ; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | |
773 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
774 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
775 main_body: | |
776 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
777 ret <4 x float> %v | |
778 } | |
779 | |
; sample.cd.cl on a 2D image: four half derivatives, float s/t and float clamp, dmask 15.
; Both runs expect image_sample_cd_cl_g16 with a five-register address list and
; dim:SQ_RSRC_IMG_2D, preceded by a run-specific mask/shift/or sequence.
780 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { | |
781 ; GFX10-LABEL: sample_g16_noa16_cd_cl_2d: | |
782 ; GFX10: ; %bb.0: ; %main_body | |
783 ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff | |
784 ; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 | |
785 ; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 | |
786 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 | |
787 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 | |
788 ; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
789 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
790 ; GFX10-NEXT: ; return to shader part epilog | |
791 ; | |
792 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_2d: | |
793 ; GFX10GISEL: ; %bb.0: ; %main_body | |
794 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff | |
795 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 | |
796 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | |
797 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v7, v1 | |
798 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v7, v3 | |
799 ; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
800 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
801 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
802 main_body: | |
803 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
804 ret <4 x float> %v | |
805 } | |
806 | |
; sample.c.cd.cl on a 1D image: float zcompare, half derivatives, float coordinate, float clamp.
; Both runs expect image_sample_c_cd_cl_g16 with the address given as v[0:7], although the
; call passes only five address operands.
; NOTE(review): presumably the range is rounded up to a supported register-tuple size - confirm.
807 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { | |
808 ; GFX10-LABEL: sample_g16_noa16_c_cd_cl_1d: | |
809 ; GFX10: ; %bb.0: ; %main_body | |
810 ; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | |
811 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
812 ; GFX10-NEXT: ; return to shader part epilog | |
813 ; | |
814 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_1d: | |
815 ; GFX10GISEL: ; %bb.0: ; %main_body | |
816 ; GFX10GISEL-NEXT: v_mov_b32_e32 v5, 0xffff | |
817 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
818 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v5, s12 | |
819 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v5, s12 | |
820 ; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | |
821 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
822 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
823 main_body: | |
824 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
825 ret <4 x float> %v | |
826 } | |
827 | |
; sample.c.cd.cl on a 2D image: float zcompare, four half derivatives, float s/t, float clamp.
; Both runs expect image_sample_c_cd_cl_g16 with a six-register address list and
; dim:SQ_RSRC_IMG_2D, preceded by a run-specific mask/shift/or sequence.
828 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { | |
829 ; GFX10-LABEL: sample_g16_noa16_c_cd_cl_2d: | |
830 ; GFX10: ; %bb.0: ; %main_body | |
831 ; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff | |
832 ; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 | |
833 ; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 | |
834 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 | |
835 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 | |
836 ; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
837 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
838 ; GFX10-NEXT: ; return to shader part epilog | |
839 ; | |
840 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_2d: | |
841 ; GFX10GISEL: ; %bb.0: ; %main_body | |
842 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff | |
843 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 | |
844 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 | |
845 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v8, v2 | |
846 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v8, v4 | |
847 ; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
848 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
849 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
850 main_body: | |
851 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
852 ret <4 x float> %v | |
853 } | |
854 | |
; sample.c.d.o on a 2D array image returning a single float: the call passes dmask 4
; (one bit set) and the checks expect one result register (v0) with dmask:0x4.
; Operands are an i32 offset, float zcompare, four half derivatives and float s/t/slice.
855 define amdgpu_ps float @sample_g16_noa16_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { | |
856 ; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V1: | |
857 ; GFX10: ; %bb.0: ; %main_body | |
858 ; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff | |
859 ; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 | |
860 ; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 | |
861 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 | |
862 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 | |
863 ; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY | |
864 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
865 ; GFX10-NEXT: ; return to shader part epilog | |
866 ; | |
867 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V1: | |
868 ; GFX10GISEL: ; %bb.0: ; %main_body | |
869 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff | |
870 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | |
871 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 | |
872 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v9, v3 | |
873 ; GFX10GISEL-NEXT: v_and_or_b32 v3, v4, v9, v5 | |
874 ; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY | |
875 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
876 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
877 main_body: | |
878 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
879 ret float %v | |
880 } | |
881 | |
; sample.c.d.o on a 2D array image returning <2 x float>: the call passes dmask 6
; (two bits set) and the checks expect a two-register result (v[0:1]) with dmask:0x6.
; Operands are an i32 offset, float zcompare, four half derivatives and float s/t/slice.
882 define amdgpu_ps <2 x float> @sample_g16_noa16_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { | |
883 ; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V2: | |
884 ; GFX10: ; %bb.0: ; %main_body | |
885 ; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff | |
886 ; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 | |
887 ; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 | |
888 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 | |
889 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 | |
890 ; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY | |
891 ; GFX10-NEXT: s_waitcnt vmcnt(0) | |
892 ; GFX10-NEXT: ; return to shader part epilog | |
893 ; | |
894 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V2: | |
895 ; GFX10GISEL: ; %bb.0: ; %main_body | |
896 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff | |
897 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | |
898 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 | |
899 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v9, v3 | |
900 ; GFX10GISEL-NEXT: v_and_or_b32 v3, v4, v9, v5 | |
901 ; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY | |
902 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | |
903 ; GFX10GISEL-NEXT: ; return to shader part epilog | |
904 main_body: | |
905 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | |
906 ret <2 x float> %v | |
907 } | |
908 | |
; Declarations for the sample.d family (d, c.d, d.cl, c.d.cl) in the .f16.f32 overload:
; the half parameters are the derivatives and the float parameters are the remaining
; address operands, matching the calls in the test functions above.
909 declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
910 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
911 declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
912 declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
913 declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
914 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
915 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
916 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
917 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
918 | |
; Declarations for the sample.cd family (cd, c.cd, cd.cl, c.cd.cl) in the .f16.f32 overload;
; the parameter lists mirror the corresponding sample.d declarations.
919 declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
920 declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
921 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
922 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
923 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
924 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
925 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
926 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
927 | |
; Declarations for sample.c.d.o.2darray with a scalar float and a <2 x float> result,
; used by the _V1 and _V2 tests; the extra leading i32 after dmask is the offset operand.
928 declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
929 declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 | |
930 | |
; Attribute groups. The intrinsic declarations directly above all reference #1
; (nounwind readonly).
; NOTE(review): no use of #0 or #2 is visible in this part of the file - confirm they are
; referenced elsewhere before removing them.
931 attributes #0 = { nounwind } | |
932 attributes #1 = { nounwind readonly } | |
933 attributes #2 = { nounwind readnone } |