Mercurial > hg > CbC > CbC_llvm
comparison llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll @ 150:1d019706d866
LLVM10
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 15:10:13 +0900 |
parents | |
children | 2e18cbf3894f |
comparison
equal
deleted
inserted
replaced
147:c2174574ed3a | 150:1d019706d866 |
---|---|
1 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s | |
2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s | |
3 | |
4 ; GCN-LABEL: ds_read32_combine_stride_400: | |
5 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 | |
6 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] | |
7 | |
8 ; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
9 ; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
10 ; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
11 | |
12 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x320, [[BASE]] | |
13 ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x640, [[BASE]] | |
14 ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x960, [[BASE]] | |
15 | |
16 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100 | |
17 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100 | |
18 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100 | |
19 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:100 | |
; Test input: loads eight floats from %arg (address space 3) at element
; indices 0, 100, 200, ..., 700 in ascending order. With 4-byte floats the
; consecutive accesses are 400 bytes apart. The loaded values are summed by a
; chain of fadds (seeded by adding 0.0 to the first load) and the total is
; stored through %arg1.
20 define amdgpu_kernel void @ds_read32_combine_stride_400(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { | |
21 bb: | |
22 %tmp = load float, float addrspace(3)* %arg, align 4 | |
23 %tmp2 = fadd float %tmp, 0.000000e+00 | |
24 %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100 | |
25 %tmp4 = load float, float addrspace(3)* %tmp3, align 4 | |
26 %tmp5 = fadd float %tmp2, %tmp4 | |
27 %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200 | |
28 %tmp7 = load float, float addrspace(3)* %tmp6, align 4 | |
29 %tmp8 = fadd float %tmp5, %tmp7 | |
30 %tmp9 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300 | |
31 %tmp10 = load float, float addrspace(3)* %tmp9, align 4 | |
32 %tmp11 = fadd float %tmp8, %tmp10 | |
33 %tmp12 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400 | |
34 %tmp13 = load float, float addrspace(3)* %tmp12, align 4 | |
35 %tmp14 = fadd float %tmp11, %tmp13 | |
36 %tmp15 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500 | |
37 %tmp16 = load float, float addrspace(3)* %tmp15, align 4 | |
38 %tmp17 = fadd float %tmp14, %tmp16 | |
39 %tmp18 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600 | |
40 %tmp19 = load float, float addrspace(3)* %tmp18, align 4 | |
41 %tmp20 = fadd float %tmp17, %tmp19 | |
42 %tmp21 = getelementptr inbounds float, float addrspace(3)* %arg, i32 700 | |
43 %tmp22 = load float, float addrspace(3)* %tmp21, align 4 | |
44 %tmp23 = fadd float %tmp20, %tmp22 | |
; Store the accumulated sum so the loads above have a use.
45 store float %tmp23, float *%arg1, align 4 | |
46 ret void | |
47 } | |
48 | |
49 ; GCN-LABEL: ds_read32_combine_stride_400_back: | |
50 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 | |
51 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] | |
52 | |
53 ; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
54 ; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
55 ; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
56 | |
57 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x320, [[BASE]] | |
58 ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x640, [[BASE]] | |
59 ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x960, [[BASE]] | |
60 | |
61 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100 | |
62 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100 | |
63 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100 | |
64 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:100 | |
; Test input: same eight float elements of %arg (address space 3) as the
; forward stride-400 read test, but visited in descending order: element
; indices 700, 600, ..., 100, and finally element 0 via %arg itself. The
; values are summed by a chain of fadds (seeded by adding 0.0 to the first
; load) and the total is stored through %arg1.
65 define amdgpu_kernel void @ds_read32_combine_stride_400_back(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { | |
66 bb: | |
67 %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 700 | |
68 %tmp2 = load float, float addrspace(3)* %tmp, align 4 | |
69 %tmp3 = fadd float %tmp2, 0.000000e+00 | |
70 %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600 | |
71 %tmp5 = load float, float addrspace(3)* %tmp4, align 4 | |
72 %tmp6 = fadd float %tmp3, %tmp5 | |
73 %tmp7 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500 | |
74 %tmp8 = load float, float addrspace(3)* %tmp7, align 4 | |
75 %tmp9 = fadd float %tmp6, %tmp8 | |
76 %tmp10 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400 | |
77 %tmp11 = load float, float addrspace(3)* %tmp10, align 4 | |
78 %tmp12 = fadd float %tmp9, %tmp11 | |
79 %tmp13 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300 | |
80 %tmp14 = load float, float addrspace(3)* %tmp13, align 4 | |
81 %tmp15 = fadd float %tmp12, %tmp14 | |
82 %tmp16 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200 | |
83 %tmp17 = load float, float addrspace(3)* %tmp16, align 4 | |
84 %tmp18 = fadd float %tmp15, %tmp17 | |
85 %tmp19 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100 | |
86 %tmp20 = load float, float addrspace(3)* %tmp19, align 4 | |
87 %tmp21 = fadd float %tmp18, %tmp20 | |
; The base element (index 0) is read last in this variant.
88 %tmp22 = load float, float addrspace(3)* %arg, align 4 | |
89 %tmp23 = fadd float %tmp21, %tmp22 | |
90 store float %tmp23, float *%arg1, align 4 | |
91 ret void | |
92 } | |
93 | |
94 ; GCN-LABEL: ds_read32_combine_stride_8192: | |
95 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 | |
96 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] | |
97 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:32 | |
98 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:64 offset1:96 | |
99 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:128 offset1:160 | |
100 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:192 offset1:224 | |
; Test input: loads eight floats from %arg (address space 3) at element
; indices 0, 2048, 4096, ..., 14336 in ascending order. With 4-byte floats
; the consecutive accesses are 8192 bytes apart. The values are summed by a
; chain of fadds (seeded by adding 0.0 to the first load) and the total is
; stored through %arg1.
101 define amdgpu_kernel void @ds_read32_combine_stride_8192(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { | |
102 bb: | |
103 %tmp = load float, float addrspace(3)* %arg, align 4 | |
104 %tmp2 = fadd float %tmp, 0.000000e+00 | |
105 %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2048 | |
106 %tmp4 = load float, float addrspace(3)* %tmp3, align 4 | |
107 %tmp5 = fadd float %tmp2, %tmp4 | |
108 %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4096 | |
109 %tmp7 = load float, float addrspace(3)* %tmp6, align 4 | |
110 %tmp8 = fadd float %tmp5, %tmp7 | |
111 %tmp9 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6144 | |
112 %tmp10 = load float, float addrspace(3)* %tmp9, align 4 | |
113 %tmp11 = fadd float %tmp8, %tmp10 | |
114 %tmp12 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8192 | |
115 %tmp13 = load float, float addrspace(3)* %tmp12, align 4 | |
116 %tmp14 = fadd float %tmp11, %tmp13 | |
117 %tmp15 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10240 | |
118 %tmp16 = load float, float addrspace(3)* %tmp15, align 4 | |
119 %tmp17 = fadd float %tmp14, %tmp16 | |
120 %tmp18 = getelementptr inbounds float, float addrspace(3)* %arg, i32 12288 | |
121 %tmp19 = load float, float addrspace(3)* %tmp18, align 4 | |
122 %tmp20 = fadd float %tmp17, %tmp19 | |
123 %tmp21 = getelementptr inbounds float, float addrspace(3)* %arg, i32 14336 | |
124 %tmp22 = load float, float addrspace(3)* %tmp21, align 4 | |
125 %tmp23 = fadd float %tmp20, %tmp22 | |
; Store the accumulated sum so the loads above have a use.
126 store float %tmp23, float *%arg1, align 4 | |
127 ret void | |
128 } | |
129 | |
130 ; GCN-LABEL: ds_read32_combine_stride_8192_shifted: | |
131 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 | |
132 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] | |
133 | |
134 ; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] | |
135 ; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
136 ; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
137 | |
138 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 8, [[BASE]] | |
139 ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x4008, [[BASE]] | |
140 ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x8008, [[BASE]] | |
141 | |
142 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:32 | |
143 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:32 | |
144 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:32 | |
; Test input: loads six floats from %arg (address space 3) at element indices
; 2, 2050, 4098, 6146, 8194 and 10242 in ascending order. With 4-byte floats
; the accesses are 8192 bytes apart and the first one sits 8 bytes past %arg,
; so no access uses the unmodified base pointer. The values are summed by a
; chain of fadds (seeded by adding 0.0 to the first load) and the total is
; stored through %arg1.
145 define amdgpu_kernel void @ds_read32_combine_stride_8192_shifted(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { | |
146 bb: | |
147 %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 2 | |
148 %tmp2 = load float, float addrspace(3)* %tmp, align 4 | |
149 %tmp3 = fadd float %tmp2, 0.000000e+00 | |
150 %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2050 | |
151 %tmp5 = load float, float addrspace(3)* %tmp4, align 4 | |
152 %tmp6 = fadd float %tmp3, %tmp5 | |
153 %tmp7 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4098 | |
154 %tmp8 = load float, float addrspace(3)* %tmp7, align 4 | |
155 %tmp9 = fadd float %tmp6, %tmp8 | |
156 %tmp10 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6146 | |
157 %tmp11 = load float, float addrspace(3)* %tmp10, align 4 | |
158 %tmp12 = fadd float %tmp9, %tmp11 | |
159 %tmp13 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8194 | |
160 %tmp14 = load float, float addrspace(3)* %tmp13, align 4 | |
161 %tmp15 = fadd float %tmp12, %tmp14 | |
162 %tmp16 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10242 | |
163 %tmp17 = load float, float addrspace(3)* %tmp16, align 4 | |
164 %tmp18 = fadd float %tmp15, %tmp17 | |
165 store float %tmp18, float *%arg1, align 4 | |
166 ret void | |
167 } | |
168 | |
169 ; GCN-LABEL: ds_read64_combine_stride_400: | |
170 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 | |
171 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] | |
172 | |
173 ; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
174 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x960, [[BASE]] | |
175 | |
176 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:50 | |
177 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:100 offset1:150 | |
178 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:200 offset1:250 | |
179 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:50 | |
; Test input: 64-bit variant of the stride-400 read test. Loads eight doubles
; from %arg (address space 3) at element indices 0, 50, 100, ..., 350 in
; ascending order. With 8-byte doubles the consecutive accesses are 400 bytes
; apart. The values are summed by a chain of fadds (seeded by adding 0.0 to
; the first load) and the total is stored through %arg1.
180 define amdgpu_kernel void @ds_read64_combine_stride_400(double addrspace(3)* nocapture readonly %arg, double *nocapture %arg1) { | |
181 bb: | |
182 %tmp = load double, double addrspace(3)* %arg, align 8 | |
183 %tmp2 = fadd double %tmp, 0.000000e+00 | |
184 %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 50 | |
185 %tmp4 = load double, double addrspace(3)* %tmp3, align 8 | |
186 %tmp5 = fadd double %tmp2, %tmp4 | |
187 %tmp6 = getelementptr inbounds double, double addrspace(3)* %arg, i32 100 | |
188 %tmp7 = load double, double addrspace(3)* %tmp6, align 8 | |
189 %tmp8 = fadd double %tmp5, %tmp7 | |
190 %tmp9 = getelementptr inbounds double, double addrspace(3)* %arg, i32 150 | |
191 %tmp10 = load double, double addrspace(3)* %tmp9, align 8 | |
192 %tmp11 = fadd double %tmp8, %tmp10 | |
193 %tmp12 = getelementptr inbounds double, double addrspace(3)* %arg, i32 200 | |
194 %tmp13 = load double, double addrspace(3)* %tmp12, align 8 | |
195 %tmp14 = fadd double %tmp11, %tmp13 | |
196 %tmp15 = getelementptr inbounds double, double addrspace(3)* %arg, i32 250 | |
197 %tmp16 = load double, double addrspace(3)* %tmp15, align 8 | |
198 %tmp17 = fadd double %tmp14, %tmp16 | |
199 %tmp18 = getelementptr inbounds double, double addrspace(3)* %arg, i32 300 | |
200 %tmp19 = load double, double addrspace(3)* %tmp18, align 8 | |
201 %tmp20 = fadd double %tmp17, %tmp19 | |
202 %tmp21 = getelementptr inbounds double, double addrspace(3)* %arg, i32 350 | |
203 %tmp22 = load double, double addrspace(3)* %tmp21, align 8 | |
204 %tmp23 = fadd double %tmp20, %tmp22 | |
; Store the accumulated sum so the loads above have a use.
205 store double %tmp23, double *%arg1, align 8 | |
206 ret void | |
207 } | |
208 | |
209 ; GCN-LABEL: ds_read64_combine_stride_8192_shifted: | |
210 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 | |
211 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] | |
212 | |
213 ; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] | |
214 ; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
215 ; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
216 | |
217 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 8, [[BASE]] | |
218 ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x4008, [[BASE]] | |
219 ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x8008, [[BASE]] | |
220 | |
221 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:16 | |
222 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:16 | |
223 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:16 | |
; Test input: 64-bit variant of the shifted stride-8192 read test. Loads six
; doubles from %arg (address space 3) at element indices 1, 1025, 2049, 3073,
; 4097 and 5121 in ascending order. With 8-byte doubles the accesses are 8192
; bytes apart and the first one sits 8 bytes past %arg. The values are summed
; by a chain of fadds (seeded by adding 0.0 to the first load) and the total
; is stored through %arg1.
224 define amdgpu_kernel void @ds_read64_combine_stride_8192_shifted(double addrspace(3)* nocapture readonly %arg, double *nocapture %arg1) { | |
225 bb: | |
226 %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 1 | |
227 %tmp2 = load double, double addrspace(3)* %tmp, align 8 | |
228 %tmp3 = fadd double %tmp2, 0.000000e+00 | |
229 %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 1025 | |
230 %tmp5 = load double, double addrspace(3)* %tmp4, align 8 | |
231 %tmp6 = fadd double %tmp3, %tmp5 | |
232 %tmp7 = getelementptr inbounds double, double addrspace(3)* %arg, i32 2049 | |
233 %tmp8 = load double, double addrspace(3)* %tmp7, align 8 | |
234 %tmp9 = fadd double %tmp6, %tmp8 | |
235 %tmp10 = getelementptr inbounds double, double addrspace(3)* %arg, i32 3073 | |
236 %tmp11 = load double, double addrspace(3)* %tmp10, align 8 | |
237 %tmp12 = fadd double %tmp9, %tmp11 | |
238 %tmp13 = getelementptr inbounds double, double addrspace(3)* %arg, i32 4097 | |
239 %tmp14 = load double, double addrspace(3)* %tmp13, align 8 | |
240 %tmp15 = fadd double %tmp12, %tmp14 | |
241 %tmp16 = getelementptr inbounds double, double addrspace(3)* %arg, i32 5121 | |
242 %tmp17 = load double, double addrspace(3)* %tmp16, align 8 | |
243 %tmp18 = fadd double %tmp15, %tmp17 | |
244 store double %tmp18, double *%arg1, align 8 | |
245 ret void | |
246 } | |
247 | |
248 ; GCN-LABEL: ds_write32_combine_stride_400: | |
249 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 | |
250 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] | |
251 | |
252 ; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
253 ; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
254 ; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
255 | |
256 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x320, [[BASE]] | |
257 ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x640, [[BASE]] | |
258 ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x960, [[BASE]] | |
259 | |
260 ; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 | |
261 ; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 | |
262 ; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 | |
263 ; GCN-DAG: ds_write2_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 | |
; Test input: stores the float constant 1.0 to eight elements of %arg
; (address space 3) at element indices 0, 100, 200, ..., 700 in ascending
; order. With 4-byte floats the consecutive stores are 400 bytes apart.
264 define amdgpu_kernel void @ds_write32_combine_stride_400(float addrspace(3)* nocapture %arg) { | |
265 bb: | |
266 store float 1.000000e+00, float addrspace(3)* %arg, align 4 | |
267 %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 100 | |
268 store float 1.000000e+00, float addrspace(3)* %tmp, align 4 | |
269 %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200 | |
270 store float 1.000000e+00, float addrspace(3)* %tmp1, align 4 | |
271 %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300 | |
272 store float 1.000000e+00, float addrspace(3)* %tmp2, align 4 | |
273 %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400 | |
274 store float 1.000000e+00, float addrspace(3)* %tmp3, align 4 | |
275 %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500 | |
276 store float 1.000000e+00, float addrspace(3)* %tmp4, align 4 | |
277 %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600 | |
278 store float 1.000000e+00, float addrspace(3)* %tmp5, align 4 | |
279 %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 700 | |
280 store float 1.000000e+00, float addrspace(3)* %tmp6, align 4 | |
281 ret void | |
282 } | |
283 | |
284 ; GCN-LABEL: ds_write32_combine_stride_400_back: | |
285 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 | |
286 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] | |
287 | |
288 ; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
289 ; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
290 ; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
291 | |
292 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x320, [[BASE]] | |
293 ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x640, [[BASE]] | |
294 ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x960, [[BASE]] | |
295 | |
296 ; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 | |
297 ; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 | |
298 ; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 | |
299 ; GCN-DAG: ds_write2_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 | |
; Test input: stores the float constant 1.0 to the same eight elements of
; %arg (address space 3) as the forward stride-400 write test, but in
; descending order: element indices 700, 600, ..., 100, and finally element 0
; via %arg itself.
300 define amdgpu_kernel void @ds_write32_combine_stride_400_back(float addrspace(3)* nocapture %arg) { | |
301 bb: | |
302 %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 700 | |
303 store float 1.000000e+00, float addrspace(3)* %tmp, align 4 | |
304 %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600 | |
305 store float 1.000000e+00, float addrspace(3)* %tmp1, align 4 | |
306 %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500 | |
307 store float 1.000000e+00, float addrspace(3)* %tmp2, align 4 | |
308 %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400 | |
309 store float 1.000000e+00, float addrspace(3)* %tmp3, align 4 | |
310 %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300 | |
311 store float 1.000000e+00, float addrspace(3)* %tmp4, align 4 | |
312 %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200 | |
313 store float 1.000000e+00, float addrspace(3)* %tmp5, align 4 | |
314 %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100 | |
315 store float 1.000000e+00, float addrspace(3)* %tmp6, align 4 | |
; The base element (index 0) is written last in this variant.
316 store float 1.000000e+00, float addrspace(3)* %arg, align 4 | |
317 ret void | |
318 } | |
319 | |
320 ; GCN-LABEL: ds_write32_combine_stride_8192: | |
321 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 | |
322 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] | |
323 ; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 | |
324 ; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:64 offset1:96 | |
325 ; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:128 offset1:160 | |
326 ; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:192 offset1:224 | |
; Test input: stores the float constant 1.0 to eight elements of %arg
; (address space 3) at element indices 0, 2048, 4096, ..., 14336 in ascending
; order. With 4-byte floats the consecutive stores are 8192 bytes apart.
327 define amdgpu_kernel void @ds_write32_combine_stride_8192(float addrspace(3)* nocapture %arg) { | |
328 bb: | |
329 store float 1.000000e+00, float addrspace(3)* %arg, align 4 | |
330 %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 2048 | |
331 store float 1.000000e+00, float addrspace(3)* %tmp, align 4 | |
332 %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4096 | |
333 store float 1.000000e+00, float addrspace(3)* %tmp1, align 4 | |
334 %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6144 | |
335 store float 1.000000e+00, float addrspace(3)* %tmp2, align 4 | |
336 %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8192 | |
337 store float 1.000000e+00, float addrspace(3)* %tmp3, align 4 | |
338 %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10240 | |
339 store float 1.000000e+00, float addrspace(3)* %tmp4, align 4 | |
340 %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 12288 | |
341 store float 1.000000e+00, float addrspace(3)* %tmp5, align 4 | |
342 %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 14336 | |
343 store float 1.000000e+00, float addrspace(3)* %tmp6, align 4 | |
344 ret void | |
345 } | |
346 | |
347 ; GCN-LABEL: ds_write32_combine_stride_8192_shifted: | |
348 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 | |
349 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] | |
350 | |
351 ; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]] | |
352 ; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
353 ; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
354 | |
355 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 4, [[BASE]] | |
356 ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x4004, [[BASE]] | |
357 ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x8004, [[BASE]] | |
358 | |
359 ; GCN-DAG: ds_write2st64_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 | |
360 ; GCN-DAG: ds_write2st64_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 | |
361 ; GCN-DAG: ds_write2st64_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 | |
; Test input: stores the float constant 1.0 to six elements of %arg (address
; space 3) at element indices 1, 2049, 4097, 6145, 8193 and 10241 in
; ascending order. With 4-byte floats the stores are 8192 bytes apart and the
; first one sits 4 bytes past %arg, so no store uses the unmodified base
; pointer.
362 define amdgpu_kernel void @ds_write32_combine_stride_8192_shifted(float addrspace(3)* nocapture %arg) { | |
363 bb: | |
364 %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 1 | |
365 store float 1.000000e+00, float addrspace(3)* %tmp, align 4 | |
366 %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2049 | |
367 store float 1.000000e+00, float addrspace(3)* %tmp1, align 4 | |
368 %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4097 | |
369 store float 1.000000e+00, float addrspace(3)* %tmp2, align 4 | |
370 %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6145 | |
371 store float 1.000000e+00, float addrspace(3)* %tmp3, align 4 | |
372 %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8193 | |
373 store float 1.000000e+00, float addrspace(3)* %tmp4, align 4 | |
374 %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10241 | |
375 store float 1.000000e+00, float addrspace(3)* %tmp5, align 4 | |
376 ret void | |
377 } | |
378 | |
379 ; GCN-LABEL: ds_write64_combine_stride_400: | |
380 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 | |
381 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] | |
382 | |
383 ; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
384 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x960, [[BASE]] | |
385 | |
386 ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50 | |
387 ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:100 offset1:150 | |
388 ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:200 offset1:250 | |
389 ; GCN-DAG: ds_write2_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50 | |
; Test input: 64-bit variant of the stride-400 write test. Stores the double
; constant 1.0 to eight elements of %arg (address space 3) at element indices
; 0, 50, 100, ..., 350 in ascending order. With 8-byte doubles the
; consecutive stores are 400 bytes apart.
390 define amdgpu_kernel void @ds_write64_combine_stride_400(double addrspace(3)* nocapture %arg) { | |
391 bb: | |
392 store double 1.000000e+00, double addrspace(3)* %arg, align 8 | |
393 %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 50 | |
394 store double 1.000000e+00, double addrspace(3)* %tmp, align 8 | |
395 %tmp1 = getelementptr inbounds double, double addrspace(3)* %arg, i32 100 | |
396 store double 1.000000e+00, double addrspace(3)* %tmp1, align 8 | |
397 %tmp2 = getelementptr inbounds double, double addrspace(3)* %arg, i32 150 | |
398 store double 1.000000e+00, double addrspace(3)* %tmp2, align 8 | |
399 %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 200 | |
400 store double 1.000000e+00, double addrspace(3)* %tmp3, align 8 | |
401 %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 250 | |
402 store double 1.000000e+00, double addrspace(3)* %tmp4, align 8 | |
403 %tmp5 = getelementptr inbounds double, double addrspace(3)* %arg, i32 300 | |
404 store double 1.000000e+00, double addrspace(3)* %tmp5, align 8 | |
405 %tmp6 = getelementptr inbounds double, double addrspace(3)* %arg, i32 350 | |
406 store double 1.000000e+00, double addrspace(3)* %tmp6, align 8 | |
407 ret void | |
408 } | |
409 | |
410 ; GCN-LABEL: ds_write64_combine_stride_8192_shifted: | |
411 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 | |
412 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] | |
413 | |
414 ; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] | |
415 ; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
416 ; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] | |
417 | |
418 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 8, [[BASE]] | |
419 ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x4008, [[BASE]] | |
420 ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x8008, [[BASE]] | |
421 | |
422 ; GCN-DAG: ds_write2st64_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16 | |
423 ; GCN-DAG: ds_write2st64_b64 [[B2]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16 | |
424 ; GCN-DAG: ds_write2st64_b64 [[B3]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16 | |
; Test input: 64-bit variant of the shifted stride-8192 write test. Stores
; the double constant 1.0 to six elements of %arg (address space 3) at
; element indices 1, 1025, 2049, 3073, 4097 and 5121 in ascending order. With
; 8-byte doubles the stores are 8192 bytes apart and the first one sits
; 8 bytes past %arg.
425 define amdgpu_kernel void @ds_write64_combine_stride_8192_shifted(double addrspace(3)* nocapture %arg) { | |
426 bb: | |
427 %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 1 | |
428 store double 1.000000e+00, double addrspace(3)* %tmp, align 8 | |
429 %tmp1 = getelementptr inbounds double, double addrspace(3)* %arg, i32 1025 | |
430 store double 1.000000e+00, double addrspace(3)* %tmp1, align 8 | |
431 %tmp2 = getelementptr inbounds double, double addrspace(3)* %arg, i32 2049 | |
432 store double 1.000000e+00, double addrspace(3)* %tmp2, align 8 | |
433 %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 3073 | |
434 store double 1.000000e+00, double addrspace(3)* %tmp3, align 8 | |
435 %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 4097 | |
436 store double 1.000000e+00, double addrspace(3)* %tmp4, align 8 | |
437 %tmp5 = getelementptr inbounds double, double addrspace(3)* %arg, i32 5121 | |
438 store double 1.000000e+00, double addrspace(3)* %tmp5, align 8 | |
439 ret void | |
440 } | |