Mercurial > hg > CbC > CbC_llvm
comparison llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll @ 173:0572611fdcc8 llvm10 llvm12
reorganization done
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 25 May 2020 11:55:54 +0900 |
parents | 1d019706d866 |
children | 2e18cbf3894f |
comparison
equal
deleted
inserted
replaced
172:9fbae9c8bf63 | 173:0572611fdcc8 |
---|---|
1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s | 1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s |
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s | 2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s |
3 | 3 |
4 %struct.ByValStruct = type { [4 x i32] } | 4 %struct.ByValStruct = type { [4 x i32] } |
5 | |
6 ; GCN-LABEL: {{^}}void_func_byval_struct: | |
7 ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32{{$}} | |
8 ; GCN-NOT: s32 | |
9 ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}} | |
10 ; GCN-NOT: s32 | |
11 | |
12 ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:16{{$}} | |
13 ; GCN-NOT: s32 | |
14 ; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:16{{$}} | |
15 ; GCN-NOT: s32 | |
16 define hidden void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 { | |
17 entry: | |
18 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0 | |
19 %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4 | |
20 %add = add nsw i32 %tmp, 1 | |
21 store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4 | |
22 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0 | |
23 %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4 | |
24 %add3 = add nsw i32 %tmp1, 2 | |
25 store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4 | |
26 store volatile i32 9, i32 addrspace(1)* null, align 4 | |
27 ret void | |
28 } | |
29 | |
30 ; Make sure the offset is folded and function's frame register is used | 5 ; Make sure the offset is folded and function's frame register is used |
31 ; rather than the global scratch wave offset. | 6 ; rather than the global scratch wave offset. |
32 ; GCN-LABEL: {{^}}void_func_byval_struct_use_outside_entry_block: | 7 ; GCN-LABEL: {{^}}void_func_byval_struct_use_outside_entry_block: |
33 ; GCN-NOT: v_lshrrev_b32 | 8 ; GCN-NOT: v_lshrrev_b32 |
34 ; GCN-NOT: s_sub_u32 | 9 ; GCN-NOT: s_sub_u32 |
65 br label %bb1 | 40 br label %bb1 |
66 | 41 |
67 bb1: | 42 bb1: |
68 ret void | 43 ret void |
69 } | 44 } |
70 | |
71 ; GCN-LABEL: {{^}}void_func_byval_struct_non_leaf: | |
72 ; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:36 | |
73 ; GCN-DAG: v_writelane_b32 v33, s34, | |
74 ; GCN: s_mov_b32 s34, s32 | |
75 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s34{{$}} | |
76 ; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}} | |
77 ; GCN-DAG: buffer_store_dword v32, off, s[0:3], s34 offset:32 | |
78 ; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32 | |
79 | |
80 ; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]] | |
81 ; GCN: buffer_store_dword [[ADD0]], off, s[0:3], s34{{$}} | |
82 | |
83 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s34 offset:16{{$}} | |
84 ; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]] | |
85 | |
86 ; GCN: s_swappc_b64 | |
87 | |
88 ; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s34 offset:16{{$}} | |
89 | |
90 ; GCN: v_readlane_b32 | |
91 ; GCN-NOT: v_readlane_b32 s32 | |
92 ; GCN-DAG: buffer_load_dword v32, off, s[0:3], s34 offset:32 | |
93 ; GCN: s_sub_u32 s32, s32, 0xc00{{$}} | |
94 ; GCN: v_readlane_b32 s34, v33, | |
95 ; GCN-DAG: buffer_load_dword v33, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload | |
96 ; GCN: s_setpc_b64 | |
97 define void @void_func_byval_struct_non_leaf(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 { | |
98 entry: | |
99 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0 | |
100 %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4 | |
101 %add = add nsw i32 %tmp, 1 | |
102 store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4 | |
103 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0 | |
104 %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4 | |
105 %add3 = add nsw i32 %tmp1, 2 | |
106 call void @external_void_func_void() | |
107 store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4 | |
108 store volatile i32 9, i32 addrspace(1)* null, align 4 | |
109 ret void | |
110 } | |
111 | |
112 ; GCN-LABEL: {{^}}call_void_func_byval_struct_func: | |
113 ; GCN: s_mov_b32 s34, s32 | |
114 ; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}} | |
115 ; GCN-DAG: v_writelane_b32 | |
116 | |
117 ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 | |
118 ; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 | |
119 | |
120 ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s34{{$}} | |
121 ; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s34 offset:16 | |
122 | |
123 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s34{{$}} | |
124 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s34 offset:4 | |
125 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s34 offset:8 | |
126 ; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s34 offset:12 | |
127 | |
128 ; GCN-NOT: s_add_u32 s32, s32, 0x800 | |
129 | |
130 | |
131 ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}} | |
132 ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4 | |
133 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 | |
134 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12 | |
135 | |
136 ; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16 | |
137 ; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s34 offset:20 | |
138 ; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s34 offset:24 | |
139 ; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s34 offset:28 | |
140 | |
141 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16 | |
142 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20 | |
143 ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24 | |
144 ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28 | |
145 | |
146 ; GCN: s_swappc_b64 | |
147 ; GCN-NOT: v_readlane_b32 s32 | |
148 ; GCN: v_readlane_b32 | |
149 ; GCN-NOT: v_readlane_b32 s32 | |
150 | |
151 ; GCN-NOT: s_sub_u32 s32, s32, 0x800 | |
152 | |
153 ; GCN: s_sub_u32 s32, s32, 0xc00{{$}} | |
154 ; GCN: v_readlane_b32 s34, v | |
155 ; GCN: s_waitcnt | |
156 ; GCN: s_setpc_b64 | |
157 define void @call_void_func_byval_struct_func() #1 { | |
158 entry: | |
159 %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5) | |
160 %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5) | |
161 %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)* | |
162 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp) | |
163 %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)* | |
164 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1) | |
165 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0 | |
166 store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4 | |
167 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0 | |
168 store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4 | |
169 call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1) | |
170 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1) | |
171 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp) | |
172 ret void | |
173 } | |
174 | |
175 ; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel: | |
176 ; GCN: s_mov_b32 s33, s7 | |
177 ; GCN-NOT: s_add_u32 s32, s32, 0x800 | |
178 | |
179 ; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 | |
180 ; GCN: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8 | |
181 ; GCN: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 | |
182 ; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24 | |
183 | |
184 ; GCN-NOT: s_add_u32 s32, s32, 0x800 | |
185 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8 | |
186 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12 | |
187 ; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}} | |
188 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16 | |
189 ; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20 | |
190 | |
191 ; GCN: s_getpc_b64 | |
192 | |
193 ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}} | |
194 ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4 | |
195 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 | |
196 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12 | |
197 | |
198 ; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24 | |
199 ; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28 | |
200 ; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32 | |
201 ; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36 | |
202 | |
203 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16 | |
204 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20 | |
205 ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24 | |
206 ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28 | |
207 | |
208 | |
209 ; GCN: s_swappc_b64 | |
210 ; GCN-NOT: s_sub_u32 s32 | |
211 ; GCN: s_endpgm | |
212 define amdgpu_kernel void @call_void_func_byval_struct_kernel() #1 { | |
213 entry: | |
214 %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5) | |
215 %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5) | |
216 %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)* | |
217 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp) | |
218 %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)* | |
219 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1) | |
220 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0 | |
221 store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4 | |
222 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0 | |
223 store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4 | |
224 call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1) | |
225 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1) | |
226 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp) | |
227 ret void | |
228 } | |
229 | |
230 ; GCN-LABEL: {{^}}void_func_byval_struct_align8: | |
231 ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32{{$}} | |
232 ; GCN-NOT: s32 | |
233 ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}} | |
234 ; GCN-NOT: s32 | |
235 | |
236 ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:16{{$}} | |
237 ; GCN-NOT: s32 | |
238 ; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:16{{$}} | |
239 ; GCN-NOT: s32 | |
240 define hidden void @void_func_byval_struct_align8(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 8 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 8 %arg1) #1 { | |
241 entry: | |
242 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0 | |
243 %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 8 | |
244 %add = add nsw i32 %tmp, 1 | |
245 store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 8 | |
246 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0 | |
247 %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 8 | |
248 %add3 = add nsw i32 %tmp1, 2 | |
249 store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 8 | |
250 store volatile i32 9, i32 addrspace(1)* null, align 4 | |
251 ret void | |
252 } | |
253 | |
254 ; Make sure the byval alignment is respected in the call frame setup | |
255 ; GCN-LABEL: {{^}}call_void_func_byval_struct_align8_kernel: | |
256 ; GCN: s_mov_b32 s33, s7 | |
257 ; GCN-NOT: s_add_u32 s32, s32, 0x800 | |
258 | |
259 ; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 | |
260 ; GCN: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8 | |
261 ; GCN: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 | |
262 ; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24 | |
263 | |
264 | |
265 ; GCN-NOT: s_add_u32 s32, s32, 0x800 | |
266 | |
267 ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8 | |
268 ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12 | |
269 ; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16 | |
270 ; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20 | |
271 | |
272 ; GCN-NOT: s_add_u32 s32, s32, 0x800 | |
273 ; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}} | |
274 | |
275 ; GCN: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12 | |
276 ; GCN: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 | |
277 ; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4 | |
278 ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}} | |
279 | |
280 | |
281 ; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24 | |
282 ; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28 | |
283 ; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32 | |
284 ; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36 | |
285 | |
286 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16 | |
287 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20 | |
288 ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24 | |
289 ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28 | |
290 | |
291 | |
292 ; GCN: s_swappc_b64 | |
293 ; GCN-NOT: s_sub_u32 s32 | |
294 ; GCN: s_endpgm | |
295 define amdgpu_kernel void @call_void_func_byval_struct_align8_kernel() #1 { | |
296 entry: | |
297 %arg0 = alloca %struct.ByValStruct, align 8, addrspace(5) | |
298 %arg1 = alloca %struct.ByValStruct, align 8, addrspace(5) | |
299 %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)* | |
300 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp) | |
301 %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)* | |
302 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1) | |
303 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0 | |
304 store volatile i32 9, i32 addrspace(5)* %arrayidx, align 8 | |
305 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0 | |
306 store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 8 | |
307 call void @void_func_byval_struct_align8(%struct.ByValStruct addrspace(5)* byval nonnull align 8 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 8 %arg1) | |
308 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1) | |
309 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp) | |
310 ret void | |
311 } | |
312 | |
313 ; GCN-LABEL: {{^}}call_void_func_byval_struct_align8_func: | |
314 ; GCN: s_mov_b32 s34, s32 | |
315 ; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}} | |
316 ; GCN-DAG: v_writelane_b32 | |
317 | |
318 ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 | |
319 ; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 | |
320 | |
321 ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s34{{$}} | |
322 ; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s34 offset:16 | |
323 | |
324 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s34{{$}} | |
325 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s34 offset:4 | |
326 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s34 offset:8 | |
327 ; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s34 offset:12 | |
328 | |
329 ; GCN-NOT: s_add_u32 s32, s32, 0x800 | |
330 | |
331 ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}} | |
332 ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4 | |
333 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 | |
334 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12 | |
335 | |
336 ; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16 | |
337 ; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s34 offset:20 | |
338 ; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s34 offset:24 | |
339 ; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s34 offset:28 | |
340 | |
341 ; GCN: s_waitcnt vmcnt(0) | |
342 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16 | |
343 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20 | |
344 ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24 | |
345 ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28 | |
346 | |
347 ; GCN: s_swappc_b64 | |
348 ; GCN-NOT: v_readlane_b32 s32 | |
349 ; GCN: v_readlane_b32 | |
350 ; GCN-NOT: v_readlane_b32 s32 | |
351 | |
352 ; GCN-NOT: s_sub_u32 s32, s32, 0x800 | |
353 | |
354 ; GCN: s_sub_u32 s32, s32, 0xc00{{$}} | |
355 ; GCN: v_readlane_b32 s34, v | |
356 ; GCN: s_waitcnt | |
357 ; GCN-NEXT: s_setpc_b64 | |
358 define void @call_void_func_byval_struct_align8_func() #0 { | |
359 entry: | |
360 %arg0 = alloca %struct.ByValStruct, align 8, addrspace(5) | |
361 %arg1 = alloca %struct.ByValStruct, align 8, addrspace(5) | |
362 %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)* | |
363 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp) | |
364 %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)* | |
365 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1) | |
366 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0 | |
367 store volatile i32 9, i32 addrspace(5)* %arrayidx, align 8 | |
368 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0 | |
369 store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 8 | |
370 call void @void_func_byval_struct_align8(%struct.ByValStruct addrspace(5)* byval nonnull align 8 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 8 %arg1) | |
371 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1) | |
372 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp) | |
373 ret void | |
374 } | |
375 | |
376 ; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel_no_frame_pointer_elim: | |
377 define amdgpu_kernel void @call_void_func_byval_struct_kernel_no_frame_pointer_elim() #2 { | |
378 entry: | |
379 %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5) | |
380 %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5) | |
381 %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)* | |
382 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp) | |
383 %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)* | |
384 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1) | |
385 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0 | |
386 store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4 | |
387 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0 | |
388 store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4 | |
389 call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1) | |
390 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1) | |
391 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp) | |
392 ret void | |
393 } | |
394 | |
395 declare hidden void @external_void_func_void() #0 | 45 declare hidden void @external_void_func_void() #0 |
396 | 46 |
397 declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #3 | 47 declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #3 |
398 declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #3 | 48 declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #3 |
399 | 49 |