comparison llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll @ 173:0572611fdcc8 llvm10 llvm12

reorganization done
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 25 May 2020 11:55:54 +0900
parents 1d019706d866
children 2e18cbf3894f
comparison
equal deleted inserted replaced
172:9fbae9c8bf63 173:0572611fdcc8
1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s 1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s 2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
3 3
4 %struct.ByValStruct = type { [4 x i32] } 4 %struct.ByValStruct = type { [4 x i32] }
5
6 ; GCN-LABEL: {{^}}void_func_byval_struct:
7 ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32{{$}}
8 ; GCN-NOT: s32
9 ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
10 ; GCN-NOT: s32
11
12 ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:16{{$}}
13 ; GCN-NOT: s32
14 ; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:16{{$}}
15 ; GCN-NOT: s32
16 define hidden void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
17 entry:
18 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
19 %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
20 %add = add nsw i32 %tmp, 1
21 store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
22 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
23 %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
24 %add3 = add nsw i32 %tmp1, 2
25 store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
26 store volatile i32 9, i32 addrspace(1)* null, align 4
27 ret void
28 }
29
30 ; Make sure the offset is folded and function's frame register is used 5 ; Make sure the offset is folded and function's frame register is used
31 ; rather than the global scratch wave offset. 6 ; rather than the global scratch wave offset.
32 ; GCN-LABEL: {{^}}void_func_byval_struct_use_outside_entry_block: 7 ; GCN-LABEL: {{^}}void_func_byval_struct_use_outside_entry_block:
33 ; GCN-NOT: v_lshrrev_b32 8 ; GCN-NOT: v_lshrrev_b32
34 ; GCN-NOT: s_sub_u32 9 ; GCN-NOT: s_sub_u32
65 br label %bb1 40 br label %bb1
66 41
67 bb1: 42 bb1:
68 ret void 43 ret void
69 } 44 }
70
71 ; GCN-LABEL: {{^}}void_func_byval_struct_non_leaf:
72 ; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:36
73 ; GCN-DAG: v_writelane_b32 v33, s34,
74 ; GCN: s_mov_b32 s34, s32
75 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s34{{$}}
76 ; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
77 ; GCN-DAG: buffer_store_dword v32, off, s[0:3], s34 offset:32
78 ; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32
79
80 ; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
81 ; GCN: buffer_store_dword [[ADD0]], off, s[0:3], s34{{$}}
82
83 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s34 offset:16{{$}}
84 ; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]
85
86 ; GCN: s_swappc_b64
87
88 ; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s34 offset:16{{$}}
89
90 ; GCN: v_readlane_b32
91 ; GCN-NOT: v_readlane_b32 s32
92 ; GCN-DAG: buffer_load_dword v32, off, s[0:3], s34 offset:32
93 ; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
94 ; GCN: v_readlane_b32 s34, v33,
95 ; GCN-DAG: buffer_load_dword v33, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
96 ; GCN: s_setpc_b64
97 define void @void_func_byval_struct_non_leaf(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
98 entry:
99 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
100 %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
101 %add = add nsw i32 %tmp, 1
102 store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
103 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
104 %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
105 %add3 = add nsw i32 %tmp1, 2
106 call void @external_void_func_void()
107 store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
108 store volatile i32 9, i32 addrspace(1)* null, align 4
109 ret void
110 }
111
112 ; GCN-LABEL: {{^}}call_void_func_byval_struct_func:
113 ; GCN: s_mov_b32 s34, s32
114 ; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
115 ; GCN-DAG: v_writelane_b32
116
117 ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
118 ; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
119
120 ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s34{{$}}
121 ; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s34 offset:16
122
123 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s34{{$}}
124 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s34 offset:4
125 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s34 offset:8
126 ; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s34 offset:12
127
128 ; GCN-NOT: s_add_u32 s32, s32, 0x800
129
130
131 ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
132 ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
133 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
134 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
135
136 ; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16
137 ; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s34 offset:20
138 ; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s34 offset:24
139 ; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s34 offset:28
140
141 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
142 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
143 ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
144 ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28
145
146 ; GCN: s_swappc_b64
147 ; GCN-NOT: v_readlane_b32 s32
148 ; GCN: v_readlane_b32
149 ; GCN-NOT: v_readlane_b32 s32
150
151 ; GCN-NOT: s_sub_u32 s32, s32, 0x800
152
153 ; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
154 ; GCN: v_readlane_b32 s34, v
155 ; GCN: s_waitcnt
156 ; GCN: s_setpc_b64
157 define void @call_void_func_byval_struct_func() #1 {
158 entry:
159 %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
160 %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
161 %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
162 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
163 %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
164 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
165 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
166 store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
167 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
168 store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
169 call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
170 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
171 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
172 ret void
173 }
174
175 ; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel:
176 ; GCN: s_mov_b32 s33, s7
177 ; GCN-NOT: s_add_u32 s32, s32, 0x800
178
179 ; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
180 ; GCN: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8
181 ; GCN: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
182 ; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24
183
184 ; GCN-NOT: s_add_u32 s32, s32, 0x800
185 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
186 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
187 ; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}}
188 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
189 ; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20
190
191 ; GCN: s_getpc_b64
192
193 ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
194 ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
195 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
196 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
197
198 ; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24
199 ; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28
200 ; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32
201 ; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36
202
203 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
204 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
205 ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
206 ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28
207
208
209 ; GCN: s_swappc_b64
210 ; GCN-NOT: s_sub_u32 s32
211 ; GCN: s_endpgm
212 define amdgpu_kernel void @call_void_func_byval_struct_kernel() #1 {
213 entry:
214 %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
215 %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
216 %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
217 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
218 %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
219 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
220 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
221 store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
222 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
223 store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
224 call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
225 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
226 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
227 ret void
228 }
229
230 ; GCN-LABEL: {{^}}void_func_byval_struct_align8:
231 ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32{{$}}
232 ; GCN-NOT: s32
233 ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
234 ; GCN-NOT: s32
235
236 ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:16{{$}}
237 ; GCN-NOT: s32
238 ; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:16{{$}}
239 ; GCN-NOT: s32
240 define hidden void @void_func_byval_struct_align8(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 8 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 8 %arg1) #1 {
241 entry:
242 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
243 %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 8
244 %add = add nsw i32 %tmp, 1
245 store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 8
246 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
247 %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 8
248 %add3 = add nsw i32 %tmp1, 2
249 store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 8
250 store volatile i32 9, i32 addrspace(1)* null, align 4
251 ret void
252 }
253
254 ; Make sure the byval alignment is respected in the call frame setup
255 ; GCN-LABEL: {{^}}call_void_func_byval_struct_align8_kernel:
256 ; GCN: s_mov_b32 s33, s7
257 ; GCN-NOT: s_add_u32 s32, s32, 0x800
258
259 ; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
260 ; GCN: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8
261 ; GCN: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
262 ; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24
263
264
265 ; GCN-NOT: s_add_u32 s32, s32, 0x800
266
267 ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
268 ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
269 ; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
270 ; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20
271
272 ; GCN-NOT: s_add_u32 s32, s32, 0x800
273 ; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}}
274
275 ; GCN: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
276 ; GCN: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
277 ; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
278 ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
279
280
281 ; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24
282 ; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28
283 ; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32
284 ; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36
285
286 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
287 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
288 ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
289 ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28
290
291
292 ; GCN: s_swappc_b64
293 ; GCN-NOT: s_sub_u32 s32
294 ; GCN: s_endpgm
295 define amdgpu_kernel void @call_void_func_byval_struct_align8_kernel() #1 {
296 entry:
297 %arg0 = alloca %struct.ByValStruct, align 8, addrspace(5)
298 %arg1 = alloca %struct.ByValStruct, align 8, addrspace(5)
299 %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
300 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
301 %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
302 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
303 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
304 store volatile i32 9, i32 addrspace(5)* %arrayidx, align 8
305 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
306 store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 8
307 call void @void_func_byval_struct_align8(%struct.ByValStruct addrspace(5)* byval nonnull align 8 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 8 %arg1)
308 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
309 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
310 ret void
311 }
312
313 ; GCN-LABEL: {{^}}call_void_func_byval_struct_align8_func:
314 ; GCN: s_mov_b32 s34, s32
315 ; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
316 ; GCN-DAG: v_writelane_b32
317
318 ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
319 ; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
320
321 ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s34{{$}}
322 ; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s34 offset:16
323
324 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s34{{$}}
325 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s34 offset:4
326 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s34 offset:8
327 ; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s34 offset:12
328
329 ; GCN-NOT: s_add_u32 s32, s32, 0x800
330
331 ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
332 ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
333 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
334 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
335
336 ; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16
337 ; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s34 offset:20
338 ; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s34 offset:24
339 ; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s34 offset:28
340
341 ; GCN: s_waitcnt vmcnt(0)
342 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
343 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
344 ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
345 ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28
346
347 ; GCN: s_swappc_b64
348 ; GCN-NOT: v_readlane_b32 s32
349 ; GCN: v_readlane_b32
350 ; GCN-NOT: v_readlane_b32 s32
351
352 ; GCN-NOT: s_sub_u32 s32, s32, 0x800
353
354 ; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
355 ; GCN: v_readlane_b32 s34, v
356 ; GCN: s_waitcnt
357 ; GCN-NEXT: s_setpc_b64
358 define void @call_void_func_byval_struct_align8_func() #0 {
359 entry:
360 %arg0 = alloca %struct.ByValStruct, align 8, addrspace(5)
361 %arg1 = alloca %struct.ByValStruct, align 8, addrspace(5)
362 %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
363 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
364 %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
365 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
366 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
367 store volatile i32 9, i32 addrspace(5)* %arrayidx, align 8
368 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
369 store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 8
370 call void @void_func_byval_struct_align8(%struct.ByValStruct addrspace(5)* byval nonnull align 8 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 8 %arg1)
371 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
372 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
373 ret void
374 }
375
376 ; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel_no_frame_pointer_elim:
377 define amdgpu_kernel void @call_void_func_byval_struct_kernel_no_frame_pointer_elim() #2 {
378 entry:
379 %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
380 %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
381 %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
382 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
383 %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
384 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
385 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
386 store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
387 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
388 store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
389 call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
390 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
391 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
392 ret void
393 }
394
395 declare hidden void @external_void_func_void() #0 45 declare hidden void @external_void_func_void() #0
396 46
397 declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #3 47 declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #3
398 declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #3 48 declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #3
399 49