Mercurial > hg > CbC > CbC_llvm
comparison test/CodeGen/AArch64/arm64-abi_align.ll @ 77:54457678186b LLVM3.6
LLVM 3.6
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 08 Sep 2014 22:06:00 +0900 |
parents | |
children | 60c9769439b8 |
comparison
equal
deleted
inserted
replaced
34:e874dbf0ad9d | 77:54457678186b |
---|---|
1 ; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s | |
2 ; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s | |
3 target triple = "arm64-apple-darwin" | |
4 | |
5 ; rdar://12648441 | |
6 ; Generated from arm64-arguments.c with -O2. | |
7 ; Test passing structs with size < 8, < 16 and > 16 | |
8 ; with alignment of 16 and without | |
9 | |
10 ; Structs with size < 8 | |
11 %struct.s38 = type { i32, i16 } | |
12 ; With alignment of 16, the size will be padded to a multiple of 16 bytes. | |
13 %struct.s39 = type { i32, i16, [10 x i8] } | |
14 ; Structs with size < 16 | |
15 %struct.s40 = type { i32, i16, i32, i16 } | |
16 %struct.s41 = type { i32, i16, i32, i16 } | |
17 ; Structs with size > 16 | |
18 %struct.s42 = type { i32, i16, i32, i16, i32, i16 } | |
19 %struct.s43 = type { i32, i16, i32, i16, i32, i16, [10 x i8] } | |
20 | |
21 @g38 = common global %struct.s38 zeroinitializer, align 4 | |
22 @g38_2 = common global %struct.s38 zeroinitializer, align 4 | |
23 @g39 = common global %struct.s39 zeroinitializer, align 16 | |
24 @g39_2 = common global %struct.s39 zeroinitializer, align 16 | |
25 @g40 = common global %struct.s40 zeroinitializer, align 4 | |
26 @g40_2 = common global %struct.s40 zeroinitializer, align 4 | |
27 @g41 = common global %struct.s41 zeroinitializer, align 16 | |
28 @g41_2 = common global %struct.s41 zeroinitializer, align 16 | |
29 @g42 = common global %struct.s42 zeroinitializer, align 4 | |
30 @g42_2 = common global %struct.s42 zeroinitializer, align 4 | |
31 @g43 = common global %struct.s43 zeroinitializer, align 16 | |
32 @g43_2 = common global %struct.s43 zeroinitializer, align 16 | |
33 | |
34 ; structs with size < 8 bytes, passed via i64 in x1 and x2 | |
35 define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 { | |
36 entry: | |
37 ; CHECK-LABEL: f38 | |
38 ; CHECK: add w[[A:[0-9]+]], w1, w0 | |
39 ; CHECK: add {{w[0-9]+}}, w[[A]], w2 | |
40 %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32 | |
41 %s1.sroa.1.4.extract.shift = lshr i64 %s1.coerce, 32 | |
42 %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce to i32 | |
43 %s2.sroa.1.4.extract.shift = lshr i64 %s2.coerce, 32 | |
44 %sext8 = shl nuw nsw i64 %s1.sroa.1.4.extract.shift, 16 | |
45 %sext = trunc i64 %sext8 to i32 | |
46 %conv = ashr exact i32 %sext, 16 | |
47 %sext1011 = shl nuw nsw i64 %s2.sroa.1.4.extract.shift, 16 | |
48 %sext10 = trunc i64 %sext1011 to i32 | |
49 %conv6 = ashr exact i32 %sext10, 16 | |
50 %add = add i32 %s1.sroa.0.0.extract.trunc, %i | |
51 %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc | |
52 %add4 = add i32 %add3, %conv | |
53 %add7 = add i32 %add4, %conv6 | |
54 ret i32 %add7 | |
55 } | |
56 | |
57 define i32 @caller38() #1 { | |
58 entry: | |
59 ; CHECK-LABEL: caller38 | |
60 ; CHECK: ldr x1, | |
61 ; CHECK: ldr x2, | |
62 %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4 | |
63 %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4 | |
64 %call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5 | |
65 ret i32 %call | |
66 } | |
67 | |
68 declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, | |
69 i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) #0 | |
70 | |
71 ; structs with size < 8 bytes, passed on stack at [sp+8] and [sp+16] | |
72 ; i9 at [sp] | |
73 define i32 @caller38_stack() #1 { | |
74 entry: | |
75 ; CHECK-LABEL: caller38_stack | |
76 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8] | |
77 ; CHECK: movz w[[C:[0-9]+]], #0x9 | |
78 ; CHECK: str w[[C]], [sp] | |
79 %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4 | |
80 %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4 | |
81 %call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, | |
82 i32 7, i32 8, i32 9, i64 %0, i64 %1) #5 | |
83 ret i32 %call | |
84 } | |
85 | |
86 ; structs with size < 8 bytes, alignment of 16 | |
87 ; passed via i128 in x1 and x3 | |
88 define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 { | |
89 entry: | |
90 ; CHECK-LABEL: f39 | |
91 ; CHECK: add w[[A:[0-9]+]], w1, w0 | |
92 ; CHECK: add {{w[0-9]+}}, w[[A]], w3 | |
93 %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32 | |
94 %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32 | |
95 %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32 | |
96 %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32 | |
97 %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16 | |
98 %sext = trunc i128 %sext8 to i32 | |
99 %conv = ashr exact i32 %sext, 16 | |
100 %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16 | |
101 %sext10 = trunc i128 %sext1011 to i32 | |
102 %conv6 = ashr exact i32 %sext10, 16 | |
103 %add = add i32 %s1.sroa.0.0.extract.trunc, %i | |
104 %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc | |
105 %add4 = add i32 %add3, %conv | |
106 %add7 = add i32 %add4, %conv6 | |
107 ret i32 %add7 | |
108 } | |
109 | |
110 define i32 @caller39() #1 { | |
111 entry: | |
112 ; CHECK-LABEL: caller39 | |
113 ; CHECK: ldp x1, x2, | |
114 ; CHECK: ldp x3, x4, | |
115 %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16 | |
116 %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16 | |
117 %call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5 | |
118 ret i32 %call | |
119 } | |
120 | |
121 declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, | |
122 i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0 | |
123 | |
124 ; structs with size < 8 bytes, alignment 16 | |
125 ; passed on stack at [sp+16] and [sp+32] | |
126 define i32 @caller39_stack() #1 { | |
127 entry: | |
128 ; CHECK-LABEL: caller39_stack | |
129 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32] | |
130 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] | |
131 ; CHECK: movz w[[C:[0-9]+]], #0x9 | |
132 ; CHECK: str w[[C]], [sp] | |
133 %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16 | |
134 %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16 | |
135 %call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, | |
136 i32 7, i32 8, i32 9, i128 %0, i128 %1) #5 | |
137 ret i32 %call | |
138 } | |
139 | |
140 ; structs with size < 16 bytes | |
141 ; passed via [2 x i64] in x1 and x3 | |
142 define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 { | |
143 entry: | |
144 ; CHECK-LABEL: f40 | |
145 ; CHECK: add w[[A:[0-9]+]], w1, w0 | |
146 ; CHECK: add {{w[0-9]+}}, w[[A]], w3 | |
147 %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0 | |
148 %s2.coerce.fca.0.extract = extractvalue [2 x i64] %s2.coerce, 0 | |
149 %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce.fca.0.extract to i32 | |
150 %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce.fca.0.extract to i32 | |
151 %s1.sroa.0.4.extract.shift = lshr i64 %s1.coerce.fca.0.extract, 32 | |
152 %sext8 = shl nuw nsw i64 %s1.sroa.0.4.extract.shift, 16 | |
153 %sext = trunc i64 %sext8 to i32 | |
154 %conv = ashr exact i32 %sext, 16 | |
155 %s2.sroa.0.4.extract.shift = lshr i64 %s2.coerce.fca.0.extract, 32 | |
156 %sext1011 = shl nuw nsw i64 %s2.sroa.0.4.extract.shift, 16 | |
157 %sext10 = trunc i64 %sext1011 to i32 | |
158 %conv6 = ashr exact i32 %sext10, 16 | |
159 %add = add i32 %s1.sroa.0.0.extract.trunc, %i | |
160 %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc | |
161 %add4 = add i32 %add3, %conv | |
162 %add7 = add i32 %add4, %conv6 | |
163 ret i32 %add7 | |
164 } | |
165 | |
166 define i32 @caller40() #1 { | |
167 entry: | |
168 ; CHECK-LABEL: caller40 | |
169 ; CHECK: ldp x1, x2, | |
170 ; CHECK: ldp x3, x4, | |
171 %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4 | |
172 %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4 | |
173 %call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5 | |
174 ret i32 %call | |
175 } | |
176 | |
177 declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, | |
178 i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 | |
179 | |
180 ; structs with size < 16 bytes | |
181 ; passed on stack at [sp+8] and [sp+24] | |
182 define i32 @caller40_stack() #1 { | |
183 entry: | |
184 ; CHECK-LABEL: caller40_stack | |
185 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24] | |
186 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8] | |
187 ; CHECK: movz w[[C:[0-9]+]], #0x9 | |
188 ; CHECK: str w[[C]], [sp] | |
189 %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4 | |
190 %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4 | |
191 %call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, | |
192 i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64] %1) #5 | |
193 ret i32 %call | |
194 } | |
195 | |
196 ; structs with size < 16 bytes, alignment of 16 | |
197 ; passed via i128 in x1 and x3 | |
198 define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 { | |
199 entry: | |
200 ; CHECK-LABEL: f41 | |
201 ; CHECK: add w[[A:[0-9]+]], w1, w0 | |
202 ; CHECK: add {{w[0-9]+}}, w[[A]], w3 | |
203 %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32 | |
204 %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32 | |
205 %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32 | |
206 %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32 | |
207 %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16 | |
208 %sext = trunc i128 %sext8 to i32 | |
209 %conv = ashr exact i32 %sext, 16 | |
210 %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16 | |
211 %sext10 = trunc i128 %sext1011 to i32 | |
212 %conv6 = ashr exact i32 %sext10, 16 | |
213 %add = add i32 %s1.sroa.0.0.extract.trunc, %i | |
214 %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc | |
215 %add4 = add i32 %add3, %conv | |
216 %add7 = add i32 %add4, %conv6 | |
217 ret i32 %add7 | |
218 } | |
219 | |
220 define i32 @caller41() #1 { | |
221 entry: | |
222 ; CHECK-LABEL: caller41 | |
223 ; CHECK: ldp x1, x2, | |
224 ; CHECK: ldp x3, x4, | |
225 %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16 | |
226 %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16 | |
227 %call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5 | |
228 ret i32 %call | |
229 } | |
230 | |
231 declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, | |
232 i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0 | |
233 | |
234 ; structs with size < 16 bytes, alignment of 16 | |
235 ; passed on stack at [sp+16] and [sp+32] | |
236 define i32 @caller41_stack() #1 { | |
237 entry: | |
238 ; CHECK-LABEL: caller41_stack | |
239 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32] | |
240 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] | |
241 ; CHECK: movz w[[C:[0-9]+]], #0x9 | |
242 ; CHECK: str w[[C]], [sp] | |
243 %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16 | |
244 %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16 | |
245 %call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, | |
246 i32 7, i32 8, i32 9, i128 %0, i128 %1) #5 | |
247 ret i32 %call | |
248 } | |
249 | |
250 ; structs with size of 22 bytes, passed indirectly: x1 and x2 hold the pointers to s1 and s2 (checked for both the default and the -O0 FAST run) | |
251 define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 { | |
252 entry: | |
253 ; CHECK-LABEL: f42 | |
254 ; CHECK: ldr w[[A:[0-9]+]], [x1] | |
255 ; CHECK: ldr w[[B:[0-9]+]], [x2] | |
256 ; CHECK: add w[[C:[0-9]+]], w[[A]], w0 | |
257 ; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]] | |
258 ; FAST-LABEL: f42 | |
259 ; FAST: ldr w[[A:[0-9]+]], [x1] | |
260 ; FAST: ldr w[[B:[0-9]+]], [x2] | |
261 ; FAST: add w[[C:[0-9]+]], w[[A]], w0 | |
262 ; FAST: add {{w[0-9]+}}, w[[C]], w[[B]] | |
263 %i1 = getelementptr inbounds %struct.s42* %s1, i64 0, i32 0 | |
264 %0 = load i32* %i1, align 4, !tbaa !0 | |
265 %i2 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 0 | |
266 %1 = load i32* %i2, align 4, !tbaa !0 | |
267 %s = getelementptr inbounds %struct.s42* %s1, i64 0, i32 1 | |
268 %2 = load i16* %s, align 2, !tbaa !3 | |
269 %conv = sext i16 %2 to i32 | |
270 %s5 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 1 | |
271 %3 = load i16* %s5, align 2, !tbaa !3 | |
272 %conv6 = sext i16 %3 to i32 | |
273 %add = add i32 %0, %i | |
274 %add3 = add i32 %add, %1 | |
275 %add4 = add i32 %add3, %conv | |
276 %add7 = add i32 %add4, %conv6 | |
277 ret i32 %add7 | |
278 } | |
279 | |
280 ; For s1, we allocate stack space (24 bytes are copied from @g42) and pass its address via x1; s2 is handled the same way via x2 | |
281 define i32 @caller42() #3 { | |
282 entry: | |
283 ; CHECK-LABEL: caller42 | |
284 ; CHECK: str {{x[0-9]+}}, [sp, #48] | |
285 ; CHECK: str {{q[0-9]+}}, [sp, #32] | |
286 ; CHECK: str {{x[0-9]+}}, [sp, #16] | |
287 ; CHECK: str {{q[0-9]+}}, [sp] | |
288 ; CHECK: add x1, sp, #32 | |
289 ; CHECK: mov x2, sp | |
290 ; Space for s1 is allocated at sp+32 | |
291 ; Space for s2 is allocated at sp | |
292 | |
293 ; FAST-LABEL: caller42 | |
294 ; FAST: sub sp, sp, #96 | |
295 ; Space for s1 is allocated at fp-24 = sp+72 | |
296 ; Space for s2 is allocated at sp+48 | |
297 ; FAST: sub x[[A:[0-9]+]], x29, #24 | |
298 ; FAST: add x[[B:[0-9]+]], sp, #48 | |
299 ; Call memcpy with size = 24 (0x18) | |
300 ; FAST: orr {{x[0-9]+}}, xzr, #0x18 | |
301 %tmp = alloca %struct.s42, align 4 | |
302 %tmp1 = alloca %struct.s42, align 4 | |
303 %0 = bitcast %struct.s42* %tmp to i8* | |
304 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 | |
305 %1 = bitcast %struct.s42* %tmp1 to i8* | |
306 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 | |
307 %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5 | |
308 ret i32 %call | |
309 } | |
310 | |
311 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #4 | |
312 | |
313 declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, | |
314 i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1, | |
315 %struct.s42* nocapture %s2) #2 | |
316 | |
317 define i32 @caller42_stack() #3 { | |
318 entry: | |
319 ; CHECK-LABEL: caller42_stack | |
320 ; CHECK: mov x29, sp | |
321 ; CHECK: sub sp, sp, #96 | |
322 ; CHECK: stur {{x[0-9]+}}, [x29, #-16] | |
323 ; CHECK: stur {{q[0-9]+}}, [x29, #-32] | |
324 ; CHECK: str {{x[0-9]+}}, [sp, #48] | |
325 ; CHECK: str {{q[0-9]+}}, [sp, #32] | |
326 ; Space for s1 is allocated at x29-32 = sp+64 | |
327 ; Space for s2 is allocated at sp+32 | |
328 ; CHECK: add x[[B:[0-9]+]], sp, #32 | |
329 ; CHECK: str x[[B]], [sp, #16] | |
330 ; CHECK: sub x[[A:[0-9]+]], x29, #32 | |
331 ; Address of s1 is passed on stack at sp+8 | |
332 ; CHECK: str x[[A]], [sp, #8] | |
333 ; CHECK: movz w[[C:[0-9]+]], #0x9 | |
334 ; CHECK: str w[[C]], [sp] | |
335 | |
336 ; FAST-LABEL: caller42_stack | |
337 ; Space for s1 is allocated at fp-24 | |
338 ; Space for s2 is allocated at fp-48 | |
339 ; FAST: sub x[[A:[0-9]+]], x29, #24 | |
340 ; FAST: sub x[[B:[0-9]+]], x29, #48 | |
341 ; Call memcpy with size = 24 (0x18) | |
342 ; FAST: orr {{x[0-9]+}}, xzr, #0x18 | |
343 ; FAST: str {{w[0-9]+}}, [sp] | |
344 ; Address of s1 is passed on stack at sp+8 | |
345 ; FAST: str {{x[0-9]+}}, [sp, #8] | |
346 ; FAST: str {{x[0-9]+}}, [sp, #16] | |
347 %tmp = alloca %struct.s42, align 4 | |
348 %tmp1 = alloca %struct.s42, align 4 | |
349 %0 = bitcast %struct.s42* %tmp to i8* | |
350 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 | |
351 %1 = bitcast %struct.s42* %tmp1 to i8* | |
352 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 | |
353 %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, | |
354 i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5 | |
355 ret i32 %call | |
356 } | |
357 | |
358 ; structs with size of 22 bytes, alignment of 16 | |
359 ; passed indirectly in x1 and x2 | |
360 define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 { | |
361 entry: | |
362 ; CHECK-LABEL: f43 | |
363 ; CHECK: ldr w[[A:[0-9]+]], [x1] | |
364 ; CHECK: ldr w[[B:[0-9]+]], [x2] | |
365 ; CHECK: add w[[C:[0-9]+]], w[[A]], w0 | |
366 ; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]] | |
367 ; FAST-LABEL: f43 | |
368 ; FAST: ldr w[[A:[0-9]+]], [x1] | |
369 ; FAST: ldr w[[B:[0-9]+]], [x2] | |
370 ; FAST: add w[[C:[0-9]+]], w[[A]], w0 | |
371 ; FAST: add {{w[0-9]+}}, w[[C]], w[[B]] | |
372 %i1 = getelementptr inbounds %struct.s43* %s1, i64 0, i32 0 | |
373 %0 = load i32* %i1, align 4, !tbaa !0 | |
374 %i2 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 0 | |
375 %1 = load i32* %i2, align 4, !tbaa !0 | |
376 %s = getelementptr inbounds %struct.s43* %s1, i64 0, i32 1 | |
377 %2 = load i16* %s, align 2, !tbaa !3 | |
378 %conv = sext i16 %2 to i32 | |
379 %s5 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 1 | |
380 %3 = load i16* %s5, align 2, !tbaa !3 | |
381 %conv6 = sext i16 %3 to i32 | |
382 %add = add i32 %0, %i | |
383 %add3 = add i32 %add, %1 | |
384 %add4 = add i32 %add3, %conv | |
385 %add7 = add i32 %add4, %conv6 | |
386 ret i32 %add7 | |
387 } | |
388 | |
389 define i32 @caller43() #3 { | |
390 entry: | |
391 ; CHECK-LABEL: caller43 | |
392 ; CHECK: str {{q[0-9]+}}, [sp, #48] | |
393 ; CHECK: str {{q[0-9]+}}, [sp, #32] | |
394 ; CHECK: str {{q[0-9]+}}, [sp, #16] | |
395 ; CHECK: str {{q[0-9]+}}, [sp] | |
396 ; CHECK: add x1, sp, #32 | |
397 ; CHECK: mov x2, sp | |
398 ; Space for s1 is allocated at sp+32 | |
399 ; Space for s2 is allocated at sp | |
400 | |
401 ; FAST-LABEL: caller43 | |
402 ; FAST: mov x29, sp | |
403 ; Space for s1 is allocated at sp+32 | |
404 ; Space for s2 is allocated at sp | |
405 ; FAST: add x1, sp, #32 | |
406 ; FAST: mov x2, sp | |
407 ; FAST: str {{x[0-9]+}}, [sp, #32] | |
408 ; FAST: str {{x[0-9]+}}, [sp, #40] | |
409 ; FAST: str {{x[0-9]+}}, [sp, #48] | |
410 ; FAST: str {{x[0-9]+}}, [sp, #56] | |
411 ; FAST: str {{x[0-9]+}}, [sp] | |
412 ; FAST: str {{x[0-9]+}}, [sp, #8] | |
413 ; FAST: str {{x[0-9]+}}, [sp, #16] | |
414 ; FAST: str {{x[0-9]+}}, [sp, #24] | |
415 %tmp = alloca %struct.s43, align 16 | |
416 %tmp1 = alloca %struct.s43, align 16 | |
417 %0 = bitcast %struct.s43* %tmp to i8* | |
418 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 | |
419 %1 = bitcast %struct.s43* %tmp1 to i8* | |
420 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 | |
421 %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5 | |
422 ret i32 %call | |
423 } | |
424 | |
425 declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, | |
426 i32 %i7, i32 %i8, i32 %i9, %struct.s43* nocapture %s1, | |
427 %struct.s43* nocapture %s2) #2 | |
428 | |
429 define i32 @caller43_stack() #3 { | |
430 entry: | |
431 ; CHECK-LABEL: caller43_stack | |
432 ; CHECK: mov x29, sp | |
433 ; CHECK: sub sp, sp, #96 | |
434 ; CHECK: stur {{q[0-9]+}}, [x29, #-16] | |
435 ; CHECK: stur {{q[0-9]+}}, [x29, #-32] | |
436 ; CHECK: str {{q[0-9]+}}, [sp, #48] | |
437 ; CHECK: str {{q[0-9]+}}, [sp, #32] | |
438 ; Space for s1 is allocated at x29-32 = sp+64 | |
439 ; Space for s2 is allocated at sp+32 | |
440 ; CHECK: add x[[B:[0-9]+]], sp, #32 | |
441 ; CHECK: str x[[B]], [sp, #16] | |
442 ; CHECK: sub x[[A:[0-9]+]], x29, #32 | |
443 ; Address of s1 is passed on stack at sp+8 | |
444 ; CHECK: str x[[A]], [sp, #8] | |
445 ; CHECK: movz w[[C:[0-9]+]], #0x9 | |
446 ; CHECK: str w[[C]], [sp] | |
447 | |
448 ; FAST-LABEL: caller43_stack | |
449 ; FAST: sub sp, sp, #96 | |
450 ; Space for s1 is allocated at fp-32 = sp+64 | |
451 ; Space for s2 is allocated at sp+32 | |
452 ; FAST: sub x[[A:[0-9]+]], x29, #32 | |
453 ; FAST: add x[[B:[0-9]+]], sp, #32 | |
454 ; FAST: stur {{x[0-9]+}}, [x29, #-32] | |
455 ; FAST: stur {{x[0-9]+}}, [x29, #-24] | |
456 ; FAST: stur {{x[0-9]+}}, [x29, #-16] | |
457 ; FAST: stur {{x[0-9]+}}, [x29, #-8] | |
458 ; FAST: str {{x[0-9]+}}, [sp, #32] | |
459 ; FAST: str {{x[0-9]+}}, [sp, #40] | |
460 ; FAST: str {{x[0-9]+}}, [sp, #48] | |
461 ; FAST: str {{x[0-9]+}}, [sp, #56] | |
462 ; FAST: str {{w[0-9]+}}, [sp] | |
463 ; Address of s1 is passed on stack at sp+8 | |
464 ; FAST: str {{x[0-9]+}}, [sp, #8] | |
465 ; FAST: str {{x[0-9]+}}, [sp, #16] | |
466 %tmp = alloca %struct.s43, align 16 | |
467 %tmp1 = alloca %struct.s43, align 16 | |
468 %0 = bitcast %struct.s43* %tmp to i8* | |
469 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 | |
470 %1 = bitcast %struct.s43* %tmp1 to i8* | |
471 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 | |
472 %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, | |
473 i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5 | |
474 ret i32 %call | |
475 } | |
476 | |
477 ; rdar://13668927 | |
478 ; Check that we don't split an i128. | |
479 declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, | |
480 i32 %i6, i32 %i7, i128 %s1, i32 %i8) | |
481 | |
482 define i32 @i128_split() { | |
483 entry: | |
484 ; CHECK-LABEL: i128_split | |
485 ; "i128 %0" should be on stack at [sp]. | |
486 ; "i32 8" should be on stack at [sp, #16]. | |
487 ; CHECK: str {{w[0-9]+}}, [sp, #16] | |
488 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp] | |
489 ; FAST-LABEL: i128_split | |
490 ; FAST: sub sp, sp | |
491 ; FAST: mov x[[ADDR:[0-9]+]], sp | |
492 ; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16] | |
493 ; Load/Store opt is disabled with -O0, so the i128 is split. | |
494 ; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8] | |
495 ; FAST: str {{x[0-9]+}}, [x[[ADDR]]] | |
496 %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16 | |
497 %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5, | |
498 i32 6, i32 7, i128 %0, i32 8) #5 | |
499 ret i32 %call | |
500 } | |
501 | |
502 declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, | |
503 i32 %i6, i32 %i7, i64 %s1, i32 %i8) | |
504 | |
505 define i32 @i64_split() { | |
506 entry: | |
507 ; CHECK-LABEL: i64_split | |
508 ; "i64 %0" should be in register x7. | |
509 ; "i32 8" should be on stack at [sp]. | |
510 ; CHECK: ldr x7, [{{x[0-9]+}}] | |
511 ; CHECK: str {{w[0-9]+}}, [sp] | |
512 ; FAST-LABEL: i64_split | |
513 ; FAST: ldr x7, [{{x[0-9]+}}] | |
514 ; FAST: mov x[[R0:[0-9]+]], sp | |
515 ; FAST: orr w[[R1:[0-9]+]], wzr, #0x8 | |
516 ; FAST: str w[[R1]], {{\[}}x[[R0]]{{\]}} | |
517 %0 = load i64* bitcast (%struct.s41* @g41 to i64*), align 16 | |
518 %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5, | |
519 i32 6, i32 7, i64 %0, i32 8) #5 | |
520 ret i32 %call | |
521 } | |
522 | |
523 attributes #0 = { noinline nounwind readnone "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } | |
524 attributes #1 = { nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } | |
525 attributes #2 = { noinline nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } | |
526 attributes #3 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } | |
527 attributes #4 = { nounwind } | |
528 attributes #5 = { nobuiltin } | |
529 | |
530 !0 = metadata !{metadata !"int", metadata !1} | |
531 !1 = metadata !{metadata !"omnipotent char", metadata !2} | |
532 !2 = metadata !{metadata !"Simple C/C++ TBAA"} | |
533 !3 = metadata !{metadata !"short", metadata !1} | |
534 !4 = metadata !{i64 0, i64 4, metadata !0, i64 4, i64 2, metadata !3, i64 8, i64 4, metadata !0, i64 12, i64 2, metadata !3, i64 16, i64 4, metadata !0, i64 20, i64 2, metadata !3} |