; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DARWINPCS
; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AAPCS

; Variadic external function; it is the call target in test_varargs_stackalign.
declare void @callee(...)

; A [2 x float] that follows a single float argument: the check below expects
; its two elements to be added directly from s1 and s2, with the result in s0.
define float @test_hfa_regs(float, [2 x float] %in) {
; CHECK-LABEL: test_hfa_regs:
; CHECK: fadd s0, s1, s2

  %elt0 = extractvalue [2 x float] %in, 0
  %elt1 = extractvalue [2 x float] %in, 1
  %total = fadd float %elt0, %elt1
  ret float %total
}

; The array must be placed in one contiguous block on the stack instead of the
; default layout of two separate 8-byte slots, so both elements are expected to
; be fetched with a single ldp from [sp].
define float @test_hfa_block([7 x float], [2 x float] %in) {
; CHECK-LABEL: test_hfa_block:
; CHECK: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp]
; CHECK: fadd s0, [[LHS]], [[RHS]]

  %elt0 = extractvalue [2 x float] %in, 0
  %elt1 = extractvalue [2 x float] %in, 1
  %total = fadd float %elt0, %elt1
  ret float %total
}

; Once an HFA has been sent to the stack, later floating-point arguments must
; not backfill the VFP registers: %rhs has to be passed on the stack as well
; (expected at [sp, #8]) rather than in s7.
define float @test_hfa_block_consume([7 x float], [2 x float] %in, float %rhs) {
; CHECK-LABEL: test_hfa_block_consume:
; CHECK-DAG: ldr [[LHS:s[0-9]+]], [sp]
; CHECK-DAG: ldr [[RHS:s[0-9]+]], [sp, #8]
; CHECK: fadd s0, [[LHS]], [[RHS]]

  %elt0 = extractvalue [2 x float] %in, 0
  %total = fadd float %elt0, %rhs
  ret float %total
}

; Stack placement of a [2 x float] that comes after an [8 x float] and a
; [1 x float]. The expected offset differs per calling convention: [sp, #8] on
; the AAPCS (linux-gnu) run and [sp, #4] on the Darwin (ios) run.
define float @test_hfa_stackalign([8 x float], [1 x float], [2 x float] %in) {
; CHECK-LABEL: test_hfa_stackalign:
; CHECK-AAPCS: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp, #8]
; CHECK-DARWINPCS: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp, #4]
; CHECK: fadd s0, [[LHS]], [[RHS]]
  %elt0 = extractvalue [2 x float] %in, 0
  %elt1 = extractvalue [2 x float] %in, 1
  %total = fadd float %elt0, %elt1
  ret float %total
}

; Placing an HFA on the stack must leave integer argument assignment untouched:
; the two i64 arguments still use x0 and x1, so returning %res is expected to
; be a plain move from x1.
define i64 @test_hfa_ignores_gprs([7 x float], [2 x float] %in, i64, i64 %res) {
; CHECK-LABEL: test_hfa_ignores_gprs:
; CHECK: mov x0, x1
  ret i64 %res
}

; Darwin varargs handling must not promote a [2 x float] to double, but the
; array still has to land in an 8-byte aligned slot (expected at [sp, #16]).
; Only the Darwin run has a check for the store.
define void @test_varargs_stackalign() {
; CHECK-LABEL: test_varargs_stackalign:
; CHECK-DARWINPCS: stp {{w[0-9]+}}, {{w[0-9]+}}, [sp, #16]

  call void(...) @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0])
  ret void
}

; A [2 x i64] that follows seven i64 arguments: both elements are expected to
; be loaded together with one ldp from [sp].
define i64 @test_smallstruct_block([7 x i64], [2 x i64] %in) {
; CHECK-LABEL: test_smallstruct_block:
; CHECK: ldp [[LHS:x[0-9]+]], [[RHS:x[0-9]+]], [sp]
; CHECK: add x0, [[LHS]], [[RHS]]
  %elt0 = extractvalue [2 x i64] %in, 0
  %elt1 = extractvalue [2 x i64] %in, 1
  %total = add i64 %elt0, %elt1
  ret i64 %total
}

; Once a small struct has been sent to the stack, later integer arguments must
; not backfill the registers: %rhs has to be passed on the stack (expected at
; [sp, #16]) rather than in x7.
define i64 @test_smallstruct_block_consume([7 x i64], [2 x i64] %in, i64 %rhs) {
; CHECK-LABEL: test_smallstruct_block_consume:
; CHECK-DAG: ldr [[LHS:x[0-9]+]], [sp]
; CHECK-DAG: ldr [[RHS:x[0-9]+]], [sp, #16]
; CHECK: add x0, [[LHS]], [[RHS]]

  %elt0 = extractvalue [2 x i64] %in, 0
  %total = add i64 %elt0, %rhs
  ret i64 %total
}

; Two-element array of <1 x i64> after [7 x double]: element 0 is expected to
; be loaded from [sp] into d0.
define <1 x i64> @test_v1i64_blocked([7 x double], [2 x <1 x i64>] %in) {
; CHECK-LABEL: test_v1i64_blocked:
; CHECK: ldr d0, [sp]
  %elt0 = extractvalue [2 x <1 x i64>] %in, 0
  ret <1 x i64> %elt0
}

; Two-element array of <1 x double> after [7 x double]: element 0 is expected
; to be loaded from [sp] into d0.
define <1 x double> @test_v1f64_blocked([7 x double], [2 x <1 x double>] %in) {
; CHECK-LABEL: test_v1f64_blocked:
; CHECK: ldr d0, [sp]
  %elt0 = extractvalue [2 x <1 x double>] %in, 0
  ret <1 x double> %elt0
}

; Two-element array of <2 x i32> after [7 x double]: element 0 is expected to
; be loaded from [sp] into d0.
define <2 x i32> @test_v2i32_blocked([7 x double], [2 x <2 x i32>] %in) {
; CHECK-LABEL: test_v2i32_blocked:
; CHECK: ldr d0, [sp]
  %elt0 = extractvalue [2 x <2 x i32>] %in, 0
  ret <2 x i32> %elt0
}

; Two-element array of <2 x float> after [7 x double]: element 0 is expected to
; be loaded from [sp] into d0.
define <2 x float> @test_v2f32_blocked([7 x double], [2 x <2 x float>] %in) {
; CHECK-LABEL: test_v2f32_blocked:
; CHECK: ldr d0, [sp]
  %elt0 = extractvalue [2 x <2 x float>] %in, 0
  ret <2 x float> %elt0
}

; Two-element array of <4 x i16> after [7 x double]: element 0 is expected to
; be loaded from [sp] into d0.
define <4 x i16> @test_v4i16_blocked([7 x double], [2 x <4 x i16>] %in) {
; CHECK-LABEL: test_v4i16_blocked:
; CHECK: ldr d0, [sp]
  %elt0 = extractvalue [2 x <4 x i16>] %in, 0
  ret <4 x i16> %elt0
}

; Two-element array of <4 x half> after [7 x double]: element 0 is expected to
; be loaded from [sp] into d0.
define <4 x half> @test_v4f16_blocked([7 x double], [2 x <4 x half>] %in) {
; CHECK-LABEL: test_v4f16_blocked:
; CHECK: ldr d0, [sp]
  %elt0 = extractvalue [2 x <4 x half>] %in, 0
  ret <4 x half> %elt0
}

; Two-element array of <8 x i8> after [7 x double]: element 0 is expected to be
; loaded from [sp] into d0.
define <8 x i8> @test_v8i8_blocked([7 x double], [2 x <8 x i8>] %in) {
; CHECK-LABEL: test_v8i8_blocked:
; CHECK: ldr d0, [sp]
  %elt0 = extractvalue [2 x <8 x i8>] %in, 0
  ret <8 x i8> %elt0
}

; Two-element array of <2 x i64> after [7 x double]: element 0 is expected to
; be loaded from [sp] into q0.
define <2 x i64> @test_v2i64_blocked([7 x double], [2 x <2 x i64>] %in) {
; CHECK-LABEL: test_v2i64_blocked:
; CHECK: ldr q0, [sp]
  %elt0 = extractvalue [2 x <2 x i64>] %in, 0
  ret <2 x i64> %elt0
}

; Two-element array of <2 x double> after [7 x double]: element 0 is expected
; to be loaded from [sp] into q0.
define <2 x double> @test_v2f64_blocked([7 x double], [2 x <2 x double>] %in) {
; CHECK-LABEL: test_v2f64_blocked:
; CHECK: ldr q0, [sp]
  %elt0 = extractvalue [2 x <2 x double>] %in, 0
  ret <2 x double> %elt0
}

; Two-element array of <4 x i32> after [7 x double]: element 0 is expected to
; be loaded from [sp] into q0.
define <4 x i32> @test_v4i32_blocked([7 x double], [2 x <4 x i32>] %in) {
; CHECK-LABEL: test_v4i32_blocked:
; CHECK: ldr q0, [sp]
  %elt0 = extractvalue [2 x <4 x i32>] %in, 0
  ret <4 x i32> %elt0
}

; Two-element array of <4 x float> after [7 x double]: element 0 is expected to
; be loaded from [sp] into q0.
define <4 x float> @test_v4f32_blocked([7 x double], [2 x <4 x float>] %in) {
; CHECK-LABEL: test_v4f32_blocked:
; CHECK: ldr q0, [sp]
  %elt0 = extractvalue [2 x <4 x float>] %in, 0
  ret <4 x float> %elt0
}

; Two-element array of <8 x i16> after [7 x double]: element 0 is expected to
; be loaded from [sp] into q0.
define <8 x i16> @test_v8i16_blocked([7 x double], [2 x <8 x i16>] %in) {
; CHECK-LABEL: test_v8i16_blocked:
; CHECK: ldr q0, [sp]
  %elt0 = extractvalue [2 x <8 x i16>] %in, 0
  ret <8 x i16> %elt0
}

; Two-element array of <8 x half> after [7 x double]: element 0 is expected to
; be loaded from [sp] into q0.
define <8 x half> @test_v8f16_blocked([7 x double], [2 x <8 x half>] %in) {
; CHECK-LABEL: test_v8f16_blocked:
; CHECK: ldr q0, [sp]
  %elt0 = extractvalue [2 x <8 x half>] %in, 0
  ret <8 x half> %elt0
}

; Two-element array of <16 x i8> after [7 x double]: element 0 is expected to
; be loaded from [sp] into q0.
define <16 x i8> @test_v16i8_blocked([7 x double], [2 x <16 x i8>] %in) {
; CHECK-LABEL: test_v16i8_blocked:
; CHECK: ldr q0, [sp]
  %elt0 = extractvalue [2 x <16 x i8>] %in, 0
  ret <16 x i8> %elt0
}

; Two-element array of half after [7 x double]: element 0 is expected to be
; loaded from [sp] into h0.
define half @test_f16_blocked([7 x double], [2 x half] %in) {
; CHECK-LABEL: test_f16_blocked:
; CHECK: ldr h0, [sp]
  %elt0 = extractvalue [2 x half] %in, 0
  ret half %elt0
}