// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -S -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | \
// RUN: FileCheck -check-prefixes=CHECK,CHECK-A64 %s
// RUN: %clang_cc1 -triple armv8-none-linux-gnueabi -target-feature +neon \
// RUN: -target-feature +fp16 -S -disable-O0-optnone -emit-llvm -o - %s | \
// RUN: opt -S -mem2reg | FileCheck -check-prefixes=CHECK,CHECK-A32 %s

#include <arm_neon.h>
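
// Each test below returns the result of one vld1_<type>_xN (or vld1q_<type>_xN)
// intrinsic and checks that it lowers to a single llvm.aarch64.neon.ld1xN call
// on AArch64 (CHECK-A64) or a single llvm.arm.neon.vld1xN call on AArch32
// (CHECK-A32), followed by a store of the returned vectors and a memcpy into
// the returned aggregate.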

// CHECK-LABEL: @test_vld1_f16_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK-A32: %struct.float16x4x2_t* noalias sret(%struct.float16x4x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [[HALF:(half|i16)]]*
// CHECK: [[VLD1XN:%.*]] = call { <4 x [[HALF]]>, <4 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x2.v4f16.p0f16|arm.neon.vld1x2.v4i16.p0i16}}([[HALF]]* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x [[HALF]]>, <4 x [[HALF]]> }*
// CHECK: store { <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD1XN]], { <4 x [[HALF]]>, <4 x [[HALF]]> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.float16x4x2_t [[TMP6]]
// CHECK-A32: ret void
float16x4x2_t test_vld1_f16_x2(float16_t const *a) {
  return vld1_f16_x2(a);
}

// CHECK-LABEL: @test_vld1_f16_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK-A32: %struct.float16x4x3_t* noalias sret(%struct.float16x4x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [[HALF]]*
// CHECK: [[VLD1XN:%.*]] = call { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x3.v4f16.p0f16|arm.neon.vld1x3.v4i16.p0i16}}([[HALF]]* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> }*
// CHECK: store { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD1XN]], { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.float16x4x3_t [[TMP6]]
// CHECK-A32: ret void
float16x4x3_t test_vld1_f16_x3(float16_t const *a) {
  return vld1_f16_x3(a);
}

// CHECK-LABEL: @test_vld1_f16_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK-A32: %struct.float16x4x4_t* noalias sret(%struct.float16x4x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [[HALF]]*
// CHECK: [[VLD1XN:%.*]] = call { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x4.v4f16.p0f16|arm.neon.vld1x4.v4i16.p0i16}}([[HALF]]* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> }*
// CHECK: store { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD1XN]], { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.float16x4x4_t [[TMP6]]
// CHECK-A32: ret void
float16x4x4_t test_vld1_f16_x4(float16_t const *a) {
  return vld1_f16_x4(a);
}

// CHECK-LABEL: @test_vld1_f32_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK-A32: %struct.float32x2x2_t* noalias sret(%struct.float32x2x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v2f32.p0f32(float* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.float32x2x2_t [[TMP6]]
// CHECK-A32: ret void
float32x2x2_t test_vld1_f32_x2(float32_t const *a) {
  return vld1_f32_x2(a);
}

// CHECK-LABEL: @test_vld1_f32_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK-A32: %struct.float32x2x3_t* noalias sret(%struct.float32x2x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v2f32.p0f32(float* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.float32x2x3_t [[TMP6]]
// CHECK-A32: ret void
float32x2x3_t test_vld1_f32_x3(float32_t const *a) {
  return vld1_f32_x3(a);
}

// CHECK-LABEL: @test_vld1_f32_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK-A32: %struct.float32x2x4_t* noalias sret(%struct.float32x2x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v2f32.p0f32(float* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.float32x2x4_t [[TMP6]]
// CHECK-A32: ret void
float32x2x4_t test_vld1_f32_x4(float32_t const *a) {
  return vld1_f32_x4(a);
}

// CHECK-LABEL: @test_vld1_p16_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK-A32: %struct.poly16x4x2_t* noalias sret(%struct.poly16x4x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.poly16x4x2_t [[TMP6]]
// CHECK-A32: ret void
poly16x4x2_t test_vld1_p16_x2(poly16_t const *a) {
  return vld1_p16_x2(a);
}

// CHECK-LABEL: @test_vld1_p16_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK-A32: %struct.poly16x4x3_t* noalias sret(%struct.poly16x4x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.poly16x4x3_t [[TMP6]]
// CHECK-A32: ret void
poly16x4x3_t test_vld1_p16_x3(poly16_t const *a) {
  return vld1_p16_x3(a);
}

// CHECK-LABEL: @test_vld1_p16_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK-A32: %struct.poly16x4x4_t* noalias sret(%struct.poly16x4x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.poly16x4x4_t [[TMP6]]
// CHECK-A32: ret void
poly16x4x4_t test_vld1_p16_x4(poly16_t const *a) {
  return vld1_p16_x4(a);
}

// CHECK-LABEL: @test_vld1_p8_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK-A32: %struct.poly8x8x2_t* noalias sret(%struct.poly8x8x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.poly8x8x2_t [[TMP4]]
// CHECK-A32: ret void
poly8x8x2_t test_vld1_p8_x2(poly8_t const *a) {
  return vld1_p8_x2(a);
}

// CHECK-LABEL: @test_vld1_p8_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK-A32: %struct.poly8x8x3_t* noalias sret(%struct.poly8x8x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.poly8x8x3_t [[TMP4]]
// CHECK-A32: ret void
poly8x8x3_t test_vld1_p8_x3(poly8_t const *a) {
  return vld1_p8_x3(a);
}

// CHECK-LABEL: @test_vld1_p8_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK-A32: %struct.poly8x8x4_t* noalias sret(%struct.poly8x8x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.poly8x8x4_t [[TMP4]]
// CHECK-A32: ret void
poly8x8x4_t test_vld1_p8_x4(poly8_t const *a) {
  return vld1_p8_x4(a);
}

// CHECK-LABEL: @test_vld1_s16_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK-A32: %struct.int16x4x2_t* noalias sret(%struct.int16x4x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.int16x4x2_t [[TMP6]]
// CHECK-A32: ret void
int16x4x2_t test_vld1_s16_x2(int16_t const *a) {
  return vld1_s16_x2(a);
}

// CHECK-LABEL: @test_vld1_s16_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK-A32: %struct.int16x4x3_t* noalias sret(%struct.int16x4x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.int16x4x3_t [[TMP6]]
// CHECK-A32: ret void
int16x4x3_t test_vld1_s16_x3(int16_t const *a) {
  return vld1_s16_x3(a);
}

// CHECK-LABEL: @test_vld1_s16_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK-A32: %struct.int16x4x4_t* noalias sret(%struct.int16x4x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.int16x4x4_t [[TMP6]]
// CHECK-A32: ret void
int16x4x4_t test_vld1_s16_x4(int16_t const *a) {
  return vld1_s16_x4(a);
}

// CHECK-LABEL: @test_vld1_s32_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK-A32: %struct.int32x2x2_t* noalias sret(%struct.int32x2x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v2i32.p0i32(i32* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.int32x2x2_t [[TMP6]]
// CHECK-A32: ret void
int32x2x2_t test_vld1_s32_x2(int32_t const *a) {
  return vld1_s32_x2(a);
}

// CHECK-LABEL: @test_vld1_s32_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK-A32: %struct.int32x2x3_t* noalias sret(%struct.int32x2x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v2i32.p0i32(i32* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.int32x2x3_t [[TMP6]]
// CHECK-A32: ret void
int32x2x3_t test_vld1_s32_x3(int32_t const *a) {
  return vld1_s32_x3(a);
}

// CHECK-LABEL: @test_vld1_s32_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK-A32: %struct.int32x2x4_t* noalias sret(%struct.int32x2x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v2i32.p0i32(i32* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.int32x2x4_t [[TMP6]]
// CHECK-A32: ret void
int32x2x4_t test_vld1_s32_x4(int32_t const *a) {
  return vld1_s32_x4(a);
}

// CHECK-LABEL: @test_vld1_s64_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK-A32: %struct.int64x1x2_t* noalias sret(%struct.int64x1x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v1i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.int64x1x2_t [[TMP6]]
// CHECK-A32: ret void
int64x1x2_t test_vld1_s64_x2(int64_t const *a) {
  return vld1_s64_x2(a);
}

// CHECK-LABEL: @test_vld1_s64_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK-A32: %struct.int64x1x3_t* noalias sret(%struct.int64x1x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v1i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.int64x1x3_t [[TMP6]]
// CHECK-A32: ret void
int64x1x3_t test_vld1_s64_x3(int64_t const *a) {
  return vld1_s64_x3(a);
}

// CHECK-LABEL: @test_vld1_s64_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK-A32: %struct.int64x1x4_t* noalias sret(%struct.int64x1x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v1i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.int64x1x4_t [[TMP6]]
// CHECK-A32: ret void
int64x1x4_t test_vld1_s64_x4(int64_t const *a) {
  return vld1_s64_x4(a);
}

// CHECK-LABEL: @test_vld1_s8_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK-A32: %struct.int8x8x2_t* noalias sret(%struct.int8x8x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.int8x8x2_t [[TMP4]]
// CHECK-A32: ret void
int8x8x2_t test_vld1_s8_x2(int8_t const *a) {
  return vld1_s8_x2(a);
}

// CHECK-LABEL: @test_vld1_s8_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK-A32: %struct.int8x8x3_t* noalias sret(%struct.int8x8x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.int8x8x3_t [[TMP4]]
// CHECK-A32: ret void
int8x8x3_t test_vld1_s8_x3(int8_t const *a) {
  return vld1_s8_x3(a);
}

// CHECK-LABEL: @test_vld1_s8_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK-A32: %struct.int8x8x4_t* noalias sret(%struct.int8x8x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.int8x8x4_t [[TMP4]]
// CHECK-A32: ret void
int8x8x4_t test_vld1_s8_x4(int8_t const *a) {
  return vld1_s8_x4(a);
}

// CHECK-LABEL: @test_vld1_u16_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK-A32: %struct.uint16x4x2_t* noalias sret(%struct.uint16x4x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint16x4x2_t [[TMP6]]
// CHECK-A32: ret void
uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) {
  return vld1_u16_x2(a);
}

// CHECK-LABEL: @test_vld1_u16_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK-A32: %struct.uint16x4x3_t* noalias sret(%struct.uint16x4x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint16x4x3_t [[TMP6]]
// CHECK-A32: ret void
uint16x4x3_t test_vld1_u16_x3(uint16_t const *a) {
  return vld1_u16_x3(a);
}

// CHECK-LABEL: @test_vld1_u16_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK-A32: %struct.uint16x4x4_t* noalias sret(%struct.uint16x4x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint16x4x4_t [[TMP6]]
// CHECK-A32: ret void
uint16x4x4_t test_vld1_u16_x4(uint16_t const *a) {
  return vld1_u16_x4(a);
}

// CHECK-LABEL: @test_vld1_u32_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK-A32: %struct.uint32x2x2_t* noalias sret(%struct.uint32x2x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v2i32.p0i32(i32* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint32x2x2_t [[TMP6]]
// CHECK-A32: ret void
uint32x2x2_t test_vld1_u32_x2(uint32_t const *a) {
  return vld1_u32_x2(a);
}

// CHECK-LABEL: @test_vld1_u32_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK-A32: %struct.uint32x2x3_t* noalias sret(%struct.uint32x2x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v2i32.p0i32(i32* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint32x2x3_t [[TMP6]]
// CHECK-A32: ret void
uint32x2x3_t test_vld1_u32_x3(uint32_t const *a) {
  return vld1_u32_x3(a);
}

// CHECK-LABEL: @test_vld1_u32_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK-A32: %struct.uint32x2x4_t* noalias sret(%struct.uint32x2x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v2i32.p0i32(i32* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint32x2x4_t [[TMP6]]
// CHECK-A32: ret void
uint32x2x4_t test_vld1_u32_x4(uint32_t const *a) {
  return vld1_u32_x4(a);
}

// CHECK-LABEL: @test_vld1_u64_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK-A32: %struct.uint64x1x2_t* noalias sret(%struct.uint64x1x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v1i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint64x1x2_t [[TMP6]]
// CHECK-A32: ret void
uint64x1x2_t test_vld1_u64_x2(uint64_t const *a) {
  return vld1_u64_x2(a);
}

// CHECK-LABEL: @test_vld1_u64_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK-A32: %struct.uint64x1x3_t* noalias sret(%struct.uint64x1x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v1i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint64x1x3_t [[TMP6]]
// CHECK-A32: ret void
uint64x1x3_t test_vld1_u64_x3(uint64_t const *a) {
  return vld1_u64_x3(a);
}

// CHECK-LABEL: @test_vld1_u64_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK-A32: %struct.uint64x1x4_t* noalias sret(%struct.uint64x1x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v1i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint64x1x4_t [[TMP6]]
// CHECK-A32: ret void
uint64x1x4_t test_vld1_u64_x4(uint64_t const *a) {
  return vld1_u64_x4(a);
}

// CHECK-LABEL: @test_vld1_u8_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK-A32: %struct.uint8x8x2_t* noalias sret(%struct.uint8x8x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint8x8x2_t [[TMP4]]
// CHECK-A32: ret void
uint8x8x2_t test_vld1_u8_x2(uint8_t const *a) {
  return vld1_u8_x2(a);
}

// CHECK-LABEL: @test_vld1_u8_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK-A32: %struct.uint8x8x3_t* noalias sret(%struct.uint8x8x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint8x8x3_t [[TMP4]]
// CHECK-A32: ret void
uint8x8x3_t test_vld1_u8_x3(uint8_t const *a) {
  return vld1_u8_x3(a);
}

// CHECK-LABEL: @test_vld1_u8_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK-A32: %struct.uint8x8x4_t* noalias sret(%struct.uint8x8x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint8x8x4_t [[TMP4]]
// CHECK-A32: ret void
uint8x8x4_t test_vld1_u8_x4(uint8_t const *a) {
  return vld1_u8_x4(a);
}

// CHECK-LABEL: @test_vld1q_f16_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK-A32: %struct.float16x8x2_t* noalias sret(%struct.float16x8x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [[HALF]]*
// CHECK: [[VLD1XN:%.*]] = call { <8 x [[HALF]]>, <8 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x2.v8f16.p0f16|arm.neon.vld1x2.v8i16.p0i16}}([[HALF]]* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x [[HALF]]>, <8 x [[HALF]]> }*
// CHECK: store { <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD1XN]], { <8 x [[HALF]]>, <8 x [[HALF]]> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
// CHECK-A64: ret %struct.float16x8x2_t [[TMP6]]
// CHECK-A32: ret void
float16x8x2_t test_vld1q_f16_x2(float16_t const *a) {
  return vld1q_f16_x2(a);
}

// CHECK-LABEL: @test_vld1q_f16_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK-A32: %struct.float16x8x3_t* noalias sret(%struct.float16x8x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [[HALF]]*
// CHECK: [[VLD1XN:%.*]] = call { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x3.v8f16.p0f16|arm.neon.vld1x3.v8i16.p0i16}}([[HALF]]* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> }*
// CHECK: store { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD1XN]], { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
// CHECK-A64: ret %struct.float16x8x3_t [[TMP6]]
// CHECK-A32: ret void
float16x8x3_t test_vld1q_f16_x3(float16_t const *a) {
  return vld1q_f16_x3(a);
}

// CHECK-LABEL: @test_vld1q_f16_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK-A32: %struct.float16x8x4_t* noalias sret(%struct.float16x8x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [[HALF]]*
// CHECK: [[VLD1XN:%.*]] = call { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x4.v8f16.p0f16|arm.neon.vld1x4.v8i16.p0i16}}([[HALF]]* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> }*
// CHECK: store { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD1XN]], { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
// CHECK-A64: ret %struct.float16x8x4_t [[TMP6]]
// CHECK-A32: ret void
float16x8x4_t test_vld1q_f16_x4(float16_t const *a) {
  return vld1q_f16_x4(a);
}

// CHECK-LABEL: @test_vld1q_f32_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK-A32: %struct.float32x4x2_t* noalias sret(%struct.float32x4x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4f32.p0f32(float* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
// CHECK-A64: ret %struct.float32x4x2_t [[TMP6]]
// CHECK-A32: ret void
float32x4x2_t test_vld1q_f32_x2(float32_t const *a) {
  return vld1q_f32_x2(a);
}
|
|
790
|
|
791 // CHECK-LABEL: @test_vld1q_f32_x3(
|
|
792 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
|
207
|
793 // CHECK-A32: %struct.float32x4x3_t* noalias sret(%struct.float32x4x3_t) align 8 [[RETVAL:%.*]],
|
150
|
794 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align {{16|8}}
|
|
795 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
|
|
796 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
|
|
797 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
|
|
798 // CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4f32.p0f32(float* [[TMP2]])
|
|
799 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
|
|
800 // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
|
|
801 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
|
|
802 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
|
|
803 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false)
|
|
804 // CHECK-A64: [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
|
|
805 // CHECK-A64: ret %struct.float32x4x3_t [[TMP6]]
|
|
806 // CHECK-A32: ret void
|
|
807 float32x4x3_t test_vld1q_f32_x3(float32_t const *a) {
|
|
808 return vld1q_f32_x3(a);
|
|
809 }
|
|
810
|
|
811 // CHECK-LABEL: @test_vld1q_f32_x4(
|
|
812 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
|
207
|
813 // CHECK-A32: %struct.float32x4x4_t* noalias sret(%struct.float32x4x4_t) align 8 [[RETVAL:%.*]],
|
150
|
814 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align {{16|8}}
|
|
815 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
|
|
816 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
|
|
817 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
|
|
818 // CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4f32.p0f32(float* [[TMP2]])
|
|
819 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
|
|
820 // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
|
|
821 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
|
|
822 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
|
|
823 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false)
|
|
824 // CHECK-A64: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
|
|
825 // CHECK-A64: ret %struct.float32x4x4_t [[TMP6]]
|
|
826 // CHECK-A32: ret void
|
|
827 float32x4x4_t test_vld1q_f32_x4(float32_t const *a) {
|
|
828 return vld1q_f32_x4(a);
|
|
829 }
|
|
830
|
|
831 // CHECK-LABEL: @test_vld1q_p16_x2(
|
|
832 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
|
207
|
833 // CHECK-A32: %struct.poly16x8x2_t* noalias sret(%struct.poly16x8x2_t) align 8 [[RETVAL:%.*]],
|
150
|
834 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align {{16|8}}
|
|
835 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
|
|
836 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
|
|
837 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
|
|
838 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i16.p0i16(i16* [[TMP2]])
|
|
839 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
|
|
840 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
|
|
841 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
|
|
842 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
|
|
843 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false)
|
|
844 // CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
|
|
845 // CHECK-A64: ret %struct.poly16x8x2_t [[TMP6]]
|
|
846 // CHECK-A32: ret void
|
|
847 poly16x8x2_t test_vld1q_p16_x2(poly16_t const *a) {
|
|
848 return vld1q_p16_x2(a);
|
|
849 }
|
|
850
|
|
851 // CHECK-LABEL: @test_vld1q_p16_x3(
|
|
852 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
|
207
|
853 // CHECK-A32: %struct.poly16x8x3_t* noalias sret(%struct.poly16x8x3_t) align 8 [[RETVAL:%.*]],
|
150
|
854 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align {{16|8}}
|
|
855 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
|
|
856 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
|
|
857 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
|
|
858 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i16.p0i16(i16* [[TMP2]])
|
|
859 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
|
|
860 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
|
|
861 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
|
|
862 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
|
|
863 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false)
|
|
864 // CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
|
|
865 // CHECK-A64: ret %struct.poly16x8x3_t [[TMP6]]
|
|
866 // CHECK-A32: ret void
|
|
867 poly16x8x3_t test_vld1q_p16_x3(poly16_t const *a) {
|
|
868 return vld1q_p16_x3(a);
|
|
869 }
|
|
870
|
|
871 // CHECK-LABEL: @test_vld1q_p16_x4(
|
|
872 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
|
207
|
873 // CHECK-A32: %struct.poly16x8x4_t* noalias sret(%struct.poly16x8x4_t) align 8 [[RETVAL:%.*]],
|
150
|
874 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align {{16|8}}
|
|
875 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
|
|
876 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
|
|
877 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
|
|
878 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i16.p0i16(i16* [[TMP2]])
|
|
879 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
|
|
880 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
|
|
881 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
|
|
882 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
|
|
883 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false)
|
|
884 // CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
|
|
885 // CHECK-A64: ret %struct.poly16x8x4_t [[TMP6]]
|
|
886 // CHECK-A32: ret void
|
|
887 poly16x8x4_t test_vld1q_p16_x4(poly16_t const *a) {
|
|
888 return vld1q_p16_x4(a);
|
|
889 }
|
|
890
|
|
891 // CHECK-LABEL: @test_vld1q_p8_x2(
|
|
892 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
|
207
|
893 // CHECK-A32: %struct.poly8x16x2_t* noalias sret(%struct.poly8x16x2_t) align 8 [[RETVAL:%.*]],
|
150
|
894 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align {{16|8}}
|
|
895 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
|
|
896 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v16i8.p0i8(i8* %a)
|
|
897 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
|
|
898 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
|
|
899 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
|
|
900 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
|
|
901 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 32, i1 false)
|
|
902 // CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
|
|
903 // CHECK-A64: ret %struct.poly8x16x2_t [[TMP4]]
|
|
904 // CHECK-A32: ret void
|
|
905 poly8x16x2_t test_vld1q_p8_x2(poly8_t const *a) {
|
|
906 return vld1q_p8_x2(a);
|
|
907 }
|
|
908
|
|
909 // CHECK-LABEL: @test_vld1q_p8_x3(
|
|
910 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
|
207
|
911 // CHECK-A32: %struct.poly8x16x3_t* noalias sret(%struct.poly8x16x3_t) align 8 [[RETVAL:%.*]],
|
150
|
912 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align {{16|8}}
|
|
913 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
|
|
914 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v16i8.p0i8(i8* %a)
|
|
915 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
|
|
916 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
|
|
917 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
|
|
918 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
|
|
919 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 48, i1 false)
|
|
920 // CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
|
|
921 // CHECK-A64: ret %struct.poly8x16x3_t [[TMP4]]
|
|
922 // CHECK-A32: ret void
|
|
923 poly8x16x3_t test_vld1q_p8_x3(poly8_t const *a) {
|
|
924 return vld1q_p8_x3(a);
|
|
925 }
|
|
926
|
|
927 // CHECK-LABEL: @test_vld1q_p8_x4(
|
|
928 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
|
207
|
929 // CHECK-A32: %struct.poly8x16x4_t* noalias sret(%struct.poly8x16x4_t) align 8 [[RETVAL:%.*]],
|
150
|
930 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align {{16|8}}
|
|
931 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
|
|
932 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v16i8.p0i8(i8* %a)
|
|
933 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
|
|
934 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
|
|
935 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
|
|
936 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
|
|
937 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 64, i1 false)
|
|
938 // CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
|
|
939 // CHECK-A64: ret %struct.poly8x16x4_t [[TMP4]]
|
|
940 // CHECK-A32: ret void
|
|
941 poly8x16x4_t test_vld1q_p8_x4(poly8_t const *a) {
|
|
942 return vld1q_p8_x4(a);
|
|
943 }
|
|
944
|
|
945 // CHECK-LABEL: @test_vld1q_s16_x2(
|
|
946 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
|
207
|
947 // CHECK-A32: %struct.int16x8x2_t* noalias sret(%struct.int16x8x2_t) align 8 [[RETVAL:%.*]],
|
150
|
948 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align {{16|8}}
|
|
949 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
|
|
950 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
|
|
951 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
|
|
952 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i16.p0i16(i16* [[TMP2]])
|
|
953 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
|
|
954 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
|
|
955 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
|
|
956 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
|
|
957 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false)
|
|
958 // CHECK-A64: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
|
|
959 // CHECK-A64: ret %struct.int16x8x2_t [[TMP6]]
|
|
960 // CHECK-A32: ret void
|
|
961 int16x8x2_t test_vld1q_s16_x2(int16_t const *a) {
|
|
962 return vld1q_s16_x2(a);
|
|
963 }
|
|
964
|
|
965 // CHECK-LABEL: @test_vld1q_s16_x3(
|
|
966 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
|
207
|
967 // CHECK-A32: %struct.int16x8x3_t* noalias sret(%struct.int16x8x3_t) align 8 [[RETVAL:%.*]],
|
150
|
968 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align {{16|8}}
|
|
969 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
|
|
970 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
|
|
971 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
|
|
972 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i16.p0i16(i16* [[TMP2]])
|
|
973 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
|
|
974 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
|
|
975 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
|
|
976 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
|
|
977 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false)
|
|
978 // CHECK-A64: [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
|
|
979 // CHECK-A64: ret %struct.int16x8x3_t [[TMP6]]
|
|
980 // CHECK-A32: ret void
|
|
981 int16x8x3_t test_vld1q_s16_x3(int16_t const *a) {
|
|
982 return vld1q_s16_x3(a);
|
|
983 }
|
|
984
|
|
985 // CHECK-LABEL: @test_vld1q_s16_x4(
|
|
986 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
|
207
|
987 // CHECK-A32: %struct.int16x8x4_t* noalias sret(%struct.int16x8x4_t) align 8 [[RETVAL:%.*]],
|
150
|
988 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align {{16|8}}
|
|
989 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
|
|
990 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
|
|
991 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
|
|
992 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i16.p0i16(i16* [[TMP2]])
|
|
993 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
|
|
994 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
|
|
995 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
|
|
996 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
|
|
997 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false)
|
|
998 // CHECK-A64: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
|
|
999 // CHECK-A64: ret %struct.int16x8x4_t [[TMP6]]
|
|
1000 // CHECK-A32: ret void
|
|
1001 int16x8x4_t test_vld1q_s16_x4(int16_t const *a) {
|
|
1002 return vld1q_s16_x4(a);
|
|
1003 }
|
|
1004
|
|
1005 // CHECK-LABEL: @test_vld1q_s32_x2(
|
|
1006 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
|
207
|
1007 // CHECK-A32: %struct.int32x4x2_t* noalias sret(%struct.int32x4x2_t) align 8 [[RETVAL:%.*]],
|
150
|
1008 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align {{16|8}}
|
|
1009 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
|
|
1010 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
|
|
1011 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
|
|
1012 // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4i32.p0i32(i32* [[TMP2]])
|
|
1013 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
|
|
1014 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]]
|
|
1015 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
|
|
1016 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
|
|
1017 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false)
|
|
1018 // CHECK-A64: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
|
|
1019 // CHECK-A64: ret %struct.int32x4x2_t [[TMP6]]
|
|
1020 // CHECK-A32: ret void
|
|
1021 int32x4x2_t test_vld1q_s32_x2(int32_t const *a) {
|
|
1022 return vld1q_s32_x2(a);
|
|
1023 }
|
|
1024
|
|
1025 // CHECK-LABEL: @test_vld1q_s32_x3(
|
|
1026 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
|
207
|
1027 // CHECK-A32: %struct.int32x4x3_t* noalias sret(%struct.int32x4x3_t) align 8 [[RETVAL:%.*]],
|
150
|
1028 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align {{16|8}}
|
|
1029 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
|
|
1030 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
|
|
1031 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
|
|
1032 // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4i32.p0i32(i32* [[TMP2]])
|
|
1033 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
|
|
1034 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
|
|
1035 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
|
|
1036 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
|
|
1037 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false)
|
|
1038 // CHECK-A64: [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
|
|
1039 // CHECK-A64: ret %struct.int32x4x3_t [[TMP6]]
|
|
1040 // CHECK-A32: ret void
|
|
1041 int32x4x3_t test_vld1q_s32_x3(int32_t const *a) {
|
|
1042 return vld1q_s32_x3(a);
|
|
1043 }
|
|
1044
|
|
1045 // CHECK-LABEL: @test_vld1q_s32_x4(
|
|
1046 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
|
207
|
1047 // CHECK-A32: %struct.int32x4x4_t* noalias sret(%struct.int32x4x4_t) align 8 [[RETVAL:%.*]],
|
150
|
1048 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align {{16|8}}
|
|
1049 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
|
|
1050 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
|
|
1051 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
|
|
1052 // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4i32.p0i32(i32* [[TMP2]])
|
|
1053 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
|
|
1054 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
|
|
1055 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
|
|
1056 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
|
|
1057 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false)
|
|
1058 // CHECK-A64: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
|
|
1059 // CHECK-A64: ret %struct.int32x4x4_t [[TMP6]]
|
|
1060 // CHECK-A32: ret void
|
|
1061 int32x4x4_t test_vld1q_s32_x4(int32_t const *a) {
|
|
1062 return vld1q_s32_x4(a);
|
|
1063 }
|
|
1064
|
|
1065 // CHECK-LABEL: @test_vld1q_s64_x2(
|
|
1066 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
|
207
|
1067 // CHECK-A32: %struct.int64x2x2_t* noalias sret(%struct.int64x2x2_t) align 8 [[RETVAL:%.*]],
|
150
|
1068 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align {{16|8}}
|
|
1069 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
|
|
1070 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
|
|
1071 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
|
|
1072 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v2i64.p0i64(i64* [[TMP2]])
|
|
1073 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
|
|
1074 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
|
|
1075 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
|
|
1076 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
|
|
1077 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false)
|
|
1078 // CHECK-A64: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
|
|
1079 // CHECK-A64: ret %struct.int64x2x2_t [[TMP6]]
|
|
1080 // CHECK-A32: ret void
|
|
1081 int64x2x2_t test_vld1q_s64_x2(int64_t const *a) {
|
|
1082 return vld1q_s64_x2(a);
|
|
1083 }
|
|
1084
|
|
1085 // CHECK-LABEL: @test_vld1q_s64_x3(
|
|
1086 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
|
207
|
1087 // CHECK-A32: %struct.int64x2x3_t* noalias sret(%struct.int64x2x3_t) align 8 [[RETVAL:%.*]],
|
150
|
1088 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align {{16|8}}
|
|
1089 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
|
|
1090 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
|
|
1091 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
|
|
1092 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v2i64.p0i64(i64* [[TMP2]])
|
|
1093 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
|
|
1094 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
|
|
1095 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
|
|
1096 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
|
|
1097 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false)
|
|
1098 // CHECK-A64: [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
|
|
1099 // CHECK-A64: ret %struct.int64x2x3_t [[TMP6]]
|
|
1100 // CHECK-A32: ret void
|
|
1101 int64x2x3_t test_vld1q_s64_x3(int64_t const *a) {
|
|
1102 return vld1q_s64_x3(a);
|
|
1103 }
|
|
1104
|
|
1105 // CHECK-LABEL: @test_vld1q_s64_x4(
|
|
1106 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
|
207
|
1107 // CHECK-A32: %struct.int64x2x4_t* noalias sret(%struct.int64x2x4_t) align 8 [[RETVAL:%.*]],
|
150
|
1108 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align {{16|8}}
|
|
1109 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
|
|
1110 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
|
|
1111 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
|
|
1112 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v2i64.p0i64(i64* [[TMP2]])
|
|
1113 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
|
|
1114 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
|
|
1115 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
|
|
1116 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
|
|
1117 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false)
|
|
1118 // CHECK-A64: [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
|
|
1119 // CHECK-A64: ret %struct.int64x2x4_t [[TMP6]]
|
|
1120 // CHECK-A32: ret void
|
|
1121 int64x2x4_t test_vld1q_s64_x4(int64_t const *a) {
|
|
1122 return vld1q_s64_x4(a);
|
|
1123 }
|
|
1124
|
|
1125 // CHECK-LABEL: @test_vld1q_s8_x2(
|
|
1126 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
|
207
|
1127 // CHECK-A32: %struct.int8x16x2_t* noalias sret(%struct.int8x16x2_t) align 8 [[RETVAL:%.*]],
|
150
|
1128 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align {{16|8}}
|
|
1129 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
|
|
1130 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v16i8.p0i8(i8* %a)
|
|
1131 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
|
|
1132 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
|
|
1133 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
|
|
1134 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
|
|
1135 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 32, i1 false)
|
|
1136 // CHECK-A64: [[TMP4:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
|
|
1137 // CHECK-A64: ret %struct.int8x16x2_t [[TMP4]]
|
|
1138 // CHECK-A32: ret void
|
|
1139 int8x16x2_t test_vld1q_s8_x2(int8_t const *a) {
|
|
1140 return vld1q_s8_x2(a);
|
|
1141 }
|
|
1142
|
|
1143 // CHECK-LABEL: @test_vld1q_s8_x3(
|
|
1144 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
|
207
|
1145 // CHECK-A32: %struct.int8x16x3_t* noalias sret(%struct.int8x16x3_t) align 8 [[RETVAL:%.*]],
|
150
|
1146 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align {{16|8}}
|
|
1147 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
|
|
1148 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v16i8.p0i8(i8* %a)
|
|
1149 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
|
|
1150 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
|
|
1151 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
|
|
1152 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
|
|
1153 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 48, i1 false)
|
|
1154 // CHECK-A64: [[TMP4:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
|
|
1155 // CHECK-A64: ret %struct.int8x16x3_t [[TMP4]]
|
|
1156 // CHECK-A32: ret void
|
|
1157 int8x16x3_t test_vld1q_s8_x3(int8_t const *a) {
|
|
1158 return vld1q_s8_x3(a);
|
|
1159 }
|
|
1160
|
|
1161 // CHECK-LABEL: @test_vld1q_s8_x4(
|
|
1162 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
|
207
|
1163 // CHECK-A32: %struct.int8x16x4_t* noalias sret(%struct.int8x16x4_t) align 8 [[RETVAL:%.*]],
|
150
|
1164 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align {{16|8}}
|
|
1165 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
|
|
1166 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v16i8.p0i8(i8* %a)
|
|
1167 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
|
|
1168 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
|
|
1169 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
|
|
1170 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
|
|
1171 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 64, i1 false)
|
|
1172 // CHECK-A64: [[TMP4:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
|
|
1173 // CHECK-A64: ret %struct.int8x16x4_t [[TMP4]]
|
|
1174 // CHECK-A32: ret void
|
|
1175 int8x16x4_t test_vld1q_s8_x4(int8_t const *a) {
|
|
1176 return vld1q_s8_x4(a);
|
|
1177 }
|
|
1178
|
|
1179 // CHECK-LABEL: @test_vld1q_u16_x2(
|
|
1180 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
|
207
|
1181 // CHECK-A32: %struct.uint16x8x2_t* noalias sret(%struct.uint16x8x2_t) align 8 [[RETVAL:%.*]],
|
150
|
1182 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align {{16|8}}
|
|
1183 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
|
|
1184 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
|
|
1185 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
|
|
1186 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i16.p0i16(i16* [[TMP2]])
|
|
1187 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
|
|
1188 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
|
|
1189 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
|
|
1190 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
|
|
1191 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false)
|
|
1192 // CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
|
|
1193 // CHECK-A64: ret %struct.uint16x8x2_t [[TMP6]]
|
|
1194 // CHECK-A32: ret void
|
|
1195 uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) {
|
|
1196 return vld1q_u16_x2(a);
|
|
1197 }
|
|
1198
|
|
1199 // CHECK-LABEL: @test_vld1q_u16_x3(
|
|
1200 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
|
207
|
1201 // CHECK-A32: %struct.uint16x8x3_t* noalias sret(%struct.uint16x8x3_t) align 8 [[RETVAL:%.*]],
|
150
|
1202 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align {{16|8}}
|
|
1203 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
|
|
1204 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
|
|
1205 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
|
|
1206 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i16.p0i16(i16* [[TMP2]])
|
|
1207 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
|
|
1208 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
|
|
1209 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
|
|
1210 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
|
|
1211 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false)
|
|
1212 // CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
|
|
1213 // CHECK-A64: ret %struct.uint16x8x3_t [[TMP6]]
|
|
1214 // CHECK-A32: ret void
|
|
1215 uint16x8x3_t test_vld1q_u16_x3(uint16_t const *a) {
|
|
1216 return vld1q_u16_x3(a);
|
|
1217 }
|
|
1218
|
|
1219 // CHECK-LABEL: @test_vld1q_u16_x4(
|
|
1220 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
|
207
|
1221 // CHECK-A32: %struct.uint16x8x4_t* noalias sret(%struct.uint16x8x4_t) align 8 [[RETVAL:%.*]],
|
150
|
1222 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align {{16|8}}
|
|
1223 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
|
|
1224 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
|
|
1225 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
|
|
1226 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i16.p0i16(i16* [[TMP2]])
|
|
1227 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
|
|
1228 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
|
|
1229 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
|
|
1230 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
|
|
1231 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false)
|
|
1232 // CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
|
|
1233 // CHECK-A64: ret %struct.uint16x8x4_t [[TMP6]]
|
|
1234 // CHECK-A32: ret void
|
|
1235 uint16x8x4_t test_vld1q_u16_x4(uint16_t const *a) {
|
|
1236 return vld1q_u16_x4(a);
|
|
1237 }
|
|
1238
|
|
1239 // CHECK-LABEL: @test_vld1q_u32_x2(
|
|
1240 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
|
207
|
1241 // CHECK-A32: %struct.uint32x4x2_t* noalias sret(%struct.uint32x4x2_t) align 8 [[RETVAL:%.*]],
|
150
|
1242 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align {{16|8}}
|
|
1243 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
|
|
1244 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
|
|
1245 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
|
|
1246 // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4i32.p0i32(i32* [[TMP2]])
|
|
1247 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
|
|
1248 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]]
|
|
1249 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
|
|
1250 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
|
|
1251 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false)
|
|
1252 // CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
|
|
1253 // CHECK-A64: ret %struct.uint32x4x2_t [[TMP6]]
|
|
1254 // CHECK-A32: ret void
|
|
1255 uint32x4x2_t test_vld1q_u32_x2(uint32_t const *a) {
|
|
1256 return vld1q_u32_x2(a);
|
|
1257 }
|
|
1258
|
|
1259 // CHECK-LABEL: @test_vld1q_u32_x3(
|
|
1260 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
|
207
|
1261 // CHECK-A32: %struct.uint32x4x3_t* noalias sret(%struct.uint32x4x3_t) align 8 [[RETVAL:%.*]],
|
150
|
1262 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align {{16|8}}
|
|
1263 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
|
|
1264 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
|
|
1265 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
|
|
1266 // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4i32.p0i32(i32* [[TMP2]])
|
|
1267 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
|
|
1268 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
|
|
1269 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
|
|
1270 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
|
|
1271 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false)
|
|
1272 // CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
|
|
1273 // CHECK-A64: ret %struct.uint32x4x3_t [[TMP6]]
|
|
1274 // CHECK-A32: ret void
|
|
1275 uint32x4x3_t test_vld1q_u32_x3(uint32_t const *a) {
|
|
1276 return vld1q_u32_x3(a);
|
|
1277 }
|
|
1278
|
|
1279 // CHECK-LABEL: @test_vld1q_u32_x4(
|
|
1280 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
|
207
|
1281 // CHECK-A32: %struct.uint32x4x4_t* noalias sret(%struct.uint32x4x4_t) align 8 [[RETVAL:%.*]],
|
150
|
1282 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align {{16|8}}
|
|
1283 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
|
|
1284 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
|
|
1285 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
|
|
1286 // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4i32.p0i32(i32* [[TMP2]])
|
|
1287 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
|
|
1288 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
|
|
1289 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
|
|
1290 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
|
|
1291 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false)
|
|
1292 // CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
|
|
1293 // CHECK-A64: ret %struct.uint32x4x4_t [[TMP6]]
|
|
1294 // CHECK-A32: ret void
|
|
1295 uint32x4x4_t test_vld1q_u32_x4(uint32_t const *a) {
|
|
1296 return vld1q_u32_x4(a);
|
|
1297 }
|
|
1298
|
|
1299 // CHECK-LABEL: @test_vld1q_u64_x2(
|
|
1300 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
|
207
|
1301 // CHECK-A32: %struct.uint64x2x2_t* noalias sret(%struct.uint64x2x2_t) align 8 [[RETVAL:%.*]],
|
150
|
1302 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align {{16|8}}
|
|
1303 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
|
|
1304 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
|
|
1305 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
|
|
1306 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v2i64.p0i64(i64* [[TMP2]])
|
|
1307 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
|
|
1308 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
|
|
1309 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
|
|
1310 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
|
|
1311 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false)
|
|
1312 // CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
|
|
1313 // CHECK-A64: ret %struct.uint64x2x2_t [[TMP6]]
|
|
1314 // CHECK-A32: ret void
|
|
1315 uint64x2x2_t test_vld1q_u64_x2(uint64_t const *a) {
|
|
1316 return vld1q_u64_x2(a);
|
|
1317 }
|
|
1318
|
|
1319 // CHECK-LABEL: @test_vld1q_u64_x3(
|
|
1320 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
|
207
|
1321 // CHECK-A32: %struct.uint64x2x3_t* noalias sret(%struct.uint64x2x3_t) align 8 [[RETVAL:%.*]],
|
150
|
1322 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align {{16|8}}
|
|
1323 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
|
|
1324 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
|
|
1325 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
|
|
1326 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v2i64.p0i64(i64* [[TMP2]])
|
|
1327 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
|
|
1328 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
|
|
1329 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
|
|
1330 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
|
|
1331 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false)
|
|
1332 // CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
|
|
1333 // CHECK-A64: ret %struct.uint64x2x3_t [[TMP6]]
|
|
1334 // CHECK-A32: ret void
|
|
1335 uint64x2x3_t test_vld1q_u64_x3(uint64_t const *a) {
|
|
1336 return vld1q_u64_x3(a);
|
|
1337 }
|
|
1338
|
|
1339 // CHECK-LABEL: @test_vld1q_u64_x4(
|
|
1340 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
|
207
|
1341 // CHECK-A32: %struct.uint64x2x4_t* noalias sret(%struct.uint64x2x4_t) align 8 [[RETVAL:%.*]],
|
150
|
1342 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align {{16|8}}
|
|
1343 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
|
|
1344 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
|
|
1345 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
|
|
1346 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v2i64.p0i64(i64* [[TMP2]])
|
|
1347 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
|
|
1348 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
|
|
1349 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
|
|
1350 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
|
|
1351 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false)
|
|
1352 // CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
|
|
1353 // CHECK-A64: ret %struct.uint64x2x4_t [[TMP6]]
|
|
1354 // CHECK-A32: ret void
|
|
1355 uint64x2x4_t test_vld1q_u64_x4(uint64_t const *a) {
|
|
1356 return vld1q_u64_x4(a);
|
|
1357 }
|
|
1358
|
|
1359 // CHECK-LABEL: @test_vld1q_u8_x2(
|
|
1360 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
|
207
|
1361 // CHECK-A32: %struct.uint8x16x2_t* noalias sret(%struct.uint8x16x2_t) align 8 [[RETVAL:%.*]],
|
150
|
1362 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align {{16|8}}
|
|
1363 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
|
|
1364 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v16i8.p0i8(i8* %a)
|
|
1365 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
|
|
1366 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
|
|
1367 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
|
|
1368 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
|
|
1369 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 32, i1 false)
|
|
1370 // CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
|
|
1371 // CHECK-A64: ret %struct.uint8x16x2_t [[TMP4]]
|
|
1372 // CHECK-A32: ret void
|
|
1373 uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) {
|
|
1374 return vld1q_u8_x2(a);
|
|
1375 }
|
|
1376
|
|
1377 // CHECK-LABEL: @test_vld1q_u8_x3(
|
|
1378 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
|
207
|
1379 // CHECK-A32: %struct.uint8x16x3_t* noalias sret(%struct.uint8x16x3_t) align 8 [[RETVAL:%.*]],
|
150
|
1380 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align {{16|8}}
|
|
1381 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
|
|
1382 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v16i8.p0i8(i8* %a)
|
|
1383 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
|
|
1384 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
|
|
1385 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
|
|
1386 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
|
|
1387 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 48, i1 false)
|
|
1388 // CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
|
|
1389 // CHECK-A64: ret %struct.uint8x16x3_t [[TMP4]]
|
|
1390 // CHECK-A32: ret void
|
|
1391 uint8x16x3_t test_vld1q_u8_x3(uint8_t const *a) {
|
|
1392 return vld1q_u8_x3(a);
|
|
1393 }
|
|
1394
|
|
1395 // CHECK-LABEL: @test_vld1q_u8_x4(
|
|
1396 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
|
207
|
1397 // CHECK-A32: %struct.uint8x16x4_t* noalias sret(%struct.uint8x16x4_t) align 8 [[RETVAL:%.*]],
|
150
|
1398 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align {{16|8}}
|
|
1399 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
|
|
1400 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v16i8.p0i8(i8* %a)
|
|
1401 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
|
|
1402 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
|
|
1403 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
|
|
1404 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
|
|
1405 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 64, i1 false)
|
|
1406 // CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
|
|
1407 // CHECK-A64: ret %struct.uint8x16x4_t [[TMP4]]
|
|
1408 // CHECK-A32: ret void
|
|
1409 uint8x16x4_t test_vld1q_u8_x4(uint8_t const *a) {
|
|
1410 return vld1q_u8_x4(a);
|
|
1411 }
|
|
1412
|
|
1413 // CHECK-LABEL: @test_vld2_dup_f16(
|
|
1414 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
|
|
1415 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
|
|
1416 // CHECK: [[TMP1:%.*]] = bitcast half* %src to i8*
|
|
1417 // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
|
|
1418 // CHECK-A64: [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2r.v4f16.p0f16(half* [[TMP2]])
|
|
1419 // CHECK-A32: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2dup.v4i16.p0i8(i8* [[TMP1]], i32 2)
|
|
1420 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x [[HALF]]>, <4 x [[HALF]]> }*
|
|
1421 // CHECK: store { <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD2]], { <4 x [[HALF]]>, <4 x [[HALF]]> }* [[TMP3]]
|
|
1422 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* %dest to i8*
|
|
1423 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
|
|
1424 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
|
|
1425 // CHECK: ret void
|
|
1426 void test_vld2_dup_f16(float16x4x2_t *dest, const float16_t *src) {
|
|
1427 *dest = vld2_dup_f16(src);
|
|
1428 }
|
|
1429
|
|
1430 // CHECK-LABEL: @test_vld2_dup_f32(
|
|
1431 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
|
|
1432 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
|
|
1433 // CHECK: [[TMP1:%.*]] = bitcast float* %src to i8*
|
|
1434 // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
|
|
1435 // CHECK-A64: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* [[TMP2]])
|
|
1436 // CHECK-A32: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2dup.v2f32.p0i8(i8* [[TMP1]], i32 4)
|
|
1437 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
|
|
1438 // CHECK: store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]]
|
|
1439 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* %dest to i8*
|
|
1440 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
|
|
1441 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
|
|
1442 // CHECK: ret void
|
|
1443 void test_vld2_dup_f32(float32x2x2_t *dest, const float32_t *src) {
|
|
1444 *dest = vld2_dup_f32(src);
|
|
1445 }
|
|
1446
|
|
1447 // CHECK-LABEL: @test_vld2_dup_p16(
|
|
1448 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
|
|
1449 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
|
|
1450 // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
|
|
1451 // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
|
|
1452 // CHECK-A64: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]])
|
|
1453 // CHECK-A32: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2dup.v4i16.p0i8(i8* [[TMP1]], i32 2)
|
|
1454 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
|
|
1455 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
|
|
1456 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* %dest to i8*
|
|
1457 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
|
|
1458 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
|
|
1459 // CHECK: ret void
|
|
1460 void test_vld2_dup_p16(poly16x4x2_t *dest, const poly16_t *src) {
|
|
1461 *dest = vld2_dup_p16(src);
|
|
1462 }
|
|
1463
|
|
1464 // CHECK-LABEL: @test_vld2_dup_p8(
|
|
1465 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
|
|
1466 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
|
|
1467 // CHECK-A64: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %src)
|
|
1468 // CHECK-A32: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2dup.v8i8.p0i8(i8* %src, i32 1)
|
|
1469 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
|
|
1470 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]]
|
|
1471 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* %dest to i8*
|
|
1472 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
|
|
1473 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 16, i1 false)
|
|
1474 // CHECK: ret void
|
|
1475 void test_vld2_dup_p8(poly8x8x2_t *dest, poly8_t *src) {
|
|
1476 *dest = vld2_dup_p8(src);
|
|
1477 }
|
|
1478
|
|
1479 // CHECK-LABEL: @test_vld2_dup_s16(
|
|
1480 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
|
|
1481 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
|
|
1482 // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
|
|
1483 // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
|
|
1484 // CHECK-A64: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]])
|
|
1485 // CHECK-A32: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2dup.v4i16.p0i8(i8* [[TMP1]], i32 2)
|
|
1486 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
|
|
1487 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
|
|
1488 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* %dest to i8*
|
|
1489 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
|
|
1490 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
|
|
1491 // CHECK: ret void
|
|
1492 void test_vld2_dup_s16(int16x4x2_t *dest, const int16_t *src) {
|
|
1493 *dest = vld2_dup_s16(src);
|
|
1494 }
|
|
1495
|
|
1496 // CHECK-LABEL: @test_vld2_dup_s32(
|
|
1497 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
|
|
1498 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
|
|
1499 // CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8*
|
|
1500 // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
|
|
1501 // CHECK-A64: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* [[TMP2]])
|
|
1502 // CHECK-A32: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2dup.v2i32.p0i8(i8* [[TMP1]], i32 4)
|
|
1503 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
|
|
1504 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
|
|
1505 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* %dest to i8*
|
|
1506 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
|
|
1507 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
|
|
1508 // CHECK: ret void
|
|
1509 void test_vld2_dup_s32(int32x2x2_t *dest, const int32_t *src) {
|
|
1510 *dest = vld2_dup_s32(src);
|
|
1511 }
|
|
1512
|
|
1513 // CHECK-LABEL: @test_vld2_dup_s8(
|
|
1514 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
|
|
1515 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
|
|
1516 // CHECK-A64: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %src)
|
|
1517 // CHECK-A32: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2dup.v8i8.p0i8(i8* %src, i32 1)
|
|
1518 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
|
|
1519 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]]
|
|
1520 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* %dest to i8*
|
|
1521 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
|
|
1522 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 16, i1 false)
|
|
1523 // CHECK: ret void
|
|
1524 void test_vld2_dup_s8(int8x8x2_t *dest, int8_t *src) {
|
|
1525 *dest = vld2_dup_s8(src);
|
|
1526 }
|
|
1527
|
|
1528 // CHECK-LABEL: @test_vld2_dup_u16(
|
|
1529 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
|
|
1530 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
|
|
1531 // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
|
|
1532 // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
|
|
1533 // CHECK-A64: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]])
|
|
1534 // CHECK-A32: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2dup.v4i16.p0i8(i8* [[TMP1]], i32 2)
|
|
1535 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
|
|
1536 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
|
|
1537 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* %dest to i8*
|
|
1538 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
|
|
1539 // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
|
|
1540 // CHECK: ret void
|
|
1541 void test_vld2_dup_u16(uint16x4x2_t *dest, const uint16_t *src) {
|
|
1542 *dest = vld2_dup_u16(src);
|
|
1543 }
|
|
1544
|
|
// CHECK-LABEL: @test_vld2_dup_u32(
// CHECK: entry:
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK-A64: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* [[TMP2]])
// CHECK-A32: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2dup.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
// CHECK: ret void
void test_vld2_dup_u32(uint32x2x2_t *dest, const uint32_t *src) {
  *dest = vld2_dup_u32(src);
}

// CHECK-LABEL: @test_vld2_dup_s64(
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK-A64: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* [[TMP2]])
// CHECK-A32: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2dup.v1i64.p0i8(i8* [[TMP1]], i32 8)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
// CHECK: ret void
void test_vld2_dup_s64(int64x1x2_t *dest, const int64_t *src) {
  *dest = vld2_dup_s64(src);
}

// CHECK-LABEL: @test_vld2_dup_u64(
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK-A64: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* [[TMP2]])
// CHECK-A32: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2dup.v1i64.p0i8(i8* [[TMP1]], i32 8)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false)
// CHECK: ret void
void test_vld2_dup_u64(uint64x1x2_t *dest, const uint64_t *src) {
  *dest = vld2_dup_u64(src);
}

// CHECK-LABEL: @test_vld2_dup_u8(
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK-A64: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %src)
// CHECK-A32: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2dup.v8i8.p0i8(i8* %src, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* %dest to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 16, i1 false)
// CHECK: ret void
void test_vld2_dup_u8(uint8x8x2_t *dest, const uint8_t *src) {
  *dest = vld2_dup_u8(src);
}

// CHECK-LABEL: @test_vld3_dup_f16(
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
// CHECK-A64: [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3r.v4f16.p0f16(half* [[TMP2]])
// CHECK-A32: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3dup.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> }*
// CHECK: store { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD3]], { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK: ret void
void test_vld3_dup_f16(float16x4x3_t *dest, float16_t *src) {
  *dest = vld3_dup_f16(src);
}

// CHECK-LABEL: @test_vld3_dup_f32(
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK-A64: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* [[TMP2]])
// CHECK-A32: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3dup.v2f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK: ret void
void test_vld3_dup_f32(float32x2x3_t *dest, const float32_t *src) {
  *dest = vld3_dup_f32(src);
}

// CHECK-LABEL: @test_vld3_dup_p16(
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK-A64: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]])
// CHECK-A32: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3dup.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK: ret void
void test_vld3_dup_p16(poly16x4x3_t *dest, const poly16_t *src) {
  *dest = vld3_dup_p16(src);
}

// CHECK-LABEL: @test_vld3_dup_p8(
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK-A64: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %src)
// CHECK-A32: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3dup.v8i8.p0i8(i8* %src, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* %dest to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 24, i1 false)
// CHECK: ret void
void test_vld3_dup_p8(poly8x8x3_t *dest, const poly8_t *src) {
  *dest = vld3_dup_p8(src);
}

// CHECK-LABEL: @test_vld3_dup_s16(
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK-A64: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]])
// CHECK-A32: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3dup.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK: ret void
void test_vld3_dup_s16(int16x4x3_t *dest, const int16_t *src) {
  *dest = vld3_dup_s16(src);
}

// CHECK-LABEL: @test_vld3_dup_s32(
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK-A64: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* [[TMP2]])
// CHECK-A32: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3dup.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK: ret void
void test_vld3_dup_s32(int32x2x3_t *dest, const int32_t *src) {
  *dest = vld3_dup_s32(src);
}

// CHECK-LABEL: @test_vld3_dup_s8(
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK-A64: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %src)
// CHECK-A32: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3dup.v8i8.p0i8(i8* %src, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* %dest to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 24, i1 false)
// CHECK: ret void
void test_vld3_dup_s8(int8x8x3_t *dest, const int8_t *src) {
  *dest = vld3_dup_s8(src);
}

// CHECK-LABEL: @test_vld3_dup_u16(
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK-A64: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]])
// CHECK-A32: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3dup.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK: ret void
void test_vld3_dup_u16(uint16x4x3_t *dest, const uint16_t *src) {
  *dest = vld3_dup_u16(src);
}

// CHECK-LABEL: @test_vld3_dup_u32(
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK-A64: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* [[TMP2]])
// CHECK-A32: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3dup.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK: ret void
void test_vld3_dup_u32(uint32x2x3_t *dest, const uint32_t *src) {
  *dest = vld3_dup_u32(src);
}

// CHECK-LABEL: @test_vld3_dup_u8(
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK-A64: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %src)
// CHECK-A32: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3dup.v8i8.p0i8(i8* %src, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* %dest to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 24, i1 false)
// CHECK: ret void
void test_vld3_dup_u8(uint8x8x3_t *dest, const uint8_t *src) {
  *dest = vld3_dup_u8(src);
}

// CHECK-LABEL: @test_vld3_dup_s64(
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK-A64: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* [[TMP2]])
// CHECK-A32: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3dup.v1i64.p0i8(i8* [[TMP1]], i32 8)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK: ret void
void test_vld3_dup_s64(int64x1x3_t *dest, const int64_t *src) {
  *dest = vld3_dup_s64(src);
}

// CHECK-LABEL: @test_vld3_dup_u64(
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK-A64: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* [[TMP2]])
// CHECK-A32: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3dup.v1i64.p0i8(i8* [[TMP1]], i32 8)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false)
// CHECK: ret void
void test_vld3_dup_u64(uint64x1x3_t *dest, const uint64_t *src) {
  *dest = vld3_dup_u64(src);
}

// CHECK-LABEL: @test_vld4_dup_f16(
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
// CHECK-A64: [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4r.v4f16.p0f16(half* [[TMP2]])
// CHECK-A32: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4dup.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> }*
// CHECK: store { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD4]], { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld4_dup_f16(float16x4x4_t *dest, const float16_t *src) {
  *dest = vld4_dup_f16(src);
}

// CHECK-LABEL: @test_vld4_dup_f32(
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK-A64: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* [[TMP2]])
// CHECK-A32: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4dup.v2f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld4_dup_f32(float32x2x4_t *dest, const float32_t *src) {
  *dest = vld4_dup_f32(src);
}

// CHECK-LABEL: @test_vld4_dup_p16(
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK-A64: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]])
// CHECK-A32: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4dup.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld4_dup_p16(poly16x4x4_t *dest, const poly16_t *src) {
  *dest = vld4_dup_p16(src);
}

// CHECK-LABEL: @test_vld4_dup_p8(
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK-A64: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %src)
// CHECK-A32: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4dup.v8i8.p0i8(i8* %src, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* %dest to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld4_dup_p8(poly8x8x4_t *dest, const poly8_t *src) {
  *dest = vld4_dup_p8(src);
}

// CHECK-LABEL: @test_vld4_dup_s16(
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK-A64: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]])
// CHECK-A32: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4dup.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld4_dup_s16(int16x4x4_t *dest, const int16_t *src) {
  *dest = vld4_dup_s16(src);
}

// CHECK-LABEL: @test_vld4_dup_s32(
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK-A64: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* [[TMP2]])
// CHECK-A32: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4dup.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld4_dup_s32(int32x2x4_t *dest, const int32_t *src) {
  *dest = vld4_dup_s32(src);
}

// CHECK-LABEL: @test_vld4_dup_s8(
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK-A64: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %src)
// CHECK-A32: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4dup.v8i8.p0i8(i8* %src, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* %dest to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld4_dup_s8(int8x8x4_t *dest, const int8_t *src) {
  *dest = vld4_dup_s8(src);
}

// CHECK-LABEL: @test_vld4_dup_u16(
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK-A64: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]])
// CHECK-A32: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4dup.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld4_dup_u16(uint16x4x4_t *dest, const uint16_t *src) {
  *dest = vld4_dup_u16(src);
}

// CHECK-LABEL: @test_vld4_dup_u32(
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK-A64: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* [[TMP2]])
// CHECK-A32: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4dup.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld4_dup_u32(uint32x2x4_t *dest, const uint32_t *src) {
  *dest = vld4_dup_u32(src);
}

// CHECK-LABEL: @test_vld4_dup_u8(
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK-A64: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %src)
// CHECK-A32: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4dup.v8i8.p0i8(i8* %src, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* %dest to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld4_dup_u8(uint8x8x4_t *dest, const uint8_t *src) {
  *dest = vld4_dup_u8(src);
}

// CHECK-LABEL: @test_vld4_dup_s64(
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK-A64: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* [[TMP2]])
// CHECK-A32: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4dup.v1i64.p0i8(i8* [[TMP1]], i32 8)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld4_dup_s64(int64x1x4_t *dest, const int64_t *src) {
  *dest = vld4_dup_s64(src);
}

// CHECK-LABEL: @test_vld4_dup_u64(
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK-A64: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* [[TMP2]])
// CHECK-A32: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4dup.v1i64.p0i8(i8* [[TMP1]], i32 8)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld4_dup_u64(uint64x1x4_t *dest, const uint64_t *src) {
  *dest = vld4_dup_u64(src);
}

// CHECK-LABEL: @test_vld2q_dup_f16(
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
// CHECK-A64: [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2r.v8f16.p0f16(half* [[TMP2]])
// CHECK-A32: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2dup.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x [[HALF]]>, <8 x [[HALF]]> }*
// CHECK: store { <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD2]], { <8 x [[HALF]]>, <8 x [[HALF]]> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld2q_dup_f16(float16x8x2_t *dest, const float16_t *src) {
  *dest = vld2q_dup_f16(src);
}

// CHECK-LABEL: @test_vld2q_dup_f32(
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK-A64: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* [[TMP2]])
// CHECK-A32: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2dup.v4f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld2q_dup_f32(float32x4x2_t *dest, const float32_t *src) {
  *dest = vld2q_dup_f32(src);
}

// CHECK-LABEL: @test_vld2q_dup_p16(
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK-A64: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* [[TMP2]])
// CHECK-A32: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2dup.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld2q_dup_p16(poly16x8x2_t *dest, const poly16_t *src) {
  *dest = vld2q_dup_p16(src);
}

// CHECK-LABEL: @test_vld2q_dup_p8(
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK-A64: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %src)
// CHECK-A32: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2dup.v16i8.p0i8(i8* %src, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* %dest to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld2q_dup_p8(poly8x16x2_t *dest, const poly8_t *src) {
  *dest = vld2q_dup_p8(src);
}

// CHECK-LABEL: @test_vld2q_dup_s16(
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK-A64: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* [[TMP2]])
// CHECK-A32: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2dup.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld2q_dup_s16(int16x8x2_t *dest, const int16_t *src) {
  *dest = vld2q_dup_s16(src);
}

// CHECK-LABEL: @test_vld2q_dup_s32(
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK-A64: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* [[TMP2]])
// CHECK-A32: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2dup.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld2q_dup_s32(int32x4x2_t *dest, const int32_t *src) {
  *dest = vld2q_dup_s32(src);
}

// CHECK-LABEL: @test_vld2q_dup_s8(
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK-A64: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %src)
// CHECK-A32: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2dup.v16i8.p0i8(i8* %src, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* %dest to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld2q_dup_s8(int8x16x2_t *dest, const int8_t *src) {
  *dest = vld2q_dup_s8(src);
}

// CHECK-LABEL: @test_vld2q_dup_u16(
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK-A64: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* [[TMP2]])
// CHECK-A32: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2dup.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld2q_dup_u16(uint16x8x2_t *dest, const uint16_t *src) {
  *dest = vld2q_dup_u16(src);
}

// CHECK-LABEL: @test_vld2q_dup_u32(
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK-A64: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* [[TMP2]])
// CHECK-A32: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2dup.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld2q_dup_u32(uint32x4x2_t *dest, const uint32_t *src) {
  *dest = vld2q_dup_u32(src);
}

// CHECK-LABEL: @test_vld2q_dup_u8(
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK-A64: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %src)
// CHECK-A32: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2dup.v16i8.p0i8(i8* %src, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* %dest to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 32, i1 false)
// CHECK: ret void
void test_vld2q_dup_u8(uint8x16x2_t *dest, const uint8_t *src) {
  *dest = vld2q_dup_u8(src);
}

// CHECK-LABEL: @test_vld3q_dup_f16(
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
// CHECK-A64: [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3r.v8f16.p0f16(half* [[TMP2]])
// CHECK-A32: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3dup.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> }*
// CHECK: store { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD3]], { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false)
// CHECK: ret void
void test_vld3q_dup_f16(float16x8x3_t *dest, const float16_t *src) {
  *dest = vld3q_dup_f16(src);
}

// CHECK-LABEL: @test_vld3q_dup_f32(
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK-A64: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* [[TMP2]])
// CHECK-A32: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3dup.v4f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false)
// CHECK: ret void
void test_vld3q_dup_f32(float32x4x3_t *dest, const float32_t *src) {
  *dest = vld3q_dup_f32(src);
}

// CHECK-LABEL: @test_vld3q_dup_p16(
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK-A64: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* [[TMP2]])
// CHECK-A32: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3dup.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false)
// CHECK: ret void
void test_vld3q_dup_p16(poly16x8x3_t *dest, const poly16_t *src) {
  *dest = vld3q_dup_p16(src);
}

// CHECK-LABEL: @test_vld3q_dup_p8(
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK-A64: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %src)
// CHECK-A32: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3dup.v16i8.p0i8(i8* %src, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* %dest to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 48, i1 false)
// CHECK: ret void
void test_vld3q_dup_p8(poly8x16x3_t *dest, const poly8_t *src) {
  *dest = vld3q_dup_p8(src);
}

// CHECK-LABEL: @test_vld3q_dup_s16(
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK-A64: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* [[TMP2]])
// CHECK-A32: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3dup.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false)
// CHECK: ret void
void test_vld3q_dup_s16(int16x8x3_t *dest, const int16_t *src) {
  *dest = vld3q_dup_s16(src);
}

// CHECK-LABEL: @test_vld3q_dup_s32(
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK-A64: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* [[TMP2]])
// CHECK-A32: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3dup.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false)
// CHECK: ret void
void test_vld3q_dup_s32(int32x4x3_t *dest, const int32_t *src) {
  *dest = vld3q_dup_s32(src);
}

// CHECK-LABEL: @test_vld3q_dup_s8(
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK-A64: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %src)
// CHECK-A32: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3dup.v16i8.p0i8(i8* %src, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* %dest to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 48, i1 false)
// CHECK: ret void
void test_vld3q_dup_s8(int8x16x3_t *dest, const int8_t *src) {
  *dest = vld3q_dup_s8(src);
}

// CHECK-LABEL: @test_vld3q_dup_u16(
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK-A64: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* [[TMP2]])
// CHECK-A32: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3dup.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false)
// CHECK: ret void
void test_vld3q_dup_u16(uint16x8x3_t *dest, const uint16_t *src) {
  *dest = vld3q_dup_u16(src);
}

// CHECK-LABEL: @test_vld3q_dup_u32(
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK-A64: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* [[TMP2]])
// CHECK-A32: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3dup.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false)
// CHECK: ret void
void test_vld3q_dup_u32(uint32x4x3_t *dest, const uint32_t *src) {
  *dest = vld3q_dup_u32(src);
}

// CHECK-LABEL: @test_vld3q_dup_u8(
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK-A64: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %src)
// CHECK-A32: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3dup.v16i8.p0i8(i8* %src, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* %dest to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 48, i1 false)
// CHECK: ret void
void test_vld3q_dup_u8(uint8x16x3_t *dest, const uint8_t *src) {
  *dest = vld3q_dup_u8(src);
}

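// The vld4q_dup_* tests below load four adjacent elements and replicate each
// one across all lanes of a 128-bit vector, producing a four-vector struct
// that is then memcpy'd (64 bytes) into *dest. A64 lowers these to ld4r
// intrinsics; A32 uses vld4dup with an explicit alignment operand. For f16
// the A64 lowering keeps the half element type (<8 x half>) while the A32
// path goes through <8 x i16>, hence the [[HALF]] pattern in the store.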
|
|
// CHECK-LABEL: @test_vld4q_dup_f16(
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half*
// CHECK-A64: [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4r.v8f16.p0f16(half* [[TMP2]])
// CHECK-A32: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4dup.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> }*
// CHECK: store { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD4]], { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false)
// CHECK: ret void
void test_vld4q_dup_f16(float16x8x4_t *dest, const float16_t *src) {
  *dest = vld4q_dup_f16(src);
}

// CHECK-LABEL: @test_vld4q_dup_f32(
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK-A64: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* [[TMP2]])
// CHECK-A32: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4dup.v4f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false)
// CHECK: ret void
void test_vld4q_dup_f32(float32x4x4_t *dest, const float32_t *src) {
  *dest = vld4q_dup_f32(src);
}

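// The polynomial variants below lower exactly like the corresponding integer
// variants; only the poly16x8x4_t/poly8x16x4_t struct names in the checks differ.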
|
|
// CHECK-LABEL: @test_vld4q_dup_p16(
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK-A64: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* [[TMP2]])
// CHECK-A32: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4dup.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false)
// CHECK: ret void
void test_vld4q_dup_p16(poly16x8x4_t *dest, const poly16_t *src) {
  *dest = vld4q_dup_p16(src);
}

// CHECK-LABEL: @test_vld4q_dup_p8(
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK-A64: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %src)
// CHECK-A32: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4dup.v16i8.p0i8(i8* %src, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* %dest to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 64, i1 false)
// CHECK: ret void
void test_vld4q_dup_p8(poly8x16x4_t *dest, const poly8_t *src) {
  *dest = vld4q_dup_p8(src);
}

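// The signed and unsigned integer variants that follow generate identical IR;
// only the int*/uint* struct type names in the check lines differ.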
|
|
// CHECK-LABEL: @test_vld4q_dup_s16(
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK-A64: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* [[TMP2]])
// CHECK-A32: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4dup.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false)
// CHECK: ret void
void test_vld4q_dup_s16(int16x8x4_t *dest, const int16_t *src) {
  *dest = vld4q_dup_s16(src);
}

// CHECK-LABEL: @test_vld4q_dup_s32(
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK-A64: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* [[TMP2]])
// CHECK-A32: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4dup.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false)
// CHECK: ret void
void test_vld4q_dup_s32(int32x4x4_t *dest, const int32_t *src) {
  *dest = vld4q_dup_s32(src);
}

// CHECK-LABEL: @test_vld4q_dup_s8(
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK-A64: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %src)
// CHECK-A32: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4dup.v16i8.p0i8(i8* %src, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* %dest to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 64, i1 false)
// CHECK: ret void
void test_vld4q_dup_s8(int8x16x4_t *dest, const int8_t *src) {
  *dest = vld4q_dup_s8(src);
}

// CHECK-LABEL: @test_vld4q_dup_u16(
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK-A64: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* [[TMP2]])
// CHECK-A32: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4dup.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false)
// CHECK: ret void
void test_vld4q_dup_u16(uint16x8x4_t *dest, const uint16_t *src) {
  *dest = vld4q_dup_u16(src);
}

// CHECK-LABEL: @test_vld4q_dup_u32(
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8*
// CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK-A64: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* [[TMP2]])
// CHECK-A32: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4dup.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* %dest to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false)
// CHECK: ret void
void test_vld4q_dup_u32(uint32x4x4_t *dest, const uint32_t *src) {
  *dest = vld4q_dup_u32(src);
}

// CHECK-LABEL: @test_vld4q_dup_u8(
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align {{16|8}}
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK-A64: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %src)
// CHECK-A32: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4dup.v16i8.p0i8(i8* %src, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* %dest to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 64, i1 false)
// CHECK: ret void
void test_vld4q_dup_u8(uint8x16x4_t *dest, const uint8_t *src) {
  *dest = vld4q_dup_u8(src);
}