test/CodeGen/AArch64/arm64-neon-across.ll @ 77:54457678186b (LLVM 3.6)
author: Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date:   Mon, 08 Sep 2014 22:06:00 +0900
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
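
; Tests for the AArch64 across-vector reduction intrinsics: plain add (ADDV),
; widening long add (SADDLV/UADDLV), signed/unsigned min/max
; (SMINV/UMINV/SMAXV/UMAXV), and the floating-point reductions over v4f32
; (FMAXV/FMINV plus the IEEE maxNum/minNum forms FMAXNMV/FMINNMV). Each test
; checks that the intrinsic lowers to a single across-lanes instruction with
; the expected element arrangement.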

declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>)
declare float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float>)
declare float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float>)
declare float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float>)

declare i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8>)
declare i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>)
declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>)
declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>)
declare i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8>)
declare i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8>)

declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>)
declare i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8>)
declare i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>)
declare i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8>)

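; vaddlv/vaddlvq: widening "long" add across all lanes. The result element is
; wider than the source elements, so SADDLV/UADDLV write an h, s, or d scalar
; register depending on the source arrangement.
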
define i16 @test_vaddlv_s8(<8 x i8> %a) {
; CHECK: test_vaddlv_s8:
; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %saddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlv_s16(<4 x i16> %a) {
; CHECK: test_vaddlv_s16:
; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a)
  ret i32 %saddlvv.i
}

define i16 @test_vaddlv_u8(<8 x i8> %a) {
; CHECK: test_vaddlv_u8:
; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %uaddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlv_u16(<4 x i16> %a) {
; CHECK: test_vaddlv_u16:
; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a)
  ret i32 %uaddlvv.i
}

define i16 @test_vaddlvq_s8(<16 x i8> %a) {
; CHECK: test_vaddlvq_s8:
; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %saddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlvq_s16(<8 x i16> %a) {
; CHECK: test_vaddlvq_s16:
; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a)
  ret i32 %saddlvv.i
}

define i64 @test_vaddlvq_s32(<4 x i32> %a) {
; CHECK: test_vaddlvq_s32:
; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %saddlvv.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a)
  ret i64 %saddlvv.i
}

define i16 @test_vaddlvq_u8(<16 x i8> %a) {
; CHECK: test_vaddlvq_u8:
; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %uaddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlvq_u16(<8 x i16> %a) {
; CHECK: test_vaddlvq_u16:
; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a)
  ret i32 %uaddlvv.i
}

define i64 @test_vaddlvq_u32(<4 x i32> %a) {
; CHECK: test_vaddlvq_u32:
; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %uaddlvv.i = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a)
  ret i64 %uaddlvv.i
}

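; vmaxv/vmaxvq: signed/unsigned maximum across all lanes (SMAXV/UMAXV). The
; intrinsic always returns i32, so sub-word results are truncated back to the
; element type.
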
define i8 @test_vmaxv_s8(<8 x i8> %a) {
; CHECK: test_vmaxv_s8:
; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %smaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxv_s16(<4 x i16> %a) {
; CHECK: test_vmaxv_s16:
; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %smaxv.i to i16
  ret i16 %0
}

define i8 @test_vmaxv_u8(<8 x i8> %a) {
; CHECK: test_vmaxv_u8:
; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %umaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxv_u16(<4 x i16> %a) {
; CHECK: test_vmaxv_u16:
; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %umaxv.i to i16
  ret i16 %0
}

define i8 @test_vmaxvq_s8(<16 x i8> %a) {
; CHECK: test_vmaxvq_s8:
; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %smaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxvq_s16(<8 x i16> %a) {
; CHECK: test_vmaxvq_s16:
; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %smaxv.i to i16
  ret i16 %0
}

define i32 @test_vmaxvq_s32(<4 x i32> %a) {
; CHECK: test_vmaxvq_s32:
; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a)
  ret i32 %smaxv.i
}

define i8 @test_vmaxvq_u8(<16 x i8> %a) {
; CHECK: test_vmaxvq_u8:
; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %umaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxvq_u16(<8 x i16> %a) {
; CHECK: test_vmaxvq_u16:
; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %umaxv.i to i16
  ret i16 %0
}

define i32 @test_vmaxvq_u32(<4 x i32> %a) {
; CHECK: test_vmaxvq_u32:
; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a)
  ret i32 %umaxv.i
}

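; vminv/vminvq: signed/unsigned minimum across all lanes (SMINV/UMINV),
; mirroring the maximum tests above.
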
define i8 @test_vminv_s8(<8 x i8> %a) {
; CHECK: test_vminv_s8:
; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %sminv.i to i8
  ret i8 %0
}

define i16 @test_vminv_s16(<4 x i16> %a) {
; CHECK: test_vminv_s16:
; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %sminv.i to i16
  ret i16 %0
}

define i8 @test_vminv_u8(<8 x i8> %a) {
; CHECK: test_vminv_u8:
; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %uminv.i to i8
  ret i8 %0
}

define i16 @test_vminv_u16(<4 x i16> %a) {
; CHECK: test_vminv_u16:
; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %uminv.i to i16
  ret i16 %0
}

define i8 @test_vminvq_s8(<16 x i8> %a) {
; CHECK: test_vminvq_s8:
; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %sminv.i to i8
  ret i8 %0
}

define i16 @test_vminvq_s16(<8 x i16> %a) {
; CHECK: test_vminvq_s16:
; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %sminv.i to i16
  ret i16 %0
}

define i32 @test_vminvq_s32(<4 x i32> %a) {
; CHECK: test_vminvq_s32:
; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a)
  ret i32 %sminv.i
}

define i8 @test_vminvq_u8(<16 x i8> %a) {
; CHECK: test_vminvq_u8:
; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %uminv.i to i8
  ret i8 %0
}

define i16 @test_vminvq_u16(<8 x i16> %a) {
; CHECK: test_vminvq_u16:
; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %uminv.i to i16
  ret i16 %0
}

define i32 @test_vminvq_u32(<4 x i32> %a) {
; CHECK: test_vminvq_u32:
; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a)
  ret i32 %uminv.i
}

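; vaddv/vaddvq: plain add across all lanes. ADDV has no signed/unsigned
; distinction, so the unsigned tests deliberately reuse the saddv intrinsic;
; both signedness variants must select the same ADDV instruction.
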
define i8 @test_vaddv_s8(<8 x i8> %a) {
; CHECK: test_vaddv_s8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddv_s16(<4 x i16> %a) {
; CHECK: test_vaddv_s16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i8 @test_vaddv_u8(<8 x i8> %a) {
; CHECK: test_vaddv_u8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddv_u16(<4 x i16> %a) {
; CHECK: test_vaddv_u16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i8 @test_vaddvq_s8(<16 x i8> %a) {
; CHECK: test_vaddvq_s8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddvq_s16(<8 x i16> %a) {
; CHECK: test_vaddvq_s16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i32 @test_vaddvq_s32(<4 x i32> %a) {
; CHECK: test_vaddvq_s32:
; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a)
  ret i32 %vaddv.i
}

define i8 @test_vaddvq_u8(<16 x i8> %a) {
; CHECK: test_vaddvq_u8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddvq_u16(<8 x i16> %a) {
; CHECK: test_vaddvq_u16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i32 @test_vaddvq_u32(<4 x i32> %a) {
; CHECK: test_vaddvq_u32:
; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a)
  ret i32 %vaddv.i
}

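; Floating-point reductions over v4f32: FMAXV/FMINV, plus the FMAXNMV/FMINNMV
; forms that follow IEEE maxNum/minNum NaN handling.
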
define float @test_vmaxvq_f32(<4 x float> %a) {
; CHECK: test_vmaxvq_f32:
; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a)
  ret float %0
}

define float @test_vminvq_f32(<4 x float> %a) {
; CHECK: test_vminvq_f32:
; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a)
  ret float %0
}

define float @test_vmaxnmvq_f32(<4 x float> %a) {
; CHECK: test_vmaxnmvq_f32:
; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a)
  ret float %0
}

define float @test_vminnmvq_f32(<4 x float> %a) {
; CHECK: test_vminnmvq_f32:
; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a)
  ret float %0
}

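; For reference: these test functions mirror the ACLE across-vector builtins
; from arm_neon.h (an assumed front-end mapping, not checked by this test);
; e.g. a C call such as vaddlvq_s32(a) is expected to reach the backend as the
; @llvm.aarch64.neon.saddlv.i64.v4i32 call covered by test_vaddlvq_s32 above.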