Mercurial > hg > Members > tobaru > cbc > CbC_llvm
comparison test/CodeGen/AArch64/arm64-neon-2velem-high.ll @ 95:afa8332a0e37
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Oct 2015 17:48:58 +0900 |
parents | 54457678186b |
children | 1172e4bd9c6f |
comparison
equal
deleted
inserted
replaced
84:f3e34b893a5f | 95:afa8332a0e37 |
---|---|
1 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s | 1 ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \ |
2 | 2 ; RUN: < %s -verify-machineinstrs -asm-verbose=false | FileCheck %s |
3 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) | 3 |
4 | 4 define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) #0 { |
5 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) | |
6 | |
7 declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) | |
8 | |
9 declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) | |
10 | |
11 declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) | |
12 | |
13 declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) | |
14 | |
15 declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) | |
16 | |
17 declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) | |
18 | |
19 declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) | |
20 | |
21 declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) | |
22 | |
23 declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) | |
24 | |
25 declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) | |
26 | |
27 define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) { | |
28 ; CHECK-LABEL: test_vmull_high_n_s16: | 5 ; CHECK-LABEL: test_vmull_high_n_s16: |
29 ; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0 | 6 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 |
30 ; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | 7 ; CHECK-NEXT: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h |
8 ; CHECK-NEXT: ret | |
31 entry: | 9 entry: |
32 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | 10 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
33 %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 | 11 %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 |
34 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 | 12 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 |
35 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 | 13 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 |
36 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 | 14 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 |
37 %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) | 15 %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) |
38 ret <4 x i32> %vmull15.i.i | 16 ret <4 x i32> %vmull15.i.i |
39 } | 17 } |
40 | 18 |
41 define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) { | 19 define <4 x i32> @test_vmull_high_n_s16_imm(<8 x i16> %a) #0 { |
20 ; CHECK-LABEL: test_vmull_high_n_s16_imm: | |
21 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d | |
22 ; CHECK-NEXT: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | |
23 ; CHECK-NEXT: ret | |
24 entry: | |
25 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | |
26 %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>) | |
27 ret <4 x i32> %vmull15.i.i | |
28 } | |
29 | |
30 define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) #0 { | |
42 ; CHECK-LABEL: test_vmull_high_n_s32: | 31 ; CHECK-LABEL: test_vmull_high_n_s32: |
43 ; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0 | 32 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 |
44 ; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | 33 ; CHECK-NEXT: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s |
34 ; CHECK-NEXT: ret | |
45 entry: | 35 entry: |
46 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | 36 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> |
47 %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 | 37 %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 |
48 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 | 38 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 |
49 %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) | 39 %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) |
50 ret <2 x i64> %vmull9.i.i | 40 ret <2 x i64> %vmull9.i.i |
51 } | 41 } |
52 | 42 |
53 define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) { | 43 define <2 x i64> @test_vmull_high_n_s32_imm(<4 x i32> %a) #0 { |
44 ; CHECK-LABEL: test_vmull_high_n_s32_imm: | |
45 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1, msl #8 | |
46 ; CHECK-NEXT: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | |
47 ; CHECK-NEXT: ret | |
48 entry: | |
49 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | |
50 %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 511, i32 511>) | |
51 ret <2 x i64> %vmull9.i.i | |
52 } | |
53 | |
54 define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) #0 { | |
54 ; CHECK-LABEL: test_vmull_high_n_u16: | 55 ; CHECK-LABEL: test_vmull_high_n_u16: |
55 ; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0 | 56 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 |
56 ; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | 57 ; CHECK-NEXT: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h |
58 ; CHECK-NEXT: ret | |
57 entry: | 59 entry: |
58 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | 60 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
59 %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 | 61 %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 |
60 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 | 62 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 |
61 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 | 63 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 |
62 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 | 64 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 |
63 %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) | 65 %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) |
64 ret <4 x i32> %vmull15.i.i | 66 ret <4 x i32> %vmull15.i.i |
65 } | 67 } |
66 | 68 |
67 define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) { | 69 define <4 x i32> @test_vmull_high_n_u16_imm(<8 x i16> %a) #0 { |
70 ; CHECK-LABEL: test_vmull_high_n_u16_imm: | |
71 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x11, lsl #8 | |
72 ; CHECK-NEXT: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | |
73 ; CHECK-NEXT: ret | |
74 entry: | |
75 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | |
76 %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 4352, i16 4352, i16 4352, i16 4352>) | |
77 ret <4 x i32> %vmull15.i.i | |
78 } | |
79 | |
80 define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) #0 { | |
68 ; CHECK-LABEL: test_vmull_high_n_u32: | 81 ; CHECK-LABEL: test_vmull_high_n_u32: |
69 ; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0 | 82 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 |
70 ; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | 83 ; CHECK-NEXT: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s |
84 ; CHECK-NEXT: ret | |
71 entry: | 85 entry: |
72 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | 86 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> |
73 %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 | 87 %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 |
74 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 | 88 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 |
75 %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) | 89 %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) |
76 ret <2 x i64> %vmull9.i.i | 90 ret <2 x i64> %vmull9.i.i |
77 } | 91 } |
78 | 92 |
79 define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) { | 93 define <2 x i64> @test_vmull_high_n_u32_imm(<4 x i32> %a) #0 { |
94 ; CHECK-LABEL: test_vmull_high_n_u32_imm: | |
95 ; CHECK-NEXT: mvni [[REPLICATE:v[0-9]+]].4s, #0x1, msl #8 | |
96 ; CHECK-NEXT: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | |
97 ; CHECK-NEXT: ret | |
98 entry: | |
99 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | |
100 %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 4294966784, i32 4294966784>) | |
101 ret <2 x i64> %vmull9.i.i | |
102 } | |
103 | |
104 define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) #0 { | |
80 ; CHECK-LABEL: test_vqdmull_high_n_s16: | 105 ; CHECK-LABEL: test_vqdmull_high_n_s16: |
81 ; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0 | 106 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 |
82 ; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | 107 ; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h |
108 ; CHECK-NEXT: ret | |
83 entry: | 109 entry: |
84 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | 110 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
85 %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 | 111 %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 |
86 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 | 112 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 |
87 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 | 113 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 |
88 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 | 114 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 |
89 %vqdmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) | 115 %vqdmull15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) |
90 ret <4 x i32> %vqdmull15.i.i | 116 ret <4 x i32> %vqdmull15.i.i |
91 } | 117 } |
92 | 118 |
93 define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) { | 119 define <4 x i32> @test_vqdmull_high_n_s16_imm(<8 x i16> %a) #0 { |
120 ; CHECK-LABEL: test_vqdmull_high_n_s16_imm: | |
121 ; CHECK-NEXT: mvni [[REPLICATE:v[0-9]+]].8h, #0x11, lsl #8 | |
122 ; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | |
123 ; CHECK-NEXT: ret | |
124 entry: | |
125 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | |
126 %vqdmull15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 61183, i16 61183, i16 61183, i16 61183>) | |
127 ret <4 x i32> %vqdmull15.i.i | |
128 } | |
129 | |
130 define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) #0 { | |
94 ; CHECK-LABEL: test_vqdmull_high_n_s32: | 131 ; CHECK-LABEL: test_vqdmull_high_n_s32: |
95 ; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0 | 132 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 |
96 ; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | 133 ; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s |
134 ; CHECK-NEXT: ret | |
97 entry: | 135 entry: |
98 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | 136 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> |
99 %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 | 137 %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 |
100 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 | 138 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 |
101 %vqdmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) | 139 %vqdmull9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) |
102 ret <2 x i64> %vqdmull9.i.i | 140 ret <2 x i64> %vqdmull9.i.i |
103 } | 141 } |
104 | 142 |
105 define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { | 143 define <2 x i64> @test_vqdmull_high_n_s32_imm(<4 x i32> %a) #0 { |
144 ; CHECK-LABEL: test_vqdmull_high_n_s32_imm: | |
145 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d | |
146 ; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | |
147 ; CHECK-NEXT: ret | |
148 entry: | |
149 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | |
150 %vqdmull9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>) | |
151 ret <2 x i64> %vqdmull9.i.i | |
152 } | |
153 | |
154 define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { | |
106 ; CHECK-LABEL: test_vmlal_high_n_s16: | 155 ; CHECK-LABEL: test_vmlal_high_n_s16: |
107 ; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0 | 156 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 |
108 ; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | 157 ; CHECK-NEXT: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h |
158 ; CHECK-NEXT: ret | |
109 entry: | 159 entry: |
110 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | 160 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
111 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 | 161 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 |
112 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 | 162 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 |
113 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 | 163 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 |
114 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 | 164 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 |
115 %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) | 165 %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) |
116 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a | 166 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a |
117 ret <4 x i32> %add.i.i | 167 ret <4 x i32> %add.i.i |
118 } | 168 } |
119 | 169 |
120 define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { | 170 define <4 x i32> @test_vmlal_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 { |
171 ; CHECK-LABEL: test_vmlal_high_n_s16_imm: | |
172 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d | |
173 ; CHECK-NEXT: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | |
174 ; CHECK-NEXT: ret | |
175 entry: | |
176 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | |
177 %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>) | |
178 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a | |
179 ret <4 x i32> %add.i.i | |
180 } | |
181 | |
182 define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { | |
121 ; CHECK-LABEL: test_vmlal_high_n_s32: | 183 ; CHECK-LABEL: test_vmlal_high_n_s32: |
122 ; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0 | 184 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 |
123 ; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | 185 ; CHECK-NEXT: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s |
186 ; CHECK-NEXT: ret | |
124 entry: | 187 entry: |
125 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | 188 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> |
126 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 | 189 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 |
127 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 | 190 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 |
128 %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) | 191 %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) |
129 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a | 192 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a |
130 ret <2 x i64> %add.i.i | 193 ret <2 x i64> %add.i.i |
131 } | 194 } |
132 | 195 |
133 define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) { | 196 define <2 x i64> @test_vmlal_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 { |
197 ; CHECK-LABEL: test_vmlal_high_n_s32_imm: | |
198 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d | |
199 ; CHECK-NEXT: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | |
200 ; CHECK-NEXT: ret | |
201 entry: | |
202 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | |
203 %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>) | |
204 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a | |
205 ret <2 x i64> %add.i.i | |
206 } | |
207 | |
208 define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { | |
134 ; CHECK-LABEL: test_vmlal_high_n_u16: | 209 ; CHECK-LABEL: test_vmlal_high_n_u16: |
135 ; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0 | 210 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 |
136 ; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | 211 ; CHECK-NEXT: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h |
212 ; CHECK-NEXT: ret | |
137 entry: | 213 entry: |
138 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | 214 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
139 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 | 215 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 |
140 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 | 216 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 |
141 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 | 217 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 |
142 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 | 218 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 |
143 %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) | 219 %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) |
144 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a | 220 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a |
145 ret <4 x i32> %add.i.i | 221 ret <4 x i32> %add.i.i |
146 } | 222 } |
147 | 223 |
148 define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) { | 224 define <4 x i32> @test_vmlal_high_n_u16_imm(<4 x i32> %a, <8 x i16> %b) #0 { |
225 ; CHECK-LABEL: test_vmlal_high_n_u16_imm: | |
226 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d | |
227 ; CHECK-NEXT: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | |
228 ; CHECK-NEXT: ret | |
229 entry: | |
230 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | |
231 %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>) | |
232 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a | |
233 ret <4 x i32> %add.i.i | |
234 } | |
235 | |
236 define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { | |
149 ; CHECK-LABEL: test_vmlal_high_n_u32: | 237 ; CHECK-LABEL: test_vmlal_high_n_u32: |
150 ; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0 | 238 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 |
151 ; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | 239 ; CHECK-NEXT: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s |
240 ; CHECK-NEXT: ret | |
152 entry: | 241 entry: |
153 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | 242 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> |
154 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 | 243 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 |
155 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 | 244 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 |
156 %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) | 245 %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) |
157 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a | 246 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a |
158 ret <2 x i64> %add.i.i | 247 ret <2 x i64> %add.i.i |
159 } | 248 } |
160 | 249 |
161 define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { | 250 define <2 x i64> @test_vmlal_high_n_u32_imm(<2 x i64> %a, <4 x i32> %b) #0 { |
251 ; CHECK-LABEL: test_vmlal_high_n_u32_imm: | |
252 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d | |
253 ; CHECK-NEXT: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | |
254 ; CHECK-NEXT: ret | |
255 entry: | |
256 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | |
257 %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>) | |
258 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a | |
259 ret <2 x i64> %add.i.i | |
260 } | |
261 | |
262 define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { | |
162 ; CHECK-LABEL: test_vqdmlal_high_n_s16: | 263 ; CHECK-LABEL: test_vqdmlal_high_n_s16: |
163 ; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h | 264 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 |
265 ; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | |
266 ; CHECK-NEXT: ret | |
164 entry: | 267 entry: |
165 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | 268 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
166 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 | 269 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 |
167 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 | 270 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 |
168 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 | 271 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 |
169 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 | 272 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 |
170 %vqdmlal15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) | 273 %vqdmlal15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) |
171 %vqdmlal17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i) | 274 %vqdmlal17.i.i = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i) |
172 ret <4 x i32> %vqdmlal17.i.i | 275 ret <4 x i32> %vqdmlal17.i.i |
173 } | 276 } |
174 | 277 |
175 define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { | 278 define <4 x i32> @test_vqdmlal_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 { |
279 ; CHECK-LABEL: test_vqdmlal_high_n_s16_imm: | |
280 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d | |
281 ; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | |
282 ; CHECK-NEXT: ret | |
283 entry: | |
284 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | |
285 %vqdmlal15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>) | |
286 %vqdmlal17.i.i = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i) | |
287 ret <4 x i32> %vqdmlal17.i.i | |
288 } | |
289 | |
290 define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { | |
176 ; CHECK-LABEL: test_vqdmlal_high_n_s32: | 291 ; CHECK-LABEL: test_vqdmlal_high_n_s32: |
177 ; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s | 292 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 |
293 ; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | |
294 ; CHECK-NEXT: ret | |
178 entry: | 295 entry: |
179 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | 296 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> |
180 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 | 297 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 |
181 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 | 298 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 |
182 %vqdmlal9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) | 299 %vqdmlal9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) |
183 %vqdmlal11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i) | 300 %vqdmlal11.i.i = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i) |
184 ret <2 x i64> %vqdmlal11.i.i | 301 ret <2 x i64> %vqdmlal11.i.i |
185 } | 302 } |
186 | 303 |
187 define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { | 304 define <2 x i64> @test_vqdmlal_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 { |
305 ; CHECK-LABEL: test_vqdmlal_high_n_s32_imm: | |
306 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d | |
307 ; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | |
308 ; CHECK-NEXT: ret | |
309 entry: | |
310 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | |
311 %vqdmlal9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>) | |
312 %vqdmlal11.i.i = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i) | |
313 ret <2 x i64> %vqdmlal11.i.i | |
314 } | |
315 | |
316 define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { | |
188 ; CHECK-LABEL: test_vmlsl_high_n_s16: | 317 ; CHECK-LABEL: test_vmlsl_high_n_s16: |
189 ; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h | 318 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 |
319 ; CHECK-NEXT: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | |
320 ; CHECK-NEXT: ret | |
190 entry: | 321 entry: |
191 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | 322 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
192 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 | 323 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 |
193 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 | 324 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 |
194 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 | 325 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 |
195 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 | 326 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 |
196 %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) | 327 %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) |
197 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i | 328 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i |
198 ret <4 x i32> %sub.i.i | 329 ret <4 x i32> %sub.i.i |
199 } | 330 } |
200 | 331 |
201 define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { | 332 define <4 x i32> @test_vmlsl_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 { |
333 ; CHECK-LABEL: test_vmlsl_high_n_s16_imm: | |
334 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d | |
335 ; CHECK-NEXT: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | |
336 ; CHECK-NEXT: ret | |
337 entry: | |
338 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | |
339 %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>) | |
340 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i | |
341 ret <4 x i32> %sub.i.i | |
342 } | |
343 | |
344 define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { | |
202 ; CHECK-LABEL: test_vmlsl_high_n_s32: | 345 ; CHECK-LABEL: test_vmlsl_high_n_s32: |
203 ; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s | 346 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 |
347 ; CHECK-NEXT: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | |
348 ; CHECK-NEXT: ret | |
204 entry: | 349 entry: |
205 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | 350 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> |
206 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 | 351 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 |
207 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 | 352 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 |
208 %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) | 353 %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) |
209 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i | 354 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i |
210 ret <2 x i64> %sub.i.i | 355 ret <2 x i64> %sub.i.i |
211 } | 356 } |
212 | 357 |
213 define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) { | 358 define <2 x i64> @test_vmlsl_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 { |
359 ; CHECK-LABEL: test_vmlsl_high_n_s32_imm: | |
360 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d | |
361 ; CHECK-NEXT: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | |
362 ; CHECK-NEXT: ret | |
363 entry: | |
364 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | |
365 %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>) | |
366 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i | |
367 ret <2 x i64> %sub.i.i | |
368 } | |
369 | |
370 define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { | |
214 ; CHECK-LABEL: test_vmlsl_high_n_u16: | 371 ; CHECK-LABEL: test_vmlsl_high_n_u16: |
215 ; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h | 372 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 |
373 ; CHECK-NEXT: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | |
374 ; CHECK-NEXT: ret | |
216 entry: | 375 entry: |
217 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | 376 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
218 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 | 377 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 |
219 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 | 378 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 |
220 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 | 379 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 |
221 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 | 380 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 |
222 %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) | 381 %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) |
223 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i | 382 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i |
224 ret <4 x i32> %sub.i.i | 383 ret <4 x i32> %sub.i.i |
225 } | 384 } |
226 | 385 |
227 define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) { | 386 define <4 x i32> @test_vmlsl_high_n_u16_imm(<4 x i32> %a, <8 x i16> %b) #0 { |
387 ; CHECK-LABEL: test_vmlsl_high_n_u16_imm: | |
388 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d | |
389 ; CHECK-NEXT: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | |
390 ; CHECK-NEXT: ret | |
391 entry: | |
392 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | |
393 %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>) | |
394 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i | |
395 ret <4 x i32> %sub.i.i | |
396 } | |
397 | |
398 define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { | |
228 ; CHECK-LABEL: test_vmlsl_high_n_u32: | 399 ; CHECK-LABEL: test_vmlsl_high_n_u32: |
229 ; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s | 400 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 |
401 ; CHECK-NEXT: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | |
402 ; CHECK-NEXT: ret | |
230 entry: | 403 entry: |
231 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | 404 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> |
232 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 | 405 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 |
233 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 | 406 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 |
234 %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) | 407 %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) |
235 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i | 408 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i |
236 ret <2 x i64> %sub.i.i | 409 ret <2 x i64> %sub.i.i |
237 } | 410 } |
238 | 411 |
239 define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { | 412 define <2 x i64> @test_vmlsl_high_n_u32_imm(<2 x i64> %a, <4 x i32> %b) #0 { |
413 ; CHECK-LABEL: test_vmlsl_high_n_u32_imm: | |
414 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d | |
415 ; CHECK-NEXT: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | |
416 ; CHECK-NEXT: ret | |
417 entry: | |
418 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | |
419 %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>) | |
420 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i | |
421 ret <2 x i64> %sub.i.i | |
422 } | |
423 | |
424 define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { | |
240 ; CHECK-LABEL: test_vqdmlsl_high_n_s16: | 425 ; CHECK-LABEL: test_vqdmlsl_high_n_s16: |
241 ; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h | 426 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 |
427 ; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | |
428 ; CHECK-NEXT: ret | |
242 entry: | 429 entry: |
243 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | 430 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
244 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 | 431 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 |
245 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 | 432 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 |
246 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 | 433 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 |
247 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 | 434 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 |
248 %vqdmlsl15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) | 435 %vqdmlsl15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) |
249 %vqdmlsl17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i) | 436 %vqdmlsl17.i.i = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i) |
250 ret <4 x i32> %vqdmlsl17.i.i | 437 ret <4 x i32> %vqdmlsl17.i.i |
251 } | 438 } |
252 | 439 |
253 define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { | 440 define <4 x i32> @test_vqdmlsl_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 { |
441 ; CHECK-LABEL: test_vqdmlsl_high_n_s16_imm: | |
442 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d | |
443 ; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h | |
444 ; CHECK-NEXT: ret | |
445 entry: | |
446 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> | |
447 %vqdmlsl15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>) | |
448 %vqdmlsl17.i.i = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i) | |
449 ret <4 x i32> %vqdmlsl17.i.i | |
450 } | |
451 | |
452 define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { | |
254 ; CHECK-LABEL: test_vqdmlsl_high_n_s32: | 453 ; CHECK-LABEL: test_vqdmlsl_high_n_s32: |
255 ; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s | 454 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 |
455 ; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | |
456 ; CHECK-NEXT: ret | |
256 entry: | 457 entry: |
257 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | 458 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> |
258 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 | 459 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 |
259 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 | 460 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 |
260 %vqdmlsl9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) | 461 %vqdmlsl9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) |
261 %vqdmlsl11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i) | 462 %vqdmlsl11.i.i = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i) |
262 ret <2 x i64> %vqdmlsl11.i.i | 463 ret <2 x i64> %vqdmlsl11.i.i |
263 } | 464 } |
264 | 465 |
265 define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) { | 466 define <2 x i64> @test_vqdmlsl_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 { |
467 ; CHECK-LABEL: test_vqdmlsl_high_n_s32_imm: | |
468 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d | |
469 ; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s | |
470 ; CHECK-NEXT: ret | |
471 entry: | |
472 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> | |
473 %vqdmlsl9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>) | |
474 %vqdmlsl11.i.i = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i) | |
475 ret <2 x i64> %vqdmlsl11.i.i | |
476 } | |
477 | |
478 define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) #0 { | |
266 ; CHECK-LABEL: test_vmul_n_f32: | 479 ; CHECK-LABEL: test_vmul_n_f32: |
267 ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] | 480 ; CHECK-NEXT: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] |
481 ; CHECK-NEXT: ret | |
268 entry: | 482 entry: |
269 %vecinit.i = insertelement <2 x float> undef, float %b, i32 0 | 483 %vecinit.i = insertelement <2 x float> undef, float %b, i32 0 |
270 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1 | 484 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1 |
271 %mul.i = fmul <2 x float> %vecinit1.i, %a | 485 %mul.i = fmul <2 x float> %vecinit1.i, %a |
272 ret <2 x float> %mul.i | 486 ret <2 x float> %mul.i |
273 } | 487 } |
274 | 488 |
275 define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) { | 489 define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) #0 { |
276 ; CHECK-LABEL: test_vmulq_n_f32: | 490 ; CHECK-LABEL: test_vmulq_n_f32: |
277 ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] | 491 ; CHECK-NEXT: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] |
492 ; CHECK-NEXT: ret | |
278 entry: | 493 entry: |
279 %vecinit.i = insertelement <4 x float> undef, float %b, i32 0 | 494 %vecinit.i = insertelement <4 x float> undef, float %b, i32 0 |
280 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1 | 495 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1 |
281 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2 | 496 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2 |
282 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3 | 497 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3 |
283 %mul.i = fmul <4 x float> %vecinit3.i, %a | 498 %mul.i = fmul <4 x float> %vecinit3.i, %a |
284 ret <4 x float> %mul.i | 499 ret <4 x float> %mul.i |
285 } | 500 } |
286 | 501 |
287 define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) { | 502 define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) #0 { |
288 ; CHECK-LABEL: test_vmulq_n_f64: | 503 ; CHECK-LABEL: test_vmulq_n_f64: |
289 ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] | 504 ; CHECK-NEXT: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] |
505 ; CHECK-NEXT: ret | |
290 entry: | 506 entry: |
291 %vecinit.i = insertelement <2 x double> undef, double %b, i32 0 | 507 %vecinit.i = insertelement <2 x double> undef, double %b, i32 0 |
292 %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1 | 508 %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1 |
293 %mul.i = fmul <2 x double> %vecinit1.i, %a | 509 %mul.i = fmul <2 x double> %vecinit1.i, %a |
294 ret <2 x double> %mul.i | 510 ret <2 x double> %mul.i |
295 } | 511 } |
296 | 512 |
297 define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) { | 513 define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 { |
298 ; CHECK-LABEL: test_vfma_n_f32: | 514 ; CHECK-LABEL: test_vfma_n_f32: |
299 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] | 515 ; CHECK-NEXT: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] |
516 ; CHECK-NEXT: ret | |
300 entry: | 517 entry: |
301 %vecinit.i = insertelement <2 x float> undef, float %n, i32 0 | 518 %vecinit.i = insertelement <2 x float> undef, float %n, i32 0 |
302 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1 | 519 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1 |
303 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a) | 520 %0 = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a) |
304 ret <2 x float> %0 | 521 ret <2 x float> %0 |
305 } | 522 } |
306 | 523 |
307 define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) { | 524 define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 { |
308 ; CHECK-LABEL: test_vfmaq_n_f32: | 525 ; CHECK-LABEL: test_vfmaq_n_f32: |
309 ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] | 526 ; CHECK-NEXT: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] |
527 ; CHECK-NEXT: ret | |
310 entry: | 528 entry: |
311 %vecinit.i = insertelement <4 x float> undef, float %n, i32 0 | 529 %vecinit.i = insertelement <4 x float> undef, float %n, i32 0 |
312 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1 | 530 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1 |
313 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2 | 531 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2 |
314 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3 | 532 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3 |
315 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a) | 533 %0 = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a) |
316 ret <4 x float> %0 | 534 ret <4 x float> %0 |
317 } | 535 } |
318 | 536 |
319 define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) { | 537 define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 { |
320 ; CHECK-LABEL: test_vfms_n_f32: | 538 ; CHECK-LABEL: test_vfms_n_f32: |
321 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] | 539 ; CHECK-NEXT: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] |
540 ; CHECK-NEXT: ret | |
322 entry: | 541 entry: |
323 %vecinit.i = insertelement <2 x float> undef, float %n, i32 0 | 542 %vecinit.i = insertelement <2 x float> undef, float %n, i32 0 |
324 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1 | 543 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1 |
325 %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b | 544 %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b |
326 %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a) | 545 %1 = call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a) |
327 ret <2 x float> %1 | 546 ret <2 x float> %1 |
328 } | 547 } |
329 | 548 |
330 define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) { | 549 define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 { |
331 ; CHECK-LABEL: test_vfmsq_n_f32: | 550 ; CHECK-LABEL: test_vfmsq_n_f32: |
332 ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] | 551 ; CHECK-NEXT: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] |
552 ; CHECK-NEXT: ret | |
333 entry: | 553 entry: |
334 %vecinit.i = insertelement <4 x float> undef, float %n, i32 0 | 554 %vecinit.i = insertelement <4 x float> undef, float %n, i32 0 |
335 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1 | 555 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1 |
336 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2 | 556 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2 |
337 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3 | 557 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3 |
338 %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b | 558 %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b |
339 %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a) | 559 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a) |
340 ret <4 x float> %1 | 560 ret <4 x float> %1 |
341 } | 561 } |
562 | |
563 attributes #0 = { nounwind } | |
564 | |
565 declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) | |
566 declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) | |
567 declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) | |
568 declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) | |
569 declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) | |
570 declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) | |
571 declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) | |
572 declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) | |
573 declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) | |
574 declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) | |
575 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) | |
576 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) |