comparison test/CodeGen/AArch64/arm64-neon-2velem-high.ll @ 95:afa8332a0e37

LLVM 3.8
author Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date Tue, 13 Oct 2015 17:48:58 +0900
parents 54457678186b
children 1172e4bd9c6f
comparison
equal deleted inserted replaced
84:f3e34b893a5f 95:afa8332a0e37
1 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s 1 ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \
2 2 ; RUN: < %s -verify-machineinstrs -asm-verbose=false | FileCheck %s
3 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) 3
4 4 define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) #0 {
5 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
6
7 declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
8
9 declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
10
11 declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
12
13 declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
14
15 declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
16
17 declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
18
19 declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
20
21 declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
22
23 declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
24
25 declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
26
27 define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) {
28 ; CHECK-LABEL: test_vmull_high_n_s16: 5 ; CHECK-LABEL: test_vmull_high_n_s16:
29 ; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0 6 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
30 ; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h 7 ; CHECK-NEXT: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
8 ; CHECK-NEXT: ret
31 entry: 9 entry:
32 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 10 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
33 %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 11 %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
34 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 12 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
35 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 13 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
36 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 14 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
37 %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) 15 %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
38 ret <4 x i32> %vmull15.i.i 16 ret <4 x i32> %vmull15.i.i
39 } 17 }
40 18
41 define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) { 19 define <4 x i32> @test_vmull_high_n_s16_imm(<8 x i16> %a) #0 {
20 ; CHECK-LABEL: test_vmull_high_n_s16_imm:
21 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
22 ; CHECK-NEXT: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
23 ; CHECK-NEXT: ret
24 entry:
25 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
26 %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
27 ret <4 x i32> %vmull15.i.i
28 }
29
30 define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) #0 {
42 ; CHECK-LABEL: test_vmull_high_n_s32: 31 ; CHECK-LABEL: test_vmull_high_n_s32:
43 ; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0 32 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
44 ; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s 33 ; CHECK-NEXT: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
34 ; CHECK-NEXT: ret
45 entry: 35 entry:
46 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 36 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
47 %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 37 %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
48 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 38 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
49 %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) 39 %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
50 ret <2 x i64> %vmull9.i.i 40 ret <2 x i64> %vmull9.i.i
51 } 41 }
52 42
53 define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) { 43 define <2 x i64> @test_vmull_high_n_s32_imm(<4 x i32> %a) #0 {
44 ; CHECK-LABEL: test_vmull_high_n_s32_imm:
45 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1, msl #8
46 ; CHECK-NEXT: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
47 ; CHECK-NEXT: ret
48 entry:
49 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
50 %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 511, i32 511>)
51 ret <2 x i64> %vmull9.i.i
52 }
53
54 define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) #0 {
54 ; CHECK-LABEL: test_vmull_high_n_u16: 55 ; CHECK-LABEL: test_vmull_high_n_u16:
55 ; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0 56 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
56 ; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h 57 ; CHECK-NEXT: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
58 ; CHECK-NEXT: ret
57 entry: 59 entry:
58 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 60 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
59 %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 61 %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
60 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 62 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
61 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 63 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
62 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 64 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
63 %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) 65 %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
64 ret <4 x i32> %vmull15.i.i 66 ret <4 x i32> %vmull15.i.i
65 } 67 }
66 68
67 define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) { 69 define <4 x i32> @test_vmull_high_n_u16_imm(<8 x i16> %a) #0 {
70 ; CHECK-LABEL: test_vmull_high_n_u16_imm:
71 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x11, lsl #8
72 ; CHECK-NEXT: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
73 ; CHECK-NEXT: ret
74 entry:
75 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
76 %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 4352, i16 4352, i16 4352, i16 4352>)
77 ret <4 x i32> %vmull15.i.i
78 }
79
80 define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) #0 {
68 ; CHECK-LABEL: test_vmull_high_n_u32: 81 ; CHECK-LABEL: test_vmull_high_n_u32:
69 ; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0 82 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
70 ; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s 83 ; CHECK-NEXT: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
84 ; CHECK-NEXT: ret
71 entry: 85 entry:
72 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 86 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
73 %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 87 %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
74 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 88 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
75 %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) 89 %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
76 ret <2 x i64> %vmull9.i.i 90 ret <2 x i64> %vmull9.i.i
77 } 91 }
78 92
79 define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) { 93 define <2 x i64> @test_vmull_high_n_u32_imm(<4 x i32> %a) #0 {
94 ; CHECK-LABEL: test_vmull_high_n_u32_imm:
95 ; CHECK-NEXT: mvni [[REPLICATE:v[0-9]+]].4s, #0x1, msl #8
96 ; CHECK-NEXT: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
97 ; CHECK-NEXT: ret
98 entry:
99 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
100 %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 4294966784, i32 4294966784>)
101 ret <2 x i64> %vmull9.i.i
102 }
103
104 define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) #0 {
80 ; CHECK-LABEL: test_vqdmull_high_n_s16: 105 ; CHECK-LABEL: test_vqdmull_high_n_s16:
81 ; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0 106 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
82 ; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h 107 ; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
108 ; CHECK-NEXT: ret
83 entry: 109 entry:
84 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 110 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
85 %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 111 %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
86 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 112 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
87 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 113 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
88 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 114 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
89 %vqdmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) 115 %vqdmull15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
90 ret <4 x i32> %vqdmull15.i.i 116 ret <4 x i32> %vqdmull15.i.i
91 } 117 }
92 118
93 define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) { 119 define <4 x i32> @test_vqdmull_high_n_s16_imm(<8 x i16> %a) #0 {
120 ; CHECK-LABEL: test_vqdmull_high_n_s16_imm:
121 ; CHECK-NEXT: mvni [[REPLICATE:v[0-9]+]].8h, #0x11, lsl #8
122 ; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
123 ; CHECK-NEXT: ret
124 entry:
125 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
126 %vqdmull15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 61183, i16 61183, i16 61183, i16 61183>)
127 ret <4 x i32> %vqdmull15.i.i
128 }
129
130 define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) #0 {
94 ; CHECK-LABEL: test_vqdmull_high_n_s32: 131 ; CHECK-LABEL: test_vqdmull_high_n_s32:
95 ; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0 132 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
96 ; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s 133 ; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
134 ; CHECK-NEXT: ret
97 entry: 135 entry:
98 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 136 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
99 %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 137 %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
100 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 138 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
101 %vqdmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) 139 %vqdmull9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
102 ret <2 x i64> %vqdmull9.i.i 140 ret <2 x i64> %vqdmull9.i.i
103 } 141 }
104 142
105 define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { 143 define <2 x i64> @test_vqdmull_high_n_s32_imm(<4 x i32> %a) #0 {
144 ; CHECK-LABEL: test_vqdmull_high_n_s32_imm:
145 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
146 ; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
147 ; CHECK-NEXT: ret
148 entry:
149 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
150 %vqdmull9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
151 ret <2 x i64> %vqdmull9.i.i
152 }
153
154 define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
106 ; CHECK-LABEL: test_vmlal_high_n_s16: 155 ; CHECK-LABEL: test_vmlal_high_n_s16:
107 ; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0 156 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
108 ; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h 157 ; CHECK-NEXT: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
158 ; CHECK-NEXT: ret
109 entry: 159 entry:
110 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 160 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
111 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 161 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
112 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 162 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
113 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 163 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
114 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 164 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
115 %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) 165 %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
116 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a 166 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
117 ret <4 x i32> %add.i.i 167 ret <4 x i32> %add.i.i
118 } 168 }
119 169
120 define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { 170 define <4 x i32> @test_vmlal_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
171 ; CHECK-LABEL: test_vmlal_high_n_s16_imm:
172 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
173 ; CHECK-NEXT: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
174 ; CHECK-NEXT: ret
175 entry:
176 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
177 %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
178 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
179 ret <4 x i32> %add.i.i
180 }
181
182 define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
121 ; CHECK-LABEL: test_vmlal_high_n_s32: 183 ; CHECK-LABEL: test_vmlal_high_n_s32:
122 ; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0 184 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
123 ; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s 185 ; CHECK-NEXT: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
186 ; CHECK-NEXT: ret
124 entry: 187 entry:
125 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 188 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
126 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 189 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
127 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 190 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
128 %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) 191 %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
129 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a 192 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
130 ret <2 x i64> %add.i.i 193 ret <2 x i64> %add.i.i
131 } 194 }
132 195
133 define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) { 196 define <2 x i64> @test_vmlal_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
197 ; CHECK-LABEL: test_vmlal_high_n_s32_imm:
198 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
199 ; CHECK-NEXT: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
200 ; CHECK-NEXT: ret
201 entry:
202 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
203 %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
204 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
205 ret <2 x i64> %add.i.i
206 }
207
208 define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
134 ; CHECK-LABEL: test_vmlal_high_n_u16: 209 ; CHECK-LABEL: test_vmlal_high_n_u16:
135 ; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0 210 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
136 ; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h 211 ; CHECK-NEXT: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
212 ; CHECK-NEXT: ret
137 entry: 213 entry:
138 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 214 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
139 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 215 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
140 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 216 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
141 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 217 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
142 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 218 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
143 %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) 219 %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
144 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a 220 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
145 ret <4 x i32> %add.i.i 221 ret <4 x i32> %add.i.i
146 } 222 }
147 223
148 define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) { 224 define <4 x i32> @test_vmlal_high_n_u16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
225 ; CHECK-LABEL: test_vmlal_high_n_u16_imm:
226 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
227 ; CHECK-NEXT: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
228 ; CHECK-NEXT: ret
229 entry:
230 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
231 %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
232 %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
233 ret <4 x i32> %add.i.i
234 }
235
236 define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
149 ; CHECK-LABEL: test_vmlal_high_n_u32: 237 ; CHECK-LABEL: test_vmlal_high_n_u32:
150 ; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0 238 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
151 ; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s 239 ; CHECK-NEXT: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
240 ; CHECK-NEXT: ret
152 entry: 241 entry:
153 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 242 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
154 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 243 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
155 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 244 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
156 %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) 245 %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
157 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a 246 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
158 ret <2 x i64> %add.i.i 247 ret <2 x i64> %add.i.i
159 } 248 }
160 249
161 define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { 250 define <2 x i64> @test_vmlal_high_n_u32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
251 ; CHECK-LABEL: test_vmlal_high_n_u32_imm:
252 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
253 ; CHECK-NEXT: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
254 ; CHECK-NEXT: ret
255 entry:
256 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
257 %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
258 %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
259 ret <2 x i64> %add.i.i
260 }
261
262 define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
162 ; CHECK-LABEL: test_vqdmlal_high_n_s16: 263 ; CHECK-LABEL: test_vqdmlal_high_n_s16:
163 ; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 264 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
265 ; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
266 ; CHECK-NEXT: ret
164 entry: 267 entry:
165 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 268 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
166 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 269 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
167 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 270 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
168 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 271 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
169 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 272 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
170 %vqdmlal15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) 273 %vqdmlal15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
171 %vqdmlal17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i) 274 %vqdmlal17.i.i = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i)
172 ret <4 x i32> %vqdmlal17.i.i 275 ret <4 x i32> %vqdmlal17.i.i
173 } 276 }
174 277
175 define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { 278 define <4 x i32> @test_vqdmlal_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
279 ; CHECK-LABEL: test_vqdmlal_high_n_s16_imm:
280 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
281 ; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
282 ; CHECK-NEXT: ret
283 entry:
284 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
285 %vqdmlal15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
286 %vqdmlal17.i.i = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i)
287 ret <4 x i32> %vqdmlal17.i.i
288 }
289
290 define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
176 ; CHECK-LABEL: test_vqdmlal_high_n_s32: 291 ; CHECK-LABEL: test_vqdmlal_high_n_s32:
177 ; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 292 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
293 ; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
294 ; CHECK-NEXT: ret
178 entry: 295 entry:
179 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 296 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
180 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 297 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
181 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 298 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
182 %vqdmlal9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) 299 %vqdmlal9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
183 %vqdmlal11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i) 300 %vqdmlal11.i.i = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i)
184 ret <2 x i64> %vqdmlal11.i.i 301 ret <2 x i64> %vqdmlal11.i.i
185 } 302 }
186 303
187 define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { 304 define <2 x i64> @test_vqdmlal_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
305 ; CHECK-LABEL: test_vqdmlal_high_n_s32_imm:
306 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
307 ; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
308 ; CHECK-NEXT: ret
309 entry:
310 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
311 %vqdmlal9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
312 %vqdmlal11.i.i = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i)
313 ret <2 x i64> %vqdmlal11.i.i
314 }
315
316 define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
188 ; CHECK-LABEL: test_vmlsl_high_n_s16: 317 ; CHECK-LABEL: test_vmlsl_high_n_s16:
189 ; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 318 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
319 ; CHECK-NEXT: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
320 ; CHECK-NEXT: ret
190 entry: 321 entry:
191 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 322 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
192 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 323 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
193 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 324 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
194 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 325 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
195 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 326 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
196 %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) 327 %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
197 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i 328 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
198 ret <4 x i32> %sub.i.i 329 ret <4 x i32> %sub.i.i
199 } 330 }
200 331
201 define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { 332 define <4 x i32> @test_vmlsl_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
333 ; CHECK-LABEL: test_vmlsl_high_n_s16_imm:
334 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
335 ; CHECK-NEXT: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
336 ; CHECK-NEXT: ret
337 entry:
338 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
339 %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
340 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
341 ret <4 x i32> %sub.i.i
342 }
343
344 define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
202 ; CHECK-LABEL: test_vmlsl_high_n_s32: 345 ; CHECK-LABEL: test_vmlsl_high_n_s32:
203 ; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 346 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
347 ; CHECK-NEXT: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
348 ; CHECK-NEXT: ret
204 entry: 349 entry:
205 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 350 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
206 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 351 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
207 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 352 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
208 %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) 353 %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
209 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i 354 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
210 ret <2 x i64> %sub.i.i 355 ret <2 x i64> %sub.i.i
211 } 356 }
212 357
213 define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) { 358 define <2 x i64> @test_vmlsl_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
359 ; CHECK-LABEL: test_vmlsl_high_n_s32_imm:
360 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
361 ; CHECK-NEXT: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
362 ; CHECK-NEXT: ret
363 entry:
364 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
365 %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
366 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
367 ret <2 x i64> %sub.i.i
368 }
369
370 define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
214 ; CHECK-LABEL: test_vmlsl_high_n_u16: 371 ; CHECK-LABEL: test_vmlsl_high_n_u16:
215 ; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 372 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
373 ; CHECK-NEXT: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
374 ; CHECK-NEXT: ret
216 entry: 375 entry:
217 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 376 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
218 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 377 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
219 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 378 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
220 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 379 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
221 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 380 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
222 %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) 381 %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
223 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i 382 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
224 ret <4 x i32> %sub.i.i 383 ret <4 x i32> %sub.i.i
225 } 384 }
226 385
227 define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) { 386 define <4 x i32> @test_vmlsl_high_n_u16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
387 ; CHECK-LABEL: test_vmlsl_high_n_u16_imm:
388 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
389 ; CHECK-NEXT: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
390 ; CHECK-NEXT: ret
391 entry:
392 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
393 %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
394 %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
395 ret <4 x i32> %sub.i.i
396 }
397
398 define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
228 ; CHECK-LABEL: test_vmlsl_high_n_u32: 399 ; CHECK-LABEL: test_vmlsl_high_n_u32:
229 ; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 400 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
401 ; CHECK-NEXT: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
402 ; CHECK-NEXT: ret
230 entry: 403 entry:
231 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 404 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
232 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 405 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
233 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 406 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
234 %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) 407 %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
235 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i 408 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
236 ret <2 x i64> %sub.i.i 409 ret <2 x i64> %sub.i.i
237 } 410 }
238 411
239 define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { 412 define <2 x i64> @test_vmlsl_high_n_u32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
413 ; CHECK-LABEL: test_vmlsl_high_n_u32_imm:
414 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
415 ; CHECK-NEXT: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
416 ; CHECK-NEXT: ret
417 entry:
418 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
419 %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
420 %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
421 ret <2 x i64> %sub.i.i
422 }
423
424 define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
240 ; CHECK-LABEL: test_vqdmlsl_high_n_s16: 425 ; CHECK-LABEL: test_vqdmlsl_high_n_s16:
241 ; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 426 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
427 ; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
428 ; CHECK-NEXT: ret
242 entry: 429 entry:
243 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 430 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
244 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 431 %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
245 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 432 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
246 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 433 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
247 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 434 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
248 %vqdmlsl15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) 435 %vqdmlsl15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
249 %vqdmlsl17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i) 436 %vqdmlsl17.i.i = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i)
250 ret <4 x i32> %vqdmlsl17.i.i 437 ret <4 x i32> %vqdmlsl17.i.i
251 } 438 }
252 439
253 define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { 440 define <4 x i32> @test_vqdmlsl_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
441 ; CHECK-LABEL: test_vqdmlsl_high_n_s16_imm:
442 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
443 ; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
444 ; CHECK-NEXT: ret
445 entry:
446 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
447 %vqdmlsl15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
448 %vqdmlsl17.i.i = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i)
449 ret <4 x i32> %vqdmlsl17.i.i
450 }
451
452 define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
254 ; CHECK-LABEL: test_vqdmlsl_high_n_s32: 453 ; CHECK-LABEL: test_vqdmlsl_high_n_s32:
255 ; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 454 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
455 ; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
456 ; CHECK-NEXT: ret
256 entry: 457 entry:
257 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 458 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
258 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 459 %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
259 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 460 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
260 %vqdmlsl9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) 461 %vqdmlsl9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
261 %vqdmlsl11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i) 462 %vqdmlsl11.i.i = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i)
262 ret <2 x i64> %vqdmlsl11.i.i 463 ret <2 x i64> %vqdmlsl11.i.i
263 } 464 }
264 465
265 define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) { 466 define <2 x i64> @test_vqdmlsl_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
467 ; CHECK-LABEL: test_vqdmlsl_high_n_s32_imm:
468 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
469 ; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
470 ; CHECK-NEXT: ret
471 entry:
472 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
473 %vqdmlsl9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
474 %vqdmlsl11.i.i = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i)
475 ret <2 x i64> %vqdmlsl11.i.i
476 }
477
478 define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) #0 {
266 ; CHECK-LABEL: test_vmul_n_f32: 479 ; CHECK-LABEL: test_vmul_n_f32:
267 ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 480 ; CHECK-NEXT: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
481 ; CHECK-NEXT: ret
268 entry: 482 entry:
269 %vecinit.i = insertelement <2 x float> undef, float %b, i32 0 483 %vecinit.i = insertelement <2 x float> undef, float %b, i32 0
270 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1 484 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1
271 %mul.i = fmul <2 x float> %vecinit1.i, %a 485 %mul.i = fmul <2 x float> %vecinit1.i, %a
272 ret <2 x float> %mul.i 486 ret <2 x float> %mul.i
273 } 487 }
274 488
275 define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) { 489 define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) #0 {
276 ; CHECK-LABEL: test_vmulq_n_f32: 490 ; CHECK-LABEL: test_vmulq_n_f32:
277 ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 491 ; CHECK-NEXT: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
492 ; CHECK-NEXT: ret
278 entry: 493 entry:
279 %vecinit.i = insertelement <4 x float> undef, float %b, i32 0 494 %vecinit.i = insertelement <4 x float> undef, float %b, i32 0
280 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1 495 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1
281 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2 496 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2
282 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3 497 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3
283 %mul.i = fmul <4 x float> %vecinit3.i, %a 498 %mul.i = fmul <4 x float> %vecinit3.i, %a
284 ret <4 x float> %mul.i 499 ret <4 x float> %mul.i
285 } 500 }
286 501
287 define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) { 502 define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) #0 {
288 ; CHECK-LABEL: test_vmulq_n_f64: 503 ; CHECK-LABEL: test_vmulq_n_f64:
289 ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 504 ; CHECK-NEXT: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
505 ; CHECK-NEXT: ret
290 entry: 506 entry:
291 %vecinit.i = insertelement <2 x double> undef, double %b, i32 0 507 %vecinit.i = insertelement <2 x double> undef, double %b, i32 0
292 %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1 508 %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1
293 %mul.i = fmul <2 x double> %vecinit1.i, %a 509 %mul.i = fmul <2 x double> %vecinit1.i, %a
294 ret <2 x double> %mul.i 510 ret <2 x double> %mul.i
295 } 511 }
296 512
297 define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) { 513 define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 {
298 ; CHECK-LABEL: test_vfma_n_f32: 514 ; CHECK-LABEL: test_vfma_n_f32:
299 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] 515 ; CHECK-NEXT: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
516 ; CHECK-NEXT: ret
300 entry: 517 entry:
301 %vecinit.i = insertelement <2 x float> undef, float %n, i32 0 518 %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
302 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1 519 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
303 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a) 520 %0 = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a)
304 ret <2 x float> %0 521 ret <2 x float> %0
305 } 522 }
306 523
307 define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) { 524 define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
308 ; CHECK-LABEL: test_vfmaq_n_f32: 525 ; CHECK-LABEL: test_vfmaq_n_f32:
309 ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] 526 ; CHECK-NEXT: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
527 ; CHECK-NEXT: ret
310 entry: 528 entry:
311 %vecinit.i = insertelement <4 x float> undef, float %n, i32 0 529 %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
312 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1 530 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
313 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2 531 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
314 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3 532 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
315 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a) 533 %0 = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a)
316 ret <4 x float> %0 534 ret <4 x float> %0
317 } 535 }
318 536
319 define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) { 537 define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 {
320 ; CHECK-LABEL: test_vfms_n_f32: 538 ; CHECK-LABEL: test_vfms_n_f32:
321 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] 539 ; CHECK-NEXT: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
540 ; CHECK-NEXT: ret
322 entry: 541 entry:
323 %vecinit.i = insertelement <2 x float> undef, float %n, i32 0 542 %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
324 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1 543 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
325 %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b 544 %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
326 %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a) 545 %1 = call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a)
327 ret <2 x float> %1 546 ret <2 x float> %1
328 } 547 }
329 548
330 define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) { 549 define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
331 ; CHECK-LABEL: test_vfmsq_n_f32: 550 ; CHECK-LABEL: test_vfmsq_n_f32:
332 ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] 551 ; CHECK-NEXT: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
552 ; CHECK-NEXT: ret
333 entry: 553 entry:
334 %vecinit.i = insertelement <4 x float> undef, float %n, i32 0 554 %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
335 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1 555 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
336 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2 556 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
337 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3 557 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
338 %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b 558 %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
339 %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a) 559 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a)
340 ret <4 x float> %1 560 ret <4 x float> %1
341 } 561 }
562
563 attributes #0 = { nounwind }
564
565 declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
566 declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
567 declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
568 declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
569 declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
570 declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
571 declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
572 declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
573 declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
574 declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
575 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
576 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)