77
|
1 ; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
2
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; vzipi8: interleave two <8 x i8> vectors loaded from %A and %B.
; The first shuffle selects the interleaved low halves (0,8,1,9,...), the
; second the interleaved high halves (4,12,5,13,...); the add consumes both.
; The expected output is a single vzip.8 on D registers followed by vadd.i8.
define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vzip.8 d17, d16
; CHECK-NEXT: vadd.i8 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %zip.lo = shufflevector <8 x i8> %lhs, <8 x i8> %rhs, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %zip.hi = shufflevector <8 x i8> %lhs, <8 x i8> %rhs, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %sum = add <8 x i8> %zip.lo, %zip.hi
  ret <8 x i8> %sum
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19
|
95
|
; vzipi8_Qres: full interleave of two <8 x i8> vectors into one <16 x i8>
; result using a single 16-element shuffle mask (0,8,1,9,...,7,15).
; The expected output zips the two loaded D registers in place and returns
; them through r0-r3; register numbers are captured as FileCheck variables.
define <16 x i8> @vzipi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vzip.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %zipped = shufflevector <8 x i8> %lhs, <8 x i8> %rhs, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i8> %zipped
}
|
|
34
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; vzipi16: interleave two <4 x i16> vectors loaded from %A and %B.
; One shuffle yields the interleaved low halves (0,4,1,5), the other the
; interleaved high halves (2,6,3,7); the add consumes both.
; The expected output is a single vzip.16 on D registers followed by vadd.i16.
define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vzipi16:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vzip.16 d17, d16
; CHECK-NEXT: vadd.i16 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %zip.lo = shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %zip.hi = shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %sum = add <4 x i16> %zip.lo, %zip.hi
  ret <4 x i16> %sum
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
51
|
95
|
; vzipi16_Qres: full interleave of two <4 x i16> vectors into one <8 x i16>
; result using a single 8-element shuffle mask (0,4,1,5,2,6,3,7).
; The expected output zips the two loaded D registers in place and returns
; them through r0-r3; register numbers are captured as FileCheck variables.
define <8 x i16> @vzipi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vzipi16_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vzip.16 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %zipped = shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i16> %zipped
}
|
|
66
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
67 ; VZIP.32 is equivalent to VTRN.32 for 64-bit vectors.
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
68
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; vzipQi8: interleave two <16 x i8> vectors loaded from %A and %B.
; One shuffle yields the interleaved low halves (0,16,1,17,...), the other
; the interleaved high halves (8,24,9,25,...); the add consumes both.
; The expected output is a single vzip.8 on Q registers followed by vadd.i8.
define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vzipQi8:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vzip.8 q9, q8
; CHECK-NEXT: vadd.i8 q8, q9, q8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
  %lhs = load <16 x i8>, <16 x i8>* %A
  %rhs = load <16 x i8>, <16 x i8>* %B
  %zip.lo = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %zip.hi = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %sum = add <16 x i8> %zip.lo, %zip.hi
  ret <16 x i8> %sum
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
86
|
95
|
; vzipQi8_QQres: full interleave of two <16 x i8> vectors into one <32 x i8>
; result using a single 32-element shuffle mask (0,16,1,17,...,15,31).
; The expected output loads the operands from r1/r2, performs one vzip.8 on
; Q registers, and stores both halves through r0.
define <32 x i8> @vzipQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vzipQi8_QQres:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vzip.8 q9, q8
; CHECK-NEXT: vst1.8 {d18, d19}, [r0:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: mov pc, lr
  %lhs = load <16 x i8>, <16 x i8>* %A
  %rhs = load <16 x i8>, <16 x i8>* %B
  %zipped = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ret <32 x i8> %zipped
}
|
|
101
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; vzipQi16: interleave two <8 x i16> vectors loaded from %A and %B.
; One shuffle yields the interleaved low halves (0,8,1,9,...), the other the
; interleaved high halves (4,12,5,13,...); the add consumes both.
; The expected output is a single vzip.16 on Q registers followed by vadd.i16.
define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vzipQi16:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vzip.16 q9, q8
; CHECK-NEXT: vadd.i16 q8, q9, q8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
  %lhs = load <8 x i16>, <8 x i16>* %A
  %rhs = load <8 x i16>, <8 x i16>* %B
  %zip.lo = shufflevector <8 x i16> %lhs, <8 x i16> %rhs, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %zip.hi = shufflevector <8 x i16> %lhs, <8 x i16> %rhs, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %sum = add <8 x i16> %zip.lo, %zip.hi
  ret <8 x i16> %sum
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
119
|
95
|
; vzipQi16_QQres: full interleave of two <8 x i16> vectors into one
; <16 x i16> result using a single 16-element mask (0,8,1,9,...,7,15).
; The expected output loads the operands from r1/r2, performs one vzip.16 on
; Q registers, and stores both halves through r0.
define <16 x i16> @vzipQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vzipQi16_QQres:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vzip.16 q9, q8
; CHECK-NEXT: vst1.16 {d18, d19}, [r0:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: mov pc, lr
  %lhs = load <8 x i16>, <8 x i16>* %A
  %rhs = load <8 x i16>, <8 x i16>* %B
  %zipped = shufflevector <8 x i16> %lhs, <8 x i16> %rhs, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i16> %zipped
}
|
|
134
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; vzipQi32: interleave two <4 x i32> vectors loaded from %A and %B.
; One shuffle yields the interleaved low halves (0,4,1,5), the other the
; interleaved high halves (2,6,3,7); the add consumes both.
; The expected output is a single vzip.32 on Q registers followed by vadd.i32.
define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vzipQi32:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vzip.32 q9, q8
; CHECK-NEXT: vadd.i32 q8, q9, q8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
  %lhs = load <4 x i32>, <4 x i32>* %A
  %rhs = load <4 x i32>, <4 x i32>* %B
  %zip.lo = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %zip.hi = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %sum = add <4 x i32> %zip.lo, %zip.hi
  ret <4 x i32> %sum
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
152
|
95
|
; vzipQi32_QQres: full interleave of two <4 x i32> vectors into one
; <8 x i32> result using a single 8-element mask (0,4,1,5,2,6,3,7).
; The expected output loads the operands from r1/r2, performs one vzip.32 on
; Q registers, and stores both halves through r0.
define <8 x i32> @vzipQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vzipQi32_QQres:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vzip.32 q9, q8
; CHECK-NEXT: vst1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: mov pc, lr
  %lhs = load <4 x i32>, <4 x i32>* %A
  %rhs = load <4 x i32>, <4 x i32>* %B
  %zipped = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i32> %zipped
}
|
|
167
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; vzipQf: floating-point counterpart of the Q-register i32 case.
; Two <4 x float> vectors are interleaved by a low-half shuffle (0,4,1,5)
; and a high-half shuffle (2,6,3,7); the fadd consumes both.
; The expected output is a single vzip.32 on Q registers followed by vadd.f32.
define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vzipQf:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vzip.32 q9, q8
; CHECK-NEXT: vadd.f32 q8, q9, q8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
  %lhs = load <4 x float>, <4 x float>* %A
  %rhs = load <4 x float>, <4 x float>* %B
  %zip.lo = shufflevector <4 x float> %lhs, <4 x float> %rhs, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %zip.hi = shufflevector <4 x float> %lhs, <4 x float> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %sum = fadd <4 x float> %zip.lo, %zip.hi
  ret <4 x float> %sum
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
185
|
95
|
; vzipQf_QQres: full interleave of two <4 x float> vectors into one
; <8 x float> result using a single 8-element mask (0,4,1,5,2,6,3,7).
; The expected output loads the operands from r1/r2, performs one vzip.32 on
; Q registers, and stores both halves through r0.
define <8 x float> @vzipQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vzipQf_QQres:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vzip.32 q9, q8
; CHECK-NEXT: vst1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: mov pc, lr
  %lhs = load <4 x float>, <4 x float>* %A
  %rhs = load <4 x float>, <4 x float>* %B
  %zipped = shufflevector <4 x float> %lhs, <4 x float> %rhs, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x float> %zipped
}
|
|
200
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
201 ; Undef shuffle indices should not prevent matching to VZIP:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
202
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; vzipi8_undef: same shape as the plain <8 x i8> D-register case, but some
; mask elements in both shuffles are undef. The expected output is unchanged:
; one vzip.8 on D registers followed by vadd.i8.
define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8_undef:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vzip.8 d17, d16
; CHECK-NEXT: vadd.i8 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %zip.lo = shufflevector <8 x i8> %lhs, <8 x i8> %rhs, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
  %zip.hi = shufflevector <8 x i8> %lhs, <8 x i8> %rhs, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
  %sum = add <8 x i8> %zip.lo, %zip.hi
  ret <8 x i8> %sum
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
219
|
95
|
; vzipi8_undef_Qres: single 16-element interleave of two <8 x i8> vectors
; where several mask elements are undef. The expected output still zips the
; two loaded D registers in place and returns them through r0-r3.
define <16 x i8> @vzipi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8_undef_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vzip.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %zipped = shufflevector <8 x i8> %lhs, <8 x i8> %rhs, <16 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
  ret <16 x i8> %zipped
}
|
|
234
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; vzipQi8_undef: same shape as the plain <16 x i8> Q-register case, but some
; mask elements in both shuffles are undef. The expected output is unchanged:
; one vzip.8 on Q registers followed by vadd.i8.
define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vzipQi8_undef:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vzip.8 q9, q8
; CHECK-NEXT: vadd.i8 q8, q9, q8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
  %lhs = load <16 x i8>, <16 x i8>* %A
  %rhs = load <16 x i8>, <16 x i8>* %B
  %zip.lo = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %zip.hi = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
  %sum = add <16 x i8> %zip.lo, %zip.hi
  ret <16 x i8> %sum
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
252
|
95
|
; vzipQi8_undef_QQres: single 32-element interleave of two <16 x i8> vectors
; where several mask elements are undef. The expected output still performs
; one vzip.8 on Q registers and stores both halves through r0.
define <32 x i8> @vzipQi8_undef_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vzipQi8_undef_QQres:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vzip.8 q9, q8
; CHECK-NEXT: vst1.8 {d18, d19}, [r0:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: mov pc, lr
  %lhs = load <16 x i8>, <16 x i8>* %A
  %rhs = load <16 x i8>, <16 x i8>* %B
  %zipped = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <32 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
  ret <32 x i8> %zipped
}
|
|
267
|
|
; vzip_lower_shufflemask_undef: widening shuffle of two <4 x i16> vectors
; whose first four mask elements are undef and whose last four are 2,6,3,7.
; The test only requires that the text "vzip" appears in the output.
; NOTE(review): the bare vzip pattern is a substring match and the function
; name itself contains "vzip"; presumably it can be satisfied by a symbol
; directive rather than an instruction - confirm, and consider tightening.
define <8 x i16> @vzip_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
entry:
; CHECK-LABEL: vzip_lower_shufflemask_undef
; CHECK: vzip
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %zipped = shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i16> %zipped
}
|
|
277
|
|
; vzip_lower_shufflemask_zeroed: shuffles a <2 x i32> vector with itself
; using the mask 0,0,1,0. The test requires that "vzip" appears and that
; "vtrn" does not appear before it.
; NOTE(review): both patterns are substring matches and the function name
; contains "vzip"; presumably the positive match can be satisfied by a symbol
; directive rather than an instruction - confirm, and consider tightening.
define <4 x i32> @vzip_lower_shufflemask_zeroed(<2 x i32>* %A) {
entry:
; CHECK-LABEL: vzip_lower_shufflemask_zeroed
; CHECK-NOT: vtrn
; CHECK: vzip
  %src = load <2 x i32>, <2 x i32>* %A
  %shuffled = shufflevector <2 x i32> %src, <2 x i32> %src, <4 x i32> <i32 0, i32 0, i32 1, i32 0>
  ret <4 x i32> %shuffled
}
|
|
287
|
|
; vzip_lower_shufflemask_vuzp: shuffles a <2 x i32> vector with itself using
; the mask 0,2,1,0. The test requires that "vzip" appears and that "vuzp"
; does not appear before it.
; NOTE(review): both patterns are substring matches and the function name
; contains both "vzip" and "vuzp"; presumably the matches can land on symbol
; directives rather than instructions - confirm, and consider tightening.
define <4 x i32> @vzip_lower_shufflemask_vuzp(<2 x i32>* %A) {
entry:
; CHECK-LABEL: vzip_lower_shufflemask_vuzp
; CHECK-NOT: vuzp
; CHECK: vzip
  %src = load <2 x i32>, <2 x i32>* %A
  %shuffled = shufflevector <2 x i32> %src, <2 x i32> %src, <4 x i32> <i32 0, i32 2, i32 1, i32 0>
  ret <4 x i32> %shuffled
}
|
|
297
|
|
; vzip_undef_rev_shufflemask_vtrn: shuffles a <2 x i32> vector against undef
; with the mask 1,1,0,0 and stores the <4 x i32> result to %B. The test
; requires that "vzip" appears and that "vtrn" does not appear before it.
; NOTE(review): both patterns are substring matches and the function name
; contains both "vzip" and "vtrn"; presumably the matches can land on symbol
; directives rather than instructions - confirm, and consider tightening.
define void @vzip_undef_rev_shufflemask_vtrn(<2 x i32>* %A, <4 x i32>* %B) {
entry:
; CHECK-LABEL: vzip_undef_rev_shufflemask_vtrn
; CHECK-NOT: vtrn
; CHECK: vzip
  %src = load <2 x i32>, <2 x i32>* %A
  %shuffled = shufflevector <2 x i32> %src, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
  store <4 x i32> %shuffled, <4 x i32>* %B
  ret void
}
|
|
308
|
|
; vzip_vext_factor: narrowing shuffle of an <8 x i16> vector against undef
; with the mask 4,4,5,3, stored to %B as <4 x i16>. The test requires a
; specific vext.16 with immediate #3, followed later by the text "vzip".
; NOTE(review): the bare vzip pattern is a substring match and the function
; name itself contains "vzip"; presumably it can be satisfied by a symbol
; directive rather than an instruction - confirm, and consider tightening.
define void @vzip_vext_factor(<8 x i16>* %A, <4 x i16>* %B) {
entry:
; CHECK-LABEL: vzip_vext_factor
; CHECK: vext.16 d16, d16, d17, #3
; CHECK: vzip
  %src = load <8 x i16>, <8 x i16>* %A
  %shuffled = shufflevector <8 x i16> %src, <8 x i16> undef, <4 x i32> <i32 4, i32 4, i32 5, i32 3>
  store <4 x i16> %shuffled, <4 x i16>* %B
  ret void
}
|