// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN:   -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s

// REQUIRES: aarch64-registered-target || arm-registered-target
|
7 #include <arm_neon.h>
|
|
8
|
236
|
9 // CHECK-LABEL: define {{[^@]+}}@test_vtbl1_s8
|
|
10 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
11 // CHECK-NEXT: entry:
|
|
12 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
13 // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
|
|
14 // CHECK-NEXT: ret <8 x i8> [[VTBL11_I]]
|
|
15 //
|
150
|
16 int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
|
|
17 return vtbl1_s8(a, b);
|
|
18 }
|
|
19
|
236
|
20 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1_s8
|
|
21 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
22 // CHECK-NEXT: entry:
|
|
23 // CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]])
|
|
24 // CHECK-NEXT: ret <8 x i8> [[VTBL1_I]]
|
|
25 //
|
150
|
26 int8x8_t test_vqtbl1_s8(int8x16_t a, uint8x8_t b) {
|
|
27 return vqtbl1_s8(a, b);
|
|
28 }
|
|
29
|
236
|
30 // CHECK-LABEL: define {{[^@]+}}@test_vtbl2_s8
|
252
|
31 // CHECK-SAME: ([2 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
236
|
32 // CHECK-NEXT: entry:
|
|
33 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X2_T:%.*]], align 8
|
|
34 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8
|
252
|
35 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], ptr [[A]], i32 0, i32 0
|
|
36 // CHECK-NEXT: store [2 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
|
|
37 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], ptr [[A]], i32 0, i32 0
|
|
38 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
|
|
39 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
|
|
40 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
|
|
41 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
42 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
|
236
|
43 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
44 // CHECK-NEXT: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
|
|
45 // CHECK-NEXT: ret <8 x i8> [[VTBL13_I]]
|
|
46 //
|
150
|
47 int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
|
|
48 return vtbl2_s8(a, b);
|
|
49 }
|
|
50
|
236
|
51 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_s8
|
252
|
52 // CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
236
|
53 // CHECK-NEXT: entry:
|
|
54 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
|
|
55 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
|
252
|
56 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[A]], i32 0, i32 0
|
|
57 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
58 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[A]], i32 0, i32 0
|
|
59 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
60 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
|
|
61 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
|
|
62 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
63 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
236
|
64 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]])
|
|
65 // CHECK-NEXT: ret <8 x i8> [[VTBL2_I]]
|
|
66 //
|
150
|
67 int8x8_t test_vqtbl2_s8(int8x16x2_t a, uint8x8_t b) {
|
|
68 return vqtbl2_s8(a, b);
|
|
69 }
|
|
70
|
236
|
71 // CHECK-LABEL: define {{[^@]+}}@test_vtbl3_s8
|
252
|
72 // CHECK-SAME: ([3 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
236
|
73 // CHECK-NEXT: entry:
|
|
74 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X3_T:%.*]], align 8
|
|
75 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8
|
252
|
76 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], ptr [[A]], i32 0, i32 0
|
|
77 // CHECK-NEXT: store [3 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
|
|
78 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], ptr [[A]], i32 0, i32 0
|
|
79 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
|
|
80 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
|
|
81 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
|
|
82 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
83 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
|
|
84 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
|
|
85 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
|
236
|
86 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
87 // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
88 // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]])
|
|
89 // CHECK-NEXT: ret <8 x i8> [[VTBL26_I]]
|
|
90 //
|
150
|
91 int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
|
|
92 return vtbl3_s8(a, b);
|
|
93 }
|
|
94
|
236
|
95 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_s8
|
252
|
96 // CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
236
|
97 // CHECK-NEXT: entry:
|
|
98 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
|
|
99 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
|
252
|
100 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[A]], i32 0, i32 0
|
|
101 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
102 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[A]], i32 0, i32 0
|
|
103 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
104 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
|
|
105 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
|
|
106 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
107 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
108 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
|
|
109 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
236
|
110 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]])
|
|
111 // CHECK-NEXT: ret <8 x i8> [[VTBL3_I]]
|
|
112 //
|
150
|
113 int8x8_t test_vqtbl3_s8(int8x16x3_t a, uint8x8_t b) {
|
|
114 return vqtbl3_s8(a, b);
|
|
115 }
|
|
116
|
236
|
117 // CHECK-LABEL: define {{[^@]+}}@test_vtbl4_s8
|
252
|
118 // CHECK-SAME: ([4 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
236
|
119 // CHECK-NEXT: entry:
|
|
120 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X4_T:%.*]], align 8
|
|
121 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8
|
252
|
122 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], ptr [[A]], i32 0, i32 0
|
|
123 // CHECK-NEXT: store [4 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
|
|
124 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], ptr [[A]], i32 0, i32 0
|
|
125 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
|
|
126 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
|
|
127 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
|
|
128 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
129 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
|
|
130 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
|
|
131 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
|
|
132 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 3
|
|
133 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
|
236
|
134 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
135 // CHECK-NEXT: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
136 // CHECK-NEXT: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]])
|
|
137 // CHECK-NEXT: ret <8 x i8> [[VTBL28_I]]
|
|
138 //
|
150
|
139 int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
|
|
140 return vtbl4_s8(a, b);
|
|
141 }
|
|
142
|
236
|
143 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_s8
|
252
|
144 // CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
236
|
145 // CHECK-NEXT: entry:
|
|
146 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
|
|
147 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
|
252
|
148 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[A]], i32 0, i32 0
|
|
149 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
150 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[A]], i32 0, i32 0
|
|
151 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
152 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
|
|
153 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
|
|
154 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
155 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
156 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
|
|
157 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
|
158 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
|
|
159 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
|
236
|
160 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]])
|
|
161 // CHECK-NEXT: ret <8 x i8> [[VTBL4_I]]
|
|
162 //
|
150
|
163 int8x8_t test_vqtbl4_s8(int8x16x4_t a, uint8x8_t b) {
|
|
164 return vqtbl4_s8(a, b);
|
|
165 }
|
|
166
|
236
|
167 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1q_s8
|
|
168 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
169 // CHECK-NEXT: entry:
|
|
170 // CHECK-NEXT: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
|
|
171 // CHECK-NEXT: ret <16 x i8> [[VTBL1_I]]
|
|
172 //
|
150
|
173 int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) {
|
|
174 return vqtbl1q_s8(a, b);
|
|
175 }
|
|
176
|
236
|
177 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_s8
|
252
|
178 // CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
236
|
179 // CHECK-NEXT: entry:
|
|
180 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
|
|
181 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
|
252
|
182 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[A]], i32 0, i32 0
|
|
183 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
184 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[A]], i32 0, i32 0
|
|
185 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
186 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
|
|
187 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
|
|
188 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
189 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
236
|
190 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]])
|
|
191 // CHECK-NEXT: ret <16 x i8> [[VTBL2_I]]
|
|
192 //
|
150
|
193 int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) {
|
|
194 return vqtbl2q_s8(a, b);
|
|
195 }
|
|
196
|
236
|
197 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_s8
|
252
|
198 // CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
236
|
199 // CHECK-NEXT: entry:
|
|
200 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
|
|
201 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
|
252
|
202 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[A]], i32 0, i32 0
|
|
203 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
204 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[A]], i32 0, i32 0
|
|
205 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
206 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
|
|
207 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
|
|
208 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
209 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
210 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
|
|
211 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
236
|
212 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]])
|
|
213 // CHECK-NEXT: ret <16 x i8> [[VTBL3_I]]
|
|
214 //
|
150
|
215 int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) {
|
|
216 return vqtbl3q_s8(a, b);
|
|
217 }
|
|
218
|
236
|
219 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_s8
|
252
|
220 // CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
236
|
221 // CHECK-NEXT: entry:
|
|
222 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
|
|
223 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
|
252
|
224 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[A]], i32 0, i32 0
|
|
225 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
226 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[A]], i32 0, i32 0
|
|
227 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
228 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
|
|
229 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
|
|
230 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
231 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
232 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
|
|
233 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
|
234 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
|
|
235 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
|
236
|
236 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]])
|
|
237 // CHECK-NEXT: ret <16 x i8> [[VTBL4_I]]
|
|
238 //
|
150
|
239 int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) {
|
|
240 return vqtbl4q_s8(a, b);
|
|
241 }
|
|
242
|
236
|
243 // CHECK-LABEL: define {{[^@]+}}@test_vtbx1_s8
|
|
244 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
|
245 // CHECK-NEXT: entry:
|
|
246 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
247 // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]])
|
|
248 // CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
|
|
249 // CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
|
|
250 // CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]]
|
|
251 // CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
|
252 // CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
|
|
253 // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
|
|
254 // CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
|
|
255 //
|
150
|
256 int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
|
|
257 return vtbx1_s8(a, b, c);
|
|
258 }
|
|
259
|
236
|
260 // CHECK-LABEL: define {{[^@]+}}@test_vtbx2_s8
|
252
|
261 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
236
|
262 // CHECK-NEXT: entry:
|
|
263 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X2_T:%.*]], align 8
|
|
264 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8
|
252
|
265 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], ptr [[B]], i32 0, i32 0
|
|
266 // CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
|
|
267 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], ptr [[B]], i32 0, i32 0
|
|
268 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
|
|
269 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
|
|
270 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
|
|
271 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
272 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
|
236
|
273 // CHECK-NEXT: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
274 // CHECK-NEXT: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]])
|
|
275 // CHECK-NEXT: ret <8 x i8> [[VTBX13_I]]
|
|
276 //
|
150
|
277 int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
|
|
278 return vtbx2_s8(a, b, c);
|
|
279 }
|
|
280
|
236
|
281 // CHECK-LABEL: define {{[^@]+}}@test_vtbx3_s8
|
252
|
282 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
236
|
283 // CHECK-NEXT: entry:
|
|
284 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X3_T:%.*]], align 8
|
|
285 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8
|
252
|
286 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], ptr [[B]], i32 0, i32 0
|
|
287 // CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
|
|
288 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], ptr [[B]], i32 0, i32 0
|
|
289 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
|
|
290 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
|
|
291 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
|
|
292 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
293 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
|
|
294 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
|
|
295 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
|
236
|
296 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
297 // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
298 // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
|
|
299 // CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
|
|
300 // CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
|
|
301 // CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]]
|
|
302 // CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
|
303 // CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
|
|
304 // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
|
|
305 // CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
|
|
306 //
|
150
|
307 int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
|
|
308 return vtbx3_s8(a, b, c);
|
|
309 }
|
|
310
|
236
|
311 // CHECK-LABEL: define {{[^@]+}}@test_vtbx4_s8
|
252
|
312 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
236
|
313 // CHECK-NEXT: entry:
|
|
314 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X4_T:%.*]], align 8
|
|
315 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8
|
252
|
316 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], ptr [[B]], i32 0, i32 0
|
|
317 // CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
|
|
318 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], ptr [[B]], i32 0, i32 0
|
|
319 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
|
|
320 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
|
|
321 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
|
|
322 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
323 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
|
|
324 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
|
|
325 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
|
|
326 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 3
|
|
327 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
|
236
|
328 // CHECK-NEXT: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
329 // CHECK-NEXT: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
330 // CHECK-NEXT: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]])
|
|
331 // CHECK-NEXT: ret <8 x i8> [[VTBX28_I]]
|
|
332 //
|
150
|
333 int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
|
|
334 return vtbx4_s8(a, b, c);
|
|
335 }
|
|
336
|
236
|
337 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx1_s8
|
|
338 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
339 // CHECK-NEXT: entry:
|
|
340 // CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
|
|
341 // CHECK-NEXT: ret <8 x i8> [[VTBX1_I]]
|
|
342 //
|
150
|
343 int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, uint8x8_t c) {
|
|
344 return vqtbx1_s8(a, b, c);
|
|
345 }
|
|
346
|
236
|
347 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_s8
|
252
|
348 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
236
|
349 // CHECK-NEXT: entry:
|
|
350 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
|
|
351 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
|
252
|
352 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[B]], i32 0, i32 0
|
|
353 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
354 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[B]], i32 0, i32 0
|
|
355 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
356 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
|
|
357 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
|
|
358 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
359 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
236
|
360 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
|
|
361 // CHECK-NEXT: ret <8 x i8> [[VTBX2_I]]
|
|
362 //
|
150
|
363 int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, uint8x8_t c) {
|
|
364 return vqtbx2_s8(a, b, c);
|
|
365 }
|
|
366
|
236
|
367 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_s8
|
252
|
368 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
236
|
369 // CHECK-NEXT: entry:
|
|
370 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
|
|
371 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
|
252
|
372 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[B]], i32 0, i32 0
|
|
373 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
374 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[B]], i32 0, i32 0
|
|
375 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
376 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
|
|
377 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
|
|
378 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
379 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
380 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
|
|
381 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
236
|
382 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]])
|
|
383 // CHECK-NEXT: ret <8 x i8> [[VTBX3_I]]
|
|
384 //
|
150
|
385 int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, uint8x8_t c) {
|
|
386 return vqtbx3_s8(a, b, c);
|
|
387 }
|
|
388
|
236
|
389 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_s8
|
252
|
390 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
236
|
391 // CHECK-NEXT: entry:
|
|
392 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
|
|
393 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
|
252
|
394 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[B]], i32 0, i32 0
|
|
395 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
396 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[B]], i32 0, i32 0
|
|
397 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
398 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
|
|
399 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
|
|
400 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
401 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
402 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
|
|
403 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
|
404 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
|
|
405 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
|
236
|
406 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]])
|
|
407 // CHECK-NEXT: ret <8 x i8> [[VTBX4_I]]
|
|
408 //
|
150
|
409 int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, uint8x8_t c) {
|
|
410 return vqtbx4_s8(a, b, c);
|
|
411 }
|
|
412
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx1q_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
// CHECK-NEXT:    ret <16 x i8> [[VTBX1_I]]
//
// q-form single-table extended lookup: vqtbx1q_s8 lowers directly to the
// llvm.aarch64.neon.tbx1.v16i8 intrinsic with no extra data movement.
int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, uint8x16_t c) {
  return vqtbx1q_s8(a, b, c);
}
|
|
422
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
// CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT:    store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT:    [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
// CHECK-NEXT:    ret <16 x i8> [[VTBX2_I]]
//
// Two-table q-form extended lookup: the int8x16x2_t argument is coerced to
// [2 x <16 x i8>], spilled, reloaded element-by-element, then fed to tbx2.
int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) {
  return vqtbx2q_s8(a, b, c);
}
|
|
442
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
// CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT:    store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT:    [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]])
// CHECK-NEXT:    ret <16 x i8> [[VTBX3_I]]
//
// Three-table q-form extended lookup: int8x16x3_t is unpacked into three
// <16 x i8> registers and passed straight to the tbx3 intrinsic.
int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) {
  return vqtbx3q_s8(a, b, c);
}
|
|
464
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
// CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT:    store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
// CHECK-NEXT:    [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]])
// CHECK-NEXT:    ret <16 x i8> [[VTBX4_I]]
//
// Four-table q-form extended lookup: int8x16x4_t is unpacked into four
// <16 x i8> registers and lowered to the tbx4 intrinsic.
int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) {
  return vqtbx4q_s8(a, b, c);
}
|
|
488
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vtbl1_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT:    [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
// CHECK-NEXT:    ret <8 x i8> [[VTBL11_I]]
//
// Legacy (ARMv7-style) vtbl1: the 64-bit table is widened to 128 bits with a
// zero high half before calling the AArch64 tbl1 intrinsic.
uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
  return vtbl1_u8(a, b);
}
|
|
499
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl1_u8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]])
// CHECK-NEXT:    ret <8 x i8> [[VTBL1_I]]
//
// Native AArch64 vqtbl1 (128-bit table, 64-bit indices): maps 1:1 onto
// llvm.aarch64.neon.tbl1.v8i8.
uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
  return vqtbl1_u8(a, b);
}
|
|
509
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vtbl2_u8
// CHECK-SAME: ([2 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X2_T:%.*]], align 8
// CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8
// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    store [2 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT:    store [2 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT:    [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT:    [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
// CHECK-NEXT:    ret <8 x i8> [[VTBL13_I]]
//
// Legacy vtbl2: the two 64-bit table halves are concatenated into one
// 128-bit vector so a single tbl1 call suffices.
uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
  return vtbl2_u8(a, b);
}
|
|
530
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_u8
// CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
// CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT:    [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]])
// CHECK-NEXT:    ret <8 x i8> [[VTBL2_I]]
//
// Native vqtbl2 (two 128-bit tables, 64-bit result): the struct is unpacked
// and lowered to tbl2.v8i8.
uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) {
  return vqtbl2_u8(a, b);
}
|
|
550
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vtbl3_u8
// CHECK-SAME: ([3 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X3_T:%.*]], align 8
// CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8
// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    store [3 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT:    store [3 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK-NEXT:    [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT:    [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT:    [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]])
// CHECK-NEXT:    ret <8 x i8> [[VTBL26_I]]
//
// Legacy vtbl3: three 64-bit halves are packed into two 128-bit tables
// (the third padded with zeros) and lowered to a single tbl2 call.
uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
  return vtbl3_u8(a, b);
}
|
|
574
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_u8
// CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
// CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT:    [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]])
// CHECK-NEXT:    ret <8 x i8> [[VTBL3_I]]
//
// Native vqtbl3: the three 128-bit tables go directly to tbl3.v8i8 — no
// repacking shuffles, unlike the legacy vtbl3 form above.
uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) {
  return vqtbl3_u8(a, b);
}
|
|
596
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vtbl4_u8
// CHECK-SAME: ([4 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X4_T:%.*]], align 8
// CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8
// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    store [4 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT:    store [4 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 3
// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
// CHECK-NEXT:    [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT:    [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT:    [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]])
// CHECK-NEXT:    ret <8 x i8> [[VTBL28_I]]
//
// Legacy vtbl4: four 64-bit halves are concatenated pairwise into two
// 128-bit tables and lowered to a single tbl2 call.
uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
  return vtbl4_u8(a, b);
}
|
|
622
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_u8
// CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
// CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
// CHECK-NEXT:    [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]])
// CHECK-NEXT:    ret <8 x i8> [[VTBL4_I]]
//
// Native vqtbl4: four 128-bit tables passed straight through to tbl4.v8i8.
uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) {
  return vqtbl4_u8(a, b);
}
|
|
646
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl1q_u8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
// CHECK-NEXT:    ret <16 x i8> [[VTBL1_I]]
//
// q-form single-table lookup: vqtbl1q_u8 maps 1:1 onto tbl1.v16i8.
uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
  return vqtbl1q_u8(a, b);
}
|
|
656
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_u8
// CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
// CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT:    [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]])
// CHECK-NEXT:    ret <16 x i8> [[VTBL2_I]]
//
// q-form two-table lookup: the uint8x16x2_t is unpacked and lowered to
// tbl2.v16i8 (128-bit indices and result).
uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) {
  return vqtbl2q_u8(a, b);
}
|
|
676
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_u8
// CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
// CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT:    [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]])
// CHECK-NEXT:    ret <16 x i8> [[VTBL3_I]]
//
// q-form three-table lookup: lowered to tbl3.v16i8.
uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) {
  return vqtbl3q_u8(a, b);
}
|
|
698
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_u8
// CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
// CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
// CHECK-NEXT:    [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]])
// CHECK-NEXT:    ret <16 x i8> [[VTBL4_I]]
//
// q-form four-table lookup: lowered to tbl4.v16i8.
uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) {
  return vqtbl4q_u8(a, b);
}
|
|
722
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vtbx1_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT:    [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]])
// CHECK-NEXT:    [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
// CHECK-NEXT:    [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]]
// CHECK-NEXT:    [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK-NEXT:    [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
// CHECK-NEXT:    [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
// CHECK-NEXT:    ret <8 x i8> [[VTBX_I]]
//
// Legacy vtbx1: emulated as a zero-extended tbl1 followed by an explicit
// compare/select that keeps lanes of `a` where the index >= 8 (table size).
uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vtbx1_u8(a, b, c);
}
|
|
739
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vtbx2_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X2_T:%.*]], align 8
// CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8
// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT:    store [2 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT:    store [2 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT:    [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT:    [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]])
// CHECK-NEXT:    ret <8 x i8> [[VTBX13_I]]
//
// Legacy vtbx2: the two 64-bit halves are concatenated into one 128-bit
// table, so the hardware tbx1 gives the fallback semantics directly.
uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
  return vtbx2_u8(a, b, c);
}
|
|
760
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vtbx3_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X3_T:%.*]], align 8
// CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8
// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT:    store [3 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT:    store [3 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK-NEXT:    [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT:    [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT:    [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
// CHECK-NEXT:    [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
// CHECK-NEXT:    [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
// CHECK-NEXT:    [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]]
// CHECK-NEXT:    [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK-NEXT:    [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
// CHECK-NEXT:    [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret <8 x i8> [[VTBX_I]]
//
// Legacy vtbx3: packed into two 128-bit tables (third half zero-padded),
// a tbl2 lookup, then an explicit compare/select keeping lanes of `a`
// where the index >= 24 (the 3x8-byte table size).
uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
  return vtbx3_u8(a, b, c);
}
|
|
790
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vtbx4_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X4_T:%.*]], align 8
// CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8
// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT:    store [4 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT:    store [4 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 3
// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
// CHECK-NEXT:    [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT:    [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT:    [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]])
// CHECK-NEXT:    ret <8 x i8> [[VTBX28_I]]
//
// Legacy vtbx4: four 64-bit halves concatenated pairwise into two 128-bit
// tables; a single tbx2 then provides the fallback semantics in hardware.
uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
  return vtbx4_u8(a, b, c);
}
|
|
816
|
236
|
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx1_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
// CHECK-NEXT:    ret <8 x i8> [[VTBX1_I]]
//
// Native vqtbx1 (64-bit form): maps 1:1 onto tbx1.v8i8.
uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) {
  return vqtbx1_u8(a, b, c);
}
|
|
826
|
236
|
827 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_u8
|
252
|
828 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
236
|
829 // CHECK-NEXT: entry:
|
|
830 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
|
|
831 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
|
252
|
832 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[B]], i32 0, i32 0
|
|
833 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
834 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[B]], i32 0, i32 0
|
|
835 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
836 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
|
|
837 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
|
|
838 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
839 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
236
|
840 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
|
|
841 // CHECK-NEXT: ret <8 x i8> [[VTBX2_I]]
|
|
842 //
|
150
|
843 uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) {
|
|
844 return vqtbx2_u8(a, b, c);
|
|
845 }
|
|
846
|
236
|
847 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_u8
|
252
|
848 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
236
|
849 // CHECK-NEXT: entry:
|
|
850 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
|
|
851 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
|
252
|
852 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[B]], i32 0, i32 0
|
|
853 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
854 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[B]], i32 0, i32 0
|
|
855 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
856 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
|
|
857 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
|
|
858 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
859 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
860 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
|
|
861 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
236
|
862 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]])
|
|
863 // CHECK-NEXT: ret <8 x i8> [[VTBX3_I]]
|
|
864 //
|
150
|
865 uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) {
|
|
866 return vqtbx3_u8(a, b, c);
|
|
867 }
|
|
868
|
236
|
869 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_u8
|
252
|
870 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
236
|
871 // CHECK-NEXT: entry:
|
|
872 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
|
|
873 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
|
252
|
874 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[B]], i32 0, i32 0
|
|
875 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
876 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[B]], i32 0, i32 0
|
|
877 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
878 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
|
|
879 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
|
|
880 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
881 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
882 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
|
|
883 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
|
884 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
|
|
885 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
|
236
|
886 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]])
|
|
887 // CHECK-NEXT: ret <8 x i8> [[VTBX4_I]]
|
|
888 //
|
150
|
889 uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) {
|
|
890 return vqtbx4_u8(a, b, c);
|
|
891 }
|
|
892
|
236
|
893 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx1q_u8
|
|
894 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
895 // CHECK-NEXT: entry:
|
|
896 // CHECK-NEXT: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
|
|
897 // CHECK-NEXT: ret <16 x i8> [[VTBX1_I]]
|
|
898 //
|
150
|
899 uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
|
|
900 return vqtbx1q_u8(a, b, c);
|
|
901 }
|
|
902
|
236
|
903 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_u8
|
252
|
904 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
236
|
905 // CHECK-NEXT: entry:
|
|
906 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
|
|
907 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
|
252
|
908 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[B]], i32 0, i32 0
|
|
909 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
910 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[B]], i32 0, i32 0
|
|
911 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
912 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
|
|
913 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
|
|
914 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
915 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
236
|
916 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
|
|
917 // CHECK-NEXT: ret <16 x i8> [[VTBX2_I]]
|
|
918 //
|
150
|
919 uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) {
|
|
920 return vqtbx2q_u8(a, b, c);
|
|
921 }
|
|
922
|
236
|
923 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_u8
|
252
|
924 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
236
|
925 // CHECK-NEXT: entry:
|
|
926 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
|
|
927 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
|
252
|
928 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[B]], i32 0, i32 0
|
|
929 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
930 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[B]], i32 0, i32 0
|
|
931 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
932 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
|
|
933 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
|
|
934 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
935 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
936 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
|
|
937 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
236
|
938 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]])
|
|
939 // CHECK-NEXT: ret <16 x i8> [[VTBX3_I]]
|
|
940 //
|
150
|
941 uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) {
|
|
942 return vqtbx3q_u8(a, b, c);
|
|
943 }
|
|
944
|
236
|
945 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_u8
|
252
|
946 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
236
|
947 // CHECK-NEXT: entry:
|
|
948 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
|
|
949 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
|
252
|
950 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[B]], i32 0, i32 0
|
|
951 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
952 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[B]], i32 0, i32 0
|
|
953 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
954 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
|
|
955 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
|
|
956 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
957 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
958 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
|
|
959 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
|
960 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
|
|
961 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
|
236
|
962 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]])
|
|
963 // CHECK-NEXT: ret <16 x i8> [[VTBX4_I]]
|
|
964 //
|
150
|
965 uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) {
|
|
966 return vqtbx4q_u8(a, b, c);
|
|
967 }
|
|
968
|
236
|
969 // CHECK-LABEL: define {{[^@]+}}@test_vtbl1_p8
|
|
970 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
|
971 // CHECK-NEXT: entry:
|
|
972 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
973 // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
|
|
974 // CHECK-NEXT: ret <8 x i8> [[VTBL11_I]]
|
|
975 //
|
150
|
976 poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
|
|
977 return vtbl1_p8(a, b);
|
|
978 }
|
|
979
|
236
|
980 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1_p8
|
|
981 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
982 // CHECK-NEXT: entry:
|
|
983 // CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]])
|
|
984 // CHECK-NEXT: ret <8 x i8> [[VTBL1_I]]
|
|
985 //
|
150
|
986 poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) {
|
|
987 return vqtbl1_p8(a, b);
|
|
988 }
|
|
989
|
236
|
990 // CHECK-LABEL: define {{[^@]+}}@test_vtbl2_p8
|
252
|
991 // CHECK-SAME: ([2 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
236
|
992 // CHECK-NEXT: entry:
|
|
993 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X2_T:%.*]], align 8
|
|
994 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8
|
252
|
995 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], ptr [[A]], i32 0, i32 0
|
|
996 // CHECK-NEXT: store [2 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
|
|
997 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], ptr [[A]], i32 0, i32 0
|
|
998 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
|
|
999 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
|
|
1000 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
|
|
1001 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
1002 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
|
236
|
1003 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1004 // CHECK-NEXT: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
|
|
1005 // CHECK-NEXT: ret <8 x i8> [[VTBL13_I]]
|
|
1006 //
|
150
|
1007 poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
|
|
1008 return vtbl2_p8(a, b);
|
|
1009 }
|
|
1010
|
236
|
1011 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_p8
|
252
|
1012 // CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
236
|
1013 // CHECK-NEXT: entry:
|
|
1014 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
|
|
1015 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
|
252
|
1016 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[A]], i32 0, i32 0
|
|
1017 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
1018 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[A]], i32 0, i32 0
|
|
1019 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
1020 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
|
|
1021 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
|
|
1022 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
1023 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
236
|
1024 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]])
|
|
1025 // CHECK-NEXT: ret <8 x i8> [[VTBL2_I]]
|
|
1026 //
|
150
|
1027 poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
|
|
1028 return vqtbl2_p8(a, b);
|
|
1029 }
|
|
1030
|
236
|
1031 // CHECK-LABEL: define {{[^@]+}}@test_vtbl3_p8
|
252
|
1032 // CHECK-SAME: ([3 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
236
|
1033 // CHECK-NEXT: entry:
|
|
1034 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X3_T:%.*]], align 8
|
|
1035 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8
|
252
|
1036 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], ptr [[A]], i32 0, i32 0
|
|
1037 // CHECK-NEXT: store [3 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
|
|
1038 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], ptr [[A]], i32 0, i32 0
|
|
1039 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
|
|
1040 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
|
|
1041 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
|
|
1042 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
1043 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
|
|
1044 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
|
|
1045 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
|
236
|
1046 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1047 // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1048 // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]])
|
|
1049 // CHECK-NEXT: ret <8 x i8> [[VTBL26_I]]
|
|
1050 //
|
150
|
1051 poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
|
|
1052 return vtbl3_p8(a, b);
|
|
1053 }
|
|
1054
|
236
|
1055 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_p8
|
252
|
1056 // CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
236
|
1057 // CHECK-NEXT: entry:
|
|
1058 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
|
|
1059 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
|
252
|
1060 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[A]], i32 0, i32 0
|
|
1061 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
1062 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[A]], i32 0, i32 0
|
|
1063 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
1064 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
|
|
1065 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
|
|
1066 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
1067 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
1068 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
|
|
1069 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
236
|
1070 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]])
|
|
1071 // CHECK-NEXT: ret <8 x i8> [[VTBL3_I]]
|
|
1072 //
|
150
|
1073 poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
|
|
1074 return vqtbl3_p8(a, b);
|
|
1075 }
|
|
1076
|
236
|
1077 // CHECK-LABEL: define {{[^@]+}}@test_vtbl4_p8
|
252
|
1078 // CHECK-SAME: ([4 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
236
|
1079 // CHECK-NEXT: entry:
|
|
1080 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X4_T:%.*]], align 8
|
|
1081 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8
|
252
|
1082 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], ptr [[A]], i32 0, i32 0
|
|
1083 // CHECK-NEXT: store [4 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
|
|
1084 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], ptr [[A]], i32 0, i32 0
|
|
1085 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
|
|
1086 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
|
|
1087 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
|
|
1088 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
1089 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
|
|
1090 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
|
|
1091 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
|
|
1092 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 3
|
|
1093 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
|
236
|
1094 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1095 // CHECK-NEXT: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1096 // CHECK-NEXT: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]])
|
|
1097 // CHECK-NEXT: ret <8 x i8> [[VTBL28_I]]
|
|
1098 //
|
150
|
1099 poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
|
|
1100 return vtbl4_p8(a, b);
|
|
1101 }
|
|
1102
|
236
|
1103 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_p8
|
252
|
1104 // CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
236
|
1105 // CHECK-NEXT: entry:
|
|
1106 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
|
|
1107 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
|
252
|
1108 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[A]], i32 0, i32 0
|
|
1109 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
1110 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[A]], i32 0, i32 0
|
|
1111 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
1112 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
|
|
1113 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
|
|
1114 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
1115 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
1116 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
|
|
1117 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
|
1118 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
|
|
1119 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
|
236
|
1120 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]])
|
|
1121 // CHECK-NEXT: ret <8 x i8> [[VTBL4_I]]
|
|
1122 //
|
150
|
1123 poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) {
|
|
1124 return vqtbl4_p8(a, b);
|
|
1125 }
|
|
1126
|
236
|
1127 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1q_p8
|
|
1128 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
1129 // CHECK-NEXT: entry:
|
|
1130 // CHECK-NEXT: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
|
|
1131 // CHECK-NEXT: ret <16 x i8> [[VTBL1_I]]
|
|
1132 //
|
150
|
1133 poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) {
|
|
1134 return vqtbl1q_p8(a, b);
|
|
1135 }
|
|
1136
|
236
|
1137 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_p8
|
252
|
1138 // CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
236
|
1139 // CHECK-NEXT: entry:
|
|
1140 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
|
|
1141 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
|
252
|
1142 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[A]], i32 0, i32 0
|
|
1143 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
1144 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[A]], i32 0, i32 0
|
|
1145 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
1146 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
|
|
1147 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
|
|
1148 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
1149 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
236
|
1150 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]])
|
|
1151 // CHECK-NEXT: ret <16 x i8> [[VTBL2_I]]
|
|
1152 //
|
150
|
1153 poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
|
|
1154 return vqtbl2q_p8(a, b);
|
|
1155 }
|
|
1156
|
236
|
1157 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_p8
|
252
|
1158 // CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
236
|
1159 // CHECK-NEXT: entry:
|
|
1160 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
|
|
1161 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
|
252
|
1162 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[A]], i32 0, i32 0
|
|
1163 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
1164 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[A]], i32 0, i32 0
|
|
1165 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
1166 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
|
|
1167 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
|
|
1168 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
1169 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
1170 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
|
|
1171 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
236
|
1172 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]])
|
|
1173 // CHECK-NEXT: ret <16 x i8> [[VTBL3_I]]
|
|
1174 //
|
150
|
1175 poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
|
|
1176 return vqtbl3q_p8(a, b);
|
|
1177 }
|
|
1178
|
236
|
1179 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_p8
|
252
|
1180 // CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
236
|
1181 // CHECK-NEXT: entry:
|
|
1182 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
|
|
1183 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
|
252
|
1184 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[A]], i32 0, i32 0
|
|
1185 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
1186 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[A]], i32 0, i32 0
|
|
1187 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
1188 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
|
|
1189 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
|
|
1190 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
|
|
1191 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
1192 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
|
|
1193 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
|
1194 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
|
|
1195 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
|
236
|
1196 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]])
|
|
1197 // CHECK-NEXT: ret <16 x i8> [[VTBL4_I]]
|
|
1198 //
|
150
|
1199 poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) {
|
|
1200 return vqtbl4q_p8(a, b);
|
|
1201 }
|
|
1202
|
236
|
1203 // CHECK-LABEL: define {{[^@]+}}@test_vtbx1_p8
|
|
1204 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
|
1205 // CHECK-NEXT: entry:
|
|
1206 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1207 // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]])
|
|
1208 // CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
|
|
1209 // CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
|
|
1210 // CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]]
|
|
1211 // CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
|
1212 // CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
|
|
1213 // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
|
|
1214 // CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
|
|
1215 //
|
150
|
1216 poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
|
|
1217 return vtbx1_p8(a, b, c);
|
|
1218 }
|
|
1219
|
236
|
1220 // CHECK-LABEL: define {{[^@]+}}@test_vtbx2_p8
|
252
|
1221 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
236
|
1222 // CHECK-NEXT: entry:
|
|
1223 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X2_T:%.*]], align 8
|
|
1224 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8
|
252
|
1225 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], ptr [[B]], i32 0, i32 0
|
|
1226 // CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
|
|
1227 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], ptr [[B]], i32 0, i32 0
|
|
1228 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
|
|
1229 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
|
|
1230 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
|
|
1231 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
1232 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
|
236
|
1233 // CHECK-NEXT: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1234 // CHECK-NEXT: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]])
|
|
1235 // CHECK-NEXT: ret <8 x i8> [[VTBX13_I]]
|
|
1236 //
|
150
|
// vtbx2_p8: the two 8-byte table halves of `b` are concatenated into one
// <16 x i8> via shufflevector and lowered to a single @llvm.aarch64.neon.tbx1
// call (see the autogenerated CHECK lines above).
1237 poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
|
|
1238 return vtbx2_p8(a, b, c);
|
|
1239 }
|
|
1240
|
236
|
1241 // CHECK-LABEL: define {{[^@]+}}@test_vtbx3_p8
|
252
|
1242 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
236
|
1243 // CHECK-NEXT: entry:
|
|
1244 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X3_T:%.*]], align 8
|
|
1245 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8
|
252
|
1246 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], ptr [[B]], i32 0, i32 0
|
|
1247 // CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
|
|
1248 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], ptr [[B]], i32 0, i32 0
|
|
1249 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
|
|
1250 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
|
|
1251 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
|
|
1252 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
1253 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
|
|
1254 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
|
|
1255 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
|
236
|
1256 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1257 // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1258 // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
|
|
1259 // CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
|
|
1260 // CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
|
|
1261 // CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]]
|
|
1262 // CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
|
1263 // CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
|
|
1264 // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
|
|
1265 // CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
|
|
1266 //
|
150
|
// vtbx3_p8: the three 8-byte halves are packed into two <16 x i8> tables (the
// third half zero-padded), fed to @llvm.aarch64.neon.tbl2, then an explicit
// `icmp uge <8 x i8> c, 24` select keeps lanes of `a` for out-of-range indices.
1267 poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
|
|
1268 return vtbx3_p8(a, b, c);
|
|
1269 }
|
|
1270
|
236
|
1271 // CHECK-LABEL: define {{[^@]+}}@test_vtbx4_p8
|
252
|
1272 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
236
|
1273 // CHECK-NEXT: entry:
|
|
1274 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X4_T:%.*]], align 8
|
|
1275 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8
|
252
|
1276 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], ptr [[B]], i32 0, i32 0
|
|
1277 // CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
|
|
1278 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], ptr [[B]], i32 0, i32 0
|
|
1279 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
|
|
1280 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
|
|
1281 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
|
|
1282 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
1283 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
|
|
1284 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
|
|
1285 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
|
|
1286 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 3
|
|
1287 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
|
236
|
1288 // CHECK-NEXT: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1289 // CHECK-NEXT: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1290 // CHECK-NEXT: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]])
|
|
1291 // CHECK-NEXT: ret <8 x i8> [[VTBX28_I]]
|
|
1292 //
|
150
|
// vtbx4_p8: the four 8-byte halves pair up into two <16 x i8> tables via
// shufflevector and lower directly to @llvm.aarch64.neon.tbx2 -- no manual
// select needed since 32 entries exactly fill two q-register tables.
1293 poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
|
|
1294 return vtbx4_p8(a, b, c);
|
|
1295 }
|
|
1296
|
236
|
1297 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx1_p8
|
|
1298 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
1299 // CHECK-NEXT: entry:
|
|
1300 // CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
|
|
1301 // CHECK-NEXT: ret <8 x i8> [[VTBX1_I]]
|
|
1302 //
|
150
|
// vqtbx1_p8: q-register table form maps 1:1 onto @llvm.aarch64.neon.tbx1.v8i8
// with no packing or select code (see CHECK lines above).
1303 poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) {
|
|
1304 return vqtbx1_p8(a, b, c);
|
|
1305 }
|
|
1306
|
236
|
1307 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_p8
|
252
|
1308 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
236
|
1309 // CHECK-NEXT: entry:
|
|
1310 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
|
|
1311 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
|
252
|
1312 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[B]], i32 0, i32 0
|
|
1313 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
1314 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[B]], i32 0, i32 0
|
|
1315 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
1316 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
|
|
1317 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
|
|
1318 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
1319 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
236
|
1320 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
|
|
1321 // CHECK-NEXT: ret <8 x i8> [[VTBX2_I]]
|
|
1322 //
|
150
|
// vqtbx2_p8: checks the [2 x <16 x i8>] ABI coercion of the poly8x16x2_t
// argument through stack temporaries, then a direct @llvm.aarch64.neon.tbx2.v8i8
// call on the two loaded table registers.
1323 poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) {
|
|
1324 return vqtbx2_p8(a, b, c);
|
|
1325 }
|
|
1326
|
236
|
1327 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_p8
|
252
|
1328 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
236
|
1329 // CHECK-NEXT: entry:
|
|
1330 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
|
|
1331 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
|
252
|
1332 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[B]], i32 0, i32 0
|
|
1333 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
1334 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[B]], i32 0, i32 0
|
|
1335 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
1336 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
|
|
1337 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
|
|
1338 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
1339 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
1340 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
|
|
1341 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
236
|
1342 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]])
|
|
1343 // CHECK-NEXT: ret <8 x i8> [[VTBX3_I]]
|
|
1344 //
|
150
|
// vqtbx3_p8: three-table q-register variant; lowers to a direct
// @llvm.aarch64.neon.tbx3.v8i8 call after the struct-coercion loads above.
1345 poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) {
|
|
1346 return vqtbx3_p8(a, b, c);
|
|
1347 }
|
|
1348
|
236
|
1349 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_p8
|
252
|
1350 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
236
|
1351 // CHECK-NEXT: entry:
|
|
1352 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
|
|
1353 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
|
252
|
1354 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[B]], i32 0, i32 0
|
|
1355 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
1356 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[B]], i32 0, i32 0
|
|
1357 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
1358 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
|
|
1359 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
|
|
1360 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
1361 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
1362 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
|
|
1363 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
|
1364 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
|
|
1365 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
|
236
|
1366 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]])
|
|
1367 // CHECK-NEXT: ret <8 x i8> [[VTBX4_I]]
|
|
1368 //
|
150
|
// vqtbx4_p8: four-table q-register variant; lowers to a direct
// @llvm.aarch64.neon.tbx4.v8i8 call after the struct-coercion loads above.
1369 poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) {
|
|
1370 return vqtbx4_p8(a, b, c);
|
|
1371 }
|
|
1372
|
236
|
1373 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx1q_p8
|
|
1374 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
1375 // CHECK-NEXT: entry:
|
|
1376 // CHECK-NEXT: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
|
|
1377 // CHECK-NEXT: ret <16 x i8> [[VTBX1_I]]
|
|
1378 //
|
150
|
// vqtbx1q_p8: full 128-bit result form; maps 1:1 onto
// @llvm.aarch64.neon.tbx1.v16i8 (see CHECK lines above).
1379 poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) {
|
|
1380 return vqtbx1q_p8(a, b, c);
|
|
1381 }
|
|
1382
|
236
|
1383 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_p8
|
252
|
1384 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
236
|
1385 // CHECK-NEXT: entry:
|
|
1386 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
|
|
1387 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
|
252
|
1388 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[B]], i32 0, i32 0
|
|
1389 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
1390 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[B]], i32 0, i32 0
|
|
1391 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
1392 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
|
|
1393 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
|
|
1394 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
1395 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
236
|
1396 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
|
|
1397 // CHECK-NEXT: ret <16 x i8> [[VTBX2_I]]
|
|
1398 //
|
150
|
// vqtbx2q_p8: 128-bit result, two-table form; checks the [2 x <16 x i8>] ABI
// coercion then a direct @llvm.aarch64.neon.tbx2.v16i8 call.
1399 poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) {
|
|
1400 return vqtbx2q_p8(a, b, c);
|
|
1401 }
|
|
1402
|
236
|
1403 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_p8
|
252
|
1404 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
236
|
1405 // CHECK-NEXT: entry:
|
|
1406 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
|
|
1407 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
|
252
|
1408 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[B]], i32 0, i32 0
|
|
1409 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
1410 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[B]], i32 0, i32 0
|
|
1411 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
1412 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
|
|
1413 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
|
|
1414 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
1415 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
1416 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
|
|
1417 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
236
|
1418 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]])
|
|
1419 // CHECK-NEXT: ret <16 x i8> [[VTBX3_I]]
|
|
1420 //
|
150
|
// vqtbx3q_p8: 128-bit result, three-table form; lowers to a direct
// @llvm.aarch64.neon.tbx3.v16i8 call after the struct-coercion loads above.
1421 poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
|
|
1422 return vqtbx3q_p8(a, b, c);
|
|
1423 }
|
|
1424
|
236
|
1425 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_p8
|
252
|
1426 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
236
|
1427 // CHECK-NEXT: entry:
|
|
1428 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
|
|
1429 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
|
252
|
1430 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[B]], i32 0, i32 0
|
|
1431 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
|
|
1432 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[B]], i32 0, i32 0
|
|
1433 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
|
|
1434 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
|
|
1435 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
|
|
1436 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
|
|
1437 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
|
|
1438 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
|
|
1439 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
|
|
1440 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
|
|
1441 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
|
236
|
1442 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]])
|
|
1443 // CHECK-NEXT: ret <16 x i8> [[VTBX4_I]]
|
|
1444 //
|
150
|
// vqtbx4q_p8: 128-bit result, four-table form; lowers to a direct
// @llvm.aarch64.neon.tbx4.v16i8 call after the struct-coercion loads above.
1445 poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) {
|
|
1446 return vqtbx4q_p8(a, b, c);
|
|
1447 }
|
|
1448
|