236
|
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
|
|
2 // RUN: %clang_cc1 -no-opaque-pointers -triple arm64-none-linux-gnu -target-feature +neon \
|
150
|
3 // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
|
|
4
|
236
|
5 // REQUIRES: aarch64-registered-target || arm-registered-target
|
150
|
6
|
|
7 #include <arm_neon.h>
|
|
8
|
236
|
9 // CHECK-LABEL: define {{[^@]+}}@test_vtbl1_s8
|
|
10 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
11 // CHECK-NEXT: entry:
|
|
12 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
13 // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
|
|
14 // CHECK-NEXT: ret <8 x i8> [[VTBL11_I]]
|
|
15 //
|
150
|
16 int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
|
|
17 return vtbl1_s8(a, b);
|
|
18 }
|
|
19
|
236
|
20 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1_s8
|
|
21 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
22 // CHECK-NEXT: entry:
|
|
23 // CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]])
|
|
24 // CHECK-NEXT: ret <8 x i8> [[VTBL1_I]]
|
|
25 //
|
150
|
26 int8x8_t test_vqtbl1_s8(int8x16_t a, uint8x8_t b) {
|
|
27 return vqtbl1_s8(a, b);
|
|
28 }
|
|
29
|
236
|
30 // CHECK-LABEL: define {{[^@]+}}@test_vtbl2_s8
|
|
31 // CHECK-SAME: ([2 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
|
32 // CHECK-NEXT: entry:
|
|
33 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X2_T:%.*]], align 8
|
|
34 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8
|
|
35 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[A]], i32 0, i32 0
|
|
36 // CHECK-NEXT: store [2 x <8 x i8>] [[A_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
37 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[A]], i32 0, i32 0
|
|
38 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
39 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
|
|
40 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
41 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
|
|
42 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
43 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
44 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
|
|
45 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
46 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
47 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
48 // CHECK-NEXT: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
|
|
49 // CHECK-NEXT: ret <8 x i8> [[VTBL13_I]]
|
|
50 //
|
150
|
51 int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
|
|
52 return vtbl2_s8(a, b);
|
|
53 }
|
|
54
|
236
|
55 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_s8
|
|
56 // CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
57 // CHECK-NEXT: entry:
|
|
58 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
|
|
59 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
|
|
60 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[A]], i32 0, i32 0
|
|
61 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
62 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[A]], i32 0, i32 0
|
|
63 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
64 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
65 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
66 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
67 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
68 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
69 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
70 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
71 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
72 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]])
|
|
73 // CHECK-NEXT: ret <8 x i8> [[VTBL2_I]]
|
|
74 //
|
150
|
75 int8x8_t test_vqtbl2_s8(int8x16x2_t a, uint8x8_t b) {
|
|
76 return vqtbl2_s8(a, b);
|
|
77 }
|
|
78
|
236
|
79 // CHECK-LABEL: define {{[^@]+}}@test_vtbl3_s8
|
|
80 // CHECK-SAME: ([3 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
|
81 // CHECK-NEXT: entry:
|
|
82 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X3_T:%.*]], align 8
|
|
83 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8
|
|
84 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[A]], i32 0, i32 0
|
|
85 // CHECK-NEXT: store [3 x <8 x i8>] [[A_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
86 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[A]], i32 0, i32 0
|
|
87 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
88 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
|
|
89 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
90 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
|
|
91 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
92 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
93 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
|
|
94 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
95 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
96 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
|
|
97 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
98 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
|
|
99 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
100 // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
101 // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]])
|
|
102 // CHECK-NEXT: ret <8 x i8> [[VTBL26_I]]
|
|
103 //
|
150
|
104 int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
|
|
105 return vtbl3_s8(a, b);
|
|
106 }
|
|
107
|
236
|
108 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_s8
|
|
109 // CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
110 // CHECK-NEXT: entry:
|
|
111 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
|
|
112 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
|
|
113 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[A]], i32 0, i32 0
|
|
114 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
115 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[A]], i32 0, i32 0
|
|
116 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
117 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
118 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
119 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
120 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
121 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
122 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
123 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
124 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
125 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
126 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
127 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
128 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]])
|
|
129 // CHECK-NEXT: ret <8 x i8> [[VTBL3_I]]
|
|
130 //
|
150
|
131 int8x8_t test_vqtbl3_s8(int8x16x3_t a, uint8x8_t b) {
|
|
132 return vqtbl3_s8(a, b);
|
|
133 }
|
|
134
|
236
|
135 // CHECK-LABEL: define {{[^@]+}}@test_vtbl4_s8
|
|
136 // CHECK-SAME: ([4 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
|
137 // CHECK-NEXT: entry:
|
|
138 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X4_T:%.*]], align 8
|
|
139 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8
|
|
140 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[A]], i32 0, i32 0
|
|
141 // CHECK-NEXT: store [4 x <8 x i8>] [[A_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
142 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[A]], i32 0, i32 0
|
|
143 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
144 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
|
|
145 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
146 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
|
|
147 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
148 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
149 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
|
|
150 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
151 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
152 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
|
|
153 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
154 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
|
|
155 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
|
|
156 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
157 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
|
|
158 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
159 // CHECK-NEXT: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
160 // CHECK-NEXT: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]])
|
|
161 // CHECK-NEXT: ret <8 x i8> [[VTBL28_I]]
|
|
162 //
|
150
|
163 int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
|
|
164 return vtbl4_s8(a, b);
|
|
165 }
|
|
166
|
236
|
167 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_s8
|
|
168 // CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
169 // CHECK-NEXT: entry:
|
|
170 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
|
|
171 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
|
|
172 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[A]], i32 0, i32 0
|
|
173 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
174 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[A]], i32 0, i32 0
|
|
175 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
176 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
177 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
178 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
179 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
180 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
181 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
182 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
183 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
184 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
185 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
186 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
187 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
188 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
189 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
|
|
190 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]])
|
|
191 // CHECK-NEXT: ret <8 x i8> [[VTBL4_I]]
|
|
192 //
|
150
|
193 int8x8_t test_vqtbl4_s8(int8x16x4_t a, uint8x8_t b) {
|
|
194 return vqtbl4_s8(a, b);
|
|
195 }
|
|
196
|
236
|
197 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1q_s8
|
|
198 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
199 // CHECK-NEXT: entry:
|
|
200 // CHECK-NEXT: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
|
|
201 // CHECK-NEXT: ret <16 x i8> [[VTBL1_I]]
|
|
202 //
|
150
|
203 int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) {
|
|
204 return vqtbl1q_s8(a, b);
|
|
205 }
|
|
206
|
236
|
207 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_s8
|
|
208 // CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
209 // CHECK-NEXT: entry:
|
|
210 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
|
|
211 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
|
|
212 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[A]], i32 0, i32 0
|
|
213 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
214 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[A]], i32 0, i32 0
|
|
215 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
216 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
217 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
218 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
219 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
220 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
221 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
222 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
223 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
224 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]])
|
|
225 // CHECK-NEXT: ret <16 x i8> [[VTBL2_I]]
|
|
226 //
|
150
|
227 int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) {
|
|
228 return vqtbl2q_s8(a, b);
|
|
229 }
|
|
230
|
236
|
231 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_s8
|
|
232 // CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
233 // CHECK-NEXT: entry:
|
|
234 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
|
|
235 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
|
|
236 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[A]], i32 0, i32 0
|
|
237 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
238 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[A]], i32 0, i32 0
|
|
239 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
240 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
241 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
242 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
243 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
244 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
245 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
246 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
247 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
248 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
249 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
250 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
251 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]])
|
|
252 // CHECK-NEXT: ret <16 x i8> [[VTBL3_I]]
|
|
253 //
|
150
|
254 int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) {
|
|
255 return vqtbl3q_s8(a, b);
|
|
256 }
|
|
257
|
236
|
258 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_s8
|
|
259 // CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
260 // CHECK-NEXT: entry:
|
|
261 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
|
|
262 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
|
|
263 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[A]], i32 0, i32 0
|
|
264 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
265 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[A]], i32 0, i32 0
|
|
266 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
267 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
268 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
269 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
270 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
271 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
272 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
273 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
274 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
275 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
276 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
277 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
278 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
279 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
280 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
|
|
281 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]])
|
|
282 // CHECK-NEXT: ret <16 x i8> [[VTBL4_I]]
|
|
283 //
|
150
|
284 int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) {
|
|
285 return vqtbl4q_s8(a, b);
|
|
286 }
|
|
287
|
236
|
288 // CHECK-LABEL: define {{[^@]+}}@test_vtbx1_s8
|
|
289 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
|
290 // CHECK-NEXT: entry:
|
|
291 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
292 // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]])
|
|
293 // CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
|
|
294 // CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
|
|
295 // CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]]
|
|
296 // CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
|
297 // CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
|
|
298 // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
|
|
299 // CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
|
|
300 //
|
150
|
301 int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
|
|
302 return vtbx1_s8(a, b, c);
|
|
303 }
|
|
304
|
236
|
305 // CHECK-LABEL: define {{[^@]+}}@test_vtbx2_s8
|
|
306 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
|
307 // CHECK-NEXT: entry:
|
|
308 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X2_T:%.*]], align 8
|
|
309 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8
|
|
310 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[B]], i32 0, i32 0
|
|
311 // CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
312 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[B]], i32 0, i32 0
|
|
313 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
314 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
|
|
315 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
316 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
|
|
317 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
318 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
319 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
|
|
320 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
321 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
322 // CHECK-NEXT: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
323 // CHECK-NEXT: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]])
|
|
324 // CHECK-NEXT: ret <8 x i8> [[VTBX13_I]]
|
|
325 //
|
150
|
326 int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
|
|
327 return vtbx2_s8(a, b, c);
|
|
328 }
|
|
329
|
236
|
330 // CHECK-LABEL: define {{[^@]+}}@test_vtbx3_s8
|
|
331 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
|
332 // CHECK-NEXT: entry:
|
|
333 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X3_T:%.*]], align 8
|
|
334 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8
|
|
335 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[B]], i32 0, i32 0
|
|
336 // CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
337 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[B]], i32 0, i32 0
|
|
338 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
339 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
|
|
340 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
341 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
|
|
342 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
343 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
344 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
|
|
345 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
346 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
347 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
|
|
348 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
349 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
|
|
350 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
351 // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
352 // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
|
|
353 // CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
|
|
354 // CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
|
|
355 // CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]]
|
|
356 // CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
|
357 // CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
|
|
358 // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
|
|
359 // CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
|
|
360 //
|
150
|
361 int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
|
|
362 return vtbx3_s8(a, b, c);
|
|
363 }
|
|
364
|
236
|
365 // CHECK-LABEL: define {{[^@]+}}@test_vtbx4_s8
|
|
366 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
|
367 // CHECK-NEXT: entry:
|
|
368 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X4_T:%.*]], align 8
|
|
369 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8
|
|
370 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[B]], i32 0, i32 0
|
|
371 // CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
372 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[B]], i32 0, i32 0
|
|
373 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
374 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
|
|
375 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
376 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
|
|
377 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
378 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
379 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
|
|
380 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
381 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
382 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
|
|
383 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
384 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
|
|
385 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
|
|
386 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
387 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
|
|
388 // CHECK-NEXT: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
389 // CHECK-NEXT: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
390 // CHECK-NEXT: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]])
|
|
391 // CHECK-NEXT: ret <8 x i8> [[VTBX28_I]]
|
|
392 //
|
150
|
393 int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
|
|
394 return vtbx4_s8(a, b, c);
|
|
395 }
|
|
396
|
236
|
397 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx1_s8
|
|
398 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
399 // CHECK-NEXT: entry:
|
|
400 // CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
|
|
401 // CHECK-NEXT: ret <8 x i8> [[VTBX1_I]]
|
|
402 //
|
150
|
403 int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, uint8x8_t c) {
|
|
404 return vqtbx1_s8(a, b, c);
|
|
405 }
|
|
406
|
236
|
407 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_s8
|
|
408 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
409 // CHECK-NEXT: entry:
|
|
410 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
|
|
411 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
|
|
412 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[B]], i32 0, i32 0
|
|
413 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
414 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[B]], i32 0, i32 0
|
|
415 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
416 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
417 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
418 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
419 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
420 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
421 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
422 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
423 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
424 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
|
|
425 // CHECK-NEXT: ret <8 x i8> [[VTBX2_I]]
|
|
426 //
|
150
|
427 int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, uint8x8_t c) {
|
|
428 return vqtbx2_s8(a, b, c);
|
|
429 }
|
|
430
|
236
|
431 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_s8
|
|
432 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
433 // CHECK-NEXT: entry:
|
|
434 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
|
|
435 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
|
|
436 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[B]], i32 0, i32 0
|
|
437 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
438 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[B]], i32 0, i32 0
|
|
439 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
440 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
441 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
442 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
443 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
444 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
445 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
446 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
447 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
448 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
449 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
450 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
451 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]])
|
|
452 // CHECK-NEXT: ret <8 x i8> [[VTBX3_I]]
|
|
453 //
|
150
|
454 int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, uint8x8_t c) {
|
|
455 return vqtbx3_s8(a, b, c);
|
|
456 }
|
|
457
|
236
|
458 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_s8
|
|
459 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
460 // CHECK-NEXT: entry:
|
|
461 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
|
|
462 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
|
|
463 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[B]], i32 0, i32 0
|
|
464 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
465 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[B]], i32 0, i32 0
|
|
466 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
467 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
468 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
469 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
470 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
471 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
472 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
473 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
474 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
475 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
476 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
477 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
478 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
479 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
480 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
|
|
481 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]])
|
|
482 // CHECK-NEXT: ret <8 x i8> [[VTBX4_I]]
|
|
483 //
|
150
|
484 int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, uint8x8_t c) {
|
|
485 return vqtbx4_s8(a, b, c);
|
|
486 }
|
|
487
|
236
|
488 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx1q_s8
|
|
489 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
490 // CHECK-NEXT: entry:
|
|
491 // CHECK-NEXT: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
|
|
492 // CHECK-NEXT: ret <16 x i8> [[VTBX1_I]]
|
|
493 //
|
150
|
494 int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, uint8x16_t c) {
|
|
495 return vqtbx1q_s8(a, b, c);
|
|
496 }
|
|
497
|
236
|
498 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_s8
|
|
499 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
500 // CHECK-NEXT: entry:
|
|
501 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
|
|
502 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
|
|
503 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[B]], i32 0, i32 0
|
|
504 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
505 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[B]], i32 0, i32 0
|
|
506 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
507 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
508 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
509 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
510 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
511 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
512 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
513 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
514 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
515 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
|
|
516 // CHECK-NEXT: ret <16 x i8> [[VTBX2_I]]
|
|
517 //
|
150
|
518 int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) {
|
|
519 return vqtbx2q_s8(a, b, c);
|
|
520 }
|
|
521
|
236
|
522 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_s8
|
|
523 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
524 // CHECK-NEXT: entry:
|
|
525 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
|
|
526 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
|
|
527 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[B]], i32 0, i32 0
|
|
528 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
529 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[B]], i32 0, i32 0
|
|
530 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
531 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
532 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
533 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
534 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
535 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
536 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
537 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
538 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
539 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
540 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
541 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
542 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]])
|
|
543 // CHECK-NEXT: ret <16 x i8> [[VTBX3_I]]
|
|
544 //
|
150
|
545 int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) {
|
|
546 return vqtbx3q_s8(a, b, c);
|
|
547 }
|
|
548
|
236
|
549 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_s8
|
|
550 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
551 // CHECK-NEXT: entry:
|
|
552 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
|
|
553 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
|
|
554 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[B]], i32 0, i32 0
|
|
555 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
556 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[B]], i32 0, i32 0
|
|
557 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
558 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
559 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
560 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
561 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
562 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
563 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
564 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
565 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
566 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
567 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
568 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
569 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
570 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
571 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
|
|
572 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]])
|
|
573 // CHECK-NEXT: ret <16 x i8> [[VTBX4_I]]
|
|
574 //
|
150
|
575 int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) {
|
|
576 return vqtbx4q_s8(a, b, c);
|
|
577 }
|
|
578
|
236
|
579 // CHECK-LABEL: define {{[^@]+}}@test_vtbl1_u8
|
|
580 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
|
581 // CHECK-NEXT: entry:
|
|
582 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
583 // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
|
|
584 // CHECK-NEXT: ret <8 x i8> [[VTBL11_I]]
|
|
585 //
|
150
|
586 uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
|
|
587 return vtbl1_u8(a, b);
|
|
588 }
|
|
589
|
236
|
590 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1_u8
|
|
591 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
592 // CHECK-NEXT: entry:
|
|
593 // CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]])
|
|
594 // CHECK-NEXT: ret <8 x i8> [[VTBL1_I]]
|
|
595 //
|
150
|
596 uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
|
|
597 return vqtbl1_u8(a, b);
|
|
598 }
|
|
599
|
236
|
600 // CHECK-LABEL: define {{[^@]+}}@test_vtbl2_u8
|
|
601 // CHECK-SAME: ([2 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
|
602 // CHECK-NEXT: entry:
|
|
603 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X2_T:%.*]], align 8
|
|
604 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8
|
|
605 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[A]], i32 0, i32 0
|
|
606 // CHECK-NEXT: store [2 x <8 x i8>] [[A_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
607 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[A]], i32 0, i32 0
|
|
608 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
609 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
|
|
610 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
611 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
|
|
612 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
613 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
614 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
|
|
615 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
616 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
617 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
618 // CHECK-NEXT: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
|
|
619 // CHECK-NEXT: ret <8 x i8> [[VTBL13_I]]
|
|
620 //
|
150
|
621 uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
|
|
622 return vtbl2_u8(a, b);
|
|
623 }
|
|
624
|
236
|
625 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_u8
|
|
626 // CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
627 // CHECK-NEXT: entry:
|
|
628 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
|
|
629 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
|
|
630 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[A]], i32 0, i32 0
|
|
631 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
632 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[A]], i32 0, i32 0
|
|
633 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
634 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
635 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
636 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
637 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
638 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
639 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
640 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
641 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
642 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]])
|
|
643 // CHECK-NEXT: ret <8 x i8> [[VTBL2_I]]
|
|
644 //
|
150
|
645 uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) {
|
|
646 return vqtbl2_u8(a, b);
|
|
647 }
|
|
648
|
236
|
649 // CHECK-LABEL: define {{[^@]+}}@test_vtbl3_u8
|
|
650 // CHECK-SAME: ([3 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
|
651 // CHECK-NEXT: entry:
|
|
652 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X3_T:%.*]], align 8
|
|
653 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8
|
|
654 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[A]], i32 0, i32 0
|
|
655 // CHECK-NEXT: store [3 x <8 x i8>] [[A_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
656 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[A]], i32 0, i32 0
|
|
657 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
658 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
|
|
659 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
660 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
|
|
661 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
662 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
663 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
|
|
664 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
665 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
666 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
|
|
667 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
668 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
|
|
669 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
670 // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
671 // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]])
|
|
672 // CHECK-NEXT: ret <8 x i8> [[VTBL26_I]]
|
|
673 //
|
150
|
674 uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
|
|
675 return vtbl3_u8(a, b);
|
|
676 }
|
|
677
|
236
|
678 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_u8
|
|
679 // CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
680 // CHECK-NEXT: entry:
|
|
681 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
|
|
682 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
|
|
683 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[A]], i32 0, i32 0
|
|
684 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
685 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[A]], i32 0, i32 0
|
|
686 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
687 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
688 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
689 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
690 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
691 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
692 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
693 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
694 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
695 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
696 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
697 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
698 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]])
|
|
699 // CHECK-NEXT: ret <8 x i8> [[VTBL3_I]]
|
|
700 //
|
150
|
701 uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) {
|
|
702 return vqtbl3_u8(a, b);
|
|
703 }
|
|
704
|
236
|
705 // CHECK-LABEL: define {{[^@]+}}@test_vtbl4_u8
|
|
706 // CHECK-SAME: ([4 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
|
707 // CHECK-NEXT: entry:
|
|
708 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X4_T:%.*]], align 8
|
|
709 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8
|
|
710 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[A]], i32 0, i32 0
|
|
711 // CHECK-NEXT: store [4 x <8 x i8>] [[A_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
712 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[A]], i32 0, i32 0
|
|
713 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
714 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
|
|
715 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
716 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
|
|
717 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
718 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
719 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
|
|
720 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
721 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
722 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
|
|
723 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
724 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
|
|
725 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
|
|
726 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
727 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
|
|
728 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
729 // CHECK-NEXT: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
730 // CHECK-NEXT: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]])
|
|
731 // CHECK-NEXT: ret <8 x i8> [[VTBL28_I]]
|
|
732 //
|
150
|
733 uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
|
|
734 return vtbl4_u8(a, b);
|
|
735 }
|
|
736
|
236
|
737 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_u8
|
|
738 // CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
739 // CHECK-NEXT: entry:
|
|
740 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
|
|
741 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
|
|
742 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[A]], i32 0, i32 0
|
|
743 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
744 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[A]], i32 0, i32 0
|
|
745 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
746 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
747 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
748 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
749 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
750 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
751 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
752 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
753 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
754 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
755 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
756 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
757 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
758 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
759 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
|
|
760 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]])
|
|
761 // CHECK-NEXT: ret <8 x i8> [[VTBL4_I]]
|
|
762 //
|
150
|
763 uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) {
|
|
764 return vqtbl4_u8(a, b);
|
|
765 }
|
|
766
|
236
|
767 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1q_u8
|
|
768 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
769 // CHECK-NEXT: entry:
|
|
770 // CHECK-NEXT: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
|
|
771 // CHECK-NEXT: ret <16 x i8> [[VTBL1_I]]
|
|
772 //
|
150
|
773 uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
|
|
774 return vqtbl1q_u8(a, b);
|
|
775 }
|
|
776
|
236
|
777 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_u8
|
|
778 // CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
779 // CHECK-NEXT: entry:
|
|
780 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
|
|
781 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
|
|
782 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[A]], i32 0, i32 0
|
|
783 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
784 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[A]], i32 0, i32 0
|
|
785 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
786 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
787 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
788 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
789 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
790 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
791 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
792 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
793 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
794 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]])
|
|
795 // CHECK-NEXT: ret <16 x i8> [[VTBL2_I]]
|
|
796 //
|
150
|
797 uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) {
|
|
798 return vqtbl2q_u8(a, b);
|
|
799 }
|
|
800
|
236
|
801 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_u8
|
|
802 // CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
803 // CHECK-NEXT: entry:
|
|
804 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
|
|
805 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
|
|
806 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[A]], i32 0, i32 0
|
|
807 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
808 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[A]], i32 0, i32 0
|
|
809 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
810 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
811 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
812 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
813 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
814 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
815 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
816 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
817 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
818 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
819 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
820 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
821 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]])
|
|
822 // CHECK-NEXT: ret <16 x i8> [[VTBL3_I]]
|
|
823 //
|
150
|
824 uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) {
|
|
825 return vqtbl3q_u8(a, b);
|
|
826 }
|
|
827
|
236
|
828 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_u8
|
|
829 // CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
830 // CHECK-NEXT: entry:
|
|
831 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
|
|
832 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
|
|
833 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[A]], i32 0, i32 0
|
|
834 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
835 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[A]], i32 0, i32 0
|
|
836 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
837 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
838 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
839 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
840 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
841 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
842 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
843 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
844 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
845 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
846 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
847 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
848 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
849 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
850 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
|
|
851 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]])
|
|
852 // CHECK-NEXT: ret <16 x i8> [[VTBL4_I]]
|
|
853 //
|
150
|
854 uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) {
|
|
855 return vqtbl4q_u8(a, b);
|
|
856 }
|
|
857
|
236
|
858 // CHECK-LABEL: define {{[^@]+}}@test_vtbx1_u8
|
|
859 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
|
860 // CHECK-NEXT: entry:
|
|
861 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
862 // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]])
|
|
863 // CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
|
|
864 // CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
|
|
865 // CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]]
|
|
866 // CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
|
867 // CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
|
|
868 // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
|
|
869 // CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
|
|
870 //
|
150
|
871 uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
|
|
872 return vtbx1_u8(a, b, c);
|
|
873 }
|
|
874
|
236
|
875 // CHECK-LABEL: define {{[^@]+}}@test_vtbx2_u8
|
|
876 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
|
877 // CHECK-NEXT: entry:
|
|
878 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X2_T:%.*]], align 8
|
|
879 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8
|
|
880 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[B]], i32 0, i32 0
|
|
881 // CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
882 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[B]], i32 0, i32 0
|
|
883 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
884 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
|
|
885 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
886 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
|
|
887 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
888 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
889 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
|
|
890 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
891 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
892 // CHECK-NEXT: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
893 // CHECK-NEXT: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]])
|
|
894 // CHECK-NEXT: ret <8 x i8> [[VTBX13_I]]
|
|
895 //
|
150
|
896 uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
|
|
897 return vtbx2_u8(a, b, c);
|
|
898 }
|
|
899
|
236
|
900 // CHECK-LABEL: define {{[^@]+}}@test_vtbx3_u8
|
|
901 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
|
902 // CHECK-NEXT: entry:
|
|
903 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X3_T:%.*]], align 8
|
|
904 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8
|
|
905 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[B]], i32 0, i32 0
|
|
906 // CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
907 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[B]], i32 0, i32 0
|
|
908 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
909 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
|
|
910 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
911 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
|
|
912 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
913 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
914 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
|
|
915 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
916 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
917 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
|
|
918 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
919 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
|
|
920 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
921 // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
922 // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
|
|
923 // CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
|
|
924 // CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
|
|
925 // CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]]
|
|
926 // CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
|
927 // CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
|
|
928 // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
|
|
929 // CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
|
|
930 //
|
150
|
931 uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
|
|
932 return vtbx3_u8(a, b, c);
|
|
933 }
|
|
934
|
236
|
935 // CHECK-LABEL: define {{[^@]+}}@test_vtbx4_u8
|
|
936 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
|
937 // CHECK-NEXT: entry:
|
|
938 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X4_T:%.*]], align 8
|
|
939 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8
|
|
940 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[B]], i32 0, i32 0
|
|
941 // CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
942 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[B]], i32 0, i32 0
|
|
943 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
944 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
|
|
945 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
946 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
|
|
947 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
948 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
949 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
|
|
950 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
951 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
952 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
|
|
953 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
954 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
|
|
955 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
|
|
956 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
957 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
|
|
958 // CHECK-NEXT: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
959 // CHECK-NEXT: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
960 // CHECK-NEXT: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]])
|
|
961 // CHECK-NEXT: ret <8 x i8> [[VTBX28_I]]
|
|
962 //
|
150
|
963 uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
|
|
964 return vtbx4_u8(a, b, c);
|
|
965 }
|
|
966
|
236
|
967 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx1_u8
|
|
968 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
969 // CHECK-NEXT: entry:
|
|
970 // CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
|
|
971 // CHECK-NEXT: ret <8 x i8> [[VTBX1_I]]
|
|
972 //
|
150
|
973 uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) {
|
|
974 return vqtbx1_u8(a, b, c);
|
|
975 }
|
|
976
|
236
|
977 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_u8
|
|
978 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
979 // CHECK-NEXT: entry:
|
|
980 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
|
|
981 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
|
|
982 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[B]], i32 0, i32 0
|
|
983 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
984 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[B]], i32 0, i32 0
|
|
985 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
986 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
987 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
988 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
989 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
990 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
991 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
992 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
993 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
994 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
|
|
995 // CHECK-NEXT: ret <8 x i8> [[VTBX2_I]]
|
|
996 //
|
150
|
997 uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) {
|
|
998 return vqtbx2_u8(a, b, c);
|
|
999 }
|
|
1000
|
236
|
1001 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_u8
|
|
1002 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
1003 // CHECK-NEXT: entry:
|
|
1004 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
|
|
1005 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
|
|
1006 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[B]], i32 0, i32 0
|
|
1007 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1008 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[B]], i32 0, i32 0
|
|
1009 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1010 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
1011 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1012 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
1013 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1014 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1015 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
1016 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1017 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1018 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
1019 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
1020 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
1021 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]])
|
|
1022 // CHECK-NEXT: ret <8 x i8> [[VTBX3_I]]
|
|
1023 //
|
150
|
1024 uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) {
|
|
1025 return vqtbx3_u8(a, b, c);
|
|
1026 }
|
|
1027
|
236
|
1028 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_u8
|
|
1029 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
1030 // CHECK-NEXT: entry:
|
|
1031 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
|
|
1032 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
|
|
1033 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[B]], i32 0, i32 0
|
|
1034 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1035 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[B]], i32 0, i32 0
|
|
1036 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1037 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1038 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1039 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1040 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1041 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1042 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1043 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1044 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1045 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1046 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
1047 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
1048 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1049 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
1050 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
|
|
1051 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]])
|
|
1052 // CHECK-NEXT: ret <8 x i8> [[VTBX4_I]]
|
|
1053 //
|
150
|
1054 uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) {
|
|
1055 return vqtbx4_u8(a, b, c);
|
|
1056 }
|
|
1057
|
236
|
1058 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx1q_u8
|
|
1059 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
1060 // CHECK-NEXT: entry:
|
|
1061 // CHECK-NEXT: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
|
|
1062 // CHECK-NEXT: ret <16 x i8> [[VTBX1_I]]
|
|
1063 //
|
150
|
1064 uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
|
|
1065 return vqtbx1q_u8(a, b, c);
|
|
1066 }
|
|
1067
|
236
|
1068 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_u8
|
|
1069 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
1070 // CHECK-NEXT: entry:
|
|
1071 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
|
|
1072 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
|
|
1073 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[B]], i32 0, i32 0
|
|
1074 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1075 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[B]], i32 0, i32 0
|
|
1076 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1077 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
1078 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1079 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
1080 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1081 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1082 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
1083 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1084 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1085 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
|
|
1086 // CHECK-NEXT: ret <16 x i8> [[VTBX2_I]]
|
|
1087 //
|
150
|
1088 uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) {
|
|
1089 return vqtbx2q_u8(a, b, c);
|
|
1090 }
|
|
1091
|
236
|
1092 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_u8
|
|
1093 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
1094 // CHECK-NEXT: entry:
|
|
1095 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
|
|
1096 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
|
|
1097 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[B]], i32 0, i32 0
|
|
1098 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1099 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[B]], i32 0, i32 0
|
|
1100 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1101 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
1102 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1103 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
1104 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1105 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1106 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
1107 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1108 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1109 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
1110 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
1111 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
1112 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]])
|
|
1113 // CHECK-NEXT: ret <16 x i8> [[VTBX3_I]]
|
|
1114 //
|
150
|
1115 uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) {
|
|
1116 return vqtbx3q_u8(a, b, c);
|
|
1117 }
|
|
1118
|
236
|
1119 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_u8
|
|
1120 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
1121 // CHECK-NEXT: entry:
|
|
1122 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
|
|
1123 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
|
|
1124 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[B]], i32 0, i32 0
|
|
1125 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1126 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[B]], i32 0, i32 0
|
|
1127 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1128 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1129 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1130 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1131 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1132 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1133 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1134 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1135 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1136 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1137 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
1138 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
1139 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1140 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
1141 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
|
|
1142 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]])
|
|
1143 // CHECK-NEXT: ret <16 x i8> [[VTBX4_I]]
|
|
1144 //
|
150
|
1145 uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) {
|
|
1146 return vqtbx4q_u8(a, b, c);
|
|
1147 }
|
|
1148
|
236
|
1149 // CHECK-LABEL: define {{[^@]+}}@test_vtbl1_p8
|
|
1150 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
|
1151 // CHECK-NEXT: entry:
|
|
1152 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1153 // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
|
|
1154 // CHECK-NEXT: ret <8 x i8> [[VTBL11_I]]
|
|
1155 //
|
150
|
1156 poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
|
|
1157 return vtbl1_p8(a, b);
|
|
1158 }
|
|
1159
|
236
|
1160 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1_p8
|
|
1161 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
1162 // CHECK-NEXT: entry:
|
|
1163 // CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]])
|
|
1164 // CHECK-NEXT: ret <8 x i8> [[VTBL1_I]]
|
|
1165 //
|
150
|
1166 poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) {
|
|
1167 return vqtbl1_p8(a, b);
|
|
1168 }
|
|
1169
|
236
|
1170 // CHECK-LABEL: define {{[^@]+}}@test_vtbl2_p8
|
|
1171 // CHECK-SAME: ([2 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
|
1172 // CHECK-NEXT: entry:
|
|
1173 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X2_T:%.*]], align 8
|
|
1174 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8
|
|
1175 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[A]], i32 0, i32 0
|
|
1176 // CHECK-NEXT: store [2 x <8 x i8>] [[A_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
1177 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[A]], i32 0, i32 0
|
|
1178 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
1179 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
|
|
1180 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
1181 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
|
|
1182 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1183 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
1184 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
|
|
1185 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1186 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
1187 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1188 // CHECK-NEXT: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
|
|
1189 // CHECK-NEXT: ret <8 x i8> [[VTBL13_I]]
|
|
1190 //
|
150
|
1191 poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
|
|
1192 return vtbl2_p8(a, b);
|
|
1193 }
|
|
1194
|
236
|
1195 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_p8
|
|
1196 // CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
1197 // CHECK-NEXT: entry:
|
|
1198 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
|
|
1199 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
|
|
1200 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[A]], i32 0, i32 0
|
|
1201 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1202 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[A]], i32 0, i32 0
|
|
1203 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1204 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
1205 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1206 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
1207 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1208 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1209 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
1210 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1211 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1212 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]])
|
|
1213 // CHECK-NEXT: ret <8 x i8> [[VTBL2_I]]
|
|
1214 //
|
150
|
1215 poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
|
|
1216 return vqtbl2_p8(a, b);
|
|
1217 }
|
|
1218
|
236
|
1219 // CHECK-LABEL: define {{[^@]+}}@test_vtbl3_p8
|
|
1220 // CHECK-SAME: ([3 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
|
1221 // CHECK-NEXT: entry:
|
|
1222 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X3_T:%.*]], align 8
|
|
1223 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8
|
|
1224 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[A]], i32 0, i32 0
|
|
1225 // CHECK-NEXT: store [3 x <8 x i8>] [[A_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
1226 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[A]], i32 0, i32 0
|
|
1227 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
1228 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
|
|
1229 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
1230 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
|
|
1231 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1232 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
1233 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
|
|
1234 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1235 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
1236 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
|
|
1237 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
1238 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
|
|
1239 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1240 // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1241 // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]])
|
|
1242 // CHECK-NEXT: ret <8 x i8> [[VTBL26_I]]
|
|
1243 //
|
150
|
1244 poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
|
|
1245 return vtbl3_p8(a, b);
|
|
1246 }
|
|
1247
|
236
|
1248 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_p8
|
|
1249 // CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
1250 // CHECK-NEXT: entry:
|
|
1251 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
|
|
1252 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
|
|
1253 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[A]], i32 0, i32 0
|
|
1254 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1255 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[A]], i32 0, i32 0
|
|
1256 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1257 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
1258 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1259 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
1260 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1261 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1262 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
1263 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1264 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1265 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
1266 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
1267 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
1268 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]])
|
|
1269 // CHECK-NEXT: ret <8 x i8> [[VTBL3_I]]
|
|
1270 //
|
150
|
1271 poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
|
|
1272 return vqtbl3_p8(a, b);
|
|
1273 }
|
|
1274
|
236
|
1275 // CHECK-LABEL: define {{[^@]+}}@test_vtbl4_p8
|
|
1276 // CHECK-SAME: ([4 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
|
|
1277 // CHECK-NEXT: entry:
|
|
1278 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X4_T:%.*]], align 8
|
|
1279 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8
|
|
1280 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[A]], i32 0, i32 0
|
|
1281 // CHECK-NEXT: store [4 x <8 x i8>] [[A_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
1282 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[A]], i32 0, i32 0
|
|
1283 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
1284 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
|
|
1285 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
1286 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
|
|
1287 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1288 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
1289 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
|
|
1290 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1291 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
1292 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
|
|
1293 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
1294 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
|
|
1295 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
|
|
1296 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
1297 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
|
|
1298 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1299 // CHECK-NEXT: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1300 // CHECK-NEXT: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]])
|
|
1301 // CHECK-NEXT: ret <8 x i8> [[VTBL28_I]]
|
|
1302 //
|
150
|
1303 poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
|
|
1304 return vtbl4_p8(a, b);
|
|
1305 }
|
|
1306
|
236
|
1307 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_p8
|
|
1308 // CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
1309 // CHECK-NEXT: entry:
|
|
1310 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
|
|
1311 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
|
|
1312 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[A]], i32 0, i32 0
|
|
1313 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1314 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[A]], i32 0, i32 0
|
|
1315 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1316 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
1317 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1318 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
1319 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1320 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1321 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
1322 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1323 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1324 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
1325 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
1326 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
1327 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
1328 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
1329 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
|
|
1330 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]])
|
|
1331 // CHECK-NEXT: ret <8 x i8> [[VTBL4_I]]
|
|
1332 //
|
150
|
1333 poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) {
|
|
1334 return vqtbl4_p8(a, b);
|
|
1335 }
|
|
1336
|
236
|
1337 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1q_p8
|
|
1338 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
1339 // CHECK-NEXT: entry:
|
|
1340 // CHECK-NEXT: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
|
|
1341 // CHECK-NEXT: ret <16 x i8> [[VTBL1_I]]
|
|
1342 //
|
150
|
1343 poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) {
|
|
1344 return vqtbl1q_p8(a, b);
|
|
1345 }
|
|
1346
|
236
|
1347 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_p8
|
|
1348 // CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
1349 // CHECK-NEXT: entry:
|
|
1350 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
|
|
1351 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
|
|
1352 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[A]], i32 0, i32 0
|
|
1353 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1354 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[A]], i32 0, i32 0
|
|
1355 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1356 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
1357 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1358 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
1359 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1360 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1361 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
|
|
1362 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1363 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1364 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]])
|
|
1365 // CHECK-NEXT: ret <16 x i8> [[VTBL2_I]]
|
|
1366 //
|
150
|
1367 poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
|
|
1368 return vqtbl2q_p8(a, b);
|
|
1369 }
|
|
1370
|
236
|
1371 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_p8
|
|
1372 // CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
1373 // CHECK-NEXT: entry:
|
|
1374 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
|
|
1375 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
|
|
1376 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[A]], i32 0, i32 0
|
|
1377 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1378 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[A]], i32 0, i32 0
|
|
1379 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1380 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
1381 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1382 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
1383 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1384 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1385 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
1386 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1387 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1388 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
|
|
1389 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
1390 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
1391 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]])
|
|
1392 // CHECK-NEXT: ret <16 x i8> [[VTBL3_I]]
|
|
1393 //
|
150
|
1394 poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
|
|
1395 return vqtbl3q_p8(a, b);
|
|
1396 }
|
|
1397
|
236
|
1398 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_p8
|
|
1399 // CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
|
|
1400 // CHECK-NEXT: entry:
|
|
1401 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
|
|
1402 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
|
|
1403 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[A]], i32 0, i32 0
|
|
1404 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1405 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[A]], i32 0, i32 0
|
|
1406 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1407 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
1408 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1409 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
1410 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1411 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1412 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
1413 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1414 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1415 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
1416 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
1417 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
1418 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
|
|
1419 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
1420 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
|
|
1421 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]])
|
|
1422 // CHECK-NEXT: ret <16 x i8> [[VTBL4_I]]
|
|
1423 //
|
150
|
1424 poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) {
|
|
1425 return vqtbl4q_p8(a, b);
|
|
1426 }
|
|
1427
|
236
|
1428 // CHECK-LABEL: define {{[^@]+}}@test_vtbx1_p8
|
|
1429 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
|
1430 // CHECK-NEXT: entry:
|
|
1431 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1432 // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]])
|
|
1433 // CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
|
|
1434 // CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
|
|
1435 // CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]]
|
|
1436 // CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
|
1437 // CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
|
|
1438 // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
|
|
1439 // CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
|
|
1440 //
|
150
|
1441 poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
|
|
1442 return vtbx1_p8(a, b, c);
|
|
1443 }
|
|
1444
|
236
|
1445 // CHECK-LABEL: define {{[^@]+}}@test_vtbx2_p8
|
|
1446 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
|
1447 // CHECK-NEXT: entry:
|
|
1448 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X2_T:%.*]], align 8
|
|
1449 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8
|
|
1450 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[B]], i32 0, i32 0
|
|
1451 // CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
1452 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[B]], i32 0, i32 0
|
|
1453 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
1454 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
|
|
1455 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
1456 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
|
|
1457 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1458 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
1459 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
|
|
1460 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1461 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
1462 // CHECK-NEXT: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1463 // CHECK-NEXT: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]])
|
|
1464 // CHECK-NEXT: ret <8 x i8> [[VTBX13_I]]
|
|
1465 //
|
150
|
1466 poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
|
|
1467 return vtbx2_p8(a, b, c);
|
|
1468 }
|
|
1469
|
236
|
1470 // CHECK-LABEL: define {{[^@]+}}@test_vtbx3_p8
|
|
1471 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
|
1472 // CHECK-NEXT: entry:
|
|
1473 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X3_T:%.*]], align 8
|
|
1474 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8
|
|
1475 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[B]], i32 0, i32 0
|
|
1476 // CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
1477 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[B]], i32 0, i32 0
|
|
1478 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
1479 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
|
|
1480 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
1481 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
|
|
1482 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1483 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
1484 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
|
|
1485 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1486 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
1487 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
|
|
1488 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
1489 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
|
|
1490 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1491 // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1492 // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
|
|
1493 // CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
|
|
1494 // CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
|
|
1495 // CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]]
|
|
1496 // CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
|
1497 // CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
|
|
1498 // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
|
|
1499 // CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
|
|
1500 //
|
150
|
1501 poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
|
|
1502 return vtbx3_p8(a, b, c);
|
|
1503 }
|
|
1504
|
236
|
1505 // CHECK-LABEL: define {{[^@]+}}@test_vtbx4_p8
|
|
1506 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
|
|
1507 // CHECK-NEXT: entry:
|
|
1508 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X4_T:%.*]], align 8
|
|
1509 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8
|
|
1510 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[B]], i32 0, i32 0
|
|
1511 // CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
|
|
1512 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[B]], i32 0, i32 0
|
|
1513 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
|
|
1514 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
|
|
1515 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
|
|
1516 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
|
|
1517 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1518 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
|
|
1519 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
|
|
1520 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1521 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
|
|
1522 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
|
|
1523 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
1524 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
|
|
1525 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
|
|
1526 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
1527 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
|
|
1528 // CHECK-NEXT: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1529 // CHECK-NEXT: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
1530 // CHECK-NEXT: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]])
|
|
1531 // CHECK-NEXT: ret <8 x i8> [[VTBX28_I]]
|
|
1532 //
|
150
|
1533 poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
|
|
1534 return vtbx4_p8(a, b, c);
|
|
1535 }
|
|
1536
|
236
|
1537 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx1_p8
|
|
1538 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
1539 // CHECK-NEXT: entry:
|
|
1540 // CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
|
|
1541 // CHECK-NEXT: ret <8 x i8> [[VTBX1_I]]
|
|
1542 //
|
150
|
1543 poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) {
|
|
1544 return vqtbx1_p8(a, b, c);
|
|
1545 }
|
|
1546
|
236
|
1547 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_p8
|
|
1548 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
1549 // CHECK-NEXT: entry:
|
|
1550 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
|
|
1551 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
|
|
1552 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[B]], i32 0, i32 0
|
|
1553 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1554 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[B]], i32 0, i32 0
|
|
1555 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1556 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
1557 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1558 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
1559 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1560 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1561 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
1562 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1563 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1564 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
|
|
1565 // CHECK-NEXT: ret <8 x i8> [[VTBX2_I]]
|
|
1566 //
|
150
|
1567 poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) {
|
|
1568 return vqtbx2_p8(a, b, c);
|
|
1569 }
|
|
1570
|
236
|
1571 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_p8
|
|
1572 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
1573 // CHECK-NEXT: entry:
|
|
1574 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
|
|
1575 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
|
|
1576 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[B]], i32 0, i32 0
|
|
1577 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1578 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[B]], i32 0, i32 0
|
|
1579 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1580 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
1581 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1582 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
1583 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1584 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1585 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
1586 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1587 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1588 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
1589 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
1590 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
1591 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]])
|
|
1592 // CHECK-NEXT: ret <8 x i8> [[VTBX3_I]]
|
|
1593 //
|
150
|
1594 poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) {
|
|
1595 return vqtbx3_p8(a, b, c);
|
|
1596 }
|
|
1597
|
236
|
1598 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_p8
|
|
1599 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
1600 // CHECK-NEXT: entry:
|
|
1601 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
|
|
1602 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
|
|
1603 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[B]], i32 0, i32 0
|
|
1604 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1605 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[B]], i32 0, i32 0
|
|
1606 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1607 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1608 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1609 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1610 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1611 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1612 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1613 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1614 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1615 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1616 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
1617 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
1618 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1619 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
1620 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
|
|
1621 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]])
|
|
1622 // CHECK-NEXT: ret <8 x i8> [[VTBX4_I]]
|
|
1623 //
|
150
|
1624 poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) {
|
|
1625 return vqtbx4_p8(a, b, c);
|
|
1626 }
|
|
1627
|
236
|
1628 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx1q_p8
|
|
1629 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
1630 // CHECK-NEXT: entry:
|
|
1631 // CHECK-NEXT: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
|
|
1632 // CHECK-NEXT: ret <16 x i8> [[VTBX1_I]]
|
|
1633 //
|
150
|
1634 poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) {
|
|
1635 return vqtbx1q_p8(a, b, c);
|
|
1636 }
|
|
1637
|
236
|
1638 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_p8
|
|
1639 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
1640 // CHECK-NEXT: entry:
|
|
1641 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
|
|
1642 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
|
|
1643 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[B]], i32 0, i32 0
|
|
1644 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1645 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[B]], i32 0, i32 0
|
|
1646 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1647 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
1648 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1649 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
1650 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1651 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1652 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
|
|
1653 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1654 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1655 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
|
|
1656 // CHECK-NEXT: ret <16 x i8> [[VTBX2_I]]
|
|
1657 //
|
150
|
1658 poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) {
|
|
1659 return vqtbx2q_p8(a, b, c);
|
|
1660 }
|
|
1661
|
236
|
1662 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_p8
|
|
1663 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
1664 // CHECK-NEXT: entry:
|
|
1665 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
|
|
1666 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
|
|
1667 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[B]], i32 0, i32 0
|
|
1668 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1669 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[B]], i32 0, i32 0
|
|
1670 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1671 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
1672 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1673 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
1674 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1675 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1676 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
1677 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1678 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1679 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
|
|
1680 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
1681 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
1682 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]])
|
|
1683 // CHECK-NEXT: ret <16 x i8> [[VTBX3_I]]
|
|
1684 //
|
150
|
1685 poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
|
|
1686 return vqtbx3q_p8(a, b, c);
|
|
1687 }
|
|
1688
|
236
|
1689 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_p8
|
|
1690 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
|
|
1691 // CHECK-NEXT: entry:
|
|
1692 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
|
|
1693 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
|
|
1694 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[B]], i32 0, i32 0
|
|
1695 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
|
|
1696 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[B]], i32 0, i32 0
|
|
1697 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
|
|
1698 // CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1699 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
|
|
1700 // CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1701 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
|
|
1702 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
|
|
1703 // CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1704 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
|
|
1705 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
|
|
1706 // CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1707 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
|
|
1708 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
|
|
1709 // CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
|
|
1710 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
|
|
1711 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
|
|
1712 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]])
|
|
1713 // CHECK-NEXT: ret <16 x i8> [[VTBX4_I]]
|
|
1714 //
|
150
|
1715 poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) {
|
|
1716 return vqtbx4q_p8(a, b, c);
|
|
1717 }
|
|
1718
|