95
|
1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw --show-mc-encoding| FileCheck %s
|
83
|
2
|
|
; Byte equality compare with i64 -1 passed as the mask operand; the expected instruction has no {%k1} write-mask.
3 define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
|
|
4 ; CHECK-LABEL: test_pcmpeq_b
|
|
5 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
|
|
6 %eq_mask = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
|
|
7 ret i64 %eq_mask
|
|
8 }
|
|
9
|
|
; Byte equality compare that forwards the caller's %mask; the expected instruction is predicated on {%k1}.
10 define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
|
|
11 ; CHECK-LABEL: test_mask_pcmpeq_b
|
|
12 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
|
|
13 %eq_mask = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
|
|
14 ret i64 %eq_mask
|
|
15 }
|
|
16
|
|
17 declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
|
|
18
|
|
; Word equality compare with i32 -1 passed as the mask operand; the expected instruction has no {%k1} write-mask.
19 define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
|
|
20 ; CHECK-LABEL: test_pcmpeq_w
|
|
21 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
|
|
22 %eq_mask = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
|
|
23 ret i32 %eq_mask
|
|
24 }
|
|
25
|
|
; Word equality compare that forwards the caller's %mask; the expected instruction is predicated on {%k1}.
26 define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
|
27 ; CHECK-LABEL: test_mask_pcmpeq_w
|
|
28 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
|
|
29 %eq_mask = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
|
|
30 ret i32 %eq_mask
|
|
31 }
|
|
32
|
|
33 declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
|
|
34
|
|
; Byte greater-than compare with i64 -1 passed as the mask operand; the expected instruction has no {%k1} write-mask.
35 define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
|
|
36 ; CHECK-LABEL: test_pcmpgt_b
|
|
37 ; CHECK: vpcmpgtb %zmm1, %zmm0, %k0 ##
|
|
38 %gt_mask = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
|
|
39 ret i64 %gt_mask
|
|
40 }
|
|
41
|
|
; Byte greater-than compare that forwards the caller's %mask; the expected instruction is predicated on {%k1}.
42 define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
|
|
43 ; CHECK-LABEL: test_mask_pcmpgt_b
|
|
44 ; CHECK: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} ##
|
|
45 %gt_mask = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
|
|
46 ret i64 %gt_mask
|
|
47 }
|
|
48
|
|
49 declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
|
|
50
|
|
; Word greater-than compare with i32 -1 passed as the mask operand; the expected instruction has no {%k1} write-mask.
51 define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
|
|
52 ; CHECK-LABEL: test_pcmpgt_w
|
|
53 ; CHECK: vpcmpgtw %zmm1, %zmm0, %k0 ##
|
|
54 %gt_mask = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
|
|
55 ret i32 %gt_mask
|
|
56 }
|
|
57
|
|
; Word greater-than compare that forwards the caller's %mask; the expected instruction is predicated on {%k1}.
58 define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
|
59 ; CHECK-LABEL: test_mask_pcmpgt_w
|
|
60 ; CHECK: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} ##
|
|
61 %gt_mask = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
|
|
62 ret i32 %gt_mask
|
|
63 }
|
|
64
|
|
65 declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
|
|
66
|
|
; Calls llvm.x86.avx512.mask.cmp.b.512 once per predicate immediate 0 through 7 with an i64 -1 mask and
; gathers the eight i64 results into an <8 x i64>. The expected instruction for each call sits directly above it.
; The label directive below uses the hyphenated spelling; the earlier underscore spelling is not a directive
; FileCheck recognises, so the function boundary was never anchored.
67 define <8 x i64> @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
|
|
68 ; CHECK-LABEL: test_cmp_b_512
|
|
69 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
|
95
|
70 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
|
83
|
71 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
|
|
72 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 ##
|
95
|
73 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
|
83
|
74 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
|
|
75 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 ##
|
95
|
76 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
|
83
|
77 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
|
|
78 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 ##
|
95
|
79 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
|
83
|
80 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
|
|
81 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 ##
|
95
|
82 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
|
83
|
83 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
|
|
84 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 ##
|
95
|
85 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
|
83
|
86 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
|
|
87 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 ##
|
95
|
88 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
|
83
|
89 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
|
|
90 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 ##
|
95
|
91 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
|
83
|
92 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
|
|
93 ret <8 x i64> %vec7
|
|
94 }
|
|
95
|
|
; Calls llvm.x86.avx512.mask.cmp.b.512 once per predicate immediate 0 through 7, forwarding the caller's %mask,
; and gathers the eight i64 results into an <8 x i64>. Every expected instruction is predicated on {%k1}.
; The label directive below uses the hyphenated spelling; the earlier underscore spelling is not a directive
; FileCheck recognises, so the function boundary was never anchored.
96 define <8 x i64> @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
|
|
97 ; CHECK-LABEL: test_mask_cmp_b_512
|
|
98 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
99 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
|
83
|
100 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
|
|
101 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
102 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
|
83
|
103 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
|
|
104 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
105 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
|
83
|
106 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
|
|
107 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
108 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
|
83
|
109 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
|
|
110 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
111 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
|
83
|
112 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
|
|
113 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
114 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
|
83
|
115 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
|
|
116 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
117 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
|
83
|
118 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
|
|
119 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
120 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
|
83
|
121 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
|
|
122 ret <8 x i64> %vec7
|
|
123 }
|
|
124
|
95
|
125 declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
|
83
|
126
|
|
; Calls llvm.x86.avx512.mask.ucmp.b.512 once per predicate immediate 0 through 7 with an i64 -1 mask and
; gathers the eight i64 results into an <8 x i64>. The expected instruction for each call sits directly above it.
; The label directive below uses the hyphenated spelling; the earlier underscore spelling is not a directive
; FileCheck recognises, so the function boundary was never anchored.
127 define <8 x i64> @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
|
|
128 ; CHECK-LABEL: test_ucmp_b_512
|
|
129 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 ##
|
95
|
130 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
|
83
|
131 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
|
|
132 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 ##
|
95
|
133 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
|
83
|
134 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
|
|
135 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 ##
|
95
|
136 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
|
83
|
137 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
|
|
138 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 ##
|
95
|
139 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
|
83
|
140 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
|
|
141 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 ##
|
95
|
142 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
|
83
|
143 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
|
|
144 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 ##
|
95
|
145 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
|
83
|
146 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
|
|
147 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 ##
|
95
|
148 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
|
83
|
149 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
|
|
150 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 ##
|
95
|
151 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
|
83
|
152 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
|
|
153 ret <8 x i64> %vec7
|
|
154 }
|
|
155
|
|
; Calls llvm.x86.avx512.mask.ucmp.b.512 once per predicate immediate 0 through 7, forwarding the caller's %mask,
; and gathers the eight i64 results into an <8 x i64>. Every expected instruction is predicated on {%k1}.
; The label directive below uses the hyphenated spelling and names this function exactly; the earlier line used an
; underscore spelling FileCheck does not recognise and a label text that did not occur in this function's name.
156 define <8 x i64> @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
|
|
157 ; CHECK-LABEL: test_mask_x86_avx512_ucmp_b_512
|
|
158 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
159 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
|
83
|
160 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
|
|
161 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
162 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
|
83
|
163 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
|
|
164 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
165 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
|
83
|
166 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
|
|
167 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
168 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
|
83
|
169 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
|
|
170 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
171 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
|
83
|
172 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
|
|
173 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
174 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
|
83
|
175 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
|
|
176 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
177 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
|
83
|
178 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
|
|
179 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
180 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
|
83
|
181 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
|
|
182 ret <8 x i64> %vec7
|
|
183 }
|
|
184
|
95
|
185 declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
|
83
|
186
|
|
; Calls llvm.x86.avx512.mask.cmp.w.512 once per predicate immediate 0 through 7 with an i32 -1 mask and
; gathers the eight i32 results into an <8 x i32>. The expected instruction for each call sits directly above it.
; The label directive below uses the hyphenated spelling; the earlier underscore spelling is not a directive
; FileCheck recognises, so the function boundary was never anchored.
187 define <8 x i32> @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
|
|
188 ; CHECK-LABEL: test_cmp_w_512
|
|
189 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
|
95
|
190 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
|
83
|
191 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
|
|
192 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 ##
|
95
|
193 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
|
83
|
194 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
|
|
195 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 ##
|
95
|
196 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
|
83
|
197 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
|
|
198 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 ##
|
95
|
199 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
|
83
|
200 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
|
|
201 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 ##
|
95
|
202 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
|
83
|
203 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
|
|
204 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 ##
|
95
|
205 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
|
83
|
206 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
|
|
207 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 ##
|
95
|
208 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
|
83
|
209 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
|
|
210 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 ##
|
95
|
211 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
|
83
|
212 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
|
|
213 ret <8 x i32> %vec7
|
|
214 }
|
|
215
|
|
; Calls llvm.x86.avx512.mask.cmp.w.512 once per predicate immediate 0 through 7, forwarding the caller's %mask,
; and gathers the eight i32 results into an <8 x i32>. Every expected instruction is predicated on {%k1}.
; The label directive below uses the hyphenated spelling; the earlier underscore spelling is not a directive
; FileCheck recognises, so the function boundary was never anchored.
216 define <8 x i32> @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
|
|
217 ; CHECK-LABEL: test_mask_cmp_w_512
|
|
218 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
219 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
|
83
|
220 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
|
|
221 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
222 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
|
83
|
223 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
|
|
224 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
225 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
|
83
|
226 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
|
|
227 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
228 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
|
83
|
229 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
|
|
230 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
231 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
|
83
|
232 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
|
|
233 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
234 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
|
83
|
235 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
|
|
236 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
237 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
|
83
|
238 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
|
|
239 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
240 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
|
83
|
241 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
|
|
242 ret <8 x i32> %vec7
|
|
243 }
|
|
244
|
95
|
245 declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
|
83
|
246
|
|
; Calls llvm.x86.avx512.mask.ucmp.w.512 once per predicate immediate 0 through 7 with an i32 -1 mask and
; gathers the eight i32 results into an <8 x i32>. The expected instruction for each call sits directly above it.
; The label directive below uses the hyphenated spelling; the earlier underscore spelling is not a directive
; FileCheck recognises, so the function boundary was never anchored.
247 define <8 x i32> @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
|
|
248 ; CHECK-LABEL: test_ucmp_w_512
|
|
249 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 ##
|
95
|
250 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
|
83
|
251 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
|
|
252 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 ##
|
95
|
253 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
|
83
|
254 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
|
|
255 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 ##
|
95
|
256 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
|
83
|
257 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
|
|
258 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 ##
|
95
|
259 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
|
83
|
260 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
|
|
261 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 ##
|
95
|
262 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
|
83
|
263 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
|
|
264 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 ##
|
95
|
265 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
|
83
|
266 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
|
|
267 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 ##
|
95
|
268 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
|
83
|
269 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
|
|
270 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 ##
|
95
|
271 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
|
83
|
272 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
|
|
273 ret <8 x i32> %vec7
|
|
274 }
|
|
275
|
|
; Calls llvm.x86.avx512.mask.ucmp.w.512 once per predicate immediate 0 through 7, forwarding the caller's %mask,
; and gathers the eight i32 results into an <8 x i32>. Every expected instruction is predicated on {%k1}.
; The label directive below uses the hyphenated spelling; the earlier underscore spelling is not a directive
; FileCheck recognises, so the function boundary was never anchored.
276 define <8 x i32> @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
|
|
277 ; CHECK-LABEL: test_mask_ucmp_w_512
|
|
278 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
279 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
|
83
|
280 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
|
|
281 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
282 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
|
83
|
283 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
|
|
284 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
285 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
|
83
|
286 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
|
|
287 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
288 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
|
83
|
289 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
|
|
290 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
291 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
|
83
|
292 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
|
|
293 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
294 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
|
83
|
295 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
|
|
296 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
297 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
|
83
|
298 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
|
|
299 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 {%k1} ##
|
95
|
300 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
|
83
|
301 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
|
|
302 ret <8 x i32> %vec7
|
|
303 }
|
|
304
|
95
|
305 declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
|
83
|
306
|
|
307 declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly
|
|
308
|
|
; Word blend of %a1 and %a2 selected by the i32 %mask; only the vpblendmw mnemonic is checked.
309 ; CHECK-LABEL: test_x86_mask_blend_w_512
|
|
310 define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) {
|
|
311 ; CHECK: vpblendmw
|
|
312 %blended = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1]
|
|
313 ret <32 x i16> %blended
|
|
314 }
|
|
315 declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly
|
|
316
|
95
|
; Byte blend of %a1 and %a2 selected by the i64 %a0 mask; only the vpblendmb mnemonic is checked.
317 ; CHECK-LABEL: test_x86_mask_blend_b_512
|
83
|
318 ; CHECK: vpblendmb
|
95
|
319 define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) {
|
|
320 %blended = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1]
|
|
321 ret <64 x i8> %blended
|
|
322 }
|
|
323
|
|
; Register-register packssdw with a zeroinitializer pass-through and an i32 -1 mask; the expected encoding has no write-mask.
324 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
|
|
325 ;CHECK-LABEL: test_mask_packs_epi32_rr_512
|
|
326 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
|
|
327 %packed = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
|
|
328 ret <32 x i16> %packed
|
|
329 }
|
|
330
|
|
; Register-register packssdw forwarding %passThru and %mask; the expected instruction writes %zmm2 under {%k1}.
331 define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
|
|
332 ;CHECK-LABEL: test_mask_packs_epi32_rrk_512
|
|
333 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
|
|
334 %packed = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
|
|
335 ret <32 x i16> %packed
|
|
336 }
|
|
337
|
|
; Register-register packssdw with a zeroinitializer pass-through and the caller's %mask; the expected form is {%k1} {z}.
338 define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
|
|
339 ;CHECK-LABEL: test_mask_packs_epi32_rrkz_512
|
|
340 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
|
|
341 %packed = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
|
|
342 ret <32 x i16> %packed
|
|
343 }
|
|
344
|
|
; packssdw whose second operand is loaded from %ptr_b; the expected instruction takes (%rdi) as a memory operand, unmasked.
345 define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
|
|
346 ;CHECK-LABEL: test_mask_packs_epi32_rm_512
|
|
347 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
|
|
348 %rhs = load <16 x i32>, <16 x i32>* %ptr_b
|
|
349 %packed = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %rhs, <32 x i16> zeroinitializer, i32 -1)
|
|
350 ret <32 x i16> %packed
|
|
351 }
|
|
352
|
|
; packssdw with the second operand loaded from %ptr_b, forwarding %passThru and %mask; expected destination is %zmm1 under {%k1}.
353 define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
|
354 ;CHECK-LABEL: test_mask_packs_epi32_rmk_512
|
|
355 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
|
|
356 %rhs = load <16 x i32>, <16 x i32>* %ptr_b
|
|
357 %packed = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %rhs, <32 x i16> %passThru, i32 %mask)
|
|
358 ret <32 x i16> %packed
|
|
359 }
|
|
360
|
|
; packssdw with the second operand loaded from %ptr_b, a zeroinitializer pass-through and %mask; the expected form is {%k1} {z}.
361 define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
|
|
362 ;CHECK-LABEL: test_mask_packs_epi32_rmkz_512
|
|
363 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
|
|
364 %rhs = load <16 x i32>, <16 x i32>* %ptr_b
|
|
365 %packed = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %rhs, <32 x i16> zeroinitializer, i32 %mask)
|
|
366 ret <32 x i16> %packed
|
|
367 }
|
|
368
|
|
; packssdw whose second operand is one i32 loaded from %ptr_b and replicated into all 16 lanes
; (insertelement into lane 0, then shufflevector with an all-zero index mask); expected operand form is (%rdi){1to16}, unmasked.
369 define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
|
|
370 ;CHECK-LABEL: test_mask_packs_epi32_rmb_512
|
|
371 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
|
|
372 %scalar = load i32, i32* %ptr_b
|
|
373 %lane0 = insertelement <16 x i32> undef, i32 %scalar, i32 0
|
|
374 %splat = shufflevector <16 x i32> %lane0, <16 x i32> undef, <16 x i32> zeroinitializer
|
|
375 %packed = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %splat, <32 x i16> zeroinitializer, i32 -1)
|
|
376 ret <32 x i16> %packed
|
|
377 }
|
|
378
|
|
; packssdw with a replicated i32 from %ptr_b as second operand, forwarding %passThru and %mask;
; the expected instruction uses (%rdi){1to16} and writes %zmm1 under {%k1}.
379 define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
|
380 ;CHECK-LABEL: test_mask_packs_epi32_rmbk_512
|
|
381 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
|
|
382 %scalar = load i32, i32* %ptr_b
|
|
383 %lane0 = insertelement <16 x i32> undef, i32 %scalar, i32 0
|
|
384 %splat = shufflevector <16 x i32> %lane0, <16 x i32> undef, <16 x i32> zeroinitializer
|
|
385 %packed = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %splat, <32 x i16> %passThru, i32 %mask)
|
|
386 ret <32 x i16> %packed
|
|
387 }
|
|
388
|
|
; packssdw with a replicated i32 from %ptr_b as second operand, a zeroinitializer pass-through and %mask;
; the expected instruction uses (%rdi){1to16} with {%k1} {z}.
389 define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
|
|
390 ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_512
|
|
391 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
|
|
392 %scalar = load i32, i32* %ptr_b
|
|
393 %lane0 = insertelement <16 x i32> undef, i32 %scalar, i32 0
|
|
394 %splat = shufflevector <16 x i32> %lane0, <16 x i32> undef, <16 x i32> zeroinitializer
|
|
395 %packed = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %splat, <32 x i16> zeroinitializer, i32 %mask)
|
|
396 ret <32 x i16> %packed
|
|
397 }
|
|
398
|
|
399 declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
|
|
400
|
|
; Register-register packsswb with a zeroinitializer pass-through and an i64 -1 mask; the expected encoding has no write-mask.
401 define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
|
402 ;CHECK-LABEL: test_mask_packs_epi16_rr_512
|
|
403 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0xc1]
|
|
404 %packed = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
|
|
405 ret <64 x i8> %packed
|
|
406 }
|
|
407
|
|
; Register-register packsswb forwarding %passThru and %mask; the expected instruction writes %zmm2 under {%k1}.
408 define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
|
|
409 ;CHECK-LABEL: test_mask_packs_epi16_rrk_512
|
|
410 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0xd1]
|
|
411 %packed = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
|
|
412 ret <64 x i8> %packed
|
|
413 }
|
|
414
|
|
; Register-register packsswb with a zeroinitializer pass-through and the caller's %mask; the expected form is {%k1} {z}.
415 define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
|
|
416 ;CHECK-LABEL: test_mask_packs_epi16_rrkz_512
|
|
417 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0xc1]
|
|
418 %packed = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
|
|
419 ret <64 x i8> %packed
|
|
420 }
|
|
421
|
|
; packsswb whose second operand is loaded from %ptr_b; the expected instruction takes (%rdi) as a memory operand, unmasked.
422 define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
|
423 ;CHECK-LABEL: test_mask_packs_epi16_rm_512
|
|
424 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0x07]
|
|
425 %rhs = load <32 x i16>, <32 x i16>* %ptr_b
|
|
426 %packed = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %rhs, <64 x i8> zeroinitializer, i64 -1)
|
|
427 ret <64 x i8> %packed
|
|
428 }
|
|
429
|
|
; packsswb with the second operand loaded from %ptr_b, forwarding %passThru and %mask; expected destination is %zmm1 under {%k1}.
430 define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
|
|
431 ;CHECK-LABEL: test_mask_packs_epi16_rmk_512
|
|
432 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0x0f]
|
|
433 %rhs = load <32 x i16>, <32 x i16>* %ptr_b
|
|
434 %packed = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %rhs, <64 x i8> %passThru, i64 %mask)
|
|
435 ret <64 x i8> %packed
|
|
436 }
|
|
437
|
|
; packsswb with the second operand loaded from %ptr_b, a zeroinitializer pass-through and %mask; the expected form is {%k1} {z}.
438 define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
|
|
439 ;CHECK-LABEL: test_mask_packs_epi16_rmkz_512
|
|
440 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0x07]
|
|
441 %rhs = load <32 x i16>, <32 x i16>* %ptr_b
|
|
442 %packed = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %rhs, <64 x i8> zeroinitializer, i64 %mask)
|
|
443 ret <64 x i8> %packed
|
|
444 }
|
|
445
|
|
446 declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
|
|
447
|
|
448
|
|
; Register-register packusdw with a zeroinitializer pass-through and an i32 -1 mask; the expected instruction has no write-mask.
449 define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
|
|
450 ;CHECK-LABEL: test_mask_packus_epi32_rr_512
|
|
451 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0
|
|
452 %packed = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
|
|
453 ret <32 x i16> %packed
|
|
454 }
|
|
455
|
|
; Register-register packusdw forwarding %passThru and %mask; the expected instruction writes %zmm2 under {%k1}.
456 define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
|
|
457 ;CHECK-LABEL: test_mask_packus_epi32_rrk_512
|
|
458 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
|
|
459 %packed = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
|
|
460 ret <32 x i16> %packed
|
|
461 }
|
|
462
|
|
; Register-register packusdw with a zeroinitializer pass-through and the caller's %mask; the expected form is {%k1} {z}.
463 define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
|
|
464 ;CHECK-LABEL: test_mask_packus_epi32_rrkz_512
|
|
465 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
|
466 %packed = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
|
|
467 ret <32 x i16> %packed
|
|
468 }
|
|
469
|
|
; packusdw whose second operand is loaded from %ptr_b; the expected instruction takes (%rdi) as a memory operand, unmasked.
470 define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
|
|
471 ;CHECK-LABEL: test_mask_packus_epi32_rm_512
|
|
472 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0
|
|
473 %rhs = load <16 x i32>, <16 x i32>* %ptr_b
|
|
474 %packed = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %rhs, <32 x i16> zeroinitializer, i32 -1)
|
|
475 ret <32 x i16> %packed
|
|
476 }
|
|
477
|
|
; packusdw with the second operand loaded from %ptr_b, forwarding %passThru and %mask; expected destination is %zmm1 under {%k1}.
478 define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
|
479 ;CHECK-LABEL: test_mask_packus_epi32_rmk_512
|
|
480 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm1 {%k1}
|
|
481 %rhs = load <16 x i32>, <16 x i32>* %ptr_b
|
|
482 %packed = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %rhs, <32 x i16> %passThru, i32 %mask)
|
|
483 ret <32 x i16> %packed
|
|
484 }
|
|
485
|
|
; packusdw with the second operand loaded from %ptr_b, a zeroinitializer pass-through and %mask; the expected form is {%k1} {z}.
486 define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
|
|
487 ;CHECK-LABEL: test_mask_packus_epi32_rmkz_512
|
|
488 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z}
|
|
489 %rhs = load <16 x i32>, <16 x i32>* %ptr_b
|
|
490 %packed = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %rhs, <32 x i16> zeroinitializer, i32 %mask)
|
|
491 ret <32 x i16> %packed
|
|
492 }
|
|
493
|
|
; packusdw whose second operand is one i32 loaded from %ptr_b and replicated into all 16 lanes
; (insertelement into lane 0, then shufflevector with an all-zero index mask); expected operand form is (%rdi){1to16}, unmasked.
494 define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
|
|
495 ;CHECK-LABEL: test_mask_packus_epi32_rmb_512
|
|
496 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0
|
|
497 %scalar = load i32, i32* %ptr_b
|
|
498 %lane0 = insertelement <16 x i32> undef, i32 %scalar, i32 0
|
|
499 %splat = shufflevector <16 x i32> %lane0, <16 x i32> undef, <16 x i32> zeroinitializer
|
|
500 %packed = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %splat, <32 x i16> zeroinitializer, i32 -1)
|
|
501 ret <32 x i16> %packed
|
|
502 }
|
|
503
|
|
; packusdw with a replicated i32 from %ptr_b as second operand, forwarding %passThru and %mask;
; the expected instruction uses (%rdi){1to16} and writes %zmm1 under {%k1}.
504 define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
|
505 ;CHECK-LABEL: test_mask_packus_epi32_rmbk_512
|
|
506 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
|
|
507 %scalar = load i32, i32* %ptr_b
|
|
508 %lane0 = insertelement <16 x i32> undef, i32 %scalar, i32 0
|
|
509 %splat = shufflevector <16 x i32> %lane0, <16 x i32> undef, <16 x i32> zeroinitializer
|
|
510 %packed = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %splat, <32 x i16> %passThru, i32 %mask)
|
|
511 ret <32 x i16> %packed
|
|
512 }
|
|
513
|
|
; packusdw with a replicated i32 from %ptr_b as second operand, a zeroinitializer pass-through and %mask;
; the expected instruction uses (%rdi){1to16} with {%k1} {z}.
514 define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
|
|
515 ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_512
|
|
516 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
|
|
517 %scalar = load i32, i32* %ptr_b
|
|
518 %lane0 = insertelement <16 x i32> undef, i32 %scalar, i32 0
|
|
519 %splat = shufflevector <16 x i32> %lane0, <16 x i32> undef, <16 x i32> zeroinitializer
|
|
520 %packed = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %splat, <32 x i16> zeroinitializer, i32 %mask)
|
|
521 ret <32 x i16> %packed
|
|
522 }
|
|
523
|
|
524 declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
|
|
525
|
|
; Register-register packuswb with a zeroinitializer pass-through and an i64 -1 mask; the expected instruction has no write-mask.
526 define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
|
527 ;CHECK-LABEL: test_mask_packus_epi16_rr_512
|
|
528 ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0
|
|
529 %packed = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
|
|
530 ret <64 x i8> %packed
|
|
531 }
|
|
532
|
|
; Register-register vpackuswb merging into %passThru under %mask; the expected
; destination is %zmm2 with a {%k1} write-mask.
define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
; CHECK-LABEL: test_mask_packus_epi16_rrk_512
; CHECK: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
  %packed = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
  ret <64 x i8> %packed
}
|
|
539
|
|
; Register-register vpackuswb with %mask and a zeroinitializer pass-through;
; the expected form is zero-masking ({%k1} {z}).
define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
; CHECK-LABEL: test_mask_packus_epi16_rrkz_512
; CHECK: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
  %packed = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
  ret <64 x i8> %packed
}
|
|
546
|
|
; vpackuswb with the second source loaded from %ptr_b and an all-ones mask;
; the load is expected to fold into a (%rdi) memory operand.
define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_packus_epi16_rm_512
; CHECK: vpackuswb (%rdi), %zmm0, %zmm0
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %packed = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %rhs, <64 x i8> zeroinitializer, i64 -1)
  ret <64 x i8> %packed
}
|
95
|
554
|
|
; vpackuswb with the second source loaded from %ptr_b, merging into %passThru
; under %mask; the expected destination is %zmm1 with a {%k1} write-mask.
define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
; CHECK-LABEL: test_mask_packus_epi16_rmk_512
; CHECK: vpackuswb (%rdi), %zmm0, %zmm1 {%k1}
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %packed = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %rhs, <64 x i8> %passThru, i64 %mask)
  ret <64 x i8> %packed
}
|
|
562
|
|
; vpackuswb with the second source loaded from %ptr_b, %mask, and a
; zeroinitializer pass-through; the expected form is a (%rdi) source with
; zero-masking ({%k1} {z}).
define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
; CHECK-LABEL: test_mask_packus_epi16_rmkz_512
; CHECK: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z}
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %packed = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %rhs, <64 x i8> zeroinitializer, i64 %mask)
  ret <64 x i8> %packed
}
|
|
570
|
|
571 declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
|
|
572
|
|
; Register-register vpaddsw with an all-ones mask (i32 -1) and a zero
; pass-through; the expected pattern is the plain three-operand form.
define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_512
; CHECK: vpaddsw %zmm1, %zmm0, %zmm0
  %sum = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %sum
}
|
|
579
|
|
; Register-register vpaddsw merging into %passThru under %mask; the expected
; destination is %zmm2 with a {%k1} write-mask.
define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrk_512
; CHECK: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
  %sum = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %sum
}
|
|
586
|
|
; Register-register vpaddsw with %mask and a zeroinitializer pass-through;
; the expected form is zero-masking ({%k1} {z}).
define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrkz_512
; CHECK: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
  %sum = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %sum
}
|
|
593
|
|
; vpaddsw with the second source loaded from %ptr_b and an all-ones mask;
; the load is expected to fold into a (%rdi) memory operand.
define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi16_rm_512
; CHECK: vpaddsw (%rdi), %zmm0, %zmm0
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %sum = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %sum
}
|
|
601
|
|
; vpaddsw with the second source loaded from %ptr_b, merging into %passThru
; under %mask; the expected destination is %zmm1 with a {%k1} write-mask.
define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmk_512
; CHECK: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %sum = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %sum
}
|
|
609
|
|
; vpaddsw with the second source loaded from %ptr_b, %mask, and a
; zeroinitializer pass-through; the expected form is a (%rdi) source with
; zero-masking ({%k1} {z}).
define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmkz_512
; CHECK: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %sum = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %sum
}
|
|
617
|
|
618 declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
|
619
|
|
; Register-register vpsubsw with an all-ones mask (i32 -1) and a zero
; pass-through; the expected pattern is the plain three-operand form.
define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epi16_rr_512
; CHECK: vpsubsw %zmm1, %zmm0, %zmm0
  %diff = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %diff
}
|
|
626
|
|
; Register-register vpsubsw merging into %passThru under %mask; the expected
; destination is %zmm2 with a {%k1} write-mask.
define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrk_512
; CHECK: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
  %diff = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %diff
}
|
|
633
|
|
; Register-register vpsubsw with %mask and a zeroinitializer pass-through;
; the expected form is zero-masking ({%k1} {z}).
define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrkz_512
; CHECK: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
  %diff = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %diff
}
|
|
640
|
|
; vpsubsw with the second source loaded from %ptr_b and an all-ones mask;
; the load is expected to fold into a (%rdi) memory operand.
define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi16_rm_512
; CHECK: vpsubsw (%rdi), %zmm0, %zmm0
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %diff = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %diff
}
|
|
648
|
|
; vpsubsw with the second source loaded from %ptr_b, merging into %passThru
; under %mask; the expected destination is %zmm1 with a {%k1} write-mask.
define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmk_512
; CHECK: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %diff = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %diff
}
|
|
656
|
|
; vpsubsw with the second source loaded from %ptr_b, %mask, and a
; zeroinitializer pass-through; the expected form is a (%rdi) source with
; zero-masking ({%k1} {z}).
define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmkz_512
; CHECK: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %diff = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %diff
}
|
|
664
|
|
665 declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
|
666
|
|
; Register-register vpaddusw with an all-ones mask (i32 -1) and a zero
; pass-through; the expected pattern is the plain three-operand form.
define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epu16_rr_512
; CHECK: vpaddusw %zmm1, %zmm0, %zmm0
  %sum = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %sum
}
|
|
673
|
|
; Register-register vpaddusw merging into %passThru under %mask; the expected
; destination is %zmm2 with a {%k1} write-mask.
define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrk_512
; CHECK: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
  %sum = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %sum
}
|
|
680
|
|
; Register-register vpaddusw with %mask and a zeroinitializer pass-through;
; the expected form is zero-masking ({%k1} {z}).
define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrkz_512
; CHECK: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
  %sum = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %sum
}
|
|
687
|
|
; vpaddusw with the second source loaded from %ptr_b and an all-ones mask;
; the load is expected to fold into a (%rdi) memory operand.
define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu16_rm_512
; CHECK: vpaddusw (%rdi), %zmm0, %zmm0
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %sum = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %sum
}
|
|
695
|
|
; vpaddusw with the second source loaded from %ptr_b, merging into %passThru
; under %mask; the expected destination is %zmm1 with a {%k1} write-mask.
define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmk_512
; CHECK: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %sum = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %sum
}
|
|
703
|
|
; vpaddusw with the second source loaded from %ptr_b, %mask, and a
; zeroinitializer pass-through; the expected form is a (%rdi) source with
; zero-masking ({%k1} {z}).
define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmkz_512
; CHECK: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %sum = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %sum
}
|
|
711
|
|
712 declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
|
713
|
|
; Register-register vpsubusw with an all-ones mask (i32 -1) and a zero
; pass-through; the expected pattern is the plain three-operand form.
define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_512
; CHECK: vpsubusw %zmm1, %zmm0, %zmm0
  %diff = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %diff
}
|
|
720
|
|
; Register-register vpsubusw merging into %passThru under %mask; the expected
; destination is %zmm2 with a {%k1} write-mask.
define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrk_512
; CHECK: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
  %diff = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %diff
}
|
|
727
|
|
; Register-register vpsubusw with %mask and a zeroinitializer pass-through;
; the expected form is zero-masking ({%k1} {z}).
define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrkz_512
; CHECK: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
  %diff = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %diff
}
|
|
734
|
|
; vpsubusw with the second source loaded from %ptr_b and an all-ones mask;
; the load is expected to fold into a (%rdi) memory operand.
define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu16_rm_512
; CHECK: vpsubusw (%rdi), %zmm0, %zmm0
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %diff = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %diff
}
|
|
742
|
|
; vpsubusw with the second source loaded from %ptr_b, merging into %passThru
; under %mask; the expected destination is %zmm1 with a {%k1} write-mask.
define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmk_512
; CHECK: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %diff = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %diff
}
|
|
750
|
|
; vpsubusw with the second source loaded from %ptr_b, %mask, and a
; zeroinitializer pass-through; the expected form is a (%rdi) source with
; zero-masking ({%k1} {z}).
define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmkz_512
; CHECK: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %diff = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %diff
}
|
|
758
|
|
759 declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
|
760
|
|
761 declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
|
762
|
|
; Calls the pmaxs.b.512 intrinsic twice on the same operands - first with the
; runtime mask %x3, then with an all-ones mask - and returns the sum of both
; results. Expected output: no residual call, a vpmaxsb on zmm registers, and
; a {%k1} write-mask.
define <64 x i8> @test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_512
; CHECK-NOT: call
; CHECK: vpmaxsb %zmm
; CHECK: {%k1}
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %total = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %total
}
|
|
773
|
|
774 declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
|
775
|
|
; Calls the pmaxs.w.512 intrinsic twice on the same operands - first with the
; runtime mask %x3, then with an all-ones mask - and returns the sum of both
; results. Expected output: no residual call, a vpmaxsw on zmm registers, and
; a {%k1} write-mask.
define <32 x i16> @test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_512
; CHECK-NOT: call
; CHECK: vpmaxsw %zmm
; CHECK: {%k1}
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %total = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %total
}
|
|
786
|
|
787 declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
83
|
788
|
95
|
; Calls the pmaxu.b.512 intrinsic twice on the same operands - first with the
; runtime mask %x3, then with an all-ones mask - and returns the sum of both
; results. Expected output: no residual call, a vpmaxub on zmm registers, and
; a {%k1} write-mask.
define <64 x i8> @test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_512
; CHECK-NOT: call
; CHECK: vpmaxub %zmm
; CHECK: {%k1}
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %total = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %total
}
|
|
799
|
|
800 declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
|
801
|
|
; Calls the pmaxu.w.512 intrinsic twice on the same operands - first with the
; runtime mask %x3, then with an all-ones mask - and returns the sum of both
; results. Expected output: no residual call, a vpmaxuw on zmm registers, and
; a {%k1} write-mask.
define <32 x i16> @test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_512
; CHECK-NOT: call
; CHECK: vpmaxuw %zmm
; CHECK: {%k1}
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %total = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %total
}
|
|
812
|
|
813 declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
|
814
|
|
; Calls the pmins.b.512 intrinsic twice on the same operands - first with the
; runtime mask %x3, then with an all-ones mask - and returns the sum of both
; results. Expected output: no residual call, a vpminsb on zmm registers, and
; a {%k1} write-mask.
define <64 x i8> @test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_512
; CHECK-NOT: call
; CHECK: vpminsb %zmm
; CHECK: {%k1}
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %total = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %total
}
|
|
825
|
|
826 declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
|
827
|
|
; Calls the pmins.w.512 intrinsic twice on the same operands - first with the
; runtime mask %x3, then with an all-ones mask - and returns the sum of both
; results. Expected output: no residual call, a vpminsw on zmm registers, and
; a {%k1} write-mask.
define <32 x i16> @test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_w_512
; CHECK-NOT: call
; CHECK: vpminsw %zmm
; CHECK: {%k1}
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %total = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %total
}
|
|
838
|
|
839 declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
|
840
|
|
; Calls the pminu.b.512 intrinsic twice on the same operands - first with the
; runtime mask %x3, then with an all-ones mask - and returns the sum of both
; results. Expected output: no residual call, a vpminub on zmm registers, and
; a {%k1} write-mask.
define <64 x i8> @test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_512
; CHECK-NOT: call
; CHECK: vpminub %zmm
; CHECK: {%k1}
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %total = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %total
}
|
|
851
|
|
852 declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
|
853
|
|
; Calls the pminu.w.512 intrinsic twice on the same operands - first with the
; runtime mask %x3, then with an all-ones mask - and returns the sum of both
; results. Expected output: no residual call, a vpminuw on zmm registers, and
; a {%k1} write-mask.
define <32 x i16> @test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_w_512
; CHECK-NOT: call
; CHECK: vpminuw %zmm
; CHECK: {%k1}
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %total = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %total
}
|
|
864
|
|
865 declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
|
866
|
|
; Calls the mask.vpermt2var.hi.512 intrinsic with the runtime mask %x3 and
; again with an all-ones mask, then returns the sum of both results.
; Expected output: no residual call, a kmov of the mask, and a vpermt2w on
; zmm registers under {%k1}.
define <32 x i16> @test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vpermt2w %zmm{{.*}}{%k1}
  %masked = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %total = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %total
}
|
|
877
|
|
878 declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
|
879
|
|
; Calls the maskz.vpermt2var.hi.512 intrinsic with the runtime mask %x3 and
; again with an all-ones mask, then returns the sum of both results.
; Expected output: no residual call, a kmov of the mask, and a vpermt2w on
; zmm registers with zero-masking ({%k1} {z}).
define <32 x i16> @test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vpermt2w %zmm{{.*}}{%k1} {z}
  %zeromasked = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %total = add <32 x i16> %zeromasked, %unmasked
  ret <32 x i16> %total
}
|
|
890
|
|
891 declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
|
892
|
|
; Calls the mask.vpermi2var.hi.512 intrinsic with the runtime mask %x3 and
; again with an all-ones mask, then returns the sum of both results.
; Expected output: no residual call, a kmov of the mask, and a vpermi2w on
; zmm registers under {%k1}.
define <32 x i16> @test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vpermi2w %zmm{{.*}}{%k1}
  %masked = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %total = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %total
}
|
|
903
|
|
904 declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
|
905
|
|
; Calls the pavg.b.512 intrinsic twice on the same operands - first with the
; runtime mask %x3, then with an all-ones mask - and returns the sum of both
; results. Expected output: no residual call, a vpavgb on zmm registers, and
; a {%k1} write-mask.
define <64 x i8> @test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_512
; CHECK-NOT: call
; CHECK: vpavgb %zmm
; CHECK: {%k1}
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %total = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %total
}
|
|
916
|
|
917 declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
|
918
|
|
; Calls the pavg.w.512 intrinsic twice on the same operands - first with the
; runtime mask %x3, then with an all-ones mask - and returns the sum of both
; results. Expected output: no residual call, a vpavgw on zmm registers, and
; a {%k1} write-mask.
define <32 x i16> @test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_512
; CHECK-NOT: call
; CHECK: vpavgw %zmm
; CHECK: {%k1}
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %total = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %total
}
|
|
929
|
|
930 declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
|
931
|
|
; Calls the pshuf.b.512 intrinsic with the runtime mask %x3 and again with an
; all-ones mask, then returns the sum of both results. Expected output: no
; residual call, a kmov of the mask, and a vpshufb on zmm registers under
; {%k1}.
define <64 x i8> @test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vpshufb %zmm{{.*}}{%k1}
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %total = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %total
}
|
|
942
|
|
943 declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
|
|
944
|
|
; Calls the pabs.w.512 intrinsic (source %x0, pass-through %x1) with the
; runtime mask %x2 and again with an all-ones mask, then returns the sum of
; both results. Expected output: no residual call, a kmov of the mask, and a
; vpabsw under {%k1}.
define <32 x i16> @test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vpabsw{{.*}}{%k1}
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
  %total = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %total
}
|
|
955
|
|
956 declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)
|
|
957
|
|
; Calls the pabs.b.512 intrinsic (source %x0, pass-through %x1) with the
; runtime mask %x2 and again with an all-ones mask, then returns the sum of
; both results. Expected output: no residual call, a kmov of the mask, and a
; vpabsb under {%k1}.
define <64 x i8> @test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vpabsb{{.*}}{%k1}
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
  %total = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %total
}
|
|
968
|
|
969 declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
|
970
|
|
; Calls the pmulhu.w.512 intrinsic with the runtime mask %x3 and again with an
; all-ones mask, then returns the sum of both results. Expected output, in
; order: no residual call, a kmov, a {%k1} write-mask, and afterwards a
; vpmulhuw line whose encoding begins with byte 0x62.
define <32 x i16> @test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhu_w_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: {%k1}
; CHECK: vpmulhuw {{.*}}encoding: [0x62
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %total = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %total
}
|
|
982
|
|
983 declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
|
984
|
|
; Calls the pmulh.w.512 intrinsic with the runtime mask %x3 and again with an
; all-ones mask, then returns the sum of both results. Expected output, in
; order: no residual call, a kmov, a {%k1} write-mask, and afterwards a
; vpmulhw line whose encoding begins with byte 0x62.
define <32 x i16> @test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmulh_w_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: {%k1}
; CHECK: vpmulhw {{.*}}encoding: [0x62
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %total = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %total
}
|
|
996
|
|
997 declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
|
998
|
|
; Calls the pmul.hr.sw.512 intrinsic with the runtime mask %x3 and again with
; an all-ones mask, then returns the sum of both results. Expected output, in
; order: no residual call, a kmov, a {%k1} write-mask, and afterwards a
; vpmulhrsw line whose encoding begins with byte 0x62.
define <32 x i16> @test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhr_sw_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: {%k1}
; CHECK: vpmulhrsw {{.*}}encoding: [0x62
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %total = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %total
}
|
|
1010
|
|
1011 declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
|
|
1012
|
|
; Calls the pmov.wb.512 intrinsic three ways - all-ones mask, runtime mask
; %x2 merging into %x1, and runtime mask %x2 with a zero pass-through - and
; returns the sum of the three results. Expected output: consecutive vpmovwb
; lines in merge-masked, zero-masked, then unmasked form.
define <32 x i8> @test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
; CHECK: vpmovwb %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovwb %zmm0, %ymm0
  %unmasked = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %merged = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %zeroed = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %partial = add <32 x i8> %unmasked, %merged
  %total = add <32 x i8> %partial, %zeroed
  ret <32 x i8> %total
}
|
95
|
1025
|
|
1026 declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)
|
|
1027
|
|
; Calls the store form pmov.wb.mem.512 twice on %ptr: first with an all-ones
; mask, then with the runtime mask %x2. Expected output: a vpmovwb to (%rdi)
; followed by a vpmovwb to (%rdi) under {%k1}.
define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
; CHECK: vpmovwb %zmm0, (%rdi)
; CHECK: vpmovwb %zmm0, (%rdi) {%k1}
  ; All-ones mask.
  call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  ; Caller-supplied mask.
  call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ret void
}
|
|
1036
|
|
1037 declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)
|
|
1038
|
|
; Calls the pmovs.wb.512 intrinsic three ways - all-ones mask, runtime mask
; %x2 merging into %x1, and runtime mask %x2 with a zero pass-through - and
; returns the sum of the three results. Expected output: consecutive vpmovswb
; lines in merge-masked, zero-masked, then unmasked form.
define <32 x i8> @test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
; CHECK: vpmovswb %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovswb %zmm0, %ymm0
  %unmasked = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %merged = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %zeroed = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %partial = add <32 x i8> %unmasked, %merged
  %total = add <32 x i8> %partial, %zeroed
  ret <32 x i8> %total
}
|
|
1051
|
|
1052 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)
|
|
1053
|
|
; Calls the store form pmovs.wb.mem.512 twice on %ptr: first with an all-ones
; mask, then with the runtime mask %x2. Expected output: a vpmovswb to (%rdi)
; followed by a vpmovswb to (%rdi) under {%k1}.
define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
; CHECK: vpmovswb %zmm0, (%rdi)
; CHECK: vpmovswb %zmm0, (%rdi) {%k1}
  ; All-ones mask.
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  ; Caller-supplied mask.
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ret void
}
|
|
1062
|
|
1063 declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)
|
|
1064
|
|
; Calls the pmovus.wb.512 intrinsic three ways - all-ones mask, runtime mask
; %x2 merging into %x1, and runtime mask %x2 with a zero pass-through - and
; returns the sum of the three results. Expected output: consecutive
; vpmovuswb lines in merge-masked, zero-masked, then unmasked form.
define <32 x i8> @test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
; CHECK: vpmovuswb %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovuswb %zmm0, %ymm0
  %unmasked = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %merged = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %zeroed = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %partial = add <32 x i8> %unmasked, %merged
  %total = add <32 x i8> %partial, %zeroed
  ret <32 x i8> %total
}
|
|
1077
|
|
1078 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)
|
|
1079
|
|
; Calls the store form pmovus.wb.mem.512 twice on %ptr: first with an
; all-ones mask, then with the runtime mask %x2. Expected output: a vpmovuswb
; to (%rdi) followed by a vpmovuswb to (%rdi) under {%k1}.
define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
; CHECK: vpmovuswb %zmm0, (%rdi)
; CHECK: vpmovuswb %zmm0, (%rdi) {%k1}
  ; All-ones mask.
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  ; Caller-supplied mask.
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ret void
}
|
|
1088
|
|
1089 declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
|
|
1090
|
|
; Calls the pmaddubs.w.512 intrinsic with the runtime mask %x3 and again with
; an all-ones mask, then returns the sum of both results. The whole expected
; instruction sequence is pinned: kmovd of the mask (with its encoding), the
; masked and unmasked vpmaddubsw, a vpaddw, and retq.
define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
  %total = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %total
}
|
|
1104
|
|
1105 declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)
|
|
1106
|
|
; Calls the pmaddw.d.512 intrinsic with the runtime i16 mask %x3 and again
; with an all-ones mask, then returns the sum of both results. The whole
; expected instruction sequence is pinned: kmovw of the mask, the masked and
; unmasked vpmaddwd, a vpaddd, and retq.
define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %masked = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3)
  %unmasked = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
  %total = add <16 x i32> %masked, %unmasked
  ret <16 x i32> %total
}
|
|
1120
|
|
declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

; Calls the punpckhb.w intrinsic twice with identical vector operands:
; once with the i64 argument %x3 as mask and once with a mask of -1.
; The expected assembly loads the mask with kmovq, emits a {%k1}-masked
; vpunpckhbw into %zmm2, an unmasked vpunpckhbw into %zmm0, and adds the
; two with vpaddb. The returned value is the sum of both call results.
define <64 x i8> @test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  ; Variable mask taken from %x3.
  %masked = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  ; Constant all-ones mask.
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %sum = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %sum
}
|
|
1136
|
|
declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

; Calls the punpcklb.w intrinsic twice with identical vector operands:
; once with the i64 argument %x3 as mask and once with a mask of -1.
; The expected assembly loads the mask with kmovq, emits a {%k1}-masked
; vpunpcklbw into %zmm2, an unmasked vpunpcklbw into %zmm0, and adds the
; two with vpaddb. The returned value is the sum of both call results.
define <64 x i8> @test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  ; Variable mask taken from %x3.
  %masked = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  ; Constant all-ones mask.
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %sum = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %sum
}
|
|
1152
|
|
declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; Calls the punpckhw.d intrinsic twice with identical vector operands:
; once with the i32 argument %x3 as mask and once with a mask of -1.
; The expected assembly loads the mask with kmovd, emits a {%k1}-masked
; vpunpckhwd into %zmm2, an unmasked vpunpckhwd into %zmm0, and adds the
; two with vpaddw. The returned value is the sum of both call results.
define <32 x i16> @test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  ; Variable mask taken from %x3.
  %masked = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  ; Constant all-ones mask.
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}
|
|
1168
|
|
declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; Calls the punpcklw.d intrinsic twice with identical vector operands:
; once with the i32 argument %x3 as mask and once with a mask of -1.
; The expected assembly loads the mask with kmovd, emits a {%k1}-masked
; vpunpcklwd into %zmm2, an unmasked vpunpcklwd into %zmm0, and adds the
; two with vpaddw. The returned value is the sum of both call results.
define <32 x i16> @test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  ; Variable mask taken from %x3.
  %masked = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  ; Constant all-ones mask.
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}
|
|
1184
|
|
declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)

; Calls the palignr intrinsic three times with the same sources and an
; immediate of 2:
;   1. fourth operand %x3, mask %x4   -> expected as "{%k1}" into %zmm2
;   2. fourth operand zero, mask %x4  -> expected as "{%k1} {z}" into %zmm3
;   3. fourth operand %x3, mask -1    -> expected with no mask annotation
; The three results are summed with two byte-wise adds and returned.
define <64 x i8> @test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_palignr_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddb %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  ; Variable mask, %x3 as the fourth operand.
  %merged = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
  ; Variable mask, all-zero vector as the fourth operand.
  %zeroed = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
  ; Constant all-ones mask.
  %plain = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
  %partial = add <64 x i8> %merged, %zeroed
  %total = add <64 x i8> %partial, %plain
  ret <64 x i8> %total
}
|
|
1204
|
|
declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)

; Calls the dbpsadbw intrinsic three times with the same sources and an
; immediate of 2:
;   1. fourth operand %x3, mask %x4   -> expected as "{%k1}" into %zmm2
;   2. fourth operand zero, mask %x4  -> expected as "{%k1} {z}" into %zmm3
;   3. fourth operand %x3, mask -1    -> expected with no mask annotation
; The three results are summed with two word-wise adds and returned.
define <32 x i16> @test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  ; Variable mask, %x3 as the fourth operand.
  %merged = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
  ; Variable mask, all-zero vector as the fourth operand.
  %zeroed = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4)
  ; Constant all-ones mask.
  %plain = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1)
  %partial = add <32 x i16> %merged, %zeroed
  %total = add <32 x i16> %partial, %plain
  ret <32 x i16> %total
}
|
|
1224
|
|
declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)

; Calls the psll.dq intrinsic on %x0 with two different constant second
; operands (8 and 4) and returns the sum of the two results.
; The expectations below require that no "call" appears before the first
; vpslldq and that vpslldq is emitted twice. The intrinsic takes no mask
; operand even though the function name contains "mask".
; CHECK-LABEL: @test_int_x86_avx512_mask_psll_dq_512
; CHECK-NOT: call
; CHECK: vpslldq
; CHECK: vpslldq
define <8 x i64> @test_int_x86_avx512_mask_psll_dq_512(<8 x i64> %x0) {
  %by8 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
  %by4 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
  %sum = add <8 x i64> %by8, %by4
  ret <8 x i64> %sum
}
|
|
1237
|
|
declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)

; Calls the psrl.dq intrinsic on %x0 with two different constant second
; operands (8 and 4) and returns the sum of the two results.
; The expectations below require that no "call" appears before the first
; vpsrldq and that vpsrldq is emitted twice. The intrinsic takes no mask
; operand even though the function name contains "mask".
; CHECK-LABEL: @test_int_x86_avx512_mask_psrl_dq_512
; CHECK-NOT: call
; CHECK: vpsrldq
; CHECK: vpsrldq
define <8 x i64> @test_int_x86_avx512_mask_psrl_dq_512(<8 x i64> %x0) {
  %by8 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
  %by4 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
  %sum = add <8 x i64> %by8, %by4
  ret <8 x i64> %sum
}
|
|
; NOTE(review): the result type here is <64 x i8>. Confirm it matches the
; intrinsic's definition in the target's intrinsic table for this revision;
; a sum-of-absolute-differences result would normally be expected to use
; wider elements.
declare <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)

; Calls the psad.bw intrinsic twice with %x0 as the first operand and
; %x1, then %x2, as the second operand, and returns the sum of the two
; results. The expectations below require that no "call" appears before
; the first vpsadbw, and that one vpsadbw reads %zmm1 and a later one
; reads %zmm2. The intrinsic takes no mask operand even though the
; function name contains "mask".
; CHECK-LABEL: @test_int_x86_avx512_mask_psadb_w_512
; CHECK-NOT: call
; CHECK: vpsadbw %zmm1
; CHECK: vpsadbw %zmm2
define <64 x i8> @test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) {
  %sad01 = call <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
  %sad02 = call <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
  %sum = add <64 x i8> %sad01, %sad02
  ret <64 x i8> %sum
}
|