test/CodeGen/X86/avx512bw-intrinsics.ll @ 83:60c9769439b8 (LLVM 3.7)
author:     Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
date:       Wed, 18 Feb 2015 14:55:36 +0900
children:   afa8332a0e37
comparison: 78:af83660cff7b vs 83:60c9769439b8
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding | FileCheck %s

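; Overview (editor's note, summarizing the tests below): this file exercises
; the AVX-512BW mask intrinsics on skx -- the pcmpeq/pcmpgt mask comparisons,
; the generic cmp/ucmp comparisons driven by a predicate immediate, and the
; vpblendm blends -- checking the emitted instructions (and, via
; --show-mc-encoding, their encodings after the "##" markers).
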
define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
; CHECK-LABEL: test_pcmpeq_b
; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
  ret i64 %res
}

define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_b
; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
  ret i64 %res
}

declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)

define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_pcmpeq_w
; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
  ret i32 %res
}

define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_w
; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)

define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
; CHECK-LABEL: test_pcmpgt_b
; CHECK: vpcmpgtb %zmm1, %zmm0, %k0 ##
  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
  ret i64 %res
}

define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_b
; CHECK: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} ##
  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
  ret i64 %res
}

declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)

define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_pcmpgt_w
; CHECK: vpcmpgtw %zmm1, %zmm0, %k0 ##
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
  ret i32 %res
}

define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_w
; CHECK: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} ##
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)

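; The cmp/ucmp intrinsics below take a 3-bit comparison predicate immediate,
; which the tests sweep through in order:
;   0 = eq, 1 = lt, 2 = le, 3 = unord, 4 = neq, 5 = nlt, 6 = nle, 7 = ord
; The signed forms print as vpcmp<pred>{b,w}; the unsigned (ucmp) forms carry
; a "u" suffix, e.g. vpcmpltub.
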
define <8 x i64> @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; CHECK-LABEL: test_cmp_b_512
; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 -1)
  %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
; CHECK: vpcmpltb %zmm1, %zmm0, %k0 ##
  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 -1)
  %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
; CHECK: vpcmpleb %zmm1, %zmm0, %k0 ##
  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 -1)
  %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 ##
  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 -1)
  %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 ##
  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 -1)
  %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 ##
  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 -1)
  %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 ##
  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 -1)
  %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
; CHECK: vpcmpordb %zmm1, %zmm0, %k0 ##
  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 -1)
  %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
  ret <8 x i64> %vec7
}

define <8 x i64> @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; CHECK-LABEL: test_mask_cmp_b_512
; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 %mask)
  %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
; CHECK: vpcmpltb %zmm1, %zmm0, %k0 {%k1} ##
  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 %mask)
  %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
; CHECK: vpcmpleb %zmm1, %zmm0, %k0 {%k1} ##
  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 %mask)
  %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} ##
  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 %mask)
  %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} ##
  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 %mask)
  %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} ##
  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 %mask)
  %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} ##
  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 %mask)
  %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
; CHECK: vpcmpordb %zmm1, %zmm0, %k0 {%k1} ##
  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 %mask)
  %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
  ret <8 x i64> %vec7
}

declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i8, i64) nounwind readnone

define <8 x i64> @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; CHECK-LABEL: test_ucmp_b_512
; CHECK: vpcmpequb %zmm1, %zmm0, %k0 ##
  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 -1)
  %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
; CHECK: vpcmpltub %zmm1, %zmm0, %k0 ##
  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 -1)
  %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
; CHECK: vpcmpleub %zmm1, %zmm0, %k0 ##
  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 -1)
  %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 ##
  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 -1)
  %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 ##
  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 -1)
  %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 ##
  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 -1)
  %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 ##
  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 -1)
  %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
; CHECK: vpcmpordub %zmm1, %zmm0, %k0 ##
  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 -1)
  %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
  ret <8 x i64> %vec7
}

define <8 x i64> @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; CHECK-LABEL: test_mask_x86_avx512_ucmp_b_512
; CHECK: vpcmpequb %zmm1, %zmm0, %k0 {%k1} ##
  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 %mask)
  %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
; CHECK: vpcmpltub %zmm1, %zmm0, %k0 {%k1} ##
  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 %mask)
  %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
; CHECK: vpcmpleub %zmm1, %zmm0, %k0 {%k1} ##
  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 %mask)
  %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} ##
  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 %mask)
  %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} ##
  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 %mask)
  %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} ##
  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 %mask)
  %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} ##
  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 %mask)
  %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
; CHECK: vpcmpordub %zmm1, %zmm0, %k0 {%k1} ##
  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 %mask)
  %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
  ret <8 x i64> %vec7
}

declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i8, i64) nounwind readnone

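; The word (w) variants repeat the same predicate sweep on <32 x i16>
; vectors, so the mask and result types narrow from i64 (64 byte lanes)
; to i32 (32 word lanes).
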
define <8 x i32> @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: test_cmp_w_512
; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 -1)
  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
; CHECK: vpcmpltw %zmm1, %zmm0, %k0 ##
  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 -1)
  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
; CHECK: vpcmplew %zmm1, %zmm0, %k0 ##
  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 -1)
  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 ##
  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 -1)
  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 ##
  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 -1)
  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 ##
  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 -1)
  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 ##
  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 -1)
  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
; CHECK: vpcmpordw %zmm1, %zmm0, %k0 ##
  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 -1)
  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
  ret <8 x i32> %vec7
}

define <8 x i32> @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
; CHECK-LABEL: test_mask_cmp_w_512
; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 %mask)
  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
; CHECK: vpcmpltw %zmm1, %zmm0, %k0 {%k1} ##
  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 %mask)
  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
; CHECK: vpcmplew %zmm1, %zmm0, %k0 {%k1} ##
  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 %mask)
  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 {%k1} ##
  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 %mask)
  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} ##
  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 %mask)
  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} ##
  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 %mask)
  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 {%k1} ##
  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 %mask)
  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
; CHECK: vpcmpordw %zmm1, %zmm0, %k0 {%k1} ##
  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 %mask)
  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
  ret <8 x i32> %vec7
}

declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i8, i32) nounwind readnone

define <8 x i32> @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: test_ucmp_w_512
; CHECK: vpcmpequw %zmm1, %zmm0, %k0 ##
  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 -1)
  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 ##
  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 -1)
  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 ##
  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 -1)
  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 ##
  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 -1)
  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 ##
  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 -1)
  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 ##
  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 -1)
  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 ##
  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 -1)
  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
; CHECK: vpcmporduw %zmm1, %zmm0, %k0 ##
  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 -1)
  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
  ret <8 x i32> %vec7
}

define <8 x i32> @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
; CHECK-LABEL: test_mask_ucmp_w_512
; CHECK: vpcmpequw %zmm1, %zmm0, %k0 {%k1} ##
  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 %mask)
  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} ##
  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 %mask)
  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} ##
  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 %mask)
  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1} ##
  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 %mask)
  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 {%k1} ##
  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 %mask)
  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} ##
  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 %mask)
  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} ##
  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 %mask)
  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
; CHECK: vpcmporduw %zmm1, %zmm0, %k0 {%k1} ##
  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 %mask)
  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
  ret <8 x i32> %vec7
}

declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i8, i32) nounwind readnone

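; The blend tests below check that the mask.blend intrinsics select each
; byte/word lane from one of the two vector operands according to the
; corresponding bit of the integer mask, lowering to vpblendmb/vpblendmw
; at 128-, 256-, and 512-bit widths.
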
; CHECK-LABEL: test_x86_mask_blend_b_256
; CHECK: vpblendmb
define <32 x i8> @test_x86_mask_blend_b_256(i32 %a0, <32 x i8> %a1, <32 x i8> %a2) {
  %res = call <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8> %a1, <32 x i8> %a2, i32 %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8>, <32 x i8>, i32) nounwind readonly

; CHECK-LABEL: test_x86_mask_blend_w_256
define <16 x i16> @test_x86_mask_blend_w_256(i16 %mask, <16 x i16> %a1, <16 x i16> %a2) {
; CHECK: vpblendmw
  %res = call <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16> %a1, <16 x i16> %a2, i16 %mask) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16>, <16 x i16>, i16) nounwind readonly

; CHECK-LABEL: test_x86_mask_blend_b_512
; CHECK: vpblendmb
define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) {
  %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1]
  ret <64 x i8> %res
}
declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly

; CHECK-LABEL: test_x86_mask_blend_w_512
define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) {
; CHECK: vpblendmw
  %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly

; CHECK-LABEL: test_x86_mask_blend_b_128
; CHECK: vpblendmb
define <16 x i8> @test_x86_mask_blend_b_128(i16 %a0, <16 x i8> %a1, <16 x i8> %a2) {
  %res = call <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8> %a1, <16 x i8> %a2, i16 %a0) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8>, <16 x i8>, i16) nounwind readonly

; CHECK-LABEL: test_x86_mask_blend_w_128
define <8 x i16> @test_x86_mask_blend_w_128(i8 %mask, <8 x i16> %a1, <8 x i16> %a2) {
; CHECK: vpblendmw
  %res = call <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16> %a1, <8 x i16> %a2, i8 %mask) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16>, <8 x i16>, i8) nounwind readonly
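
; Editor's note (an assumed mapping, not asserted by this test): in C these
; operations are reached through <immintrin.h> intrinsics such as
; _mm512_cmpeq_epi8_mask, _mm512_cmp_epu8_mask, and _mm512_mask_blend_epi8,
; which clang lowers to IR intrinsics like the ones declared above.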