test/CodeGen/X86/avx-isa-check.ll @ 95:afa8332a0e37 (LLVM 3.8)
author: Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date:   Tue, 13 Oct 2015 17:48:58 +0900

; Check AVX2 instructions that are disabled when AVX512VL/AVX512BW are present.

; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2 -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx -o /dev/null

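; Note: there are no CHECK lines; every RUN discards its output (-o /dev/null),
; so each test passes as long as llc completes instruction selection without
; crashing once the VEX-encoded AVX2 patterns are disabled.

; 256-bit and 128-bit bitwise logic: and, andn, or, xor.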
define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = and <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = and <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %x = and <4 x i64> %a, %y
  ret <4 x i64> %x
}

define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
  %x = and <2 x i64> %a, %y
  ret <2 x i64> %x
}

define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = or <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = xor <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = or <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = xor <2 x i64> %a2, %b
  ret <2 x i64> %x
}

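; 256-bit integer arithmetic: add, sub, and mul at each element width.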
define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

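; 256-bit integer compares: the icmp+sext idiom selects vpcmpgt*/vpcmpeq*.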
define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}

define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}

define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}

define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}

define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}

define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}

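; 128-bit integer arithmetic: add, sub, and mul at each element width.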
define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = add <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = add <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = add <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = add <16 x i8> %i, %j
  ret <16 x i8> %x
}

define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = sub <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = sub <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = sub <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = sub <16 x i8> %i, %j
  ret <16 x i8> %x
}

define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = mul <8 x i16> %i, %j
  ret <8 x i16> %x
}

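; 128-bit integer compares, using the same icmp+sext idiom as above.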
define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}

define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}

define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}

define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}

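; Vector shuffles: most masks are chosen to match the vpalignr pattern
; (see the function names); the last few exercise other shuffle lowerings.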
define <8 x i16> @shuffle_v8i16_vpalignr(<8 x i16> %a, <8 x i16> %b) {
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_vpalignr(<16 x i16> %a, <16 x i16> %b) {
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i16> %shuffle
}

define <16 x i8> @shuffle_v16i8_vpalignr(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i8> %shuffle
}

define <32 x i8> @shuffle_v32i8_vpalignr(<32 x i8> %a, <32 x i8> %b) {
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i8> %shuffle
}

define <2 x i64> @shuffle_v2i64_vpalignr(<2 x i64> %a, <2 x i64> %b) {
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}

define <4 x i32> @shuffle_v4i32_vpalignr(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
  ret <4 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) {
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
  ret <8 x i32> %shuffle
}

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
  ret <2 x double> %bitcast64
}

define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
  %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
  ret <16 x i16> %shuffle
}

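; Lane extracts: each function returns one element and stores another to memory.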
define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
  %r1 = extractelement <2 x i64> %x, i32 0
  %r2 = extractelement <2 x i64> %x, i32 1
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
  %r1 = extractelement <4 x i32> %x, i32 1
  %r2 = extractelement <4 x i32> %x, i32 3
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
  %r1 = extractelement <8 x i16> %x, i32 1
  %r2 = extractelement <8 x i16> %x, i32 3
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
  %r1 = extractelement <16 x i8> %x, i32 1
  %r2 = extractelement <16 x i8> %x, i32 3
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

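; Lane inserts: one element is loaded from memory, one is passed in a register.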
define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y, i64* %ptr) {
  %val = load i64, i64* %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 0
  ret <2 x i64> %r2
}

define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
  %val = load i32, i32* %ptr
  %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
  ret <4 x i32> %r2
}

define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
  %val = load i16, i16* %ptr
  %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
  ret <8 x i16> %r2
}

define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
  %val = load i8, i8* %ptr
  %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
  ret <16 x i8> %r2
}