Mercurial > hg > CbC > CbC_llvm
comparison test/CodeGen/X86/avx2-vector-shifts.ll @ 95:afa8332a0e37 LLVM3.8
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Oct 2015 17:48:58 +0900 |
parents | 54457678186b |
children | 1172e4bd9c6f |
comparison
equal
deleted
inserted
replaced
84:f3e34b893a5f | 95:afa8332a0e37 |
---|---|
264 %and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8> | 264 %and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8> |
265 %trunc = trunc <4 x i64> %and to <4 x i32> | 265 %trunc = trunc <4 x i64> %and to <4 x i32> |
266 %sra = lshr <4 x i32> %x, %trunc | 266 %sra = lshr <4 x i32> %x, %trunc |
267 ret <4 x i32> %sra | 267 ret <4 x i32> %sra |
268 } | 268 } |
269 | |
270 ; | |
271 ; Vectorized byte shifts | |
272 ; | |
273 | |
274 define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind { | |
275 ; CHECK-LABEL: shl_8i16 | |
276 ; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero | |
277 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero | |
278 ; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 | |
279 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero | |
280 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] | |
281 ; CHECK: retq | |
282 %shl = shl <8 x i16> %r, %a | |
283 ret <8 x i16> %shl | |
284 } | |
285 | |
286 define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind { | |
287 ; CHECK-LABEL: shl_16i16 | |
288 ; CHECK: vpxor %ymm2, %ymm2, %ymm2 | |
289 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] | |
290 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] | |
291 ; CHECK-NEXT: vpsllvd %ymm3, %ymm4, %ymm3 | |
292 ; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3 | |
293 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] | |
294 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] | |
295 ; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 | |
296 ; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0 | |
297 ; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 | |
298 ; CHECK-NEXT: retq | |
299 %shl = shl <16 x i16> %r, %a | |
300 ret <16 x i16> %shl | |
301 } | |
302 | |
303 define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind { | |
304 ; CHECK-LABEL: shl_32i8 | |
305 ; CHECK: vpsllw $5, %ymm1, %ymm1 | |
306 ; CHECK-NEXT: vpsllw $4, %ymm0, %ymm2 | |
307 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 | |
308 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 | |
309 ; CHECK-NEXT: vpsllw $2, %ymm0, %ymm2 | |
310 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 | |
311 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1 | |
312 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 | |
313 ; CHECK-NEXT: vpaddb %ymm0, %ymm0, %ymm2 | |
314 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1 | |
315 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 | |
316 ; CHECK-NEXT: retq | |
317 %shl = shl <32 x i8> %r, %a | |
318 ret <32 x i8> %shl | |
319 } | |
320 | |
321 define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind { | |
322 ; CHECK-LABEL: ashr_8i16 | |
323 ; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero | |
324 ; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0 | |
325 ; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0 | |
326 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero | |
327 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] | |
328 ; CHECK: retq | |
329 %ashr = ashr <8 x i16> %r, %a | |
330 ret <8 x i16> %ashr | |
331 } | |
332 | |
333 define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind { | |
334 ; CHECK-LABEL: ashr_16i16 | |
335 ; CHECK: vpxor %ymm2, %ymm2, %ymm2 | |
336 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] | |
337 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] | |
338 ; CHECK-NEXT: vpsravd %ymm3, %ymm4, %ymm3 | |
339 ; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3 | |
340 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] | |
341 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] | |
342 ; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0 | |
343 ; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0 | |
344 ; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 | |
345 ; CHECK-NEXT: retq | |
346 %ashr = ashr <16 x i16> %r, %a | |
347 ret <16 x i16> %ashr | |
348 } | |
349 | |
350 define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind { | |
351 ; CHECK-LABEL: ashr_32i8 | |
352 ; CHECK: vpsllw $5, %ymm1, %ymm1 | |
353 ; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] | |
354 ; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] | |
355 ; CHECK-NEXT: vpsraw $4, %ymm3, %ymm4 | |
356 ; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3 | |
357 ; CHECK-NEXT: vpsraw $2, %ymm3, %ymm4 | |
358 ; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2 | |
359 ; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3 | |
360 ; CHECK-NEXT: vpsraw $1, %ymm3, %ymm4 | |
361 ; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2 | |
362 ; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2 | |
363 ; CHECK-NEXT: vpsrlw $8, %ymm2, %ymm2 | |
364 ; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] | |
365 ; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] | |
366 ; CHECK-NEXT: vpsraw $4, %ymm0, %ymm3 | |
367 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 | |
368 ; CHECK-NEXT: vpsraw $2, %ymm0, %ymm3 | |
369 ; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1 | |
370 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 | |
371 ; CHECK-NEXT: vpsraw $1, %ymm0, %ymm3 | |
372 ; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1 | |
373 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 | |
374 ; CHECK-NEXT: vpsrlw $8, %ymm0, %ymm0 | |
375 ; CHECK-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 | |
376 ; CHECK-NEXT: retq | |
377 %ashr = ashr <32 x i8> %r, %a | |
378 ret <32 x i8> %ashr | |
379 } | |
380 | |
381 define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind { | |
382 ; CHECK-LABEL: lshr_8i16 | |
383 ; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero | |
384 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero | |
385 ; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 | |
386 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero | |
387 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] | |
388 ; CHECK: retq | |
389 %lshr = lshr <8 x i16> %r, %a | |
390 ret <8 x i16> %lshr | |
391 } | |
392 | |
393 define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind { | |
394 ; CHECK-LABEL: lshr_16i16 | |
395 ; CHECK: vpxor %ymm2, %ymm2, %ymm2 | |
396 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] | |
397 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] | |
398 ; CHECK-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3 | |
399 ; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3 | |
400 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] | |
401 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] | |
402 ; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 | |
403 ; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0 | |
404 ; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 | |
405 ; CHECK-NEXT: retq | |
406 %lshr = lshr <16 x i16> %r, %a | |
407 ret <16 x i16> %lshr | |
408 } | |
409 | |
410 define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind { | |
411 ; CHECK-LABEL: lshr_32i8 | |
412 ; CHECK: vpsllw $5, %ymm1, %ymm1 | |
413 ; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm2 | |
414 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 | |
415 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 | |
416 ; CHECK-NEXT: vpsrlw $2, %ymm0, %ymm2 | |
417 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 | |
418 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1 | |
419 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 | |
420 ; CHECK-NEXT: vpsrlw $1, %ymm0, %ymm2 | |
421 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 | |
422 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1 | |
423 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 | |
424 ; CHECK-NEXT: retq | |
425 %lshr = lshr <32 x i8> %r, %a | |
426 ret <32 x i8> %lshr | |
427 } |