comparison test/CodeGen/X86/avx2-vector-shifts.ll @ 95:afa8332a0e37 LLVM3.8

LLVM 3.8
author Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date Tue, 13 Oct 2015 17:48:58 +0900
parents 54457678186b
children 1172e4bd9c6f
comparison
equal deleted inserted replaced
84:f3e34b893a5f 95:afa8332a0e37
264 %and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8> 264 %and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8>
265 %trunc = trunc <4 x i64> %and to <4 x i32> 265 %trunc = trunc <4 x i64> %and to <4 x i32>
266 %sra = lshr <4 x i32> %x, %trunc 266 %sra = lshr <4 x i32> %x, %trunc
267 ret <4 x i32> %sra 267 ret <4 x i32> %sra
268 } 268 }
269
270 ;
271 ; Vectorized word (i16) and byte (i8) shifts
272 ;
273
; shl_8i16: lane-wise `shl` of <8 x i16> %r by the matching lanes of %a.
; Expected AVX2 sequence, per the directives below: both operands are
; zero-extended from words to dwords in ymm registers (vpmovzxwd) and shifted
; with vpsllvd; vpshufb then keeps bytes 0,1 / 4,5 / 8,9 / 12,13 of each
; 128-bit half (the low word of every dword) and vpermq [0,2,2,3] places the
; upper half's result directly after the lower half's.
; The final retq directive is not a -NEXT match, so retq does not have to
; immediately follow vpermq in the generated output.
274 define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
275 ; CHECK-LABEL: shl_8i16
276 ; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
277 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
278 ; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
279 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
280 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
281 ; CHECK: retq
282 %shl = shl <8 x i16> %r, %a
283 ret <8 x i16> %shl
284 }
285
; shl_16i16: lane-wise `shl` of <16 x i16> %r by the matching lanes of %a.
; Expected AVX2 sequence, per the directives below:
;   - vpxor zeroes ymm2;
;   - the shift amounts in ymm1 are interleaved with the zero words of ymm2
;     (vpunpckhwd / vpunpcklwd), giving one amount per dword;
;   - each word of ymm0 is duplicated into both halves of a dword
;     (indices 4,4,5,5,... for the high unpack, 0,0,1,1,... for the low one);
;   - each half is shifted with vpsllvd, and vpsrld $16 brings the upper word
;     of every dword down;
;   - vpackusdw packs the two dword vectors back into one vector of words.
286 define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
287 ; CHECK-LABEL: shl_16i16
288 ; CHECK: vpxor %ymm2, %ymm2, %ymm2
289 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
290 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
291 ; CHECK-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
292 ; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
293 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
294 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
295 ; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
296 ; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
297 ; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
298 ; CHECK-NEXT: retq
299 %shl = shl <16 x i16> %r, %a
300 ret <16 x i16> %shl
301 }
302
; shl_32i8: lane-wise `shl` of <32 x i8> %r by the matching lanes of %a.
; Expected AVX2 sequence, per the directives below: vpsllw $5 pre-shifts the
; amounts in ymm1, which then act as the vpblendvb selector. Three select
; stages follow, offering ymm0 shifted left by 4 (vpsllw $4 + vpand with a
; RIP-relative constant), by 2 (vpsllw $2 + vpand) and by 1 (vpaddb of ymm0
; with itself); vpaddb of ymm1 with itself doubles the selector between stages.
; NOTE(review): the vpand constants are only matched as "(%rip)" operands;
; presumably they clear bits that the 16-bit vpsllw carries across byte
; boundaries -- their values are not visible here, confirm against the output.
303 define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
304 ; CHECK-LABEL: shl_32i8
305 ; CHECK: vpsllw $5, %ymm1, %ymm1
306 ; CHECK-NEXT: vpsllw $4, %ymm0, %ymm2
307 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
308 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
309 ; CHECK-NEXT: vpsllw $2, %ymm0, %ymm2
310 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
311 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
312 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
313 ; CHECK-NEXT: vpaddb %ymm0, %ymm0, %ymm2
314 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
315 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
316 ; CHECK-NEXT: retq
317 %shl = shl <32 x i8> %r, %a
318 ret <32 x i8> %shl
319 }
320
; ashr_8i16: lane-wise `ashr` of <8 x i16> %r by the matching lanes of %a.
; Expected AVX2 sequence, per the directives below: the shift amounts are
; zero-extended to dwords (vpmovzxwd) while the shifted value is sign-extended
; (vpmovsxwd); vpsravd performs the shift; vpshufb keeps the low word of every
; dword in each 128-bit half and vpermq [0,2,2,3] places the upper half's
; result directly after the lower half's.
; The final retq directive is not a -NEXT match, so retq does not have to
; immediately follow vpermq in the generated output.
321 define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
322 ; CHECK-LABEL: ashr_8i16
323 ; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
324 ; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0
325 ; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
326 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
327 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
328 ; CHECK: retq
329 %ashr = ashr <8 x i16> %r, %a
330 ret <8 x i16> %ashr
331 }
332
; ashr_16i16: lane-wise `ashr` of <16 x i16> %r by the matching lanes of %a.
; Expected AVX2 sequence, per the directives below:
;   - vpxor zeroes ymm2;
;   - the shift amounts in ymm1 are interleaved with the zero words of ymm2
;     (vpunpckhwd / vpunpcklwd), giving one amount per dword;
;   - each word of ymm0 is duplicated into both halves of a dword;
;   - each half is shifted with vpsravd, and vpsrld $16 brings the upper word
;     of every dword down;
;   - vpackusdw packs the two dword vectors back into one vector of words.
333 define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
334 ; CHECK-LABEL: ashr_16i16
335 ; CHECK: vpxor %ymm2, %ymm2, %ymm2
336 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
337 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
338 ; CHECK-NEXT: vpsravd %ymm3, %ymm4, %ymm3
339 ; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
340 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
341 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
342 ; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
343 ; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
344 ; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
345 ; CHECK-NEXT: retq
346 %ashr = ashr <16 x i16> %r, %a
347 ret <16 x i16> %ashr
348 }
349
; ashr_32i8: lane-wise `ashr` of <32 x i8> %r by the matching lanes of %a.
; Expected AVX2 sequence, per the directives below:
;   - vpsllw $5 pre-shifts the amounts in ymm1;
;   - for the high bytes of each 128-bit half, vpunpckhbw interleaves ymm0 and
;     ymm1 bytes into ymm2 (the selector) and duplicates each ymm0 byte into
;     both bytes of a word in ymm3 (the value);
;   - three select stages offer the value shifted by 4, 2 and 1 (vpsraw),
;     chosen by vpblendvb, with vpaddw doubling the selector between stages;
;   - vpsrlw $8 then moves the upper byte of every word down;
;   - the same steps are repeated for the low bytes (vpunpcklbw) in ymm1/ymm0;
;   - vpackuswb packs both word vectors back into one vector of bytes.
350 define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
351 ; CHECK-LABEL: ashr_32i8
352 ; CHECK: vpsllw $5, %ymm1, %ymm1
353 ; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
354 ; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
355 ; CHECK-NEXT: vpsraw $4, %ymm3, %ymm4
356 ; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
357 ; CHECK-NEXT: vpsraw $2, %ymm3, %ymm4
358 ; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2
359 ; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
360 ; CHECK-NEXT: vpsraw $1, %ymm3, %ymm4
361 ; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2
362 ; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
363 ; CHECK-NEXT: vpsrlw $8, %ymm2, %ymm2
364 ; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
365 ; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
366 ; CHECK-NEXT: vpsraw $4, %ymm0, %ymm3
367 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
368 ; CHECK-NEXT: vpsraw $2, %ymm0, %ymm3
369 ; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1
370 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
371 ; CHECK-NEXT: vpsraw $1, %ymm0, %ymm3
372 ; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1
373 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
374 ; CHECK-NEXT: vpsrlw $8, %ymm0, %ymm0
375 ; CHECK-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
376 ; CHECK-NEXT: retq
377 %ashr = ashr <32 x i8> %r, %a
378 ret <32 x i8> %ashr
379 }
380
; lshr_8i16: lane-wise `lshr` of <8 x i16> %r by the matching lanes of %a.
; Expected AVX2 sequence, per the directives below: both operands are
; zero-extended from words to dwords in ymm registers (vpmovzxwd) and shifted
; with vpsrlvd; vpshufb then keeps the low word of every dword in each 128-bit
; half and vpermq [0,2,2,3] places the upper half's result directly after the
; lower half's.
; The final retq directive is not a -NEXT match, so retq does not have to
; immediately follow vpermq in the generated output.
381 define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
382 ; CHECK-LABEL: lshr_8i16
383 ; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
384 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
385 ; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
386 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
387 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
388 ; CHECK: retq
389 %lshr = lshr <8 x i16> %r, %a
390 ret <8 x i16> %lshr
391 }
392
; lshr_16i16: lane-wise `lshr` of <16 x i16> %r by the matching lanes of %a.
; Expected AVX2 sequence, per the directives below:
;   - vpxor zeroes ymm2;
;   - the shift amounts in ymm1 are interleaved with the zero words of ymm2
;     (vpunpckhwd / vpunpcklwd), giving one amount per dword;
;   - each word of ymm0 is duplicated into both halves of a dword;
;   - each half is shifted with vpsrlvd, and vpsrld $16 brings the upper word
;     of every dword down;
;   - vpackusdw packs the two dword vectors back into one vector of words.
393 define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
394 ; CHECK-LABEL: lshr_16i16
395 ; CHECK: vpxor %ymm2, %ymm2, %ymm2
396 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
397 ; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
398 ; CHECK-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
399 ; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
400 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
401 ; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
402 ; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
403 ; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
404 ; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
405 ; CHECK-NEXT: retq
406 %lshr = lshr <16 x i16> %r, %a
407 ret <16 x i16> %lshr
408 }
409
; lshr_32i8: lane-wise `lshr` of <32 x i8> %r by the matching lanes of %a.
; Expected AVX2 sequence, per the directives below: vpsllw $5 pre-shifts the
; amounts in ymm1, which then act as the vpblendvb selector. Three select
; stages follow, offering ymm0 shifted right by 4, 2 and 1 (vpsrlw, each
; followed by vpand with a RIP-relative constant); vpaddb of ymm1 with itself
; doubles the selector between stages.
; NOTE(review): the vpand constants are only matched as "(%rip)" operands;
; presumably they clear bits that the 16-bit vpsrlw carries across byte
; boundaries -- their values are not visible here, confirm against the output.
410 define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
411 ; CHECK-LABEL: lshr_32i8
412 ; CHECK: vpsllw $5, %ymm1, %ymm1
413 ; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm2
414 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
415 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
416 ; CHECK-NEXT: vpsrlw $2, %ymm0, %ymm2
417 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
418 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
419 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
420 ; CHECK-NEXT: vpsrlw $1, %ymm0, %ymm2
421 ; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
422 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
423 ; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
424 ; CHECK-NEXT: retq
425 %lshr = lshr <32 x i8> %r, %a
426 ret <32 x i8> %lshr
427 }