comparison test/CodeGen/X86/vector-shuffle-128-v16.ll @ 95:afa8332a0e37 LLVM3.8

LLVM 3.8

author:   Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date:     Tue, 13 Oct 2015 17:48:58 +0900
parents:  60c9769439b8
children: 7d135dc70f03
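
Each function in this test feeds a single `shufflevector` through `llc` for the subtargets listed in the RUN lines (SSE2, SSSE3, SSE4.1, AVX, AVX2), and FileCheck verifies the instruction sequence it lowers to; in the shuffle masks, indices 0-15 select bytes from `%a` and 16-31 from `%b`, which is how the functions are named. As a minimal, hypothetical sketch of the construct under test (not a function from this file):

; Hypothetical example, not taken from this test: interleave the low eight
; bytes of %a (mask indices 0-7) with the low eight bytes of %b (mask
; indices 16-23); the X86 backend can lower this to a single punpcklbw.
define <16 x i8> @interleave_lo_sketch(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  ret <16 x i8> %shuffle
}
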
84:f3e34b893a5f (left column) | 95:afa8332a0e37 (right column)
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-legality | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 | 1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 |
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-legality | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3 | 2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3 |
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-legality | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 | 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 |
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-legality | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 | 4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 |
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-legality | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 | 5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 |
6 | 6 |
7 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | 7 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" |
8 target triple = "x86_64-unknown-unknown" | 8 target triple = "x86_64-unknown-unknown" |
9 | 9 |
10 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) { | 10 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) { |
245 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> | 245 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> |
246 ret <16 x i8> %shuffle | 246 ret <16 x i8> %shuffle |
247 } | 247 } |
248 | 248 |
249 define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) { | 249 define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) { |
250 ; SSE-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: | 250 ; SSE2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: |
251 ; SSE: # BB#0: | 251 ; SSE2: # BB#0: |
252 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] | 252 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] |
253 ; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] | 253 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] |
254 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] | 254 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3] |
255 ; SSE-NEXT: movdqa %xmm1, %xmm0 | 255 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] |
256 ; SSE-NEXT: retq | 256 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] |
257 ; SSE2-NEXT: pand %xmm2, %xmm1 | |
258 ; SSE2-NEXT: pandn %xmm0, %xmm2 | |
259 ; SSE2-NEXT: por %xmm1, %xmm2 | |
260 ; SSE2-NEXT: movdqa %xmm2, %xmm0 | |
261 ; SSE2-NEXT: retq | |
262 ; | |
263 ; SSSE3-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: | |
264 ; SSSE3: # BB#0: | |
265 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] | |
266 ; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] | |
267 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] | |
268 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 | |
269 ; SSSE3-NEXT: retq | |
270 ; | |
271 ; SSE41-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: | |
272 ; SSE41: # BB#0: | |
273 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] | |
274 ; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] | |
275 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] | |
276 ; SSE41-NEXT: movdqa %xmm1, %xmm0 | |
277 ; SSE41-NEXT: retq | |
257 ; | 278 ; |
258 ; AVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: | 279 ; AVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: |
259 ; AVX1: # BB#0: | 280 ; AVX1: # BB#0: |
260 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] | 281 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] |
261 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] | 282 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] |
316 ; SSE2-NEXT: packuswb %xmm1, %xmm0 | 337 ; SSE2-NEXT: packuswb %xmm1, %xmm0 |
317 ; SSE2-NEXT: retq | 338 ; SSE2-NEXT: retq |
318 ; | 339 ; |
319 ; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: | 340 ; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: |
320 ; SSSE3: # BB#0: | 341 ; SSSE3: # BB#0: |
321 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4] | 342 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] |
322 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero | 343 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9] |
323 ; SSSE3-NEXT: por %xmm1, %xmm0 | |
324 ; SSSE3-NEXT: retq | 344 ; SSSE3-NEXT: retq |
325 ; | 345 ; |
326 ; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: | 346 ; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: |
327 ; SSE41: # BB#0: | 347 ; SSE41: # BB#0: |
328 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4] | 348 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] |
329 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero | 349 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9] |
330 ; SSE41-NEXT: por %xmm1, %xmm0 | |
331 ; SSE41-NEXT: retq | 350 ; SSE41-NEXT: retq |
332 ; | 351 ; |
333 ; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: | 352 ; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: |
334 ; AVX: # BB#0: | 353 ; AVX: # BB#0: |
335 ; AVX-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4] | 354 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] |
336 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero | 355 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9] |
337 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 | |
338 ; AVX-NEXT: retq | 356 ; AVX-NEXT: retq |
339 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20> | 357 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20> |
340 ret <16 x i8> %shuffle | 358 ret <16 x i8> %shuffle |
341 } | 359 } |
342 | 360 |
343 define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) { | 361 define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) { |
344 ; SSE2-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: | 362 ; SSE2-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: |
345 ; SSE2: # BB#0: | 363 ; SSE2: # BB#0: |
346 ; SSE2-NEXT: pxor %xmm2, %xmm2 | 364 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] |
347 ; SSE2-NEXT: movdqa %xmm1, %xmm3 | 365 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] |
348 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] | 366 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] |
349 ; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4] | 367 ; SSE2-NEXT: pxor %xmm1, %xmm1 |
350 ; SSE2-NEXT: movdqa %xmm0, %xmm4 | 368 ; SSE2-NEXT: movdqa %xmm0, %xmm2 |
351 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15] | 369 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] |
352 ; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[3,2,1,0,4,5,6,7] | 370 ; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm2[3,2,1,0,4,5,6,7] |
353 ; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm4[0],xmm3[1] | 371 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] |
354 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] | 372 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] |
355 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4] | 373 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7] |
356 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] | 374 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0] |
357 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] | 375 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] |
358 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] | 376 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3] |
359 ; SSE2-NEXT: packuswb %xmm3, %xmm1 | 377 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7] |
360 ; SSE2-NEXT: movdqa %xmm1, %xmm0 | 378 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] |
379 ; SSE2-NEXT: packuswb %xmm3, %xmm0 | |
361 ; SSE2-NEXT: retq | 380 ; SSE2-NEXT: retq |
362 ; | 381 ; |
363 ; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: | 382 ; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: |
364 ; SSSE3: # BB#0: | 383 ; SSSE3: # BB#0: |
365 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4] | 384 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u] |
366 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero | 385 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u] |
367 ; SSSE3-NEXT: por %xmm1, %xmm0 | 386 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] |
368 ; SSSE3-NEXT: retq | 387 ; SSSE3-NEXT: retq |
369 ; | 388 ; |
370 ; SSE41-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: | 389 ; SSE41-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: |
371 ; SSE41: # BB#0: | 390 ; SSE41: # BB#0: |
372 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4] | 391 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u] |
373 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero | 392 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u] |
374 ; SSE41-NEXT: por %xmm1, %xmm0 | 393 ; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] |
375 ; SSE41-NEXT: retq | 394 ; SSE41-NEXT: retq |
376 ; | 395 ; |
377 ; AVX-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: | 396 ; AVX-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: |
378 ; AVX: # BB#0: | 397 ; AVX: # BB#0: |
379 ; AVX-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4] | 398 ; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u] |
380 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero | 399 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u] |
381 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 | 400 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] |
382 ; AVX-NEXT: retq | 401 ; AVX-NEXT: retq |
383 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20> | 402 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20> |
384 ret <16 x i8> %shuffle | 403 ret <16 x i8> %shuffle |
385 } | 404 } |
386 | 405 |
387 define <16 x i8> @shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i8> %a, <16 x i8> %b) { | 406 define <16 x i8> @shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i8> %a, <16 x i8> %b) { |
388 ; SSE2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: | 407 ; SSE2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: |
389 ; SSE2: # BB#0: | 408 ; SSE2: # BB#0: |
390 ; SSE2-NEXT: pxor %xmm2, %xmm2 | 409 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] |
391 ; SSE2-NEXT: movdqa %xmm1, %xmm3 | 410 ; SSE2-NEXT: andps %xmm2, %xmm0 |
392 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] | 411 ; SSE2-NEXT: andnps %xmm1, %xmm2 |
393 ; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,1,2,3,4,5,6,7] | 412 ; SSE2-NEXT: orps %xmm2, %xmm0 |
394 ; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,6,7] | |
395 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] | |
396 ; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[1,0,3,2,4,5,6,7] | |
397 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] | |
398 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,1,2,3,4,5,6,7] | |
399 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,6,7] | |
400 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] | |
401 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,0,3,2,4,5,6,7] | |
402 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] | |
403 ; SSE2-NEXT: packuswb %xmm0, %xmm1 | |
404 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 | |
405 ; SSE2-NEXT: packuswb %xmm0, %xmm0 | |
406 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] | |
407 ; SSE2-NEXT: retq | 413 ; SSE2-NEXT: retq |
408 ; | 414 ; |
409 ; SSSE3-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: | 415 ; SSSE3-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: |
410 ; SSSE3: # BB#0: | 416 ; SSSE3: # BB#0: |
411 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u] | 417 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u] |
413 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] | 419 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] |
414 ; SSSE3-NEXT: retq | 420 ; SSSE3-NEXT: retq |
415 ; | 421 ; |
416 ; SSE41-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: | 422 ; SSE41-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: |
417 ; SSE41: # BB#0: | 423 ; SSE41: # BB#0: |
418 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u] | 424 ; SSE41-NEXT: movdqa %xmm0, %xmm2 |
419 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] | 425 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] |
420 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] | 426 ; SSE41-NEXT: pblendvb %xmm2, %xmm1 |
427 ; SSE41-NEXT: movdqa %xmm1, %xmm0 | |
421 ; SSE41-NEXT: retq | 428 ; SSE41-NEXT: retq |
422 ; | 429 ; |
423 ; AVX-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: | 430 ; AVX-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: |
424 ; AVX: # BB#0: | 431 ; AVX: # BB#0: |
425 ; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u] | 432 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] |
426 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] | 433 ; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 |
427 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] | |
428 ; AVX-NEXT: retq | 434 ; AVX-NEXT: retq |
429 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31> | 435 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31> |
430 ret <16 x i8> %shuffle | 436 ret <16 x i8> %shuffle |
431 } | 437 } |
432 | 438 |
433 define <16 x i8> @shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31(<16 x i8> %a, <16 x i8> %b) { | 439 define <16 x i8> @shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31(<16 x i8> %a, <16 x i8> %b) { |
434 ; SSE2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: | 440 ; SSE2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: |
435 ; SSE2: # BB#0: | 441 ; SSE2: # BB#0: |
436 ; SSE2-NEXT: pxor %xmm2, %xmm2 | 442 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0] |
437 ; SSE2-NEXT: movdqa %xmm0, %xmm3 | 443 ; SSE2-NEXT: andps %xmm2, %xmm0 |
438 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] | 444 ; SSE2-NEXT: andnps %xmm1, %xmm2 |
439 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] | 445 ; SSE2-NEXT: orps %xmm2, %xmm0 |
440 ; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,1,2,3,4,5,6,7] | |
441 ; SSE2-NEXT: movdqa %xmm0, %xmm4 | |
442 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] | |
443 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] | |
444 ; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[3,1,2,3,4,5,6,7] | |
445 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] | |
446 ; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm4[2,0,3,1,4,5,6,7] | |
447 ; SSE2-NEXT: packuswb %xmm0, %xmm3 | |
448 ; SSE2-NEXT: movdqa %xmm1, %xmm4 | |
449 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15] | |
450 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[3,1,2,3] | |
451 ; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[1,1,2,3,4,5,6,7] | |
452 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] | |
453 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3] | |
454 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7] | |
455 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] | |
456 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3] | |
457 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,0,3,1,4,5,6,7] | |
458 ; SSE2-NEXT: packuswb %xmm0, %xmm1 | |
459 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] | |
460 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 | |
461 ; SSE2-NEXT: packuswb %xmm0, %xmm0 | |
462 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] | |
463 ; SSE2-NEXT: retq | 446 ; SSE2-NEXT: retq |
464 ; | 447 ; |
465 ; SSSE3-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: | 448 ; SSSE3-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: |
466 ; SSSE3: # BB#0: | 449 ; SSSE3: # BB#0: |
467 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[3,7,11,15,u,u,u,u,u,u,u,u,u,u,u,u] | 450 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[15] |
468 ; SSSE3-NEXT: movdqa %xmm0, %xmm2 | 451 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2],zero,xmm0[4,5,6],zero,xmm0[8,9,10],zero,xmm0[12,13,14],zero |
469 ; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u] | 452 ; SSSE3-NEXT: por %xmm1, %xmm0 |
470 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] | |
471 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] | |
472 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] | |
473 ; SSSE3-NEXT: retq | 453 ; SSSE3-NEXT: retq |
474 ; | 454 ; |
475 ; SSE41-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: | 455 ; SSE41-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: |
476 ; SSE41: # BB#0: | 456 ; SSE41: # BB#0: |
477 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[3,7,11,15,u,u,u,u,u,u,u,u,u,u,u,u] | |
478 ; SSE41-NEXT: movdqa %xmm0, %xmm2 | 457 ; SSE41-NEXT: movdqa %xmm0, %xmm2 |
479 ; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u] | 458 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0] |
480 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] | 459 ; SSE41-NEXT: pblendvb %xmm2, %xmm1 |
481 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] | 460 ; SSE41-NEXT: movdqa %xmm1, %xmm0 |
482 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] | |
483 ; SSE41-NEXT: retq | 461 ; SSE41-NEXT: retq |
484 ; | 462 ; |
485 ; AVX-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: | 463 ; AVX-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: |
486 ; AVX: # BB#0: | 464 ; AVX: # BB#0: |
487 ; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[3,7,11,15,u,u,u,u,u,u,u,u,u,u,u,u] | 465 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0] |
488 ; AVX-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u] | 466 ; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 |
489 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] | |
490 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] | |
491 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] | |
492 ; AVX-NEXT: retq | 467 ; AVX-NEXT: retq |
493 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31> | 468 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31> |
469 ret <16 x i8> %shuffle | |
470 } | |
471 | |
472 define <16 x i8> @shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz(<16 x i8> %a) { | |
473 ; SSE-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz: | |
474 ; SSE: # BB#0: | |
475 ; SSE-NEXT: andps {{.*}}(%rip), %xmm0 | |
476 ; SSE-NEXT: retq | |
477 ; | |
478 ; AVX-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz: | |
479 ; AVX: # BB#0: | |
480 ; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 | |
481 ; AVX-NEXT: retq | |
482 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31> | |
494 ret <16 x i8> %shuffle | 483 ret <16 x i8> %shuffle |
495 } | 484 } |
496 | 485 |
497 define <16 x i8> @shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31(<16 x i8> %a, <16 x i8> %b) { | 486 define <16 x i8> @shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31(<16 x i8> %a, <16 x i8> %b) { |
498 ; SSE2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: | 487 ; SSE2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: |
499 ; SSE2: # BB#0: | 488 ; SSE2: # BB#0: |
500 ; SSE2-NEXT: pxor %xmm2, %xmm2 | 489 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0] |
501 ; SSE2-NEXT: movdqa %xmm1, %xmm3 | 490 ; SSE2-NEXT: andps %xmm2, %xmm0 |
502 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] | 491 ; SSE2-NEXT: andnps %xmm1, %xmm2 |
503 ; SSE2-NEXT: movdqa %xmm0, %xmm4 | 492 ; SSE2-NEXT: orps %xmm2, %xmm0 |
504 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15] | |
505 ; SSE2-NEXT: movdqa %xmm4, %xmm5 | |
506 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7] | |
507 ; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm5[2,1,2,3,4,5,6,7] | |
508 ; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,7,6,7] | |
509 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] | |
510 ; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[1,0,2,3,4,5,6,7] | |
511 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm3[0] | |
512 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] | |
513 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] | |
514 ; SSE2-NEXT: movdqa %xmm0, %xmm2 | |
515 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] | |
516 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[2,1,2,3,4,5,6,7] | |
517 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7] | |
518 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] | |
519 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,0,2,3,4,5,6,7] | |
520 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] | |
521 ; SSE2-NEXT: packuswb %xmm4, %xmm0 | |
522 ; SSE2-NEXT: retq | 493 ; SSE2-NEXT: retq |
523 ; | 494 ; |
524 ; SSSE3-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: | 495 ; SSSE3-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: |
525 ; SSSE3: # BB#0: | 496 ; SSSE3: # BB#0: |
526 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[4],zero,zero,xmm1[7],zero,zero,zero,zero,xmm1[12],zero,zero,xmm1[15] | 497 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[4],zero,zero,xmm1[7],zero,zero,zero,zero,xmm1[12],zero,zero,xmm1[15] |
546 } | 517 } |
547 | 518 |
548 define <16 x i8> @shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15(<16 x i8> %a, <16 x i8> %b) { | 519 define <16 x i8> @shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15(<16 x i8> %a, <16 x i8> %b) { |
549 ; SSE2-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: | 520 ; SSE2-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: |
550 ; SSE2: # BB#0: | 521 ; SSE2: # BB#0: |
551 ; SSE2-NEXT: pxor %xmm2, %xmm2 | 522 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0] |
552 ; SSE2-NEXT: movdqa %xmm1, %xmm3 | 523 ; SSE2-NEXT: andps %xmm2, %xmm1 |
553 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] | 524 ; SSE2-NEXT: andnps %xmm0, %xmm2 |
554 ; SSE2-NEXT: movdqa %xmm1, %xmm4 | 525 ; SSE2-NEXT: orps %xmm1, %xmm2 |
555 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] | 526 ; SSE2-NEXT: movaps %xmm2, %xmm0 |
556 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] | |
557 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[3,1,2,1] | |
558 ; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[2,0,2,3,4,5,6,7] | |
559 ; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,6,7] | |
560 ; SSE2-NEXT: movdqa %xmm0, %xmm4 | |
561 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] | |
562 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3] | |
563 ; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,1,1,3,4,5,6,7] | |
564 ; SSE2-NEXT: movdqa %xmm0, %xmm5 | |
565 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm2[8],xmm5[9],xmm2[9],xmm5[10],xmm2[10],xmm5[11],xmm2[11],xmm5[12],xmm2[12],xmm5[13],xmm2[13],xmm5[14],xmm2[14],xmm5[15],xmm2[15] | |
566 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm5[0,1,2,3,7,5,6,7] | |
567 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,1,2,3] | |
568 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,3,1,0,4,5,6,7] | |
569 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm2[0] | |
570 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,2,3] | |
571 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] | |
572 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] | |
573 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm4[2,1,2,3,4,5,6,7] | |
574 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7] | |
575 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] | |
576 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,0,2,3,4,5,6,7] | |
577 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] | |
578 ; SSE2-NEXT: packuswb %xmm0, %xmm3 | |
579 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] | |
580 ; SSE2-NEXT: pand %xmm2, %xmm1 | |
581 ; SSE2-NEXT: pand %xmm2, %xmm0 | |
582 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,2,3] | |
583 ; SSE2-NEXT: movdqa %xmm1, %xmm2 | |
584 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] | |
585 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] | |
586 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,3,2,3,4,5,6,7] | |
587 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] | |
588 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] | |
589 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] | |
590 ; SSE2-NEXT: packuswb %xmm0, %xmm2 | |
591 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] | |
592 ; SSE2-NEXT: movdqa %xmm2, %xmm0 | |
593 ; SSE2-NEXT: retq | 527 ; SSE2-NEXT: retq |
594 ; | 528 ; |
595 ; SSSE3-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: | 529 ; SSSE3-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: |
596 ; SSSE3: # BB#0: | 530 ; SSSE3: # BB#0: |
597 ; SSSE3-NEXT: movdqa %xmm1, %xmm2 | 531 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,xmm0[10,11],zero,xmm0[13],zero,xmm0[15] |
598 ; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[1,3],zero,zero,xmm2[9],zero,zero,zero,xmm2[u,u,u,u,u,u,u,u] | 532 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3],zero,zero,zero,zero,xmm1[8,9],zero,zero,xmm1[12],zero,xmm1[14],zero |
599 ; SSSE3-NEXT: movdqa %xmm0, %xmm3 | |
600 ; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = zero,zero,xmm3[5,7],zero,xmm3[11,13,15,u,u,u,u,u,u,u,u] | |
601 ; SSSE3-NEXT: por %xmm2, %xmm3 | |
602 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[4,6],zero,xmm0[10],zero,zero,xmm0[u,u,u,u,u,u,u,u] | |
603 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2],zero,zero,xmm1[8],zero,xmm1[12,14,u,u,u,u,u,u,u,u] | |
604 ; SSSE3-NEXT: por %xmm1, %xmm0 | 533 ; SSSE3-NEXT: por %xmm1, %xmm0 |
605 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] | |
606 ; SSSE3-NEXT: retq | 534 ; SSSE3-NEXT: retq |
607 ; | 535 ; |
608 ; SSE41-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: | 536 ; SSE41-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: |
609 ; SSE41: # BB#0: | 537 ; SSE41: # BB#0: |
610 ; SSE41-NEXT: movdqa %xmm1, %xmm2 | 538 ; SSE41-NEXT: movdqa %xmm0, %xmm2 |
611 ; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[1,3],zero,zero,xmm2[9],zero,zero,zero,xmm2[u,u,u,u,u,u,u,u] | 539 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0] |
612 ; SSE41-NEXT: movdqa %xmm0, %xmm3 | 540 ; SSE41-NEXT: pblendvb %xmm1, %xmm2 |
613 ; SSE41-NEXT: pshufb {{.*#+}} xmm3 = zero,zero,xmm3[5,7],zero,xmm3[11,13,15,u,u,u,u,u,u,u,u] | 541 ; SSE41-NEXT: movdqa %xmm2, %xmm0 |
614 ; SSE41-NEXT: por %xmm2, %xmm3 | |
615 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[4,6],zero,xmm0[10],zero,zero,xmm0[u,u,u,u,u,u,u,u] | |
616 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2],zero,zero,xmm1[8],zero,xmm1[12,14,u,u,u,u,u,u,u,u] | |
617 ; SSE41-NEXT: por %xmm1, %xmm0 | |
618 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] | |
619 ; SSE41-NEXT: retq | 542 ; SSE41-NEXT: retq |
620 ; | 543 ; |
621 ; AVX-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: | 544 ; AVX-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: |
622 ; AVX: # BB#0: | 545 ; AVX: # BB#0: |
623 ; AVX-NEXT: vpshufb {{.*#+}} xmm2 = xmm1[1,3],zero,zero,xmm1[9],zero,zero,zero,xmm1[u,u,u,u,u,u,u,u] | 546 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0] |
624 ; AVX-NEXT: vpshufb {{.*#+}} xmm3 = zero,zero,xmm0[5,7],zero,xmm0[11,13,15,u,u,u,u,u,u,u,u] | 547 ; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 |
625 ; AVX-NEXT: vpor %xmm2, %xmm3, %xmm2 | |
626 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[4,6],zero,xmm0[10],zero,zero,xmm0[u,u,u,u,u,u,u,u] | |
627 ; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,2],zero,zero,xmm1[8],zero,xmm1[12,14,u,u,u,u,u,u,u,u] | |
628 ; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 | |
629 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] | |
630 ; AVX-NEXT: retq | 548 ; AVX-NEXT: retq |
631 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 10, i32 11, i32 28, i32 13, i32 30, i32 15> | 549 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 10, i32 11, i32 28, i32 13, i32 30, i32 15> |
632 ret <16 x i8> %shuffle | 550 ret <16 x i8> %shuffle |
633 } | 551 } |
634 | 552 |
705 | 623 |
706 define <16 x i8> @PR20540(<8 x i8> %a) { | 624 define <16 x i8> @PR20540(<8 x i8> %a) { |
707 ; SSE2-LABEL: PR20540: | 625 ; SSE2-LABEL: PR20540: |
708 ; SSE2: # BB#0: | 626 ; SSE2: # BB#0: |
709 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 | 627 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 |
710 ; SSE2-NEXT: pxor %xmm1, %xmm1 | 628 ; SSE2-NEXT: packuswb %xmm0, %xmm0 |
711 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3] | 629 ; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero |
712 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] | |
713 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] | |
714 ; SSE2-NEXT: packuswb %xmm1, %xmm0 | |
715 ; SSE2-NEXT: retq | 630 ; SSE2-NEXT: retq |
716 ; | 631 ; |
717 ; SSSE3-LABEL: PR20540: | 632 ; SSSE3-LABEL: PR20540: |
718 ; SSSE3: # BB#0: | 633 ; SSSE3: # BB#0: |
719 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero | 634 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero |
731 %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> | 646 %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> |
732 ret <16 x i8> %shuffle | 647 ret <16 x i8> %shuffle |
733 } | 648 } |
734 | 649 |
735 define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { | 650 define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { |
736 ; SSE2-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: | 651 ; SSE-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: |
737 ; SSE2: # BB#0: | 652 ; SSE: # BB#0: |
738 ; SSE2-NEXT: movzbl %dil, %eax | 653 ; SSE-NEXT: movzbl %dil, %eax |
739 ; SSE2-NEXT: movd %eax, %xmm0 | 654 ; SSE-NEXT: movd %eax, %xmm0 |
740 ; SSE2-NEXT: retq | 655 ; SSE-NEXT: retq |
741 ; | |
742 ; SSSE3-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: | |
743 ; SSSE3: # BB#0: | |
744 ; SSSE3-NEXT: movd %edi, %xmm0 | |
745 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero | |
746 ; SSSE3-NEXT: retq | |
747 ; | |
748 ; SSE41-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: | |
749 ; SSE41: # BB#0: | |
750 ; SSE41-NEXT: movd %edi, %xmm0 | |
751 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero | |
752 ; SSE41-NEXT: retq | |
753 ; | 656 ; |
754 ; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: | 657 ; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: |
755 ; AVX: # BB#0: | 658 ; AVX: # BB#0: |
756 ; AVX-NEXT: vmovd %edi, %xmm0 | 659 ; AVX-NEXT: movzbl %dil, %eax |
757 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero | 660 ; AVX-NEXT: vmovd %eax, %xmm0 |
758 ; AVX-NEXT: retq | 661 ; AVX-NEXT: retq |
759 %a = insertelement <16 x i8> undef, i8 %i, i32 0 | 662 %a = insertelement <16 x i8> undef, i8 %i, i32 0 |
760 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> | 663 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
761 ret <16 x i8> %shuffle | 664 ret <16 x i8> %shuffle |
762 } | 665 } |
763 | 666 |
764 define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { | 667 define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { |
765 ; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: | 668 ; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: |
766 ; SSE2: # BB#0: | 669 ; SSE2: # BB#0: |
767 ; SSE2-NEXT: movzbl %dil, %eax | 670 ; SSE2-NEXT: shll $8, %edi |
768 ; SSE2-NEXT: movd %eax, %xmm0 | 671 ; SSE2-NEXT: pxor %xmm0, %xmm0 |
769 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] | 672 ; SSE2-NEXT: pinsrw $2, %edi, %xmm0 |
770 ; SSE2-NEXT: retq | 673 ; SSE2-NEXT: retq |
771 ; | 674 ; |
772 ; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: | 675 ; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: |
773 ; SSSE3: # BB#0: | 676 ; SSSE3: # BB#0: |
774 ; SSSE3-NEXT: movd %edi, %xmm0 | 677 ; SSSE3-NEXT: shll $8, %edi |
775 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero | 678 ; SSSE3-NEXT: pxor %xmm0, %xmm0 |
679 ; SSSE3-NEXT: pinsrw $2, %edi, %xmm0 | |
776 ; SSSE3-NEXT: retq | 680 ; SSSE3-NEXT: retq |
777 ; | 681 ; |
778 ; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: | 682 ; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: |
779 ; SSE41: # BB#0: | 683 ; SSE41: # BB#0: |
780 ; SSE41-NEXT: movd %edi, %xmm0 | 684 ; SSE41-NEXT: pxor %xmm0, %xmm0 |
781 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero | 685 ; SSE41-NEXT: pinsrb $5, %edi, %xmm0 |
782 ; SSE41-NEXT: retq | 686 ; SSE41-NEXT: retq |
783 ; | 687 ; |
784 ; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: | 688 ; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: |
785 ; AVX: # BB#0: | 689 ; AVX: # BB#0: |
786 ; AVX-NEXT: vmovd %edi, %xmm0 | 690 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 |
787 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero | 691 ; AVX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 |
788 ; AVX-NEXT: retq | 692 ; AVX-NEXT: retq |
789 %a = insertelement <16 x i8> undef, i8 %i, i32 0 | 693 %a = insertelement <16 x i8> undef, i8 %i, i32 0 |
790 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> | 694 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> |
791 ret <16 x i8> %shuffle | 695 ret <16 x i8> %shuffle |
792 } | 696 } |
793 | 697 |
794 define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) { | 698 define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) { |
795 ; SSE-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: | 699 ; SSE2-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: |
796 ; SSE: # BB#0: | 700 ; SSE2: # BB#0: |
797 ; SSE-NEXT: movd %edi, %xmm0 | 701 ; SSE2-NEXT: shll $8, %edi |
798 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] | 702 ; SSE2-NEXT: pxor %xmm0, %xmm0 |
799 ; SSE-NEXT: retq | 703 ; SSE2-NEXT: pinsrw $7, %edi, %xmm0 |
704 ; SSE2-NEXT: retq | |
705 ; | |
706 ; SSSE3-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: | |
707 ; SSSE3: # BB#0: | |
708 ; SSSE3-NEXT: shll $8, %edi | |
709 ; SSSE3-NEXT: pxor %xmm0, %xmm0 | |
710 ; SSSE3-NEXT: pinsrw $7, %edi, %xmm0 | |
711 ; SSSE3-NEXT: retq | |
712 ; | |
713 ; SSE41-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: | |
714 ; SSE41: # BB#0: | |
715 ; SSE41-NEXT: pxor %xmm0, %xmm0 | |
716 ; SSE41-NEXT: pinsrb $15, %edi, %xmm0 | |
717 ; SSE41-NEXT: retq | |
800 ; | 718 ; |
801 ; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: | 719 ; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: |
802 ; AVX: # BB#0: | 720 ; AVX: # BB#0: |
803 ; AVX-NEXT: vmovd %edi, %xmm0 | 721 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 |
804 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] | 722 ; AVX-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 |
805 ; AVX-NEXT: retq | 723 ; AVX-NEXT: retq |
806 %a = insertelement <16 x i8> undef, i8 %i, i32 0 | 724 %a = insertelement <16 x i8> undef, i8 %i, i32 0 |
807 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16> | 725 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16> |
808 ret <16 x i8> %shuffle | 726 ret <16 x i8> %shuffle |
809 } | 727 } |
810 | 728 |
811 define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { | 729 define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { |
812 ; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: | 730 ; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: |
813 ; SSE2: # BB#0: | 731 ; SSE2: # BB#0: |
814 ; SSE2-NEXT: movzbl %dil, %eax | 732 ; SSE2-NEXT: movzbl %dil, %eax |
815 ; SSE2-NEXT: movd %eax, %xmm0 | 733 ; SSE2-NEXT: pxor %xmm0, %xmm0 |
816 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] | 734 ; SSE2-NEXT: pinsrw $1, %eax, %xmm0 |
817 ; SSE2-NEXT: retq | 735 ; SSE2-NEXT: retq |
818 ; | 736 ; |
819 ; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: | 737 ; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: |
820 ; SSSE3: # BB#0: | 738 ; SSSE3: # BB#0: |
821 ; SSSE3-NEXT: movd %edi, %xmm0 | 739 ; SSSE3-NEXT: movzbl %dil, %eax |
822 ; SSSE3-NEXT: pslld $24, %xmm0 | 740 ; SSSE3-NEXT: pxor %xmm0, %xmm0 |
823 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero | 741 ; SSSE3-NEXT: pinsrw $1, %eax, %xmm0 |
824 ; SSSE3-NEXT: retq | 742 ; SSSE3-NEXT: retq |
825 ; | 743 ; |
826 ; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: | 744 ; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: |
827 ; SSE41: # BB#0: | 745 ; SSE41: # BB#0: |
828 ; SSE41-NEXT: movd %edi, %xmm0 | 746 ; SSE41-NEXT: pxor %xmm0, %xmm0 |
829 ; SSE41-NEXT: pslld $24, %xmm0 | 747 ; SSE41-NEXT: pinsrb $2, %edi, %xmm0 |
830 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero | |
831 ; SSE41-NEXT: retq | 748 ; SSE41-NEXT: retq |
832 ; | 749 ; |
833 ; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: | 750 ; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: |
834 ; AVX: # BB#0: | 751 ; AVX: # BB#0: |
835 ; AVX-NEXT: vmovd %edi, %xmm0 | 752 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 |
836 ; AVX-NEXT: vpslld $24, %xmm0, %xmm0 | 753 ; AVX-NEXT: vpinsrb $2, %edi, %xmm0, %xmm0 |
837 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero | |
838 ; AVX-NEXT: retq | 754 ; AVX-NEXT: retq |
839 %a = insertelement <16 x i8> undef, i8 %i, i32 3 | 755 %a = insertelement <16 x i8> undef, i8 %i, i32 3 |
840 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> | 756 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
841 ret <16 x i8> %shuffle | 757 ret <16 x i8> %shuffle |
842 } | 758 } |
1192 ; SSE2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: | 1108 ; SSE2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: |
1193 ; SSE2: # BB#0: # %entry | 1109 ; SSE2: # BB#0: # %entry |
1194 ; SSE2-NEXT: pxor %xmm2, %xmm2 | 1110 ; SSE2-NEXT: pxor %xmm2, %xmm2 |
1195 ; SSE2-NEXT: movdqa %xmm0, %xmm3 | 1111 ; SSE2-NEXT: movdqa %xmm0, %xmm3 |
1196 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] | 1112 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] |
1197 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,0,1] | 1113 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,3,0,1] |
1114 ; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,1,2,2,4,5,6,7] | |
1115 ; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,7,7] | |
1116 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [65535,65535,65535,0,65535,0,0,65535] | |
1198 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] | 1117 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] |
1199 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7] | 1118 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,3] |
1200 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3] | 1119 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,3,1,3,4,5,6,7] |
1201 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,0,3,3,4,5,6,7] | 1120 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6,4] |
1202 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] | 1121 ; SSE2-NEXT: pand %xmm5, %xmm2 |
1203 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7] | 1122 ; SSE2-NEXT: pandn %xmm4, %xmm5 |
1204 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] | 1123 ; SSE2-NEXT: por %xmm2, %xmm5 |
1205 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[2,0,3,1,4,5,6,7] | 1124 ; SSE2-NEXT: psrlq $16, %xmm3 |
1206 ; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm3[2,1,2,3,4,5,6,7] | 1125 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] |
1207 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,3,2,3] | 1126 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,1,3] |
1208 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] | 1127 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] |
1209 ; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,2,2,3,4,5,6,7] | 1128 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,4] |
1210 ; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,7,6,7] | 1129 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] |
1211 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] | 1130 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] |
1212 ; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,2,3,1,4,5,6,7] | 1131 ; SSE2-NEXT: packuswb %xmm5, %xmm2 |
1213 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] | 1132 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255] |
1214 ; SSE2-NEXT: packuswb %xmm0, %xmm4 | 1133 ; SSE2-NEXT: pand %xmm0, %xmm2 |
1215 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7] | 1134 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3] |
1216 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] | 1135 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,3,3,4,5,6,7] |
1217 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,3,4,5,6,7] | 1136 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,7] |
1218 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] | 1137 ; SSE2-NEXT: pandn %xmm1, %xmm0 |
1219 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] | 1138 ; SSE2-NEXT: por %xmm2, %xmm0 |
1220 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7] | |
1221 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] | |
1222 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7] | |
1223 ; SSE2-NEXT: packuswb %xmm0, %xmm2 | |
1224 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] | |
1225 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3] | |
1226 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] | |
1227 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] | |
1228 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] | |
1229 ; SSE2-NEXT: retq | 1139 ; SSE2-NEXT: retq |
1230 ; | 1140 ; |
1231 ; SSSE3-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: | 1141 ; SSSE3-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: |
1232 ; SSSE3: # BB#0: # %entry | 1142 ; SSSE3: # BB#0: # %entry |
1233 ; SSSE3-NEXT: movdqa %xmm0, %xmm2 | 1143 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero |
1234 ; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u] | 1144 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] |
1235 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7] | 1145 ; SSSE3-NEXT: por %xmm1, %xmm0 |
1236 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] | |
1237 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u] | |
1238 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] | |
1239 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 | |
1240 ; SSSE3-NEXT: retq | 1146 ; SSSE3-NEXT: retq |
1241 ; | 1147 ; |
1242 ; SSE41-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: | 1148 ; SSE41-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: |
1243 ; SSE41: # BB#0: # %entry | 1149 ; SSE41: # BB#0: # %entry |
1244 ; SSE41-NEXT: movdqa %xmm0, %xmm2 | 1150 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero |
1245 ; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u] | 1151 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] |
1246 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7] | 1152 ; SSE41-NEXT: por %xmm1, %xmm0 |
1247 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] | |
1248 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u] | |
1249 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] | |
1250 ; SSE41-NEXT: movdqa %xmm1, %xmm0 | |
1251 ; SSE41-NEXT: retq | 1153 ; SSE41-NEXT: retq |
1252 ; | 1154 ; |
1253 ; AVX-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: | 1155 ; AVX-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: |
1254 ; AVX: # BB#0: # %entry | 1156 ; AVX: # BB#0: # %entry |
1255 ; AVX-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u] | 1157 ; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero |
1256 ; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7] | 1158 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] |
1257 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] | 1159 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 |
1258 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u] | |
1259 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] | |
1260 ; AVX-NEXT: retq | 1160 ; AVX-NEXT: retq |
1261 entry: | 1161 entry: |
1262 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 undef, i32 10, i32 2, i32 7, i32 22, i32 14, i32 7, i32 2, i32 18, i32 3, i32 1, i32 14, i32 18, i32 9, i32 11, i32 0> | 1162 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 undef, i32 10, i32 2, i32 7, i32 22, i32 14, i32 7, i32 2, i32 18, i32 3, i32 1, i32 14, i32 18, i32 9, i32 11, i32 0> |
1263 | 1163 |
1264 ret <16 x i8> %shuffle | 1164 ret <16 x i8> %shuffle |
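
; Editor note (added, not from the test file): the rewritten SSSE3/SSE41/AVX output
; above lowers this two-source byte shuffle as one pshufb per input plus a por,
; rather than the older unpack chain. A pshufb mask byte with its high bit set
; writes zero, so each input contributes only its own result lanes and the OR
; cannot collide. The function below is a minimal, editor-chosen illustration of
; that strategy on a simple low-half interleave; it can be fed to llc with
; -mattr=+ssse3 like the RUN lines at the top of this file.
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)

define <16 x i8> @two_source_pshufb_or_sketch(<16 x i8> %a, <16 x i8> %b) {
  ; even result bytes come from %a; -128 (0x80) mask bytes produce zero
  %from_a = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 -128, i8 1, i8 -128, i8 2, i8 -128, i8 3, i8 -128, i8 4, i8 -128, i8 5, i8 -128, i8 6, i8 -128, i8 7, i8 -128>)
  ; odd result bytes come from %b
  %from_b = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %b, <16 x i8> <i8 -128, i8 0, i8 -128, i8 1, i8 -128, i8 2, i8 -128, i8 3, i8 -128, i8 4, i8 -128, i8 5, i8 -128, i8 6, i8 -128, i8 7>)
  ; the zeroed lanes never overlap, so OR merges the two halves losslessly
  %merged = or <16 x i8> %from_a, %from_b
  ret <16 x i8> %merged
}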
1410 ; SSE2-NEXT: packuswb %xmm1, %xmm0 | 1310 ; SSE2-NEXT: packuswb %xmm1, %xmm0 |
1411 ; SSE2-NEXT: retq | 1311 ; SSE2-NEXT: retq |
1412 ; | 1312 ; |
1413 ; SSSE3-LABEL: PR12412: | 1313 ; SSSE3-LABEL: PR12412: |
1414 ; SSSE3: # BB#0: # %entry | 1314 ; SSSE3: # BB#0: # %entry |
1415 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,2,4,6,8,10,12,14] | 1315 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> |
1416 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero | 1316 ; SSSE3-NEXT: pshufb %xmm2, %xmm1 |
1417 ; SSSE3-NEXT: por %xmm1, %xmm0 | 1317 ; SSSE3-NEXT: pshufb %xmm2, %xmm0 |
1318 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] | |
1418 ; SSSE3-NEXT: retq | 1319 ; SSSE3-NEXT: retq |
1419 ; | 1320 ; |
1420 ; SSE41-LABEL: PR12412: | 1321 ; SSE41-LABEL: PR12412: |
1421 ; SSE41: # BB#0: # %entry | 1322 ; SSE41: # BB#0: # %entry |
1422 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,2,4,6,8,10,12,14] | 1323 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> |
1423 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero | 1324 ; SSE41-NEXT: pshufb %xmm2, %xmm1 |
1424 ; SSE41-NEXT: por %xmm1, %xmm0 | 1325 ; SSE41-NEXT: pshufb %xmm2, %xmm0 |
1326 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] | |
1425 ; SSE41-NEXT: retq | 1327 ; SSE41-NEXT: retq |
1426 ; | 1328 ; |
1427 ; AVX-LABEL: PR12412: | 1329 ; AVX-LABEL: PR12412: |
1428 ; AVX: # BB#0: # %entry | 1330 ; AVX: # BB#0: # %entry |
1429 ; AVX-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,2,4,6,8,10,12,14] | 1331 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> |
1430 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero | 1332 ; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1 |
1431 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 | 1333 ; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0 |
1334 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] | |
1432 ; AVX-NEXT: retq | 1335 ; AVX-NEXT: retq |
1433 entry: | 1336 entry: |
1434 %0 = shufflevector <16 x i8> %inval1, <16 x i8> %inval2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> | 1337 %0 = shufflevector <16 x i8> %inval1, <16 x i8> %inval2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> |
1435 ret <16 x i8> %0 | 1338 ret <16 x i8> %0 |
1436 } | 1339 } |
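
; Editor note (added, not from the test file): PR12412's shuffle takes the even
; bytes of the two concatenated inputs. On the little-endian x86 target used
; here that is the same computation as truncating the 32 bytes viewed as
; <16 x i16>, which is presumably why the lowering above can apply the same
; pshufb mask to both inputs and join the halves with punpcklqdq. The sketch
; below (editor-chosen name) states that equivalence directly in IR.
define <16 x i8> @pr12412_style_trunc_sketch(<8 x i16> %lo, <8 x i16> %hi) {
entry:
  %wide = shufflevector <8 x i16> %lo, <8 x i16> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ; elementwise trunc keeps each i16's low byte, i.e. the even bytes above
  %narrow = trunc <16 x i16> %wide to <16 x i8>
  ret <16 x i8> %narrow
}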
1340 | |
1341 define <16 x i8> @shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz(<16 x i8> %a) { | |
1342 ; SSE-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz: | |
1343 ; SSE: # BB#0: | |
1344 ; SSE-NEXT: psrld $8, %xmm0 | |
1345 ; SSE-NEXT: retq | |
1346 ; | |
1347 ; AVX-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz: | |
1348 ; AVX: # BB#0: | |
1349 ; AVX-NEXT: vpsrld $8, %xmm0, %xmm0 | |
1350 ; AVX-NEXT: retq | |
1351 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 2, i32 3, i32 16, i32 undef, i32 6, i32 7, i32 16, i32 undef, i32 10, i32 11, i32 16, i32 undef, i32 14, i32 15, i32 16> | |
1352 ret <16 x i8> %shuffle | |
1353 } | |
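
; Editor note (added, not from the test file): in the mask above each dword's
; bytes 2 and 3 move down one position, the top byte is filled with zero and the
; bottom byte is undef, which is exactly what a 32-bit logical right shift by 8
; produces on this little-endian target (the undef lane absorbs byte 1); hence
; the single psrld/vpsrld checked for above. Minimal editor-chosen sketch:
define <16 x i8> @srl_each_dword_by_8_sketch(<16 x i8> %a) {
  %v = bitcast <16 x i8> %a to <4 x i32>
  %s = lshr <4 x i32> %v, <i32 8, i32 8, i32 8, i32 8>
  %r = bitcast <4 x i32> %s to <16 x i8>
  ret <16 x i8> %r
}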
1354 | |
1355 define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8> %b) { | |
1356 ; SSE-LABEL: shuffle_v16i8_bitcast_unpack: | |
1357 ; SSE: # BB#0: | |
1358 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] | |
1359 ; SSE-NEXT: retq | |
1360 ; | |
1361 ; AVX-LABEL: shuffle_v16i8_bitcast_unpack: | |
1362 ; AVX: # BB#0: | |
1363 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] | |
1364 ; AVX-NEXT: retq | |
1365 %shuffle8 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 7, i32 23, i32 6, i32 22, i32 5, i32 21, i32 4, i32 20, i32 3, i32 19, i32 2, i32 18, i32 1, i32 17, i32 0, i32 16> | |
1366 %bitcast32 = bitcast <16 x i8> %shuffle8 to <4 x float> | |
1367 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> | |
1368 %bitcast16 = bitcast <4 x float> %shuffle32 to <8 x i16> | |
1369 %shuffle16 = shufflevector <8 x i16> %bitcast16, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> | |
1370 %bitcast8 = bitcast <8 x i16> %shuffle16 to <16 x i8> | |
1371 ret <16 x i8> %bitcast8 | |
1372 } | |
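
; Editor note (added, not from the test file): the chain above first interleaves
; the low halves in reversed byte order, then the <4 x float> reversal and the
; 16-bit element swaps undo that reversal, so the net effect is the plain
; punpcklbw interleave that both codegen paths emit. Editor-chosen sketch of the
; shuffle the chain folds to:
define <16 x i8> @unpacklo_direct_sketch(<16 x i8> %a, <16 x i8> %b) {
  %u = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  ret <16 x i8> %u
}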
1373 | |
1374 define <16 x i8> @insert_dup_mem_v16i8_i32(i32* %ptr) { | |
1375 ; SSE2-LABEL: insert_dup_mem_v16i8_i32: | |
1376 ; SSE2: # BB#0: | |
1377 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero | |
1378 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] | |
1379 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] | |
1380 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] | |
1381 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] | |
1382 ; SSE2-NEXT: retq | |
1383 ; | |
1384 ; SSSE3-LABEL: insert_dup_mem_v16i8_i32: | |
1385 ; SSSE3: # BB#0: | |
1386 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero | |
1387 ; SSSE3-NEXT: pxor %xmm1, %xmm1 | |
1388 ; SSSE3-NEXT: pshufb %xmm1, %xmm0 | |
1389 ; SSSE3-NEXT: retq | |
1390 ; | |
1391 ; SSE41-LABEL: insert_dup_mem_v16i8_i32: | |
1392 ; SSE41: # BB#0: | |
1393 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero | |
1394 ; SSE41-NEXT: pxor %xmm1, %xmm1 | |
1395 ; SSE41-NEXT: pshufb %xmm1, %xmm0 | |
1396 ; SSE41-NEXT: retq | |
1397 ; | |
1398 ; AVX1-LABEL: insert_dup_mem_v16i8_i32: | |
1399 ; AVX1: # BB#0: | |
1400 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero | |
1401 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 | |
1402 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 | |
1403 ; AVX1-NEXT: retq | |
1404 ; | |
1405 ; AVX2-LABEL: insert_dup_mem_v16i8_i32: | |
1406 ; AVX2: # BB#0: | |
1407 ; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0 | |
1408 ; AVX2-NEXT: retq | |
1409 %tmp = load i32, i32* %ptr, align 4 | |
1410 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 | |
1411 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8> | |
1412 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer | |
1413 ret <16 x i8> %tmp3 | |
1414 } | |
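
; Editor note (added, not from the test file): only byte 0 of the widened
; <4 x i32> value survives the splat, and on this little-endian target that is
; the low byte of the loaded i32, which is why AVX2 can fold the whole pattern
; into a single vpbroadcastb from memory. Equivalent direct byte splat
; (editor-chosen name), assuming little-endian layout:
define <16 x i8> @splat_low_byte_of_load_sketch(i32* %ptr) {
  %val  = load i32, i32* %ptr, align 4
  %byte = trunc i32 %val to i8
  %ins  = insertelement <16 x i8> undef, i8 %byte, i32 0
  %dup  = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %dup
}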
1415 | |
1416 define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) { | |
1417 ; SSE2-LABEL: insert_dup_mem_v16i8_sext_i8: | |
1418 ; SSE2: # BB#0: | |
1419 ; SSE2-NEXT: movsbl (%rdi), %eax | |
1420 ; SSE2-NEXT: movd %eax, %xmm0 | |
1421 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] | |
1422 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] | |
1423 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] | |
1424 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] | |
1425 ; SSE2-NEXT: retq | |
1426 ; | |
1427 ; SSSE3-LABEL: insert_dup_mem_v16i8_sext_i8: | |
1428 ; SSSE3: # BB#0: | |
1429 ; SSSE3-NEXT: movsbl (%rdi), %eax | |
1430 ; SSSE3-NEXT: movd %eax, %xmm0 | |
1431 ; SSSE3-NEXT: pxor %xmm1, %xmm1 | |
1432 ; SSSE3-NEXT: pshufb %xmm1, %xmm0 | |
1433 ; SSSE3-NEXT: retq | |
1434 ; | |
1435 ; SSE41-LABEL: insert_dup_mem_v16i8_sext_i8: | |
1436 ; SSE41: # BB#0: | |
1437 ; SSE41-NEXT: movsbl (%rdi), %eax | |
1438 ; SSE41-NEXT: movd %eax, %xmm0 | |
1439 ; SSE41-NEXT: pxor %xmm1, %xmm1 | |
1440 ; SSE41-NEXT: pshufb %xmm1, %xmm0 | |
1441 ; SSE41-NEXT: retq | |
1442 ; | |
1443 ; AVX1-LABEL: insert_dup_mem_v16i8_sext_i8: | |
1444 ; AVX1: # BB#0: | |
1445 ; AVX1-NEXT: movsbl (%rdi), %eax | |
1446 ; AVX1-NEXT: vmovd %eax, %xmm0 | |
1447 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 | |
1448 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 | |
1449 ; AVX1-NEXT: retq | |
1450 ; | |
1451 ; AVX2-LABEL: insert_dup_mem_v16i8_sext_i8: | |
1452 ; AVX2: # BB#0: | |
1453 ; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0 | |
1454 ; AVX2-NEXT: retq | |
1455 %tmp = load i8, i8* %ptr, align 1 | |
1456 %tmp1 = sext i8 %tmp to i32 | |
1457 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 | |
1458 %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8> | |
1459 %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> zeroinitializer | |
1460 ret <16 x i8> %tmp4 | |
1461 } |
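
; Editor note (added, not from the test file): the sext widening above does not
; change the result, because the splat reads only byte 0 of the bitcast value
; and on this little-endian target that byte is the loaded i8 itself; hence the
; same single vpbroadcastb on AVX2 as in the previous test. Editor-chosen sketch
; of the reduced form:
define <16 x i8> @splat_loaded_byte_sketch(i8* %ptr) {
  %b   = load i8, i8* %ptr, align 1
  %ins = insertelement <16 x i8> undef, i8 %b, i32 0
  %dup = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %dup
}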