Mercurial > hg > CbC > CbC_llvm
comparison llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll @ 173:0572611fdcc8 llvm10 llvm12
reorgnization done
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 25 May 2020 11:55:54 +0900 |
parents | 1d019706d866 |
children | 2e18cbf3894f |
comparison
equal
deleted
inserted
replaced
172:9fbae9c8bf63 | 173:0572611fdcc8 |
---|---|
1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | 1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
2 ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=avx -S < %s | FileCheck %s --check-prefixes=ALL,AVX | 2 ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=ALL,AVX,AVX1 |
3 ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=avx2 -S < %s | FileCheck %s --check-prefixes=ALL,AVX2 | 3 ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=ALL,AVX,AVX2 |
4 ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG | 4 ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=ALL,AVX,AVX512BW |
5 | 5 ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP,XOPAVX1 |
6 define <4 x i32> @vector_variable_shift_right(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { | 6 ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP,XOPAVX2 |
7 ; AVX-LABEL: @vector_variable_shift_right( | 7 ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG |
8 ; AVX-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer | 8 |
9 ; AVX-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer | 9 define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { |
10 ; AVX-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]] | 10 ; AVX1-LABEL: @vector_variable_shift_right_v4i32( |
11 ; AVX-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SPLAT1]] | 11 ; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer |
12 ; AVX-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[Z]], [[SPLAT2]] | 12 ; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer |
13 ; AVX-NEXT: [[TMP3:%.*]] = select <4 x i1> [[COND]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]] | 13 ; AVX1-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]] |
14 ; AVX-NEXT: ret <4 x i32> [[TMP3]] | 14 ; AVX1-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SPLAT1]] |
15 ; | 15 ; AVX1-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[Z]], [[SPLAT2]] |
16 ; AVX2-LABEL: @vector_variable_shift_right( | 16 ; AVX1-NEXT: [[TMP3:%.*]] = select <4 x i1> [[COND]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]] |
17 ; AVX1-NEXT: ret <4 x i32> [[TMP3]] | |
18 ; | |
19 ; AVX2-LABEL: @vector_variable_shift_right_v4i32( | |
17 ; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer | 20 ; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer |
18 ; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer | 21 ; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer |
19 ; AVX2-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]] | 22 ; AVX2-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]] |
20 ; AVX2-NEXT: [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]] | 23 ; AVX2-NEXT: [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]] |
21 ; AVX2-NEXT: ret <4 x i32> [[SH]] | 24 ; AVX2-NEXT: ret <4 x i32> [[SH]] |
25 ; | |
26 ; AVX512BW-LABEL: @vector_variable_shift_right_v4i32( | |
27 ; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer | |
28 ; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer | |
29 ; AVX512BW-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]] | |
30 ; AVX512BW-NEXT: [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]] | |
31 ; AVX512BW-NEXT: ret <4 x i32> [[SH]] | |
32 ; | |
33 ; XOP-LABEL: @vector_variable_shift_right_v4i32( | |
34 ; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer | |
35 ; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer | |
36 ; XOP-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]] | |
37 ; XOP-NEXT: [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]] | |
38 ; XOP-NEXT: ret <4 x i32> [[SH]] | |
22 ; | 39 ; |
23 %splat1 = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer | 40 %splat1 = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer |
24 %splat2 = shufflevector <4 x i32> %y, <4 x i32> undef, <4 x i32> zeroinitializer | 41 %splat2 = shufflevector <4 x i32> %y, <4 x i32> undef, <4 x i32> zeroinitializer |
25 %sel = select <4 x i1> %cond, <4 x i32> %splat1, <4 x i32> %splat2 | 42 %sel = select <4 x i1> %cond, <4 x i32> %splat1, <4 x i32> %splat2 |
26 %sh = lshr <4 x i32> %z, %sel | 43 %sh = lshr <4 x i32> %z, %sel |
27 ret <4 x i32> %sh | 44 ret <4 x i32> %sh |
28 } | 45 } |
29 | 46 |
47 define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16> %x, <16 x i16> %y, <16 x i16> %z) { | |
48 ; AVX1-LABEL: @vector_variable_shift_right_v16i16( | |
49 ; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer | |
50 ; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer | |
51 ; AVX1-NEXT: [[SEL:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i16> [[SPLAT1]], <16 x i16> [[SPLAT2]] | |
52 ; AVX1-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SPLAT1]] | |
53 ; AVX1-NEXT: [[TMP2:%.*]] = lshr <16 x i16> [[Z]], [[SPLAT2]] | |
54 ; AVX1-NEXT: [[TMP3:%.*]] = select <16 x i1> [[COND]], <16 x i16> [[TMP1]], <16 x i16> [[TMP2]] | |
55 ; AVX1-NEXT: ret <16 x i16> [[TMP3]] | |
56 ; | |
57 ; AVX2-LABEL: @vector_variable_shift_right_v16i16( | |
58 ; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer | |
59 ; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer | |
60 ; AVX2-NEXT: [[SEL:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i16> [[SPLAT1]], <16 x i16> [[SPLAT2]] | |
61 ; AVX2-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SPLAT1]] | |
62 ; AVX2-NEXT: [[TMP2:%.*]] = lshr <16 x i16> [[Z]], [[SPLAT2]] | |
63 ; AVX2-NEXT: [[TMP3:%.*]] = select <16 x i1> [[COND]], <16 x i16> [[TMP1]], <16 x i16> [[TMP2]] | |
64 ; AVX2-NEXT: ret <16 x i16> [[TMP3]] | |
65 ; | |
66 ; AVX512BW-LABEL: @vector_variable_shift_right_v16i16( | |
67 ; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer | |
68 ; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer | |
69 ; AVX512BW-NEXT: [[SEL:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i16> [[SPLAT1]], <16 x i16> [[SPLAT2]] | |
70 ; AVX512BW-NEXT: [[SH:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SEL]] | |
71 ; AVX512BW-NEXT: ret <16 x i16> [[SH]] | |
72 ; | |
73 ; XOP-LABEL: @vector_variable_shift_right_v16i16( | |
74 ; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer | |
75 ; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer | |
76 ; XOP-NEXT: [[SEL:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i16> [[SPLAT1]], <16 x i16> [[SPLAT2]] | |
77 ; XOP-NEXT: [[SH:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SEL]] | |
78 ; XOP-NEXT: ret <16 x i16> [[SH]] | |
79 ; | |
80 %splat1 = shufflevector <16 x i16> %x, <16 x i16> undef, <16 x i32> zeroinitializer | |
81 %splat2 = shufflevector <16 x i16> %y, <16 x i16> undef, <16 x i32> zeroinitializer | |
82 %sel = select <16 x i1> %cond, <16 x i16> %splat1, <16 x i16> %splat2 | |
83 %sh = lshr <16 x i16> %z, %sel | |
84 ret <16 x i16> %sh | |
85 } | |
86 | |
87 define <32 x i8> @vector_variable_shift_right_v32i8(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y, <32 x i8> %z) { | |
88 ; ALL-LABEL: @vector_variable_shift_right_v32i8( | |
89 ; ALL-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer | |
90 ; ALL-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer | |
91 ; ALL-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] | |
92 ; ALL-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]] | |
93 ; ALL-NEXT: ret <32 x i8> [[SH]] | |
94 ; | |
95 %splat1 = shufflevector <32 x i8> %x, <32 x i8> undef, <32 x i32> zeroinitializer | |
96 %splat2 = shufflevector <32 x i8> %y, <32 x i8> undef, <32 x i32> zeroinitializer | |
97 %sel = select <32 x i1> %cond, <32 x i8> %splat1, <32 x i8> %splat2 | |
98 %sh = lshr <32 x i8> %z, %sel | |
99 ret <32 x i8> %sh | |
100 } | |
101 | |
30 ; PR37428 - https://bugs.llvm.org/show_bug.cgi?id=37428 | 102 ; PR37428 - https://bugs.llvm.org/show_bug.cgi?id=37428 |
31 | 103 |
32 define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture readonly %control, i32 %count, i32 %amt0, i32 %amt1, i32 %x) { | 104 define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture readonly %control, i32 %count, i32 %amt0, i32 %amt1, i32 %x) { |
33 ; AVX-LABEL: @vector_variable_shift_left_loop( | 105 ; AVX1-LABEL: @vector_variable_shift_left_loop( |
34 ; AVX-NEXT: entry: | 106 ; AVX1-NEXT: entry: |
35 ; AVX-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0 | 107 ; AVX1-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0 |
36 ; AVX-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[COUNT]] to i64 | 108 ; AVX1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[COUNT]] to i64 |
37 ; AVX-NEXT: br i1 [[CMP16]], label [[VECTOR_PH:%.*]], label [[EXIT:%.*]] | 109 ; AVX1-NEXT: br i1 [[CMP16]], label [[VECTOR_PH:%.*]], label [[EXIT:%.*]] |
38 ; AVX: vector.ph: | 110 ; AVX1: vector.ph: |
39 ; AVX-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292 | 111 ; AVX1-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292 |
40 ; AVX-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> undef, i32 [[AMT0:%.*]], i32 0 | 112 ; AVX1-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> undef, i32 [[AMT0:%.*]], i32 0 |
41 ; AVX-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer | 113 ; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer |
42 ; AVX-NEXT: [[SPLATINSERT20:%.*]] = insertelement <4 x i32> undef, i32 [[AMT1:%.*]], i32 0 | 114 ; AVX1-NEXT: [[SPLATINSERT20:%.*]] = insertelement <4 x i32> undef, i32 [[AMT1:%.*]], i32 0 |
43 ; AVX-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer | 115 ; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer |
44 ; AVX-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0 | 116 ; AVX1-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0 |
45 ; AVX-NEXT: br label [[VECTOR_BODY:%.*]] | 117 ; AVX1-NEXT: [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> undef, <4 x i32> zeroinitializer |
46 ; AVX: vector.body: | 118 ; AVX1-NEXT: br label [[VECTOR_BODY:%.*]] |
47 ; AVX-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] | 119 ; AVX1: vector.body: |
48 ; AVX-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer | 120 ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
49 ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer | 121 ; AVX1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[CONTROL:%.*]], i64 [[INDEX]] |
50 ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> undef, <4 x i32> zeroinitializer | 122 ; AVX1-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>* |
51 ; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[CONTROL:%.*]], i64 [[INDEX]] | 123 ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 |
52 ; AVX-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <4 x i8>* | 124 ; AVX1-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer |
53 ; AVX-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP4]], align 1 | 125 ; AVX1-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]] |
54 ; AVX-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer | 126 ; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer |
55 ; AVX-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]] | 127 ; AVX1-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[SPLAT3]], [[TMP4]] |
56 ; AVX-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[TMP2]], [[TMP1]] | 128 ; AVX1-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer |
57 ; AVX-NEXT: [[TMP8:%.*]] = shl <4 x i32> [[TMP2]], [[TMP0]] | 129 ; AVX1-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[SPLAT3]], [[TMP6]] |
58 ; AVX-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP7]], <4 x i32> [[TMP8]] | 130 ; AVX1-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP5]], <4 x i32> [[TMP7]] |
59 ; AVX-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]] | 131 ; AVX1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]] |
60 ; AVX-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>* | 132 ; AVX1-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* |
61 ; AVX-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP11]], align 4 | 133 ; AVX1-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP10]], align 4 |
62 ; AVX-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 | 134 ; AVX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 |
63 ; AVX-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] | 135 ; AVX1-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] |
64 ; AVX-NEXT: br i1 [[TMP12]], label [[EXIT]], label [[VECTOR_BODY]] | 136 ; AVX1-NEXT: br i1 [[TMP11]], label [[EXIT]], label [[VECTOR_BODY]] |
65 ; AVX: exit: | 137 ; AVX1: exit: |
66 ; AVX-NEXT: ret void | 138 ; AVX1-NEXT: ret void |
67 ; | 139 ; |
68 ; AVX2-LABEL: @vector_variable_shift_left_loop( | 140 ; AVX2-LABEL: @vector_variable_shift_left_loop( |
69 ; AVX2-NEXT: entry: | 141 ; AVX2-NEXT: entry: |
70 ; AVX2-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0 | 142 ; AVX2-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0 |
71 ; AVX2-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[COUNT]] to i64 | 143 ; AVX2-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[COUNT]] to i64 |
94 ; AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] | 166 ; AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] |
95 ; AVX2-NEXT: br i1 [[TMP7]], label [[EXIT]], label [[VECTOR_BODY]] | 167 ; AVX2-NEXT: br i1 [[TMP7]], label [[EXIT]], label [[VECTOR_BODY]] |
96 ; AVX2: exit: | 168 ; AVX2: exit: |
97 ; AVX2-NEXT: ret void | 169 ; AVX2-NEXT: ret void |
98 ; | 170 ; |
171 ; AVX512BW-LABEL: @vector_variable_shift_left_loop( | |
172 ; AVX512BW-NEXT: entry: | |
173 ; AVX512BW-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0 | |
174 ; AVX512BW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[COUNT]] to i64 | |
175 ; AVX512BW-NEXT: br i1 [[CMP16]], label [[VECTOR_PH:%.*]], label [[EXIT:%.*]] | |
176 ; AVX512BW: vector.ph: | |
177 ; AVX512BW-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292 | |
178 ; AVX512BW-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> undef, i32 [[AMT0:%.*]], i32 0 | |
179 ; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer | |
180 ; AVX512BW-NEXT: [[SPLATINSERT20:%.*]] = insertelement <4 x i32> undef, i32 [[AMT1:%.*]], i32 0 | |
181 ; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer | |
182 ; AVX512BW-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0 | |
183 ; AVX512BW-NEXT: [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> undef, <4 x i32> zeroinitializer | |
184 ; AVX512BW-NEXT: br label [[VECTOR_BODY:%.*]] | |
185 ; AVX512BW: vector.body: | |
186 ; AVX512BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] | |
187 ; AVX512BW-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[CONTROL:%.*]], i64 [[INDEX]] | |
188 ; AVX512BW-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>* | |
189 ; AVX512BW-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 | |
190 ; AVX512BW-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer | |
191 ; AVX512BW-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]] | |
192 ; AVX512BW-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[SPLAT3]], [[TMP3]] | |
193 ; AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]] | |
194 ; AVX512BW-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* | |
195 ; AVX512BW-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP6]], align 4 | |
196 ; AVX512BW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 | |
197 ; AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] | |
198 ; AVX512BW-NEXT: br i1 [[TMP7]], label [[EXIT]], label [[VECTOR_BODY]] | |
199 ; AVX512BW: exit: | |
200 ; AVX512BW-NEXT: ret void | |
201 ; | |
202 ; XOP-LABEL: @vector_variable_shift_left_loop( | |
203 ; XOP-NEXT: entry: | |
204 ; XOP-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0 | |
205 ; XOP-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[COUNT]] to i64 | |
206 ; XOP-NEXT: br i1 [[CMP16]], label [[VECTOR_PH:%.*]], label [[EXIT:%.*]] | |
207 ; XOP: vector.ph: | |
208 ; XOP-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292 | |
209 ; XOP-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> undef, i32 [[AMT0:%.*]], i32 0 | |
210 ; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer | |
211 ; XOP-NEXT: [[SPLATINSERT20:%.*]] = insertelement <4 x i32> undef, i32 [[AMT1:%.*]], i32 0 | |
212 ; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer | |
213 ; XOP-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0 | |
214 ; XOP-NEXT: [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> undef, <4 x i32> zeroinitializer | |
215 ; XOP-NEXT: br label [[VECTOR_BODY:%.*]] | |
216 ; XOP: vector.body: | |
217 ; XOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] | |
218 ; XOP-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[CONTROL:%.*]], i64 [[INDEX]] | |
219 ; XOP-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>* | |
220 ; XOP-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 | |
221 ; XOP-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer | |
222 ; XOP-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]] | |
223 ; XOP-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[SPLAT3]], [[TMP3]] | |
224 ; XOP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]] | |
225 ; XOP-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* | |
226 ; XOP-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP6]], align 4 | |
227 ; XOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 | |
228 ; XOP-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] | |
229 ; XOP-NEXT: br i1 [[TMP7]], label [[EXIT]], label [[VECTOR_BODY]] | |
230 ; XOP: exit: | |
231 ; XOP-NEXT: ret void | |
232 ; | |
99 entry: | 233 entry: |
100 %cmp16 = icmp sgt i32 %count, 0 | 234 %cmp16 = icmp sgt i32 %count, 0 |
101 %wide.trip.count = zext i32 %count to i64 | 235 %wide.trip.count = zext i32 %count to i64 |
102 br i1 %cmp16, label %vector.ph, label %exit | 236 br i1 %cmp16, label %vector.ph, label %exit |
103 | 237 |
128 | 262 |
129 exit: | 263 exit: |
130 ret void | 264 ret void |
131 } | 265 } |
132 | 266 |
267 ; PR37426 - https://bugs.llvm.org/show_bug.cgi?id=37426 | |
268 ; If we don't have real vector shift instructions (AVX1), convert the funnel | |
269 ; shift into 2 funnel shifts and sink the splat shuffles into the loop. | |
270 | |
271 define void @fancierRotate2(i32* %arr, i8* %control, i32 %rot0, i32 %rot1) { | |
272 ; AVX1-LABEL: @fancierRotate2( | |
273 ; AVX1-NEXT: entry: | |
274 ; AVX1-NEXT: [[I0:%.*]] = insertelement <8 x i32> undef, i32 [[ROT0:%.*]], i32 0 | |
275 ; AVX1-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer | |
276 ; AVX1-NEXT: [[I1:%.*]] = insertelement <8 x i32> undef, i32 [[ROT1:%.*]], i32 0 | |
277 ; AVX1-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer | |
278 ; AVX1-NEXT: br label [[LOOP:%.*]] | |
279 ; AVX1: loop: | |
280 ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] | |
281 ; AVX1-NEXT: [[T0:%.*]] = getelementptr inbounds i8, i8* [[CONTROL:%.*]], i64 [[INDEX]] | |
282 ; AVX1-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to <8 x i8>* | |
283 ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[T1]], align 1 | |
284 ; AVX1-NEXT: [[T2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer | |
285 ; AVX1-NEXT: [[SHAMT:%.*]] = select <8 x i1> [[T2]], <8 x i32> [[S0]], <8 x i32> [[S1]] | |
286 ; AVX1-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]] | |
287 ; AVX1-NEXT: [[T5:%.*]] = bitcast i32* [[T4]] to <8 x i32>* | |
288 ; AVX1-NEXT: [[WIDE_LOAD21:%.*]] = load <8 x i32>, <8 x i32>* [[T5]], align 4 | |
289 ; AVX1-NEXT: [[TMP0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer | |
290 ; AVX1-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[TMP0]]) | |
291 ; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer | |
292 ; AVX1-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[TMP2]]) | |
293 ; AVX1-NEXT: [[TMP4:%.*]] = select <8 x i1> [[T2]], <8 x i32> [[TMP1]], <8 x i32> [[TMP3]] | |
294 ; AVX1-NEXT: store <8 x i32> [[TMP4]], <8 x i32>* [[T5]], align 4 | |
295 ; AVX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 | |
296 ; AVX1-NEXT: [[T7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 | |
297 ; AVX1-NEXT: br i1 [[T7]], label [[EXIT:%.*]], label [[LOOP]] | |
298 ; AVX1: exit: | |
299 ; AVX1-NEXT: ret void | |
300 ; | |
301 ; AVX2-LABEL: @fancierRotate2( | |
302 ; AVX2-NEXT: entry: | |
303 ; AVX2-NEXT: [[I0:%.*]] = insertelement <8 x i32> undef, i32 [[ROT0:%.*]], i32 0 | |
304 ; AVX2-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer | |
305 ; AVX2-NEXT: [[I1:%.*]] = insertelement <8 x i32> undef, i32 [[ROT1:%.*]], i32 0 | |
306 ; AVX2-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer | |
307 ; AVX2-NEXT: br label [[LOOP:%.*]] | |
308 ; AVX2: loop: | |
309 ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] | |
310 ; AVX2-NEXT: [[T0:%.*]] = getelementptr inbounds i8, i8* [[CONTROL:%.*]], i64 [[INDEX]] | |
311 ; AVX2-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to <8 x i8>* | |
312 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[T1]], align 1 | |
313 ; AVX2-NEXT: [[T2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer | |
314 ; AVX2-NEXT: [[SHAMT:%.*]] = select <8 x i1> [[T2]], <8 x i32> [[S0]], <8 x i32> [[S1]] | |
315 ; AVX2-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]] | |
316 ; AVX2-NEXT: [[T5:%.*]] = bitcast i32* [[T4]] to <8 x i32>* | |
317 ; AVX2-NEXT: [[WIDE_LOAD21:%.*]] = load <8 x i32>, <8 x i32>* [[T5]], align 4 | |
318 ; AVX2-NEXT: [[ROT:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[SHAMT]]) | |
319 ; AVX2-NEXT: store <8 x i32> [[ROT]], <8 x i32>* [[T5]], align 4 | |
320 ; AVX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 | |
321 ; AVX2-NEXT: [[T7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 | |
322 ; AVX2-NEXT: br i1 [[T7]], label [[EXIT:%.*]], label [[LOOP]] | |
323 ; AVX2: exit: | |
324 ; AVX2-NEXT: ret void | |
325 ; | |
326 ; AVX512BW-LABEL: @fancierRotate2( | |
327 ; AVX512BW-NEXT: entry: | |
328 ; AVX512BW-NEXT: [[I0:%.*]] = insertelement <8 x i32> undef, i32 [[ROT0:%.*]], i32 0 | |
329 ; AVX512BW-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer | |
330 ; AVX512BW-NEXT: [[I1:%.*]] = insertelement <8 x i32> undef, i32 [[ROT1:%.*]], i32 0 | |
331 ; AVX512BW-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer | |
332 ; AVX512BW-NEXT: br label [[LOOP:%.*]] | |
333 ; AVX512BW: loop: | |
334 ; AVX512BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] | |
335 ; AVX512BW-NEXT: [[T0:%.*]] = getelementptr inbounds i8, i8* [[CONTROL:%.*]], i64 [[INDEX]] | |
336 ; AVX512BW-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to <8 x i8>* | |
337 ; AVX512BW-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[T1]], align 1 | |
338 ; AVX512BW-NEXT: [[T2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer | |
339 ; AVX512BW-NEXT: [[SHAMT:%.*]] = select <8 x i1> [[T2]], <8 x i32> [[S0]], <8 x i32> [[S1]] | |
340 ; AVX512BW-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]] | |
341 ; AVX512BW-NEXT: [[T5:%.*]] = bitcast i32* [[T4]] to <8 x i32>* | |
342 ; AVX512BW-NEXT: [[WIDE_LOAD21:%.*]] = load <8 x i32>, <8 x i32>* [[T5]], align 4 | |
343 ; AVX512BW-NEXT: [[ROT:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[SHAMT]]) | |
344 ; AVX512BW-NEXT: store <8 x i32> [[ROT]], <8 x i32>* [[T5]], align 4 | |
345 ; AVX512BW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 | |
346 ; AVX512BW-NEXT: [[T7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 | |
347 ; AVX512BW-NEXT: br i1 [[T7]], label [[EXIT:%.*]], label [[LOOP]] | |
348 ; AVX512BW: exit: | |
349 ; AVX512BW-NEXT: ret void | |
350 ; | |
351 ; XOP-LABEL: @fancierRotate2( | |
352 ; XOP-NEXT: entry: | |
353 ; XOP-NEXT: [[I0:%.*]] = insertelement <8 x i32> undef, i32 [[ROT0:%.*]], i32 0 | |
354 ; XOP-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer | |
355 ; XOP-NEXT: [[I1:%.*]] = insertelement <8 x i32> undef, i32 [[ROT1:%.*]], i32 0 | |
356 ; XOP-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer | |
357 ; XOP-NEXT: br label [[LOOP:%.*]] | |
358 ; XOP: loop: | |
359 ; XOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] | |
360 ; XOP-NEXT: [[T0:%.*]] = getelementptr inbounds i8, i8* [[CONTROL:%.*]], i64 [[INDEX]] | |
361 ; XOP-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to <8 x i8>* | |
362 ; XOP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[T1]], align 1 | |
363 ; XOP-NEXT: [[T2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer | |
364 ; XOP-NEXT: [[SHAMT:%.*]] = select <8 x i1> [[T2]], <8 x i32> [[S0]], <8 x i32> [[S1]] | |
365 ; XOP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]] | |
366 ; XOP-NEXT: [[T5:%.*]] = bitcast i32* [[T4]] to <8 x i32>* | |
367 ; XOP-NEXT: [[WIDE_LOAD21:%.*]] = load <8 x i32>, <8 x i32>* [[T5]], align 4 | |
368 ; XOP-NEXT: [[ROT:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[SHAMT]]) | |
369 ; XOP-NEXT: store <8 x i32> [[ROT]], <8 x i32>* [[T5]], align 4 | |
370 ; XOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 | |
371 ; XOP-NEXT: [[T7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 | |
372 ; XOP-NEXT: br i1 [[T7]], label [[EXIT:%.*]], label [[LOOP]] | |
373 ; XOP: exit: | |
374 ; XOP-NEXT: ret void | |
375 ; | |
376 entry: | |
377 %i0 = insertelement <8 x i32> undef, i32 %rot0, i32 0 | |
378 %s0 = shufflevector <8 x i32> %i0, <8 x i32> undef, <8 x i32> zeroinitializer | |
379 %i1 = insertelement <8 x i32> undef, i32 %rot1, i32 0 | |
380 %s1 = shufflevector <8 x i32> %i1, <8 x i32> undef, <8 x i32> zeroinitializer | |
381 br label %loop | |
382 | |
383 loop: | |
384 %index = phi i64 [ 0, %entry ], [ %index.next, %loop ] | |
385 %t0 = getelementptr inbounds i8, i8* %control, i64 %index | |
386 %t1 = bitcast i8* %t0 to <8 x i8>* | |
387 %wide.load = load <8 x i8>, <8 x i8>* %t1, align 1 | |
388 %t2 = icmp eq <8 x i8> %wide.load, zeroinitializer | |
389 %shamt = select <8 x i1> %t2, <8 x i32> %s0, <8 x i32> %s1 | |
390 %t4 = getelementptr inbounds i32, i32* %arr, i64 %index | |
391 %t5 = bitcast i32* %t4 to <8 x i32>* | |
392 %wide.load21 = load <8 x i32>, <8 x i32>* %t5, align 4 | |
393 %rot = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %wide.load21, <8 x i32> %wide.load21, <8 x i32> %shamt) | |
394 store <8 x i32> %rot, <8 x i32>* %t5, align 4 | |
395 %index.next = add i64 %index, 8 | |
396 %t7 = icmp eq i64 %index.next, 1024 | |
397 br i1 %t7, label %exit, label %loop | |
398 | |
399 exit: | |
400 ret void | |
401 } | |
402 | |
403 declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) #1 | |
404 | |
133 ; Check that every instruction inserted by -codegenprepare has a debug location. | 405 ; Check that every instruction inserted by -codegenprepare has a debug location. |
134 ; DEBUG: CheckModuleDebugify: PASS | 406 ; DEBUG: CheckModuleDebugify: PASS |