95
|
1 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse4.1 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
|
|
2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 -check-prefix=AVX2ONLY
|
|
3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512f | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 -check-prefix=AVX512
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
6 ; Verify that we don't scalarize a packed vector shift left of 16-bit
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
7 ; signed integers if the amount is a constant build_vector.
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
8 ; Check that we produce a SSE2 packed integer multiply (pmullw) instead.
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
9
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
10 define <8 x i16> @test1(<8 x i16> %a) {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
11 %shl = shl <8 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12 ret <8 x i16> %shl
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
13 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
14 ; CHECK-LABEL: test1
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
15 ; CHECK: pmullw
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
16 ; CHECK-NEXT: ret
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
17
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
18
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19 define <8 x i16> @test2(<8 x i16> %a) {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
20 %shl = shl <8 x i16> %a, <i16 0, i16 undef, i16 0, i16 0, i16 1, i16 undef, i16 -1, i16 1>
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
21 ret <8 x i16> %shl
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
22 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
23 ; CHECK-LABEL: test2
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
24 ; CHECK: pmullw
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
25 ; CHECK-NEXT: ret
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
26
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
27
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
28 ; Verify that a vector shift left of 32-bit signed integers is simply expanded
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
29 ; into a SSE4.1 pmulld (instead of cvttps2dq + pmulld) if the vector of shift
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
30 ; counts is a constant build_vector.
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
31
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
32 define <4 x i32> @test3(<4 x i32> %a) {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
33 %shl = shl <4 x i32> %a, <i32 1, i32 -1, i32 2, i32 -3>
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
34 ret <4 x i32> %shl
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
35 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
36 ; CHECK-LABEL: test3
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
37 ; CHECK-NOT: cvttps2dq
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
38 ; SSE: pmulld
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
39 ; AVX2: vpsllvd
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
40 ; CHECK-NEXT: ret
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
41
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
42
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
43 define <4 x i32> @test4(<4 x i32> %a) {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
44 %shl = shl <4 x i32> %a, <i32 0, i32 0, i32 1, i32 1>
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
45 ret <4 x i32> %shl
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
46 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
47 ; CHECK-LABEL: test4
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
48 ; CHECK-NOT: cvttps2dq
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
49 ; SSE: pmulld
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
50 ; AVX2: vpsllvd
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
51 ; CHECK-NEXT: ret
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
52
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
53
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
54 ; If we have AVX/SSE2 but not AVX2, verify that the following shift is split
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
55 ; into two pmullw instructions. With AVX2, the test case below would produce
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
56 ; a single vpmullw.
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
57
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
58 define <16 x i16> @test5(<16 x i16> %a) {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
59 %shl = shl <16 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
60 ret <16 x i16> %shl
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
61 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
62 ; CHECK-LABEL: test5
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
63 ; SSE: pmullw
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
64 ; SSE-NEXT: pmullw
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
65 ; AVX2: vpmullw
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
66 ; AVX2-NOT: vpmullw
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
67 ; CHECK: ret
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
68
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
69
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
70 ; If we have AVX/SSE4.1 but not AVX2, verify that the following shift is split
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
71 ; into two pmulld instructions. With AVX2, the test case below would produce
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
72 ; a single vpsllvd instead.
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
73
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
74 define <8 x i32> @test6(<8 x i32> %a) {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
75 %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
76 ret <8 x i32> %shl
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
77 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
78 ; CHECK-LABEL: test6
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
79 ; SSE: pmulld
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
80 ; SSE-NEXT: pmulld
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
81 ; AVX2: vpsllvd
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
82 ; CHECK: ret
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
83
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
84
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
85 ; With AVX2 and AVX512, the test case below should produce a sequence of
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
86 ; two vpmullw instructions. On SSE2 instead, we split the shift in four
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
87 ; parts and then we convert each part into a pmullw.
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
88
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
89 define <32 x i16> @test7(<32 x i16> %a) {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
90 %shl = shl <32 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
91 ret <32 x i16> %shl
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
92 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
93 ; CHECK-LABEL: test7
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
94 ; SSE: pmullw
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
95 ; SSE-NEXT: pmullw
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
96 ; SSE-NEXT: pmullw
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
97 ; SSE-NEXT: pmullw
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
98 ; AVX2: vpmullw
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
99 ; AVX2-NEXT: vpmullw
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
100 ; CHECK: ret
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
101
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
102
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
103 ; Similar to test7; the difference is that with AVX512 support
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
104 ; we only produce a single vpsllvd/vpsllvq instead of a pair of vpsllvd/vpsllvq.
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
105
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
106 define <16 x i32> @test8(<16 x i32> %a) {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
107 %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
108 ret <16 x i32> %shl
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
109 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
110 ; CHECK-LABEL: test8
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
111 ; SSE: pmulld
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
112 ; SSE-NEXT: pmulld
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
113 ; SSE-NEXT: pmulld
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
114 ; SSE-NEXT: pmulld
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
115 ; AVX2ONLY: vpsllvd
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
116 ; AVX2ONLY-NEXT: vpsllvd
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
117 ; AVX512: vpsllvd
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
118 ; AVX512-NOT: vpsllvd
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
119 ; CHECK: ret
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
120
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
121
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
122 ; The shift from 'test9' gets scalarized if we don't have AVX2/AVX512f support.
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
123
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
124 define <8 x i64> @test9(<8 x i64> %a) {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
125 %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
126 ret <8 x i64> %shl
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
127 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
128 ; CHECK-LABEL: test9
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
129 ; AVX2ONLY: vpsllvq
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
130 ; AVX2ONLY-NEXT: vpsllvq
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
131 ; AVX512: vpsllvq
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
132 ; AVX512-NOT: vpsllvq
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
133 ; CHECK: ret
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
134
|