; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -instcombine -S | FileCheck %s

; Given pattern:
;   (trunc (x << Q) to iDst) << K
; we should rewrite it as
;   (trunc (x << (Q+K)) to iDst) iff (Q+K) u< iDst
; This is only valid for shl.
; THIS FOLD DOES *NOT* REQUIRE ANY 'nuw'/'nsw' FLAGS!
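; For reference, the concrete instance in @t0 below has Q = 32 - %y and K = %y - 24,
; so Q + K = 8, which is u< 16, and the whole chain should fold into the truncated %x
; shifted left by 8.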

; Basic scalar test

define i16 @t0(i32 %x, i16 %y) {
; CHECK-LABEL: @t0(
; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i16
; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[X_TR]], 8
; CHECK-NEXT:    ret i16 [[T5]]
;
  %t0 = sub i16 32, %y
  %t1 = zext i16 %t0 to i32
  %t2 = shl i32 %x, %t1
  %t3 = trunc i32 %t2 to i16
  %t4 = add i16 %y, -24
  %t5 = shl i16 %t3, %t4
  ret i16 %t5
}

define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) {
; CHECK-LABEL: @t1_vec_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 8, i32 8>
; CHECK-NEXT:    [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16>
; CHECK-NEXT:    ret <2 x i16> [[T5]]
;
  %t0 = sub <2 x i16> <i16 32, i16 32>, %y
  %t1 = zext <2 x i16> %t0 to <2 x i32>
  %t2 = shl <2 x i32> %x, %t1
  %t3 = trunc <2 x i32> %t2 to <2 x i16>
  %t4 = add <2 x i16> %y, <i16 -24, i16 -24>
  %t5 = shl <2 x i16> %t3, %t4
  ret <2 x i16> %t5
}

define <2 x i16> @t2_vec_nonsplat(<2 x i32> %x, <2 x i16> %y) {
; CHECK-LABEL: @t2_vec_nonsplat(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 8, i32 30>
; CHECK-NEXT:    [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16>
; CHECK-NEXT:    ret <2 x i16> [[T5]]
;
  %t0 = sub <2 x i16> <i16 32, i16 30>, %y
  %t1 = zext <2 x i16> %t0 to <2 x i32>
  %t2 = shl <2 x i32> %x, %t1
  %t3 = trunc <2 x i32> %t2 to <2 x i16>
  %t4 = add <2 x i16> %y, <i16 -24, i16 0>
  %t5 = shl <2 x i16> %t3, %t4
  ret <2 x i16> %t5
}

; Basic vector tests

define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) {
; CHECK-LABEL: @t3_vec_nonsplat_undef0(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
; CHECK-NEXT:    ret <3 x i16> [[T5]]
;
  %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y
  %t1 = zext <3 x i16> %t0 to <3 x i32>
  %t2 = shl <3 x i32> %x, %t1
  %t3 = trunc <3 x i32> %t2 to <3 x i16>
  %t4 = add <3 x i16> %y, <i16 -24, i16 -24, i16 -24>
  %t5 = shl <3 x i16> %t3, %t4
  ret <3 x i16> %t5
}

define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
; CHECK-LABEL: @t4_vec_nonsplat_undef1(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
; CHECK-NEXT:    ret <3 x i16> [[T5]]
;
  %t0 = sub <3 x i16> <i16 32, i16 32, i16 32>, %y
  %t1 = zext <3 x i16> %t0 to <3 x i32>
  %t2 = shl <3 x i32> %x, %t1
  %t3 = trunc <3 x i32> %t2 to <3 x i16>
  %t4 = add <3 x i16> %y, <i16 -24, i16 undef, i16 -24>
  %t5 = shl <3 x i16> %t3, %t4
  ret <3 x i16> %t5
}

define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
; CHECK-LABEL: @t5_vec_nonsplat_undef1(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
; CHECK-NEXT:    ret <3 x i16> [[T5]]
;
  %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y
  %t1 = zext <3 x i16> %t0 to <3 x i32>
  %t2 = shl <3 x i32> %x, %t1
  %t3 = trunc <3 x i32> %t2 to <3 x i16>
  %t4 = add <3 x i16> %y, <i16 -24, i16 undef, i16 -24>
  %t5 = shl <3 x i16> %t3, %t4
  ret <3 x i16> %t5
}

; One-use tests
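; As the checks below show, the fold still fires with an extra use of the truncated
; value alone (@t6_extrause0) or of the final shift amount alone (@t7_extrause1),
; but not when both %t3 and %t4 must be kept alive (@t8_extrause2).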

declare void @use16(i16)
declare void @use32(i32)

define i16 @t6_extrause0(i32 %x, i16 %y) {
; CHECK-LABEL: @t6_extrause0(
; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
; CHECK-NEXT:    call void @use16(i16 [[T3]])
; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X]] to i16
; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[X_TR]], 8
; CHECK-NEXT:    ret i16 [[T5]]
;
  %t0 = sub i16 32, %y
  %t1 = zext i16 %t0 to i32
  %t2 = shl i32 %x, %t1
  %t3 = trunc i32 %t2 to i16
  %t4 = add i16 %y, -24
  call void @use16(i16 %t3)
  %t5 = shl i16 %t3, %t4
  ret i16 %t5
}

define i16 @t7_extrause1(i32 %x, i16 %y) {
; CHECK-LABEL: @t7_extrause1(
; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y:%.*]], -24
; CHECK-NEXT:    call void @use16(i16 [[T4]])
; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i16
; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[X_TR]], 8
; CHECK-NEXT:    ret i16 [[T5]]
;
  %t0 = sub i16 32, %y
  %t1 = zext i16 %t0 to i32
  %t2 = shl i32 %x, %t1
  %t3 = trunc i32 %t2 to i16
  %t4 = add i16 %y, -24
  call void @use16(i16 %t4)
  %t5 = shl i16 %t3, %t4
  ret i16 %t5
}

define i16 @t8_extrause2(i32 %x, i16 %y) {
; CHECK-LABEL: @t8_extrause2(
; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -24
; CHECK-NEXT:    call void @use16(i16 [[T3]])
; CHECK-NEXT:    call void @use16(i16 [[T4]])
; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
; CHECK-NEXT:    ret i16 [[T5]]
;
  %t0 = sub i16 32, %y
  %t1 = zext i16 %t0 to i32
  %t2 = shl i32 %x, %t1
  %t3 = trunc i32 %t2 to i16
  %t4 = add i16 %y, -24
  call void @use16(i16 %t3)
  call void @use16(i16 %t4)
  %t5 = shl i16 %t3, %t4
  ret i16 %t5
}

; No 'nuw'/'nsw' flags are to be propagated!
; But we can't test that, since such IR does not reach that code.

; Negative tests

; No folding possible for right-shifts.
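; A quick counterexample sketch for the lshr case (values chosen for illustration):
; with %x = 0xFF000000, Q = 8 and K = 8, (trunc (%x u>> 8) to i16) u>> 8 is 0, but
; trunc (%x u>> 16) to i16 is 0xFF00; reassociating would resurrect high bits that
; the truncation already discarded. The same applies to ashr with the sign bits.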
define i16 @t9_lshr(i32 %x, i16 %y) {
; CHECK-LABEL: @t9_lshr(
; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
; CHECK-NEXT:    [[T2:%.*]] = lshr i32 [[X:%.*]], [[T1]]
; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
; CHECK-NEXT:    ret i16 [[T3]]
;
  %t0 = sub i16 32, %y
  %t1 = zext i16 %t0 to i32
  %t2 = lshr i32 %x, %t1
  %t3 = trunc i32 %t2 to i16
  %t4 = add i16 %y, -24
  %t5 = lshr i16 %t3, %t4
  ret i16 %t3
}
define i16 @t10_ashr(i32 %x, i16 %y) {
; CHECK-LABEL: @t10_ashr(
; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
; CHECK-NEXT:    [[T2:%.*]] = ashr i32 [[X:%.*]], [[T1]]
; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
; CHECK-NEXT:    ret i16 [[T3]]
;
  %t0 = sub i16 32, %y
  %t1 = zext i16 %t0 to i32
  %t2 = ashr i32 %x, %t1
  %t3 = trunc i32 %t2 to i16
  %t4 = add i16 %y, -24
  %t5 = ashr i16 %t3, %t4
  ret i16 %t3
}

; Can't fold, total shift would be 32
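; ((30 - %y) + (%y + 2)) == 32, i.e. the full width of the i32 source, so there is
; no in-range combined shift amount to reassociate into.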
define i16 @n11(i32 %x, i16 %y) {
; CHECK-LABEL: @n11(
; CHECK-NEXT:    [[T0:%.*]] = sub i16 30, [[Y:%.*]]
; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
; CHECK-NEXT:    ret i16 [[T3]]
;
  %t0 = sub i16 30, %y
  %t1 = zext i16 %t0 to i32
  %t2 = shl i32 %x, %t1
  %t3 = trunc i32 %t2 to i16
  %t4 = add i16 %y, 2
  %t5 = shl i16 %t3, %t4
  ret i16 %t3
}

; Bit width mismatch of shift amount
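; Here both shift amounts are opaque ptrtoint constant expressions of different
; widths (i32 vs. i16), so no single combined shift amount can be formed.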

@Y32 = global i32 42
@Y16 = global i16 42
define i16 @t01(i32 %x) {
; CHECK-LABEL: @t01(
; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], ptrtoint (i32* @Y32 to i32)
; CHECK-NEXT:    [[T1:%.*]] = trunc i32 [[T0]] to i16
; CHECK-NEXT:    [[T2:%.*]] = shl i16 [[T1]], ptrtoint (i16* @Y16 to i16)
; CHECK-NEXT:    ret i16 [[T2]]
;
  %t0 = shl i32 %x, ptrtoint (i32* @Y32 to i32)
  %t1 = trunc i32 %t0 to i16
  %t2 = shl i16 %t1, ptrtoint (i16* @Y16 to i16)
  ret i16 %t2
}