150
|
1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
|
2
|
|
; and(%y, select(%x < 11, 0, -1)) with the select as the second operand.
; The checks expect a scalar select whose result is moved into a v register
; and used as the store data, with no v_and_b32 in between.
; GCN-LABEL: {{^}}select_and1:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_and_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 0, i32 -1
  %res = and i32 %y, %mask
  store i32 %res, i32 addrspace(1)* %p, align 4
  ret void
}
|
|
15
|
|
; Same as select_and1 but with the select as the first operand of the and.
; The checks expect a scalar select whose result is moved into a v register
; and used as the store data, with no v_and_b32 in between.
; GCN-LABEL: {{^}}select_and2:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_and_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 0, i32 -1
  %res = and i32 %mask, %y
  store i32 %res, i32 addrspace(1)* %p, align 4
  ret void
}
|
|
28
|
|
; and(%y, select(%x < 11, -1, 0)): the select arms are swapped relative to
; select_and1. The checks expect a scalar select whose result is moved into a
; v register and used as the store data, with no v_and_b32 in between.
; GCN-LABEL: {{^}}select_and3:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_and_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and3(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 -1, i32 0
  %res = and i32 %y, %mask
  store i32 %res, i32 addrspace(1)* %p, align 4
  ret void
}
|
|
41
|
|
; <4 x i32> form: and(select(%x < 11, zeroinitializer, all-ones), %y).
; The checks expect four v_cndmask_b32_e32 with a 0 operand, no v_and_b32,
; and then a store. The SEL capture is rebound on every line and never read
; back, so it only constrains the destination to be a v register.
; GCN-LABEL: {{^}}select_and_v4:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN-NOT: v_and_b32
; GCN: store_dword
define amdgpu_kernel void @select_and_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <4 x i32> %mask, %y
  store <4 x i32> %res, <4 x i32> addrspace(1)* %p, align 32
  ret void
}
|
|
56
|
|
; or(%y, select(%x < 11, 0, -1)) with the select as the second operand.
; The checks expect a scalar select whose result is moved into a v register
; and used as the store data, with no v_or_b32 in between.
; GCN-LABEL: {{^}}select_or1:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_or_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_or1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 0, i32 -1
  %res = or i32 %y, %mask
  store i32 %res, i32 addrspace(1)* %p, align 4
  ret void
}
|
|
69
|
|
; Same as select_or1 but with the select as the first operand of the or.
; The checks expect a scalar select whose result is moved into a v register
; and used as the store data, with no v_or_b32 in between.
; GCN-LABEL: {{^}}select_or2:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_or_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_or2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 0, i32 -1
  %res = or i32 %mask, %y
  store i32 %res, i32 addrspace(1)* %p, align 4
  ret void
}
|
|
82
|
|
; or(%y, select(%x < 11, -1, 0)): the select arms are swapped relative to
; select_or1. The checks expect a scalar select whose result is moved into a
; v register and used as the store data, with no v_or_b32 in between.
; GCN-LABEL: {{^}}select_or3:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_or_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_or3(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 -1, i32 0
  %res = or i32 %y, %mask
  store i32 %res, i32 addrspace(1)* %p, align 4
  ret void
}
|
|
95
|
|
; <4 x i32> form: or(select(%x < 11, zeroinitializer, all-ones), %y).
; The checks expect four v_cndmask_b32_e32 with a -1 operand, no v_or_b32,
; and then a store. The SEL capture is rebound on every line and never read
; back, so it only constrains the destination to be a v register.
; GCN-LABEL: {{^}}select_or_v4:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN-NOT: v_or_b32
; GCN: store_dword
define amdgpu_kernel void @select_or_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = or <4 x i32> %mask, %y
  store <4 x i32> %res, <4 x i32> addrspace(1)* %p, align 32
  ret void
}
|
|
110
|
|
; sub 5, select(%cond, -4, 3). With the subtraction applied to each arm the
; results are 5 - (-4) = 9 and 5 - 3 = 2, which are the two constants the
; check expects in a single scalar select.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 9, 2
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, i32 -4, i32 3
  %res = sub i32 5, %pick
  store i32 %res, i32 addrspace(1)* %p, align 4
  ret void
}
|
|
119
|
|
; i16 variant of sub 5, select(%cond, -4, 3). The folded arm values are
; 5 - (-4) = 9 and 5 - 3 = 2; here the check expects them as the operands of
; a v_cndmask_b32_e64 (written 2, 9).
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16(i16 addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, i16 -4, i16 3
  %res = sub i16 5, %pick
  store i16 %res, i16 addrspace(1)* %p, align 2
  ret void
}
|
|
128
|
|
; sub 1, select(%cond, 4, 3000) on i16. The folded arm values are 1 - 4 = -3
; and 1 - 3000 = -2999. The checks expect -2999 to be loaded into a v register
; as the 32-bit pattern 0xfffff449 and then combined with -3 in a
; v_cndmask_b32_e64.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16_neg:
; GCN: v_mov_b32_e32 [[F:v[0-9]+]], 0xfffff449
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, [[F]], -3,
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16_neg(i16 addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, i16 4, i16 3000
  %res = sub i16 1, %pick
  store i16 %res, i16 addrspace(1)* %p, align 2
  ret void
}
|
|
138
|
|
; <2 x i16> variant: <5, 7> - select(%cond, <-4, 2>, <3, 1>).
; The folded arms are <9, 5> and <2, 6>. These match the packed constants
; 0x50009 and 0x60002 in the checks when element 0 occupies the low 16 bits.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v2i16:
; GCN-DAG: s_mov_b32 [[T:s[0-9]+]], 0x50009
; GCN: s_cselect_b32 s{{[0-9]+}}, [[T]], 0x60002
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v2i16(<2 x i16> addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, <2 x i16> <i16 -4, i16 2>, <2 x i16> <i16 3, i16 1>
  %res = sub <2 x i16> <i16 5, i16 7>, %pick
  store <2 x i16> %res, <2 x i16> addrspace(1)* %p, align 4
  ret void
}
|
|
148
|
|
; <4 x i32> variant: <5, 7, 9, 11> - select(%cond, <-4, 2, 3, 4>, <3, 1, -1, -3>).
; The folded arms are <9, 5, 6, 7> (true) and <2, 6, 10, 14> (false). The
; checks expect one v_cndmask_b32_e64 per lane with those constant pairs, and
; accept them in any order.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v4i32:
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 6, 5,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 10, 6,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 14, 7,
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v4i32(<4 x i32> addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, <4 x i32> <i32 -4, i32 2, i32 3, i32 4>, <4 x i32> <i32 3, i32 1, i32 -1, i32 -3>
  %res = sub <4 x i32> <i32 5, i32 7, i32 9, i32 11>, %pick
  store <4 x i32> %res, <4 x i32> addrspace(1)* %p, align 32
  ret void
}
|
|
160
|
|
; sdiv 120, select(%cond, 121, 23) on i64. Dividing per arm gives
; 120 / 121 = 0 and 120 / 23 = 5, the two constants the check expects in a
; scalar select.
; GCN-LABEL: {{^}}sdiv_constant_sel_constants_i64:
; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 5
define amdgpu_kernel void @sdiv_constant_sel_constants_i64(i64 addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, i64 121, i64 23
  %res = sdiv i64 120, %pick
  store i64 %res, i64 addrspace(1)* %p, align 8
  ret void
}
|
|
169
|
|
; sdiv 184, select(%cond, 7, 23) on i32. Dividing per arm gives
; 184 / 7 = 26 and 184 / 23 = 8, the two constants the check expects in a
; scalar select.
; NOTE(review): the i32 store below uses align 8, whereas the other scalar i32
; stores in this file use align 4. Possibly carried over from the i64
; variant; confirm whether it is intentional.
; GCN-LABEL: {{^}}sdiv_constant_sel_constants_i32:
; GCN: s_cselect_b32 s{{[0-9]+}}, 26, 8
define amdgpu_kernel void @sdiv_constant_sel_constants_i32(i32 addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, i32 7, i32 23
  %res = sdiv i32 184, %pick
  store i32 %res, i32 addrspace(1)* %p, align 8
  ret void
}
|
|
178
|
|
; udiv 120, select(%cond, -4, 23) on i64. Read as unsigned, -4 is far larger
; than 120, so that arm yields 0; the other arm is 120 / 23 = 5. The check
; expects those two constants in a scalar select.
; GCN-LABEL: {{^}}udiv_constant_sel_constants_i64:
; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 5
define amdgpu_kernel void @udiv_constant_sel_constants_i64(i64 addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, i64 -4, i64 23
  %res = udiv i64 120, %pick
  store i64 %res, i64 addrspace(1)* %p, align 8
  ret void
}
|
|
187
|
|
; srem 33, select(%cond, 34, 15) on i64. Per arm the remainders are
; 33 rem 34 = 33 and 33 rem 15 = 3, the two constants the check expects in a
; scalar select.
; GCN-LABEL: {{^}}srem_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 33, 3
define amdgpu_kernel void @srem_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, i64 34, i64 15
  %res = srem i64 33, %pick
  store i64 %res, i64 addrspace(1)* %p, align 8
  ret void
}
|
|
196
|
|
; urem 33, select(%cond, 34, 15) on i64. Per arm the remainders are
; 33 rem 34 = 33 and 33 rem 15 = 3, the two constants the check expects in a
; scalar select.
; GCN-LABEL: {{^}}urem_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 33, 3
define amdgpu_kernel void @urem_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, i64 34, i64 15
  %res = urem i64 33, %pick
  store i64 %res, i64 addrspace(1)* %p, align 8
  ret void
}
|
|
205
|
|
; shl 1, select(%cond, 2, 3). Shifting per arm gives 1 << 2 = 4 and
; 1 << 3 = 8, the two constants the check expects in a scalar select.
; GCN-LABEL: {{^}}shl_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 4, 8
define amdgpu_kernel void @shl_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %amt = select i1 %cond, i32 2, i32 3
  %res = shl i32 1, %amt
  store i32 %res, i32 addrspace(1)* %p, align 4
  ret void
}
|
|
214
|
|
; lshr 64, select(%cond, 2, 3). Shifting per arm gives 64 >> 2 = 16 and
; 64 >> 3 = 8, the two constants the check expects in a scalar select.
; GCN-LABEL: {{^}}lshr_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 16, 8
define amdgpu_kernel void @lshr_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %amt = select i1 %cond, i32 2, i32 3
  %res = lshr i32 64, %amt
  store i32 %res, i32 addrspace(1)* %p, align 4
  ret void
}
|
|
223
|
|
; ashr 128, select(%cond, 2, 3). Shifting per arm gives 128 >> 2 = 32 and
; 128 >> 3 = 16, the two constants the check expects in a scalar select.
; GCN-LABEL: {{^}}ashr_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 32, 16
define amdgpu_kernel void @ashr_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %amt = select i1 %cond, i32 2, i32 3
  %res = ashr i32 128, %amt
  store i32 %res, i32 addrspace(1)* %p, align 4
  ret void
}
|
|
232
|
|
; fsub -1.0, select(%cond, -2.0, 3.0). Per arm the results are
; -1.0 - (-2.0) = 1.0 and -1.0 - 3.0 = -4.0; the check expects both as
; operands of one v_cndmask_b32_e64 (written -4.0, 1.0).
; GCN-LABEL: {{^}}fsub_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, -4.0, 1.0,
define amdgpu_kernel void @fsub_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, float -2.0, float 3.0
  %res = fsub float -1.0, %pick
  store float %res, float addrspace(1)* %p, align 4
  ret void
}
|
|
241
|
|
; Half-precision variant of fsub -1.0, select(%cond, -2.0, 3.0). The folded
; arm values are 1.0 and -4.0, whose half encodings are 0x3c00 and 0xc400.
; The checks expect both to be loaded into v registers first and then fed
; to a v_cndmask_b32_e32.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_f16:
; TODO: It should be possible to fold the constants with OpSel.
; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x3c00
; GCN-DAG: v_mov_b32_e32 [[F:v[0-9]+]], 0xc400
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, [[F]], [[T]],
define amdgpu_kernel void @fsub_constant_sel_constants_f16(half addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, half -2.0, half 3.0
  %res = fsub half -1.0, %pick
  store half %res, half addrspace(1)* %p, align 2
  ret void
}
|
|
253
|
|
; <2 x half> variant: <-1.0, 2.0> - select(%cond, <-2.0, -3.0>, <-1.0, 4.0>).
; The folded arms are <1.0, 5.0> and <0.0, -2.0>. With element 0 in the low
; 16 bits, <1.0, 5.0> packs to 0x45003c00, and <0.0, -2.0> packs to
; 0xc0000000, which is the 32-bit pattern of the float -2.0 that appears in
; the check.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_v2f16:
; GCN: s_cselect_b32 s{{[0-9]+}}, 0x45003c00, -2.0
define amdgpu_kernel void @fsub_constant_sel_constants_v2f16(<2 x half> addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, <2 x half> <half -2.0, half -3.0>, <2 x half> <half -1.0, half 4.0>
  %res = fsub <2 x half> <half -1.0, half 2.0>, %pick
  store <2 x half> %res, <2 x half> addrspace(1)* %p, align 4
  ret void
}
|
|
262
|
|
; <4 x float> variant:
;   <-1.0, 2.0, 5.0, 8.0> - select(%cond, <-2.0, -3.0, -4.0, -5.0>, <-1.0, 0.0, 1.0, 2.0>)
; The folded arms are <1.0, 5.0, 9.0, 13.0> (true) and <0.0, 2.0, 4.0, 6.0>
; (false). The checks expect 5.0, 9.0, 13.0 and 6.0 to be loaded into v
; registers as 0x40a00000, 0x41100000, 0x41500000 and 0x40c00000, while
; 0, 1.0, 2.0 and 4.0 appear directly as v_cndmask operands. All lines are
; order-independent.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_v4f32:
; GCN-DAG: v_mov_b32_e32 [[T2:v[0-9]+]], 0x40a00000
; GCN-DAG: v_mov_b32_e32 [[T3:v[0-9]+]], 0x41100000
; GCN-DAG: v_mov_b32_e32 [[T4:v[0-9]+]], 0x41500000
; GCN-DAG: v_mov_b32_e32 [[F4:v[0-9]+]], 0x40c00000
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0,
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, [[T2]],
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[T3]],
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[F4]], [[T4]],
define amdgpu_kernel void @fsub_constant_sel_constants_v4f32(<4 x float> addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, <4 x float> <float -2.0, float -3.0, float -4.0, float -5.0>, <4 x float> <float -1.0, float 0.0, float 1.0, float 2.0>
  %res = fsub <4 x float> <float -1.0, float 2.0, float 5.0, float 8.0>, %pick
  store <4 x float> %res, <4 x float> addrspace(1)* %p, align 32
  ret void
}
|
|
278
|
|
; fdiv 8.0, select(%cond, -4.0, 2.0). Per arm the quotients are
; 8.0 / -4.0 = -2.0 and 8.0 / 2.0 = 4.0; the check expects both as operands
; of one v_cndmask_b32_e64 (written 4.0, -2.0).
; GCN-LABEL: {{^}}fdiv_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 4.0, -2.0,
define amdgpu_kernel void @fdiv_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, float -4.0, float 2.0
  %res = fdiv float 8.0, %pick
  store float %res, float addrspace(1)* %p, align 4
  ret void
}
|
|
287
|
|
; frem 5.0, select(%cond, -4.0, 3.0). Per arm the remainders are
; 5.0 rem -4.0 = 1.0 and 5.0 rem 3.0 = 2.0; the check expects both as
; operands of one v_cndmask_b32_e64 (written 2.0, 1.0).
; GCN-LABEL: {{^}}frem_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2.0, 1.0,
define amdgpu_kernel void @frem_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, float -4.0, float 3.0
  %res = frem float 5.0, %pick
  store float %res, float addrspace(1)* %p, align 4
  ret void
}
|