# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-optimize-exec-masking -o - %s | FileCheck %s

# Test cases for the si-optimize-exec-masking pass. The IR below only
# provides function skeletons; the interesting input is the MIR in the
# documents that follow.

--- |
  define amdgpu_kernel void @optimize_if_and_saveexec_xor(i32 %z, i32 %v) {
  main_body:
    br i1 undef, label %if, label %end

  if:                                             ; preds = %main_body
    %v.if = load volatile i32, i32 addrspace(1)* undef
    br label %end

  end:                                            ; preds = %if, %main_body
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec(i32 %z, i32 %v) {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_or_saveexec(i32 %z, i32 %v) {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec_xor_valu_middle(i32 %z, i32 %v) {
  main_body:
    br i1 undef, label %if, label %end

  if:                                             ; preds = %main_body
    br label %end

  end:                                            ; preds = %if, %main_body
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec_xor_wrong_reg(i32 %z, i32 %v) {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec_xor_modify_copy_to_exec(i32 %z, i32 %v) {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec_xor_live_out_setexec(i32 %z, i32 %v) {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_unknown_saveexec(i32 %z, i32 %v) {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_andn2_saveexec(i32 %z, i32 %v) {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_andn2_saveexec_no_commute(i32 %z, i32 %v) {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @if_and_xor_read_exec_copy_subreg() {
  main_body:
    br i1 undef, label %if, label %end

  if:                                             ; preds = %main_body
    br label %end

  end:                                            ; preds = %if, %main_body
    ret void
  }

...
|
|
---
# The exec copy, S_AND_B64 and terminator copy to exec should fold into
# S_AND_SAVEEXEC_B64, with the xor rewritten to read exec directly.
# CHECK-LABEL: name: optimize_if_and_saveexec_xor{{$}}
# CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: SI_MASK_BRANCH

name: optimize_if_and_saveexec_xor
liveins:
  - { reg: '$vgpr0' }
body: |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
|
|
---
# Same as above but with no xor of the saved mask: the and/copy pair
# should still fold into S_AND_SAVEEXEC_B64.
# CHECK-LABEL: name: optimize_if_and_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: SI_MASK_BRANCH

name: optimize_if_and_saveexec
liveins:
  - { reg: '$vgpr0' }
body: |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
|
|
---
# S_OR_B64 variant: should fold into S_OR_SAVEEXEC_B64.
# CHECK-LABEL: name: optimize_if_or_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: SI_MASK_BRANCH

name: optimize_if_or_saveexec
liveins:
  - { reg: '$vgpr0' }
body: |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_OR_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
|
|
---
# A VALU instruction (the buffer store) between the mask computation and
# the copy to exec blocks the fold; the sequence must be left as-is.
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_valu_middle
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH
name: optimize_if_and_saveexec_xor_valu_middle
liveins:
  - { reg: '$vgpr0' }
body: |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
|
|
---
# The and writes back to the exec-copy register itself rather than a
# fresh register; no saveexec fold is expected.
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_wrong_reg{{$}}
# CHECK: $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY $sgpr0_sgpr1
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
name: optimize_if_and_saveexec_xor_wrong_reg
liveins:
  - { reg: '$vgpr0' }
body: |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr6 = S_MOV_B32 -1
    $sgpr7 = S_MOV_B32 61440
    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term $sgpr0_sgpr1
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1, $sgpr4_sgpr5_sgpr6_sgpr7
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1, $sgpr4_sgpr5_sgpr6_sgpr7

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
|
|
---
# The mask register is modified (S_OR_B64 with 1) between the and and the
# copy to exec, so the fold must not happen.
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_modify_copy_to_exec{{$}}
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: $sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec

name: optimize_if_and_saveexec_xor_modify_copy_to_exec
liveins:
  - { reg: '$vgpr0' }
body: |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc
    $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr0 = S_MOV_B32 0
    $sgpr1 = S_MOV_B32 1
    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
|
|
---
# The mask register ($sgpr2_sgpr3) is live into bb.1 (read by S_SLEEP), so
# it cannot be folded away into a saveexec.
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_live_out_setexec{{$}}
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH
name: optimize_if_and_saveexec_xor_live_out_setexec
liveins:
  - { reg: '$vgpr0' }
body: |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
    S_SLEEP 0, implicit $sgpr2_sgpr3
    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
|
|
---
# The mask is produced by an op with no saveexec form (S_LSHR_B64); only
# the terminator copy is rewritten to a plain COPY.
# CHECK-LABEL: name: optimize_if_unknown_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = COPY $exec
# CHECK: $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec

name: optimize_if_unknown_saveexec
liveins:
  - { reg: '$vgpr0' }
body: |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
|
|
---
# S_ANDN2_B64 with exec-copy as first operand: folds to S_ANDN2_SAVEEXEC_B64.
# CHECK-LABEL: name: optimize_if_andn2_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_ANDN2_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: SI_MASK_BRANCH

name: optimize_if_andn2_saveexec
liveins:
  - { reg: '$vgpr0' }
body: |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_ANDN2_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
|
|
---
# S_ANDN2_B64 is not commutative: with the exec copy as the second
# operand the fold is not valid, so only the terminator copy is rewritten.
# CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}}
# CHECK: $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
name: optimize_if_andn2_saveexec_no_commute
liveins:
  - { reg: '$vgpr0' }
body: |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
|
|
---
# A read from exec copy subreg prevents optimization
# CHECK-LABEL: name: if_and_xor_read_exec_copy_subreg{{$}}
# CHECK: $sgpr0_sgpr1 = COPY $exec
# CHECK-NEXT: $sgpr4 = S_MOV_B32 $sgpr1
name: if_and_xor_read_exec_copy_subreg
liveins:
  - { reg: '$vgpr0' }
body: |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $sgpr4 = S_MOV_B32 $sgpr1
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0
...