Mercurial > hg > CbC > CbC_llvm
comparison test/CodeGen/AMDGPU/branch-relaxation.ll @ 147:c2174574ed3a
LLVM 10
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 14 Aug 2019 16:55:33 +0900 |
parents | 3a76565eade5 |
children |
comparison
equal
deleted
inserted
replaced
134:3a76565eade5 | 147:c2174574ed3a |
---|---|
1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -check-prefix=GCN %s | 1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s |
2 | 2 |
3 | 3 |
4 ; FIXME: We should use llvm-mc for this, but we can't even parse our own output. | 4 ; FIXME: We should use llvm-mc for this, but we can't even parse our own output. |
5 ; See PR33579. | 5 ; See PR33579. |
6 ; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-s-branch-bits=4 -o %t.o -filetype=obj %s | 6 ; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-s-branch-bits=4 -o %t.o -filetype=obj %s |
59 ; GCN: s_load_dword [[CND:s[0-9]+]] | 59 ; GCN: s_load_dword [[CND:s[0-9]+]] |
60 ; GCN: s_cmp_eq_u32 [[CND]], 0 | 60 ; GCN: s_cmp_eq_u32 [[CND]], 0 |
61 ; GCN-NEXT: s_cbranch_scc0 [[LONGBB:BB[0-9]+_[0-9]+]] | 61 ; GCN-NEXT: s_cbranch_scc0 [[LONGBB:BB[0-9]+_[0-9]+]] |
62 | 62 |
63 ; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb0 | 63 ; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb0 |
64 ; GCN-NEXT: s_getpc_b64 vcc | 64 ; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} |
65 ; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[ENDBB:BB[0-9]+_[0-9]+]]-([[LONG_JUMP]]+4) | 65 ; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], [[ENDBB:BB[0-9]+_[0-9]+]]-([[LONG_JUMP]]+4) |
66 ; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0 | 66 ; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], 0 |
67 ; GCN-NEXT: s_setpc_b64 vcc | 67 ; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}} |
68 | 68 |
69 ; GCN-NEXT: [[LONGBB]]: | 69 ; GCN-NEXT: [[LONGBB]]: |
70 ; GCN-NEXT: ;;#ASMSTART | 70 ; GCN-NEXT: ;;#ASMSTART |
71 ; GCN: v_nop_e64 | 71 ; GCN: v_nop_e64 |
72 ; GCN: v_nop_e64 | 72 ; GCN: v_nop_e64 |
103 ; GCN-DAG: v_cmp_eq_f32_e64 [[UNMASKED:s\[[0-9]+:[0-9]+\]]], [[CND]], 0 | 103 ; GCN-DAG: v_cmp_eq_f32_e64 [[UNMASKED:s\[[0-9]+:[0-9]+\]]], [[CND]], 0 |
104 ; GCN-DAG: s_and_b64 vcc, exec, [[UNMASKED]] | 104 ; GCN-DAG: s_and_b64 vcc, exec, [[UNMASKED]] |
105 ; GCN: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]] | 105 ; GCN: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]] |
106 | 106 |
107 ; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb0 | 107 ; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb0 |
108 ; GCN-NEXT: s_getpc_b64 vcc | 108 ; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} |
109 ; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[ENDBB:BB[0-9]+_[0-9]+]]-([[LONG_JUMP]]+4) | 109 ; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], [[ENDBB:BB[0-9]+_[0-9]+]]-([[LONG_JUMP]]+4) |
110 ; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0 | 110 ; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], 0 |
111 ; GCN-NEXT: s_setpc_b64 vcc | 111 ; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}} |
112 | 112 |
113 ; GCN-NEXT: [[LONGBB]]: | 113 ; GCN-NEXT: [[LONGBB]]: |
114 ; GCN: v_nop_e64 | 114 ; GCN: v_nop_e64 |
115 ; GCN: v_nop_e64 | 115 ; GCN: v_nop_e64 |
116 ; GCN: v_nop_e64 | 116 ; GCN: v_nop_e64 |
189 | 189 |
190 ; GCN-NEXT: s_cbranch_scc0 [[ENDBB:BB[0-9]+_[0-9]+]] | 190 ; GCN-NEXT: s_cbranch_scc0 [[ENDBB:BB[0-9]+_[0-9]+]] |
191 | 191 |
192 ; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb2 | 192 ; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb2 |
193 ; GCN-NEXT: ; in Loop: Header=[[LOOPBB]] Depth=1 | 193 ; GCN-NEXT: ; in Loop: Header=[[LOOPBB]] Depth=1 |
194 ; GCN-NEXT: s_getpc_b64 vcc | 194 |
195 ; GCN-NEXT: s_sub_u32 vcc_lo, vcc_lo, ([[LONG_JUMP]]+4)-[[LOOPBB]] | 195 ; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} |
196 ; GCN-NEXT: s_subb_u32 vcc_hi, vcc_hi, 0 | 196 ; GCN-NEXT: s_sub_u32 s[[PC_LO]], s[[PC_LO]], ([[LONG_JUMP]]+4)-[[LOOPBB]] |
197 ; GCN-NEXT: s_setpc_b64 vcc | 197 ; GCN-NEXT: s_subb_u32 s[[PC_HI]], s[[PC_HI]], 0 |
198 ; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}} | |
198 | 199 |
199 ; GCN-NEXT: [[ENDBB]]: | 200 ; GCN-NEXT: [[ENDBB]]: |
200 ; GCN-NEXT: s_endpgm | 201 ; GCN-NEXT: s_endpgm |
201 define amdgpu_kernel void @long_backward_sbranch(i32 addrspace(1)* %arg) #0 { | 202 define amdgpu_kernel void @long_backward_sbranch(i32 addrspace(1)* %arg) #0 { |
202 bb: | 203 bb: |
223 ; GCN-LABEL: {{^}}uniform_unconditional_min_long_forward_branch: | 224 ; GCN-LABEL: {{^}}uniform_unconditional_min_long_forward_branch: |
224 ; GCN: s_cmp_eq_u32 | 225 ; GCN: s_cmp_eq_u32 |
225 ; GCN-NEXT: s_cbranch_scc0 [[BB2:BB[0-9]+_[0-9]+]] | 226 ; GCN-NEXT: s_cbranch_scc0 [[BB2:BB[0-9]+_[0-9]+]] |
226 | 227 |
227 ; GCN-NEXT: [[LONG_JUMP0:BB[0-9]+_[0-9]+]]: ; %bb0 | 228 ; GCN-NEXT: [[LONG_JUMP0:BB[0-9]+_[0-9]+]]: ; %bb0 |
228 ; GCN-NEXT: s_getpc_b64 vcc | 229 ; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}} |
229 ; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[BB3:BB[0-9]_[0-9]+]]-([[LONG_JUMP0]]+4) | 230 ; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], [[BB3:BB[0-9]_[0-9]+]]-([[LONG_JUMP0]]+4) |
230 ; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0{{$}} | 231 ; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], 0{{$}} |
231 ; GCN-NEXT: s_setpc_b64 vcc | 232 ; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC0_LO]]:[[PC0_HI]]{{\]}} |
232 | 233 |
233 ; GCN-NEXT: [[BB2]]: ; %bb2 | 234 ; GCN-NEXT: [[BB2]]: ; %bb2 |
234 ; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17 | 235 ; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17 |
235 ; GCN: buffer_store_dword [[BB2_K]] | 236 ; GCN: buffer_store_dword [[BB2_K]] |
236 | 237 |
237 ; GCN-NEXT: [[LONG_JUMP1:BB[0-9]+_[0-9]+]]: ; %bb2 | 238 ; GCN-NEXT: [[LONG_JUMP1:BB[0-9]+_[0-9]+]]: ; %bb2 |
238 ; GCN-NEXT: s_getpc_b64 vcc | 239 ; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}} |
239 ; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[BB4:BB[0-9]_[0-9]+]]-([[LONG_JUMP1]]+4) | 240 ; GCN-NEXT: s_add_u32 s[[PC1_LO]], s[[PC1_LO]], [[BB4:BB[0-9]_[0-9]+]]-([[LONG_JUMP1]]+4) |
240 ; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0{{$}} | 241 ; GCN-NEXT: s_addc_u32 s[[PC1_HI]], s[[PC1_HI]], 0{{$}} |
241 ; GCN-NEXT: s_setpc_b64 vcc | 242 ; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC1_LO]]:[[PC1_HI]]{{\]}} |
242 | 243 |
243 ; GCN: [[BB3]]: ; %bb3 | 244 ; GCN: [[BB3]]: ; %bb3 |
244 ; GCN: v_nop_e64 | 245 ; GCN: v_nop_e64 |
245 ; GCN: v_nop_e64 | 246 ; GCN: v_nop_e64 |
246 ; GCN: v_nop_e64 | 247 ; GCN: v_nop_e64 |
287 ; GCN-NEXT: v_nop_e64 | 288 ; GCN-NEXT: v_nop_e64 |
288 ; GCN-NEXT: ;;#ASMEND | 289 ; GCN-NEXT: ;;#ASMEND |
289 | 290 |
290 ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop | 291 ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop |
291 ; GCN-NEXT: ; in Loop: Header=[[LOOP]] Depth=1 | 292 ; GCN-NEXT: ; in Loop: Header=[[LOOP]] Depth=1 |
292 ; GCN-NEXT: s_getpc_b64 vcc | 293 |
293 ; GCN-NEXT: s_sub_u32 vcc_lo, vcc_lo, ([[LONGBB]]+4)-[[LOOP]] | 294 ; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} |
294 ; GCN-NEXT: s_subb_u32 vcc_hi, vcc_hi, 0{{$}} | 295 ; GCN-NEXT: s_sub_u32 s[[PC_LO]], s[[PC_LO]], ([[LONGBB]]+4)-[[LOOP]] |
295 ; GCN-NEXT: s_setpc_b64 vcc | 296 ; GCN-NEXT: s_subb_u32 s[[PC_HI]], s[[PC_HI]], 0{{$}} |
297 ; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}} | |
296 ; GCN-NEXT .Lfunc_end{{[0-9]+}}: | 298 ; GCN-NEXT .Lfunc_end{{[0-9]+}}: |
297 define amdgpu_kernel void @uniform_unconditional_min_long_backward_branch(i32 addrspace(1)* %arg, i32 %arg1) { | 299 define amdgpu_kernel void @uniform_unconditional_min_long_backward_branch(i32 addrspace(1)* %arg, i32 %arg1) { |
298 entry: | 300 entry: |
299 br label %loop | 301 br label %loop |
300 | 302 |
316 ; GCN: s_load_dword | 318 ; GCN: s_load_dword |
317 ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 0{{$}} | 319 ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 0{{$}} |
318 ; GCN-NEXT: s_cbranch_scc0 [[BB1:BB[0-9]+_[0-9]+]] | 320 ; GCN-NEXT: s_cbranch_scc0 [[BB1:BB[0-9]+_[0-9]+]] |
319 | 321 |
320 ; GCN-NEXT: [[LONGBB0:BB[0-9]+_[0-9]+]]: ; %bb0 | 322 ; GCN-NEXT: [[LONGBB0:BB[0-9]+_[0-9]+]]: ; %bb0 |
321 ; GCN-NEXT: s_getpc_b64 vcc | 323 |
322 ; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[BB2:BB[0-9]_[0-9]+]]-([[LONGBB0]]+4) | 324 ; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}} |
323 ; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0{{$}} | 325 ; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], [[BB2:BB[0-9]_[0-9]+]]-([[LONGBB0]]+4) |
324 ; GCN-NEXT: s_setpc_b64 vcc | 326 ; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], 0{{$}} |
327 ; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC0_LO]]:[[PC0_HI]]{{\]}} | |
325 | 328 |
326 ; GCN-NEXT: [[BB1]]: ; %bb1 | 329 ; GCN-NEXT: [[BB1]]: ; %bb1 |
327 ; GCN-NEXT: s_load_dword | 330 ; GCN-NEXT: s_load_dword |
328 ; GCN-NEXT: s_waitcnt lgkmcnt(0) | 331 ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
329 ; GCN-NEXT: s_cmp_eq_u32 s{{[0-9]+}}, 3{{$}} | 332 ; GCN-NEXT: s_cmp_eq_u32 s{{[0-9]+}}, 3{{$}} |
330 ; GCN-NEXT: s_cbranch_scc0 [[BB2:BB[0-9]_[0-9]+]] | 333 ; GCN-NEXT: s_cbranch_scc0 [[BB2:BB[0-9]_[0-9]+]] |
331 | 334 |
332 ; GCN-NEXT: [[LONGBB1:BB[0-9]+_[0-9]+]]: ; %bb1 | 335 ; GCN-NEXT: [[LONGBB1:BB[0-9]+_[0-9]+]]: ; %bb1 |
333 ; GCN-NEXT: s_getpc_b64 vcc | 336 ; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}} |
334 ; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[BB3:BB[0-9]+_[0-9]+]]-([[LONGBB1]]+4) | 337 ; GCN-NEXT: s_add_u32 s[[PC1_LO]], s[[PC1_LO]], [[BB3:BB[0-9]+_[0-9]+]]-([[LONGBB1]]+4) |
335 ; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0{{$}} | 338 ; GCN-NEXT: s_addc_u32 s[[PC1_HI]], s[[PC1_HI]], 0{{$}} |
336 ; GCN-NEXT: s_setpc_b64 vcc | 339 ; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC1_LO]]:[[PC1_HI]]{{\]}} |
337 | 340 |
338 ; GCN-NEXT: [[BB2]]: ; %bb2 | 341 ; GCN-NEXT: [[BB2]]: ; %bb2 |
339 ; GCN-NEXT: ;;#ASMSTART | 342 ; GCN-NEXT: ;;#ASMSTART |
340 ; GCN-NEXT: v_nop_e64 | 343 ; GCN-NEXT: v_nop_e64 |
341 ; GCN-NEXT: v_nop_e64 | 344 ; GCN-NEXT: v_nop_e64 |
387 ; GCN-NEXT: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc | 390 ; GCN-NEXT: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc |
388 ; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9]+_[0-9]+]] | 391 ; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9]+_[0-9]+]] |
389 ; GCN-NEXT: s_cbranch_execnz [[IF:BB[0-9]+_[0-9]+]] | 392 ; GCN-NEXT: s_cbranch_execnz [[IF:BB[0-9]+_[0-9]+]] |
390 | 393 |
391 ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %entry | 394 ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %entry |
392 ; GCN-NEXT: s_getpc_b64 vcc | 395 ; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} |
393 ; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[BB2:BB[0-9]_[0-9]+]]-([[LONGBB]]+4) | 396 ; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], [[BB2:BB[0-9]_[0-9]+]]-([[LONGBB]]+4) |
394 ; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0{{$}} | 397 ; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], 0{{$}} |
395 ; GCN-NEXT: s_setpc_b64 vcc | 398 ; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}} |
396 | 399 |
397 ; GCN-NEXT: [[IF]]: ; %if | 400 ; GCN-NEXT: [[IF]]: ; %if |
398 ; GCN: buffer_store_dword | 401 ; GCN: buffer_store_dword |
399 ; GCN: s_cmp_lg_u32 | 402 ; GCN: s_cmp_lg_u32 |
400 ; GCN: s_cbranch_scc1 [[ENDIF]] | 403 ; GCN: s_cbranch_scc1 [[ENDIF]] |
429 } | 432 } |
430 | 433 |
431 ; si_mask_branch | 434 ; si_mask_branch |
432 | 435 |
433 ; GCN-LABEL: {{^}}analyze_mask_branch: | 436 ; GCN-LABEL: {{^}}analyze_mask_branch: |
434 ; GCN: v_cmp_lt_f32_e32 vcc | 437 ; GCN: v_cmp_nlt_f32_e32 vcc |
435 ; GCN-NEXT: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc | 438 ; GCN-NEXT: s_and_saveexec_b64 [[TEMP_MASK:s\[[0-9]+:[0-9]+\]]], vcc |
439 ; GCN-NEXT: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[TEMP_MASK]] | |
440 ; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]] | |
441 | |
442 ; GCN: [[FLOW]]: ; %Flow | |
443 ; GCN-NEXT: s_or_saveexec_b64 [[TEMP_MASK1:s\[[0-9]+:[0-9]+\]]], [[MASK]] | |
444 ; GCN-NEXT: s_xor_b64 exec, exec, [[TEMP_MASK1]] | |
436 ; GCN-NEXT: ; mask branch [[RET:BB[0-9]+_[0-9]+]] | 445 ; GCN-NEXT: ; mask branch [[RET:BB[0-9]+_[0-9]+]] |
437 | 446 |
438 ; GCN-NEXT: [[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop_body | 447 ; GCN: [[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop{{$}} |
439 ; GCN: ;;#ASMSTART | 448 ; GCN: ;;#ASMSTART |
440 ; GCN: v_nop_e64 | 449 ; GCN: v_nop_e64 |
441 ; GCN: v_nop_e64 | 450 ; GCN: v_nop_e64 |
442 ; GCN: v_nop_e64 | 451 ; GCN: v_nop_e64 |
443 ; GCN: v_nop_e64 | 452 ; GCN: v_nop_e64 |
444 ; GCN: v_nop_e64 | 453 ; GCN: v_nop_e64 |
445 ; GCN: v_nop_e64 | 454 ; GCN: v_nop_e64 |
446 ; GCN: ;;#ASMEND | 455 ; GCN: ;;#ASMEND |
447 | 456 ; GCN: s_cbranch_vccz [[RET]] |
448 ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop_body | 457 |
458 ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop | |
449 ; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1 | 459 ; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1 |
450 ; GCN-NEXT: s_getpc_b64 vcc | 460 ; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} |
451 ; GCN-NEXT: s_sub_u32 vcc_lo, vcc_lo, ([[LONGBB]]+4)-[[LOOP_BODY]] | 461 ; GCN-NEXT: s_sub_u32 s[[PC_LO]], s[[PC_LO]], ([[LONGBB]]+4)-[[LOOP_BODY]] |
452 ; GCN-NEXT: s_subb_u32 vcc_hi, vcc_hi, 0 | 462 ; GCN-NEXT: s_subb_u32 s[[PC_HI]], s[[PC_HI]], 0 |
453 ; GCN-NEXT: s_setpc_b64 vcc | 463 ; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}} |
454 | 464 |
455 ; GCN-NEXT: [[RET]]: ; %ret | 465 ; GCN-NEXT: [[RET]]: ; %UnifiedReturnBlock |
456 ; GCN-NEXT: s_or_b64 exec, exec, [[MASK]] | |
457 ; GCN: buffer_store_dword | |
458 ; GCN-NEXT: s_endpgm | 466 ; GCN-NEXT: s_endpgm |
459 define amdgpu_kernel void @analyze_mask_branch() #0 { | 467 define amdgpu_kernel void @analyze_mask_branch() #0 { |
460 entry: | 468 entry: |
461 %reg = call float asm sideeffect "v_mov_b32_e64 $0, 0", "=v"() | 469 %reg = call float asm sideeffect "v_mov_b32_e64 $0, 0", "=v"() |
462 %cmp0 = fcmp ogt float %reg, 0.000000e+00 | 470 %cmp0 = fcmp ogt float %reg, 0.000000e+00 |
483 ret void | 491 ret void |
484 } | 492 } |
485 | 493 |
486 ; GCN-LABEL: {{^}}long_branch_hang: | 494 ; GCN-LABEL: {{^}}long_branch_hang: |
487 ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6 | 495 ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6 |
488 ; GCN-NEXT: s_cbranch_scc1 {{BB[0-9]+_[0-9]+}} | 496 ; GCN: s_cbranch_scc1 {{BB[0-9]+_[0-9]+}} |
489 ; GCN-NEXT: s_branch [[LONG_BR_0:BB[0-9]+_[0-9]+]] | 497 ; GCN-NEXT: s_branch [[LONG_BR_0:BB[0-9]+_[0-9]+]] |
490 ; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: | 498 ; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: |
491 | 499 |
492 ; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-( | 500 ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-( |
493 ; GCN: s_setpc_b64 | 501 ; GCN-NEXT: s_addc_u32 |
502 ; GCN-NEXT: s_setpc_b64 | |
494 | 503 |
495 ; GCN-NEXT: [[LONG_BR_0]]: | 504 ; GCN-NEXT: [[LONG_BR_0]]: |
496 ; GCN-DAG: v_cmp_lt_i32 | 505 ; GCN-DAG: v_cmp_lt_i32 |
497 ; GCN-DAG: v_cmp_gt_i32 | 506 ; GCN-DAG: v_cmp_gt_i32 |
498 ; GCN: s_cbranch_vccnz | 507 ; GCN: s_cbranch_vccnz |