Mercurial > hg > CbC > CbC_llvm
diff test/CodeGen/AMDGPU/branch-relaxation.ll @ 147:c2174574ed3a
LLVM 10
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 14 Aug 2019 16:55:33 +0900 |
parents | 3a76565eade5 |
children |
line wrap: on
line diff
--- a/test/CodeGen/AMDGPU/branch-relaxation.ll Sat Feb 17 09:57:20 2018 +0900 +++ b/test/CodeGen/AMDGPU/branch-relaxation.ll Wed Aug 14 16:55:33 2019 +0900 @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; FIXME: We should use llvm-mc for this, but we can't even parse our own output. @@ -61,10 +61,10 @@ ; GCN-NEXT: s_cbranch_scc0 [[LONGBB:BB[0-9]+_[0-9]+]] ; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb0 -; GCN-NEXT: s_getpc_b64 vcc -; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[ENDBB:BB[0-9]+_[0-9]+]]-([[LONG_JUMP]]+4) -; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0 -; GCN-NEXT: s_setpc_b64 vcc +; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], [[ENDBB:BB[0-9]+_[0-9]+]]-([[LONG_JUMP]]+4) +; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], 0 +; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}} ; GCN-NEXT: [[LONGBB]]: ; GCN-NEXT: ;;#ASMSTART @@ -105,10 +105,10 @@ ; GCN: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]] ; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb0 -; GCN-NEXT: s_getpc_b64 vcc -; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[ENDBB:BB[0-9]+_[0-9]+]]-([[LONG_JUMP]]+4) -; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0 -; GCN-NEXT: s_setpc_b64 vcc +; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], [[ENDBB:BB[0-9]+_[0-9]+]]-([[LONG_JUMP]]+4) +; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], 0 +; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}} ; GCN-NEXT: [[LONGBB]]: ; GCN: v_nop_e64 @@ -191,10 +191,11 @@ ; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb2 ; GCN-NEXT: ; in Loop: Header=[[LOOPBB]] Depth=1 -; GCN-NEXT: s_getpc_b64 vcc -; GCN-NEXT: s_sub_u32 vcc_lo, vcc_lo, ([[LONG_JUMP]]+4)-[[LOOPBB]] -; GCN-NEXT: s_subb_u32 vcc_hi, vcc_hi, 0 -; GCN-NEXT: s_setpc_b64 vcc + +; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; GCN-NEXT: s_sub_u32 s[[PC_LO]], s[[PC_LO]], ([[LONG_JUMP]]+4)-[[LOOPBB]] +; GCN-NEXT: s_subb_u32 s[[PC_HI]], s[[PC_HI]], 0 +; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}} ; GCN-NEXT: [[ENDBB]]: ; GCN-NEXT: s_endpgm @@ -225,20 +226,20 @@ ; GCN-NEXT: s_cbranch_scc0 [[BB2:BB[0-9]+_[0-9]+]] ; GCN-NEXT: [[LONG_JUMP0:BB[0-9]+_[0-9]+]]: ; %bb0 -; GCN-NEXT: s_getpc_b64 vcc -; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[BB3:BB[0-9]_[0-9]+]]-([[LONG_JUMP0]]+4) -; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0{{$}} -; GCN-NEXT: s_setpc_b64 vcc +; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}} +; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], [[BB3:BB[0-9]_[0-9]+]]-([[LONG_JUMP0]]+4) +; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], 0{{$}} +; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC0_LO]]:[[PC0_HI]]{{\]}} ; GCN-NEXT: [[BB2]]: ; %bb2 ; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17 ; GCN: buffer_store_dword [[BB2_K]] ; GCN-NEXT: [[LONG_JUMP1:BB[0-9]+_[0-9]+]]: ; %bb2 -; GCN-NEXT: s_getpc_b64 vcc -; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[BB4:BB[0-9]_[0-9]+]]-([[LONG_JUMP1]]+4) -; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0{{$}} -; GCN-NEXT: s_setpc_b64 vcc +; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}} +; GCN-NEXT: s_add_u32 s[[PC1_LO]], s[[PC1_LO]], [[BB4:BB[0-9]_[0-9]+]]-([[LONG_JUMP1]]+4) +; GCN-NEXT: s_addc_u32 s[[PC1_HI]], s[[PC1_HI]], 0{{$}} +; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC1_LO]]:[[PC1_HI]]{{\]}} ; GCN: [[BB3]]: ; %bb3 ; GCN: v_nop_e64 @@ -289,10 +290,11 @@ ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop ; GCN-NEXT: ; in Loop: Header=[[LOOP]] Depth=1 -; GCN-NEXT: s_getpc_b64 vcc -; GCN-NEXT: s_sub_u32 vcc_lo, vcc_lo, ([[LONGBB]]+4)-[[LOOP]] -; GCN-NEXT: s_subb_u32 vcc_hi, vcc_hi, 0{{$}} -; GCN-NEXT: s_setpc_b64 vcc + +; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; GCN-NEXT: s_sub_u32 s[[PC_LO]], s[[PC_LO]], ([[LONGBB]]+4)-[[LOOP]] +; GCN-NEXT: s_subb_u32 s[[PC_HI]], s[[PC_HI]], 0{{$}} +; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}} ; GCN-NEXT .Lfunc_end{{[0-9]+}}: define amdgpu_kernel void @uniform_unconditional_min_long_backward_branch(i32 addrspace(1)* %arg, i32 %arg1) { entry: @@ -318,10 +320,11 @@ ; GCN-NEXT: s_cbranch_scc0 [[BB1:BB[0-9]+_[0-9]+]] ; GCN-NEXT: [[LONGBB0:BB[0-9]+_[0-9]+]]: ; %bb0 -; GCN-NEXT: s_getpc_b64 vcc -; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[BB2:BB[0-9]_[0-9]+]]-([[LONGBB0]]+4) -; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0{{$}} -; GCN-NEXT: s_setpc_b64 vcc + +; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}} +; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], [[BB2:BB[0-9]_[0-9]+]]-([[LONGBB0]]+4) +; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], 0{{$}} +; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC0_LO]]:[[PC0_HI]]{{\]}} ; GCN-NEXT: [[BB1]]: ; %bb1 ; GCN-NEXT: s_load_dword @@ -330,10 +333,10 @@ ; GCN-NEXT: s_cbranch_scc0 [[BB2:BB[0-9]_[0-9]+]] ; GCN-NEXT: [[LONGBB1:BB[0-9]+_[0-9]+]]: ; %bb1 -; GCN-NEXT: s_getpc_b64 vcc -; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[BB3:BB[0-9]+_[0-9]+]]-([[LONGBB1]]+4) -; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0{{$}} -; GCN-NEXT: s_setpc_b64 vcc +; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}} +; GCN-NEXT: s_add_u32 s[[PC1_LO]], s[[PC1_LO]], [[BB3:BB[0-9]+_[0-9]+]]-([[LONGBB1]]+4) +; GCN-NEXT: s_addc_u32 s[[PC1_HI]], s[[PC1_HI]], 0{{$}} +; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC1_LO]]:[[PC1_HI]]{{\]}} ; GCN-NEXT: [[BB2]]: ; %bb2 ; GCN-NEXT: ;;#ASMSTART @@ -389,10 +392,10 @@ ; GCN-NEXT: s_cbranch_execnz [[IF:BB[0-9]+_[0-9]+]] ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %entry -; GCN-NEXT: s_getpc_b64 vcc -; GCN-NEXT: s_add_u32 vcc_lo, vcc_lo, [[BB2:BB[0-9]_[0-9]+]]-([[LONGBB]]+4) -; GCN-NEXT: s_addc_u32 vcc_hi, vcc_hi, 0{{$}} -; GCN-NEXT: s_setpc_b64 vcc +; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], [[BB2:BB[0-9]_[0-9]+]]-([[LONGBB]]+4) +; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], 0{{$}} +; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}} ; GCN-NEXT: [[IF]]: ; %if ; GCN: buffer_store_dword @@ -431,11 +434,17 @@ ; si_mask_branch ; GCN-LABEL: {{^}}analyze_mask_branch: -; GCN: v_cmp_lt_f32_e32 vcc -; GCN-NEXT: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc +; GCN: v_cmp_nlt_f32_e32 vcc +; GCN-NEXT: s_and_saveexec_b64 [[TEMP_MASK:s\[[0-9]+:[0-9]+\]]], vcc +; GCN-NEXT: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[TEMP_MASK]] +; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]] + +; GCN: [[FLOW]]: ; %Flow +; GCN-NEXT: s_or_saveexec_b64 [[TEMP_MASK1:s\[[0-9]+:[0-9]+\]]], [[MASK]] +; GCN-NEXT: s_xor_b64 exec, exec, [[TEMP_MASK1]] ; GCN-NEXT: ; mask branch [[RET:BB[0-9]+_[0-9]+]] -; GCN-NEXT: [[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop_body +; GCN: [[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop{{$}} ; GCN: ;;#ASMSTART ; GCN: v_nop_e64 ; GCN: v_nop_e64 @@ -444,17 +453,16 @@ ; GCN: v_nop_e64 ; GCN: v_nop_e64 ; GCN: ;;#ASMEND +; GCN: s_cbranch_vccz [[RET]] -; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop_body +; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop ; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1 -; GCN-NEXT: s_getpc_b64 vcc -; GCN-NEXT: s_sub_u32 vcc_lo, vcc_lo, ([[LONGBB]]+4)-[[LOOP_BODY]] -; GCN-NEXT: s_subb_u32 vcc_hi, vcc_hi, 0 -; GCN-NEXT: s_setpc_b64 vcc +; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; GCN-NEXT: s_sub_u32 s[[PC_LO]], s[[PC_LO]], ([[LONGBB]]+4)-[[LOOP_BODY]] +; GCN-NEXT: s_subb_u32 s[[PC_HI]], s[[PC_HI]], 0 +; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}} -; GCN-NEXT: [[RET]]: ; %ret -; GCN-NEXT: s_or_b64 exec, exec, [[MASK]] -; GCN: buffer_store_dword +; GCN-NEXT: [[RET]]: ; %UnifiedReturnBlock ; GCN-NEXT: s_endpgm define amdgpu_kernel void @analyze_mask_branch() #0 { entry: @@ -485,12 +493,13 @@ ; GCN-LABEL: {{^}}long_branch_hang: ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6 -; GCN-NEXT: s_cbranch_scc1 {{BB[0-9]+_[0-9]+}} +; GCN: s_cbranch_scc1 {{BB[0-9]+_[0-9]+}} ; GCN-NEXT: s_branch [[LONG_BR_0:BB[0-9]+_[0-9]+]] ; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: -; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-( -; GCN: s_setpc_b64 +; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-( +; GCN-NEXT: s_addc_u32 +; GCN-NEXT: s_setpc_b64 ; GCN-NEXT: [[LONG_BR_0]]: ; GCN-DAG: v_cmp_lt_i32