Mercurial > hg > CbC > CbC_llvm
view test/CodeGen/AMDGPU/merge-load-store-vreg.mir @ 134:3a76565eade5 LLVM5.0.1
update 5.0.1
author: mir3636
date: Sat, 17 Feb 2018 09:57:20 +0900
parents: (none)
children: c2174574ed3a
line wrap: on
line source
# RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,VI %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s

# If there's a base offset, check that SILoadStoreOptimizer creates
# V_ADD_{I|U}32_e64 for that offset; _e64 uses a vreg for the carry (rather than
# $vcc, which is used in _e32); this ensures that $vcc is not inadvertently
# clobbered.

# GCN-LABEL: name: kernel

# VI: V_ADD_I32_e64 %6, %0,
# VI-NEXT: DS_WRITE2_B32 killed %7, %0, %3, 0, 8,
# VI: V_ADD_I32_e64 %10, %3,
# VI-NEXT: DS_READ2_B32 killed %11, 0, 8,

# GFX9: V_ADD_U32_e64 %6, %0,
# GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0, %3, 0, 8,
# GFX9: V_ADD_U32_e64 %9, %3,
# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8,

--- |
  @0 = internal unnamed_addr addrspace(3) global [256 x float] undef, align 4

  define amdgpu_kernel void @kernel() {
  bb.0:
    br label %bb2

  bb.1:
    ret void

  bb.2:
    %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0
    %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8
    %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16
    %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24
    br label %bb1
  }
---
name: kernel
body: |
  bb.0:
    %0:vgpr_32 = IMPLICIT_DEF
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM

  bb.2:
    %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0, 0, implicit $exec
    %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %1, implicit $exec
    V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
    DS_WRITE_B32 %0, %0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
    %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    DS_WRITE_B32 %0, %3, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)
    %4:vgpr_32 = DS_READ_B32 %3, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)
    %5:vgpr_32 = DS_READ_B32 %3, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)
    $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
    S_BRANCH %bb.1
...