Mercurial > hg > CbC > CbC_llvm
diff llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll @ 221:79ff65ed7e25
LLVM12 Original
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 15 Jun 2021 19:15:29 +0900 |
parents | 0572611fdcc8 |
children | 1f2b6ac9f198 |
line wrap: on
line diff
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll Tue Jun 15 19:13:43 2021 +0900 +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll Tue Jun 15 19:15:29 2021 +0900 @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,VARIANT0 %s -; RUN: llc -march=amdgcn -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,VARIANT1 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,VARIANT2 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,VARIANT3 %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=VARIANT0 %s +; RUN: llc -march=amdgcn -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck --check-prefix=VARIANT1 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=VARIANT2 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck --check-prefix=VARIANT3 %s define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 { ; VARIANT0-LABEL: test_barrier: ; VARIANT0: ; %bb.0: ; %entry ; VARIANT0-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; VARIANT0-NEXT: s_load_dword s2, s[0:1], 0xb +; VARIANT0-NEXT: s_load_dword s0, s[0:1], 0xb ; VARIANT0-NEXT: s_mov_b32 s7, 0xf000 ; VARIANT0-NEXT: s_mov_b32 s6, 0 ; VARIANT0-NEXT: v_lshlrev_b32_e32 v1, 2, v0 @@ -18,7 +18,7 @@ ; VARIANT0-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 ; VARIANT0-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; VARIANT0-NEXT: s_barrier -; VARIANT0-NEXT: v_add_i32_e32 v3, vcc, s2, v3 +; VARIANT0-NEXT: v_add_i32_e32 v3, vcc, s0, v3 ; VARIANT0-NEXT: v_ashrrev_i32_e32 v4, 31, v3 ; VARIANT0-NEXT: v_lshl_b64 v[3:4], v[3:4], 2 ; VARIANT0-NEXT: buffer_load_dword v0, v[3:4], s[4:7], 0 addr64 @@ -29,7 +29,7 @@ ; VARIANT1-LABEL: test_barrier: ; VARIANT1: ; %bb.0: ; %entry ; VARIANT1-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; VARIANT1-NEXT: s_load_dword s2, s[0:1], 0xb +; VARIANT1-NEXT: s_load_dword s0, s[0:1], 0xb ; VARIANT1-NEXT: s_mov_b32 s7, 0xf000 ; VARIANT1-NEXT: s_mov_b32 s6, 0 ; VARIANT1-NEXT: v_lshlrev_b32_e32 v1, 2, v0 @@ -38,7 +38,7 @@ ; VARIANT1-NEXT: s_waitcnt lgkmcnt(0) ; VARIANT1-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 ; VARIANT1-NEXT: s_barrier -; VARIANT1-NEXT: v_add_i32_e32 v3, vcc, s2, v3 +; VARIANT1-NEXT: v_add_i32_e32 v3, vcc, s0, v3 ; VARIANT1-NEXT: v_ashrrev_i32_e32 v4, 31, v3 ; VARIANT1-NEXT: v_lshl_b64 v[3:4], v[3:4], 2 ; VARIANT1-NEXT: s_waitcnt expcnt(0) @@ -50,46 +50,40 @@ ; VARIANT2-LABEL: test_barrier: ; VARIANT2: ; %bb.0: ; %entry ; VARIANT2-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; VARIANT2-NEXT: s_load_dword s0, s[0:1], 0x2c -; VARIANT2-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; VARIANT2-NEXT: s_load_dword s4, s[0:1], 0x2c +; VARIANT2-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; VARIANT2-NEXT: s_waitcnt lgkmcnt(0) -; VARIANT2-NEXT: v_mov_b32_e32 v2, s3 -; VARIANT2-NEXT: v_xad_u32 v3, v0, -1, s0 -; VARIANT2-NEXT: v_ashrrev_i32_e32 v4, 31, v3 -; VARIANT2-NEXT: v_add_co_u32_e32 v1, vcc, s2, v1 -; VARIANT2-NEXT: v_lshlrev_b64 v[3:4], 2, v[3:4] -; VARIANT2-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc -; VARIANT2-NEXT: global_store_dword v[1:2], v0, off -; VARIANT2-NEXT: v_mov_b32_e32 v0, s3 -; VARIANT2-NEXT: v_add_co_u32_e32 v3, vcc, s2, v3 -; VARIANT2-NEXT: v_addc_co_u32_e32 v4, vcc, v0, v4, vcc +; VARIANT2-NEXT: global_store_dword v2, v0, s[2:3] +; VARIANT2-NEXT: v_xad_u32 v0, v0, -1, s4 +; VARIANT2-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VARIANT2-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] +; VARIANT2-NEXT: v_mov_b32_e32 v3, s3 +; VARIANT2-NEXT: v_add_co_u32_e32 v0, vcc, s2, v0 +; VARIANT2-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc ; VARIANT2-NEXT: s_waitcnt vmcnt(0) ; VARIANT2-NEXT: s_barrier -; VARIANT2-NEXT: global_load_dword v0, v[3:4], off +; VARIANT2-NEXT: global_load_dword v0, v[0:1], off ; VARIANT2-NEXT: s_waitcnt vmcnt(0) -; VARIANT2-NEXT: global_store_dword v[1:2], v0, off +; VARIANT2-NEXT: global_store_dword v2, v0, s[2:3] ; VARIANT2-NEXT: s_endpgm ; ; VARIANT3-LABEL: test_barrier: ; VARIANT3: ; %bb.0: ; %entry ; VARIANT3-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; VARIANT3-NEXT: s_load_dword s0, s[0:1], 0x2c -; VARIANT3-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; VARIANT3-NEXT: s_load_dword s4, s[0:1], 0x2c +; VARIANT3-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; VARIANT3-NEXT: s_waitcnt lgkmcnt(0) -; VARIANT3-NEXT: v_mov_b32_e32 v2, s3 -; VARIANT3-NEXT: v_xad_u32 v3, v0, -1, s0 -; VARIANT3-NEXT: v_ashrrev_i32_e32 v4, 31, v3 -; VARIANT3-NEXT: v_add_co_u32_e32 v1, vcc, s2, v1 -; VARIANT3-NEXT: v_lshlrev_b64 v[3:4], 2, v[3:4] -; VARIANT3-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc -; VARIANT3-NEXT: global_store_dword v[1:2], v0, off -; VARIANT3-NEXT: v_mov_b32_e32 v0, s3 -; VARIANT3-NEXT: v_add_co_u32_e32 v3, vcc, s2, v3 -; VARIANT3-NEXT: v_addc_co_u32_e32 v4, vcc, v0, v4, vcc +; VARIANT3-NEXT: global_store_dword v2, v0, s[2:3] +; VARIANT3-NEXT: v_xad_u32 v0, v0, -1, s4 +; VARIANT3-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VARIANT3-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] +; VARIANT3-NEXT: v_mov_b32_e32 v3, s3 +; VARIANT3-NEXT: v_add_co_u32_e32 v0, vcc, s2, v0 +; VARIANT3-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc ; VARIANT3-NEXT: s_barrier -; VARIANT3-NEXT: global_load_dword v0, v[3:4], off +; VARIANT3-NEXT: global_load_dword v0, v[0:1], off ; VARIANT3-NEXT: s_waitcnt vmcnt(0) -; VARIANT3-NEXT: global_store_dword v[1:2], v0, off +; VARIANT3-NEXT: global_store_dword v2, v0, s[2:3] ; VARIANT3-NEXT: s_endpgm entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x()