comparison llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll @ 236:c4bab56944e8 llvm-original

LLVM 16
author kono
date Wed, 09 Nov 2022 17:45:10 +0900
parents 1d019706d866
children 1f2b6ac9f198
comparison
equal deleted inserted replaced
232:70dce7da266c 236:c4bab56944e8
1 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s 1 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s 2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s 3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s 4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s
5 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s 5 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s
6 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s
6 7
7 ; Minimum offset 8 ; Minimum offset
8 ; GCN-LABEL: {{^}}gws_init_offset0: 9 ; GCN-LABEL: {{^}}gws_init_offset0:
9 ; GCN-DAG: s_load_dword [[BAR_NUM:s[0-9]+]] 10 ; GCN-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]]
10 ; GCN-DAG: s_mov_b32 m0, 0{{$}} 11 ; GCN-DAG: s_mov_b32 m0, 0{{$}}
11 ; GCN: v_mov_b32_e32 v0, [[BAR_NUM]] 12 ; GCN: v_mov_b32_e32 v0, [[BAR_NUM]]
12 ; NOLOOP: ds_gws_init v0 gds{{$}} 13 ; NOLOOP: ds_gws_init v0 gds{{$}}
13 14
14 ; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]: 15 ; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]:
15 ; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 16 ; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
16 ; LOOP-NEXT: ds_gws_init v0 gds 17 ; LOOP-NEXT: ds_gws_init v0 gds
17 ; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18 ; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18 ; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1) 19 ; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
19 ; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0 20 ; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0
23 ret void 24 ret void
24 } 25 }
25 26
26 ; Maximum offset 27 ; Maximum offset
27 ; GCN-LABEL: {{^}}gws_init_offset63: 28 ; GCN-LABEL: {{^}}gws_init_offset63:
28 ; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]] 29 ; NOLOOP-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]]
29 ; NOLOOP-DAG: s_mov_b32 m0, 0{{$}} 30 ; NOLOOP-DAG: s_mov_b32 m0, 0{{$}}
30 ; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]] 31 ; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
31 ; NOLOOP: ds_gws_init v0 offset:63 gds{{$}} 32 ; NOLOOP: ds_gws_init v0 offset:63 gds{{$}}
32 33
33 34
34 ; LOOP: s_mov_b32 m0, 0{{$}} 35 ; LOOP: s_mov_b32 m0, 0{{$}}
35 ; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]: 36 ; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]:
36 ; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 37 ; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
37 ; LOOP-NEXT: ds_gws_init v0 offset:63 gds 38 ; LOOP-NEXT: ds_gws_init v0 offset:63 gds
38 ; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 39 ; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39 ; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1) 40 ; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
40 ; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0 41 ; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0
44 ret void 45 ret void
45 } 46 }
46 47
47 ; FIXME: Should be able to shift directly into m0 48 ; FIXME: Should be able to shift directly into m0
48 ; GCN-LABEL: {{^}}gws_init_sgpr_offset: 49 ; GCN-LABEL: {{^}}gws_init_sgpr_offset:
49 ; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}} 50 ; NOLOOP-DAG: s_load_{{dwordx2|b64}} s[[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]]
50 51
51 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16 52 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
52 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} 53 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
53 54
54 ; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16 55 ; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16
60 ret void 61 ret void
61 } 62 }
62 63
63 ; Variable offset in SGPR with constant add 64 ; Variable offset in SGPR with constant add
64 ; GCN-LABEL: {{^}}gws_init_sgpr_offset_add1: 65 ; GCN-LABEL: {{^}}gws_init_sgpr_offset_add1:
65 ; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}} 66 ; NOLOOP-DAG: s_load_{{dwordx2|b64}} s[[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]]
66 67
67 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16 68 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
68 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} 69 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
69 70
70 ; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16 71 ; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16
76 call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset) 77 call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset)
77 ret void 78 ret void
78 } 79 }
79 80
80 ; GCN-LABEL: {{^}}gws_init_vgpr_offset: 81 ; GCN-LABEL: {{^}}gws_init_vgpr_offset:
81 ; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]] 82 ; NOLOOP-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]]
82 ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 83 ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
83 84
84 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16 85 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
85 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} 86 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
86 87
94 ret void 95 ret void
95 } 96 }
96 97
97 ; Variable offset in VGPR with constant add 98 ; Variable offset in VGPR with constant add
98 ; GCN-LABEL: {{^}}gws_init_vgpr_offset_add: 99 ; GCN-LABEL: {{^}}gws_init_vgpr_offset_add:
99 ; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]] 100 ; NOLOOP-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]]
100 ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 101 ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
101 102
102 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16 103 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
103 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} 104 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
104 105