Mercurial > hg > CbC > CbC_llvm
comparison llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll @ 236:c4bab56944e8 llvm-original
LLVM 16
author | kono |
---|---|
date | Wed, 09 Nov 2022 17:45:10 +0900 |
parents | 1d019706d866 |
children | 1f2b6ac9f198 |
comparison
equal
deleted
inserted
replaced
232:70dce7da266c | 236:c4bab56944e8 |
---|---|
1 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s | 1 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s |
2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s | 2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s |
3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s | 3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s |
4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s | 4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s |
5 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s | 5 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s |
6 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s | |
6 | 7 |
7 ; Minimum offset | 8 ; Minimum offset |
8 ; GCN-LABEL: {{^}}gws_init_offset0: | 9 ; GCN-LABEL: {{^}}gws_init_offset0: |
9 ; GCN-DAG: s_load_dword [[BAR_NUM:s[0-9]+]] | 10 ; GCN-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]] |
10 ; GCN-DAG: s_mov_b32 m0, 0{{$}} | 11 ; GCN-DAG: s_mov_b32 m0, 0{{$}} |
11 ; GCN: v_mov_b32_e32 v0, [[BAR_NUM]] | 12 ; GCN: v_mov_b32_e32 v0, [[BAR_NUM]] |
12 ; NOLOOP: ds_gws_init v0 gds{{$}} | 13 ; NOLOOP: ds_gws_init v0 gds{{$}} |
13 | 14 |
14 ; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]: | 15 ; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]: |
15 ; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 | 16 ; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 |
16 ; LOOP-NEXT: ds_gws_init v0 gds | 17 ; LOOP-NEXT: ds_gws_init v0 gds |
17 ; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | 18 ; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
18 ; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1) | 19 ; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1) |
19 ; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0 | 20 ; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0 |
23 ret void | 24 ret void |
24 } | 25 } |
25 | 26 |
26 ; Maximum offset | 27 ; Maximum offset |
27 ; GCN-LABEL: {{^}}gws_init_offset63: | 28 ; GCN-LABEL: {{^}}gws_init_offset63: |
28 ; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]] | 29 ; NOLOOP-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]] |
29 ; NOLOOP-DAG: s_mov_b32 m0, 0{{$}} | 30 ; NOLOOP-DAG: s_mov_b32 m0, 0{{$}} |
30 ; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]] | 31 ; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]] |
31 ; NOLOOP: ds_gws_init v0 offset:63 gds{{$}} | 32 ; NOLOOP: ds_gws_init v0 offset:63 gds{{$}} |
32 | 33 |
33 | 34 |
34 ; LOOP: s_mov_b32 m0, 0{{$}} | 35 ; LOOP: s_mov_b32 m0, 0{{$}} |
35 ; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]: | 36 ; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]: |
36 ; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 | 37 ; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 |
37 ; LOOP-NEXT: ds_gws_init v0 offset:63 gds | 38 ; LOOP-NEXT: ds_gws_init v0 offset:63 gds |
38 ; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | 39 ; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
39 ; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1) | 40 ; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1) |
40 ; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0 | 41 ; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0 |
44 ret void | 45 ret void |
45 } | 46 } |
46 | 47 |
47 ; FIXME: Should be able to shift directly into m0 | 48 ; FIXME: Should be able to shift directly into m0 |
48 ; GCN-LABEL: {{^}}gws_init_sgpr_offset: | 49 ; GCN-LABEL: {{^}}gws_init_sgpr_offset: |
49 ; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}} | 50 ; NOLOOP-DAG: s_load_{{dwordx2|b64}} s[[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]] |
50 | 51 |
51 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16 | 52 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16 |
52 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} | 53 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} |
53 | 54 |
54 ; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16 | 55 ; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16 |
60 ret void | 61 ret void |
61 } | 62 } |
62 | 63 |
63 ; Variable offset in SGPR with constant add | 64 ; Variable offset in SGPR with constant add |
64 ; GCN-LABEL: {{^}}gws_init_sgpr_offset_add1: | 65 ; GCN-LABEL: {{^}}gws_init_sgpr_offset_add1: |
65 ; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}} | 66 ; NOLOOP-DAG: s_load_{{dwordx2|b64}} s[[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]] |
66 | 67 |
67 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16 | 68 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16 |
68 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} | 69 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} |
69 | 70 |
70 ; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16 | 71 ; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16 |
76 call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset) | 77 call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset) |
77 ret void | 78 ret void |
78 } | 79 } |
79 | 80 |
80 ; GCN-LABEL: {{^}}gws_init_vgpr_offset: | 81 ; GCN-LABEL: {{^}}gws_init_vgpr_offset: |
81 ; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]] | 82 ; NOLOOP-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]] |
82 ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 | 83 ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 |
83 | 84 |
84 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16 | 85 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16 |
85 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} | 86 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} |
86 | 87 |
94 ret void | 95 ret void |
95 } | 96 } |
96 | 97 |
97 ; Variable offset in VGPR with constant add | 98 ; Variable offset in VGPR with constant add |
98 ; GCN-LABEL: {{^}}gws_init_vgpr_offset_add: | 99 ; GCN-LABEL: {{^}}gws_init_vgpr_offset_add: |
99 ; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]] | 100 ; NOLOOP-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]] |
100 ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 | 101 ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 |
101 | 102 |
102 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16 | 103 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16 |
103 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} | 104 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} |
104 | 105 |