Mercurial > hg > CbC > CbC_llvm
comparison llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll @ 150:1d019706d866
LLVM10
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 15:10:13 +0900 |
parents | |
children | 2e18cbf3894f |
comparison
equal
deleted
inserted
replaced
147:c2174574ed3a | 150:1d019706d866 |
---|---|
; Test driver: llc is run for four amdgcn subtargets (tahiti, bonaire, tonga, gfx900) and each output is piped to FileCheck. | |
; tahiti enables the SI prefix; bonaire, tonga and gfx900 enable CIPLUS. NOTGFX9 is enabled for every target except gfx900, which enables GFX9 instead. | |
; NOTE(review): none of these commands enables an SI-SDAG prefix or any *-GISEL prefix, although directives using them appear further down; as far as this file shows those directives are never exercised - confirm whether that is intended. | |
1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9,GCN-SDAG %s | |
2 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-SDAG,GCN-SDAG %s | |
3 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-SDAG,GCN-SDAG %s | |
4 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,CIPLUS-SDAG,GCN-SDAG %s | |
5 | |
; Baseline LDS case: the addrspace(3) pointer kernel argument is passed unchanged to llvm.amdgcn.ds.append and the returned value is stored to %out. | |
; The directives below expect the loaded pointer to be copied into m0, a ds_append with nothing after its result register, no buffer_wbinvl1, and a store of that result. | |
6 ; GCN-LABEL: {{^}}ds_append_lds: | |
7 ; GCN: s_load_dword [[PTR:s[0-9]+]] | |
8 ; GCN: s_mov_b32 m0, [[PTR]] | |
9 ; GCN: ds_append [[RESULT:v[0-9]+]]{{$}} | |
10 ; GCN-NOT: buffer_wbinvl1 | |
11 ; GCN: {{.*}}store{{.*}} [[RESULT]] | |
12 define amdgpu_kernel void @ds_append_lds(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) #0 { | |
13 %val = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %lds, i1 false) | |
14 store i32 %val, i32 addrspace(1)* %out | |
15 ret void | |
16 } | |
17 | |
; LDS pointer advanced by 16383 i32 elements (16383 * 4 = 65532 bytes) before the append. | |
; The directives expect the unmodified pointer in m0 and the byte offset carried on the instruction itself as offset 65532. | |
18 ; GCN-LABEL: {{^}}ds_append_lds_max_offset: | |
19 ; GCN: s_load_dword [[PTR:s[0-9]+]] | |
20 ; GCN: s_mov_b32 m0, [[PTR]] | |
21 ; GCN: ds_append [[RESULT:v[0-9]+]] offset:65532{{$}} | |
22 ; GCN-NOT: buffer_wbinvl1 | |
23 ; GCN: {{.*}}store{{.*}} [[RESULT]] | |
24 define amdgpu_kernel void @ds_append_lds_max_offset(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) #0 { | |
25 %gep = getelementptr inbounds i32, i32 addrspace(3)* %lds, i32 16383 | |
26 %val = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %gep, i1 false) | |
27 store i32 %val, i32 addrspace(1)* %out | |
28 ret void | |
29 } | |
30 | |
; Here the LDS pointer is itself loaded from addrspace(4) memory (not taken directly from a kernel argument) and then advanced by 4 i32 elements (16 bytes). | |
; Expected output differs by target: under the SI prefix the 16 is added to the pointer with s_add_i32 and ds_append carries no offset; under CIPLUS the pointer goes to m0 unchanged and ds_append carries offset 16. | |
31 ; GCN-LABEL: {{^}}ds_append_no_fold_offset_si: | |
32 ; GCN: s_load_dword [[PTR:s[0-9]+]] | |
33 | |
34 ; SI: s_add_i32 [[PTR]], [[PTR]], 16 | |
35 ; SI: s_mov_b32 m0, [[PTR]] | |
36 ; SI: ds_append [[RESULT:v[0-9]+]]{{$}} | |
37 | |
38 ; CIPLUS: s_mov_b32 m0, [[PTR]] | |
39 ; CIPLUS: ds_append [[RESULT:v[0-9]+]] offset:16{{$}} | |
40 | |
41 ; GCN-NOT: buffer_wbinvl1 | |
42 ; GCN: {{.*}}store{{.*}} [[RESULT]] | |
43 define amdgpu_kernel void @ds_append_no_fold_offset_si(i32 addrspace(3)* addrspace(4)* %lds.ptr, i32 addrspace(1)* %out) #0 { | |
44 %lds = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* %lds.ptr, align 4 | |
45 %gep = getelementptr inbounds i32, i32 addrspace(3)* %lds, i32 4 | |
46 %val = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %gep, i1 false) | |
47 store i32 %val, i32 addrspace(1)* %out | |
48 ret void | |
49 } | |
50 | |
; LDS pointer advanced by 16384 i32 elements (16384 * 4 = 65536 = 0x10000 bytes), one element past the previous max-offset test. | |
; The directives expect the ds_append to carry no offset; the CIPLUS-SDAG directive expects 0x10000 to be added to the pointer with s_add_i32 before it is copied into m0. | |
; NOTE(review): the SI-SDAG, SI-GISEL and CIPLUS-GISEL prefixes used below are not enabled by any llc/FileCheck command visible in this file, so those three directives appear to be unexercised - confirm. | |
51 ; GCN-LABEL: {{^}}ds_append_lds_over_max_offset: | |
52 ; GCN: s_load_dword [[PTR:s[0-9]+]] | |
53 | |
54 ; SI-SDAG: s_bitset1_b32 [[PTR]], 16 | |
55 ; CIPLUS-SDAG: s_add_i32 [[PTR]], [[PTR]], 0x10000 | |
56 ; GCN-SDAG: s_mov_b32 m0, [[PTR]] | |
57 | |
58 ; SI-GISEL: s_bitset1_b32 m0, 16 | |
59 ; CIPLUS-GISEL: s_add_u32 m0, [[PTR]], 0x10000 | |
60 | |
61 ; GCN: ds_append [[RESULT:v[0-9]+]]{{$}} | |
62 ; GCN-NOT: buffer_wbinvl1 | |
63 ; GCN: {{.*}}store{{.*}} [[RESULT]] | |
64 define amdgpu_kernel void @ds_append_lds_over_max_offset(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) #0 { | |
65 %gep = getelementptr inbounds i32, i32 addrspace(3)* %lds, i32 16384 | |
66 %val = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %gep, i1 false) | |
67 store i32 %val, i32 addrspace(1)* %out | |
68 ret void | |
69 } | |
70 | |
; Same append as the baseline test, but in a plain function (no amdgpu_kernel calling convention on the define). | |
; The GCN-SDAG directives expect the address to be taken from v0 with v_readfirstlane_b32 into an SGPR, which is then copied into m0. | |
; NOTE(review): the GCN-GISEL directive below uses a prefix that no command visible in this file enables - confirm. | |
71 ; GCN-LABEL: {{^}}ds_append_lds_vgpr_addr: | |
72 ; GCN-SDAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 | |
73 ; GCN-SDAG: s_mov_b32 m0, [[READLANE]] | |
74 | |
75 ; GCN-GISEL: v_readfirstlane_b32 m0, v0 | |
76 | |
77 ; GCN: ds_append [[RESULT:v[0-9]+]]{{$}} | |
78 ; GCN-NOT: buffer_wbinvl1 | |
79 ; GCN: {{.*}}store{{.*}} [[RESULT]] | |
80 define void @ds_append_lds_vgpr_addr(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) #0 { | |
81 %val = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %lds, i1 false) | |
82 store i32 %val, i32 addrspace(1)* %out | |
83 ret void | |
84 } | |
85 | |
; GDS counterpart of the baseline test: the pointer is in addrspace(2) and the p2i32 overload of the intrinsic is called. | |
; The directives expect the same m0 setup and a ds_append whose only trailing modifier is gds. | |
86 ; GCN-LABEL: {{^}}ds_append_gds: | |
87 ; GCN: s_load_dword [[PTR:s[0-9]+]] | |
88 ; GCN: s_mov_b32 m0, [[PTR]] | |
89 ; GCN: ds_append [[RESULT:v[0-9]+]] gds{{$}} | |
90 ; GCN-NOT: buffer_wbinvl1 | |
91 ; GCN: {{.*}}store{{.*}} [[RESULT]] | |
92 define amdgpu_kernel void @ds_append_gds(i32 addrspace(2)* %gds, i32 addrspace(1)* %out) #0 { | |
93 %val = call i32 @llvm.amdgcn.ds.append.p2i32(i32 addrspace(2)* %gds, i1 false) | |
94 store i32 %val, i32 addrspace(1)* %out | |
95 ret void | |
96 } | |
97 | |
; GDS pointer advanced by 16383 i32 elements (65532 bytes) before the append. | |
; The directives expect the unmodified pointer in m0 and a ds_append carrying both offset 65532 and the gds modifier. | |
98 ; GCN-LABEL: {{^}}ds_append_gds_max_offset: | |
99 ; GCN: s_load_dword [[PTR:s[0-9]+]] | |
100 ; GCN: s_mov_b32 m0, [[PTR]] | |
101 ; GCN: ds_append [[RESULT:v[0-9]+]] offset:65532 gds{{$}} | |
102 ; GCN-NOT: buffer_wbinvl1 | |
103 ; GCN: {{.*}}store{{.*}} [[RESULT]] | |
104 define amdgpu_kernel void @ds_append_gds_max_offset(i32 addrspace(2)* %gds, i32 addrspace(1)* %out) #0 { | |
105 %gep = getelementptr inbounds i32, i32 addrspace(2)* %gds, i32 16383 | |
106 %val = call i32 @llvm.amdgcn.ds.append.p2i32(i32 addrspace(2)* %gep, i1 false) | |
107 store i32 %val, i32 addrspace(1)* %out | |
108 ret void | |
109 } | |
110 | |
; GDS pointer advanced by 16384 i32 elements (65536 bytes). | |
; Only the function label and the absence of buffer_wbinvl1 are checked; no particular instruction sequence is pinned for this case. | |
111 ; GCN-LABEL: {{^}}ds_append_gds_over_max_offset: | |
112 ; GCN-NOT: buffer_wbinvl1 | |
113 define amdgpu_kernel void @ds_append_gds_over_max_offset(i32 addrspace(2)* %gds, i32 addrspace(1)* %out) #0 { | |
114 %gep = getelementptr inbounds i32, i32 addrspace(2)* %gds, i32 16384 | |
115 %val = call i32 @llvm.amdgcn.ds.append.p2i32(i32 addrspace(2)* %gep, i1 false) | |
116 store i32 %val, i32 addrspace(1)* %out | |
117 ret void | |
118 } | |
119 | |
; An append followed by a store of its result and then a volatile load from the same LDS pointer, so m0 handling after the append is observable. | |
; Under NOTGFX9 the directives expect m0 to be set to -1 after the ds_append; under GFX9 they expect no mention of m0 between the ds_append and the _store_dword match. A ds_read_b32 is expected after the store in both cases. | |
120 ; GCN-LABEL: {{^}}ds_append_lds_m0_restore: | |
121 ; GCN: s_load_dword [[PTR:s[0-9]+]] | |
122 ; GCN: s_mov_b32 m0, [[PTR]] | |
123 ; GCN: ds_append [[RESULT:v[0-9]+]]{{$}} | |
124 ; GCN-NOT: buffer_wbinvl1 | |
125 ; NOTGFX9: s_mov_b32 m0, -1 | |
126 ; GFX9-NOT: m0 | |
127 ; GCN: _store_dword | |
128 ; GCN: ds_read_b32 | |
129 define amdgpu_kernel void @ds_append_lds_m0_restore(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) #0 { | |
130 %val0 = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %lds, i1 false) | |
131 store i32 %val0, i32 addrspace(1)* %out | |
132 %val1 = load volatile i32, i32 addrspace(3)* %lds | |
133 ret void | |
134 } | |
135 | |
; Declarations of the two ds.append intrinsic overloads called above (addrspace(3) and addrspace(2) pointer operands), followed by the attribute groups referenced as #0 (test functions) and #1 (intrinsic declarations). | |
136 declare i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* nocapture, i1 immarg) #1 | |
137 declare i32 @llvm.amdgcn.ds.append.p2i32(i32 addrspace(2)* nocapture, i1 immarg) #1 | |
138 | |
139 attributes #0 = { nounwind } | |
140 attributes #1 = { argmemonly convergent nounwind } | |