150
|
; Tests instruction selection for the llvm.amdgcn.atomic.inc intrinsics
; (i32 and i64) across the LDS (addrspace 3), global (addrspace 1) and
; flat (addrspace 0) address spaces, on CI (bonaire), VI (tonga) and GFX9.
; The fourth RUN line stops before the machine scheduler to check MIR
; memory-operand metadata with the MIR prefix.
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -stop-before=machine-scheduler < %s | FileCheck -enable-var-scope -check-prefixes=MIR %s

; Intrinsic signature: (ptr, value, ordering, scope, isVolatile).
declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2

declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2

declare i32 @llvm.amdgcn.workitem.id.x() #1
|
|
15
|
|
; Returned LDS atomic inc selects ds_inc_rtn_u32; CI/VI must initialize m0
; first, GFX9 must not touch it. The MIR check verifies the !noalias
; metadata survives onto the DS_INC_RTN_U32 memory operand.
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
; MIR-LABEL: @lds_atomic_inc_ret_i32
; MIR: DS_INC_RTN_U32 {{.*}} :: (load store 4 on %{{.*}}, !noalias !{{[0-9]+}}, addrspace 3)
define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false), !noalias !0
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; Self-referential scope domain consumed by the !noalias above.
!0 = distinct !{!0, !"noalias-scope"}
|
|
31
|
|
; The constant GEP (4 x i32 = 16 bytes) must fold into the DS instruction's
; immediate offset field rather than emitting pointer arithmetic.
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}
|
|
44
|
|
; When the result is unused, the non-returning ds_inc_u32 form is selected.
; NOTE(review): "[[DATA]], 4" matches "42" as a prefix; likely intended as
; 42 — confirm against llc output before tightening.
; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_inc_u32 [[VPTR]], [[DATA]]
define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}
|
|
57
|
|
; No-return LDS inc with a constant GEP folded into the offset field.
; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_u32 v{{[0-9]+}}, [[K]] offset:16
define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}
|
|
69
|
|
; Global-memory returning inc: CI/VI use buffer_atomic_inc with glc set to
; return the pre-op value; GFX9 uses global_atomic_inc.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}
|
|
79
|
|
; Same as above with a constant 16-byte GEP folded into the immediate offset.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}
|
|
90
|
|
; No-return form: glc must be absent since the old value is discarded.
; GCN-LABEL: {{^}}global_atomic_inc_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GFX9: global_atomic_inc v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}
|
|
99
|
|
; No-return global inc with a folded immediate offset, still no glc.
; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
; GFX9: global_atomic_inc v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}
|
|
109
|
|
; Per-workitem (variable) address plus constant GEP (5 x i32 = 20 bytes):
; CI folds it into buffer addr64 offset:20; VI selects flat_atomic_inc.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
; VI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out.gep
  ret void
}
|
|
123
|
|
; No-return variant of the addr64 test above (no glc).
; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
; VI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}
|
|
135
|
|
; LDS array used as the base for the shl-add addressing-fold test below.
@lds0 = addrspace(3) global [512 x i32] undef, align 4

; The (tid + 2) index must be folded so the +2 element (8 bytes) becomes the
; DS immediate offset while only the shifted tid forms the pointer.
; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i32:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false)
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i32 %val0, i32 addrspace(1)* %out
  ret void
}
|
|
150
|
|
; 64-bit LDS inc: the i64 constant 42 materializes as a lo/hi register pair
; feeding ds_inc_rtn_u64.
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
|
|
160
|
|
; i64 LDS inc with a constant GEP (4 x i64 = 32 bytes) folded into offset.
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
|
|
171
|
|
; Unused result selects the non-returning ds_inc_u64.
; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}
|
|
180
|
|
; No-return ds_inc_u64 with a folded 32-byte immediate offset.
; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}
|
|
190
|
|
; 64-bit global inc: CI/VI buffer_atomic_inc_x2 with glc; GFX9 additionally
; needs a zero VGPR as the address operand for global_atomic_inc_x2.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
|
|
202
|
|
; As above with a 32-byte constant GEP folded into the immediate offset.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
|
|
215
|
|
; No-return 64-bit global inc (glc absent).
; GCN-LABEL: {{^}}global_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}

; GFX9: global_atomic_inc_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}
|
|
227
|
|
; No-return 64-bit global inc with a folded 32-byte immediate offset.
; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
; GFX9: global_atomic_inc_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}
|
|
239
|
|
; Variable tid address plus constant GEP (5 x i64 = 40 bytes): CI folds it
; into buffer addr64 offset:40; VI lowers to flat_atomic_inc_x2.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out.gep
  ret void
}
|
|
255
|
|
; No-return variant of the i64 addr64 test (no glc).
; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}
|
|
269
|
|
; Flat (generic) pointer always lowers to flat_atomic_inc on all targets.
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32* %out
  ret void
}
|
|
278
|
|
; Only GFX9 flat instructions have an immediate offset field; CI/VI must
; add the 16 bytes into the address instead.
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) #0 {
  %gep = getelementptr i32, i32* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32* %out
  ret void
}
|
|
289
|
|
; No-return flat inc: glc must be absent.
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}
|
|
297
|
|
; No-return flat inc; GFX9 alone folds the GEP into offset:16.
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind {
  %gep = getelementptr i32, i32* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}
|
|
307
|
|
; Flat inc with a per-workitem address; GFX9 folds the constant 20 bytes.
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32* %ptr, i32 %id
  %out.gep = getelementptr i32, i32* %out, i32 %id
  %gep = getelementptr i32, i32* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32* %out.gep
  ret void
}
|
|
321
|
|
; No-return variant of the flat addr64 test.
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32* %ptr, i32 %id
  %gep = getelementptr i32, i32* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}
|
|
333
|
|
; LDS i64 array for the 64-bit shl-add addressing-fold test below.
@lds1 = addrspace(3) global [512 x i64] undef, align 8

; 64-bit analogue of atomic_inc_shl_base_lds_0_i32: the +2 element
; (16 bytes for i64) must fold into the DS immediate offset.
; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i64:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
  %val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false)
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i64 %val0, i64 addrspace(1)* %out
  ret void
}
|
|
348
|
|
; 64-bit flat inc with returned result.
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64* %out
  ret void
}
|
|
358
|
|
; 64-bit flat inc; only GFX9 folds the 32-byte constant GEP into offset.
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 {
  %gep = getelementptr i64, i64* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64* %out
  ret void
}
|
|
370
|
|
; No-return 64-bit flat inc (glc absent).
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}
|
|
379
|
|
; No-return 64-bit flat inc; GFX9 alone gets the offset:32 fold.
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind {
  %gep = getelementptr i64, i64* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}
|
|
390
|
|
; 64-bit flat inc with per-workitem address; GFX9 folds 40-byte offset.
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64* %ptr, i32 %id
  %out.gep = getelementptr i64, i64* %out, i32 %id
  %gep = getelementptr i64, i64* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64* %out.gep
  ret void
}
|
|
405
|
|
; No-return variant of the 64-bit flat addr64 test.
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64* %ptr, i32 %id
  %gep = getelementptr i64, i64* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}
|
|
418
|
|
; Two identical atomic incs on the same pointer must NOT be CSE'd — each
; has a side effect, so two ds_inc_rtn_u32 instructions must be emitted.
; GCN-LABEL: {{^}}nocse_lds_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(3)* %ptr) #0 {
  %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)

  store i32 %result0, i32 addrspace(1)* %out0
  store i32 %result1, i32 addrspace(1)* %out1
  ret void
}
|
|
431
|
|
; Attribute groups referenced by the kernels (#0), workitem.id (#1) and the
; atomic.inc intrinsic declarations (#2).
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind argmemonly }
|