annotate llvm/test/CodeGen/AMDGPU/local-atomics.ll @ 223:5f17cb93ff66 llvm-original

LLVM13 (2021/7/18)
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sun, 18 Jul 2021 22:43:00 +0900
parents 1d019706d866
children 1f2b6ac9f198
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
150
anatofuz
parents:
diff changeset
1 ; RUN: llc -march=amdgcn -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SICIVI,FUNC %s
anatofuz
parents:
diff changeset
2 ; RUN: llc -march=amdgcn -mcpu=bonaire -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,FUNC %s
anatofuz
parents:
diff changeset
3 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,SICIVI,FUNC %s
anatofuz
parents:
diff changeset
4 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s
anatofuz
parents:
diff changeset
5 ; RUN: llc -march=r600 -mcpu=redwood -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s
anatofuz
parents:
diff changeset
6
anatofuz
parents:
diff changeset
7 ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
anatofuz
parents:
diff changeset
8 ; EG: LDS_WRXCHG_RET *
anatofuz
parents:
diff changeset
9
anatofuz
parents:
diff changeset
10 ; SICIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
11 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
12
anatofuz
parents:
diff changeset
13 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
anatofuz
parents:
diff changeset
14 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
anatofuz
parents:
diff changeset
15 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
anatofuz
parents:
diff changeset
16 ; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
anatofuz
parents:
diff changeset
17 ; GCN: buffer_store_dword [[RESULT]],
anatofuz
parents:
diff changeset
18 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
19 define amdgpu_kernel void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
20 %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
21 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
22 ret void
anatofuz
parents:
diff changeset
23 }
anatofuz
parents:
diff changeset
24
anatofuz
parents:
diff changeset
25 ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
anatofuz
parents:
diff changeset
26 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
27 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
28
anatofuz
parents:
diff changeset
29 ; EG: LDS_WRXCHG_RET *
anatofuz
parents:
diff changeset
30 ; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
31 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
32 define amdgpu_kernel void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
33 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
34 %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
35 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
36 ret void
anatofuz
parents:
diff changeset
37 }
anatofuz
parents:
diff changeset
38
anatofuz
parents:
diff changeset
39 ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_f32_offset:
anatofuz
parents:
diff changeset
40 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
41 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
42
anatofuz
parents:
diff changeset
43 ; EG: LDS_WRXCHG_RET *
anatofuz
parents:
diff changeset
44 ; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
45 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
46 define amdgpu_kernel void @lds_atomic_xchg_ret_f32_offset(float addrspace(1)* %out, float addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
47 %gep = getelementptr float, float addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
48 %result = atomicrmw xchg float addrspace(3)* %gep, float 4.0 seq_cst
anatofuz
parents:
diff changeset
49 store float %result, float addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
50 ret void
anatofuz
parents:
diff changeset
51 }
anatofuz
parents:
diff changeset
52
anatofuz
parents:
diff changeset
53 ; XXX - Is it really necessary to load 4 into VGPR?
anatofuz
parents:
diff changeset
54 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
anatofuz
parents:
diff changeset
55 ; EG: LDS_ADD_RET *
anatofuz
parents:
diff changeset
56
anatofuz
parents:
diff changeset
57 ; SICIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
58 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
59
anatofuz
parents:
diff changeset
60 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
anatofuz
parents:
diff changeset
61 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
anatofuz
parents:
diff changeset
62 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
anatofuz
parents:
diff changeset
63 ; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
anatofuz
parents:
diff changeset
64 ; GCN: buffer_store_dword [[RESULT]],
anatofuz
parents:
diff changeset
65 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
66 define amdgpu_kernel void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
67 %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
68 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
69 ret void
anatofuz
parents:
diff changeset
70 }
anatofuz
parents:
diff changeset
71
anatofuz
parents:
diff changeset
72 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
anatofuz
parents:
diff changeset
73 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
74 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
75
anatofuz
parents:
diff changeset
76 ; EG: LDS_ADD_RET *
anatofuz
parents:
diff changeset
77 ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
78 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
79 define amdgpu_kernel void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
80 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
81 %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
82 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
83 ret void
anatofuz
parents:
diff changeset
84 }
anatofuz
parents:
diff changeset
85
anatofuz
parents:
diff changeset
86 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
anatofuz
parents:
diff changeset
87 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
88 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
89
anatofuz
parents:
diff changeset
90 ; EG: LDS_ADD_RET *
anatofuz
parents:
diff changeset
91 ; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
anatofuz
parents:
diff changeset
92 ; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
93 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
94 define amdgpu_kernel void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
anatofuz
parents:
diff changeset
95 %sub = sub i32 %a, %b
anatofuz
parents:
diff changeset
96 %add = add i32 %sub, 4
anatofuz
parents:
diff changeset
97 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
anatofuz
parents:
diff changeset
98 %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
99 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
100 ret void
anatofuz
parents:
diff changeset
101 }
anatofuz
parents:
diff changeset
102
anatofuz
parents:
diff changeset
103 ; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32:
anatofuz
parents:
diff changeset
104 ; EG: LDS_ADD_RET *
anatofuz
parents:
diff changeset
105
anatofuz
parents:
diff changeset
106 ; SICIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
107 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
108
anatofuz
parents:
diff changeset
109 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
anatofuz
parents:
diff changeset
110 ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
anatofuz
parents:
diff changeset
111 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
112 define amdgpu_kernel void @lds_atomic_add1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
113 %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
anatofuz
parents:
diff changeset
114 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
115 ret void
anatofuz
parents:
diff changeset
116 }
anatofuz
parents:
diff changeset
117
anatofuz
parents:
diff changeset
118 ; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_offset:
anatofuz
parents:
diff changeset
119 ; EG: LDS_ADD_RET *
anatofuz
parents:
diff changeset
120
anatofuz
parents:
diff changeset
121 ; SICIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
122 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
123
anatofuz
parents:
diff changeset
124 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
anatofuz
parents:
diff changeset
125 ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
anatofuz
parents:
diff changeset
126 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
127 define amdgpu_kernel void @lds_atomic_add1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
128 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
129 %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
anatofuz
parents:
diff changeset
130 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
131 ret void
anatofuz
parents:
diff changeset
132 }
anatofuz
parents:
diff changeset
133
anatofuz
parents:
diff changeset
134 ; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_bad_si_offset:
anatofuz
parents:
diff changeset
135 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
136 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
137
anatofuz
parents:
diff changeset
138 ; EG: LDS_ADD_RET *
anatofuz
parents:
diff changeset
139 ; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
anatofuz
parents:
diff changeset
140 ; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
141 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
142 define amdgpu_kernel void @lds_atomic_add1_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
anatofuz
parents:
diff changeset
143 %sub = sub i32 %a, %b
anatofuz
parents:
diff changeset
144 %add = add i32 %sub, 4
anatofuz
parents:
diff changeset
145 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
anatofuz
parents:
diff changeset
146 %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
anatofuz
parents:
diff changeset
147 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
148 ret void
anatofuz
parents:
diff changeset
149 }
anatofuz
parents:
diff changeset
150
anatofuz
parents:
diff changeset
151 ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
anatofuz
parents:
diff changeset
152 ; EG: LDS_SUB_RET *
anatofuz
parents:
diff changeset
153
anatofuz
parents:
diff changeset
154 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
155 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
156
anatofuz
parents:
diff changeset
157 ; GCN: ds_sub_rtn_u32
anatofuz
parents:
diff changeset
158 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
159 define amdgpu_kernel void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
160 %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
161 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
162 ret void
anatofuz
parents:
diff changeset
163 }
anatofuz
parents:
diff changeset
164
anatofuz
parents:
diff changeset
165 ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
anatofuz
parents:
diff changeset
166 ; EG: LDS_SUB_RET *
anatofuz
parents:
diff changeset
167
anatofuz
parents:
diff changeset
168 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
169 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
170
anatofuz
parents:
diff changeset
171 ; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
172 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
173 define amdgpu_kernel void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
174 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
175 %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
176 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
177 ret void
anatofuz
parents:
diff changeset
178 }
anatofuz
parents:
diff changeset
179
anatofuz
parents:
diff changeset
180 ; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32:
anatofuz
parents:
diff changeset
181 ; EG: LDS_SUB_RET *
anatofuz
parents:
diff changeset
182
anatofuz
parents:
diff changeset
183 ; SICIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
184 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
185
anatofuz
parents:
diff changeset
186 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
anatofuz
parents:
diff changeset
187 ; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
anatofuz
parents:
diff changeset
188 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
189 define amdgpu_kernel void @lds_atomic_sub1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
190 %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
anatofuz
parents:
diff changeset
191 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
192 ret void
anatofuz
parents:
diff changeset
193 }
anatofuz
parents:
diff changeset
194
anatofuz
parents:
diff changeset
195 ; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32_offset:
anatofuz
parents:
diff changeset
196 ; EG: LDS_SUB_RET *
anatofuz
parents:
diff changeset
197
anatofuz
parents:
diff changeset
198 ; SICIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
199 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
200
anatofuz
parents:
diff changeset
201 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
anatofuz
parents:
diff changeset
202 ; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
anatofuz
parents:
diff changeset
203 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
204 define amdgpu_kernel void @lds_atomic_sub1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
205 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
206 %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
anatofuz
parents:
diff changeset
207 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
208 ret void
anatofuz
parents:
diff changeset
209 }
anatofuz
parents:
diff changeset
210
anatofuz
parents:
diff changeset
211 ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
anatofuz
parents:
diff changeset
212 ; EG: LDS_AND_RET *
anatofuz
parents:
diff changeset
213
anatofuz
parents:
diff changeset
214 ; SICIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
215 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
216
anatofuz
parents:
diff changeset
217 ; GCN: ds_and_rtn_b32
anatofuz
parents:
diff changeset
218 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
219 define amdgpu_kernel void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
220 %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
221 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
222 ret void
anatofuz
parents:
diff changeset
223 }
anatofuz
parents:
diff changeset
224
anatofuz
parents:
diff changeset
225 ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
anatofuz
parents:
diff changeset
226 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
227 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
228
anatofuz
parents:
diff changeset
229 ; EG: LDS_AND_RET *
anatofuz
parents:
diff changeset
230 ; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
231 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
232 define amdgpu_kernel void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
233 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
234 %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
235 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
236 ret void
anatofuz
parents:
diff changeset
237 }
anatofuz
parents:
diff changeset
238
anatofuz
parents:
diff changeset
239 ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
anatofuz
parents:
diff changeset
240 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
241 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
242
anatofuz
parents:
diff changeset
243 ; EG: LDS_OR_RET *
anatofuz
parents:
diff changeset
244 ; GCN: ds_or_rtn_b32
anatofuz
parents:
diff changeset
245 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
246 define amdgpu_kernel void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
247 %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
248 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
249 ret void
anatofuz
parents:
diff changeset
250 }
anatofuz
parents:
diff changeset
251
anatofuz
parents:
diff changeset
252 ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
anatofuz
parents:
diff changeset
253 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
254 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
255
anatofuz
parents:
diff changeset
256 ; EG: LDS_OR_RET *
anatofuz
parents:
diff changeset
257 ; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
258 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
259 define amdgpu_kernel void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
260 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
261 %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
262 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
263 ret void
anatofuz
parents:
diff changeset
264 }
anatofuz
parents:
diff changeset
265
anatofuz
parents:
diff changeset
266 ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
anatofuz
parents:
diff changeset
267 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
268 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
269
anatofuz
parents:
diff changeset
270 ; EG: LDS_XOR_RET *
anatofuz
parents:
diff changeset
271 ; GCN: ds_xor_rtn_b32
anatofuz
parents:
diff changeset
272 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
273 define amdgpu_kernel void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
274 %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
275 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
276 ret void
anatofuz
parents:
diff changeset
277 }
anatofuz
parents:
diff changeset
278
anatofuz
parents:
diff changeset
279 ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
anatofuz
parents:
diff changeset
280 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
281 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
282
anatofuz
parents:
diff changeset
283 ; EG: LDS_XOR_RET *
anatofuz
parents:
diff changeset
284 ; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
285 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
286 define amdgpu_kernel void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
287 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
288 %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
289 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
290 ret void
anatofuz
parents:
diff changeset
291 }
anatofuz
parents:
diff changeset
292
anatofuz
parents:
diff changeset
293 ; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
anatofuz
parents:
diff changeset
294 ; XFUNC-LABEL: {{^}}lds_atomic_nand_ret_i32:
anatofuz
parents:
diff changeset
295 ; define amdgpu_kernel void @lds_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
296 ; %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
297 ; store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
298 ; ret void
anatofuz
parents:
diff changeset
299 ; }
anatofuz
parents:
diff changeset
300
anatofuz
parents:
diff changeset
301 ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
anatofuz
parents:
diff changeset
302 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
303 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
304
anatofuz
parents:
diff changeset
305 ; EG: LDS_MIN_INT_RET *
anatofuz
parents:
diff changeset
306 ; GCN: ds_min_rtn_i32
anatofuz
parents:
diff changeset
307 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
308 define amdgpu_kernel void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
309 %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
310 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
311 ret void
anatofuz
parents:
diff changeset
312 }
anatofuz
parents:
diff changeset
313
anatofuz
parents:
diff changeset
314 ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
anatofuz
parents:
diff changeset
315 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
316 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
317
anatofuz
parents:
diff changeset
318 ; EG: LDS_MIN_INT_RET *
anatofuz
parents:
diff changeset
319 ; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
320 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
321 define amdgpu_kernel void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
322 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
323 %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
324 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
325 ret void
anatofuz
parents:
diff changeset
326 }
anatofuz
parents:
diff changeset
327
anatofuz
parents:
diff changeset
328 ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
anatofuz
parents:
diff changeset
329 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
330 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
331
anatofuz
parents:
diff changeset
332 ; EG: LDS_MAX_INT_RET *
anatofuz
parents:
diff changeset
333 ; GCN: ds_max_rtn_i32
anatofuz
parents:
diff changeset
334 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
335 define amdgpu_kernel void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
336 %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
337 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
338 ret void
anatofuz
parents:
diff changeset
339 }
anatofuz
parents:
diff changeset
340
anatofuz
parents:
diff changeset
341 ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
anatofuz
parents:
diff changeset
342 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
343 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
344
anatofuz
parents:
diff changeset
345 ; EG: LDS_MAX_INT_RET *
anatofuz
parents:
diff changeset
346 ; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
347 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
348 define amdgpu_kernel void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
349 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
350 %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
351 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
352 ret void
anatofuz
parents:
diff changeset
353 }
anatofuz
parents:
diff changeset
354
anatofuz
parents:
diff changeset
355 ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
anatofuz
parents:
diff changeset
356 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
357 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
358
anatofuz
parents:
diff changeset
359 ; EG: LDS_MIN_UINT_RET *
anatofuz
parents:
diff changeset
360 ; GCN: ds_min_rtn_u32
anatofuz
parents:
diff changeset
361 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
362 define amdgpu_kernel void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
363 %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
364 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
365 ret void
anatofuz
parents:
diff changeset
366 }
anatofuz
parents:
diff changeset
367
anatofuz
parents:
diff changeset
368 ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
anatofuz
parents:
diff changeset
369 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
370 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
371
anatofuz
parents:
diff changeset
372 ; EG: LDS_MIN_UINT_RET *
anatofuz
parents:
diff changeset
373 ; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
374 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
375 define amdgpu_kernel void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
376 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
377 %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
378 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
379 ret void
anatofuz
parents:
diff changeset
380 }
anatofuz
parents:
diff changeset
381
anatofuz
parents:
diff changeset
382 ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
anatofuz
parents:
diff changeset
383 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
384 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
385
anatofuz
parents:
diff changeset
386 ; EG: LDS_MAX_UINT_RET *
anatofuz
parents:
diff changeset
387 ; GCN: ds_max_rtn_u32
anatofuz
parents:
diff changeset
388 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
389 define amdgpu_kernel void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
390 %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
391 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
392 ret void
anatofuz
parents:
diff changeset
393 }
anatofuz
parents:
diff changeset
394
anatofuz
parents:
diff changeset
395 ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
anatofuz
parents:
diff changeset
396 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
397 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
398
anatofuz
parents:
diff changeset
399 ; EG: LDS_MAX_UINT_RET *
anatofuz
parents:
diff changeset
400 ; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
401 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
402 define amdgpu_kernel void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
403 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
404 %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
405 store i32 %result, i32 addrspace(1)* %out, align 4
anatofuz
parents:
diff changeset
406 ret void
anatofuz
parents:
diff changeset
407 }
anatofuz
parents:
diff changeset
408
anatofuz
parents:
diff changeset
409 ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32:
anatofuz
parents:
diff changeset
410 ; SICIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
411 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
412
anatofuz
parents:
diff changeset
413 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
anatofuz
parents:
diff changeset
414 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
anatofuz
parents:
diff changeset
415 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
anatofuz
parents:
diff changeset
416 ; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
anatofuz
parents:
diff changeset
417 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
418 define amdgpu_kernel void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
419 %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
420 ret void
anatofuz
parents:
diff changeset
421 }
anatofuz
parents:
diff changeset
422
anatofuz
parents:
diff changeset
423 ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
anatofuz
parents:
diff changeset
424 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
425 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
426
anatofuz
parents:
diff changeset
427 ; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
428 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
429 define amdgpu_kernel void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
430 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
431 %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
432 ret void
anatofuz
parents:
diff changeset
433 }
anatofuz
parents:
diff changeset
434
anatofuz
parents:
diff changeset
435 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
anatofuz
parents:
diff changeset
436 ; SICIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
437 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
438
anatofuz
parents:
diff changeset
439 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
anatofuz
parents:
diff changeset
440 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
anatofuz
parents:
diff changeset
441 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
anatofuz
parents:
diff changeset
442 ; GCN: ds_add_u32 [[VPTR]], [[DATA]]
anatofuz
parents:
diff changeset
443 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
444 define amdgpu_kernel void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
445 %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
446 ret void
anatofuz
parents:
diff changeset
447 }
anatofuz
parents:
diff changeset
448
anatofuz
parents:
diff changeset
449 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
anatofuz
parents:
diff changeset
450 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
451 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
452
anatofuz
parents:
diff changeset
453 ; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
454 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
455 define amdgpu_kernel void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
456 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
457 %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
458 ret void
anatofuz
parents:
diff changeset
459 }
anatofuz
parents:
diff changeset
460
anatofuz
parents:
diff changeset
461 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset:
anatofuz
parents:
diff changeset
462 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
463 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
464
anatofuz
parents:
diff changeset
465 ; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
anatofuz
parents:
diff changeset
466 ; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
467 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
468 define amdgpu_kernel void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
anatofuz
parents:
diff changeset
469 %sub = sub i32 %a, %b
anatofuz
parents:
diff changeset
470 %add = add i32 %sub, 4
anatofuz
parents:
diff changeset
471 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
anatofuz
parents:
diff changeset
472 %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
473 ret void
anatofuz
parents:
diff changeset
474 }
anatofuz
parents:
diff changeset
475
anatofuz
parents:
diff changeset
476 ; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32:
anatofuz
parents:
diff changeset
477 ; SICIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
478 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
479
anatofuz
parents:
diff changeset
480 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
anatofuz
parents:
diff changeset
481 ; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]]
anatofuz
parents:
diff changeset
482 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
483 define amdgpu_kernel void @lds_atomic_add1_noret_i32(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
484 %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
anatofuz
parents:
diff changeset
485 ret void
anatofuz
parents:
diff changeset
486 }
anatofuz
parents:
diff changeset
487
anatofuz
parents:
diff changeset
488 ; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_offset:
anatofuz
parents:
diff changeset
489 ; SICIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
490 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
491
anatofuz
parents:
diff changeset
492 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
anatofuz
parents:
diff changeset
493 ; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]] offset:16
anatofuz
parents:
diff changeset
494 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
495 define amdgpu_kernel void @lds_atomic_add1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
496 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
497 %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
anatofuz
parents:
diff changeset
498 ret void
anatofuz
parents:
diff changeset
499 }
anatofuz
parents:
diff changeset
500
anatofuz
parents:
diff changeset
501 ; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_bad_si_offset:
anatofuz
parents:
diff changeset
502 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
503 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
504
anatofuz
parents:
diff changeset
505 ; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
anatofuz
parents:
diff changeset
506 ; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
507 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
508 define amdgpu_kernel void @lds_atomic_add1_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
anatofuz
parents:
diff changeset
509 %sub = sub i32 %a, %b
anatofuz
parents:
diff changeset
510 %add = add i32 %sub, 4
anatofuz
parents:
diff changeset
511 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
anatofuz
parents:
diff changeset
512 %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
anatofuz
parents:
diff changeset
513 ret void
anatofuz
parents:
diff changeset
514 }
anatofuz
parents:
diff changeset
515
anatofuz
parents:
diff changeset
516 ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
anatofuz
parents:
diff changeset
517 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
518 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
519
anatofuz
parents:
diff changeset
520 ; GCN: ds_sub_u32
anatofuz
parents:
diff changeset
521 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
522 define amdgpu_kernel void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
523 %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
524 ret void
anatofuz
parents:
diff changeset
525 }
anatofuz
parents:
diff changeset
526
anatofuz
parents:
diff changeset
527 ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
anatofuz
parents:
diff changeset
528 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
529 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
530
anatofuz
parents:
diff changeset
531 ; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
532 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
533 define amdgpu_kernel void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
534 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
535 %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
536 ret void
anatofuz
parents:
diff changeset
537 }
anatofuz
parents:
diff changeset
538
anatofuz
parents:
diff changeset
539 ; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32:
anatofuz
parents:
diff changeset
540 ; SICIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
541 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
542
anatofuz
parents:
diff changeset
543 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
anatofuz
parents:
diff changeset
544 ; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]]
anatofuz
parents:
diff changeset
545 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
546 define amdgpu_kernel void @lds_atomic_sub1_noret_i32(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
547 %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
anatofuz
parents:
diff changeset
548 ret void
anatofuz
parents:
diff changeset
549 }
anatofuz
parents:
diff changeset
550
anatofuz
parents:
diff changeset
551 ; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32_offset:
anatofuz
parents:
diff changeset
552 ; SICIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
553 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
554
anatofuz
parents:
diff changeset
555 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
anatofuz
parents:
diff changeset
556 ; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]] offset:16
anatofuz
parents:
diff changeset
557 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
558 define amdgpu_kernel void @lds_atomic_sub1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
559 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
560 %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
anatofuz
parents:
diff changeset
561 ret void
anatofuz
parents:
diff changeset
562 }
anatofuz
parents:
diff changeset
563
anatofuz
parents:
diff changeset
564 ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
anatofuz
parents:
diff changeset
565 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
566 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
567
anatofuz
parents:
diff changeset
568 ; GCN: ds_and_b32
anatofuz
parents:
diff changeset
569 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
570 define amdgpu_kernel void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
571 %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
572 ret void
anatofuz
parents:
diff changeset
573 }
anatofuz
parents:
diff changeset
574
anatofuz
parents:
diff changeset
575 ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
anatofuz
parents:
diff changeset
576 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
577 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
578
anatofuz
parents:
diff changeset
579 ; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
580 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
581 define amdgpu_kernel void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
582 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
583 %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
584 ret void
anatofuz
parents:
diff changeset
585 }
anatofuz
parents:
diff changeset
586
anatofuz
parents:
diff changeset
587 ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
anatofuz
parents:
diff changeset
588 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
589 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
590
anatofuz
parents:
diff changeset
591 ; GCN: ds_or_b32
anatofuz
parents:
diff changeset
592 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
593 define amdgpu_kernel void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
594 %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
595 ret void
anatofuz
parents:
diff changeset
596 }
anatofuz
parents:
diff changeset
597
anatofuz
parents:
diff changeset
598 ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
anatofuz
parents:
diff changeset
599 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
600 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
601
anatofuz
parents:
diff changeset
602 ; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
603 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
604 define amdgpu_kernel void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
605 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
606 %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
607 ret void
anatofuz
parents:
diff changeset
608 }
anatofuz
parents:
diff changeset
609
anatofuz
parents:
diff changeset
610 ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
anatofuz
parents:
diff changeset
611 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
612 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
613
anatofuz
parents:
diff changeset
614 ; GCN: ds_xor_b32
anatofuz
parents:
diff changeset
615 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
616 define amdgpu_kernel void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
617 %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
618 ret void
anatofuz
parents:
diff changeset
619 }
anatofuz
parents:
diff changeset
620
anatofuz
parents:
diff changeset
621 ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
anatofuz
parents:
diff changeset
622 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
623 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
624
anatofuz
parents:
diff changeset
625 ; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
626 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
627 define amdgpu_kernel void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
628 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
629 %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
630 ret void
anatofuz
parents:
diff changeset
631 }
anatofuz
parents:
diff changeset
632
anatofuz
parents:
diff changeset
633 ; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
anatofuz
parents:
diff changeset
634 ; XFUNC-LABEL: {{^}}lds_atomic_nand_noret_i32:
anatofuz
parents:
diff changeset
635 ; define amdgpu_kernel void @lds_atomic_nand_noret_i32(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
636 ; %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
637 ; ret void
anatofuz
parents:
diff changeset
638 ; }
anatofuz
parents:
diff changeset
639
anatofuz
parents:
diff changeset
640 ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
anatofuz
parents:
diff changeset
641 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
642 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
643
anatofuz
parents:
diff changeset
644 ; GCN: ds_min_i32
anatofuz
parents:
diff changeset
645 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
646 define amdgpu_kernel void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
647 %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
648 ret void
anatofuz
parents:
diff changeset
649 }
anatofuz
parents:
diff changeset
650
anatofuz
parents:
diff changeset
651 ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
anatofuz
parents:
diff changeset
652 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
653 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
654
anatofuz
parents:
diff changeset
655 ; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
656 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
657 define amdgpu_kernel void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
658 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
659 %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
660 ret void
anatofuz
parents:
diff changeset
661 }
anatofuz
parents:
diff changeset
662
anatofuz
parents:
diff changeset
663 ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
anatofuz
parents:
diff changeset
664 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
665 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
666
anatofuz
parents:
diff changeset
667 ; GCN: ds_max_i32
anatofuz
parents:
diff changeset
668 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
669 define amdgpu_kernel void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
670 %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
671 ret void
anatofuz
parents:
diff changeset
672 }
anatofuz
parents:
diff changeset
673
anatofuz
parents:
diff changeset
674 ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
anatofuz
parents:
diff changeset
675 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
676 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
677
anatofuz
parents:
diff changeset
678 ; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
679 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
680 define amdgpu_kernel void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
681 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
682 %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
683 ret void
anatofuz
parents:
diff changeset
684 }
anatofuz
parents:
diff changeset
685
anatofuz
parents:
diff changeset
686 ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
anatofuz
parents:
diff changeset
687 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
688 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
689
anatofuz
parents:
diff changeset
690 ; GCN: ds_min_u32
anatofuz
parents:
diff changeset
691 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
692 define amdgpu_kernel void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
693 %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
694 ret void
anatofuz
parents:
diff changeset
695 }
anatofuz
parents:
diff changeset
696
anatofuz
parents:
diff changeset
697 ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
anatofuz
parents:
diff changeset
698 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
699 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
700
anatofuz
parents:
diff changeset
701 ; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
702 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
703 define amdgpu_kernel void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
704 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
705 %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
706 ret void
anatofuz
parents:
diff changeset
707 }
anatofuz
parents:
diff changeset
708
anatofuz
parents:
diff changeset
709 ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
anatofuz
parents:
diff changeset
710 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
711 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
712
anatofuz
parents:
diff changeset
713 ; GCN: ds_max_u32
anatofuz
parents:
diff changeset
714 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
715 define amdgpu_kernel void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
716 %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
anatofuz
parents:
diff changeset
717 ret void
anatofuz
parents:
diff changeset
718 }
anatofuz
parents:
diff changeset
719
anatofuz
parents:
diff changeset
720 ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
anatofuz
parents:
diff changeset
721 ; SICIVI: s_mov_b32 m0
anatofuz
parents:
diff changeset
722 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
723
anatofuz
parents:
diff changeset
724 ; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
anatofuz
parents:
diff changeset
725 ; GCN: s_endpgm
anatofuz
parents:
diff changeset
726 define amdgpu_kernel void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
727 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
728 %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
anatofuz
parents:
diff changeset
729 ret void
anatofuz
parents:
diff changeset
730 }