121
|
1 ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s
|
|
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s
|
|
3 ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s
|
|
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s
|
|
5
|
|
6 declare i32 @llvm.amdgcn.workitem.id.x()
|
|
7
|
|
; Unordered atomic store with no explicit syncscope through an addrspace(4)
; pointer. Expected: a bare flat_store_dword, with no `s_waitcnt vmcnt(0)`
; between the kernel label and the store.
; GCN-LABEL: {{^}}system_unordered
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_unordered(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out unordered, align 4
  ret void
}
|
|
17
|
|
; Monotonic atomic store with no explicit syncscope through an addrspace(4)
; pointer. Expected: a bare flat_store_dword, with no `s_waitcnt vmcnt(0)`
; between the kernel label and the store.
; GCN-LABEL: {{^}}system_monotonic
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_monotonic(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out monotonic, align 4
  ret void
}
|
|
27
|
|
; Release atomic store with no explicit syncscope through an addrspace(4)
; pointer. Expected: `s_waitcnt vmcnt(0)` on the line directly before the
; flat_store_dword.
; GCN-LABEL: {{^}}system_release
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_release(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out release, align 4
  ret void
}
|
|
37
|
|
; Sequentially consistent atomic store with no explicit syncscope through an
; addrspace(4) pointer. Expected: `s_waitcnt vmcnt(0)` on the line directly
; before the flat_store_dword.
; GCN-LABEL: {{^}}system_seq_cst
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_seq_cst(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out seq_cst, align 4
  ret void
}
|
|
47
|
|
; Unordered atomic store at syncscope("singlethread") through an addrspace(4)
; pointer. Expected: a bare flat_store_dword, with no `s_waitcnt vmcnt(0)`
; between the kernel label and the store.
; GCN-LABEL: {{^}}singlethread_unordered
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_unordered(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") unordered, align 4
  ret void
}
|
|
57
|
|
; Monotonic atomic store at syncscope("singlethread") through an addrspace(4)
; pointer. Expected: a bare flat_store_dword, with no `s_waitcnt vmcnt(0)`
; between the kernel label and the store.
; GCN-LABEL: {{^}}singlethread_monotonic
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_monotonic(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") monotonic, align 4
  ret void
}
|
|
67
|
|
; Release atomic store at syncscope("singlethread") through an addrspace(4)
; pointer. Even with release ordering the checks forbid `s_waitcnt vmcnt(0)`
; between the kernel label and the flat_store_dword.
; GCN-LABEL: {{^}}singlethread_release
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_release(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") release, align 4
  ret void
}
|
|
77
|
|
; Sequentially consistent atomic store at syncscope("singlethread") through an
; addrspace(4) pointer. Even with seq_cst ordering the checks forbid
; `s_waitcnt vmcnt(0)` between the kernel label and the flat_store_dword.
; GCN-LABEL: {{^}}singlethread_seq_cst
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_seq_cst(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") seq_cst, align 4
  ret void
}
|
|
87
|
|
; Unordered atomic store at syncscope("agent") through an addrspace(4)
; pointer. Expected: a bare flat_store_dword, with no `s_waitcnt vmcnt(0)`
; between the kernel label and the store.
; GCN-LABEL: {{^}}agent_unordered
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_unordered(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") unordered, align 4
  ret void
}
|
|
97
|
|
; Monotonic atomic store at syncscope("agent") through an addrspace(4)
; pointer. Expected: a bare flat_store_dword, with no `s_waitcnt vmcnt(0)`
; between the kernel label and the store.
; GCN-LABEL: {{^}}agent_monotonic
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_monotonic(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") monotonic, align 4
  ret void
}
|
|
107
|
|
; Release atomic store at syncscope("agent") through an addrspace(4) pointer.
; Expected: `s_waitcnt vmcnt(0)` on the line directly before the
; flat_store_dword.
; GCN-LABEL: {{^}}agent_release
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_release(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") release, align 4
  ret void
}
|
|
117
|
|
; Sequentially consistent atomic store at syncscope("agent") through an
; addrspace(4) pointer. Expected: `s_waitcnt vmcnt(0)` on the line directly
; before the flat_store_dword.
; GCN-LABEL: {{^}}agent_seq_cst
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_seq_cst(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") seq_cst, align 4
  ret void
}
|
|
127
|
|
; Unordered atomic store at syncscope("workgroup") through an addrspace(4)
; pointer. Expected: a bare flat_store_dword, with no `s_waitcnt vmcnt(0)`
; between the kernel label and the store.
; GCN-LABEL: {{^}}workgroup_unordered
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_unordered(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") unordered, align 4
  ret void
}
|
|
137
|
|
; Monotonic atomic store at syncscope("workgroup") through an addrspace(4)
; pointer. Expected: a bare flat_store_dword, with no `s_waitcnt vmcnt(0)`
; between the kernel label and the store.
; GCN-LABEL: {{^}}workgroup_monotonic
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_monotonic(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") monotonic, align 4
  ret void
}
|
|
147
|
|
; Release atomic store at syncscope("workgroup") through an addrspace(4)
; pointer. Even with release ordering the checks forbid `s_waitcnt vmcnt(0)`
; between the kernel label and the flat_store_dword.
; GCN-LABEL: {{^}}workgroup_release
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_release(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") release, align 4
  ret void
}
|
|
157
|
|
; Sequentially consistent atomic store at syncscope("workgroup") through an
; addrspace(4) pointer. Even with seq_cst ordering the checks forbid
; `s_waitcnt vmcnt(0)` between the kernel label and the flat_store_dword.
; GCN-LABEL: {{^}}workgroup_seq_cst
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_seq_cst(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") seq_cst, align 4
  ret void
}
|
|
167
|
|
; Unordered atomic store at syncscope("wavefront") through an addrspace(4)
; pointer. Expected: a bare flat_store_dword, with no `s_waitcnt vmcnt(0)`
; between the kernel label and the store.
; GCN-LABEL: {{^}}wavefront_unordered
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_unordered(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") unordered, align 4
  ret void
}
|
|
177
|
|
; Monotonic atomic store at syncscope("wavefront") through an addrspace(4)
; pointer. Expected: a bare flat_store_dword, with no `s_waitcnt vmcnt(0)`
; between the kernel label and the store.
; GCN-LABEL: {{^}}wavefront_monotonic
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_monotonic(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") monotonic, align 4
  ret void
}
|
|
187
|
|
; Release atomic store at syncscope("wavefront") through an addrspace(4)
; pointer. Even with release ordering the checks forbid `s_waitcnt vmcnt(0)`
; between the kernel label and the flat_store_dword.
; GCN-LABEL: {{^}}wavefront_release
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_release(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") release, align 4
  ret void
}
|
|
197
|
|
; Sequentially consistent atomic store at syncscope("wavefront") through an
; addrspace(4) pointer. Even with seq_cst ordering the checks forbid
; `s_waitcnt vmcnt(0)` between the kernel label and the flat_store_dword.
; GCN-LABEL: {{^}}wavefront_seq_cst
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_seq_cst(i32 %in, i32 addrspace(4)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") seq_cst, align 4
  ret void
}
|
|
207
|
|
; Loads an i32 through %in and stores it, tagged !nontemporal, through the
; default-address-space pointer %out. Expected: a buffer_store_dword in
; `offen` form that ends with the `glc slc` modifiers.
; GCN-LABEL: {{^}}nontemporal_private_0
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
define amdgpu_kernel void @nontemporal_private_0(i32 addrspace(4)* %in, i32* %out) {
entry:
  %loaded = load i32, i32 addrspace(4)* %in, align 4
  store i32 %loaded, i32* %out, !nontemporal !0
  ret void
}
|
|
217
|
|
; Loads an i32 through %in and stores it, tagged !nontemporal, to the element
; of the default-address-space pointer %out selected by the workitem id x.
; Expected: a buffer_store_dword in `offen` form that ends with `glc slc`.
; GCN-LABEL: {{^}}nontemporal_private_1
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
define amdgpu_kernel void @nontemporal_private_1(i32 addrspace(4)* %in, i32* %out) {
entry:
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %loaded = load i32, i32 addrspace(4)* %in, align 4
  %dst = getelementptr inbounds i32, i32* %out, i32 %wi.x
  store i32 %loaded, i32* %dst, !nontemporal !0
  ret void
}
|
|
229
|
|
; Loads an i32 through %in and stores it, tagged !nontemporal, through the
; addrspace(1) pointer %out. The GFX8 prefix expects a flat_store_dword and
; the GFX9 prefix a global_store_dword with `off`; both must end in `glc slc`.
; GCN-LABEL: {{^}}nontemporal_global_0
; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
define amdgpu_kernel void @nontemporal_global_0(i32 addrspace(4)* %in, i32 addrspace(1)* %out) {
entry:
  %loaded = load i32, i32 addrspace(4)* %in, align 4
  store i32 %loaded, i32 addrspace(1)* %out, !nontemporal !0
  ret void
}
|
|
240
|
|
; Loads an i32 through %in and stores it, tagged !nontemporal, to the element
; of the addrspace(1) pointer %out selected by the workitem id x. The GFX8
; prefix expects a flat_store_dword and the GFX9 prefix a global_store_dword
; with `off`; both must end in `glc slc`.
; GCN-LABEL: {{^}}nontemporal_global_1
; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
define amdgpu_kernel void @nontemporal_global_1(i32 addrspace(4)* %in, i32 addrspace(1)* %out) {
entry:
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %loaded = load i32, i32 addrspace(4)* %in, align 4
  %dst = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %wi.x
  store i32 %loaded, i32 addrspace(1)* %dst, !nontemporal !0
  ret void
}
|
|
253
|
|
; Loads an i32 through %in and stores it, tagged !nontemporal, through the
; addrspace(3) pointer %out. Expected: a ds_write_b32 whose line ends right
; after the two register operands (the pattern is anchored with `{{$}}`).
; GCN-LABEL: {{^}}nontemporal_local_0
; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @nontemporal_local_0(i32 addrspace(4)* %in, i32 addrspace(3)* %out) {
entry:
  %loaded = load i32, i32 addrspace(4)* %in, align 4
  store i32 %loaded, i32 addrspace(3)* %out, !nontemporal !0
  ret void
}
|
|
263
|
|
; Loads an i32 through %in and stores it, tagged !nontemporal, to the element
; of the addrspace(3) pointer %out selected by the workitem id x. Expected: a
; ds_write_b32 whose line ends right after the two register operands (the
; pattern is anchored with `{{$}}`).
; GCN-LABEL: {{^}}nontemporal_local_1
; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @nontemporal_local_1(i32 addrspace(4)* %in, i32 addrspace(3)* %out) {
entry:
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %loaded = load i32, i32 addrspace(4)* %in, align 4
  %dst = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %wi.x
  store i32 %loaded, i32 addrspace(3)* %dst, !nontemporal !0
  ret void
}
|
|
275
|
|
; Loads an i32 through %in and stores it, tagged !nontemporal, through the
; addrspace(4) pointer %out. Expected: a flat_store_dword that ends with the
; `glc slc` modifiers.
; GCN-LABEL: {{^}}nontemporal_flat_0
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
define amdgpu_kernel void @nontemporal_flat_0(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %loaded = load i32, i32 addrspace(4)* %in, align 4
  store i32 %loaded, i32 addrspace(4)* %out, !nontemporal !0
  ret void
}
|
|
285
|
|
; Loads an i32 through %in and stores it, tagged !nontemporal, to the element
; of the addrspace(4) pointer %out selected by the workitem id x. Expected: a
; flat_store_dword that ends with the `glc slc` modifiers.
; GCN-LABEL: {{^}}nontemporal_flat_1
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
define amdgpu_kernel void @nontemporal_flat_1(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %loaded = load i32, i32 addrspace(4)* %in, align 4
  %dst = getelementptr inbounds i32, i32 addrspace(4)* %out, i32 %wi.x
  store i32 %loaded, i32 addrspace(4)* %dst, !nontemporal !0
  ret void
}
|
|
297
|
|
; Metadata node referenced by the `!nontemporal !0` attachments on the stores
; in the nontemporal_* kernels.
!0 = !{i32 1}
|