annotate llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll @ 207:2e18cbf3894f

LLVM12
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Tue, 08 Jun 2021 06:07:14 +0900
parents 1d019706d866
children 5f17cb93ff66
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
150
anatofuz
parents:
diff changeset
1 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
anatofuz
parents:
diff changeset
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
anatofuz
parents:
diff changeset
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
anatofuz
parents:
diff changeset
4 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -stop-before=machine-scheduler < %s | FileCheck -enable-var-scope -check-prefixes=MIR %s
anatofuz
parents:
diff changeset
5
anatofuz
parents:
diff changeset
6 declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
anatofuz
parents:
diff changeset
7 declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
anatofuz
parents:
diff changeset
8 declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
anatofuz
parents:
diff changeset
9
anatofuz
parents:
diff changeset
10 declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
anatofuz
parents:
diff changeset
11 declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
anatofuz
parents:
diff changeset
12 declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
anatofuz
parents:
diff changeset
13
anatofuz
parents:
diff changeset
14 declare i32 @llvm.amdgcn.workitem.id.x() #1
anatofuz
parents:
diff changeset
15
anatofuz
parents:
diff changeset
16 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32:
anatofuz
parents:
diff changeset
17 ; CIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
18 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
19
anatofuz
parents:
diff changeset
20 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
anatofuz
parents:
diff changeset
21 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
anatofuz
parents:
diff changeset
22 ; MIR-LABEL: @lds_atomic_inc_ret_i32
anatofuz
parents:
diff changeset
23 ; MIR: DS_INC_RTN_U32 {{.*}} :: (load store 4 on %{{.*}}, !noalias !{{[0-9]+}}, addrspace 3)
anatofuz
parents:
diff changeset
24 define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
anatofuz
parents:
diff changeset
25 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false), !noalias !0
anatofuz
parents:
diff changeset
26 store i32 %result, i32 addrspace(1)* %out
anatofuz
parents:
diff changeset
27 ret void
anatofuz
parents:
diff changeset
28 }
anatofuz
parents:
diff changeset
29
anatofuz
parents:
diff changeset
30 !0 = distinct !{!0, !"noalias-scope"}
anatofuz
parents:
diff changeset
31
anatofuz
parents:
diff changeset
32 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
anatofuz
parents:
diff changeset
33 ; CIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
34 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
35
anatofuz
parents:
diff changeset
36 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
anatofuz
parents:
diff changeset
37 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
anatofuz
parents:
diff changeset
38 define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
anatofuz
parents:
diff changeset
39 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
40 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
41 store i32 %result, i32 addrspace(1)* %out
anatofuz
parents:
diff changeset
42 ret void
anatofuz
parents:
diff changeset
43 }
anatofuz
parents:
diff changeset
44
anatofuz
parents:
diff changeset
45 ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32:
anatofuz
parents:
diff changeset
46 ; CIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
47 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
48
anatofuz
parents:
diff changeset
49 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
anatofuz
parents:
diff changeset
50 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 42
anatofuz
parents:
diff changeset
51 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
anatofuz
parents:
diff changeset
52 ; GCN: ds_inc_u32 [[VPTR]], [[DATA]]
anatofuz
parents:
diff changeset
53 define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
54 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
55 ret void
anatofuz
parents:
diff changeset
56 }
anatofuz
parents:
diff changeset
57
anatofuz
parents:
diff changeset
58 ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
anatofuz
parents:
diff changeset
59 ; CIVI-DAG: s_mov_b32 m0
anatofuz
parents:
diff changeset
60 ; GFX9-NOT: m0
anatofuz
parents:
diff changeset
61
anatofuz
parents:
diff changeset
62 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
anatofuz
parents:
diff changeset
63 ; GCN: ds_inc_u32 v{{[0-9]+}}, [[K]] offset:16
anatofuz
parents:
diff changeset
64 define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
65 %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
66 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
67 ret void
anatofuz
parents:
diff changeset
68 }
anatofuz
parents:
diff changeset
69
anatofuz
parents:
diff changeset
70 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32:
anatofuz
parents:
diff changeset
71 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
anatofuz
parents:
diff changeset
72 ; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
73 ; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
150
anatofuz
parents:
diff changeset
74 define amdgpu_kernel void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
anatofuz
parents:
diff changeset
75 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
76 store i32 %result, i32 addrspace(1)* %out
anatofuz
parents:
diff changeset
77 ret void
anatofuz
parents:
diff changeset
78 }
anatofuz
parents:
diff changeset
79
anatofuz
parents:
diff changeset
80 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset:
anatofuz
parents:
diff changeset
81 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
anatofuz
parents:
diff changeset
82 ; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
83 ; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
150
anatofuz
parents:
diff changeset
84 define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
anatofuz
parents:
diff changeset
85 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
anatofuz
parents:
diff changeset
86 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
87 store i32 %result, i32 addrspace(1)* %out
anatofuz
parents:
diff changeset
88 ret void
anatofuz
parents:
diff changeset
89 }
anatofuz
parents:
diff changeset
90
anatofuz
parents:
diff changeset
91 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i32:
anatofuz
parents:
diff changeset
92 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
anatofuz
parents:
diff changeset
93 ; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
94 ; GFX9: global_atomic_inc v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]$}}
150
anatofuz
parents:
diff changeset
95 define amdgpu_kernel void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind {
anatofuz
parents:
diff changeset
96 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
97 ret void
anatofuz
parents:
diff changeset
98 }
anatofuz
parents:
diff changeset
99
anatofuz
parents:
diff changeset
100 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset:
anatofuz
parents:
diff changeset
101 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
anatofuz
parents:
diff changeset
102 ; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
103 ; GFX9: global_atomic_inc v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
150
anatofuz
parents:
diff changeset
104 define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
anatofuz
parents:
diff changeset
105 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
anatofuz
parents:
diff changeset
106 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
107 ret void
anatofuz
parents:
diff changeset
108 }
anatofuz
parents:
diff changeset
109
anatofuz
parents:
diff changeset
110 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset_addr64:
anatofuz
parents:
diff changeset
111 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
anatofuz
parents:
diff changeset
112 ; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
anatofuz
parents:
diff changeset
113 ; VI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
anatofuz
parents:
diff changeset
114 define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
anatofuz
parents:
diff changeset
115 %id = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
116 %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
anatofuz
parents:
diff changeset
117 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
anatofuz
parents:
diff changeset
118 %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
anatofuz
parents:
diff changeset
119 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
120 store i32 %result, i32 addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
121 ret void
anatofuz
parents:
diff changeset
122 }
anatofuz
parents:
diff changeset
123
anatofuz
parents:
diff changeset
124 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset_addr64:
anatofuz
parents:
diff changeset
125 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
anatofuz
parents:
diff changeset
126 ; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
anatofuz
parents:
diff changeset
127 ; VI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
anatofuz
parents:
diff changeset
128 define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
anatofuz
parents:
diff changeset
129 %id = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
130 %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
anatofuz
parents:
diff changeset
131 %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
anatofuz
parents:
diff changeset
132 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
133 ret void
anatofuz
parents:
diff changeset
134 }
anatofuz
parents:
diff changeset
135
anatofuz
parents:
diff changeset
136 @lds0 = addrspace(3) global [512 x i32] undef, align 4
anatofuz
parents:
diff changeset
137
anatofuz
parents:
diff changeset
138 ; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i32:
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
139 ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
150
anatofuz
parents:
diff changeset
140 ; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
anatofuz
parents:
diff changeset
141 define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
anatofuz
parents:
diff changeset
142 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
anatofuz
parents:
diff changeset
143 %idx.0 = add nsw i32 %tid.x, 2
anatofuz
parents:
diff changeset
144 %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
anatofuz
parents:
diff changeset
145 %val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
146 store i32 %idx.0, i32 addrspace(1)* %add_use
anatofuz
parents:
diff changeset
147 store i32 %val0, i32 addrspace(1)* %out
anatofuz
parents:
diff changeset
148 ret void
anatofuz
parents:
diff changeset
149 }
anatofuz
parents:
diff changeset
150
anatofuz
parents:
diff changeset
151 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
anatofuz
parents:
diff changeset
152 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
anatofuz
parents:
diff changeset
153 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
154 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
anatofuz
parents:
diff changeset
155 define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
anatofuz
parents:
diff changeset
156 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
157 store i64 %result, i64 addrspace(1)* %out
anatofuz
parents:
diff changeset
158 ret void
anatofuz
parents:
diff changeset
159 }
anatofuz
parents:
diff changeset
160
anatofuz
parents:
diff changeset
161 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
anatofuz
parents:
diff changeset
162 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
anatofuz
parents:
diff changeset
163 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
164 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
anatofuz
parents:
diff changeset
165 define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
anatofuz
parents:
diff changeset
166 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
167 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
168 store i64 %result, i64 addrspace(1)* %out
anatofuz
parents:
diff changeset
169 ret void
anatofuz
parents:
diff changeset
170 }
anatofuz
parents:
diff changeset
171
anatofuz
parents:
diff changeset
172 ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64:
anatofuz
parents:
diff changeset
173 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
anatofuz
parents:
diff changeset
174 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
175 ; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
anatofuz
parents:
diff changeset
176 define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
177 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
178 ret void
anatofuz
parents:
diff changeset
179 }
anatofuz
parents:
diff changeset
180
anatofuz
parents:
diff changeset
181 ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
anatofuz
parents:
diff changeset
182 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
anatofuz
parents:
diff changeset
183 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
184 ; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
anatofuz
parents:
diff changeset
185 define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
anatofuz
parents:
diff changeset
186 %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
anatofuz
parents:
diff changeset
187 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
188 ret void
anatofuz
parents:
diff changeset
189 }
anatofuz
parents:
diff changeset
190
anatofuz
parents:
diff changeset
191 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64:
anatofuz
parents:
diff changeset
192 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
193 ; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
150
anatofuz
parents:
diff changeset
194 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
195 ; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
196 ; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
150
anatofuz
parents:
diff changeset
197 define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
anatofuz
parents:
diff changeset
198 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
199 store i64 %result, i64 addrspace(1)* %out
anatofuz
parents:
diff changeset
200 ret void
anatofuz
parents:
diff changeset
201 }
anatofuz
parents:
diff changeset
202
anatofuz
parents:
diff changeset
203 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset:
anatofuz
parents:
diff changeset
204 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
205 ; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
206 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
150
anatofuz
parents:
diff changeset
207 ; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
208 ; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
150
anatofuz
parents:
diff changeset
209 define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
anatofuz
parents:
diff changeset
210 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
anatofuz
parents:
diff changeset
211 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
212 store i64 %result, i64 addrspace(1)* %out
anatofuz
parents:
diff changeset
213 ret void
anatofuz
parents:
diff changeset
214 }
anatofuz
parents:
diff changeset
215
anatofuz
parents:
diff changeset
216 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64:
anatofuz
parents:
diff changeset
217 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
218 ; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
150
anatofuz
parents:
diff changeset
219 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
220 ; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
anatofuz
parents:
diff changeset
221
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
222 ; GFX9: global_atomic_inc_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]$}}
150
anatofuz
parents:
diff changeset
223 define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind {
anatofuz
parents:
diff changeset
224 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
225 ret void
anatofuz
parents:
diff changeset
226 }
anatofuz
parents:
diff changeset
227
anatofuz
parents:
diff changeset
228 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset:
anatofuz
parents:
diff changeset
229 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
230 ; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
150
anatofuz
parents:
diff changeset
231 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
232 ; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
233 ; GFX9: global_atomic_inc_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
150
anatofuz
parents:
diff changeset
234 define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
anatofuz
parents:
diff changeset
235 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
anatofuz
parents:
diff changeset
236 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
237 ret void
anatofuz
parents:
diff changeset
238 }
anatofuz
parents:
diff changeset
239
anatofuz
parents:
diff changeset
240 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset_addr64:
anatofuz
parents:
diff changeset
241 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
anatofuz
parents:
diff changeset
242 ; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
anatofuz
parents:
diff changeset
243 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
244 ; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
anatofuz
parents:
diff changeset
245 ; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
anatofuz
parents:
diff changeset
246 define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
anatofuz
parents:
diff changeset
247 %id = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
248 %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
anatofuz
parents:
diff changeset
249 %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
anatofuz
parents:
diff changeset
250 %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
anatofuz
parents:
diff changeset
251 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
252 store i64 %result, i64 addrspace(1)* %out.gep
anatofuz
parents:
diff changeset
253 ret void
anatofuz
parents:
diff changeset
254 }
anatofuz
parents:
diff changeset
255
anatofuz
parents:
diff changeset
256 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset_addr64:
anatofuz
parents:
diff changeset
257 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
anatofuz
parents:
diff changeset
258 ; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
anatofuz
parents:
diff changeset
259 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
260 ; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
anatofuz
parents:
diff changeset
261 ; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
anatofuz
parents:
diff changeset
262 define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
anatofuz
parents:
diff changeset
263 %id = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
264 %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
anatofuz
parents:
diff changeset
265 %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
anatofuz
parents:
diff changeset
266 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
267 ret void
anatofuz
parents:
diff changeset
268 }
anatofuz
parents:
diff changeset
269
anatofuz
parents:
diff changeset
270 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32:
anatofuz
parents:
diff changeset
271 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
anatofuz
parents:
diff changeset
272 ; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
anatofuz
parents:
diff changeset
273 define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 {
anatofuz
parents:
diff changeset
274 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
275 store i32 %result, i32* %out
anatofuz
parents:
diff changeset
276 ret void
anatofuz
parents:
diff changeset
277 }
anatofuz
parents:
diff changeset
278
anatofuz
parents:
diff changeset
279 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset:
anatofuz
parents:
diff changeset
280 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
anatofuz
parents:
diff changeset
281 ; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
anatofuz
parents:
diff changeset
282 ; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
anatofuz
parents:
diff changeset
283 define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) #0 {
anatofuz
parents:
diff changeset
284 %gep = getelementptr i32, i32* %ptr, i32 4
anatofuz
parents:
diff changeset
285 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
286 store i32 %result, i32* %out
anatofuz
parents:
diff changeset
287 ret void
anatofuz
parents:
diff changeset
288 }
anatofuz
parents:
diff changeset
289
anatofuz
parents:
diff changeset
290 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32:
anatofuz
parents:
diff changeset
291 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
anatofuz
parents:
diff changeset
292 ; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
anatofuz
parents:
diff changeset
293 define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind {
anatofuz
parents:
diff changeset
294 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
295 ret void
anatofuz
parents:
diff changeset
296 }
anatofuz
parents:
diff changeset
297
anatofuz
parents:
diff changeset
298 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset:
anatofuz
parents:
diff changeset
299 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
anatofuz
parents:
diff changeset
300 ; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
anatofuz
parents:
diff changeset
301 ; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
anatofuz
parents:
diff changeset
302 define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind {
anatofuz
parents:
diff changeset
303 %gep = getelementptr i32, i32* %ptr, i32 4
anatofuz
parents:
diff changeset
304 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
305 ret void
anatofuz
parents:
diff changeset
306 }
anatofuz
parents:
diff changeset
307
anatofuz
parents:
diff changeset
308 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset_addr64:
anatofuz
parents:
diff changeset
309 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
anatofuz
parents:
diff changeset
310 ; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
anatofuz
parents:
diff changeset
311 ; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
anatofuz
parents:
diff changeset
312 define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
anatofuz
parents:
diff changeset
313 %id = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
314 %gep.tid = getelementptr i32, i32* %ptr, i32 %id
anatofuz
parents:
diff changeset
315 %out.gep = getelementptr i32, i32* %out, i32 %id
anatofuz
parents:
diff changeset
316 %gep = getelementptr i32, i32* %gep.tid, i32 5
anatofuz
parents:
diff changeset
317 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
318 store i32 %result, i32* %out.gep
anatofuz
parents:
diff changeset
319 ret void
anatofuz
parents:
diff changeset
320 }
anatofuz
parents:
diff changeset
321
anatofuz
parents:
diff changeset
322 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset_addr64:
anatofuz
parents:
diff changeset
323 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
anatofuz
parents:
diff changeset
324 ; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
anatofuz
parents:
diff changeset
325 ; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
anatofuz
parents:
diff changeset
326 define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 {
anatofuz
parents:
diff changeset
327 %id = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
328 %gep.tid = getelementptr i32, i32* %ptr, i32 %id
anatofuz
parents:
diff changeset
329 %gep = getelementptr i32, i32* %gep.tid, i32 5
anatofuz
parents:
diff changeset
330 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
331 ret void
anatofuz
parents:
diff changeset
332 }
anatofuz
parents:
diff changeset
333
anatofuz
parents:
diff changeset
334 @lds1 = addrspace(3) global [512 x i64] undef, align 8
anatofuz
parents:
diff changeset
335
anatofuz
parents:
diff changeset
336 ; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i64:
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
337 ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
150
anatofuz
parents:
diff changeset
338 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
anatofuz
parents:
diff changeset
339 define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
anatofuz
parents:
diff changeset
340 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
anatofuz
parents:
diff changeset
341 %idx.0 = add nsw i32 %tid.x, 2
anatofuz
parents:
diff changeset
342 %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
anatofuz
parents:
diff changeset
343 %val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
344 store i32 %idx.0, i32 addrspace(1)* %add_use
anatofuz
parents:
diff changeset
345 store i64 %val0, i64 addrspace(1)* %out
anatofuz
parents:
diff changeset
346 ret void
anatofuz
parents:
diff changeset
347 }
anatofuz
parents:
diff changeset
348
anatofuz
parents:
diff changeset
349 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64:
anatofuz
parents:
diff changeset
350 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
anatofuz
parents:
diff changeset
351 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
352 ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
anatofuz
parents:
diff changeset
353 define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 {
anatofuz
parents:
diff changeset
354 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
355 store i64 %result, i64* %out
anatofuz
parents:
diff changeset
356 ret void
anatofuz
parents:
diff changeset
357 }
anatofuz
parents:
diff changeset
358
anatofuz
parents:
diff changeset
; Test kernel: 64-bit flat atomic inc with operand 42 and a live result,
; where the pointer is first advanced by 4 i64 elements (32 bytes).
; Under the CIVI prefix the instruction line must end right after glc, so
; no offset operand may be printed there. Under the GFX9 prefix the
; instruction is expected to print an immediate offset of 32 ahead of glc.
359 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset:
anatofuz
parents:
diff changeset
360 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
anatofuz
parents:
diff changeset
361 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
362 ; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
anatofuz
parents:
diff changeset
363 ; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
anatofuz
parents:
diff changeset
364 define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 {
anatofuz
parents:
diff changeset
365 %gep = getelementptr i64, i64* %ptr, i32 4
anatofuz
parents:
diff changeset
366 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
367 store i64 %result, i64* %out
anatofuz
parents:
diff changeset
368 ret void
anatofuz
parents:
diff changeset
369 }
anatofuz
parents:
diff changeset
370
anatofuz
parents:
diff changeset
; Test kernel: 64-bit flat atomic inc with operand 42 whose result is never
; used.
; The check expects a flat_atomic_inc_x2 with only two operands (an address
; pair and the 42/0 data pair) and requires the line to end right after the
; data operand, so neither a destination pair nor glc may be printed.
; This kernel spells nounwind inline rather than referencing attribute
; group #0; that group is defined as { nounwind } at the end of the file.
371 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64:
anatofuz
parents:
diff changeset
372 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
anatofuz
parents:
diff changeset
373 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
374 ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
anatofuz
parents:
diff changeset
375 define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind {
anatofuz
parents:
diff changeset
376 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
377 ret void
anatofuz
parents:
diff changeset
378 }
anatofuz
parents:
diff changeset
379
anatofuz
parents:
diff changeset
; Test kernel: 64-bit flat atomic inc with operand 42 and an unused result,
; where the pointer is first advanced by 4 i64 elements (32 bytes).
; Under the CIVI prefix the two-operand instruction line must end right
; after the data pair (no offset, no glc). Under the GFX9 prefix the line is
; expected to end with an immediate offset of 32 and nothing after it.
; nounwind is written inline here; attribute group #0 at the end of the file
; has the same contents.
380 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset:
anatofuz
parents:
diff changeset
381 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
anatofuz
parents:
diff changeset
382 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
383 ; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
anatofuz
parents:
diff changeset
384 ; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
anatofuz
parents:
diff changeset
385 define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind {
anatofuz
parents:
diff changeset
386 %gep = getelementptr i64, i64* %ptr, i32 4
anatofuz
parents:
diff changeset
387 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
388 ret void
anatofuz
parents:
diff changeset
389 }
anatofuz
parents:
diff changeset
390
anatofuz
parents:
diff changeset
; Test kernel: 64-bit flat atomic inc with operand 42 on a per-workitem
; address. %ptr is indexed by workitem id x and then advanced by a further
; 5 i64 elements (40 bytes); the result is stored to %out indexed by the
; same workitem id.
; Unlike the uniform-pointer flat tests, the two v_mov checks here are plain
; ordered checks, so 42 must be materialized before 0.
; Under the CIVI prefix the instruction line must end right after glc (no
; offset operand). Under the GFX9 prefix an immediate offset of 40 is
; expected ahead of glc.
391 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset_addr64:
anatofuz
parents:
diff changeset
392 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
anatofuz
parents:
diff changeset
393 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
394 ; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
anatofuz
parents:
diff changeset
395 ; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
anatofuz
parents:
diff changeset
396 define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
anatofuz
parents:
diff changeset
397 %id = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
398 %gep.tid = getelementptr i64, i64* %ptr, i32 %id
anatofuz
parents:
diff changeset
399 %out.gep = getelementptr i64, i64* %out, i32 %id
anatofuz
parents:
diff changeset
400 %gep = getelementptr i64, i64* %gep.tid, i32 5
anatofuz
parents:
diff changeset
401 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
402 store i64 %result, i64* %out.gep
anatofuz
parents:
diff changeset
403 ret void
anatofuz
parents:
diff changeset
404 }
anatofuz
parents:
diff changeset
405
anatofuz
parents:
diff changeset
; Test kernel: 64-bit flat atomic inc with operand 42 and an unused result
; on a per-workitem address. %ptr is indexed by workitem id x and then
; advanced by a further 5 i64 elements (40 bytes).
; The two v_mov checks are plain ordered checks, so 42 must be materialized
; before 0.
; Under the CIVI prefix the two-operand instruction line must end right
; after the data pair (no offset, no glc). Under the GFX9 prefix the line is
; expected to end with an immediate offset of 40 and nothing after it.
406 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset_addr64:
anatofuz
parents:
diff changeset
407 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
anatofuz
parents:
diff changeset
408 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
anatofuz
parents:
diff changeset
409 ; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
anatofuz
parents:
diff changeset
410 ; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
anatofuz
parents:
diff changeset
411 define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 {
anatofuz
parents:
diff changeset
412 %id = call i32 @llvm.amdgcn.workitem.id.x()
anatofuz
parents:
diff changeset
413 %gep.tid = getelementptr i64, i64* %ptr, i32 %id
anatofuz
parents:
diff changeset
414 %gep = getelementptr i64, i64* %gep.tid, i32 5
anatofuz
parents:
diff changeset
415 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
416 ret void
anatofuz
parents:
diff changeset
417 }
anatofuz
parents:
diff changeset
418
anatofuz
parents:
diff changeset
; Test kernel: two textually identical 32-bit atomic inc calls on the same
; address space 3 pointer with operand 42; each result is stored to its own
; output pointer.
; The checks expect 42 to be moved into a register captured as K, followed
; by two ds_inc_rtn_u32 instructions that both take K as the data operand.
; Requiring two instructions means the second call must not be folded into
; the first even though the operands match.
419 ; GCN-LABEL: {{^}}nocse_lds_atomic_inc_ret_i32:
anatofuz
parents:
diff changeset
420 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
anatofuz
parents:
diff changeset
421 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
anatofuz
parents:
diff changeset
422 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
anatofuz
parents:
diff changeset
423 define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(3)* %ptr) #0 {
anatofuz
parents:
diff changeset
424 %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
425 %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
anatofuz
parents:
diff changeset
426
anatofuz
parents:
diff changeset
427 store i32 %result0, i32 addrspace(1)* %out0
anatofuz
parents:
diff changeset
428 store i32 %result1, i32 addrspace(1)* %out1
anatofuz
parents:
diff changeset
429 ret void
anatofuz
parents:
diff changeset
430 }
anatofuz
parents:
diff changeset
431
anatofuz
parents:
diff changeset
; Attribute groups used throughout this test.
; #0 is attached to the test kernels, #1 to the workitem id intrinsic
; declaration and its call sites, and #2 to the atomic inc intrinsic
; declarations at the top of the file.
432 attributes #0 = { nounwind }
anatofuz
parents:
diff changeset
433 attributes #1 = { nounwind readnone }
anatofuz
parents:
diff changeset
434 attributes #2 = { nounwind argmemonly }