Mercurial > hg > CbC > CbC_llvm
comparison test/CodeGen/AMDGPU/ctpop.ll @ 95:afa8332a0e37 LLVM3.8
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Oct 2015 17:48:58 +0900 |
parents | |
children | 1172e4bd9c6f |
comparison
equal
deleted
inserted
replaced
84:f3e34b893a5f | 95:afa8332a0e37 |
---|---|
; Codegen test for the llvm.ctpop intrinsic on i32 and vectors of i32.
; Three llc invocations are piped into FileCheck: amdgcn with -mcpu=SI
; (prefixes GCN, FUNC, SI), amdgcn with -mcpu=tonga (prefixes GCN, FUNC, VI)
; and r600 with -mcpu=cypress (prefixes EG, FUNC).
1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s | |
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s | |
3 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s | |
4 | |
; ctpop intrinsic declarations for i32 and for 2-, 4-, 8- and 16-element
; i32 vectors.
5 declare i32 @llvm.ctpop.i32(i32) nounwind readnone | |
6 declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone | |
7 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone | |
8 declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone | |
9 declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone | |
10 | |
; ctpop of an i32 passed directly as a function argument, stored to %out.
; The GCN checks expect the argument to be fetched with s_load_dword, counted
; with s_bcnt1_i32_b32, copied to a VGPR with v_mov_b32 and then stored.
; The r600 run only expects a BCNT_INT to appear.
11 ; FUNC-LABEL: {{^}}s_ctpop_i32: | |
12 ; GCN: s_load_dword [[SVAL:s[0-9]+]], | |
13 ; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]] | |
14 ; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] | |
15 ; GCN: buffer_store_dword [[VRESULT]], | |
16 ; GCN: s_endpgm | |
17 | |
18 ; EG: BCNT_INT | |
19 define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { | |
20 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone | |
21 store i32 %ctpop, i32 addrspace(1)* %out, align 4 | |
22 ret void | |
23 } | |
24 | |
; ctpop of an i32 loaded through the addrspace(1) pointer %in, stored to %out.
; The GCN checks expect a single v_bcnt_u32_b32_e64 whose first source is the
; loaded value and whose second source operand is the constant 0.
; The r600 run only expects a BCNT_INT to appear.
25 ; XXX - Why 0 in register? | |
26 ; FUNC-LABEL: {{^}}v_ctpop_i32: | |
27 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], | |
28 ; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0 | |
29 ; GCN: buffer_store_dword [[RESULT]], | |
30 ; GCN: s_endpgm | |
31 | |
32 ; EG: BCNT_INT | |
33 define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { | |
34 %val = load i32, i32 addrspace(1)* %in, align 4 | |
35 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone | |
36 store i32 %ctpop, i32 addrspace(1)* %out, align 4 | |
37 ret void | |
38 } | |
39 | |
; Sum of the ctpops of two loaded i32 values.
; The GCN checks expect two chained bcnt instructions and match no separate
; add: the first bcnt (second source 0) produces MIDRESULT, which is then used
; as the second source operand of the second bcnt. The second bcnt is expected
; in its e32 encoding on SI and its e64 encoding on VI.
; The r600 run expects two BCNT_INT instructions.
40 ; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32: | |
41 ; GCN: buffer_load_dword [[VAL1:v[0-9]+]], | |
42 ; GCN: buffer_load_dword [[VAL0:v[0-9]+]], | |
43 ; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0 | |
44 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] | |
45 ; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] | |
46 ; GCN: buffer_store_dword [[RESULT]], | |
47 ; GCN: s_endpgm | |
48 | |
49 ; EG: BCNT_INT | |
50 ; EG: BCNT_INT | |
51 define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind { | |
52 %val0 = load i32, i32 addrspace(1)* %in0, align 4 | |
53 %val1 = load i32, i32 addrspace(1)* %in1, align 4 | |
54 %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone | |
55 %ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) nounwind readnone | |
56 %add = add i32 %ctpop0, %ctpop1 | |
57 store i32 %add, i32 addrspace(1)* %out, align 4 | |
58 ret void | |
59 } | |
60 | |
; ctpop of a loaded i32 plus the i32 argument %sval.
; The -NEXT checks require the load, an s_waitcnt, the bcnt and the store to
; be on consecutive output lines, with an SGPR as the bcnt's second source
; operand. %in1 is accepted but never read. There are no r600 checks here.
61 ; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32: | |
62 ; GCN: buffer_load_dword [[VAL0:v[0-9]+]], | |
63 ; GCN-NEXT: s_waitcnt | |
64 ; GCN-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}} | |
65 ; GCN-NEXT: buffer_store_dword [[RESULT]], | |
66 ; GCN: s_endpgm | |
67 define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind { | |
68 %val0 = load i32, i32 addrspace(1)* %in0, align 4 | |
69 %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone | |
70 %add = add i32 %ctpop0, %sval | |
71 store i32 %add, i32 addrspace(1)* %out, align 4 | |
72 ret void | |
73 } | |
74 | |
; ctpop of a loaded <2 x i32>, stored to %out.
; The checks expect one bcnt per element: two v_bcnt_u32_b32_e64 on GCN and
; two BCNT_INT on the r600 run.
75 ; FUNC-LABEL: {{^}}v_ctpop_v2i32: | |
76 ; GCN: v_bcnt_u32_b32_e64 | |
77 ; GCN: v_bcnt_u32_b32_e64 | |
78 ; GCN: s_endpgm | |
79 | |
80 ; EG: BCNT_INT | |
81 ; EG: BCNT_INT | |
82 define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind { | |
83 %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8 | |
84 %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone | |
85 store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8 | |
86 ret void | |
87 } | |
88 | |
; ctpop of a loaded <4 x i32>, stored to %out.
; The checks expect one bcnt per element: four v_bcnt_u32_b32_e64 on GCN and
; four BCNT_INT on the r600 run.
89 ; FUNC-LABEL: {{^}}v_ctpop_v4i32: | |
90 ; GCN: v_bcnt_u32_b32_e64 | |
91 ; GCN: v_bcnt_u32_b32_e64 | |
92 ; GCN: v_bcnt_u32_b32_e64 | |
93 ; GCN: v_bcnt_u32_b32_e64 | |
94 ; GCN: s_endpgm | |
95 | |
96 ; EG: BCNT_INT | |
97 ; EG: BCNT_INT | |
98 ; EG: BCNT_INT | |
99 ; EG: BCNT_INT | |
100 define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind { | |
101 %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16 | |
102 %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone | |
103 store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16 | |
104 ret void | |
105 } | |
106 | |
; ctpop of a loaded <8 x i32>, stored to %out.
; The checks expect one bcnt per element: eight v_bcnt_u32_b32_e64 on GCN and
; eight BCNT_INT on the r600 run.
107 ; FUNC-LABEL: {{^}}v_ctpop_v8i32: | |
108 ; GCN: v_bcnt_u32_b32_e64 | |
109 ; GCN: v_bcnt_u32_b32_e64 | |
110 ; GCN: v_bcnt_u32_b32_e64 | |
111 ; GCN: v_bcnt_u32_b32_e64 | |
112 ; GCN: v_bcnt_u32_b32_e64 | |
113 ; GCN: v_bcnt_u32_b32_e64 | |
114 ; GCN: v_bcnt_u32_b32_e64 | |
115 ; GCN: v_bcnt_u32_b32_e64 | |
116 ; GCN: s_endpgm | |
117 | |
118 ; EG: BCNT_INT | |
119 ; EG: BCNT_INT | |
120 ; EG: BCNT_INT | |
121 ; EG: BCNT_INT | |
122 ; EG: BCNT_INT | |
123 ; EG: BCNT_INT | |
124 ; EG: BCNT_INT | |
125 ; EG: BCNT_INT | |
126 define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind { | |
127 %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32 | |
128 %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone | |
129 store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32 | |
130 ret void | |
131 } | |
132 | |
; ctpop of a loaded <16 x i32>, stored to %out.
; The checks expect one bcnt per element: sixteen v_bcnt_u32_b32_e64 on GCN
; and sixteen BCNT_INT on the r600 run.
; Note that the load and store here use align 32, the same value as the
; <8 x i32> case.
133 ; FUNC-LABEL: {{^}}v_ctpop_v16i32: | |
134 ; GCN: v_bcnt_u32_b32_e64 | |
135 ; GCN: v_bcnt_u32_b32_e64 | |
136 ; GCN: v_bcnt_u32_b32_e64 | |
137 ; GCN: v_bcnt_u32_b32_e64 | |
138 ; GCN: v_bcnt_u32_b32_e64 | |
139 ; GCN: v_bcnt_u32_b32_e64 | |
140 ; GCN: v_bcnt_u32_b32_e64 | |
141 ; GCN: v_bcnt_u32_b32_e64 | |
142 ; GCN: v_bcnt_u32_b32_e64 | |
143 ; GCN: v_bcnt_u32_b32_e64 | |
144 ; GCN: v_bcnt_u32_b32_e64 | |
145 ; GCN: v_bcnt_u32_b32_e64 | |
146 ; GCN: v_bcnt_u32_b32_e64 | |
147 ; GCN: v_bcnt_u32_b32_e64 | |
148 ; GCN: v_bcnt_u32_b32_e64 | |
149 ; GCN: v_bcnt_u32_b32_e64 | |
150 ; GCN: s_endpgm | |
151 | |
152 ; EG: BCNT_INT | |
153 ; EG: BCNT_INT | |
154 ; EG: BCNT_INT | |
155 ; EG: BCNT_INT | |
156 ; EG: BCNT_INT | |
157 ; EG: BCNT_INT | |
158 ; EG: BCNT_INT | |
159 ; EG: BCNT_INT | |
160 ; EG: BCNT_INT | |
161 ; EG: BCNT_INT | |
162 ; EG: BCNT_INT | |
163 ; EG: BCNT_INT | |
164 ; EG: BCNT_INT | |
165 ; EG: BCNT_INT | |
166 ; EG: BCNT_INT | |
167 ; EG: BCNT_INT | |
168 define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind { | |
169 %val = load <16 x i32>, <16 x i32> addrspace(1)* %in, align 32 | |
170 %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone | |
171 store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32 | |
172 ret void | |
173 } | |
174 | |
; ctpop of a loaded i32 plus the constant 4 (constant on the right-hand side
; of the add).
; The GCN checks expect the 4 to appear directly as the second source operand
; of a single v_bcnt_u32_b32_e64; no separate add is matched.
; The r600 run only expects a BCNT_INT to appear.
175 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant: | |
176 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], | |
177 ; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 | |
178 ; GCN: buffer_store_dword [[RESULT]], | |
179 ; GCN: s_endpgm | |
180 | |
181 ; EG: BCNT_INT | |
182 define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { | |
183 %val = load i32, i32 addrspace(1)* %in, align 4 | |
184 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone | |
185 %add = add i32 %ctpop, 4 | |
186 store i32 %add, i32 addrspace(1)* %out, align 4 | |
187 ret void | |
188 } | |
189 | |
; Same as the inline-constant case but with the add operands swapped
; (4 + ctpop). The expected output is identical: the 4 appears as the second
; source operand of a single v_bcnt_u32_b32_e64.
; The r600 run only expects a BCNT_INT to appear.
190 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv: | |
191 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], | |
192 ; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 | |
193 ; GCN: buffer_store_dword [[RESULT]], | |
194 ; GCN: s_endpgm | |
195 | |
196 ; EG: BCNT_INT | |
197 define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { | |
198 %val = load i32, i32 addrspace(1)* %in, align 4 | |
199 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone | |
200 %add = add i32 4, %ctpop | |
201 store i32 %add, i32 addrspace(1)* %out, align 4 | |
202 ret void | |
203 } | |
204 | |
; ctpop of a loaded i32 plus the constant 99999 (0x1869f).
; The GCN checks expect the constant to be moved into a VGPR with v_mov_b32
; first and that VGPR to be used as the second source operand of the bcnt,
; which is expected in its e32 encoding on SI and its e64 encoding on VI.
; There are no r600 checks here.
205 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal: | |
206 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], | |
207 ; GCN: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f | |
208 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]] | |
209 ; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]] | |
210 ; GCN: buffer_store_dword [[RESULT]], | |
211 ; GCN: s_endpgm | |
212 define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { | |
213 %val = load i32, i32 addrspace(1)* %in, align 4 | |
214 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone | |
215 %add = add i32 %ctpop, 99999 | |
216 store i32 %add, i32 addrspace(1)* %out, align 4 | |
217 ret void | |
218 } | |
219 | |
; ctpop of a loaded i32 plus the i32 argument %const.
; The -DAG checks allow the vector load of the value and the s_load_dword of
; the argument to appear in either order. The SGPR holding the argument is
; then expected directly as the second source operand of v_bcnt_u32_b32_e64.
; The r600 run only expects a BCNT_INT to appear.
220 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_var: | |
221 ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], | |
222 ; GCN-DAG: s_load_dword [[VAR:s[0-9]+]], | |
223 ; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] | |
224 ; GCN: buffer_store_dword [[RESULT]], | |
225 ; GCN: s_endpgm | |
226 | |
227 ; EG: BCNT_INT | |
228 define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind { | |
229 %val = load i32, i32 addrspace(1)* %in, align 4 | |
230 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone | |
231 %add = add i32 %ctpop, %const | |
232 store i32 %add, i32 addrspace(1)* %out, align 4 | |
233 ret void | |
234 } | |
235 | |
; Same as the add-argument case but with the add operands swapped
; (%const + ctpop). The expected output is identical: the SGPR holding the
; argument is the second source operand of v_bcnt_u32_b32_e64.
; The r600 run only expects a BCNT_INT to appear.
236 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv: | |
237 ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], | |
238 ; GCN-DAG: s_load_dword [[VAR:s[0-9]+]], | |
239 ; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] | |
240 ; GCN: buffer_store_dword [[RESULT]], | |
241 ; GCN: s_endpgm | |
242 | |
243 ; EG: BCNT_INT | |
244 define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind { | |
245 %val = load i32, i32 addrspace(1)* %in, align 4 | |
246 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone | |
247 %add = add i32 %const, %ctpop | |
248 store i32 %add, i32 addrspace(1)* %out, align 4 | |
249 ret void | |
250 } | |
251 | |
; ctpop of a loaded i32 added to a second value loaded from memory.
; The addend is read from element 4 of %constptr; the checks expect that load
; to carry "offset:16" while the load of %in has a plain 0 offset ending the
; line. The two loads may appear in either order (-DAG). The loaded addend is
; expected as the second source operand of the bcnt, in its e32 encoding on SI
; and its e64 encoding on VI.
; The r600 run only expects a BCNT_INT to appear.
252 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv: | |
253 ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}} | |
254 ; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16 | |
255 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] | |
256 ; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] | |
257 ; GCN: buffer_store_dword [[RESULT]], | |
258 ; GCN: s_endpgm | |
259 | |
260 ; EG: BCNT_INT | |
261 define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind { | |
262 %val = load i32, i32 addrspace(1)* %in, align 4 | |
263 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone | |
264 %gep = getelementptr i32, i32 addrspace(1)* %constptr, i32 4 | |
265 %const = load i32, i32 addrspace(1)* %gep, align 4 | |
266 %add = add i32 %const, %ctpop | |
267 store i32 %add, i32 addrspace(1)* %out, align 4 | |
268 ret void | |
269 } | |
270 | |
271 ; FIXME: We currently disallow SALU instructions in all branches, | |
272 ; but there are some cases when they should be allowed. | |
273 | |
; ctpop computed in only one arm of a branch.
; %entry branches on %cond == 0: the %if block takes ctpop of the i32 argument
; %ctpop_arg, the %else block loads element 1 of %in, and %endif stores the
; phi of the two to %out.
; The GCN checks expect the argument to be fetched with s_load_dword (at
; offset 0xd on SI and 0x34 on VI), counted with the scalar s_bcnt1_i32_b32,
; copied to a VGPR and stored. The r600 run only expects a BCNT_INT.
; RESULT is captured on the v_mov line of this function so that the match
; does not depend on a binding left over from an earlier function in the file
; (FileCheck pattern variables are not reset at a -LABEL directive by default).
274 ; FUNC-LABEL: {{^}}ctpop_i32_in_br: | |
275 ; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd | |
276 ; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x34 | |
277 ; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]] | |
278 ; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], [[SRESULT]] | |
279 ; GCN: buffer_store_dword [[RESULT]], | |
280 ; GCN: s_endpgm | |
281 ; EG: BCNT_INT | |
282 define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) { | |
283 entry: | |
284 %tmp0 = icmp eq i32 %cond, 0 | |
285 br i1 %tmp0, label %if, label %else | |
286 | |
287 if: | |
288 %tmp2 = call i32 @llvm.ctpop.i32(i32 %ctpop_arg) | |
289 br label %endif | |
290 | |
291 else: | |
292 %tmp3 = getelementptr i32, i32 addrspace(1)* %in, i32 1 | |
293 %tmp4 = load i32, i32 addrspace(1)* %tmp3 | |
294 br label %endif | |
295 | |
296 endif: | |
297 %tmp5 = phi i32 [%tmp2, %if], [%tmp4, %else] | |
298 store i32 %tmp5, i32 addrspace(1)* %out | |
299 ret void | |
300 } | |